Source code for json2xml.dicttoxml

from __future__ import annotations

import datetime
import logging
import numbers
from collections.abc import Callable, Sequence
from decimal import Decimal
from fractions import Fraction
from functools import lru_cache
from random import SystemRandom
from typing import Any, Union, cast

# NOTE(review): presumably read by a lazy-import mechanism; the only use of
# defusedxml.minidom in this module is the function-local import inside
# key_is_valid_xml -- confirm who consumes this list.
__lazy_modules__ = ["defusedxml.minidom"]

# Create a safe random number generator
_SAFE_RANDOM = SystemRandom()

# Set up logging
LOG = logging.getLogger("dicttoxml")
# Characters that escape_xml replaces with entities; used as a quick
# "nothing to escape" test before running the replacements.
_XML_ESCAPE_CHARS = frozenset("&\"'<>")


def make_id(element: str, start: int = 100000, end: int = 999999) -> str:
    """
    Build a randomised identifier for an element.

    Args:
        element (str): Name used as the identifier prefix.
        start (int, optional): Smallest random suffix that may be drawn.
            Defaults to 100000.
        end (int, optional): Largest random suffix that may be drawn.
            Defaults to 999999.

    Returns:
        str: ``element`` followed by an underscore and the random suffix.
    """
    suffix = _SAFE_RANDOM.randint(start, end)
    return f"{element}_{suffix}"
def get_unique_id(element: str) -> str:
    """
    Generate an ID for a given element.

    The ID comes straight from ``make_id``. No record of previously issued
    IDs is kept between calls, so uniqueness relies on the random suffix
    alone and is not enforced.

    Args:
        element (str): The element to generate an ID for.

    Returns:
        str: The generated ID.
    """
    # The earlier duplicate-check loop compared against a list that started
    # empty on every call, so it always accepted the first candidate.
    return make_id(element)
# Type alias used to annotate the ``obj`` argument of dicttoxml.
ELEMENT = Union[
    str,
    int,
    float,
    bool,
    complex,
    Decimal,
    Fraction,
    numbers.Number,
    Sequence[Any],
    datetime.datetime,
    datetime.date,
    None,
    dict[str, Any],
]
def get_xml_type(val: Any) -> str:
    """
    Get the XML type of a given value.

    Args:
        val (ELEMENT): The value to get the type of.

    Returns:
        str: One of "null", "str", "int", "float", "bool", "number", "dict",
        "list", or the value's class name when none of those apply.
    """
    if val is None:
        return "null"

    # Exact-type checks first: they are cheap and keep bool apart from int.
    val_type = type(val)
    if val_type is str:
        return "str"
    if val_type is int:
        return "int"
    if val_type is float:
        return "float"
    if val_type is bool:
        return "bool"

    # A str subclass is also a Sequence; without this check it would be
    # reported as "list" further down.
    if isinstance(val, str):
        return "str"
    if isinstance(val, numbers.Number):
        return "number"
    if isinstance(val, dict):
        return "dict"
    if isinstance(val, Sequence):
        return "list"
    return val_type.__name__
def escape_xml(s: str | int | float | numbers.Number | None) -> str:
    """
    Return ``s`` as text that is safe to embed in XML.

    Args:
        s (str | numbers.Number): The value to escape. Non-string values are
            converted with ``str`` and returned without further processing.

    Returns:
        str: The text with ``&``, ``"``, ``'``, ``<`` and ``>`` replaced by
        their XML entities.
    """
    if not isinstance(s, str):
        return str(s)
    if _XML_ESCAPE_CHARS.isdisjoint(s):
        # Nothing to replace: hand the string back untouched.
        return s

    # "&" must be handled first so the entities added afterwards are not
    # escaped a second time.
    replacements = (
        ("&", "&amp;"),
        ('"', "&quot;"),
        ("'", "&apos;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    escaped = s
    for raw, entity in replacements:
        escaped = escaped.replace(raw, entity)
    return str(escaped)
def make_attrstring(attr: dict[str, Any]) -> str:
    """
    Create a string of XML attributes from a dictionary.

    Every value is passed through ``escape_xml``, including a lone ``type``
    attribute, so caller-supplied values cannot break out of the quoted
    attribute value.

    Args:
        attr (dict[str, Any]): The dictionary of attributes.

    Returns:
        str: The attributes rendered as `` key="value"`` pairs, each preceded
        by a single space, or an empty string when ``attr`` is empty.
    """
    if not attr:
        return ""
    return "".join(f' {key}="{escape_xml(val)}"' for key, val in attr.items())
def _is_fast_valid_xml_name(key: str) -> bool:
    """
    Cheap check for plain ASCII XML names.

    Accepts a non-empty ASCII string without ``:`` whose first character is a
    letter or underscore and whose remaining characters are letters, digits,
    ``-``, ``_`` or ``.``.
    """
    if not (key and key.isascii()) or ":" in key:
        return False

    head, tail = key[0], key[1:]
    if head != "_" and not head.isalpha():
        return False

    for char in tail:
        if not (char.isalnum() or char in "-_."):
            return False
    return True
@lru_cache(maxsize=4096)
def key_is_valid_xml(key: str) -> bool:
    """
    Tell whether ``key`` can be used as an XML element name.

    Results are memoised by ``lru_cache`` (up to 4096 entries).

    Args:
        key (str): The candidate name; it is converted with ``str`` first.

    Returns:
        bool: True when the name passes the ASCII fast path or a document
        using it as a tag parses successfully, False otherwise.
    """
    name = str(key)
    if _is_fast_valid_xml_name(name):
        return True
    if name == "" or name.isdigit() or ":" in name:
        return False

    # Imported here so the parser is only loaded for names that the cheap
    # checks above could not decide.
    from defusedxml.minidom import parseString

    probe = f'<?xml version="1.0" encoding="UTF-8" ?><{name}>foo</{name}>'
    try:
        parseString(probe)
    except Exception:  # minidom does not implement exceptions well
        return False
    return True
def make_valid_xml_name(key: str, attr: dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """
    Turn ``key`` into a usable XML element name.

    Args:
        key (str): The requested element name.
        attr (dict[str, Any]): Attributes of the element. When no usable name
            can be derived, this dict is modified in place: the original key
            is stored under ``"name"``.

    Returns:
        tuple[str, dict[str, Any]]: The element name to use and ``attr``.
    """
    name = str(key)

    if key_is_valid_xml(name):
        return name, attr

    # Purely numeric keys get an "n" prefix.
    if name.isdigit():
        return f"n{name}", attr

    # Spaces are the most common offender; try swapping them for underscores.
    underscored = name.replace(" ", "_")
    if key_is_valid_xml(underscored):
        return underscored, attr

    # Names containing ":" are kept as-is when the remainder is valid.
    if ":" in name and key_is_valid_xml(name.replace(":", "")):
        return name, attr

    # Give up on the name: use a generic tag and keep the key as metadata.
    attr["name"] = name
    return "key", attr
[docs] def wrap_cdata(s: str | int | float | numbers.Number) -> str: """Wraps a string into CDATA sections""" s = str(s).replace("]]>", "]]]]><![CDATA[>") return "<![CDATA[" + s + "]]>"
def default_item_func(parent: str) -> str:
    """Return the element name used for list items; ``parent`` is ignored."""
    item_tag = "item"
    return item_tag
# XPath 3.1 json-to-xml conversion # Spec: https://www.w3.org/TR/xpath-functions-31/#json-to-xml-mapping XPATH_FUNCTIONS_NS = "http://www.w3.org/2005/xpath-functions"
def get_xpath31_tag_name(val: Any) -> str:
    """
    Map a Python value to its XPath 3.1 json-to-xml element name.

    See: https://www.w3.org/TR/xpath-functions-31/#func-json-to-xml

    Args:
        val: The value to classify.

    Returns:
        str: One of "map", "array", "string", "number", "boolean" or "null".
        Values that match no rule are reported as "string".
    """
    if val is None:
        return "null"

    # Order matters: bool must precede the numeric check (bool is an int),
    # and str/bytes must precede Sequence (both are sequences).
    rules = (
        (bool, "boolean"),
        (dict, "map"),
        ((int, float, numbers.Number), "number"),
        ((str, bytes, bytearray), "string"),
        (Sequence, "array"),
    )
    for kinds, tag in rules:
        if isinstance(val, kinds):
            return tag
    return "string"
# @lat: [[behavior#XPath 3.1 format]]
def convert_to_xpath31(obj: Any, parent_key: str | None = None) -> str:
    """
    Render a Python object in the XPath 3.1 json-to-xml vocabulary.

    See: https://www.w3.org/TR/xpath-functions-31/#json-to-xml-mapping

    Args:
        obj: The object to convert.
        parent_key: Key under which ``obj`` sits in its parent dict; emitted
            as the ``key`` attribute. Omitted when None.

    Returns:
        str: XML string in XPath 3.1 format.
    """
    key_attr = "" if parent_key is None else f' key="{escape_xml(parent_key)}"'
    tag_name = get_xpath31_tag_name(obj)

    if tag_name == "null":
        return f"<null{key_attr}/>"

    if tag_name == "boolean":
        body = str(obj).lower()
    elif tag_name == "number":
        body = f"{obj}"
    elif tag_name == "map":
        # Each entry recurses with its dict key as the child's key attribute.
        body = "".join(convert_to_xpath31(value, name) for name, value in obj.items())
    elif tag_name == "array":
        body = "".join(convert_to_xpath31(member) for member in obj)
    else:
        # "string", plus anything unrecognised, is emitted as escaped text.
        tag_name = "string"
        body = escape_xml(str(obj))

    return f"<{tag_name}{key_attr}>{body}</{tag_name}>"
def convert(
    obj: Any,
    ids: Any,
    attr_type: bool,
    item_func: Callable[[str], str],
    cdata: bool,
    item_wrap: bool,
    parent: str = "root",
    list_headers: bool = False,
) -> str:
    """
    Dispatch ``obj`` to the converter matching its data type.

    Scalars are emitted as a single element named ``item_func(parent)``;
    dicts and sequences are handed to ``convert_dict`` / ``convert_list``.

    Raises:
        TypeError: If ``obj`` is of a type no converter handles.
    """
    item_name = item_func(parent)

    # bool is a subclass of int (and so a numbers.Number); it has to be
    # tested first or booleans would be emitted as numbers.
    if isinstance(obj, bool):
        return convert_bool(key=item_name, val=obj, attr_type=attr_type, cdata=cdata)

    if isinstance(obj, (numbers.Number, str)):
        return convert_kv(
            key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata
        )

    if isinstance(obj, (datetime.datetime, datetime.date)):
        return convert_kv(
            key=item_name,
            val=obj.isoformat(),
            attr_type=attr_type,
            attr={},
            cdata=cdata,
        )

    if obj is None:
        return convert_none(key=item_name, attr_type=attr_type, cdata=cdata)

    if isinstance(obj, dict):
        return convert_dict(
            cast("dict[str, Any]", obj),
            ids,
            parent,
            attr_type,
            item_func,
            cdata,
            item_wrap,
            list_headers=list_headers,
        )

    if isinstance(obj, Sequence):
        return convert_list(
            obj,
            ids,
            parent,
            attr_type,
            item_func,
            cdata,
            item_wrap,
            list_headers=list_headers,
        )

    raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})")
def is_primitive_type(val: Any) -> bool:
    """Return True for None, strings, booleans and numbers."""
    if val is None:
        return True
    return isinstance(val, (str, bool, numbers.Number))
def dict2xml_str(
    attr_type: bool,
    attr: dict[str, Any],
    item: dict[str, Any],
    item_func: Callable[[str], str],
    cdata: bool,
    item_name: str,
    item_wrap: bool,
    parentIsList: bool,
    parent: str = "",
    list_headers: bool = False,
) -> str:
    """
    Render a dict value as an XML element.

    The special keys ``@attrs`` (custom attributes), ``@val`` (explicit
    content) and ``@flat`` (omit the wrapping element) are honoured.

    Args:
        attr_type: When True, ``attr["type"]`` is set (``attr`` is modified
            in place).
        attr: Attributes computed by the caller for this element.
        item: The dict to render.
        item_func: Produces the element name for list items.
        cdata: Forwarded to ``convert`` for non-primitive content.
        item_name: Element name used for the wrapper.
        item_wrap: Whether list items are wrapped in their own element.
        parentIsList: True when ``item`` is a member of a list.
        parent: Name of the enclosing element; used as the wrapper name when
            ``parentIsList`` and ``list_headers`` are both set.
        list_headers: Repeat the parent element around every list member.

    Returns:
        str: The rendered XML fragment.
    """
    nested_ids: list[str] = []  # nested content is converted without ids

    if attr_type:
        attr["type"] = get_xml_type(item)

    has_custom_attrs = "@attrs" in item
    # Custom attributes replace the computed ones instead of merging.
    val_attr = dict(item["@attrs"]) if has_custom_attrs else dict(attr)

    if "@val" in item:
        rawitem = item["@val"]
    elif has_custom_attrs:
        rawitem = {name: value for name, value in item.items() if name != "@attrs"}
    else:
        rawitem = item

    if not is_primitive_type(rawitem):
        # we can not use convert_dict, because rawitem could be non-dict
        subtree = convert(
            rawitem,
            nested_ids,
            attr_type,
            item_func,
            cdata,
            item_wrap,
            item_name,
            list_headers=list_headers,
        )
    elif rawitem is None:
        subtree = ""
    elif isinstance(rawitem, bool):
        subtree = str(rawitem).lower()
    else:
        subtree = escape_xml(str(rawitem))

    if parentIsList and list_headers:
        # Attributes are only emitted on the repeated header when items are
        # not wrapped.
        header_attrs = make_attrstring(val_attr) if val_attr and not item_wrap else ""
        return f"<{parent}{header_attrs}>{subtree}</{parent}>"

    if item.get("@flat", False) or (parentIsList and not item_wrap):
        return subtree

    attrstring = make_attrstring(val_attr)
    return f"<{item_name}{attrstring}>{subtree}</{item_name}>"
def list2xml_str(
    attr_type: bool,
    attr: dict[str, Any],
    item: Sequence[Any],
    item_func: Callable[[str], str],
    cdata: bool,
    item_name: str,
    item_wrap: bool,
    list_headers: bool = False,
) -> str:
    """
    Render a sequence value as XML.

    The members are converted by ``convert_list``. The result is wrapped in
    an ``item_name`` element unless ``item_name`` ends with ``@flat``, the
    first member is primitive and ``item_wrap`` is off, or ``list_headers``
    is set.

    Args:
        attr_type: When True, ``attr["type"]`` is set (``attr`` is modified
            in place).
        attr: Attributes for the wrapping element.
        item: The sequence to render.
        item_func: Produces the element name for list items.
        cdata: Whether string values are wrapped in CDATA sections.
        item_name: Name of the wrapping element; a trailing ``@flat`` is
            stripped and suppresses the wrapper.
        item_wrap: Whether members are wrapped in their own element.
        list_headers: Repeat the parent element around every member.

    Returns:
        str: The rendered XML fragment.
    """
    member_ids: list[str] = []  # members are converted without ids

    if attr_type:
        attr["type"] = get_xml_type(item)

    flat = item_name.endswith("@flat")
    if flat:
        item_name = item_name[0:-5]

    subtree = convert_list(
        items=item,
        ids=member_ids,
        parent=item_name,
        attr_type=attr_type,
        item_func=item_func,
        cdata=cdata,
        item_wrap=item_wrap,
        list_headers=list_headers,
    )

    if (
        flat
        or (len(item) > 0 and is_primitive_type(item[0]) and not item_wrap)
        or list_headers
    ):
        return subtree

    attrstring = make_attrstring(attr)
    return f"<{item_name}{attrstring}>{subtree}</{item_name}>"
def convert_dict(
    obj: dict[str, Any],
    ids: list[str],
    parent: str,
    attr_type: bool,
    item_func: Callable[[str], str],
    cdata: bool,
    item_wrap: bool,
    list_headers: bool = False,
) -> str:
    """
    Convert a dict into an XML string, one element per entry.

    Keys are normalised with ``make_valid_xml_name``. A string key ending in
    ``@flat`` has that suffix removed; for sequence values the suffix is
    passed on to ``list2xml_str``.

    Raises:
        TypeError: If a value is truthy and of a type no converter handles.
    """
    pieces: list[str] = []

    for raw_key, val in obj.items():
        attr = {"id": f"{get_unique_id(parent)}"} if ids else {}

        key_is_flat = isinstance(raw_key, str) and raw_key.endswith("@flat")
        key, attr = make_valid_xml_name(raw_key[:-5] if key_is_flat else raw_key, attr)

        # bool is a subclass of int (and so a numbers.Number); it has to be
        # tested first or booleans would get the wrong XML type.
        if isinstance(val, bool):
            piece = convert_bool_valid_name(key, val, attr_type, attr)
        elif isinstance(val, (numbers.Number, str)):
            piece = convert_kv_valid_name(
                key=key, val=val, attr_type=attr_type, attr=attr, cdata=cdata
            )
        elif hasattr(val, "isoformat"):  # datetime
            piece = convert_kv_valid_name(
                key=key,
                val=val.isoformat(),
                attr_type=attr_type,
                attr=attr,
                cdata=cdata,
            )
        elif isinstance(val, dict):
            piece = dict2xml_str(
                attr_type,
                attr,
                val,
                item_func,
                cdata,
                key,
                item_wrap,
                False,
                list_headers=list_headers,
            )
        elif isinstance(val, Sequence):
            piece = list2xml_str(
                attr_type=attr_type,
                attr=attr,
                item=val,
                item_func=item_func,
                cdata=cdata,
                item_name=f"{key}@flat" if key_is_flat else key,
                item_wrap=item_wrap,
                list_headers=list_headers,
            )
        elif not val:
            # Any remaining falsy value (None included) becomes an empty
            # element.
            piece = convert_none_valid_name(key, attr_type, attr)
        else:
            raise TypeError(f"Unsupported data type: {val} ({type(val).__name__})")

        pieces.append(piece)

    return "".join(pieces)
def convert_list(
    items: Sequence[Any],
    ids: list[str] | None,
    parent: str,
    attr_type: bool,
    item_func: Callable[[str], str],
    cdata: bool,
    item_wrap: bool,
    list_headers: bool = False,
) -> str:
    """
    Convert a list into an XML string, one fragment per member.

    Element naming:
        * bool, date/datetime and None members use ``item_func(parent)``.
        * Number and string members use ``item_func(parent)`` when
          ``item_wrap`` is True and ``parent`` otherwise.
        * dict and sequence members are delegated to ``dict2xml_str`` and
          ``list2xml_str``.

    When a name has to be replaced by the generic ``key`` tag, the ``name``
    attribute returned by ``make_valid_xml_name`` is added to every scalar
    member that uses that name.

    Args:
        items: The members to convert.
        ids: When truthy, each member gets an ``id`` attribute made of one
            shared ID for the list plus the member's 1-based position.
        parent: Name of the element that holds the list.
        attr_type: Whether elements get a ``type`` attribute.
        item_func: Produces the element name for list items.
        cdata: Whether string values are wrapped in CDATA sections.
        item_wrap: Whether members are wrapped in their own element.
        list_headers: Repeat the parent element around every dict member.

    Returns:
        str: The concatenated XML fragments.

    Raises:
        TypeError: If a member is of a type no converter handles.
    """
    output: list[str] = []
    addline = output.append

    item_name = item_func(parent)
    if item_name.endswith("@flat"):
        item_name = item_name[:-5]
    item_name, item_name_attr = make_valid_xml_name(item_name, {})

    if item_wrap:
        # Reuse the already-normalised name. Normalising it a second time
        # would return an empty attribute dict and drop the "name" attribute
        # that records the original (invalid) item name.
        scalar_key, scalar_key_attr = item_name, item_name_attr
    else:
        scalar_key, scalar_key_attr = make_valid_xml_name(parent, {})

    this_id = get_unique_id(parent) if ids else None

    for i, item in enumerate(items):
        attr = {"id": f"{this_id}_{i + 1}"} if ids else {}

        # bool is tested before numbers.Number because bool is an int.
        if isinstance(item, bool):
            attr.update(item_name_attr)
            addline(convert_bool_valid_name(item_name, item, attr_type, attr))
        elif isinstance(item, (numbers.Number, str)):
            attr.update(scalar_key_attr)
            addline(
                convert_kv_valid_name(
                    key=scalar_key,
                    val=item,
                    attr_type=attr_type,
                    attr=attr,
                    cdata=cdata,
                )
            )
        elif hasattr(item, "isoformat"):  # datetime
            attr.update(item_name_attr)
            addline(
                convert_kv_valid_name(
                    key=item_name,
                    val=item.isoformat(),
                    attr_type=attr_type,
                    attr=attr,
                    cdata=cdata,
                )
            )
        elif isinstance(item, dict):
            addline(
                dict2xml_str(
                    attr_type=attr_type,
                    attr=attr,
                    item=item,
                    item_func=item_func,
                    cdata=cdata,
                    item_name=item_name,
                    item_wrap=item_wrap,
                    parentIsList=True,
                    parent=parent,
                    list_headers=list_headers,
                )
            )
        elif isinstance(item, Sequence):
            addline(
                list2xml_str(
                    attr_type=attr_type,
                    attr=attr,
                    item=item,
                    item_func=item_func,
                    cdata=cdata,
                    item_name=item_name,
                    item_wrap=item_wrap,
                    list_headers=list_headers,
                )
            )
        elif item is None:
            attr.update(item_name_attr)
            addline(convert_none_valid_name(item_name, attr_type, attr))
        else:
            raise TypeError(f"Unsupported data type: {item} ({type(item).__name__})")

    return "".join(output)
def convert_kv(
    key: str,
    val: str | int | float | numbers.Number | datetime.datetime | datetime.date,
    attr_type: bool,
    attr: dict[str, Any] | None = None,
    cdata: bool = False,
) -> str:
    """
    Convert a number, string, or date/datetime into an XML element.

    Args:
        key: Requested element name; normalised with ``make_valid_xml_name``.
        val: The value to emit. Dates and datetimes are written in ISO format.
        attr_type: When True, a ``type`` attribute is added.
        attr: Attributes for the element; modified in place when given.
        cdata: Wrap the value in a CDATA section instead of escaping it.

    Returns:
        str: The rendered element.
    """
    attributes = {} if attr is None else attr
    key, attributes = make_valid_xml_name(key, attributes)

    # Convert datetime to isoformat string
    if isinstance(val, (datetime.datetime, datetime.date)):
        val = val.isoformat()

    if attr_type:
        attributes["type"] = get_xml_type(val)

    attr_string = make_attrstring(attributes)
    content = wrap_cdata(val) if cdata else escape_xml(val)
    return f"<{key}{attr_string}>{content}</{key}>"
def convert_kv_valid_name(
    key: str,
    val: str | int | float | numbers.Number | datetime.datetime | datetime.date,
    attr_type: bool,
    attr: dict[str, Any],
    cdata: bool = False,
) -> str:
    """
    Convert a scalar into an XML element for a key the caller has validated.

    Unlike ``convert_kv`` the key is used verbatim and ``attr`` is copied, so
    the caller's dict is left untouched.
    """
    if isinstance(val, (datetime.datetime, datetime.date)):
        val = val.isoformat()

    attributes = dict(attr)
    if attr_type:
        attributes["type"] = get_xml_type(val)

    attr_string = make_attrstring(attributes)
    content = wrap_cdata(val) if cdata else escape_xml(val)
    return f"<{key}{attr_string}>{content}</{key}>"
def convert_bool(
    key: str,
    val: bool,
    attr_type: bool,
    attr: dict[str, Any] | None = None,
    cdata: bool = False,
) -> str:
    """
    Convert a boolean into an XML element.

    The value is written as lowercase ``true`` / ``false``. ``cdata`` is
    accepted but not used.
    """
    attributes = {} if attr is None else attr
    key, attributes = make_valid_xml_name(key, attributes)

    if attr_type:
        attributes["type"] = get_xml_type(val)

    attr_string = make_attrstring(attributes)
    text = str(val).lower()
    return f"<{key}{attr_string}>{text}</{key}>"
def convert_bool_valid_name(
    key: str,
    val: bool,
    attr_type: bool,
    attr: dict[str, Any],
) -> str:
    """
    Convert a boolean for a key the caller has already validated.

    ``attr`` is copied before the ``type`` attribute is added, so the
    caller's dict is left untouched.
    """
    attributes = dict(attr)
    if attr_type:
        attributes["type"] = "bool"

    attr_string = make_attrstring(attributes)
    text = str(val).lower()
    return f"<{key}{attr_string}>{text}</{key}>"
def convert_none(
    key: str,
    attr_type: bool,
    attr: dict[str, Any] | None = None,
    cdata: bool = False,
) -> str:
    """
    Convert a null value into an empty XML element.

    ``cdata`` is accepted but not used.
    """
    attributes = {} if attr is None else attr
    key, attributes = make_valid_xml_name(key, attributes)

    if attr_type:
        attributes["type"] = get_xml_type(None)

    attr_string = make_attrstring(attributes)
    return f"<{key}{attr_string}></{key}>"
def convert_none_valid_name(
    key: str,
    attr_type: bool,
    attr: dict[str, Any],
) -> str:
    """
    Convert a null value for a key the caller has already validated.

    ``attr`` is copied before the ``type`` attribute is added, so the
    caller's dict is left untouched.
    """
    attributes = dict(attr)
    if attr_type:
        attributes["type"] = "null"

    attr_string = make_attrstring(attributes)
    return f"<{key}{attr_string}></{key}>"
# @lat: [[architecture#Conversion engine]]
def dicttoxml(
    obj: ELEMENT,
    root: bool = True,
    custom_root: str = "root",
    ids: list[int] | None = None,
    attr_type: bool = True,
    item_wrap: bool = True,
    item_func: Callable[[str], str] = default_item_func,
    cdata: bool = False,
    xml_namespaces: dict[str, Any] | None = None,
    list_headers: bool = False,
    xpath_format: bool = False,
) -> bytes:
    """
    Converts a python object into XML and returns it UTF-8 encoded.

    :param obj:
        The object to convert (typically a dictionary).
    :param bool root:
        Default is True.
        Specifies whether the output is wrapped in an XML declaration and a
        root element.
    :param custom_root:
        Default is 'root'.
        Name of the root element.
    :param ids:
        Default is None.
        When truthy, elements get an ``id`` attribute.
    :param bool attr_type:
        Default is True.
        Specifies whether elements get a data type attribute.
    :param bool item_wrap:
        Default is True.
        Specifies whether to nest items of an array in <item/>.

        Example if True

        .. code-block:: python

            data = {'bike': ['blue', 'green']}

        .. code-block:: xml

            <bike>
            <item>blue</item>
            <item>green</item>
            </bike>

        Example if False

        .. code-block:: python

            data = {'bike': ['blue', 'green']}

        .. code-block:: xml

            <bike>blue</bike>
            <bike>green</bike>

    :param item_func:
        Default returns 'item'.
        Function that generates the element name for items in a list; it is
        called with the name of the parent element.
    :param bool cdata:
        Default is False.
        Specifies whether string values should be wrapped in CDATA sections.
    :param xml_namespaces:
        Default is None (no namespaces).
        A dictionary where the key is the xmlns prefix and the value the urn.
        The key 'xmlns' declares the default namespace. The key 'xsi' expects
        a dictionary whose 'schemaInstance' and 'schemaLocation' entries are
        written as ``xmlns:xsi`` and ``xsi:schemaLocation``. Namespaces are
        only written when ``root`` is True.

        Example:

        .. code-block:: python

            { 'flex': 'http://www.w3.org/flex/flexBase',
              'xsl': "http://www.w3.org/1999/XSL/Transform"}

        results in

        .. code-block:: xml

            <root xmlns:flex="http://www.w3.org/flex/flexBase"
                  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

    :param bool list_headers:
        Default is False.
        Repeats the header for every element in a list.

        Example if True:

        .. code-block:: python

            {"Bike": [
                {'frame_color': 'red'},
                {'frame_color': 'green'}
            ]}

        results in

        .. code-block:: xml

            <Bike><frame_color>red</frame_color></Bike>
            <Bike><frame_color>green</frame_color></Bike>

    :param bool xpath_format:
        Default is False.
        When True, produces XPath 3.1 json-to-xml output as specified by W3C
        (https://www.w3.org/TR/xpath-functions-31/#func-json-to-xml).
        Uses type-based element names (map, array, string, number, boolean,
        null) with key attributes and the
        http://www.w3.org/2005/xpath-functions namespace. All other options
        are ignored in this mode.

        Example:

        .. code-block:: python

            {"name": "John", "age": 30}

        results in

        .. code-block:: xml

            <map xmlns="http://www.w3.org/2005/xpath-functions">
              <string key="name">John</string>
              <number key="age">30</number>
            </map>

    Dictionary keys containing the special char '@' have special meaning
    inside a dictionary that is itself the value of a key:

    @attrs: Custom xml attributes for the element. They replace the
    attributes the element would otherwise get:

    .. code-block:: python

        {'x': {'@attrs': {'a': 'b'}, '@val': 'y'}}

    results in

    .. code-block:: xml

        <x a="b">y</x>

    @flat: If a key ends with @flat (or the dict contains a truthy key
    '@flat'), the encapsulating node is omitted. Similar to item_wrap.

    @val: @attrs requires a dict. If a primitive type or a list should carry
    attributes, its content is given under the key @val.

    To add custom xml-attributes on a list {'list': [4, 5, 6]}, you do this:

    .. code-block:: python

        {'list': {'@attrs': {'a':'b','c':'d'}, '@val': [4, 5, 6]}}

    which results in (with attr_type=False)

    .. code-block:: xml

        <list a="b" c="d"><item>4</item><item>5</item><item>6</item></list>
    """
    xml_declaration = '<?xml version="1.0" encoding="UTF-8" ?>'

    if xpath_format:
        xml_content = convert_to_xpath31(obj)
        if xml_content.startswith("<map"):
            document = xml_content.replace(
                "<map", f'<map xmlns="{XPATH_FUNCTIONS_NS}"', 1
            )
        elif xml_content.startswith("<array"):
            document = xml_content.replace(
                "<array", f'<array xmlns="{XPATH_FUNCTIONS_NS}"', 1
            )
        else:
            # Anything else is wrapped in a namespaced <map> element.
            document = f'<map xmlns="{XPATH_FUNCTIONS_NS}">{xml_content}</map>'
        return "".join([xml_declaration, document]).encode("utf-8")

    if xml_namespaces is None:
        xml_namespaces = {}

    namespace_parts: list[str] = []
    for prefix in xml_namespaces:
        if prefix == "xsi":
            for schema_att in xml_namespaces[prefix]:
                if schema_att == "schemaInstance":
                    ns = xml_namespaces[prefix]["schemaInstance"]
                    namespace_parts.append(f' xmlns:{prefix}="{ns}"')
                elif schema_att == "schemaLocation":
                    ns = xml_namespaces[prefix][schema_att]
                    namespace_parts.append(f' xsi:{schema_att}="{ns}"')
        elif prefix == "xmlns":
            # xmlns needs no prefix
            ns = xml_namespaces[prefix]
            namespace_parts.append(f' xmlns="{ns}"')
        else:
            ns = xml_namespaces[prefix]
            namespace_parts.append(f' xmlns:{prefix}="{ns}"')
    namespace_str = "".join(namespace_parts)

    if not root:
        body = convert(
            obj,
            ids,
            attr_type,
            item_func,
            cdata,
            item_wrap,
            parent="",
            list_headers=list_headers,
        )
        return body.encode("utf-8")

    output_elem = convert(
        obj,
        ids,
        attr_type,
        item_func,
        cdata,
        item_wrap,
        parent=custom_root,
        list_headers=list_headers,
    )
    document = f"<{custom_root}{namespace_str}>{output_elem}</{custom_root}>"
    return "".join([xml_declaration, document]).encode("utf-8")