# Source code for fastr.helpers.xmltodict

# Copyright 2011-2014 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This module contains tools for converting Python dictionaries into XML objects
and vice versa.
"""
# Module author
__author__ = 'hachterberg'

import re
import xml.etree.ElementTree as ElementTree
import xml.dom.minidom

import fastr

# Tag used for the entries of a list when the tag of the list itself does not
# end in 's' (otherwise the list tag minus its trailing 's' is used)
__list_entry_tag__ = 'li'
# Prefix prepended to keys that do not start with a letter when writing XML;
# it is stripped again when the key is parsed back
__tag_symbol_start__ = 'p_'


def _toxml(data, element=None, key=None):
    """
    Serialize data into an XML tree and, for the top-level call, return the
    resulting document.

    Dictionaries become elements: simple values are stored as attributes
    (strings containing a newline or longer than 48 characters become child
    elements with text instead), ``None`` becomes an empty child element and
    nested dicts, lists and tuples are serialized recursively. Lists and
    tuples become an element with one child per entry; the entries are tagged
    with the key minus its trailing ``s`` or, when that is not possible, with
    ``__list_entry_tag__``.

    :param data: object to write (dict, list, tuple, simple type or None)
    :param element: parent element to add the data to, if None a new root
                    element is created and the call is treated as top-level
    :param key: tag (or attribute name) to use, defaults to ``'root'`` for
                the top-level call
    :return: the pretty-printed, UTF-8 encoded XML document for the top-level
             call, None for nested calls
    :raises TypeError: if the data (or a value nested in it) has a type that
                       cannot be serialized
    """
    toplevel = element is None
    if toplevel and key is None:
        key = 'root'

    if isinstance(data, dict):
        if toplevel:
            element = ElementTree.Element(key)
            subelement = element
        else:
            subelement = ElementTree.SubElement(element, key)

        for child_key, value in data.items():
            child_key = _prepkey(child_key)

            if _issimpletype(value):
                # Long or multi-line strings are stored as the text of a
                # child element rather than as an attribute
                if isinstance(value, str) and ("\n" in value or len(value) > 48):
                    text_element = ElementTree.SubElement(subelement, child_key)
                    text_element.text = _to_str(value)
                else:
                    subelement.set(child_key, _to_str(value))
            elif value is None:
                ElementTree.SubElement(subelement, child_key)
            elif isinstance(value, (list, tuple, dict)):
                _toxml(value, subelement, child_key)
            else:
                raise TypeError('Invalid class to serialize ({})'.format(type(value).__name__))

    elif isinstance(data, (list, tuple)):
        if toplevel:
            element = ElementTree.Element(key)
            subelement = element
        else:
            subelement = ElementTree.SubElement(element, key)

        # Use the singular of the key as entry tag; a key that is just 's'
        # would leave an empty (invalid) tag, so fall back to the default
        if len(key) > 1 and key.endswith('s'):
            subkey = key[:-1]
        else:
            subkey = __list_entry_tag__

        for value in data:
            if _issimpletype(value):
                entry_element = ElementTree.SubElement(subelement, subkey)
                entry_element.text = _to_str(value)
            elif isinstance(value, (list, tuple, dict)):
                # Tuples are accepted here as well, consistent with the
                # handling of tuples as dict values and as top-level data
                _toxml(value, subelement, subkey)
            elif value is None:
                ElementTree.SubElement(subelement, subkey)
            else:
                raise TypeError('Invalid class to serialize ({})'.format(type(value).__name__))

    elif data is None:
        if toplevel:
            # An empty root element is parsed back as None
            element = ElementTree.Element(key)
        elif key is not None:
            element.set(key, '')

    elif _issimpletype(data):
        if toplevel:
            # There is no parent to attach an attribute to, store the value
            # as the text of the root element
            element = ElementTree.Element(key)
            element.text = _to_str(data)
        elif key is not None:
            element.set(key, _to_str(data))
        else:
            element.text = _to_str(data)

    else:
        fastr.log.warning('Warning invalid type {}'.format(type(data).__name__))
        if toplevel:
            # Without a root element there is nothing to serialize
            raise TypeError('Invalid class to serialize ({})'.format(type(data).__name__))

    if toplevel:
        document = xml.dom.minidom.parseString(ElementTree.tostring(element))
        return document.toprettyxml(indent='    ').encode('utf-8')

    return None


def _fromxml(root):
    """
    Parse an ElementTree element into Python objects

    The root element is parsed like any other element, regardless of its tag.

    :param root: root element to parse
    :return: the object resulting from parsing the element
    """
    return _parse_elem(root)


def _issimpletype(data):
    """
    Tell whether a value is an instance of one of the simple types
    (int, float, bool or str)

    :param data: value to check
    :return: True if the value is a simple type, False otherwise
    :rtype: bool
    """
    simple_types = (int, float, bool, str)
    return isinstance(data, simple_types)


def _prepkey(key):
    """
    Prepare a key for use in XML, this means encoding special characters and
    padding the key with a valid prefix if needed.

    Every character that is not alphanumeric, ``-`` or ``_`` is replaced by
    ``__NNN__`` where ``NNN`` is the zero-padded (at least three digits)
    decimal code point of the character. If the result is empty or does not
    start with a letter it is prefixed with ``__tag_symbol_start__``.

    NOTE: a key that itself already starts with ``__tag_symbol_start__`` is
    written unchanged, so ``_parse_key`` will strip that prefix when the key
    is read back.

    :param str key: key to prepare
    :return: prepared key
    :rtype: str
    """
    encoded = ''.join(
        char if char.isalnum() or char in '-_' else '__{:03d}__'.format(ord(char))
        for char in key
    )

    # An empty key has no first character to inspect; it gets the prefix too
    # so that the result is never an empty name
    if not encoded or not encoded[0].isalpha():
        encoded = '{}{}'.format(__tag_symbol_start__, encoded)
    return encoded


def _parse_elem(element):
    """
    Convert a single XML element into the Python object it represents

    An element with (non-whitespace) text is a simple value, an element
    without text, attributes and children is None, an element without text
    and attributes whose children are all list entries is a list and
    anything else is a dict built from the attributes and the children.

    :param element: element to parse
    :return: resulting object
    """
    raw_text = element.text
    # Whitespace-only text counts as no text at all
    has_text = raw_text is not None and raw_text.strip() != ''

    if has_text:
        # Simple value stored as text
        return _from_str(raw_text)

    if not element.attrib:
        if len(element) == 0:
            # Completely empty element represents None
            return None

        if all(child.tag == __list_entry_tag__ or child.tag == element.tag[:-1]
               for child in element):
            # All children are list entries
            return [_parse_elem(child) for child in element]

    # Everything else is a dict: attributes first, then the children
    parsed = {_parse_key(name): _from_str(value)
              for name, value in element.attrib.items()}

    keyed_children = [(_parse_key(child.tag), child) for child in element]

    # Count how often every key occurs, repeated keys are collected in a list
    occurrences = {}
    for child_key, _ in keyed_children:
        occurrences[child_key] = occurrences.get(child_key, 0) + 1

    for child_key, child in keyed_children:
        if occurrences[child_key] == 1:
            parsed[child_key] = _parse_elem(child)
        else:
            parsed.setdefault(child_key, []).append(_parse_elem(child))

    return parsed


def _parse_key(key):
    """
    Parse a key, removes the padding and the special character encoding used
    for XML

    A leading ``__tag_symbol_start__`` is stripped and every ``__NNN__``
    sequence (a zero-padded three digit, or longer, decimal code point) is
    replaced by the character it encodes.

    :param str key: key to parse
    :return: cleaned key
    :rtype: str
    """
    if key.startswith(__tag_symbol_start__):
        key = key[len(__tag_symbol_start__):]

    def _decode(match):
        code = int(match.group(1))
        if code > 0x10FFFF:
            # Not a valid code point, leave the text untouched
            return match.group(0)
        return chr(code)

    # A replacement function is used so the decoded character is inserted
    # literally (a plain replacement string breaks on a backslash) and all
    # sequences are decoded in a single pass. Codes of four or more digits
    # never have a leading zero as they are only zero-padded to three digits.
    return re.sub(r'__(\d{3}|[1-9]\d{3,6})__', _decode, key)


def _to_str(value):
    """
    Convert a basic type to a str representation

    Non-string values are converted with ``str``. Strings that would be read
    back as a different value are prefixed with ``___``: strings that look
    like a number, strings equal to ``true``/``false`` (in any casing) and
    strings that already start with ``___``.

    :param value: value to convert
    :return: string version
    :rtype: str
    """
    if not isinstance(value, str):
        return str(value)

    # These would be turned into a bool or lose their leading underscores
    # when read back, so they have to be escaped just like numeric strings
    needs_escape = value.startswith('___') or value.lower() in ('true', 'false')

    if not needs_escape:
        try:
            float(value)
        except ValueError:
            pass
        else:
            needs_escape = True

    if needs_escape:
        return '___{}'.format(value)
    return value


def _from_str(text):
    """
    Convert a str to the value it represents

    ``true``/``false`` (in any casing) become booleans, a leading ``___``
    marks an escaped string and is stripped, after that conversion to int and
    then float is attempted. If nothing applies the text itself is returned.

    :param str text: str to parse
    :return: parsed value
    """
    lowered = text.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False

    # Escaped string: drop the escape prefix and keep the rest as is
    if text.startswith('___'):
        return text[3:]

    # Try the numerical types, most specific one first
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue

    # Nothing matched, it is a plain str
    return str(text)


def dumps(data):
    """
    Write a dict to an XML string

    The serialization is done by ``_toxml``, which returns the pretty-printed
    document encoded as UTF-8.

    :param data: data to write
    :return: the XML data
    """
    return _toxml(data)


def dump(data, filehandle):
    """
    Write a dict to an XML file

    The result of ``dumps`` is passed to ``filehandle.write`` unchanged, so
    the file handle has to accept the (UTF-8 encoded) data ``dumps`` returns.

    :param data: data to write
    :param filehandle: file handle to write to
    """
    filehandle.write(dumps(data))


def loads(data):
    """
    Load an xml string and parse it to a dict

    :param str data: the xml data to load
    :return: the parsed data
    """
    root = ElementTree.fromstring(data)
    return _fromxml(root)


def load(filehandle):
    """
    Load an xml file and parse it to a dict

    :param filehandle: file handle to load
    :return: the parsed data
    """
    tree = ElementTree.parse(filehandle)
    root = tree.getroot()
    return _fromxml(root)