Generic XML message parsing

39 Views Asked by At

I have a number of XML message types with an elaborate header and sequence structure, but separate business message types. I'm trying to convert these to objects in Python, and so far my bottom-level structure looks like this:

# Type variable for the record class handled by the generic helpers below;
# any type substituted for it must be XmlRecord or a subclass of it.
T = TypeVar("T", bound=XmlRecord)

def _extract_fixed_gen_data(rec_type: Type[T], xml: dict) -> Dict[TimeCode, List[T]]:
    """Build the records nested in *xml*, grouped by time code.

    Each entry under ``xml["JPMR00010"]`` contributes one dictionary item:
    the key is a ``TimeCode`` built from the integer value of the entry's
    ``"JP06219"`` field, and the value is the list of ``rec_type`` instances
    built from the entry's ``["JPM00011"]["JPMR00011"]`` sequence.

    Args:
        rec_type: Callable (normally a record class) invoked with each raw
            record to build the parsed object.
        xml: Parsed XML dictionary holding the ``"JPMR00010"`` entries.

    Returns:
        A dictionary mapping each time code to its list of records. When two
        entries yield equal time codes, the later one replaces the earlier.

    Raises:
        KeyError: If any of the expected keys is missing.
    """
    by_time_code = {}
    for entry in xml["JPMR00010"]:
        time_code = TimeCode(int(entry["JP06219"]))
        by_time_code[time_code] = [
            rec_type(raw_record)  # pylint: disable=too-many-function-args
            for raw_record in entry["JPM00011"]["JPMR00011"]
        ]
    return by_time_code

class BusinessMessage(Generic[T], XmlRecord):
    """Contains the business data associated with the XML payload.

    The class is generic over ``T``, the record type that the entries found
    under the ``"JPM00010"`` key are meant to be converted into.
    """

    # Some other fields

    # Records read from the "JPM00010" element, converted by
    # _extract_fixed_gen_data.
    # NOTE(review): the name ``T`` in the lambda below is the module-level
    # TypeVar object itself, not the class this message was parametrized
    # with, so _extract_fixed_gen_data receives a TypeVar as ``rec_type``
    # and cannot use it to build records.
    # NOTE(review): the declared dtype is ``list`` while
    # _extract_fixed_gen_data returns a dict; XmlProperty.parse passes any
    # value whose type differs from dtype through ``dtype(...)``, and
    # ``list(some_dict)`` keeps only the keys -- confirm whether ``dict``
    # was intended here.
    data = XmlProperty[list]("JPM00010", list, converter=lambda raw: _extract_fixed_gen_data(T, raw))

And XmlProperty and XmlRecord are defined like this:

X = TypeVar("X")


class XmlProperty(Generic[X]):
    """Descriptor that reads one value out of an XML dictionary.

    Declared as a class attribute, it stores the parsed value on the owning
    instance under the attribute's name prefixed with an underscore, and
    returns that stored value on attribute access.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        xml_key: str,
        dtype,
        allow_empty: bool = False,
        alternates: Optional[List[str]] = None,
        converter: Optional[Callable[[Any], Any]] = None,
    ):
        """Describe where the value lives and how it is converted.

        Args:
            xml_key: Key looked up first in the XML dictionary.
            dtype: Callable applied to the raw value whenever the raw value's
                exact type differs from it.
            allow_empty: When true, a missing key yields ``None`` instead of
                raising ``KeyError``.
            alternates: Fallback keys tried, in order, when ``xml_key`` is
                absent.
            converter: Optional callable applied to the raw value before the
                ``dtype`` coercion.
        """
        # Where to look for the value.
        self._xml_key = xml_key
        self._alternate_keys = alternates or []
        # How to interpret what was found.
        self._dtype = dtype
        self._allow_empty = allow_empty
        self._converter = converter
        # Nothing in this class switches this on, so converters are called
        # with the raw value only.
        self._expects_dict = False
        self._value = None

    def parse(self, obj: object, data: dict):
        """Extract this property from *data* and store it on *obj*.

        Args:
            obj: Instance that receives the parsed value.
            data: XML dictionary to read from.

        Raises:
            KeyError: If neither the main key nor any alternate key is
                present and empty values are not allowed.
        """
        if self._xml_key in data:
            raw = data[self._xml_key]
        else:
            # Fall back to the first alternate key present in the data.
            fallback = None
            for candidate in self._alternate_keys:
                if candidate in data:
                    fallback = candidate
                    break

            if fallback is not None:
                raw = data[fallback]
            elif self._allow_empty:
                raw = None
            else:
                raise KeyError(f"XML data is missing property {self._xml_key}")

        # The converter runs even when no value was found (raw is None).
        if self._converter is not None:
            if self._expects_dict:
                raw = self._converter(raw, data)
            else:
                raw = self._converter(raw)

        if raw is None:
            parsed = None
        elif type(raw) != self._dtype:
            # Exact type comparison: instances of dtype subclasses are
            # coerced as well.
            parsed = self._dtype(raw)
        else:
            parsed = raw

        self.__set__(obj, parsed)

    def __set_name__(self, owner: object, name: str):
        """Record the attribute name this descriptor was assigned to."""
        self.public_name = name
        self.private_name = "_" + name

    def __get__(self, obj: object, objtype: Optional[type] = None):
        """Return the stored value, or the descriptor on class access."""
        return self if obj is None else getattr(obj, self.private_name)

    def __set__(self, obj: object, value: Optional[X]):
        """Store *value* on *obj* under the private attribute name."""
        setattr(obj, self.private_name, value)

class XmlRecord:
    """Base class for objects populated from a parsed XML dictionary.

    On construction, every ``XmlProperty`` descriptor declared on the class
    or on any of its base classes is asked to parse its value out of the
    supplied dictionary and store it on the new instance.
    """

    def __init__(self, data: dict):
        """Parse every declared XML property out of *data*.

        Args:
            data: XML dictionary handed unchanged to each property's
                ``parse`` method.

        Raises:
            Whatever a property's ``parse`` raises (for example ``KeyError``
            for a missing required key) propagates to the caller.
        """
        # Walk the MRO from the most basic class to the most derived one so
        # that properties declared on base classes are collected too.
        # Looking only at the instance's own class would skip them and leave
        # the inherited descriptors without a stored value.
        properties = {}
        for klass in reversed(type(self).__mro__):
            for name, attr in vars(klass).items():
                if isinstance(attr, XmlProperty):
                    properties[name] = attr
                else:
                    # A more derived class replaced the property with a
                    # plain attribute, so there is nothing left to parse.
                    properties.pop(name, None)
        self._xml_properties = properties

        for prop in properties.values():
            prop.parse(self, data)

The issue comes in trying to inject a generic argument into _extract_fixed_gen_data. Obviously, I can't inject T directly into the call because it's a type-variable and not a type. I could add a generic context to XmlProperty, which would allow me to get around the issue, but that could get messy very quickly. Does anyone have any recommendations on how to proceed here?

0

There are 0 best solutions below