How to sort xml file by xsi:type?

60 Views Asked by At

I know there are already several questions about sorting XML, but none of the answers seem to work for my case. I have the following XML, which is an excerpt of the data schema of an Esri file geodatabase:

import xml.etree.ElementTree as ET
from operator import attrgetter

# Sample workspace document used throughout the question. The `xsi` prefix is
# bound to the XMLSchema-instance namespace on the root element, and several
# child elements carry an `xsi:type` attribute.
data = """<esri:Workspace xmlns:esri='http://www.esri.com/schemas/ArcGIS/10.8' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xs='http://www.w3.org/2001/XMLSchema'>
    <WorkspaceDefinition xsi:type='esri:WorkspaceDefinition'>
        <WorkspaceType>esriLocalDatabaseWorkspace</WorkspaceType>
        <Version/>
        <Domains xsi:type='esri:ArrayOfDomain'/>
        <Sequences xsi:type='esri:ArrayOfSequence'/>
        <DatasetDefinitions xsi:type='esri:ArrayOfDataElement'>
            <DataElement xsi:type='esri:DEFeatureClass'/>
            <DataElement xsi:type='esri:DEFeatureClass'/>
            <DataElement xsi:type='esri:DEFeatureClass'/>
            <DataElement xsi:type='esri:DEFeatureDataset'/>
            <DataElement xsi:type='esri:DEFeatureClass'/>
            <DataElement xsi:type='esri:DEFeatureClass'/>
        </DatasetDefinitions>
    </WorkspaceDefinition>
    <WorkspaceData xsi:type='esri:WorkspaceData'/>
</esri:Workspace>"""    
    
# Parse the string; root_1 is the root <esri:Workspace> element.
root_1 = ET.fromstring(data)

I want to sort the elements by tag at every level and, in addition, sort the DataElement entries by their xsi:type attribute, so that the result looks like this:

WorkspaceData {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:WorkspaceData'}
WorkspaceDefinition {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:WorkspaceDefinition'}
     DatasetDefinitions {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfDataElement'}
         DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
         DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
         DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
         DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
         DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
         DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureDataset'}
     Domains {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfDomain'}
     Sequences {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfSequence'}
     Version {}
     WorkspaceType {}

I have managed to sort by tag, but how can I also sort by the DataElement type? Here is my code so far:

# Reorder the root's direct children in place, by tag name.
root_1[:] = sorted(root_1,  key=attrgetter("tag")) # WorkspaceData, WorkspaceDefinition
for node in root_1.findall("*"):  # DatasetDefinitions, Domains, Sequences, Version, WorkspaceType
    # Reorder this element's direct children in place, by tag name.
    node[:] = sorted(node, key=attrgetter("tag"))
    print(node)
    for subnode in node.findall("*"): #DataElement, Domain
        subnode[:] = sorted(subnode, key=attrgetter("tag"))
        # Attempted attribute-based sort, kept to show what was tried. It fails
        # because `subnode.get` is a method and cannot be subscripted, and
        # `key=` expects a callable that is applied to each child.
        #subnode[:] = sorted(subnode, key=subnode.get['xsi:type']) # not working!
        print("\t", subnode.tag, subnode.attrib)
        for subsubnode in subnode.findall("*"): 
            print("\t\t", subsubnode.tag, subsubnode.attrib)
            # One level further down is sorted by tag as well.
            subsubnode[:] = sorted(subsubnode,  key=attrgetter("tag"))
1

There is 1 best solution below

0
On BEST ANSWER

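If I understand correctly, you only need to change the key= argument passed to sorted() so that it compares the namespace-qualified xsi:type attribute in addition to the tag: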

import xml.etree.ElementTree as ET
from operator import attrgetter

# Sample workspace document; the `xsi` prefix is bound to the
# XMLSchema-instance namespace on the root element.
data = """<esri:Workspace xmlns:esri='http://www.esri.com/schemas/ArcGIS/10.8' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xs='http://www.w3.org/2001/XMLSchema'>
    <WorkspaceDefinition xsi:type='esri:WorkspaceDefinition'>
        <WorkspaceType>esriLocalDatabaseWorkspace</WorkspaceType>
        <Version/>
        <Domains xsi:type='esri:ArrayOfDomain'/>
        <Sequences xsi:type='esri:ArrayOfSequence'/>
        <DatasetDefinitions xsi:type='esri:ArrayOfDataElement'>
            <DataElement xsi:type='esri:DEFeatureClass'/>
            <DataElement xsi:type='esri:DEFeatureClass'/>
            <DataElement xsi:type='esri:DEFeatureClass'/>
            <DataElement xsi:type='esri:DEFeatureDataset'/>
            <DataElement xsi:type='esri:DEFeatureClass'/>
            <DataElement xsi:type='esri:DEFeatureClass'/>
        </DatasetDefinitions>
    </WorkspaceDefinition>
    <WorkspaceData xsi:type='esri:WorkspaceData'/>
</esri:Workspace>"""

root_1 = ET.fromstring(data)

# ElementTree stores namespaced attributes under "{namespace-uri}localname",
# so `xsi:type` has to be looked up by its fully qualified name rather than
# by the prefixed spelling used in the document.
XSI_TYPE = "{http://www.w3.org/2001/XMLSchema-instance}type"


def tag_and_type_key(element):
    """Return the sort key ``(tag, xsi:type)`` for *element*.

    Elements without an ``xsi:type`` attribute contribute ``""`` instead of
    ``None``. Without that default, same-tag siblings where only some carry
    the attribute would raise ``TypeError`` when ``None`` is compared with a
    string. As a consequence, untyped elements sort before typed ones.
    """
    return element.tag, element.get(XSI_TYPE, "")


# Top level: WorkspaceData, WorkspaceDefinition
root_1[:] = sorted(root_1, key=attrgetter("tag"))

for node in root_1.findall("*"):
    # Children of each top-level element:
    # DatasetDefinitions, Domains, Sequences, Version, WorkspaceType
    node[:] = sorted(node, key=attrgetter("tag"))
    print(node)
    for subnode in node.findall("*"):
        # <--- the change is here: key= now compares the tag first and the
        # xsi:type second, which puts the DataElement entries in type order.
        subnode[:] = sorted(subnode, key=tag_and_type_key)
        print("\t", subnode.tag, subnode.attrib)
        for subsubnode in subnode.findall("*"):
            print("\t\t", subsubnode.tag, subsubnode.attrib)
            # One level further down is still sorted by tag only.
            subsubnode[:] = sorted(subsubnode, key=attrgetter("tag"))

Prints:

<Element 'WorkspaceData' at 0x7f5ff630bec0>
<Element 'WorkspaceDefinition' at 0x7f5ff6316610>
         DatasetDefinitions {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfDataElement'}
                 DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
                 DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
                 DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
                 DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
                 DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
                 DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureDataset'}
         Domains {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfDomain'}
         Sequences {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfSequence'}
         Version {}
         WorkspaceType {}