Arrays returned by xml2js do not maintain all the order

1.4k Views Asked by At

When xml2js groups XML child elements by tag name, some of their original order is lost.

For example, the xml I'm reading has elements that look like:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<b>
  <c n="1">
    <v n="1">
      <w>w1</w>
      <w>w2</w>
      <w>w3</w>
      <k>w4</k>
      <q>w4mod<x>a</x></q>
      <w>w5</w>
      <w>w6</w>
      <k>w7</k>
      <q>w7</q>
      <q>Mod7</q>
      <w>w8</w>
    </v>
  </c>
</b>

There can be any number of <w>something</w> entries, zero or more <k>something</k> entries, and zero or more <q>something</q> entries.

But I need to maintain the order within the parse. When I use the following code:

    xml2js = require('xml2js'),
    util = require('util');

// Create the xml2js parser.
// NOTE(review): the options are split across two separate object arguments;
// the first answer below reports that the constructor takes a single options
// object, so preserveChildrenOrder presumably never takes effect here.
var parser = new xml2js.Parser({explicitChildren: true}, {preserveChildrenOrder:true});

// Read Test.xml from this script's directory, then parse its contents.
// Neither callback inspects its `err` argument.
fs.readFile(__dirname + '/Test.xml', function(err, data) {
    parser.parseString(data, function (err, result) {
        // Dump the whole parsed tree: showHidden=false, depth=null (no nesting limit), colors=true.
        console.log(util.inspect(result, false, null, true))
    });
});

I get:

  b: {
    '$$': {
      c: [
        {
          '$': { n: '1' },
          '$$': {
            v: [
              {
                '$': { n: '1' },
                '$$': {
                  w: [ 'w1', 'w2', 'w3', 'w5', 'w6', 'w8' ],
                  k: [ 'w4', 'w7' ],
                  q: [
                    { _: 'w4mod', '$$': { x: [ 'a' ] } },
                    'w7',
                    'Mod7'
                  ]
                }
              }
            ]
          }
        }
      ]
    }
  }
}

What I'd like is something like:

w: [ 'w1', 'w2', 'w3'],
k: ['w4'],
q:[ { _: 'w4mod', '$$': { x: [ 'a' ] } }],
w: [ 'w5', 'w6'],
k: ['w7'],
q: ['w7', 'Mod7'],
w: ['w8']
}

etc

Is there some way to maintain the order of this information?

2

There are 2 best solutions below

2
On

The constructor requires 1 argument: new xml2js.Parser({optionName: value}). You provided 2 arguments.

Try this:

// Both flags go into ONE options object, since the constructor takes a single argument.
var parser = new xml2js.Parser({
    explicitChildren: true,
    preserveChildrenOrder: true
});

You should get a preserved order in the $$ key from the explicitChildren:

v: [
    {
        $: { n: '1' },
        $$: [
            { _: 'w1', '#name': 'w' },
            { _: 'w2', '#name': 'w' },
            { _: 'w3', '#name': 'w' },
            { _: 'w4', '#name': 'k' },
            {
                _: 'w4mod',
                '#name': 'q',
                $$: [{ _: 'a', '#name': 'x' }],
                x: ['a']
            },
            { _: 'w5', '#name': 'w' },
            { _: 'w6', '#name': 'w' },
            { _: 'w7', '#name': 'k' },
            { _: 'w7', '#name': 'q' },
            { _: 'Mod7', '#name': 'q' },
            { _: 'w8', '#name': 'w' }
        ],
        w: ['w1', 'w2', 'w3', 'w5', 'w6', 'w8'],
        k: ['w4', 'w7'],
        q: [
            {
                _: 'w4mod',
                $$: [{ _: 'a', '#name': 'x' }],
                x: ['a']
            },
            'w7',
            'Mod7'
        ]
    }
]

Also this issue might be related: https://github.com/Leonidas-from-XIV/node-xml2js/issues/499 but it seems to work fine without the charsAsChildren option.

Otherwise, this library might help you: https://github.com/nashwaan/xml-js

0
On

You can use camaro for this. The downside of camaro is that you need to know the structure of the XML beforehand.

Here's how to do it:

const { transform } = require('camaro')

/**
 * Transforms the sample XML with camaro and prints the result.
 *
 * The template selects every child node of /b/c/v and maps each one to a
 * `{ key, value }` pair (node name and node text), collected under `items`.
 *
 * @returns {Promise<void>} Settles once the transformed result has been logged;
 *   rejects if `transform` rejects.
 */
async function main() {
    const xml = `<?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <b>
      <c n="1">
        <v n="1">
          <w>w1</w>
          <w>w2</w>
          <w>w3</w>
          <k>w4</k>
          <q>w4mod<x>a</x></q>
          <w>w5</w>
          <w>w6</w>
          <k>w7</k>
          <q>w7</q>
          <q>Mod7</q>
          <w>w8</w>
        </v>
      </c>
    </b>`;

    const template = {
        items: ['/b/c/v/node()', {
            key: 'name()', // get node name of the current child
            value: '.' // get text of the current child. can add more field as well
        }]
    };

    console.log(await transform(xml, template));
}

// Do not leave the promise floating: report a failed transform instead of
// letting it surface as an unhandled rejection.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

Output:

{
  items: [
    { key: 'w', value: 'w1' },
    { key: 'w', value: 'w2' },
    { key: 'w', value: 'w3' },
    { key: 'k', value: 'w4' },
    { key: 'q', value: 'w4moda' },
    { key: 'w', value: 'w5' },
    { key: 'w', value: 'w6' },
    { key: 'k', value: 'w7' },
    { key: 'q', value: 'w7' },
    { key: 'q', value: 'Mod7' },
    { key: 'w', value: 'w8' }
  ]
}