Error spawning multiple threads to delete multiple same elements

127 Views Asked by At

I want to delete an element that is replicated 2353218 times in an XML document, keeping only one copy. I tried to spawn the deletion in batches, but I am getting the following error. Without spawning, it takes too much time. Please help.

 xquery version "1.0-ml";
    (: Attempts to remove every toMultipleElement after the first from a single
       document by slicing the nodes into batches and handing each batch to
       xdmp:spawn-function. This is the listing that produces the
       XDMP-DELEXTNODES error quoted after it. :)

    (: All toMultipleElement nodes except the first one, i.e. the nodes to delete. :)
    let $input := doc("http://www.somedomain.com/name/12345.xml")/xpath/toMultipleElement[2 to last()]

    (: Maximum number of nodes handed to each spawned function. :)
    let $batch-size := 50000

    let $input-size := fn:count($input)

    (: Number of batches needed to cover the whole sequence, rounded up. :)
    let $num-batches :=  xs:int(math:ceil($input-size div $batch-size ))

    let $result :=
    <root>{

    (: One iteration per batch; $batch-start is the 1-based batch number. :)
    for $batch-start in (1 to $num-batches)
      (: Slice of $input for this batch, selected by 1-based position range. :)
      let $processing-seq := $input[($batch-size * ($batch-start - 1) + 1)  to ($batch-size * ($batch-start ))]
      return

        (: The anonymous function captures $processing-seq and $batch-start from
           the enclosing scope and deletes the captured nodes when it runs.
           NOTE(review): presumably the captured nodes are no longer treated as
           database nodes inside the spawned function, which would explain
           "Cannot delete external nodes" - confirm against the MarkLogic docs. :)
        xdmp:spawn-function(function() {
        xdmp:node-delete($processing-seq),

        <success batch-start='{$batch-start}'> processing sequence deleted</success>
        }, 
        (: Options for the spawned function: result is set to true and the
           transaction mode to update-auto-commit. :)
        <options xmlns="xdmp:eval">
          <result>true</result>
          <transaction-mode>update-auto-commit</transaction-mode>
        </options>)
    }</root>

    (: Save the <root> element holding whatever the spawn calls returned. :)
    return 
    xdmp:save("D:/batch-wise-delete.xml", $result)

Error: [1.0-ml] XDMP-DELEXTNODES: let $processing-seq := $input[$batch-size * ($batch-start - 1) + 1 to $batch-size * $batch-start] -- Cannot delete external nodes

2

There are 2 best solutions below

0
On

I recommend that, instead of attempting to delete all the unwanted nodes, you reconstruct the document by inclusion in one pass.

The basic strategy is documented here https://developer.marklogic.com/blog/xquery-recursive-descent

Essentially: create a new document by recursing over all the nodes in the existing document and returning them unchanged, except exclude the unwanted nodes. Then save the new document over the old one. This can be done in one transaction very efficiently.

0
On

Instead of deleting all the children, just write a new parent having one child.

(: Replace the parent element with a freshly constructed <parent> that holds
   only the first toMultipleElement, instead of deleting the duplicates one by
   one. Only the chosen child is copied into the new element; attributes and
   any other children of the original parent are not carried over. :)
let $parent := doc("http://www.somedomain.com/name/12345.xml")/xpath/parent
(: The single copy to keep. :)
let $chosen-child := $parent/toMultipleElement[1]
(: The closing parenthesis of the xdmp:node-replace call was missing. :)
return xdmp:node-replace($parent, <parent>{ $chosen-child }</parent>)