Issue using cts:not-query or cts:and-not-query

179 Views Asked by At

My MarkLogic version is 9. I have 2 queries returning expected results when run separately but when I try to combine them I get no result.

My data looks like the example below. I need to match only the exact value ABC, not ABC/* (or, if ABC/D is the search criterion, only ABC/D and not ABC/D/*).

<root xmlns:ns1="http://ns1"> 
   <ns1:security>
     <ns1:elem>ABC</ns1:elem>
     <ns1:elem>ABC/D</ns1:elem>
     <ns1:elem>ABC/D/E</ns1:elem>
   </ns1:security>
</root>

The code below returns 4 results:

xquery version "1.0-ml";
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";

search:resolve(        
        <cts:path-range-query operator="=" xmlns:ns1="http://ns1">
          <cts:path-expression>//ns1:security/ns1:elem</cts:path-expression>
          <cts:value xsi:type="xs:string" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">ABC</cts:value>
        </cts:path-range-query>
)

and this one returns only 3 results:

xquery version "1.0-ml";
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";

search:resolve( 
      <cts:element-query>
        <cts:element xmlns:ns1="http://ns1">ns1:security</cts:element>
        <cts:element-value-query>
          <cts:element xmlns:ns1="http://ns1">ns1:elem</cts:element>
          <cts:text xml:lang="en">ABC/*</cts:text>
          <cts:option>wildcarded</cts:option>
        </cts:element-value-query>
      </cts:element-query>
)

So I was expecting to get 1 result when running this:

xquery version "1.0-ml";
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";

search:resolve(
    <cts:and-not-query>

      <cts:positive>  
        <cts:path-range-query operator="=" xmlns:ns1="http://ns1">
          <cts:path-expression>//ns1:security/ns1:elem</cts:path-expression>
          <cts:value xsi:type="xs:string" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">ABC</cts:value>
        </cts:path-range-query>
      </cts:positive>
      
      <cts:negative>
        <cts:element-query>
          <cts:element xmlns:ns1="http://ns1">ns1:security</cts:element>
          <cts:element-value-query>
            <cts:element xmlns:ns1="http://ns1">ns1:elem</cts:element>
            <cts:text xml:lang="en">ABC/*</cts:text>
            <cts:option>wildcarded</cts:option>
          </cts:element-value-query>
        </cts:element-query>

      </cts:negative>      
    </cts:and-not-query>
)

I also tried this, but got the same result:

xquery version "1.0-ml";
import module namespace search = "http://marklogic.com/appservices/search" at "/MarkLogic/appservices/search/search.xqy";

search:resolve(
    <cts:and-query>

      <cts:path-range-query operator="=" xmlns:ns1="http://ns1">
        <cts:path-expression>//ns1:security/ns1:elem</cts:path-expression>
        <cts:value xsi:type="xs:string" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">ABC</cts:value>
      </cts:path-range-query>
      
      <cts:not-query>
        <cts:element-query>
          <cts:element xmlns:ns1="http://ns1">ns1:security</cts:element>
          <cts:element-value-query>
            <cts:element xmlns:ns1="http://ns1">ns1:elem</cts:element>
            <cts:text xml:lang="en">ABC/*</cts:text>
            <cts:option>wildcarded</cts:option>
          </cts:element-value-query>
        </cts:element-query>
      </cts:not-query>

    </cts:and-query>
)

Is this a known bug? Am I doing something wrong here? Any help appreciated :)

Search plan

<search:response snippet-format="snippet" total="0" start="1" page-length="10" xmlns:search="http://marklogic.com/appservices/search">
  <search:plan>
    <qry:query-plan xmlns:qry="http://marklogic.com/cts/query">
      <qry:expr-trace>impl:apply-search(map:map(&lt;map:map xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" .../&gt;), "xdmp:plan", fn:false())</qry:expr-trace>
      <qry:info-trace>Analyzing path for search: fn:collection()</qry:info-trace>
      <qry:info-trace>Step 1 is searchable: fn:collection()</qry:info-trace>
      <qry:info-trace>Path is fully searchable.</qry:info-trace>
      <qry:info-trace>Gathering constraints.</qry:info-trace>
      <qry:info-trace>Comparison contributed string range value constraint: //ns1:security/ns1:elem = "CTPA"</qry:info-trace>
      <qry:partial-plan>
    <qry:range-query weight="0" min-occurs="1" max-occurs="4294967295" xmlns:xs="http://www.w3.org/2001/XMLSchema">
      <qry:key>12785637774270294680</qry:key>
      <qry:annotation>path(//ns1:security/ns1:elem)</qry:annotation>
      <qry:lower-bound xsi:type="xs:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">CTPA</qry:lower-bound>
      <qry:upper-bound xsi:type="xs:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">CTPA</qry:upper-bound>
    </qry:range-query>
      </qry:partial-plan>
      <qry:elem-word-trace text="CTPA" elem-name="elem" elem-uri="http://ns1">
    <qry:key>6185531260368494803</qry:key>
      </qry:elem-word-trace>
      <qry:info-trace>Search query contributed 1 constraint: cts:and-query((cts:path-range-query("//ns1:security/ns1:elem", "=", "CTPA", ("collation=http://marklogic.com/collation/"), 1), cts:not-query(cts:element-query(xs:QName("ns1:security"), cts:element-value-query(xs:QName("ns1:elem"), "CTPA/*", ("wildcarded","lang=en"), 1), ()), 1)), ())</qry:info-trace>
      <qry:partial-plan>
    <qry:and-not-two-queries>
      <qry:range-query weight="0" min-occurs="1" max-occurs="4294967295" xmlns:xs="http://www.w3.org/2001/XMLSchema">
        <qry:key>12785637774270294680</qry:key>
        <qry:annotation>path(//ns1:security/ns1:elem)</qry:annotation>
        <qry:lower-bound xsi:type="xs:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">CTPA</qry:lower-bound>
        <qry:upper-bound xsi:type="xs:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">CTPA</qry:upper-bound>
      </qry:range-query>
      <qry:and-two-queries>
        <qry:or-two-queries>
          <qry:term-query weight="0">
        <qry:key>17253116673510471442</qry:key>
        <qry:annotation>element(ns1:security)</qry:annotation>
          </qry:term-query>
          <qry:term-query weight="0">
        <qry:key>12929598538251878498</qry:key>
        <qry:annotation>link-child(descendant(element(ns1:security)))</qry:annotation>
          </qry:term-query>
        </qry:or-two-queries>
        <qry:term-query weight="1">
          <qry:key>6185531260368494803</qry:key>
          <qry:annotation>element(http://one.oecd.org/ns1:elem,word("CTPA"))</qry:annotation>
        </qry:term-query>
      </qry:and-two-queries>
    </qry:and-not-two-queries>
      </qry:partial-plan>
      <qry:info-trace>Executing search.</qry:info-trace>
      <qry:ordering/>
      <qry:final-plan>
    <qry:and-query>
      <qry:and-not-two-queries>
        <qry:range-query weight="0" min-occurs="1" max-occurs="4294967295" xmlns:xs="http://www.w3.org/2001/XMLSchema">
          <qry:key>12785637774270294680</qry:key>
          <qry:annotation>path(//ns1:security/ns1:elem)</qry:annotation>
          <qry:lower-bound xsi:type="xs:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">CTPA</qry:lower-bound>
          <qry:upper-bound xsi:type="xs:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">CTPA</qry:upper-bound>
        </qry:range-query>
        <qry:and-two-queries>
          <qry:or-two-queries>
        <qry:term-query weight="0">
          <qry:key>17253116673510471442</qry:key>
          <qry:annotation>element(ns1:security)</qry:annotation>
        </qry:term-query>
        <qry:term-query weight="0">
          <qry:key>12929598538251878498</qry:key>
          <qry:annotation>link-child(descendant(element(ns1:security)))</qry:annotation>
        </qry:term-query>
          </qry:or-two-queries>
          <qry:term-query weight="1">
        <qry:key>6185531260368494803</qry:key>
        <qry:annotation>element(http://one.oecd.org/ns1:elem,word("CTPA"))</qry:annotation>
          </qry:term-query>
        </qry:and-two-queries>
      </qry:and-not-two-queries>
    </qry:and-query>
      </qry:final-plan>
      <qry:info-trace>Selected 0 fragments to filter</qry:info-trace>
      <qry:result estimate="0"/>
    </qry:query-plan>
  </search:plan>
  <search:metrics>
    <search:query-resolution-time>PT0.001512S</search:query-resolution-time>
    <search:total-time>PT0.0024561S</search:total-time>
  </search:metrics>
</search:response>
1

There is 1 best solution below

0
On

So, I believe the issue is that the negative query, which is meant to exclude the ns1:elem elements whose values start with CTPA/ (ABC/ in the simplified example above), is producing a term-query for the word CTPA. From the plan, we see:

<qry:term-query weight="1">
  <qry:key>6185531260368494803</qry:key>                              
  <qry:annotation>element(http://one.oecd.org/ns1:elem,word("CTPA"))</qry:annotation>
</qry:term-query>

inside of the not-query:

<qry:and-not-two-queries>
  <qry:range-query weight="0" min-occurs="1" max-occurs="4294967295" xmlns:xs="http://www.w3.org/2001/XMLSchema">
    <qry:key>12785637774270294680</qry:key>
    <qry:annotation>path(//ns1:security/ns1:elem)</qry:annotation>
    <qry:lower-bound xsi:type="xs:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">CTPA</qry:lower-bound>
    <qry:upper-bound xsi:type="xs:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">CTPA</qry:upper-bound>
  </qry:range-query>
  <qry:and-two-queries>
    <qry:or-two-queries>
  <qry:term-query weight="0">
    <qry:key>17253116673510471442</qry:key>
    <qry:annotation>element(ns1:security)</qry:annotation>
  </qry:term-query>
  <qry:term-query weight="0">
    <qry:key>12929598538251878498</qry:key>
    <qry:annotation>link-child(descendant(element(ns1:security)))</qry:annotation>
  </qry:term-query>
    </qry:or-two-queries>
    <qry:term-query weight="1">
  <qry:key>6185531260368494803</qry:key>
  <qry:annotation>element(http://one.oecd.org/ns1:elem,word("CTPA"))</qry:annotation>
    </qry:term-query>
  </qry:and-two-queries>
</qry:and-not-two-queries>

Now, if that were a positive query, you would scoop up the desired items as well as some false positives, and then they could be filtered. But when you negate it, those false positives (the ones that are only the value CTPA) are included in the set to exclude, so everything is eliminated.

There are database options such as trailing wildcard searches and three character searches that can help with query resolution, as well as some other wildcard indexing options that can help with wildcarded queries. Read more in the documentation for Understanding and Using Wildcard Searches.

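I enabled both of the database options mentioned above (trailing wildcard searches and three character searches), and made the negated query a value-query with both the wildcarded and punctuation-sensitive options applied (the original had only wildcarded), so that it no longer resolves to just a word term: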

<cts:not-query>
  <cts:element-query>
    <cts:element xmlns:ns1="http://ns1">ns1:security</cts:element>
    <cts:element-value-query>
      <cts:element xmlns:ns1="http://ns1">ns1:elem</cts:element>
      <cts:text xml:lang="en">ABC/*</cts:text>
      <cts:option>wildcarded</cts:option>
      <cts:option>punctuation-sensitive</cts:option>
    </cts:element-value-query>
  </cts:element-query>
</cts:not-query>

and then it produced this query in my plan:

<qry:and-three-queries>
  <qry:term-query weight="1">
    <qry:key>11040420969293892357</qry:key>
    <qry:annotation>element(http://ns1:elem,word("ABC"))</qry:annotation>
  </qry:term-query>
  <qry:term-query weight="1">
    <qry:key>5369780126042640453</qry:key>
    <qry:annotation>word("ABC*")</qry:annotation>
  </qry:term-query>
  <qry:term-query weight="1">
    <qry:key>15274949237949545150</qry:key>
    <qry:annotation>word("*BC/*")</qry:annotation>
  </qry:term-query>
</qry:and-three-queries>

The combined query now produces the expected results, finding the documents with ABC and not those that have ABC/*.