Selective XML index query plan

669 Views Asked by At

I am currently exploring the SQL server XML column and Selective indexes for our needs. For doing so I created table called Incidents and created the Selective Index and Secondary selective Indexes (scripts below).

When I run the following query it does use the selective index but the query plan does the IS NOT NULL predicate on Severity column data and then the sort on it. This degrades the performance of the query significantly when the data in table is large. I have seen with 4 million rows in table it takes ~20 sec to complete following query.

Am I missing anything here?

select TOP 100 Data.value('(/Incident/Severity)[1]', 'int') AS Severity,

Data.value('(/Incident/OwningTenantId)[1]', 'VARCHAR(800)') AS OwningTenantId,

Data.value('(/Incident/OwningTeamId)[1]', 'NVARCHAR(800)') AS OwningTeamId

FROM Incidents

WHERE Data.value('(/Incident/Severity)[1]', 'int') = 1
ORDER BY Data.value('(/Incident/OwningTenantId)[1]', 'NVARCHAR(800)')

Index:

CREATE TABLE [dbo].[Incidents](
    [id] [uniqueidentifier] NOT NULL,
    [Data] [xml] NOT NULL,
 CONSTRAINT [PK_Incidents] PRIMARY KEY CLUSTERED 
(
    [id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

GO

CREATE SELECTIVE XML INDEX sxi_Incident_Data ON Incidents(Data)
 FOR 
 (
 Severity = '/Incident/Severity' AS SQL int SINGLETON,
 OwningTeamId = '/Incident/OwningTeamId' AS SQL NVARCHAR(400) SINGLETON,
 OwningTenantId = '/Incident/OwningTenantId' AS SQL NVARCHAR(400) SINGLETON,
 id = '/Incident/_id' AS SQL BIGINT SINGLETON
 )

 GO

 create xml index sxi_secondary_severity on Incidents(Data)
 using xml index sxi_Incident_Data
 for (Severity);

 GO

 create xml index sxi_secondary_OwningTeamId on Incidents(Data)
 using xml index sxi_Incident_Data
 for (OwningTeamId); 

 GO

 create xml index sxi_secondary_OwningTenantId on Incidents(Data)
 using xml index sxi_Incident_Data
 for (OwningTenantId); 

 GO


 create xml index sxi_secondary_Id on Incidents(Data)
 using xml index sxi_Incident_Data
 for (id); 

 GO

Sample XML:

<Incident>
  <_id>123</_id>
  <Severity>3</Severity>
  <IncidentStatus>RESOLVED</IncidentStatus>
  <CreateDate>2014-05-04 05:43:58.317</CreateDate>
  <LastUpdateDate>2014-05-06 18:47:39.037</LastUpdateDate>
  <AlertSourceLocalId>20070</AlertSourceLocalId>
  <SourceIncidentId>35d0bfe4-ccb9-491f-a30c-ea7685ffe8c0</SourceIncidentId>
  <SourceCreateDate>2014-05-04 02:51:14.000</SourceCreateDate>
  <SourceCreatedBy>Someone</SourceCreatedBy>
  <SourceModifiedDate>2014-05-04 05:43:57.797</SourceModifiedDate>
  <SourceOrigin>Some Origin</SourceOrigin>
  <CorrelationId>correlatioid</CorrelationId>
  <RoutingId>Route123</RoutingId>
  <Datacenter>Unknown</Datacenter>
  <Environment>INT</Environment>
  <DeviceGroup>Devicegroup</DeviceGroup>
  <DeviceName>DeviceName</DeviceName>
  <RaisingEnvironment>PROD</RaisingEnvironment>
  <RaisingDatacenter>Unknown</RaisingDatacenter>
  <RaisingDeviceGroup>DEviceGroup</RaisingDeviceGroup>
  <RaisingDeviceName>FakeDevice</RaisingDeviceName>
  <PrimaryIncidentId>1234</PrimaryIncidentId>
  <RelatedLinksCount>0</RelatedLinksCount>
  <ExternalLinksCount>0</ExternalLinksCount>
  <HitCount>0</HitCount>
  <ChildCount>0</ChildCount>
  <Title>Some Title</Title>
  <ReproSteps>&#x0;</ReproSteps>
  <OwningTenantId>564</OwningTenantId>
  <OwningTeamId>123</OwningTeamId>
  <ResolveDate>2014-05-06 18:47:39.037</ResolveDate>
  <ResolvedBy>SomeOne</ResolvedBy>
  <MitigateDate>2014-05-06 18:45:55.403</MitigateDate>
  <MitigatedBy>Someone</MitigatedBy>
  <Mitigation>N/A</Mitigation>
  <IsNoise>0</IsNoise>
  <IsSecurityRisk>0</IsSecurityRisk>
  <IsCustomerImpacting>0</IsCustomerImpacting>
  <OriginatingTenantId>10066</OriginatingTenantId>
  <ImpactStartDate>2014-05-01 23:31:22.000</ImpactStartDate>
  <RootCauseNeedsInvestigation>0</RootCauseNeedsInvestigation>
  <ConnectorTenantId>10066</ConnectorTenantId>
  <RelationshipId>1852546</RelationshipId>
  <SuppressAutoUpdate>0</SuppressAutoUpdate>
</Incident>

Repro: Create Table indices

-- Create Table 
IF(EXISTS(SELECT * FROM sys.tables WHERE [Name] = 'XmlTable' AND [Type] = 'U'))
BEGIN
    DROP TABLE XmlTable
END

CREATE TABLE [dbo].[XmlTable](
    [id] [uniqueidentifier] NOT NULL,
    [Data] [xml] NULL
PRIMARY KEY CLUSTERED 
(
    [id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

GO

-- Populate Data
DECLARE @i INT  = 0
DECLARE @XML NVARCHAR(MAX), 
        @Severity INT, 
        @OwningTeamId VARCHAR(400), 
        @OwningTenantId VARCHAR(400), 
        @IncidentStatus varchar(100),
        @Mod SMALLINT
WHILE @i < 500
BEGIN   
    SET @i = @i + 1
    SET @Mod = @i % 3

    SELECT @Severity = @Mod + 1,
           @OwningTeamId = 'OwningTeam' + CAST(@Mod AS VARCHAR),    
           @OwningTenantId = 'OwningTenantId' + CAST(@Mod AS VARCHAR),
           @IncidentStatus = CASE @Mod 
                                WHEN 0 THEN 'Active' 
                                WHEN 1 THEN 'Resolved' 
                                WHEN 2 THEN 'Closed' 
                             END    
    SET @XML = 
    '<Incident>' +
        '<_id>' + CAST(@i AS VARCHAR) + '</_id>' +
        '<Severity>' + CAST(@Severity AS VARCHAR) + '</Severity>' +
        '<OwningTeamId>' + @OwningTeamId + '</OwningTeamId>' +
        '<OwningTenantId>' + @OwningTenantId + '</OwningTenantId>' +
        '<IncidentStatus>' + @IncidentStatus + '</IncidentStatus>' +
    '</Incident>'
    INSERT INTO XmlTable
    SELECT NEWID(), @XML
END


-- Creat Indices
CREATE SELECTIVE XML INDEX [sxi_Data] ON [dbo].[XmlTable]
(
    [Data]
)
FOR
(
[Severity] = '/Incident/Severity' as SQL [int] SINGLETON , 
[OwningTeamId] = '/Incident/OwningTeamId' as SQL [nvarchar](400) SINGLETON , 
[OwningTenantId] = '/Incident/OwningTenantId' as SQL [nvarchar](400) SINGLETON , 
[id] = '/Incident/_id' as SQL [bigint] SINGLETON , 
[TicketStatus] = '/Incident/IncidentStatus' as SQL [nvarchar](100) SINGLETON 
)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO

CREATE XML INDEX [sxi_secondary_Id] ON [dbo].[XmlTable]
(
    [Data]
)USING XML INDEX [sxi_Data] FOR (
[id]
) 
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO

CREATE XML INDEX [sxi_secondary_OwningTeamId] ON [dbo].[XmlTable]
(
    [Data]
)USING XML INDEX [sxi_Data] FOR (
[OwningTeamId]
) 
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO

USE [XMLDocuemntStore]
GO


CREATE XML INDEX [sxi_secondary_OwningTenantId] ON [dbo].[XmlTable]
(
    [Data]
)USING XML INDEX [sxi_Data] FOR (
[OwningTenantId]
) 
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO

USE [XMLDocuemntStore]
GO


CREATE XML INDEX [sxi_secondary_severity] ON [dbo].[XmlTable]
(
    [Data]
)USING XML INDEX [sxi_Data] FOR (
[Severity]
) 
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO

Sample Query: Check the query plan on right sides.

select TOP 100 Data.value('(/Incident/Severity)[1]', 'int') AS Severity
FROM XmlTable
WHERE Data.value('(/Incident/Severity)[1]', 'int') = 1
ORDER BY Data.value('(/Incident/OwningTenantId)[1]', 'NVARCHAR(800)')
1

There are 1 best solutions below

0
On

The SORT TOP N is needed because of the [1] in your XPATH query. To get rid of that you'll need to ensure SQL Server the required xml element only occurs once within an incident element. For that you'll need to strongly type your XML using an XSD document. You can create one like so:

CREATE XML SCHEMA COLLECTION Incident_XSD AS
N'<?xml version="1.0" encoding="UTF-16"?>
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="Incident">
    <xs:complexType>
      <xs:sequence>
        <xs:element type="xs:int" name="_id" />
        <xs:element type="xs:int" name="Severity" />
        <xs:element type="xs:string" name="OwningTeamId" />
        <xs:element type="xs:string" name="OwningTenantId" />
        <xs:element type="xs:string" name="IncidentStatus"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>' ;
GO

Use it in your table definition like so

[Data] [xml](Incident_XSD) NULL

Now the following query is valid

select TOP 100 Data.value('/Incident[1]/Severity', 'int') AS Severity
FROM XmlTable
WHERE Data.value('/Incident[1]/Severity', 'int') = 1
ORDER BY Data.value('/Incident[1]/OwningTenantId', 'NVARCHAR(800)')

Returns within a second or 2 with a milion rows in the table.

PS: You might want to reconsider using GUIDs as primary key