T-SQL SQL-SQL Challenge: Can You Trust the Query Optimizer? I know I canโ€™t!

This question is related to my previous one (posted as an anonymous user - I now have an account), and before I start, I would like to give Rob Farley a loan to ensure the correct indexing scheme.

But the problem is not with the indexing scheme.

Query Optimizer!

Request:

SELECT s.ID_i , s.ShortName_v , sp.Path_v , ( SELECT TOP 1 1 -- has also user access on subsites ? FROM SitePath_T usp , UserSiteRight_t usr WHERE usr.SiteID_i = usp.SiteID_i AND usp.Path_v LIKE sp.Path_v + '%_' AND usr.UserID_i = 1 ) FROM Site_T s , SitePath_T sp WHERE sp.SiteID_i = s.ID_i AND s.ShortName_v LIKE '[ay]%' AND s.ParentID_i = 1 AND EXISTS ( SELECT * FROM SitePath_T usp , UserSiteRight_t usr WHERE usr.SiteID_i = usp.SiteID_i AND usp.Path_v LIKE sp.Path_v + '%' AND usr.UserID_i = 1 ) 

... works in:

 CPU Reads Writes Duration 2073 49572 0 2241 -- more than 2 sec 

Execution plan:

  |--Compute Scalar(DEFINE:([Expr1014]=[Expr1014])) |--Nested Loops(Left Outer Join, OUTER REFERENCES:([sp].[Path_v])) |--Nested Loops(Left Semi Join, OUTER REFERENCES:([Expr1016], [Expr1017], [Expr1018], [Expr1019])) | |--Merge Join(Inner Join, MERGE:([sp].[SiteID_i])=([s].[ID_i]), RESIDUAL:([dbo].[SitePath_T].[SiteID_i] as [sp].[SiteID_i]=[dbo].[Site_T].[ID_i] as [s].[ID_i])) | | |--Compute Scalar(DEFINE:([Expr1016]=[dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%', [Expr1017]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1018]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1019]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'))) | | | |--Index Scan(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_SiteID_<Path>] AS [sp]), ORDERED FORWARD) | | |--Sort(ORDER BY:([s].[ID_i] ASC)) | | |--Clustered Index Seek(OBJECT:([dbo].[Site_T].[IDXC_Site_ParentID+ShortName+ID] AS [s]), SEEK:([s].[ParentID_i]=(1) AND [s].[ShortName_v] >= '9รพรพรพรพรพ' AND [s].[ShortName_v] < 'Z'), WHERE:([dbo].[Site_T].[ShortName_v] as [s].[ShortName_v] like '[ay]%') ORDERED FORWARD) | |--Nested Loops(Inner Join, OUTER REFERENCES:([usp].[SiteID_i], [Expr1020]) WITH UNORDERED PREFETCH) | |--Clustered Index Scan(OBJECT:([dbo].[SitePath_T].[IDXC_SitePath_Path+SiteID] AS [usp]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [Expr1016])) | |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=(1) AND [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]) ORDERED FORWARD) |--Compute Scalar(DEFINE:([Expr1014]=(1))) |--Top(TOP EXPRESSION:((1))) |--Nested Loops(Inner Join, OUTER REFERENCES:([usp].[SiteID_i], [Expr1021]) WITH UNORDERED PREFETCH) |--Clustered Index Scan(OBJECT:([dbo].[SitePath_T].[IDXC_SitePath_Path+SiteID] AS [usp]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_')) |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=(1) AND [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]) ORDERED FORWARD) 

But if I apply indexes, the following query:

 SELECT s.ID_i , s.ShortName_v , sp.Path_v , ( SELECT TOP 1 1 -- has also user access on subsites ? FROM SitePath_T usp WITH ( INDEX ( [IDX_SitePath_Path+SiteID] ) ) -- same performance when using WITH ( INDEX ( [IDX_SitePath_Path_INC<SiteID>] ) ) , UserSiteRight_t usr WITH ( INDEX ( [IDX_UserSiteRight_UserID+SiteID] ) ) WHERE usr.SiteID_i = usp.SiteID_i AND usp.Path_v LIKE sp.Path_v + '%_' AND usr.UserID_i = 1) FROM Site_T s , SitePath_T sp WITH ( INDEX ( [IDX_SitePath_SiteID+Path] ) ) -- same performance when using WITH ( INDEX ( [IDX_SitePath_SiteID_INC<Path>] ) ) WHERE sp.SiteID_i = s.ID_i AND s.ShortName_v LIKE '[ay]%' AND s.ParentID_i = 1 AND EXISTS ( SELECT * FROM SitePath_T usp WITH ( INDEX ( [IDX_SitePath_Path+SiteID] ) ) -- same performance when using WITH ( INDEX ( [IDX_SitePath_Path_INC<SiteID>] ) ) , UserSiteRight_t usr WITH ( INDEX ( [IDX_UserSiteRight_UserID+SiteID] ) ) WHERE usr.SiteID_i = usp.SiteID_i AND usp.Path_v LIKE sp.Path_v + '%' AND usr.UserID_i = 1 ) 

will run in:

 CPU Reads Writes Duration 50 11237 0 55 

duration will decrease to 55 milliseconds (more than 2 seconds) !!!!

And I am pleased with this result!

Execution plan:

  |--Compute Scalar(DEFINE:([Expr1014]=[Expr1014])) |--Nested Loops(Left Outer Join, OUTER REFERENCES:([sp].[Path_v])) |--Nested Loops(Left Semi Join, OUTER REFERENCES:([Expr1016], [Expr1017], [Expr1018], [Expr1019])) | |--Merge Join(Inner Join, MERGE:([sp].[SiteID_i])=([s].[ID_i]), RESIDUAL:([dbo].[SitePath_T].[SiteID_i] as [sp].[SiteID_i]=[dbo].[Site_T].[ID_i] as [s].[ID_i])) | | |--Compute Scalar(DEFINE:([Expr1016]=[dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%', [Expr1017]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1018]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1019]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'))) | | | |--Index Scan(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_SiteID_<Path>] AS [sp]), ORDERED FORWARD) | | |--Sort(ORDER BY:([s].[ID_i] ASC)) | | |--Clustered Index Seek(OBJECT:([dbo].[Site_T].[IDXC_Site_ParentID+ShortName+ID] AS [s]), SEEK:([s].[ParentID_i]=(1) AND [s].[ShortName_v] >= '9รพรพรพรพรพ' AND [s].[ShortName_v] < 'Z'), WHERE:([dbo].[Site_T].[ShortName_v] as [s].[ShortName_v] like '[ay]%') ORDERED FORWARD) | |--Nested Loops(Inner Join, OUTER REFERENCES:([usp].[SiteID_i], [Expr1023]) WITH UNORDERED PREFETCH) | |--Nested Loops(Inner Join, OUTER REFERENCES:([Expr1017], [Expr1018], [Expr1019])) | | |--Compute Scalar(DEFINE:([Expr1017]=[Expr1017], [Expr1018]=[Expr1018], [Expr1019]=[Expr1019])) | | | |--Constant Scan | | |--Index Seek(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_Path+SiteID] AS [usp]), SEEK:([usp].[Path_v] > [Expr1017] AND [usp].[Path_v] < [Expr1018]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [Expr1016]) ORDERED FORWARD) | |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=(1) AND [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]) ORDERED FORWARD) |--Compute Scalar(DEFINE:([Expr1014]=(1))) |--Top(TOP EXPRESSION:((1))) |--Nested Loops(Inner Join, OUTER REFERENCES:([usp].[SiteID_i], [Expr1027]) WITH UNORDERED PREFETCH) |--Nested Loops(Inner Join, OUTER REFERENCES:([Expr1024], [Expr1025], [Expr1026])) | |--Compute Scalar(DEFINE:([Expr1024]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'), [Expr1025]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'), [Expr1026]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'))) | | |--Constant Scan | |--Index Seek(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_Path+SiteID] AS [usp]), SEEK:([usp].[Path_v] > [Expr1024] AND [usp].[Path_v] < [Expr1025]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_') ORDERED FORWARD) |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=(1) AND [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i]) ORDERED FORWARD) 

The next step is to run it for different users, so I will declare UserID_i as a variable:

 DECLARE @UserID_i INT SELECT @UserID_i = 1 

BUT NOW THE QUESTION NOW WILL BE A SLOW SAYING TALE !!!

 SELECT s.ID_i , s.ShortName_v , sp.Path_v , ( SELECT TOP 1 1 -- has also user access on subsites ? FROM SitePath_T usp WITH ( INDEX ( [IDX_SitePath_Path+SiteID] ) ) , UserSiteRight_t usr WITH ( INDEX ( [IDX_UserSiteRight_UserID+SiteID] ) ) WHERE usr.SiteID_i = usp.SiteID_i AND usp.Path_v LIKE sp.Path_v + '%_' AND usr.UserID_i = @UserID_i) FROM Site_T s , SitePath_T sp WITH ( INDEX ( [IDX_SitePath_SiteID+Path] ) ) WHERE sp.SiteID_i = s.ID_i AND s.ShortName_v LIKE '[ay]%' AND s.ParentID_i = 1 AND EXISTS ( SELECT * FROM SitePath_T usp WITH ( INDEX ( [IDX_SitePath_Path+SiteID] ) ) , UserSiteRight_t usr WITH ( INDEX ( [IDX_UserSiteRight_UserID+SiteID] ) ) WHERE usr.SiteID_i = usp.SiteID_i AND usp.Path_v LIKE sp.Path_v + '%' AND usr.UserID_i = @UserID_i ) 

Duration now for 7 seconds !!!

 CPU Reads Writes Duration 7421 149984 35 7625 

And the execution plan:

  |--Compute Scalar(DEFINE:([Expr1014]=[Expr1014])) |--Nested Loops(Left Outer Join, OUTER REFERENCES:([sp].[Path_v])) |--Nested Loops(Left Semi Join, WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [Expr1016])) | |--Merge Join(Inner Join, MERGE:([sp].[SiteID_i])=([s].[ID_i]), RESIDUAL:([dbo].[SitePath_T].[SiteID_i] as [sp].[SiteID_i]=[dbo].[Site_T].[ID_i] as [s].[ID_i])) | | |--Compute Scalar(DEFINE:([Expr1016]=[dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%', [Expr1017]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1018]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'), [Expr1019]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%'))) | | | |--Index Scan(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_SiteID+Path] AS [sp]), ORDERED FORWARD) | | |--Sort(ORDER BY:([s].[ID_i] ASC)) | | |--Clustered Index Seek(OBJECT:([dbo].[Site_T].[IDXC_Site_ParentID+ShortName+ID] AS [s]), SEEK:([s].[ParentID_i]=(1) AND [s].[ShortName_v] >= '9รพรพรพรพรพ' AND [s].[ShortName_v] < 'Z'), WHERE:([dbo].[Site_T].[ShortName_v] as [s].[ShortName_v] like '[ay]%') ORDERED FORWARD) | |--Table Spool | |--Hash Match(Inner Join, HASH:([usr].[SiteID_i])=([usp].[SiteID_i])) | |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=[@UserID_i]) ORDERED FORWARD) | |--Index Scan(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_Path+SiteID] AS [usp])) |--Compute Scalar(DEFINE:([Expr1014]=(1))) |--Top(TOP EXPRESSION:((1))) |--Nested Loops(Inner Join, WHERE:([dbo].[UserSiteRight_T].[SiteID_i] as [usr].[SiteID_i]=[dbo].[SitePath_T].[SiteID_i] as [usp].[SiteID_i])) |--Nested Loops(Inner Join, OUTER REFERENCES:([Expr1020], [Expr1021], [Expr1022])) | |--Compute Scalar(DEFINE:([Expr1020]=LikeRangeStart([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'), [Expr1021]=LikeRangeEnd([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'), [Expr1022]=LikeRangeInfo([dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_'))) | | |--Constant Scan | |--Index Seek(OBJECT:([dbo].[SitePath_T].[IDX_SitePath_Path+SiteID] AS [usp]), SEEK:([usp].[Path_v] > [Expr1020] AND [usp].[Path_v] < [Expr1021]), WHERE:([dbo].[SitePath_T].[Path_v] as [usp].[Path_v] like [dbo].[SitePath_T].[Path_v] as [sp].[Path_v]+'%_') ORDERED FORWARD) |--Table Spool |--Index Seek(OBJECT:([dbo].[UserSiteRight_T].[IDX_UserSiteRight_UserID+SiteID] AS [usr]), SEEK:([usr].[UserID_i]=[@UserID_i]) ORDERED FORWARD) 

The execution plan changes completely when I use a variable instead of hard coding the value of UserID_i!

Why does the query optimizer behave as follows?

How to ensure that the execution plan is the same as the second quick request?

Thanks.


UPDATE 1


Deleted (irrelevant)


UPDATE 2


It seems that I'm not the only one who has this problem.

Please check the following topics:
Why is the SqlServer optimizer so confused with the parameters?
Known issue: SQL Server 2005 stored procedure did not complete with parameter


UPDATE 3


An excellent article from the SQL Server query optimization team covering the sniffing parameter: I smell the p> parameter

+4
sql-server sql-server-2005 query-optimization
source share
4 answers

After reading the above articles (in Update 2 and Update 3 ), I finally understood how Sql Server processes / caches execution plans.

Adding OPTION ( RECOMPILE ) at the end of my SELECT will cause Sql Server to recalculate the execution plan (and not use the cached) each time the query is executed, thus choosing the best plan that matches the variable.

+1
source share

Is there a reason why you cannot use index hints (as in the second query) when using a variable (in the third query)? Itโ€™s strange that the query optimizer makes such a bad decision when there is an accessible index, but it knows only limited information about your data and selects as best as possible.

Some statistics about indexed columns may help you, in fact - they track data, data layout and some other information about what the table actually contains, while indexes themselves are created only on top of the table metadata, and the query optimizer does not select the data itself (if not statistics to help him do this).

Did you run the Database Advisor in the query? Highlighting the query and selecting "Analyzing the query in the Database Engine Tuning Advisor" from the "Query" menu in SSMS will use the table data to offer some statistics for you - this can make a huge difference.

+2
source share

EDIT:

First you need a coverage index (ParentID_i, ID_i). Do you have one?

Secondly:

I am trying to get all sites with depth = 0 that have sub-sites available to the user.

This description does not match the requests you submitted here .

This will return all sites w / depth = 0 (i.e. more parents) that have sub-sites available to the user:

 ; WITH Site_R AS ( SELECT s.ID_i , s.ParentID_i FROM Site_T s , UserSiteRight_T usr WHERE usr.SiteID_i = s.ID_i AND usr.UserID_i = @UserID_i -- plus any other filters UNION ALL SELECT s.ID_i , s.ParentID_i FROM Site_T s , Site_R sr WHERE s.ID_i = sr.ParentID_i ) SELECT DISTINCT ID_i FROM Site_R WHERE ParentID_i IS NULL 

Is this the result set you want?

Do not add unnecessary columns to a recursive CTE. Join them later, after recursion, after reduction.

0
source share

As Peter asked why I did not use recursion, I provide below a recursive cte that will return the correct result:

 ; WITH Site_R AS ( SELECT s.ID_i , sp.Path_v , s.ID_i AS SubSiteID_i , sp.Path_v AS SubPath_v , 0 AS Depth_i FROM Site_T s , SitePath_T sp WHERE sp.SiteID_i = s.ID_i AND s.ShortName_v LIKE '[ay]%' AND s.ParentID_i = 1 UNION ALL SELECT sr.ID_i , sr.Path_v , s.ID_i , sp.Path_v , Depth_i+1 FROM Site_T s , Site_R sr , SitePath_T sp WHERE sp.SiteID_i = s.ID_i AND s.ParentID_i = sr.SubSiteID_i ) SELECT us.* , ( SELECT usr.UserID_i FROM UserSiteRight_T usr WHERE usr.SiteID_i = us.SubSiteID_i AND UseriD_i = 1 ) AS UserID_i FROM Site_R us 

The result of the first rows with the UserSiteRight_T.UserID_i column added showing access to SubSiteID_i:

 ID_i Path_v SubSiteID_i SubPath_v Depth_i UserSiteRight_T.UserID_i ------- ----------- ----------- --------------- ----------- ----------- 2 1.2. 2 1.2. 0 1 3 1.3. 3 1.3. 0 NULL 3 1.3. 4 1.3.4. 1 1 3 1.3. 5 1.3.15863. 1 1 3 1.3. 6 1.3.6. 1 NULL 3 1.3. 7 1.3.6.7. 2 1 3 1.3. 8 1.3.8. 1 1 9 1.9. 9 1.9. 0 NULL 9 1.9. 10 1.9.10. 1 NULL 9 1.9. 11 1.9.10.11. 2 1 9 1.9. 12 1.9.10.12. 2 1 9 1.9. 13 1.9.13. 1 NULL 9 1.9. 14 1.9.13.14. 2 NULL 9 1.9. 15 1.9.13.14.15. 3 1 9 1.9. 16 1.9.13.14.16. 3 1 9 1.9. 17 1.9.13.17. 2 NULL 9 1.9. 18 1.9.13.17.18. 3 1 9 1.9. 19 1.9.19. 1 1 9 1.9. 20 1.9.20. 1 NULL 

My end result should be Group By in the first column with the last column NOT NULL.
Or the following recursive query:

 ; WITH Site_R AS ( SELECT s.ID_i , sp.Path_v , s.ID_i AS SubSiteID_i , sp.Path_v AS SubPath_v , 0 AS Depth_i FROM Site_T s , SitePath_T sp WHERE sp.SiteID_i = s.ID_i AND s.ShortName_v LIKE '[ay]%' AND s.ParentID_i = 1 UNION ALL SELECT sr.ID_i , sr.Path_v , s.ID_i , sp.Path_v , Depth_i+1 FROM Site_T s , Site_R sr , SitePath_T sp WHERE sp.SiteID_i = s.ID_i AND s.ParentID_i = sr.SubSiteID_i ) SELECT us.ID_i FROM Site_R us , UserSiteRight_T usr WHERE usr.SiteID_i = us.SubSiteID_i AND UseriD_i = 1 GROUP BY ID_i 

which basically builds the whole tree and selects only ancestors having SubSiteID_i , accessible by UserID_i . Or:

 ; WITH Site_R AS ( SELECT s.ID_i , s.ID_i AS SubSiteID_i , 0 AS Depth_i , ( SELECT 1 FROM UserSiteRight_T usr WHERE usr.SiteID_i = s.ID_i AND usr.UserID_i = @UserID_i ) AS HasRight_b FROM Site_T s WHERE s.ShortName_v LIKE '[ay]%' AND s.ParentID_i = @ParentID_i UNION ALL SELECT sr.ID_i , s.ID_i , Depth_i+1 , ( SELECT 1 FROM UserSiteRight_T usr WHERE usr.SiteID_i = s.ID_i AND usr.UserID_i = @UserID_i ) FROM Site_T s , Site_R sr WHERE s.ParentID_i = sr.SubSiteID_i AND ( sr.HasRight_b IS NULL OR Depth_i = 0 ) ) SELECT * FROM Site_R Where HasRight_b IS NOT NULL 
0
source share

All Articles