CTE全名是Common Table Expression,语法基础请参考MSDN文档:https://msdn.microsoft.com/zh-cn/library/ms175972.aspx。
CTE Recursion诞生之时,着实让人惊艳了一把。被很多吃瓜群众以讹传讹之后,“慢”似乎成了CTE Recursion最大的原罪。
很多时候,用到CTE Recursion的场景,无非是千八百条的数据量,最大也不过万八千条,所以“慢”算不上个问题。直到前几天,一个群友问:500W的数据做CTE递归时,怎么做性能优化……
结论:
- 合理的索引会极大的提升CTE Recursion的性能;
- 根据实验结果猜测:Sql Server2016对CTE Recursion做了优化,缺失合理索引的前提下,性能有极大的提升(受测试样本影响,结果可能不准确。无论如何,索引可以帮到你)。
Talk is cheap,Show me the code!所以,原因如下图(样本数据:1W零1条^^):
拒绝耍流氓,测试代码如下:
IF OBJECT_ID('dbo.TestCte', 'U') IS NOT NULL
DROP TABLE dbo.TestCte;
GO
CREATE TABLE dbo.TestCte
(
Id VARCHAR(10) NOT NULL ,
ParentId VARCHAR(10) NULL
);
WITH cte_001
AS ( SELECT 1 AS a UNION ALL
SELECT 2 AS a UNION ALL
SELECT 3 AS a UNION ALL
SELECT 4 AS a UNION ALL
SELECT 5 AS a UNION ALL
SELECT 6 AS a UNION ALL
SELECT 7 AS a UNION ALL
SELECT 8 AS a UNION ALL
SELECT 9 AS a UNION ALL
SELECT 10 AS a )
INSERT dbo.TestCte
( Id, ParentId )
SELECT RIGHT( '' + CAST ( T01.Id AS VARCHAR(10) ), 10 ) AS Id ,
RIGHT( '' + CAST ( CEILING( T01.Id / 10 ) AS VARCHAR(10) ), 10 ) AS ParentId
FROM (
SELECT ROW_NUMBER() OVER ( ORDER BY cte_001.a ) AS Id
FROM cte_001
CROSS JOIN cte_001 AS A
CROSS JOIN cte_001 AS B
CROSS JOIN cte_001 AS C
CROSS JOIN cte_001 AS D
CROSS JOIN cte_001 AS E
CROSS JOIN (SELECT TOP 5 * FROM cte_001) AS F ) AS T01;
GO INSERT DBO.TestCte ( Id, ParentId )
VALUES ( '', NULL );
GO
--无索引版本
SET STATISTICS TIME ON;
SET STATISTICS IO ON; IF OBJECT_ID('dbo.T', 'U') IS NOT NULL
DROP TABLE dbo.T;
GO
CREATE TABLE dbo.T
(
RN UNIQUEIDENTIFIER PRIMARY KEY,
Id VARCHAR(10) ,
ParentId VARCHAR(10) ,
Memo1 NVARCHAR(128) DEFAULT ( N'我是占位置的!我是占位置的!我是占位置的!我是占位置的!我是占位置的!' ) ,
Memo2 NVARCHAR(128) DEFAULT ( N'我是很骄傲的!我是很骄傲的!我是很骄傲的!我是很骄傲的!我是很骄傲的!' )
);
INSERT dbo.T
( RN ,
Id ,
ParentId
)
SELECT N.RN ,
N.Id ,
N.ParentId
FROM ( SELECT NEWID() AS RN ,
Id ,
ParentId
FROM dbo.TestCte
WHERE Id < 10001--测试数据量,改这里
) AS N
ORDER BY RN ASC;
GO
WITH cte_001
AS ( SELECT Id ,
ParentId
FROM dbo.T
WHERE ParentId IS NULL
UNION ALL
SELECT T01.Id ,
T01.ParentId
FROM T AS T01
INNER JOIN cte_001 AS T02 ON T02.Id = T01.ParentId
)
SELECT COUNT(*)
FROM cte_001;
--有索引版本
SET STATISTICS TIME ON;
SET STATISTICS IO ON;
IF OBJECT_ID('dbo.T', 'U') IS NOT NULL
DROP TABLE dbo.T;
GO
CREATE TABLE dbo.T
(
RN UNIQUEIDENTIFIER PRIMARY KEY,
Id VARCHAR(10) ,
ParentId VARCHAR(10) ,
Memo1 NVARCHAR(128) DEFAULT ( N'我是占位置的!我是占位置的!我是占位置的!我是占位置的!我是占位置的!' ) ,
Memo2 NVARCHAR(128) DEFAULT ( N'我是很骄傲的!我是很骄傲的!我是很骄傲的!我是很骄傲的!我是很骄傲的!' )
);
INSERT dbo.T ( RN , Id , ParentId )
SELECT N.RN , N.Id , N.ParentId
FROM ( SELECT NEWID() AS RN , Id , ParentId
FROM dbo.TestCte
WHERE Id < 10001--测试数据量,改这里
) AS N
ORDER BY RN ASC;
GO --创建索引
CREATE NONCLUSTERED INDEX IDX_DBO_T_PARENTID_ID
ON [dbo].[T] ([ParentId], Id)
GO WITH cte_001
AS ( SELECT Id , ParentId
FROM dbo.T
WHERE ParentId IS NULL
UNION ALL
SELECT T01.Id , T01.ParentId
FROM T AS T01
INNER JOIN cte_001 AS T02 ON T02.Id = T01.ParentId
)
SELECT COUNT(*)
FROM cte_001;