SQL Server下利用SQL Server Agent Job对索引重建实现负载均衡(Balance Load)

时间:2022-05-20 17:44:09

昨天工作中遇到这样一个场景:有个项目需要把某台服务器下所有的表和索引都启用数据压缩(data_compression=page),已经启用了的表和索引就不需要再压缩一次了。统计一下后发现要运行的REBUILD INDEX代码多达上万条,而整个服务器上所有需要压缩的数据库对象大小加起来估计接近1TB。这种情况下如果把所有工作都交给一条脚本来完成,估计整个周末都跑不完。于是我们的想法是用多个SQL Server Agent Job来并行完成这个任务,分配单位是表(也就是说把同一张表的所有索引重建任务都分配给同一个Job,这样可以避免不同Job在同一时间对同一张表重建索引而导致死锁),分配原则是按数据页面总大小从大到小依次轮流分配,就像这样:

No    Index_Name  Pages      Assign_To 

1    Index1     10000    Job1

2    Index2     9000      Job2

3    Index3     8000      Job3

4    Index4     7000      Job1

.....

下面是实现代码:

/*
Purpose
    Build the list of REBUILD statements needed to PAGE-compress the user
    tables and indexes of every database on this server.
    - The system databases master, msdb, tempdb and model are skipped, as are
      databases that are not ONLINE (they cannot be queried).
    - Only heaps/indexes that still have at least one partition whose
      compression is not PAGE are listed.

Output
    #t   (db, object_id, index_id)  : indexes that still need compression.
    #t2  (ID, db, stm, dpages, ...) : one generated REBUILD statement per
                                      heap/index, with its page count.
    Any existing Stage.dbo.table1 is dropped at the end so that the following
    batch can recreate it with SELECT ... INTO.
*/

/* Recreate the two temp tables from scratch so the script can be re-run in
   the same session. */
IF OBJECT_ID('tempdb..#t') IS NOT NULL
BEGIN
    DROP TABLE #t;
END
GO

CREATE TABLE #t (db SYSNAME, object_id INT, index_id INT);
GO

IF OBJECT_ID('tempdb..#t2') IS NOT NULL
BEGIN
    DROP TABLE #t2;
END
GO

/* stm is NVARCHAR so that object names containing non-ASCII characters
   survive the round trip into the generated statement. */
CREATE TABLE #t2 (
    ID        INT IDENTITY(1,1),
    db        SYSNAME,
    stm       NVARCHAR(MAX),
    dpages    INT,
    object_id INT,
    index_id  INT
);
GO

DECLARE @db            SYSNAME,
        @db_quoted     NVARCHAR(258),       -- [db], with any ] escaped
        @db_quoted_esc NVARCHAR(520),       -- [db] made safe for use inside a '...' literal
        @db_literal    NVARCHAR(300),       -- N'db', with any ' escaped
        @sql           NVARCHAR(MAX) = N'', -- fills #t
        @sql2          NVARCHAR(MAX) = N''; -- fills #t2

DECLARE cur CURSOR LOCAL FAST_FORWARD FOR
    SELECT name
    FROM sys.databases
    WHERE name NOT IN ('master', 'msdb', 'tempdb', 'model')
      AND state_desc = 'ONLINE';

OPEN cur;
FETCH NEXT FROM cur INTO @db;

WHILE @@FETCH_STATUS = 0
BEGIN
    SET @db_quoted     = QUOTENAME(@db);
    SET @db_quoted_esc = REPLACE(@db_quoted, N'''', N'''''');
    SET @db_literal    = N'N' + QUOTENAME(@db, '''');

    /* One SELECT per database; the branches are glued together with
       UNION ALL (no separator before the first one). */
    SET @sql = @sql
        + CASE WHEN @sql = N'' THEN N'' ELSE N'UNION ALL ' END
        + N'SELECT DISTINCT ' + @db_literal + N' AS db, object_id, index_id FROM '
        + @db_quoted + N'.sys.partitions WITH (NOLOCK) WHERE data_compression_desc <> ''PAGE'''
        + NCHAR(13);

    /* Heaps are rebuilt with ALTER TABLE, everything else with ALTER INDEX.
       dpages is read from the sys.sysindexes compatibility view.
       The join to #t must match on db as well: object_id/index_id values are
       only unique within a single database. */
    SET @sql2 = @sql2
        + CASE WHEN @sql2 = N'' THEN N'' ELSE N'UNION ALL ' END
        + N'SELECT ' + @db_literal + N' AS db,
       CASE id.type_desc
           WHEN ''HEAP'' THEN N''ALTER TABLE ' + @db_quoted_esc + N'.'' + QUOTENAME(sch.name) + N''.'' + QUOTENAME(ob.name) + N'' REBUILD PARTITION = ALL WITH (DATA_COMPRESSION = PAGE);''
           ELSE N''ALTER INDEX '' + QUOTENAME(id.name) + N'' ON ' + @db_quoted_esc + N'.'' + QUOTENAME(sch.name) + N''.'' + QUOTENAME(ob.name) + N'' REBUILD WITH (DATA_COMPRESSION = PAGE);''
       END AS stm,
       syid.dpages,
       id.object_id,
       id.index_id
FROM ' + @db_quoted + N'.sys.indexes AS id
INNER JOIN ' + @db_quoted + N'.sys.sysindexes AS syid WITH (NOLOCK)
    ON syid.id = id.object_id AND syid.indid = id.index_id
INNER JOIN ' + @db_quoted + N'.sys.objects AS ob WITH (NOLOCK)
    ON ob.object_id = id.object_id
INNER JOIN ' + @db_quoted + N'.sys.schemas AS sch WITH (NOLOCK)
    ON sch.schema_id = ob.schema_id
INNER JOIN #t AS non_compr
    ON non_compr.db = ' + @db_literal + N'
   AND non_compr.object_id = id.object_id
   AND non_compr.index_id = id.index_id
WHERE ob.type_desc = ''USER_TABLE'' ' + NCHAR(13);

    FETCH NEXT FROM cur INTO @db;
END

CLOSE cur;
DEALLOCATE cur;

/* Nothing to run when the server has no eligible user database. */
IF @sql <> N''
BEGIN
    /* #t caches the sys.partitions rows first. The original author found that
       sys.partitions was slow when joined directly to the other catalog views
       in some databases (e.g. a data warehouse), so it is materialised once
       here and #t is joined instead. */
    INSERT #t (db, object_id, index_id)
    EXEC (@sql);

    /* #t2 receives the generated REBUILD statements. */
    INSERT #t2 (db, stm, dpages, object_id, index_id)
    EXEC (@sql2);
END

/* Drop the previous work table, if any, so it can be rebuilt from #t2. */
IF OBJECT_ID('Stage.dbo.table1') IS NOT NULL
    DROP TABLE Stage.dbo.table1;
GO

/*
Purpose
    Distribute the statements in #t2 across the worker jobs and persist the
    result as Stage.dbo.table1.

How work is assigned
    - The unit of assignment is the table (db + object_id): all statements for
      one table get the same group_nbr, so two jobs never rebuild indexes of
      the same table at the same time.
    - Tables are ranked by their total page count (SUM(dpages)), largest
      first, and dealt out round-robin: rank 1 -> group 1, rank 2 -> group 2,
      ..., rank N+1 -> group 1 again.
    - group_nbr is therefore always in 1..@nbr_of_workers, also when there are
      fewer tables than workers.

Output columns of Stage.dbo.table1
    ID, db, stm, dpages, object_id, index_id : copied from #t2
    obj_dpages : total pages of the owning table
    group_nbr  : the job that owns the statement
    compressed : 0 = not yet executed successfully
*/
DECLARE @nbr_of_workers AS SMALLINT;
SET @nbr_of_workers = 6;   -- number of SQL Server Agent jobs that will run in parallel

WITH obj_size AS (
    -- total size of each table across all of its indexes
    SELECT
        db,
        object_id,
        SUM(dpages) AS obj_dpages
    FROM #t2
    GROUP BY db, object_id
),
bal_load AS (
    -- db and object_id break ties so the assignment is deterministic
    SELECT
        db,
        object_id,
        obj_dpages,
        (ROW_NUMBER() OVER (ORDER BY obj_dpages DESC, db, object_id) - 1) % @nbr_of_workers + 1 AS group_nbr
    FROM obj_size
)
SELECT
    t.ID,
    t.db,
    t.stm,
    t.dpages,
    t.object_id,
    t.index_id,
    bal_load.obj_dpages,
    bal_load.group_nbr,
    0 AS compressed
INTO Stage.dbo.table1
FROM #t2 AS t
INNER JOIN bal_load
    ON bal_load.db = t.db
   AND bal_load.object_id = t.object_id;

CREATE UNIQUE CLUSTERED INDEX CLST_IX_table1 ON Stage.dbo.table1 (ID);
GO

Stage.dbo.table1这张表存储了所有要执行的命令,其中group_nbr这个栏位表明每条命令属于哪个Job。

然后在各个job的T-SQL代码里面就这样写:

/*
Purpose
    T-SQL step for one SQL Server Agent worker job. It executes every pending
    statement (compressed = 0) of its own group from Stage.dbo.table1 and
    flags each one as done (compressed = 1) once it has run successfully.

Usage
    Set @group_nbr to the group this job owns; nothing else differs between
    the jobs.

Error handling
    A failing statement does not stop the job: the error is reported as an
    informational message (severity 10) and the loop moves on. The failed row
    keeps compressed = 0, so re-running the job retries it.
*/
DECLARE @group_nbr INT = 1;   -- the group handled by this job

IF EXISTS (SELECT * FROM Stage.dbo.table1 WITH (NOLOCK) WHERE compressed = 0 AND group_nbr = @group_nbr)
BEGIN
    IF OBJECT_ID('tempdb..#t') IS NOT NULL
        DROP TABLE #t;

    /* Private snapshot of the pending work, numbered 1..n for the loop. */
    CREATE TABLE #t (
        row_nbr INT IDENTITY(1,1) PRIMARY KEY CLUSTERED,
        ID      INT,
        stm     NVARCHAR(MAX)
    );

    DECLARE @ID           INT,
            @stm          NVARCHAR(MAX),
            @err          NVARCHAR(2048),
            @curr_row_nbr INT = 1,
            @max_row_nbr  INT;

    INSERT #t (ID, stm)
    SELECT ID, stm
    FROM Stage.dbo.table1 WITH (NOLOCK)
    WHERE compressed = 0
      AND group_nbr = @group_nbr
    ORDER BY ID;

    SELECT @max_row_nbr = MAX(row_nbr) FROM #t;

    WHILE @curr_row_nbr <= @max_row_nbr
    BEGIN
        /* Reset first so a missing row can never re-run the previous statement. */
        SELECT @ID = NULL, @stm = NULL;

        SELECT @ID = ID, @stm = stm
        FROM #t
        WHERE row_nbr = @curr_row_nbr;

        /* Advance before executing: the loop progresses whether or not the
           statement succeeds. */
        SET @curr_row_nbr = @curr_row_nbr + 1;

        IF @stm IS NULL
            CONTINUE;

        BEGIN TRY
            EXEC (@stm);

            UPDATE Stage.dbo.table1
            SET compressed = 1
            WHERE ID = @ID;
        END TRY
        BEGIN CATCH
            /* Report and carry on; severity 10 does not fail the job step. */
            SET @err = ERROR_MESSAGE();
            RAISERROR (N'Rebuild failed for Stage.dbo.table1.ID = %d: %s', 10, 1, @ID, @err) WITH NOWAIT;
        END CATCH
    END
END

搞定