背景
我们在数据库出现阻塞及时邮件预警提醒中监控了数据库的阻塞情况,为了更好的维护数据库,特别是提升终端客户用户体验,我们要尽量避免在数据库中出现死锁的情况。我们知道收集死锁可以开启跟踪标志如1204,然后在日志中查看死锁相关信息,或者使用Profiler去跟踪死锁,我们希望所有的死锁信息收集到某表供我们后期优化分析使用,我们可以使用相对比较轻量的自带扩展事件(system_health)来完成这个需求。测试环境
Microsoft SQL Server 2012 - 11.0.2100.60 (X64) Feb 10 2012 19:39:15 Copyright (c) Microsoft Corporation Enterprise Edition (64-bit) on Windows NT 6.2 <X64> (Build 9200: )实现过程
a.新建存放死锁的表
IF DB_ID('azure_monitor') IS NULL
BEGIN
CREATE DATABASE azure_monitor ;
END
GO
ALTER DATABASE azure_monitor SET RECOVERY SIMPLE;
GO
USE [azure_monitor]; --存放死锁信息的库名
GO
IF OBJECT_ID('monitor_deadlock', 'U') IS NOT NULL
DROP TABLE dbo.monitor_deadlock;
GO
CREATE TABLE [dbo].[monitor_deadlock]
(
[ServerName] [VARCHAR](50),
[DataBaseName] [NVARCHAR](100) NULL ,
[DeadlockID] [BIGINT] NULL ,
[TransactionTime] [DATETIME] NULL ,
[DeadlockGraph] [XML] NULL ,
[DeadlockObjects] [NVARCHAR](MAX) NULL ,
[Victim] [INT] NOT NULL ,
[SPID] [INT] NULL ,
[ProcedureName] [VARCHAR](200) NULL ,
[LockMode] [CHAR](1) NULL ,
[Code] [VARCHAR](1000) NULL ,
[ClientApp] [NVARCHAR](245) NULL ,
[HostName] [VARCHAR](20) NULL ,
[LoginName] [VARCHAR](20) NULL ,
[InputBuffer] [VARCHAR](1000) NULL ,
[Capture_date] [DATETIME] NOT NULL ,
[capture_day] AS ( CONVERT([VARCHAR](12), [Capture_date], ( 112 )) ) ,
[comfirm_user] [NVARCHAR](50) NULL ,
[comfirm_flag] [INT] NOT NULL
)
ON [PRIMARY] TEXTIMAGE_ON [PRIMARY];
GO
ALTER TABLE [dbo].[monitor_deadlock] ADD CONSTRAINT [DF__monitor_d__Captu__2CF2ADDF] DEFAULT (GETDATE()) FOR [Capture_date];
GO
ALTER TABLE [dbo].[monitor_deadlock] ADD CONSTRAINT [DF__monitor_d__comfi__2DE6D218] DEFAULT ((1)) FOR [comfirm_flag];
GO
b.新建读取死锁的存储过程
USE [azure_monitor];
--存放读取死锁信息的存储过程的库名
GO
IF OBJECT_ID('monitor_P_deadlock', 'P') IS NULL
EXEC( 'CREATE procedure dbo.monitor_P_deadlock AS ');
GO
/*=============================================
-- Author: jil.wen
-- Create date: 2017/04/11
-- Description: 监控数据库上死锁情况;
-- demo : exec dbo.monitor_P_deadlock
============================================= */
ALTER PROCEDURE monitor_P_deadlock
AS
BEGIN
-- DELETE FROM dbo.monitor_deadlock
-- WHERE [capture_day] = CONVERT([VARCHAR](12), GETDATE(), ( 112 ))
-- AND comfirm_flag = 1;
DECLARE @SessionName sysname;
DECLARE @Servername VARCHAR(50);
SELECT @Servername = @@SERVERNAME;
SELECT @SessionName = 'system_health';
/*
SELECT Session_Name = s.name, s.blocked_event_fire_time, s.dropped_buffer_count, s.dropped_event_count, s.pending_buffers
FROM sys.dm_xe_session_targets t
INNER JOIN sys.dm_xe_sessions s ON s.address = t.event_session_address
WHERE target_name = 'event_file'
--*/
IF OBJECT_ID('tempdb..#Events') IS NOT NULL
BEGIN
DROP TABLE #Events;
END;
DECLARE @Target_File NVARCHAR(1000) ,
@Target_Dir NVARCHAR(1000) ,
@Target_File_WildCard NVARCHAR(1000);
SELECT @Target_File = CAST(t.target_data AS XML).value('EventFileTarget[1]/File[1]/@name',
'NVARCHAR(256)')
FROM sys.dm_xe_session_targets t
INNER JOIN sys.dm_xe_sessions s ON s.address = t.event_session_address
WHERE s.name = @SessionName
AND t.target_name = 'event_file';
SELECT @Target_Dir = LEFT(@Target_File,
LEN(@Target_File) - CHARINDEX('\',
REVERSE(@Target_File)));
SELECT @Target_File_WildCard = @Target_Dir + '\' + @SessionName
+ '_*.xel';
--Keep this as a separate table because it's called twice in the next query. You don't want this running twice.
SELECT DeadlockGraph = CAST(event_data AS XML) ,
DeadlockID = ROW_NUMBER() OVER ( ORDER BY file_name, file_offset )
INTO #Events
FROM sys.fn_xe_file_target_read_file(@Target_File_WildCard, NULL,
NULL, NULL) AS F
WHERE event_data LIKE '<event name="xml_deadlock_report%';
WITH Victims
AS ( SELECT VictimID = Deadlock.Victims.value('@id',
'varchar(50)') ,
e.DeadlockID
FROM #Events e
CROSS APPLY e.DeadlockGraph.nodes('/event/data/value/deadlock/victim-list/victimProcess')
AS Deadlock ( Victims )
),
DeadlockObjects
AS ( SELECT DISTINCT
e.DeadlockID ,
ObjectName = Deadlock.Resources.value('@objectname',
'nvarchar(256)')
FROM #Events e
CROSS APPLY e.DeadlockGraph.nodes('/event/data/value/deadlock/resource-list/*')
AS Deadlock ( Resources )
)
INSERT INTO monitor_deadlock
( ServerName ,
DataBaseName ,
DeadlockID ,
TransactionTime ,
DeadlockGraph ,
DeadlockObjects ,
Victim ,
SPID ,
ProcedureName ,
LockMode ,
Code ,
ClientApp ,
HostName ,
LoginName ,
InputBuffer
)
SELECT @Servername AS ServerName ,
DatabaseName ,
DeadlockID ,
TransactionTime ,
DeadlockGraph ,
DeadlockObjects ,
Victim ,
SPID ,
ProcedureName ,
LockMode ,
Code ,
ClientApp ,
HostName ,
LoginName ,
InputBuffer
FROM ( SELECT DatabaseName = LEFT(SUBSTRING(( SELECT
( ', '
+ o.ObjectName )
FROM
DeadlockObjects o
WHERE
o.DeadlockID = e.DeadlockID
ORDER BY o.ObjectName
FOR
XML
PATH('')
), 3, 4000),
CHARINDEX('.',
SUBSTRING(( SELECT
( ', '
+ o.ObjectName )
FROM
DeadlockObjects o
WHERE
o.DeadlockID = e.DeadlockID
ORDER BY o.ObjectName
FOR
XML
PATH('')
), 3, 4000)) - 1) ,
e.DeadlockID ,
TransactionTime = Deadlock.Process.value('@lasttranstarted',
'datetime') ,
DeadlockGraph ,
DeadlockObjects = SUBSTRING(( SELECT
( ', '
+ o.ObjectName )
FROM
DeadlockObjects o
WHERE
o.DeadlockID = e.DeadlockID
ORDER BY o.ObjectName
FOR
XML
PATH('')
), 3, 4000) ,
Victim = CASE WHEN v.VictimID IS NOT NULL
THEN 1
ELSE 0
END ,
SPID = Deadlock.Process.value('@spid',
'int') ,
ProcedureName = Deadlock.Process.value('executionStack[1]/frame[1]/@procname[1]',
'varchar(200)') ,
LockMode = Deadlock.Process.value('@lockMode',
'char(1)') ,
Code = Deadlock.Process.value('executionStack[1]/frame[1]',
'varchar(1000)') ,
ClientApp = CASE LEFT(Deadlock.Process.value('@clientapp',
'varchar(100)'),
29)
WHEN 'SQLAgent - TSQL JobStep (Job '
THEN 'SQLAgent Job: '
+ ( SELECT
name
FROM
msdb..sysjobs sj
WHERE
SUBSTRING(Deadlock.Process.value('@clientapp',
'varchar(100)'),
32, 32) = ( SUBSTRING(sys.fn_varbintohexstr(sj.job_id),
3, 100) )
) + ' - '
+ SUBSTRING(Deadlock.Process.value('@clientapp',
'varchar(100)'),
67,
LEN(Deadlock.Process.value('@clientapp',
'varchar(100)'))
- 67)
ELSE Deadlock.Process.value('@clientapp',
'varchar(100)')
END ,
HostName = Deadlock.Process.value('@hostname',
'varchar(20)') ,
LoginName = Deadlock.Process.value('@loginname',
'varchar(20)') ,
InputBuffer = Deadlock.Process.value('inputbuf[1]',
'varchar(1000)')
FROM #Events e
CROSS APPLY e.DeadlockGraph.nodes('/event/data/value/deadlock/process-list/process')
AS Deadlock ( Process )
LEFT JOIN Victims v ON v.DeadlockID = e.DeadlockID
AND v.VictimID = Deadlock.Process.value('@id',
'varchar(50)')
) X --In a subquery to make filtering easier (use column names, not XML parsing), no other reason
ORDER BY DeadlockID DESC;
END;
c.在Agent新建job调用上述【monitor_P_deadlock】存储过程
省略,详情可以参考数据库出现阻塞及时邮件预警提醒(下)
d.收集效果如下
注意事项
- 如需使用Agent代理发送预警邮件,就要注意Agent是否正常运行;
- 是否有调用上述脚本的数据库用户权限;
- 数据库自带扩展事件system_health是否正常运行;