How can I count pair-wise occurrences in a SQL Server table? Please note that the order of the given sequence has to be accounted for and shouldn't be changed.
如何计算SQL Server表中成对出现的情况?请注意,给定序列的顺序必须被考虑,不应该被更改。
Original table:
原始表:
1 2 3 4
--------
1 | A A A B
2 | A # don't count
3 | B A A
4 | B # don't count
Result:
结果:
1 | AA = 3
2 | AB = 1
3 | BB = 0
4 | BA = 1
In addition, the code has to work for large datasets.
此外,代码必须适用于大型数据集。
Edit:
编辑:
A pair in this context is a set of two values {x[ij], x[(i+1)j]}, where i=1,...,4 and j=1,...,4. Further, pairs that have the form A null
or B null
shouldn't be counted. Moreover, null A
or null B
can't happen, therefore they don't have to be accounted for.
在这个上下文中,一对是两个值{x[ij], x[(i+1)j]}的集合,其中i=1,…4和j = 1,…,4。此外,不应该计算具有null或B null形式的对。此外,null A或null B不可能发生,因此它们不需要被解释。
3 个解决方案
#1
1
LiveDemo
CREATE TABLE #tab([1] NVARCHAR(100), [2] NVARCHAR(100),
[3] NVARCHAR(100), [4] NVARCHAR(100));
INSERT INTO #tab
VALUES ('A', 'A', 'A', 'B') ,('A' , NULL ,NULL ,NULL )
,('B' ,'A' ,'A', NULL),('B', NULL, NULL, NULL);
WITH cte AS
(
SELECT pair = [1] + [2] FROM #tab
UNION ALL
SELECT pair = [2] + [3] FROM #tab
UNION ALL
SELECT pair = [3] + [4] FROM #tab
), cte2 AS
(
SELECT [1] AS val FROM #tab
UNION ALL SELECT [2] FROM #tab
UNION ALL SELECT [3] FROM #tab
UNION ALL SELECT [4] FROM #tab
), all_pairs AS
(
SELECT DISTINCT a.val + b.val AS pair
FROM cte2 a
CROSS JOIN cte2 b
WHERE a.val IS NOT NULL and b.val IS NOT NULL
)
SELECT a.pair, result = COUNT(c.pair)
FROM all_pairs a
LEFT JOIN cte c
ON a.pair = c.pair
GROUP BY a.pair;
How it works:
它是如何工作的:
-
cte
create all pairs (1,2), (2,3), (3,4) - cte创建所有对(1,2)、(2,3)、(3,4)
-
cte2
get all values from column - cte2从列中获取所有值。
-
all_pairs
create all possible pairs of valuesAA, AB, BA, BB
- all_pair创建所有可能的值对AA、AB、BA、BB
- Final use grouping and
COUNT
to get number of occurences. - 最后使用分组和计数来得到发生的次数。
EDIT:
编辑:
You can concatenate result as below:
你可以将结果串联在一起:
LiveDemo2
...
, final AS
(
SELECT a.pair, result = COUNT(c.pair), rn = ROW_NUMBER() OVER(ORDER BY a.pair)
FROM all_pairs a
LEFT JOIN cte c
ON a.pair = c.pair
GROUP BY a.pair
)
SELECT rn, [result] = pair + ' = ' + CAST(result AS NVARCHAR(100))
FROM final
#2
2
I just want to point out a pretty easy way to express this logic:
我只是想指出一种非常简单的方式来表达这种逻辑:
with vals as (
select 'A' as val union all select 'B'
)
pairs as (
select t1.val as val1, t2.val as val2
from vals t1 cross join vals t2
)
select p.*,
(select count(*)
from original
where [1] = val1 and [2] = val2 or
[2] = val1 and [3] = val2 or
[3] = val1 and [4] = val2
) as cnt
from pairs p
order by cnt desc;
This doesn't have great performance characteristics, that is actually easily fixed by using three subqueries and indexes on the data columns.
这并没有很好的性能特征,实际上很容易通过在数据列上使用三个子查询和索引来修复。
#3
1
with cte as (
select 1 as id, 'A' as [1], 'A' as [2], 'A' as [3], 'B' as [4]
union all select 2 , 'A', NULL,NULL,NULL
union all select 3 , 'B', 'A','A',NULL
union all select 4 , 'B',NULL,NULL,NULL
)
, Vals as (
select 'AA' as Val
union all select 'AB'
union all select 'BB'
union all select 'BA'
)
, UNPVT as (
/*UNPIVOT to convert the columns to be rows*/
SELECT id , VAL + LEAD(VAL) OVER (PARTITION BY ID ORDER BY SEQ) as Code
FROM (
select ID,[1],[2],[3],[4] from cte
) P
UNPIVOT (Val FOR Seq IN ([1],[2],[3],[4])
) AS UNPVT
)
select Vals.Val, count(UNPVT.Code) from UNPVT right join Vals on UNPVT.Code = Vals.Val
group by Vals.Val
CTE: contains your data. Vals: contains the returned code. UnPVT: to convert the columns to be rows.
CTE:包含您的数据。Vals:包含返回的代码。UnPVT:将列转换为行。
#1
1
LiveDemo
CREATE TABLE #tab([1] NVARCHAR(100), [2] NVARCHAR(100),
[3] NVARCHAR(100), [4] NVARCHAR(100));
INSERT INTO #tab
VALUES ('A', 'A', 'A', 'B') ,('A' , NULL ,NULL ,NULL )
,('B' ,'A' ,'A', NULL),('B', NULL, NULL, NULL);
WITH cte AS
(
SELECT pair = [1] + [2] FROM #tab
UNION ALL
SELECT pair = [2] + [3] FROM #tab
UNION ALL
SELECT pair = [3] + [4] FROM #tab
), cte2 AS
(
SELECT [1] AS val FROM #tab
UNION ALL SELECT [2] FROM #tab
UNION ALL SELECT [3] FROM #tab
UNION ALL SELECT [4] FROM #tab
), all_pairs AS
(
SELECT DISTINCT a.val + b.val AS pair
FROM cte2 a
CROSS JOIN cte2 b
WHERE a.val IS NOT NULL and b.val IS NOT NULL
)
SELECT a.pair, result = COUNT(c.pair)
FROM all_pairs a
LEFT JOIN cte c
ON a.pair = c.pair
GROUP BY a.pair;
How it works:
它是如何工作的:
-
cte
create all pairs (1,2), (2,3), (3,4) - cte创建所有对(1,2)、(2,3)、(3,4)
-
cte2
get all values from column - cte2从列中获取所有值。
-
all_pairs
create all possible pairs of valuesAA, AB, BA, BB
- all_pair创建所有可能的值对AA、AB、BA、BB
- Final use grouping and
COUNT
to get number of occurences. - 最后使用分组和计数来得到发生的次数。
EDIT:
编辑:
You can concatenate result as below:
你可以将结果串联在一起:
LiveDemo2
...
, final AS
(
SELECT a.pair, result = COUNT(c.pair), rn = ROW_NUMBER() OVER(ORDER BY a.pair)
FROM all_pairs a
LEFT JOIN cte c
ON a.pair = c.pair
GROUP BY a.pair
)
SELECT rn, [result] = pair + ' = ' + CAST(result AS NVARCHAR(100))
FROM final
#2
2
I just want to point out a pretty easy way to express this logic:
我只是想指出一种非常简单的方式来表达这种逻辑:
with vals as (
select 'A' as val union all select 'B'
)
pairs as (
select t1.val as val1, t2.val as val2
from vals t1 cross join vals t2
)
select p.*,
(select count(*)
from original
where [1] = val1 and [2] = val2 or
[2] = val1 and [3] = val2 or
[3] = val1 and [4] = val2
) as cnt
from pairs p
order by cnt desc;
This doesn't have great performance characteristics, that is actually easily fixed by using three subqueries and indexes on the data columns.
这并没有很好的性能特征,实际上很容易通过在数据列上使用三个子查询和索引来修复。
#3
1
with cte as (
select 1 as id, 'A' as [1], 'A' as [2], 'A' as [3], 'B' as [4]
union all select 2 , 'A', NULL,NULL,NULL
union all select 3 , 'B', 'A','A',NULL
union all select 4 , 'B',NULL,NULL,NULL
)
, Vals as (
select 'AA' as Val
union all select 'AB'
union all select 'BB'
union all select 'BA'
)
, UNPVT as (
/*UNPIVOT to convert the columns to be rows*/
SELECT id , VAL + LEAD(VAL) OVER (PARTITION BY ID ORDER BY SEQ) as Code
FROM (
select ID,[1],[2],[3],[4] from cte
) P
UNPIVOT (Val FOR Seq IN ([1],[2],[3],[4])
) AS UNPVT
)
select Vals.Val, count(UNPVT.Code) from UNPVT right join Vals on UNPVT.Code = Vals.Val
group by Vals.Val
CTE: contains your data. Vals: contains the returned code. UnPVT: to convert the columns to be rows.
CTE:包含您的数据。Vals:包含返回的代码。UnPVT:将列转换为行。