I have a table like this
我有一张这样的表
| sessionId | hostname |
| --------- | -------- |
| a1 | domain1 |
| a1 | domain2 |
| a2 | domain1 |
| a3 | domain1 |
| a3 | domain2 |
| a4 | domain2 |
What I want is to build a logical table containing the following:
我想要的是构建一个包含以下内容的逻辑表:
| sessionId | only domain1 | only domain2 | domain1 OR domain2 | domain1 AND domain2 |
|-----------|--------------|--------------|--------------------|---------------------|
| a1 | 1 | 1 | 1 | 1 |
| a2 | 1 | 0 | 1 | 0 |
| a3 | 1 | 1 | 1 | 1 |
| a4 | 0 | 1 | 1 | 0 |
I guess there's a simple solution for this, but I can't get my head around it :(
我想这有一个简单的解决方案,但我无法理解它:(
3 个解决方案
#1
1
You can use conditional aggregation:
您可以使用条件聚合:
-- One row per session with 0/1 flags telling whether the session hit
-- 'domain1', 'domain2', at least one of them, or both of them.
-- sessionid is selected so that each flag row can be tied back to its session.
-- max() over a 0/1 expression yields 1 as soon as one row in the group matches.
-- NOTE(review): `both` is a reserved word in some dialects; quote the alias
-- if your engine rejects it.
select sessionid,
       max(case when hostname = 'domain1' then 1 else 0 end) as domain1,
       max(case when hostname = 'domain2' then 1 else 0 end) as domain2,
       max(case when hostname in ('domain1', 'domain2') then 1 else 0 end) as either,
       (case when max(case when hostname = 'domain1' then 1 else 0 end) = 1 and
                  max(case when hostname = 'domain2' then 1 else 0 end) = 1
             then 1 else 0
        end) as both
from t
group by sessionid;
#2
1
Try this :
尝试这个 :
-- Sample data from the question, held in a table variable.
Declare @Table as Table (sessionId varchar(100), hostname varchar(100));

-- Explicit column list so the insert does not depend on column order.
Insert into @Table (sessionId, hostname) Values
('a1','domain1'),
('a1','domain2'),
('a2','domain1'),
('a3','domain1'),
('a3','domain2'),
('a4','domain2');

-- One row per session with 0/1 flags per hostname.
-- Each side of the joins is reduced to distinct session ids first, so the
-- joins cannot multiply rows and no outer DISTINCT is needed to hide them.
-- A non-null s1/s2 key means the session has at least one row for that hostname.
Select T.sessionId,
    case when s1.sessionId is null then 0 else 1 end as [only domain1],
    case when s2.sessionId is null then 0 else 1 end as [only domain2],
    case when s1.sessionId is not null or s2.sessionId is not null
         then 1 else 0 end as [domain1 OR domain2],
    case when s1.sessionId is not null and s2.sessionId is not null
         then 1 else 0 end as [domain1 AND domain2]
from
(
    Select distinct sessionId From @Table
) T
Left Join
(
    Select distinct sessionId From @Table where hostname = 'domain1'
) s1 on s1.sessionId = T.sessionId
Left Join
(
    Select distinct sessionId From @Table where hostname = 'domain2'
) s2 on s2.sessionId = T.sessionId;
#3
1
For BigQuery Standard SQL
对于BigQuery Standard SQL
#standardSQL
-- Per-session 0/1 presence flags for the two hostnames.
-- COUNTIF counts the rows of each session group that match the predicate;
-- a count above zero means the hostname was seen at least once.
SELECT
  sessionId,
  CASE WHEN COUNTIF(hostname = 'domain1') > 0 THEN 1 ELSE 0 END AS only_domain1,
  CASE WHEN COUNTIF(hostname = 'domain2') > 0 THEN 1 ELSE 0 END AS only_domain2,
  CASE
    WHEN COUNTIF(hostname = 'domain1') > 0 OR COUNTIF(hostname = 'domain2') > 0 THEN 1
    ELSE 0
  END AS domain1_or_domain2,
  CASE
    WHEN COUNTIF(hostname = 'domain1') > 0 AND COUNTIF(hostname = 'domain2') > 0 THEN 1
    ELSE 0
  END AS domain1_and_domain2
FROM `yourproject.yourdataset.yourtable`
GROUP BY sessionId
You can test / play with it using the dummy data from your question:
你可以使用问题中的虚拟数据来测试/玩它
#standardSQL
-- Self-contained demo: the CTE shadows the table name with the six sample
-- rows from the question, then the per-session flag query runs against it.
WITH `yourproject.yourdataset.yourtable` AS (
  SELECT 'a1' AS sessionId, 'domain1' AS hostname
  UNION ALL SELECT 'a1', 'domain2'
  UNION ALL SELECT 'a2', 'domain1'
  UNION ALL SELECT 'a3', 'domain1'
  UNION ALL SELECT 'a3', 'domain2'
  UNION ALL SELECT 'a4', 'domain2'
)
-- COUNTIF counts matching rows per session; above zero means "seen".
SELECT
  sessionId,
  CASE WHEN COUNTIF(hostname = 'domain1') > 0 THEN 1 ELSE 0 END AS only_domain1,
  CASE WHEN COUNTIF(hostname = 'domain2') > 0 THEN 1 ELSE 0 END AS only_domain2,
  CASE
    WHEN COUNTIF(hostname = 'domain1') > 0 OR COUNTIF(hostname = 'domain2') > 0 THEN 1
    ELSE 0
  END AS domain1_or_domain2,
  CASE
    WHEN COUNTIF(hostname = 'domain1') > 0 AND COUNTIF(hostname = 'domain2') > 0 THEN 1
    ELSE 0
  END AS domain1_and_domain2
FROM `yourproject.yourdataset.yourtable`
GROUP BY sessionId
ORDER BY sessionId
#1
1
You can use conditional aggregation:
您可以使用条件聚合:
-- One row per session with 0/1 flags telling whether the session hit
-- 'domain1', 'domain2', at least one of them, or both of them.
-- sessionid is selected so that each flag row can be tied back to its session.
-- max() over a 0/1 expression yields 1 as soon as one row in the group matches.
-- NOTE(review): `both` is a reserved word in some dialects; quote the alias
-- if your engine rejects it.
select sessionid,
       max(case when hostname = 'domain1' then 1 else 0 end) as domain1,
       max(case when hostname = 'domain2' then 1 else 0 end) as domain2,
       max(case when hostname in ('domain1', 'domain2') then 1 else 0 end) as either,
       (case when max(case when hostname = 'domain1' then 1 else 0 end) = 1 and
                  max(case when hostname = 'domain2' then 1 else 0 end) = 1
             then 1 else 0
        end) as both
from t
group by sessionid;
#2
1
Try this :
尝试这个 :
-- Sample data from the question, held in a table variable.
Declare @Table as Table (sessionId varchar(100), hostname varchar(100));

-- Explicit column list so the insert does not depend on column order.
Insert into @Table (sessionId, hostname) Values
('a1','domain1'),
('a1','domain2'),
('a2','domain1'),
('a3','domain1'),
('a3','domain2'),
('a4','domain2');

-- One row per session with 0/1 flags per hostname.
-- Each side of the joins is reduced to distinct session ids first, so the
-- joins cannot multiply rows and no outer DISTINCT is needed to hide them.
-- A non-null s1/s2 key means the session has at least one row for that hostname.
Select T.sessionId,
    case when s1.sessionId is null then 0 else 1 end as [only domain1],
    case when s2.sessionId is null then 0 else 1 end as [only domain2],
    case when s1.sessionId is not null or s2.sessionId is not null
         then 1 else 0 end as [domain1 OR domain2],
    case when s1.sessionId is not null and s2.sessionId is not null
         then 1 else 0 end as [domain1 AND domain2]
from
(
    Select distinct sessionId From @Table
) T
Left Join
(
    Select distinct sessionId From @Table where hostname = 'domain1'
) s1 on s1.sessionId = T.sessionId
Left Join
(
    Select distinct sessionId From @Table where hostname = 'domain2'
) s2 on s2.sessionId = T.sessionId;
#3
1
For BigQuery Standard SQL
对于BigQuery Standard SQL
#standardSQL
-- Per-session 0/1 presence flags for the two hostnames.
-- COUNTIF counts the rows of each session group that match the predicate;
-- a count above zero means the hostname was seen at least once.
SELECT
  sessionId,
  CASE WHEN COUNTIF(hostname = 'domain1') > 0 THEN 1 ELSE 0 END AS only_domain1,
  CASE WHEN COUNTIF(hostname = 'domain2') > 0 THEN 1 ELSE 0 END AS only_domain2,
  CASE
    WHEN COUNTIF(hostname = 'domain1') > 0 OR COUNTIF(hostname = 'domain2') > 0 THEN 1
    ELSE 0
  END AS domain1_or_domain2,
  CASE
    WHEN COUNTIF(hostname = 'domain1') > 0 AND COUNTIF(hostname = 'domain2') > 0 THEN 1
    ELSE 0
  END AS domain1_and_domain2
FROM `yourproject.yourdataset.yourtable`
GROUP BY sessionId
You can test / play with it using the dummy data from your question:
你可以使用问题中的虚拟数据来测试/玩它
#standardSQL
-- Self-contained demo: the CTE shadows the table name with the six sample
-- rows from the question, then the per-session flag query runs against it.
WITH `yourproject.yourdataset.yourtable` AS (
  SELECT 'a1' AS sessionId, 'domain1' AS hostname
  UNION ALL SELECT 'a1', 'domain2'
  UNION ALL SELECT 'a2', 'domain1'
  UNION ALL SELECT 'a3', 'domain1'
  UNION ALL SELECT 'a3', 'domain2'
  UNION ALL SELECT 'a4', 'domain2'
)
-- COUNTIF counts matching rows per session; above zero means "seen".
SELECT
  sessionId,
  CASE WHEN COUNTIF(hostname = 'domain1') > 0 THEN 1 ELSE 0 END AS only_domain1,
  CASE WHEN COUNTIF(hostname = 'domain2') > 0 THEN 1 ELSE 0 END AS only_domain2,
  CASE
    WHEN COUNTIF(hostname = 'domain1') > 0 OR COUNTIF(hostname = 'domain2') > 0 THEN 1
    ELSE 0
  END AS domain1_or_domain2,
  CASE
    WHEN COUNTIF(hostname = 'domain1') > 0 AND COUNTIF(hostname = 'domain2') > 0 THEN 1
    ELSE 0
  END AS domain1_and_domain2
FROM `yourproject.yourdataset.yourtable`
GROUP BY sessionId
ORDER BY sessionId