根据列A将列B内容解析为逻辑部分

时间:2021-03-08 19:19:59

I have a table like this

我有一张这样的表

sessionId  |   hostname
------     |   ------
a1         |   domain1
a1         |   domain2
a2         |   domain1
a3         |   domain1
a3         |   domain2
a4         |   domain2

What I want is to build a logical table containing the following:

我想要的是构建一个包含以下内容的逻辑表:

sessionId  |   only domain1 | only domain2 | domain1 OR domain2 | domain1 AND domain2 
-----------|----------------|--------------|--------------------|--------------------
a1         |   1            | 1            | 1                  | 1
a2         |   1            | 0            | 1                  | 0
a3         |   1            | 1            | 1                  | 1
a4         |   0            | 1            | 1                  | 0

I guess there's a simple solution for this, but I can't get my head around it :(

我想这应该有一个简单的解决方案,但我就是想不出来 :(

3 个解决方案

#1


1  

You can use conditional aggregation:

您可以使用条件聚合:

-- One row per session with 0/1 flags for the two hostnames.
-- The inner query computes each "was this hostname seen?" flag once per
-- session; the outer query combines the two flags, so the hostname tests
-- are not repeated for the OR / AND columns.
-- sessionid is returned so every flag row can be tied back to its session.
-- NOTE(review): `both` is a reserved word in some engines (e.g. MySQL);
-- quote the alias there if the parser rejects it.
select sessionid,
       domain1,
       domain2,
       (case when domain1 = 1 or domain2 = 1
             then 1 else 0
        end) as either,
       (case when domain1 = 1 and domain2 = 1
             then 1 else 0
        end) as both
from (select sessionid,
             -- max over 0/1 markers is 1 as soon as one row matches
             max(case when hostname = 'domain1' then 1 else 0 end) as domain1,
             max(case when hostname = 'domain2' then 1 else 0 end) as domain2
      from t
      group by sessionid
     ) s;

#2


1  

Try this :

尝试这个 :

-- Sample data from the question, held in a T-SQL table variable.
DECLARE @Table AS TABLE (sessionId varchar(100), hostname varchar(100));

-- Explicit column list so the insert does not depend on column order.
INSERT INTO @Table (sessionId, hostname)
VALUES
    ('a1', 'domain1'),
    ('a1', 'domain2'),
    ('a2', 'domain1'),
    ('a3', 'domain1'),
    ('a3', 'domain2'),
    ('a4', 'domain2');

-- One row per session with 0/1 flags for the two hostnames.
-- Each derived table is de-duplicated before joining, so the joins are
-- one-to-one and no outer DISTINCT is needed to hide row multiplication.
-- A flag is 1 exactly when the corresponding LEFT JOIN found a match.
SELECT
    s.sessionId,
    CASE WHEN d1.sessionId IS NULL THEN 0 ELSE 1 END AS [only domain1],
    CASE WHEN d2.sessionId IS NULL THEN 0 ELSE 1 END AS [only domain2],
    CASE
        WHEN d1.sessionId IS NOT NULL OR d2.sessionId IS NOT NULL THEN 1
        ELSE 0
    END AS [domain1 OR domain2],
    CASE
        WHEN d1.sessionId IS NOT NULL AND d2.sessionId IS NOT NULL THEN 1
        ELSE 0
    END AS [domain1 AND domain2]
FROM (
    SELECT DISTINCT sessionId FROM @Table
) AS s
LEFT JOIN (
    SELECT DISTINCT sessionId FROM @Table WHERE hostname = 'domain1'
) AS d1
    ON d1.sessionId = s.sessionId
LEFT JOIN (
    SELECT DISTINCT sessionId FROM @Table WHERE hostname = 'domain2'
) AS d2
    ON d2.sessionId = s.sessionId;

#3


1  

For BigQuery Standard SQL

对于BigQuery Standard SQL

#standardSQL
-- Per-session 0/1 flags showing which of the two hostnames were seen.
-- Step 1 counts the matching rows per sessionId; step 2 collapses each
-- count (or combination of counts) to 0/1 with SIGN.
WITH hostname_counts AS (
  SELECT
    sessionId,
    COUNTIF(hostname = 'domain1') AS domain1_rows,
    COUNTIF(hostname = 'domain2') AS domain2_rows
  FROM `yourproject.yourdataset.yourtable`
  GROUP BY sessionId
)
SELECT
  sessionId,
  SIGN(domain1_rows) AS only_domain1,
  SIGN(domain2_rows) AS only_domain2,
  -- the sum is non-zero when at least one of the counts is non-zero
  SIGN(domain1_rows + domain2_rows) AS domain1_or_domain2,
  -- the product is non-zero only when both counts are non-zero
  SIGN(domain1_rows * domain2_rows) AS domain1_and_domain2
FROM hostname_counts

You can test / play with it using the dummy data from your question:

你可以使用问题中的虚拟数据来测试/试用它:

#standardSQL
-- Self-contained demo: the first CTE stands in for the real table using the
-- six sample rows from the question, the second counts matching rows per
-- sessionId, and the final SELECT collapses the counts to 0/1 with SIGN.
WITH `yourproject.yourdataset.yourtable` AS (
  SELECT sessionId, hostname
  FROM UNNEST([
    STRUCT('a1' AS sessionId, 'domain1' AS hostname),
    ('a1', 'domain2'),
    ('a2', 'domain1'),
    ('a3', 'domain1'),
    ('a3', 'domain2'),
    ('a4', 'domain2')
  ])
),
hostname_counts AS (
  SELECT
    sessionId,
    COUNTIF(hostname = 'domain1') AS domain1_rows,
    COUNTIF(hostname = 'domain2') AS domain2_rows
  FROM `yourproject.yourdataset.yourtable`
  GROUP BY sessionId
)
SELECT
  sessionId,
  SIGN(domain1_rows) AS only_domain1,
  SIGN(domain2_rows) AS only_domain2,
  -- the sum is non-zero when at least one of the counts is non-zero
  SIGN(domain1_rows + domain2_rows) AS domain1_or_domain2,
  -- the product is non-zero only when both counts are non-zero
  SIGN(domain1_rows * domain2_rows) AS domain1_and_domain2
FROM hostname_counts
ORDER BY sessionId

#1


1  

You can use conditional aggregation:

您可以使用条件聚合:

-- One row per session with 0/1 flags for the two hostnames.
-- The inner query computes each "was this hostname seen?" flag once per
-- session; the outer query combines the two flags, so the hostname tests
-- are not repeated for the OR / AND columns.
-- sessionid is returned so every flag row can be tied back to its session.
-- NOTE(review): `both` is a reserved word in some engines (e.g. MySQL);
-- quote the alias there if the parser rejects it.
select sessionid,
       domain1,
       domain2,
       (case when domain1 = 1 or domain2 = 1
             then 1 else 0
        end) as either,
       (case when domain1 = 1 and domain2 = 1
             then 1 else 0
        end) as both
from (select sessionid,
             -- max over 0/1 markers is 1 as soon as one row matches
             max(case when hostname = 'domain1' then 1 else 0 end) as domain1,
             max(case when hostname = 'domain2' then 1 else 0 end) as domain2
      from t
      group by sessionid
     ) s;

#2


1  

Try this :

尝试这个 :

-- Sample data from the question, held in a T-SQL table variable.
DECLARE @Table AS TABLE (sessionId varchar(100), hostname varchar(100));

-- Explicit column list so the insert does not depend on column order.
INSERT INTO @Table (sessionId, hostname)
VALUES
    ('a1', 'domain1'),
    ('a1', 'domain2'),
    ('a2', 'domain1'),
    ('a3', 'domain1'),
    ('a3', 'domain2'),
    ('a4', 'domain2');

-- One row per session with 0/1 flags for the two hostnames.
-- Each derived table is de-duplicated before joining, so the joins are
-- one-to-one and no outer DISTINCT is needed to hide row multiplication.
-- A flag is 1 exactly when the corresponding LEFT JOIN found a match.
SELECT
    s.sessionId,
    CASE WHEN d1.sessionId IS NULL THEN 0 ELSE 1 END AS [only domain1],
    CASE WHEN d2.sessionId IS NULL THEN 0 ELSE 1 END AS [only domain2],
    CASE
        WHEN d1.sessionId IS NOT NULL OR d2.sessionId IS NOT NULL THEN 1
        ELSE 0
    END AS [domain1 OR domain2],
    CASE
        WHEN d1.sessionId IS NOT NULL AND d2.sessionId IS NOT NULL THEN 1
        ELSE 0
    END AS [domain1 AND domain2]
FROM (
    SELECT DISTINCT sessionId FROM @Table
) AS s
LEFT JOIN (
    SELECT DISTINCT sessionId FROM @Table WHERE hostname = 'domain1'
) AS d1
    ON d1.sessionId = s.sessionId
LEFT JOIN (
    SELECT DISTINCT sessionId FROM @Table WHERE hostname = 'domain2'
) AS d2
    ON d2.sessionId = s.sessionId;

#3


1  

For BigQuery Standard SQL

对于BigQuery Standard SQL

#standardSQL
-- Per-session 0/1 flags showing which of the two hostnames were seen.
-- Step 1 counts the matching rows per sessionId; step 2 collapses each
-- count (or combination of counts) to 0/1 with SIGN.
WITH hostname_counts AS (
  SELECT
    sessionId,
    COUNTIF(hostname = 'domain1') AS domain1_rows,
    COUNTIF(hostname = 'domain2') AS domain2_rows
  FROM `yourproject.yourdataset.yourtable`
  GROUP BY sessionId
)
SELECT
  sessionId,
  SIGN(domain1_rows) AS only_domain1,
  SIGN(domain2_rows) AS only_domain2,
  -- the sum is non-zero when at least one of the counts is non-zero
  SIGN(domain1_rows + domain2_rows) AS domain1_or_domain2,
  -- the product is non-zero only when both counts are non-zero
  SIGN(domain1_rows * domain2_rows) AS domain1_and_domain2
FROM hostname_counts

You can test / play with it using the dummy data from your question:

你可以使用问题中的虚拟数据来测试/试用它:

#standardSQL
-- Self-contained demo: the first CTE stands in for the real table using the
-- six sample rows from the question, the second counts matching rows per
-- sessionId, and the final SELECT collapses the counts to 0/1 with SIGN.
WITH `yourproject.yourdataset.yourtable` AS (
  SELECT sessionId, hostname
  FROM UNNEST([
    STRUCT('a1' AS sessionId, 'domain1' AS hostname),
    ('a1', 'domain2'),
    ('a2', 'domain1'),
    ('a3', 'domain1'),
    ('a3', 'domain2'),
    ('a4', 'domain2')
  ])
),
hostname_counts AS (
  SELECT
    sessionId,
    COUNTIF(hostname = 'domain1') AS domain1_rows,
    COUNTIF(hostname = 'domain2') AS domain2_rows
  FROM `yourproject.yourdataset.yourtable`
  GROUP BY sessionId
)
SELECT
  sessionId,
  SIGN(domain1_rows) AS only_domain1,
  SIGN(domain2_rows) AS only_domain2,
  -- the sum is non-zero when at least one of the counts is non-zero
  SIGN(domain1_rows + domain2_rows) AS domain1_or_domain2,
  -- the product is non-zero only when both counts are non-zero
  SIGN(domain1_rows * domain2_rows) AS domain1_and_domain2
FROM hostname_counts
ORDER BY sessionId