一行转多行 及多行转一行的 hive语句

时间:2022-07-03 10:28:05
 
注意 :|,; 是特殊符号,要用 "\\|", "\\;"来表示。
 
一行转多行
usertags 里面有很多项,每项之间以逗号分隔
 
create table tag_count2 as
select tag,count(gid) from (
select gid,tag from (select b.gid ,b.usertags from zhangb.gid_tags b group by b.gid,b.usertags) a
lateral view explode(split(a.usertags, ',')) t as tag
) p where substr(p.gid,1,7)='ANDROID' group by p.tag;
 
# 其中的 t 是把a表中的usertags 由一行变成多行后形成的新表的别称,是表的名称
 
导出数据:
insert overwrite local directory '/home/zhangb/tag_count'row format delimited fields terminated by '\t'
select distinct * from zhangb.tag_count
 
原格式:
gid1    abc,abd,abf
gid2    abc,snm
...
处理后输出格式:
gid1   abc
gid1   abd
gid1   abf
gid2   abc
gid2  snm
...
 
 
***多行转一行
1、演示多列转为单行
数据文件及内容: student.txt
xiaoming|english|92.0
xiaoming|chinese|98.0
xiaoming|math|89.5
huahua|chinese|80.0
huahua|math|89.5

创建表studnet:
create table student(name string,subject string,score decimal(4,1))
row format delimited
fields terminated by '|';

导入数据:
load data local inpath '/home/hadoop/hivetestdata/student.txt' into table student;


列转为行演示:
hive (hive)> select name,concat_ws(',',collect_set(subject)) from student group by name;
huahua chinese,math
xiaoming english,chinese,math


hive (hive)> select name,concat_ws(',',collect_set(concat(subject,'=',score))) from student group by name;
huahua chinese=80,math=89.5
xiaoming english=92,chinese=98,math=89.5