1.hive实现wordcount例子
创建表
-- Staging table for the word-count example: each row holds one raw line of input text.
-- `if not exists` keeps the script re-runnable instead of failing when the table is already there.
create table if not exists t_word_count_data (
    line string
);
导入本地数据(也可以导入hdfs上的数据,此时不需要加local,如:/user/root/input/wc_data)
-- Load the input file from the local filesystem into the staging table.
-- `overwrite` replaces whatever rows the table already contains.
load data local inpath '/lost+found/wc_data'
overwrite into table t_word_count_data;
以空格为切分统计单词个数然后将其写入新表t_word_count
-- Build t_word_count: one row per distinct word with its number of occurrences.
-- The inner query splits every line on a single space and explode() emits one row per token.
-- Consecutive, leading or trailing spaces (and empty lines) make split() produce '' tokens;
-- these are filtered out so that the empty string is not counted as a word.
create table t_word_count as
select
    word,
    count(1) as count
from (
    select explode(split(line, ' ')) as word
    from t_word_count_data
) tokens
where word <> ''
group by word
order by word;
2.创建表并导入数据
-- Student table in database `test`, showing Hive complex column types:
-- an array, a map and a struct, stored as a delimited text file.
-- The delimiters are spelled out explicitly as octal escapes:
--   \001 between columns, \002 between collection items, \003 between a map key and its value.
-- NOTE(review): `cource` looks like a misspelling of `course`; left unchanged because
-- renaming the column would change the table schema.
create table if not exists test.student (
    name    string comment 'student name',
    age     int    comment 'student age',
    cource  array<string>,
    body    map<string,float>,
    address struct<street:string,city:string,state:string>
)
comment 'the info of student'
row format delimited
    fields terminated by '\001'
    collection items terminated by '\002'
    map keys terminated by '\003'
    lines terminated by '\n'
stored as textfile;
建表语句的相关解释:
row format delimited fields terminated by '\001' #列的分隔符为 '\001'
collection items terminated by '\002' #指定集合元素间的分隔符为'\002'
map keys terminated by '\003' #指定了类型为map的字段的键值对分隔符为'\003'
lines terminated by '\n' #指定了行的分隔符(但是目前hive仅支持'\n'作为行分隔符,所以这句没有什么作用)
stored as textfile #指定存储格式为 文本文件
默认记录和字段分隔符:
\n 每行一条记录
^A 分隔列(八进制 \001)
^B 分隔ARRAY或者STRUCT中的元素,或者MAP中多个键值对之间分隔(八进制 \002)
^C 分隔MAP中键值对的“键”和“值”(八进制 \003)
测试数据如下:
xiaoming^A22^A陕西^B深圳^B宝鸡^Aweight^C50.5^Axixiang^Bshenzheng^Bchina
xiaohong^A22^A陕西^B深圳^B宝鸡^Aweight^C40.5^Ashekou^Bshenzheng^Bchina
导入数据:
-- Load the local test file into the student table, replacing any existing rows.
-- The table is created as test.student, so the target is qualified with the database name;
-- an unqualified `student` would be resolved against whichever database is currently in use.
load data local inpath '/lost+found/student'
overwrite into table test.student;