---------- 禁止修改 ----------
drop database if exists mydb cascade;---------- 禁止修改 -------------------- begin -------------创建mydb数据库
create database if not exists mydb;---使用mydb数据库
use mydb;---创建表user
create table usertab(
id string,
sex string,
time string,
education string,
occupation string,
income string,
area string,
desired_area string,
city_countryside string
)
row format delimited fields terminated by ',';---导入数据:/root/data.txt
load data local inpath '/root/data.txt' into table usertab;--查询每一个用户从出生到2019-06-10的总天数
select id,datediff('2019-06-10',regexp_replace(time,'/','-')) from usertab;
2 同一个地区相同的教育程度的最高收入
---------- 禁止修改 ----------
drop database if exists mydb cascade;---------- 禁止修改 -------------------- begin ------------创建mydb数据库
create database if not exists mydb;---使用mydb数据库
use mydb;---创建表user
create table usertab1(
id int,
sex string,
time string,
education string,
occupation string,
income string,
area string,
desired_area string,
city_countryside string
)
row format delimited fields terminated by ',';---导入数据:/root/data.txt
load data local inpath '/root/data1.txt' into table usertab1;--同一个地区相同的教育程度的最高收入
select area,education,income
from(
select area,education,income,row_number()over(
partition by area, education order by income desc
) as t1
from usertab1
) as t2
where t2.t1 =1;---------- end ----------
3 统计各级学历所占总人数百分比
---------- 禁止修改 ----------
drop database if exists mydb cascade;
set hive.mapred.mode=nonstrict;---------- 禁止修改 -------------------- begin ------------创建mydb数据库
create database if not exists mydb;---使用mydb数据库
use mydb;---创建表user
create table usertab2(
id int,
sex string,
time string,
education string,
occupation string,
income string,
area string,
desired_area string,
city_countryside string
)
row format delimited fields terminated by ',';---导入数据:/root/data.txt
load data local inpath '/root/data.txt' into table usertab2;--统计各级学历所占总人数百分比(对结果保留两位小数)
select concat(round(t1.cnted *100/ t2.cnt,2),'%'), t1.education
from
(
select count(*) as cnted,education
from usertab2
group by education
) as t1,(
select count(*) as cnt from usertab2
) as t2
order by t1.education;---------- end ----------