知识点:
格式转换:cast(xxx as int)
按某列分桶某列排序,排序后打标机;例如:求每个地区工资最高的那个人的信息;
ROW_NUMBER() OVER(PARTITION BY COLUMN ORDER BY COLUMN)
row_number() over(distribute by t1.loc sort by cast(t1.sal as int) desc) as index
dept表
hive> select * from dept;
# deptno(部门编号) dname(部门名称) loc(部门所在地区)
ACCOUNTING NEW YORK
RESEARCH DALLAS
SALES CHICAGO
OPERATIONS BOSTON
ump表
hive> select * from ump; # 员工编号 员工姓名 职务 领导编号 入职日期 工资 奖金 部门编号
# empno ename job mgr hiredate sal comm deptno
SMITH CLERK -- 800.0 0.0
ALLEN SALESMAN -- 1600.0 300.0
WARD SALESMAN -- 1250.0 500.0
JONES MANAGER -- 2975.0 0.0
MARTIN SALESMAN -- 1250.0 1400.0
BLAKE MANAGER -- 2850.0 0.0
CLARK MANAGER -- 2450.0 0.0
SCOTT ANALYST -- 3000.0 0.0
KING PRESIDENT NULL -- 5000.0 0.0
TURNER SALESMAN -- 1500.0 0.0
ADAMS CLERK -- 1100.0 0.0
JAMES CLERK -- 950.0 0.0
FORD ANALYST -- 3000.0 0.0
MILLER CLERK -- 1300.0 0.0 10
(1) 查询总员工数
select count(empno) from ump; #Total MapReduce CPU Time Spent: seconds msec
#OK
#
(2) 查询总共有多少种职位
select count(distinct job) from ump;
#Total MapReduce CPU Time Spent: seconds msec
#OK
#
(3) 统计每个职位有多少个员工,并且按照数量从大到小排序
select job ,count (*)as emp_cnt
from ump
group by job
order by emp_cnt desc; SALESMAN
CLERK
MANAGER
ANALYST
PRESIDENT
(4) 查询入职最早的员工
select ump.ename,ump.hiredate
from ump
join
(select min(hiredate) as hiredate from ump)t1
where ump.hiredate=t1.hiredate; #SMITH --
(5) 统计出每个岗位的最高工资和平均工资
select job ,max(sal)as max_sale,avg(sal)as min_sale
from ump
group by job;
ANALYST 3000.0 3000.0
CLERK 950.0 1037.5
MANAGER 2975.0 2758.3333333333335
PRESIDENT 5000.0 5000.0
SALESMAN 1600.0 1400.0
(6) 查询出每个地区工资最高的员工
select t2.loc,t2.ename,t2.sal
from
(select t1.loc,t1.ename,t1.sal,
row_number() over(distribute by t1.loc sort by cast(t1.sal as int) desc) as index
from
(select dept.loc,ump.ename,ump.sal from
dept join ump
on dept.deptno=ump.deptno)t1
)t2
where t2.index=; #CHICAGO BLAKE 2850.0
#DALLAS FORD 3000.0
#NEW KING 5000.0
(7) 查询上半年入职员工最多的地区
select t1.loc,count(*)as cnt
from
(select dept.loc,ump.ename,
cast(substr(ump.hiredate,,) as int) as hire_month
from dept join ump
on dept.deptno=ump.deptno)t1
where t1.hire_month<=
group by t1.loc
order by cnt desc
limit ; CHICAGO