0. 说明
全排序(order by) | 部分排序(sort by) | hash 分区(distribute by) | cluster by
1. 前期准备
1.1 建表
-- Target table for the sorting experiments below; each example empties it
-- and reloads it from user_par.
-- Stored as delimited text with one row per line and tab-separated fields.
-- `if not exists` lets these notes be re-run without failing on an
-- already-created table.
create table if not exists user_order (
    id       int,
    name     string,
    age      int,
    province string,
    city     string
)
row format delimited
fields terminated by '\t';
1.2 设置 reduce 个数
-- Request 2 reduce tasks per job for this session, so the sort by /
-- distribute by / cluster by examples can spread rows across more than
-- one reducer.
set mapreduce.job.reduces=2;
2. 全排序(order by)
order by 对全部数据做全局排序,因此只会使用一个 reduce(即使设置了 mapreduce.job.reduces=2 也不会生效);数据量大时该 reduce 会成为瓶颈,所以在真实使用中通常需要加 limit 限制返回的行数。
-- Total ordering: start from an empty table, then reload every row of
-- user_par sorted globally by id.
-- NOTE(review): `select *` relies on user_par exposing its columns in the
-- same order as user_order (id, name, age, province, city) -- confirm.
truncate table user_order;

insert into table user_order
select *
from user_par
order by id;
3. 部分排序(sort by )
sort by 只在每个 reduce 内部分别排序:每个 reduce 的输出各自有序,但不保证全局有序。
-- Partial ordering: start from an empty table, then reload user_par with
-- rows sorted by id inside each reducer (no global order across reducers).
-- NOTE(review): `select *` relies on user_par exposing its columns in the
-- same order as user_order -- confirm.
truncate table user_order;

insert into table user_order
select *
from user_par
sort by id;
4. hash 分区(distribute by )
distribute by 只按 id 的 hash 值把行分发到不同的 reduce(id 相同的行进入同一个 reduce),每个 reduce 内部的数据不做排序。
-- Hash partitioning only: start from an empty table, then reload user_par
-- with rows routed to reducers by id; rows inside a reducer are not sorted.
-- NOTE(review): `select *` relies on user_par exposing its columns in the
-- same order as user_order -- confirm.
truncate table user_order;

insert into table user_order
select *
from user_par
distribute by id;
5. cluster by
cluster by id 等价于 distribute by id sort by id:分区列与排序列相同时的简写,且只能升序排序,不能指定 asc / desc。
-- Partition and sort on the same key: start from an empty table, then
-- reload user_par with rows routed to reducers by id and sorted by id
-- inside each reducer.
-- NOTE(review): `select *` relies on user_par exposing its columns in the
-- same order as user_order -- confirm.
truncate table user_order;

insert into table user_order
select *
from user_par
cluster by id;