hive提前过滤重要性

时间:2023-03-10 07:19:57
hive提前过滤重要性

hive提前过滤

create table sospdm.tmp_yinfei_test_01
(
id string
)
partitioned by (statis_date string)
; create table sospdm.tmp_yinfei_test_02
(
id string
)
partitioned by (statis_date string)
; select t1.*
from tmp_yinfei_test_01 t1
left join tmp_yinfei_test_02 t2
on t1.id=t2.id
where t1.statis_date='' and t2.statis_date=''
;
select t1.*
from tmp_yinfei_test_01 t1
left join tmp_yinfei_test_02 t2
on t1.id=t2.id and t1.statis_date='' and t2.statis_date=''
;
select t1.*
from
(
select * from tmp_yinfei_test_01 where statis_date=''
) t1
left join
(
select * from tmp_yinfei_test_02 where statis_date=''
) t2
on t1.id=t2.id
;
=========================test1===================================== explain select t1.*
from tmp_yinfei_test_01 t1
left join tmp_yinfei_test_02 t2
on t1.id=t2.id
where t1.statis_date='' and t2.statis_date=''
; hive> explain select t1.*
> from tmp_yinfei_test_01 t1
> left join tmp_yinfei_test_02 t2
> on t1.id=t2.id
> where t1.statis_date='' and t2.statis_date=''
> ;
OK
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1 STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: (statis_date = '') (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: id (type: string)
sort order: +
Map-reduce partition columns: id (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
TableScan
alias: t2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: id (type: string)
sort order: +
Map-reduce partition columns: id (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: statis_date (type: string)
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
0 id (type: string)
1 id (type: string)
outputColumnNames: _col0, _col6
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: (_col6 = '') (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col0 (type: string), '' (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: true
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink Time taken: 0.399 seconds, Fetched: 58 row(s) 结论:t2表会扫全表
=========================test2=====================================
explain select t1.*
from tmp_yinfei_test_01 t1
left join tmp_yinfei_test_02 t2
on t1.id=t2.id and t1.statis_date='' and t2.statis_date=''
;
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1 STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: id (type: string)
sort order: +
Map-reduce partition columns: id (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: statis_date (type: string)
TableScan
alias: t2
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: (statis_date = '') (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: id (type: string)
sort order: +
Map-reduce partition columns: id (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
filter predicates:
0 {(VALUE._col0 = '')}
1
keys:
0 id (type: string)
1 id (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: true
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
结论:t1表会扫全表
=========================test3=====================================
explain select t1.*
from
(
select * from tmp_yinfei_test_01 where statis_date=''
) t1
left join
(
select * from tmp_yinfei_test_02 where statis_date=''
) t2
on t1.id=t2.id
;
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1 STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: tmp_yinfei_test_01
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: (statis_date = '') (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: id (type: string), '' (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
value expressions: _col1 (type: string)
TableScan
alias: tmp_yinfei_test_02
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: (statis_date = '') (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: id (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
Left Outer Join0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: true
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink