hbase-0.92.1过滤器学习

时间:2021-03-06 18:27:52

查看当前hbase中支持的过滤器

hbase(main):060:0* show_filters
Documentation on filters mentioned below can be found at: https://our.intern.facebook.com/intern/wiki/index.php/HBase/Filter_Language
ColumnPrefixFilter
TimestampsFilter
PageFilter
MultipleColumnPrefixFilter
FamilyFilter
ColumnPaginationFilter
SingleColumnValueFilter
RowFilter
QualifierFilter
ColumnRangeFilter
ValueFilter
PrefixFilter
SingleColumnValueExcludeFilter
ColumnCountGetFilter
InclusiveStopFilter
DependentColumnFilter
FirstKeyOnlyFilter
KeyOnlyFilter

新建测试表和数据

hbase(main):072:0* create 'emp', 'mycf'

put 'emp', 'row1', 'mycf:name', 'adams'
put 'emp', 'row1', 'mycf:depart', 'research'
put 'emp', 'row1', 'mycf:job', 'clerk'
put 'emp', 'row1', 'mycf:id', ''
put 'emp', 'row1', 'mycf:locate', 'dallas' put 'emp', 'row2', 'mycf:name', 'allen'
put 'emp', 'row2', 'mycf:depart', 'sales'
put 'emp', 'row2', 'mycf:job', 'salesman'
put 'emp', 'row2', 'mycf:id', ''
put 'emp', 'row2', 'mycf:locate', 'chicago' put 'emp', 'row3', 'mycf:name', 'blake'
put 'emp', 'row3', 'mycf:depart', 'sales'
put 'emp', 'row3', 'mycf:job', 'manager'
put 'emp', 'row3', 'mycf:id', ''
put 'emp', 'row3', 'mycf:locate', 'chicago' put 'emp', 'row4', 'mycf:name', 'clark'
put 'emp', 'row4', 'mycf:depart', 'accounting'
put 'emp', 'row4', 'mycf:job', 'manager'
put 'emp', 'row4', 'mycf:id', ''
put 'emp', 'row4', 'mycf:locate', 'new york' put 'emp', 'row5', 'mycf:name', 'ford'
put 'emp', 'row5', 'mycf:depart', 'research'
put 'emp', 'row5', 'mycf:job', 'analyst'
put 'emp', 'row5', 'mycf:id', ''
put 'emp', 'row5', 'mycf:locate', 'dallas' put 'emp', 'row6', 'mycf:name', 'james'
put 'emp', 'row6', 'mycf:depart', 'sales'
put 'emp', 'row6', 'mycf:job', 'clerk'
put 'emp', 'row6', 'mycf:id', ''
put 'emp', 'row6', 'mycf:locate', 'chicago' put 'emp', 'row7', 'mycf:name', 'jones'
put 'emp', 'row7', 'mycf:depart', 'research'
put 'emp', 'row7', 'mycf:job', 'manager'
put 'emp', 'row7', 'mycf:id', ''
put 'emp', 'row7', 'mycf:locate', 'dallas' put 'emp', 'row8', 'mycf:name', 'king'
put 'emp', 'row8', 'mycf:depart', 'accounting'
put 'emp', 'row8', 'mycf:job', 'president'
put 'emp', 'row8', 'mycf:id', ''
put 'emp', 'row8', 'mycf:locate', 'new york' hbase(main):180:0> scan 'emp'
ROW COLUMN+CELL
row1 column=mycf:depart, timestamp=1555846776542, value=research
row1 column=mycf:id, timestamp=1555846776590, value=7876
row1 column=mycf:job, timestamp=1555846776566, value=clerk
row1 column=mycf:locate, timestamp=1555846776618, value=dallas
row1 column=mycf:name, timestamp=1555846776511, value=adams
row2 column=mycf:depart, timestamp=1555846776687, value=sales
row2 column=mycf:id, timestamp=1555846776736, value=7499
row2 column=mycf:job, timestamp=1555846776712, value=salesman
row2 column=mycf:locate, timestamp=1555846776770, value=chicago
row2 column=mycf:name, timestamp=1555846776662, value=allen
row3 column=mycf:depart, timestamp=1555846776838, value=sales
row3 column=mycf:id, timestamp=1555846776887, value=7698
row3 column=mycf:job, timestamp=1555846776863, value=manager
row3 column=mycf:locate, timestamp=1555846776912, value=chicago
row3 column=mycf:name, timestamp=1555846776806, value=blake
row4 column=mycf:depart, timestamp=1555846776976, value=accounting
row4 column=mycf:id, timestamp=1555846777027, value=7782
row4 column=mycf:job, timestamp=1555846777002, value=manager
row4 column=mycf:locate, timestamp=1555846777086, value=new york
row4 column=mycf:name, timestamp=1555846776952, value=clark
row5 column=mycf:depart, timestamp=1555846777146, value=research
row5 column=mycf:id, timestamp=1555846777193, value=7902
row5 column=mycf:job, timestamp=1555846777169, value=analyst
row5 column=mycf:locate, timestamp=1555846777218, value=dallas
row5 column=mycf:name, timestamp=1555846777121, value=ford
row6 column=mycf:depart, timestamp=1555846777277, value=sales
row6 column=mycf:id, timestamp=1555846777324, value=7900
row6 column=mycf:job, timestamp=1555846777301, value=clerk
row6 column=mycf:locate, timestamp=1555846777355, value=chicago
row6 column=mycf:name, timestamp=1555846777253, value=james
row7 column=mycf:depart, timestamp=1555846777416, value=research
row7 column=mycf:id, timestamp=1555846777465, value=7566
row7 column=mycf:job, timestamp=1555846777441, value=manager
row7 column=mycf:locate, timestamp=1555846777491, value=dallas
row7 column=mycf:name, timestamp=1555846777390, value=jones
row8 column=mycf:depart, timestamp=1555846777556, value=accounting
row8 column=mycf:id, timestamp=1555846777604, value=7839
row8 column=mycf:job, timestamp=1555846777581, value=president
row8 column=mycf:locate, timestamp=1555846777628, value=new york
row8 column=mycf:name, timestamp=1555846777526, value=king
8 row(s) in 0.1420 seconds

PrefixFilter

hbase(main):191:0* scan 'emp', {FILTER => "PrefixFilter ('row1')"}
ROW COLUMN+CELL
row1 column=mycf:depart, timestamp=1555846776542, value=research
row1 column=mycf:id, timestamp=1555846776590, value=7876
row1 column=mycf:job, timestamp=1555846776566, value=clerk
row1 column=mycf:locate, timestamp=1555846776618, value=dallas
row1 column=mycf:name, timestamp=1555846776511, value=adams
1 row(s) in 0.0510 seconds

ColumnPrefixFilter

hbase(main):192:0> scan 'emp', {FILTER => "ColumnPrefixFilter ('name')"}
ROW COLUMN+CELL
row1 column=mycf:name, timestamp=1555846776511, value=adams
row2 column=mycf:name, timestamp=1555846776662, value=allen
row3 column=mycf:name, timestamp=1555846776806, value=blake
row4 column=mycf:name, timestamp=1555846776952, value=clark
row5 column=mycf:name, timestamp=1555846777121, value=ford
row6 column=mycf:name, timestamp=1555846777253, value=james
row7 column=mycf:name, timestamp=1555846777390, value=jones
row8 column=mycf:name, timestamp=1555846777526, value=king
8 row(s) in 0.0720 seconds

ValueFilter

hbase(main):038:0* scan 'emp', FILTER=>"ValueFilter(=,'substring:sales')"
ROW COLUMN+CELL
row2 column=mycf:depart, timestamp=1555846776687, value=sales
row2 column=mycf:job, timestamp=1555846776712, value=salesman
row3 column=mycf:depart, timestamp=1555846776838, value=sales
row6 column=mycf:depart, timestamp=1555846777277, value=sales
3 row(s) in 0.0540 seconds
# Create a second table 'test' with one column family, pre-split into four
# regions at the row keys 'rk-1000', 'rk-2000' and 'rk-3000'.
create 'test', 'cf1', { SPLITS => ['rk-1000', 'rk-2000', 'rk-3000'] }

# Fill 'test' with generated rows: the row key is "rk-<i><j><k>", the column
# qualifier is "<j><k>" in family 'cf1', and the cell value is also "<j><k>".
# NOTE(review): the range bounds were empty strings in the source text (the
# digits appear to have been lost); '0'..'9' is assumed here, giving row keys
# rk-000 .. rk-999 -- confirm against the intended data set.
for i in '0'..'9' do
  for j in '0'..'9' do
    for k in '0'..'9' do
      put 'test', "rk-#{i}#{j}#{k}", "cf1:#{j}#{k}", "#{j}#{k}"
    end
  end
end