Hadoop集群环境:
三台机器:namenode0, datanode1, datanode2
操作系统:Ubuntu 11.04 Server version
Hadoop版本: hadoop-0.20.2-cdh3u1
HBase版本:hbase-0.90.4-cdh3u2
Java版本:jdk-6u29-linux-x64
客户端机器:
注意点:
- hbase-site.xml 和 hbase-default.xml文件必须在CLASSPATH中
- 本机需安装log4j
- HBase客户端程序需引用:hadoop-core-0.20.2-cdh3u2.jar,hbase-0.90.4-cdh3u2.jar,zookeeper-3.3.3-cdh3u2.jar,commons-logging-1.1.1.jar,log4j-1.2.16.jar
import java.util.HashMap; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.util.Bytes; public class HBaseSingleColumnFilter { //HBaseSingleColumnFilter: 'table_name' 'columnfamliy_name' 'column_name' ['equal', 'grater] 'column_value' public static void main(String[] args) { try { if(args.length != 5) { System.out.print("HBaseSingleColumnFilter: 'table_name' 'columnfamliy_name' 'column_name' ['equal', 'grater] 'column_value'"); return; } Map<String, CompareOp> operators = new HashMap<String, CompareOp>(); operators.put("equal", CompareOp.EQUAL); operators.put("greater", CompareOp.GREATER); operators.put("greaterorequal", CompareOp.GREATER_OR_EQUAL); operators.put("less", CompareOp.LESS); operators.put("lessorequal", CompareOp.LESS_OR_EQUAL); operators.put("notequal", CompareOp.NOT_EQUAL); long beginTime = System.currentTimeMillis(); Configuration conf = HBaseConfiguration.create(); String tableName = args[0]; String columnFamilyName = args[1]; String columnName = args[2]; CompareOp operator = CompareOp.NO_OP; if(operators.containsKey(args[3])) { operator = operators.get(args[3]); } String columnValue = args[4]; HTable table = new HTable(conf, tableName); Filter singleColumnFilter = new SingleColumnValueFilter(Bytes.toBytes(columnFamilyName), Bytes.toBytes(columnName), operator, Bytes.toBytes(columnValue)); Scan s = new Scan(); s.setFilter(singleColumnFilter); ResultScanner scanner = table.getScanner(s); scanner = table.getScanner(s); long totalCount = 0; for (Result result = scanner.next(); result != null; result 
= scanner.next()) { totalCount++; System.out.println(result.toString()); } long endTime = System.currentTimeMillis(); System.out.println("action cost " + String.valueOf(endTime - beginTime) + " milseconds " + "find rows " + String.valueOf(totalCount)); } catch (Exception e) { e.printStackTrace(); } } }
对一个有309行,一个列族一个列的table进行查询:
xyz cf1 val equal val_87 : action cost 569 milseconds find rows 1
xyz cf1 val notequal val_87 :action cost 861 milseconds find rows 308
xyz cf1 val less val_87 :action cost 887 milseconds find rows 301
对一个有1365148行,一个列族6个列的table进行查询
apacheAccessLogHive6 cf1 ipaddress equal 223.166.115.141 : action cost 11895 milseconds find rows 327
apacheAccessLogHive6 cf1 ipaddress equal 223.166.115.141 : action cost 11895 milseconds find rows 327