HBase入门教程

时间:2023-03-09 15:09:43
HBase入门教程

# 背景

最近看到公司有个项目用到了HBase,之前也一直想了解一下。个人理解,HBase作为一个NoSQL数据库,逻辑模型跟关系型数据库有点类似:一张table由行(row)和列组成。不过列是按列族(column family)组织的,一个列族下可以放多个字段(列),类似下面这种格式

table users

行   | 列族:列     | value     |   列族:列   |   value

row1  | info:name    | zhangsan   |   ....    |  ...

row1  | info:address  |  wudaokou   |   ....    |  ...

# 安装

说下安装吧,有三种模式:单机、伪分布式、集群。这里我用的单机,官网:https://hbase.apache.org/downloads.html

下载,解压

安装步骤一定要按照官网说明来,网上的博客大多太老了

1. hbase-env.sh设置JAVA_HOME

2. hbase-site.xml

<configuration>
<!-- Root directory for HBase; the file:// scheme points it at the local filesystem. -->
<property>
<name>hbase.rootdir</name>
<value>file:///Users/gxf/hbase</value>
</property>
<!-- Data directory for ZooKeeper (presumably the instance HBase starts itself in standalone mode; confirm). -->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/Users/gxf/zookeeper</value>
</property>
<!-- Turns off the hflush/hsync capability check so HBase can run on the local filesystem; see the data-loss warning in the description. -->
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
<description>
Controls whether HBase will check for stream capabilities (hflush/hsync). Disable this if you intend to run on LocalFileSystem, denoted by a rootdir
with the 'file://' scheme, but be mindful of the NOTE below. WARNING: Setting this to false blinds you to potential data loss and
inconsistent system state in the event of process and/or node failures. If
HBase is complaining of an inability to use hsync or hflush it's most
likely not a false positive.
</description>
</property>
</configuration>

到这里,安装和单机部署基本完成

$HBASE_HOME/bin/start-hbase.sh启动hbase

http://localhost:16010/master-status这个能正确显示即启动成功

# 使用

HBase提供了一个命令行客户端,我们可以使用它创建、删除、修改、查询表,以及插入、查询、删除、修改记录

$HBASE_HOME/bin/hbase shell

启动客户端,基本命令在官网也可以看,建议在官网看。我也是搬运工,顺便熟悉一下,做个备忘录

1. list命令,列出所有的表

list

2. 新建user表,列族为info,存放用户的基本信息

create 'user', 'info'

3. 删除表,要先disable,再drop

disable 'user'
drop 'user'

4. 插入数据, put 'tablename', 'row', 'cf:col', 'value'

put 'user', 'row1', 'info:name', 'guanxianseng'

5. 查询数据, scan 'tablename'

scan 'user'

# java客户端

pom.xml

<dependencies>
<!-- HBase client API used by the Java demo. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.4.8</version>
</dependency>
<!-- NOTE(review): org.apache.hbase:hbase looks like the project's parent POM rather than a jar; hbase-client alone is presumably enough for this demo. Confirm before keeping this dependency. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase</artifactId>
<version>1.4.8</version>
</dependency> <!-- log -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<!-- NOTE(review): slf4j-log4j12 and log4j-slf4j-impl (below) are both SLF4J bindings, and 1.8.0-beta2 does not match slf4j-api 1.7.25. Presumably only one binding, at a matching version, should stay. Confirm. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.8.0-beta2</version>
</dependency>
<!-- NOTE(review): version 2.11.0 here differs from log4j-core / log4j-api 2.11.1 below; presumably they should be aligned. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.11.0</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.11.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.11.1</version>
</dependency> </dependencies>

这里我用了log,所以加了log4j等log依赖

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Small HBase client demo covering table administration (create, list, delete)
 * and row operations (put, get, scan, delete).
 *
 * <p>A single {@link Connection} and {@link Admin} are created when the class is
 * loaded and shared by every static method. {@link #main} closes both on exit;
 * the individual operations only close the {@link Table} handles they open.
 */
public class HBaseTest {

    private static final Logger logger = LoggerFactory.getLogger(HBaseTest.class);

    private static final Configuration conf = HBaseConfiguration.create();
    private static final Connection connection;
    private static final Admin admin;

    static {
        // Connection settings: ZooKeeper location plus client-side timeouts
        // (HBase reads these timeout values as milliseconds).
        conf.set("hbase.zookeeper.quorum", "localhost");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.setInt("hbase.rpc.timeout", 2000);
        conf.setInt("hbase.client.operation.timeout", 3000);
        conf.setInt("hbase.client.scanner.timeout.period", 6000);
        try {
            connection = ConnectionFactory.createConnection(conf);
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail fast: continuing with a null connection would only surface
            // later as an unrelated NullPointerException in every operation.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Demo entry point. Runs one operation and then releases the shared
     * connection and admin.
     *
     * @param args unused
     * @throws Exception if the selected operation or the cleanup fails
     */
    public static void main(String[] args) throws Exception {
        // The locals exist only so try-with-resources closes the shared handles
        // (admin first, then connection) even when the operation throws.
        try (Connection sharedConnection = connection; Admin sharedAdmin = admin) {
            // Other operations that can be tried instead of the scan:
            // createTable("test3", new String[] {"colFam"});
            // deleteTable("test3");
            // listTables();
            // addData("users", "row3", "info", "name", "guanxianseng");
            // deleteData("users", "row1", "info", "name");
            // query("users", "row2", "info", "name");
            scan("users", "row1", "row2");
        }
    }

    /**
     * Scans a row-key range of a table and logs every cell found.
     *
     * @param tableNameStr name of the table to scan
     * @param startRowKey first row key of the range (inclusive); {@code null}
     *     leaves the start unbounded
     * @param stopRowKey row key at which the scan stops (exclusive); {@code null}
     *     leaves the end unbounded
     * @throws IOException if the table cannot be opened or the scan fails
     */
    public static void scan(String tableNameStr, String startRowKey, String stopRowKey)
            throws IOException {
        Scan scan = new Scan();
        if (startRowKey != null) {
            scan.withStartRow(toBytes(startRowKey));
        }
        if (stopRowKey != null) {
            scan.withStopRow(toBytes(stopRowKey));
        }
        try (Table table = connection.getTable(TableName.valueOf(tableNameStr));
                ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                showCell(result);
            }
        }
    }

    /**
     * Reads one row and logs the requested cells.
     *
     * @param tableNameStr name of the table to read
     * @param rowkey row key to fetch
     * @param colFam column family to restrict the read to; {@code null} fetches
     *     the whole row
     * @param col column qualifier inside {@code colFam}; {@code null} fetches the
     *     whole family (ignored when {@code colFam} is {@code null})
     * @throws IOException if the table cannot be opened or the read fails
     */
    public static void query(String tableNameStr, String rowkey, String colFam, String col)
            throws IOException {
        Get get = new Get(toBytes(rowkey));
        if (colFam != null) {
            if (col != null) {
                get.addColumn(toBytes(colFam), toBytes(col));
            } else {
                get.addFamily(toBytes(colFam));
            }
        }
        try (Table table = connection.getTable(TableName.valueOf(tableNameStr))) {
            showCell(table.get(get));
        }
    }

    /**
     * Logs every cell contained in a result: row key, timestamp, column family,
     * qualifier and value.
     *
     * @param result result returned by a get or a scan
     */
    private static void showCell(Result result) {
        for (Cell cell : result.rawCells()) {
            logger.info("rawname:{}, timestamp:{}, colFam:{}, colName:{}, value:{}",
                    asString(CellUtil.cloneRow(cell)),
                    cell.getTimestamp(),
                    asString(CellUtil.cloneFamily(cell)),
                    asString(CellUtil.cloneQualifier(cell)),
                    asString(CellUtil.cloneValue(cell)));
        }
    }

    /**
     * Deletes data from one row.
     *
     * @param tableNameStr name of the table to modify
     * @param row row key to delete from
     * @param colFam column family to delete from; {@code null} deletes the whole row
     * @param col column qualifier to delete (all versions); {@code null} deletes
     *     the whole family (ignored when {@code colFam} is {@code null})
     * @throws IOException if the table cannot be opened or the delete fails
     */
    public static void deleteData(String tableNameStr, String row, String colFam, String col)
            throws IOException {
        Delete delete = new Delete(toBytes(row));
        if (colFam != null) {
            if (col != null) {
                // addColumns (plural) removes every version of the column, so
                // an older value does not reappear after the delete.
                delete.addColumns(toBytes(colFam), toBytes(col));
            } else {
                delete.addFamily(toBytes(colFam));
            }
        }
        try (Table table = connection.getTable(TableName.valueOf(tableNameStr))) {
            table.delete(delete);
        }
        logger.info("delete tablename: {}, row:{}, colFam:{}, col:{}", tableNameStr, row, colFam, col);
    }

    /**
     * Writes a single cell.
     *
     * @param tableNameStr name of the table to write to
     * @param rowkey row key of the cell
     * @param colFam column family of the cell
     * @param col column qualifier of the cell
     * @param value value to store
     * @throws IOException if the table cannot be opened or the write fails
     */
    public static void addData(String tableNameStr, String rowkey, String colFam, String col, String value)
            throws IOException {
        Put put = new Put(toBytes(rowkey));
        put.addColumn(toBytes(colFam), toBytes(col), toBytes(value));
        try (Table table = connection.getTable(TableName.valueOf(tableNameStr))) {
            table.put(put);
        }
        logger.info("put table:{}, rowkey:{}, colFam:{}, col:{}, value:{}", tableNameStr, rowkey, colFam, col, value);
    }

    /**
     * Logs the name of every table known to the cluster.
     *
     * @throws IOException if the table list cannot be retrieved
     */
    public static void listTables() throws IOException {
        HTableDescriptor[] hTableDescriptors = admin.listTables();
        for (HTableDescriptor hTableDescriptor : hTableDescriptors) {
            logger.info("table :{}", hTableDescriptor.getTableName());
        }
    }

    /**
     * Creates a table with the given column families unless it already exists.
     * Failures are logged rather than propagated.
     *
     * @param tableNameStr name of the table to create
     * @param colFam names of the column families the new table should have
     */
    public static void createTable(String tableNameStr, String[] colFam) {
        try {
            TableName tableName = TableName.valueOf(tableNameStr);
            if (admin.tableExists(tableName)) {
                logger.info("table {} already exist", tableNameStr);
                return;
            }
            HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);
            for (String colStr : colFam) {
                hTableDescriptor.addFamily(new HColumnDescriptor(colStr));
            }
            admin.createTable(hTableDescriptor);
            // The shared admin is deliberately left open: closing it here would
            // break every later admin call made through this class.
            logger.info("creat table success");
        } catch (IOException | RuntimeException e) {
            logger.error("create table {} failed", tableNameStr, e);
        }
    }

    /**
     * Drops a table: disables it first, then deletes it. Logs an error and does
     * nothing when the table does not exist.
     *
     * @param tableNameStr name of the table to drop
     * @throws Exception if the existence check, disable or delete fails
     */
    public static void deleteTable(String tableNameStr) throws Exception {
        TableName tableName = TableName.valueOf(tableNameStr);
        if (!admin.tableExists(tableName)) {
            logger.error("table :{} not exist", tableNameStr);
            return;
        }
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
        logger.info("delete table:{}", tableNameStr);
    }

    /** Encodes a string as UTF-8 so the stored bytes do not depend on the platform charset. */
    private static byte[] toBytes(String text) {
        return text.getBytes(StandardCharsets.UTF_8);
    }

    /** Decodes UTF-8 bytes read back from HBase. */
    private static String asString(byte[] bytes) {
        return new String(bytes, StandardCharsets.UTF_8);
    }
}

这个java demo也参考了网上的demo