HBase batch insert API

Time: 2022-02-28 08:47:34

1. Data format (a.txt). Each line carries 11 tab-separated fields: reportTime, msisdn, apmac, acmac, host, siteType, upPackNum, downPackNum, upPayLoad, downPayLoad, httpStatus. Empty host/siteType fields keep their tab separators:

1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com		24	27	2481	24681	200
1363157995052	13826544101	5C-0E-8B-C7-F1-E0:CMCC	120.197.40.4			4	0	264	0	200
1363157991076	13926435656	20-10-7A-28-CC-0A:CMCC	120.196.100.99			2	4	132	1512	200
1363154400022	13926251106	5C-0E-8B-8B-B1-50:CMCC	120.197.40.4			4	0	240	0	200
1363157993044	18211575961	94-71-AC-CD-E6-18:CMCC-EASY	120.196.100.99	iface.qiyi.com	视频站点	15	12	1527	2106	200
1363157995074	84138413	5C-0E-8B-8C-E8-20:7DaysInn	120.197.40.4	122.72.52.12		20	16	4116	1432	200
1363157993055	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			18	15	1116	954	200
1363157995033	15920133257	5C-0E-8B-C7-BA-20:CMCC	120.197.40.4	sug.so.360.cn	信息安全	20	20	3156	2936	200
1363157983019	13719199419	68-A1-B7-03-07-B1:CMCC-EASY	120.196.100.82			4	0	240	0	200
1363157984041	13660577991	5C-0E-8B-92-5C-20:CMCC-EASY	120.197.40.4	s19.cnzz.com	网站统计	24	9	6960	690	200
1363157973098	15013685858	5C-0E-8B-C7-F7-90:CMCC	120.197.40.4	rank.ie.sogou.com	搜索引擎	28	27	3659	3538	200
1363157986029	15989002119	E8-99-C4-4E-93-E0:CMCC-EASY	120.196.100.99		网站统计	3	3	1938	180	200
1363157992093	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99			15	9	918	4938	200
1363157986041	13480253104	5C-0E-8B-C7-FC-80:CMCC-EASY	120.197.40.4			3	3	180	180	200
1363157984040	13602846565	5C-0E-8B-8B-B6-00:CMCC	120.197.40.4	2052.flash2-http.qq.com	综合门户	15	12	1938	2910	200
1363157995093	13922314466	00-FD-07-A2-EC-BA:CMCC	120.196.100.82	img.qfc.cn		12	12	3008	3720	200
1363157982040	13502468823	5C-0A-5B-6A-0B-D4:CMCC-EASY	120.196.100.99	y0.ifengimg.com	综合门户	57	102	7335	110349	200
1363157986072	18320173382	84-25-DB-4F-10-1A:CMCC-EASY	120.196.100.99	input.shouji.sogou.com	搜索引擎	21	18	9531	2412	200
1363157990043	13925057413	00-1F-64-E1-E6-9A:CMCC	120.196.100.55	t3.baidu.com	搜索引擎	69	63	11058	48243	200
1363157988072	13760778710	00-FD-07-A4-7B-08:CMCC	120.196.100.82			2	2	120	120	200
1363157985079	13823070001	20-7C-8F-70-68-1F:CMCC	120.196.100.99			6	3	360	180	200
1363157985069	13600217502	00-1F-64-E2-E8-B1:CMCC	120.196.100.55			18	138	1080	186852	200
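
As a quick sanity check of that layout, the snippet below (illustrative only, not part of the import job; the field order is taken from the column qualifiers used in step 3) splits the first sample line exactly the way the job will:

public class LineFormatCheck {
    public static void main(String[] args) {
        String line = "1363157985066\t13726230503\t00-FD-07-A4-72-B8:CMCC"
                + "\t120.196.100.82\ti02.c.aliimg.com\t\t24\t27\t2481\t24681\t200";
        String[] f = line.split("\t");
        System.out.println("reportTime = " + f[0]);         // 1363157985066
        System.out.println("msisdn     = " + f[1]);         // 13726230503
        System.out.println("siteType   = '" + f[5] + "'");  // empty for this line
        System.out.println("httpStatus = " + f[10]);        // 200
    }
}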


2. Create the HBase table with one column family in the HBase shell (straight quotes; the table name must match the TABLE_NAME constant in the code below): create 'wlan_log', 'cf'
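
To confirm the table is in place before running the job, a quick check from the HBase shell:

list
describe 'wlan_log'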


3. Code (HBaseImport.java):

package com.utils;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class HBaseImport {

    // Mapper: tags each input line with its future row key and passes the line through.
    static class BatchMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

        // One instance per mapper; map() runs single-threaded within a task.
        private final SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmss");

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, LongWritable, Text>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] splited = line.split("\t");
            String format = simpleDateFormat.format(new Date(Long.parseLong(splited[0].trim())));
            // Row key = msisdn + "_" + report time, e.g. 13726230503_20130313145945
            // for the first sample line (the exact value depends on the JVM time zone).
            String rowKey = splited[1] + "_" + format;
            Text v2s = new Text();
            v2s.set(rowKey + "\t" + line);
            context.write(key, v2s);
        }
    }
    // Reducer: splits the tagged line back apart and emits one Put per record;
    // TableOutputFormat writes the Puts to HBase in batches.
    static class BatchReducer extends TableReducer<LongWritable, Text, NullWritable> {

        private String family = "cf"; // column family created in step 2

        @Override
        protected void reduce(LongWritable key, Iterable<Text> v2s,
                Reducer<LongWritable, Text, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            for (Text v2 : v2s) {
                String[] splited = v2.toString().split("\t");
                String rowKey = splited[0];
                Put put = new Put(Bytes.toBytes(rowKey));
                // Put.add(family, qualifier, value) is the pre-1.0 API used throughout
                // this post; on HBase 1.0+ the equivalent call is put.addColumn(...).
                put.add(Bytes.toBytes(family), Bytes.toBytes("raw"), Bytes.toBytes(v2.toString()));
                put.add(Bytes.toBytes(family), Bytes.toBytes("reportTime"), Bytes.toBytes(splited[1]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("msisdn"), Bytes.toBytes(splited[2]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("apmac"), Bytes.toBytes(splited[3]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("acmac"), Bytes.toBytes(splited[4]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("host"), Bytes.toBytes(splited[5]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("siteType"), Bytes.toBytes(splited[6]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("upPackNum"), Bytes.toBytes(splited[7]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("downPackNum"), Bytes.toBytes(splited[8]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("upPayLoad"), Bytes.toBytes(splited[9]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("downPayLoad"), Bytes.toBytes(splited[10]));
                put.add(Bytes.toBytes(family), Bytes.toBytes("httpStatus"), Bytes.toBytes(splited[11]));
                context.write(NullWritable.get(), put);
            }
        }
    }
    private static final String TABLE_NAME = "wlan_log"; // must match the table created in step 2

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.80.20,192.168.80.21,192.168.80.22");
        //conf.set("hbase.rootdir", "hdfs://cluster/hbase");
        conf.set("hbase.rootdir", "hdfs://192.168.80.20:9000/hbase");
        conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

        // new Job(...) is deprecated on Hadoop 2.x, where Job.getInstance(conf, name)
        // is the replacement; kept here to match the old-style API used above.
        Job job = new Job(conf, HBaseImport.class.getSimpleName());
        TableMapReduceUtil.addDependencyJars(job); // ship the HBase jars with the job
        job.setJarByClass(HBaseImport.class);

        job.setMapperClass(BatchMapper.class);
        job.setReducerClass(BatchReducer.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);

        FileInputFormat.setInputPaths(job, "hdfs://192.168.80.20:9000/data");
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
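
4. Run and verify

Package the class and submit it with the hadoop launcher (the jar name below is a placeholder for whatever your build produces), then spot-check a few imported rows from the HBase shell:

hadoop jar hbase-import.jar com.utils.HBaseImport

scan 'wlan_log', {LIMIT => 2}

The MapReduce route pays off for large files. For smaller batches, the classic client API gives the same batched effect directly: collect Put objects into a list and hand the whole list to HTable in one call. A minimal sketch, assuming the same pre-1.0 client API used above and the table from step 2 (the class name and sample row key are illustrative):

package com.utils;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class BatchPutExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.80.20,192.168.80.21,192.168.80.22");

        HTable table = new HTable(conf, "wlan_log");
        table.setAutoFlush(false);                 // buffer puts client-side instead of one RPC each
        table.setWriteBufferSize(4 * 1024 * 1024); // flush roughly every 4 MB

        List<Put> puts = new ArrayList<Put>();
        // Example row key in the msisdn_timestamp format built by the mapper (illustrative value).
        Put put = new Put(Bytes.toBytes("13726230503_20130313145945"));
        put.add(Bytes.toBytes("cf"), Bytes.toBytes("httpStatus"), Bytes.toBytes("200"));
        puts.add(put);
        // ... build and add the remaining Puts the same way ...

        table.put(puts);      // one batched write for the whole list
        table.flushCommits(); // push anything still sitting in the write buffer
        table.close();
    }
}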