使用mapreduce向HBase写入数据,只有map,没有reduce

时间:2022-09-20 18:05:40
 1 /**
 2  * 使用mapreduce程序向HBase写入数据
 3  * 只有map,没有reduce
 4  * 使用TableMapReduceUtil.initTableReducerJob("user",null, job);
 5  *     TableMapReduceUtil.addDependencyJars(job);
 6  */
 7 package com.beifeng.hbase;
 8 
 9 import java.io.IOException;
10 
11 import org.apache.hadoop.conf.Configuration;
12 import org.apache.hadoop.fs.Path;
13 import org.apache.hadoop.hbase.HBaseConfiguration;
14 import org.apache.hadoop.hbase.client.Put;
 15 //注意这个类是hbase.mapreduce,而不是hbase.mapred,否则会报莫名其妙的错误
16 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
17 import org.apache.hadoop.hbase.mapreduce.TableReducer;
18 import org.apache.hadoop.io.LongWritable;
19 import org.apache.hadoop.io.NullWritable;
20 import org.apache.hadoop.io.Text;
21 import org.apache.hadoop.mapreduce.Job;
22 import org.apache.hadoop.mapreduce.Mapper;
23 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
24 
25 public class MapToHbase {
26     
27     private static class Map  extends Mapper<LongWritable, Text, NullWritable, Put>{
28         String keyrow = null;
29         String columnFamily = null;
30         String name = null;
31         String age = null;
32         String gender = null;
33         String hobby = null;
34         @Override
35         protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, NullWritable, Put>.Context context)
36                 throws IOException, InterruptedException {
37             String[] strs = value.toString().split("\\t");
38             if (strs.length == 6 && !strs[0].equals("rowkey")) {//skip first 1 row
39                 keyrow = strs[0];
40                 columnFamily = strs[1];
41                 name = strs[2];
42                 age = strs[3];
43                 gender = strs[4];
44                 hobby = strs[5];
45                 Put put = new Put(keyrow.getBytes());
46                 put.addColumn(columnFamily.getBytes(), "name".getBytes(), name.getBytes());
47                 put.addColumn(columnFamily.getBytes(), "age".getBytes(), age.getBytes());
48                 put.addColumn(columnFamily.getBytes(), "gender".getBytes(), gender.getBytes());
49                 put.addColumn(columnFamily.getBytes(), "hobby".getBytes(), hobby.getBytes());
50                 
51                 context.write(NullWritable.get(), put);
52             }
53         }
54         
55     }
56     
57     /*public static class testReduce extends TableReducer<NullWritable, Put, NullWritable>{
58 
59         @Override
60         protected void reduce(NullWritable key, Iterable<Put> values,
61                 Reducer<NullWritable, Put, NullWritable, Mutation>.Context context)
62                 throws IOException, InterruptedException {
63             for(Put put : values){
64                 context.write(NullWritable.get(), put);
65             }
66         }
67     }*/
68     
69     
70     public static void main(String[] args) throws IllegalArgumentException, IOException {
71         Configuration config = HBaseConfiguration.create();
72         //设置zookeeper的配置
73         config.set("hbase.zookeeper.quorum", "hadoopMaster,hdp102,hdp103");
74         Job job = Job.getInstance(config);
75         job.setJarByClass(MapToHbase.class);
76         job.setMapperClass(Map.class);
77         job.setMapOutputKeyClass(NullWritable.class);
78         //只有map没有reduce,所以设置reduce的数目为0
79         job.setNumReduceTasks(0);
80         //设置数据的输入路径,没有使用参数,直接在程序中写入HDFS的路径
81         FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.56.101:9000/test")); 
82         //驱动函数
83         TableMapReduceUtil.initTableReducerJob("user",null, job);
84         TableMapReduceUtil.addDependencyJars(job);
85         boolean b;
86         try {
87             b = job.waitForCompletion(true);
88         } catch (ClassNotFoundException e) {
89             e.printStackTrace();
90         } catch (InterruptedException e) {
91             e.printStackTrace();
92         }
93     }
94 
95 }

源测试数据为:

rowkey    columnFamily    name    age    gender    hobby
2001    info    jacky    12    male    run
2002    info    lilian    13    male    run
2003    info    buke    14    male    run
2004    info    sam    15    male    pingpang
2005    info    lucy    16    male    pingpang
2006    info    yaya    17    male    pingpang
2007    info    james    18    female    slag
2008    info    gugu    19    female    slag
2009    info    gaga    20    female    read
2010    info    salila    21    female    cook
2011    info    frank    22    female    smile

HBase查询结果为:

 2001                                      column=info:age, timestamp=1499800935968, value=12                                                                          
 2001                                      column=info:gender, timestamp=1499800935968, value=male                                                                     
 2001                                      column=info:hobby, timestamp=1499800935968, value=run                                                                       
 2001                                      column=info:name, timestamp=1499800935968, value=jacky                                                                      
 2002                                      column=info:age, timestamp=1499800935968, value=13                                                                          
 2002                                      column=info:gender, timestamp=1499800935968, value=male                                                                     
 2002                                      column=info:hobby, timestamp=1499800935968, value=run                                                                       
 2002                                      column=info:name, timestamp=1499800935968, value=lilian                                                                     
 2003                                      column=info:age, timestamp=1499800935968, value=14                                                                          
 2003                                      column=info:gender, timestamp=1499800935968, value=male                                                                     
 2003                                      column=info:hobby, timestamp=1499800935968, value=run                                                                       
 2003                                      column=info:name, timestamp=1499800935968, value=buke                                                                       
 2004                                      column=info:age, timestamp=1499800935968, value=15                                                                          
 2004                                      column=info:gender, timestamp=1499800935968, value=male                                                                     
 2004                                      column=info:hobby, timestamp=1499800935968, value=pingpang                                                                  
 2004                                      column=info:name, timestamp=1499800935968, value=sam                                                                        
 2005                                      column=info:age, timestamp=1499800935968, value=16                                                                          
 2005                                      column=info:gender, timestamp=1499800935968, value=male    

 

转载请注明出处,谢谢。