自定义Writable类型

时间:2022-02-02 16:09:14

对处理较复杂数据时, Writable自带的类型不能满足要求,可以创建一个类是使用Writable接口,实现一个write()和readFields()方法,还需要无参的构造(用于反射),和toString()(用于输出)

网上看到了一个统计手机流量的代码,大致数据如下

2323,13083012211,apmac,acmac,host,type,pack,pack,1000,1200,yes
2323,13083012211,apmac,acmac,host,type,pack,pack,1000,1200,yes
2323,13083012211,apmac,acmac,host,type,pack,pack,1000,1200,yes
2323,13083012204,apmac,acmac,host,type,pack,pack,200,300,yes
2323,13083012204,apmac,acmac,host,type,pack,pack,200,300,yes
2323,13083012204,apmac,acmac,host,type,pack,pack,200,300,yes

其他数据不用关心, 只看第2第9第10 列的数据, 分别表示手机号, 上传流量, 下载流量

MapReduce类

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataCount {
public static class DCMapper extends Mapper<LongWritable, Text, Text, DataBean>{
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//accept
String line = value.toString();
//split
String[] fields = line.split(",");
System.out.println(fields[1]+" "+fields[8]+" "+fields[9]);
String tel = fields[1];
long up = Long.parseLong(fields[8]);
long down = Long.parseLong(fields[9]);
DataBean bean = new DataBean(tel, up, down);
//send
context.write(new Text(tel), bean);
}

}

public static class DCReducer extends Reducer<Text, DataBean, Text, DataBean>{

@Override
protected void reduce(Text key, Iterable<DataBean> values, Context context)
throws IOException, InterruptedException {
long up_sum = 0;
long down_sum = 0;
for(DataBean bean : values){
up_sum += bean.getUpPayLoad();
down_sum += bean.getDownPayLoad();
}
DataBean bean = new DataBean("", up_sum, down_sum);
context.write(key, bean);
}
}

public static void main(String[] args) throws Exception {
args = new String[]{"/input/data1","/output"};
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);

job.setJarByClass(DataCount.class);

job.setMapperClass(DCMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(DataBean.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));

job.setReducerClass(DCReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DataBean.class);
FileOutputFormat.setOutputPath(job, new Path(args[1]));

job.waitForCompletion(true);

}
}

自定义的Writable

package com.cyh;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class DataBean implements Writable{

private String tel;

private long upPayLoad;

private long downPayLoad;

private long totalPayLoad;


public DataBean(){}

public DataBean(String tel, long upPayLoad, long downPayLoad) {
this.tel = tel;
this.upPayLoad = upPayLoad;
this.downPayLoad = downPayLoad;
this.totalPayLoad = upPayLoad + downPayLoad;
}



@Override
public String toString() {
return this.upPayLoad + "\t" + this.downPayLoad + "\t" + this.totalPayLoad;
}

// notice : 1 type 2 order
@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(tel);
out.writeLong(upPayLoad);
out.writeLong(downPayLoad);
out.writeLong(totalPayLoad);
}

@Override
public void readFields(DataInput in) throws IOException {
this.tel = in.readUTF();
this.upPayLoad = in.readLong();
this.downPayLoad = in.readLong();
this.totalPayLoad = in.readLong();

}

public String getTel() {
return tel;
}

public void setTel(String tel) {
this.tel = tel;
}

public long getUpPayLoad() {
return upPayLoad;
}

public void setUpPayLoad(long upPayLoad) {
this.upPayLoad = upPayLoad;
}

public long getDownPayLoad() {
return downPayLoad;
}

public void setDownPayLoad(long downPayLoad) {
this.downPayLoad = downPayLoad;
}

public long getTotalPayLoad() {
return totalPayLoad;
}

public void setTotalPayLoad(long totalPayLoad) {
this.totalPayLoad = totalPayLoad;
}
}