file1.txt
a 1
b 2
a 3
b 3
a 5
b 7
c 3
c 5
file2.txt
a 1
b 7
c 5
a 1
c 3
结果(每个键在两个文件中所有值的平均值):
a 2.2
b 4.75
c 4.0
代码:
package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
//import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that computes the arithmetic mean of the integer values
 * associated with each key.
 *
 * <p>Input is read as whitespace-separated tokens that are consumed in
 * {@code key value} pairs. Intermediate records carry a partial aggregate
 * encoded as the text {@code "sum,count"}, which lets the combiner pre-aggregate
 * without distorting the final average.
 */
public class Average {

    /** Separator between the sum and the count in an intermediate value. */
    private static final String SEPARATOR = ",";

    /**
     * Adds up a group of {@code "sum,count"} partial aggregates.
     *
     * @param values intermediate values, each of the form {@code "sum,count"}
     * @return a two-element array: index 0 is the total sum, index 1 the total count
     * @throws NumberFormatException if either part of a value is not an integer
     */
    private static long[] sumPartials(Iterable<Text> values) {
        // long accumulators so large inputs do not silently overflow an int.
        long sum = 0L;
        long count = 0L;
        for (Text value : values) {
            String[] parts = value.toString().split(SEPARATOR);
            sum += Long.parseLong(parts[0]);
            count += Long.parseLong(parts[1]);
        }
        return new long[] {sum, count};
    }

    /**
     * Emits {@code (key, "value,1")} for every {@code key value} token pair
     * found in an input record. A trailing key with no value is ignored.
     */
    public static class AverageMaper extends Mapper<Object, Text, Text, Text> {

        // Per-instance buffers: a static buffer would be shared by every mapper
        // instance living in the same JVM.
        private final Text word = new Text();
        private final Text partial = new Text();

        /**
         * Splits the record into tokens and writes one partial aggregate per pair.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   the input record
         * @param context sink for the emitted {@code (key, "value,1")} pairs
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                if (itr.hasMoreTokens()) {
                    partial.set(itr.nextToken() + SEPARATOR + "1");
                    context.write(word, partial);
                }
            }
        }
    }

    /**
     * Combiner that merges partial aggregates for a key into a single
     * {@code "sum,count"} record, keeping the same format the mapper emits.
     */
    public static class AveragerCombine extends Reducer<Text, Text, Text, Text> {

        /**
         * @param key     the grouping key
         * @param values  partial aggregates of the form {@code "sum,count"}
         * @param context sink for the merged {@code (key, "sum,count")} record
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long[] totals = sumPartials(values);
            context.write(key, new Text(totals[0] + SEPARATOR + totals[1]));
        }
    }

    /**
     * Reducer that merges all partial aggregates for a key and writes the
     * resulting average as a {@link DoubleWritable}.
     */
    public static class AveragerReduce extends Reducer<Text, Text, Text, DoubleWritable> {

        /**
         * @param key     the grouping key
         * @param values  partial aggregates of the form {@code "sum,count"}
         * @param context sink for the {@code (key, average)} record
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long[] totals = sumPartials(values);
            double res = (double) totals[0] / totals[1];
            context.write(key, new DoubleWritable(res));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args exactly two arguments: the input path and the output path;
     *             otherwise a usage message is printed and the JVM exits with status 2
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        if (args.length != 2) {
            System.err.println("please input two args<in> <out>\n");
            System.exit(2);
        }

        // NOTE(review): this constructor is deprecated in Hadoop 2+ in favour of
        // Job.getInstance(conf, name); kept so the example still builds against
        // older releases - confirm the target Hadoop version before switching.
        Job job = new Job(conf, "average job");
        job.setJarByClass(Average.class);
        job.setMapperClass(AverageMaper.class);
        job.setCombinerClass(AveragerCombine.class);
        job.setReducerClass(AveragerReduce.class);

        // Mapper/combiner emit (Text, Text); the reducer emits (Text, DoubleWritable).
        // Declare both explicitly so the job's declared types match what is written.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}