hadoop笔记之MapReduce的应用案例(利用MapReduce进行排序)

时间:2023-03-09 22:26:44
hadoop笔记之MapReduce的应用案例(利用MapReduce进行排序)

MapReduce的应用案例(利用MapReduce进行排序)

MapReduce的应用案例(利用MapReduce进行排序)

思路:

hadoop笔记之MapReduce的应用案例(利用MapReduce进行排序)

Reduce之后直接进行结果合并

具体样例:

程序名:Sort.java

import java.io.IOException;

import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.Partitioner;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

public class Sort {
//map将输入中的value化成IntWritable类型作为输出的key
public static class Map extends
Mapper<Object, Text, IntWritable, IntWritable> {

private static IntWritable data = new IntWritable();
//实现map函数
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();

data.set(Integer.parseInt(line));

context.write(data, new IntWritable(1));

}

}
/*reduce将输入中的key复制到输出数据的key上,
然后根据输入的value-list中的元素的个数决定key的输出次数,
用全局linenum来代表key的位次*/
public static class Reduce extends
Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

private static IntWritable linenum = new IntWritable(1);
//实现reduce函数
public void reduce(IntWritable key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {

for (IntWritable val : values) {

context.write(linenum, key);

linenum = new IntWritable(linenum.get() + 1);
}

}
}

public static class Partition extends Partitioner<IntWritable, IntWritable> {

@Override
public int getPartition(IntWritable key, IntWritable value,
int numPartitions) {
int MaxNumber = 65223;
int bound = MaxNumber / numPartitions + 1;
int keynumber = key.get();
for (int i = 0; i < numPartitions; i++) {
if (keynumber < bound * i && keynumber >= bound * (i - 1))
return i - 1;
}
return 0;
}
}

/**
* @param args
*/

public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage WordCount <int> <out>");
System.exit(2);
}
Job job = new Job(conf, "Sort");
job.setJarByClass(Sort.class);
//设置map和reduce处理类
job.setMapperClass(Map.class);
job.setPartitionerClass(Partition.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}

}