hadoop实现grep示例分享

时间:2022-04-29 04:23:11

hadoop做的一个简单grep程序,可从文档中提取包含某些字符串的行

 

复制代码代码如下:


/*
 * 一个简单grep程序,可从文档中提取包含莫些字符串的行
 */

 

public class grep extends Configured  implements Tool{

 public static  class grepMap extends Mapper<LongWritable, Text, Text,NullWritable>{

  public void map(LongWritable line,Text value,Context context) throws IOException, InterruptedException{
   //通过Configuration获取参数
   String str = context.getConfiguration().get("grep");
   if(value.toString().contains(str)){
    context.write(value, NullWritable.get());
   }
  }
 }
 @Override
 public int run(String[] args) throws Exception {

  if(args.length!=3){
   System.out.println("ERROR");
   System.exit(1);
  }

  Configuration configuration = getConf();
  //传递参数
  configuration.set("grep", args[2]);
  Job job = new Job(configuration,"grep");

  job.setJarByClass(grep.class);
  job.setMapperClass(grepMap.class);
  job.setNumReduceTasks(0);

  job.setMapOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  Path in = new Path(args[0]);
  Path out = new Path(args[1]);
  FileSystem fileSystem = out.getFileSystem(configuration);
  if(fileSystem.exists(out))
   fileSystem.delete(out, true);

  FileInputFormat.addInputPath(job, in);
  FileOutputFormat.setOutputPath(job, out);

  System.exit(job.waitForCompletion(true)?0:1);
  return 0;
 }