Hadoop学习之路(二十五)MapReduce的API使用(二)

时间:2022-12-13 20:38:58

学生成绩---增强版

数据信息

Hadoop学习之路(二十五)MapReduce的API使用(二)
 1 computer,huangxiaoming,85,86,41,75,93,42,85
 2 computer,xuzheng,54,52,86,91,42
 3 computer,huangbo,85,42,96,38
 4 english,zhaobenshan,54,52,86,91,42,85,75
 5 english,liuyifei,85,41,75,21,85,96,14
 6 algorithm,liuyifei,75,85,62,48,54,96,15
 7 computer,huangjiaju,85,75,86,85,85
 8 english,liuyifei,76,95,86,74,68,74,48
 9 english,huangdatou,48,58,67,86,15,33,85
10 algorithm,huanglei,76,95,86,74,68,74,48
11 algorithm,huangjiaju,85,75,86,85,85,74,86
12 computer,huangdatou,48,58,67,86,15,33,85
13 english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
14 english,huangbo,85,42,96,38,55,47,22
15 algorithm,liutao,85,75,85,99,66
16 computer,huangzitao,85,86,41,75,93,42,85
17 math,wangbaoqiang,85,86,41,75,93,42,85
18 computer,liujialing,85,41,75,21,85,96,14,74,86
19 computer,liuyifei,75,85,62,48,54,96,15
20 computer,liutao,85,75,85,99,66,88,75,91
21 computer,huanglei,76,95,86,74,68,74,48
22 english,liujialing,75,85,62,48,54,96,15
23 math,huanglei,76,95,86,74,68,74,48
24 math,huangjiaju,85,75,86,85,85,74,86
25 math,liutao,48,58,67,86,15,33,85
26 english,huanglei,85,75,85,99,66,88,75,91
27 math,xuzheng,54,52,86,91,42,85,75
28 math,huangxiaoming,85,75,85,99,66,88,75,91
29 math,liujialing,85,86,41,75,93,42,85,75
30 english,huangxiaoming,85,86,41,75,93,42,85
31 algorithm,huangdatou,48,58,67,86,15,33,85
32 algorithm,huangzitao,85,86,41,75,93,42,85,75
View Code

数据解释

数据字段个数不固定:
第一个是课程名称,总共四个课程,computer,math,english,algorithm,
第二个是学生姓名,后面是每次考试的分数

统计需求

1、统计每门课程的参考人数和课程平均分

2、统计每门课程参考学生的平均分,并且按课程存入不同的结果文件,要求一门课程一个结果文件,并且按平均分从高到低排序,分数保留一位小数

3、求出每门课程参考学生成绩最高的学生的信息:课程,姓名和平均分

第一题

MRAvgScore1.java

Hadoop学习之路(二十五)MapReduce的API使用(二)
  1 /**
  2  * 需求:统计每门课程的参考人数和课程平均分
  3  * */
  4 public class MRAvgScore1 {
  5 
  6     public static void main(String[] args) throws Exception {
  7         
  8         Configuration conf1 = new Configuration();
  9         Configuration conf2 = new Configuration();
 10         
 11         Job job1 = Job.getInstance(conf1);
 12         Job job2 = Job.getInstance(conf2);
 13             
 14         job1.setJarByClass(MRAvgScore1.class);
 15         job1.setMapperClass(AvgScoreMapper1.class);
 16         //job.setReducerClass(MFReducer.class);
 17             
 18         job1.setOutputKeyClass(Text.class);
 19         job1.setOutputValueClass(DoubleWritable.class);
 20         
 21         Path inputPath1 = new Path("D:\\MR\\hw\\work3\\input");
 22         Path outputPath1 = new Path("D:\\MR\\hw\\work3\\output_hw1_1");
 23             
 24         FileInputFormat.setInputPaths(job1, inputPath1);
 25         FileOutputFormat.setOutputPath(job1, outputPath1);
 26         
 27         
 28         job2.setMapperClass(AvgScoreMapper2.class);
 29         job2.setReducerClass(AvgScoreReducer2.class);
 30             
 31         job2.setOutputKeyClass(Text.class);
 32         job2.setOutputValueClass(DoubleWritable.class);
 33         
 34         Path inputPath2 = new Path("D:\\MR\\hw\\work3\\output_hw1_1");
 35         Path outputPath2 = new Path("D:\\MR\\hw\\work3\\output_hw1_end");
 36             
 37         FileInputFormat.setInputPaths(job2, inputPath2);
 38         FileOutputFormat.setOutputPath(job2, outputPath2);
 39         
 40         JobControl control = new JobControl("AvgScore");
 41         
 42         ControlledJob aJob = new ControlledJob(job1.getConfiguration());
 43         ControlledJob bJob = new ControlledJob(job2.getConfiguration());
 44         
 45         bJob.addDependingJob(aJob);
 46         
 47         control.addJob(aJob);
 48         control.addJob(bJob);
 49         
 50         Thread thread = new Thread(control);
 51         thread.start();
 52         
 53         while(!control.allFinished()) {
 54             thread.sleep(1000);
 55         }
 56         System.exit(0);
 57         
 58     }
 59     
 60     
 61     
 62     /**
 63      * 数据类型:computer,huangxiaoming,85,86,41,75,93,42,85
 64      * 
 65      * 需求:统计每门课程的参考人数和课程平均分
 66      * 
 67      * 分析:以课程名称+姓名作为key,以平均分数作为value
 68      * */
 69     public static class AvgScoreMapper1 extends Mapper<LongWritable, Text, Text, DoubleWritable>{
 70         
 71         @Override
 72         protected void map(LongWritable key, Text value,Context context)
 73                 throws IOException, InterruptedException {
 74             
 75             String[] splits = value.toString().split(",");
 76             //拼接成要输出的key
 77             String outKey = splits[0]+"\t"+splits[1];
 78             int length = splits.length;
 79             int sum = 0;
 80             //求出成绩的总和
 81             for(int i=2;i<length;i++) {
 82                 sum += Integer.parseInt(splits[i]);
 83             }
 84             //求出平均分
 85             double outValue = sum / (length - 2);
 86             
 87             context.write(new Text(outKey), new DoubleWritable(outValue));
 88             
 89         }
 90         
 91     }
 92     
 93     /**
 94      * 对第一次MapReduce输出的结果进一步计算,第一步输出结果样式为
 95      *  math    huangjiaju    82.0
 96      *  math    huanglei    74.0
 97      *    math    huangxiaoming    83.0
 98      *    math    liujialing    72.0
 99      *    math    liutao    56.0
100      *    math    wangbaoqiang    72.0
101      *    math    xuzheng    69.0
102      * 
103      *    需求:统计每门课程的参考人数和课程平均分 
104      *    分析:以课程名称作为key,以分数作为value进行 输出
105      * 
106      * */
107     public static class AvgScoreMapper2 extends Mapper<LongWritable, Text, Text, DoubleWritable>{
108         
109         @Override
110         protected void map(LongWritable key, Text value,Context context)
111                 throws IOException, InterruptedException {
112             
113             String[] splits = value.toString().split("\t");
114             String outKey = splits[0];
115             String outValue = splits[2];
116             
117             context.write(new Text(outKey), new DoubleWritable(Double.parseDouble(outValue)));
118         }
119         
120     }
121     
122     /**
123      * 针对同一门课程,对values进行遍历计数,看看有多少人参加了考试,并计算出平均成绩
124      * */
125     public static class AvgScoreReducer2 extends Reducer<Text, DoubleWritable, Text, Text>{
126         
127         @Override
128         protected void reduce(Text key, Iterable<DoubleWritable> values,
129                 Context context) throws IOException, InterruptedException {
130             
131             int count = 0;
132             double sum = 0;
133             for(DoubleWritable value : values) {
134                 count++;
135                 sum += value.get();
136             }
137             
138             double avg = sum / count;
139             String outValue = count + "\t" + avg;
140             context.write(key, new Text(outValue));
141         }
142         
143     }
144     
145     
146 }
View Code

第二题

MRAvgScore2.java

Hadoop学习之路(二十五)MapReduce的API使用(二)
 1 public class MRAvgScore2 {
 2 
 3     public static void main(String[] args) throws Exception {
 4         
 5         Configuration conf = new Configuration();
 6         
 7         Job job = Job.getInstance(conf);
 8             
 9         job.setJarByClass(MRAvgScore2.class);
10         job.setMapperClass(ScoreMapper3.class);
11         job.setReducerClass(ScoreReducer3.class);
12             
13         job.setOutputKeyClass(StudentBean.class);
14         job.setOutputValueClass(NullWritable.class);
15         
16         job.setPartitionerClass(CoursePartitioner.class);
17         job.setNumReduceTasks(4);
18         
19         Path inputPath = new Path("D:\\MR\\hw\\work3\\output_hw1_1");
20         Path outputPath = new Path("D:\\MR\\hw\\work3\\output_hw2_1");
21             
22         FileInputFormat.setInputPaths(job, inputPath);
23         FileOutputFormat.setOutputPath(job, outputPath);
24         boolean isDone = job.waitForCompletion(true);
25         System.exit(isDone ? 0 : 1);
26     }
27     
28     
29     public static class ScoreMapper3 extends Mapper<LongWritable, Text, StudentBean, NullWritable>{
30         
31         @Override
32         protected void map(LongWritable key, Text value,Context context)
33                 throws IOException, InterruptedException {
34             
35             String[] splits = value.toString().split("\t");
36             
37             double score = Double.parseDouble(splits[2]);
38             DecimalFormat df = new DecimalFormat("#.0");
39             df.format(score);
40             
41             StudentBean student = new StudentBean(splits[0],splits[1],score);
42             
43             context.write(student, NullWritable.get());
44             
45         }
46         
47     }
48     
49     public static class ScoreReducer3 extends Reducer<StudentBean, NullWritable, StudentBean, NullWritable>{
50         
51         @Override
52         protected void reduce(StudentBean key, Iterable<NullWritable> values,Context context)
53                 throws IOException, InterruptedException {
54 
55             for(NullWritable nvl : values){
56                 context.write(key, nvl);
57             }
58             
59         }
60     }
61 }
View Code

StudentBean.java

Hadoop学习之路(二十五)MapReduce的API使用(二)
 1 public class StudentBean implements WritableComparable<StudentBean>{
 2     private String course;
 3     private String name;
 4     private double avgScore;
 5     
 6     public String getCourse() {
 7         return course;
 8     }
 9     public void setCourse(String course) {
10         this.course = course;
11     }
12     public String getName() {
13         return name;
14     }
15     public void setName(String name) {
16         this.name = name;
17     }
18     public double getavgScore() {
19         return avgScore;
20     }
21     public void setavgScore(double avgScore) {
22         this.avgScore = avgScore;
23     }
24     public StudentBean(String course, String name, double avgScore) {
25         super();
26         this.course = course;
27         this.name = name;
28         this.avgScore = avgScore;
29     }
30     public StudentBean() {
31         super();
32     }
33     
34     @Override
35     public String toString() {
36         return course + "\t" + name + "\t" + avgScore;
37     }
38     @Override
39     public void readFields(DataInput in) throws IOException {
40         course = in.readUTF();
41         name = in.readUTF();
42         avgScore = in.readDouble();
43     }
44     @Override
45     public void write(DataOutput out) throws IOException {
46         out.writeUTF(course);
47         out.writeUTF(name);
48         out.writeDouble(avgScore);
49     }
50     @Override
51     public int compareTo(StudentBean stu) {
52         double diffent =  this.avgScore - stu.avgScore;
53         if(diffent == 0) {
54             return 0;
55         }else {
56             return diffent > 0 ? -1 : 1;
57         }
58     }
59     
60     
61 }
View Code

第三题

MRScore3.java

Hadoop学习之路(二十五)MapReduce的API使用(二)
  1 public class MRScore3 {
  2 
  3     public static void main(String[] args) throws Exception {
  4         
  5         Configuration conf1 = new Configuration();
  6         Configuration conf2 = new Configuration();
  7         
  8         Job job1 = Job.getInstance(conf1);
  9         Job job2 = Job.getInstance(conf2);
 10             
 11         job1.setJarByClass(MRScore3.class);
 12         job1.setMapperClass(MRMapper3_1.class);
 13         //job.setReducerClass(ScoreReducer3.class);
 14         
 15         
 16         job1.setMapOutputKeyClass(IntWritable.class);
 17         job1.setMapOutputValueClass(StudentBean.class);
 18         job1.setOutputKeyClass(IntWritable.class);
 19         job1.setOutputValueClass(StudentBean.class);
 20         
 21         job1.setPartitionerClass(CoursePartitioner2.class);
 22         
 23         job1.setNumReduceTasks(4);
 24         
 25         Path inputPath = new Path("D:\\MR\\hw\\work3\\input");
 26         Path outputPath = new Path("D:\\MR\\hw\\work3\\output_hw3_1");
 27             
 28         FileInputFormat.setInputPaths(job1, inputPath);
 29         FileOutputFormat.setOutputPath(job1, outputPath);
 30         
 31         job2.setMapperClass(MRMapper3_2.class);
 32         job2.setReducerClass(MRReducer3_2.class);
 33         
 34         job2.setMapOutputKeyClass(IntWritable.class);
 35         job2.setMapOutputValueClass(StudentBean.class);
 36         job2.setOutputKeyClass(StudentBean.class);
 37         job2.setOutputValueClass(NullWritable.class);
 38         
 39         Path inputPath2 = new Path("D:\\MR\\hw\\work3\\output_hw3_1");
 40         Path outputPath2 = new Path("D:\\MR\\hw\\work3\\output_hw3_end");
 41             
 42         FileInputFormat.setInputPaths(job2, inputPath2);
 43         FileOutputFormat.setOutputPath(job2, outputPath2);
 44         
 45         JobControl control = new JobControl("Score3");
 46         
 47         ControlledJob aJob = new ControlledJob(job1.getConfiguration());
 48         ControlledJob bJob = new ControlledJob(job2.getConfiguration());
 49         
 50         bJob.addDependingJob(aJob);
 51         
 52         control.addJob(aJob);
 53         control.addJob(bJob);
 54         
 55         Thread thread = new Thread(control);
 56         thread.start();
 57         
 58         while(!control.allFinished()) {
 59             thread.sleep(1000);
 60         }
 61         System.exit(0);
 62         
 63 
 64     }
 65     
 66     
 67     
 68     
 69     public static class MRMapper3_1 extends Mapper<LongWritable, Text, IntWritable, StudentBean>{
 70         
 71         StudentBean outKey = new StudentBean();
 72         IntWritable outValue = new IntWritable();
 73         List<String> scoreList = new ArrayList<>();
 74         
 75         protected void map(LongWritable key, Text value, Context context) throws java.io.IOException ,InterruptedException {
 76             
 77             scoreList.clear();
 78             String[] splits = value.toString().split(",");
 79             long sum = 0;
 80             
 81             for(int i=2;i<splits.length;i++) {
 82                 scoreList.add(splits[i]);
 83                 sum += Long.parseLong(splits[i]);
 84             }
 85             
 86             Collections.sort(scoreList);
 87             outValue.set(Integer.parseInt(scoreList.get(scoreList.size()-1)));
 88             
 89             double avg = sum * 1.0/(splits.length-2);
 90             outKey.setCourse(splits[0]);
 91             outKey.setName(splits[1]);
 92             outKey.setavgScore(avg);
 93             
 94             context.write(outValue, outKey);
 95                                 
 96         };
 97     }
 98     
 99     
100     
101     public static class MRMapper3_2 extends Mapper<LongWritable, Text,IntWritable, StudentBean >{
102         
103         StudentBean outValue = new StudentBean();
104         IntWritable outKey = new IntWritable();
105         
106         protected void map(LongWritable key, Text value, Context context) throws java.io.IOException ,InterruptedException {
107             
108             String[] splits = value.toString().split("\t");
109             outKey.set(Integer.parseInt(splits[0]));
110             
111             outValue.setCourse(splits[1]);
112             outValue.setName(splits[2]);
113             outValue.setavgScore(Double.parseDouble(splits[3]));
114             
115             context.write(outKey, outValue);
116             
117             
118         };
119     }
120     
121     
122     public static class MRReducer3_2 extends Reducer<IntWritable, StudentBean, StudentBean, NullWritable>{
123         
124         StudentBean outKey = new StudentBean();
125         
126         @Override
127         protected void reduce(IntWritable key, Iterable<StudentBean> values,Context context)
128                 throws IOException, InterruptedException {
129             
130             int length = values.toString().length();
131             
132             for(StudentBean value : values) {
133                 outKey = value;
134             }
135             
136             context.write(outKey, NullWritable.get());
137             
138         }
139     }
140     
141     
142 }
View Code