Hadoop 中的Hello world 代码如下:
package com.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Word-count MapReduce job: the "Hello world" of Hadoop.
 *
 * <p>Program: hadoop_demo. Author: Mr.Walloce. Created: 2018/11/03 15:04.
 *
 * <p>The mapper, combiner and reducer each print a marker line on entry and exit so that the
 * order in which the framework invokes them can be read from the console output.
 */
public class WordCount extends Configured implements Tool {

    /** Input file used by {@link #main(String[])} when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "hdfs://walloce.one:8020/walloce/data/test.txt";

    /** Parent directory under which {@link #main(String[])} creates a randomly named output dir. */
    private static final String DEFAULT_OUTPUT_PARENT = "hdfs://walloce.one:8020/walloce/output/";

    /**
     * Mapper with input types {@code <LongWritable, Text>} and output types
     * {@code <Text, IntWritable>}: emits {@code (word, 1)} for every word of the input value.
     */
    static class MyMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Reused output key holding the current word.
        private final Text outKey = new Text();

        // Constant output value: every occurrence counts as 1.
        private final IntWritable outValue = new IntWritable(1);

        /**
         * Splits one input value into words and emits each word with a count of 1.
         *
         * @param key     offset of the input string (unused)
         * @param value   the input string
         * @param context job context used to emit the output pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            System.out.println("map阶段开始执行,,,");
            // Split on runs of whitespace and skip empty tokens, so blank lines and
            // repeated or leading spaces do not produce an empty-string "word".
            for (String word : value.toString().split("\\s+")) {
                if (word.isEmpty()) {
                    continue;
                }
                outKey.set(word);
                context.write(outKey, outValue);
            }
            System.out.println("map阶段结束。。。");
        }
    }

    /**
     * Reducer with input and output types {@code <Text, IntWritable>}: sums the counts received
     * for a key, prints the total and emits {@code (key, total)}.
     */
    static class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Reused output value holding the total for the current key.
        private final IntWritable result = new IntWritable();

        /**
         * Sums all counts for one key and writes the total.
         *
         * @param key     the word
         * @param values  the counts received for this word
         * @param context job context used to emit the output pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            System.out.println("Reduce阶段开始执行...");
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            System.out.println("单词" + key.toString() + ": " + result.get());
            context.write(key, result);
            System.out.println("Reduce阶段结束。。。");
        }
    }

    /**
     * Combiner registered via {@code job.setCombinerClass}: sums the counts it receives for a key
     * and emits {@code (key, partialTotal)}. Kept separate from {@link MyReduce} only so that it
     * prints its own marker lines.
     */
    static class MyCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Reused output value holding the partial total for the current key.
        private final IntWritable result = new IntWritable();

        /**
         * Sums the counts for one key and writes the partial total.
         *
         * @param key     the word
         * @param values  the counts received for this word
         * @param context job context used to emit the output pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            System.out.println("Combiner阶段开始...");
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
            System.out.println("Combiner阶段结束。。。");
        }
    }

    /**
     * Configures and runs the word-count job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path; an existing
     *             output path is deleted recursively before the job starts
     * @return 0 if the job succeeded, 1 if it failed, 2 if fewer than two paths were supplied
     * @throws Exception if job setup, file-system access or job execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + getClass().getSimpleName() + " <input path> <output path>");
            return 2;
        }

        // Standard Hadoop boilerplate: build the job from the configuration given to ToolRunner.
        Configuration conf = this.getConf();
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());

        // Needed when running on a cluster; optional for a local run (best to always set it).
        job.setJarByClass(WordCount.class);

        // Input and output paths.
        Path inputPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inputPath);
        Path outputPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputPath);

        // The output path must not already exist, so remove any previous output first.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        // Map-side settings.
        job.setMapperClass(MyMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Shuffle-side settings.
        job.setCombinerClass(MyCombiner.class);

        // Reduce-side settings.
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 0 on success, 1 on failure.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Entry point. Uses the input and output paths from the command line when both are given;
     * otherwise falls back to the demo input file and a randomly named output directory.
     * The process exit code is the job's exit code, or 1 if an exception was thrown.
     *
     * @param args optional {@code <input path> <output path>}
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        String[] jobArgs = args;
        if (jobArgs == null || jobArgs.length < 2) {
            jobArgs = new String[]{
                    DEFAULT_INPUT,
                    DEFAULT_OUTPUT_PARENT + Math.random()
            };
        }

        int exitCode;
        try {
            exitCode = ToolRunner.run(conf, new WordCount(), jobArgs);
        } catch (Exception e) {
            e.printStackTrace();
            exitCode = 1;
        }
        // Propagate the result so a failed job is not reported as success to the caller.
        System.exit(exitCode);
    }
}
运行异常代码:
18/11/22 15:06:00 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 18/11/22 15:06:02 INFO client.RMProxy: Connecting to ResourceManager at walloce.one/192.168.206.143:8032 18/11/22 15:06:03 WARN mapreduce.JobSubmitter: No job jar file set. User classes may not be found. See Job or Job#setJar(String). 18/11/22 15:06:03 INFO input.FileInputFormat: Total input paths to process : 1 18/11/22 15:06:03 INFO mapreduce.JobSubmitter: number of splits:1 18/11/22 15:06:04 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1542897380554_0004 18/11/22 15:06:04 INFO mapred.YARNRunner: Job jar is not present. Not adding any jar to the list of resources. 18/11/22 15:06:04 INFO impl.YarnClientImpl: Submitted application application_1542897380554_0004 18/11/22 15:06:04 INFO mapreduce.Job: The url to track the job: http://walloce.one:8088/proxy/application_1542897380554_0004/ 18/11/22 15:06:04 INFO mapreduce.Job: Running job: job_1542897380554_0004 18/11/22 15:06:08 INFO mapreduce.Job: Job job_1542897380554_0004 running in uber mode : false 18/11/22 15:06:08 INFO mapreduce.Job: map 0% reduce 0% 18/11/22 15:06:08 INFO mapreduce.Job: Job job_1542897380554_0004 failed with state FAILED due to: Application application_1542897380554_0004 failed 2 times due to AM Container for appattempt_1542897380554_0004_000002 exited with exitCode: 1 due to: Exception from container-launch: ExitCodeException exitCode=1: /bin/bash: line 0: fg: no job control ExitCodeException exitCode=1: /bin/bash: line 0: fg: no job control at org.apache.hadoop.util.Shell.runCommand(Shell.java:538) at org.apache.hadoop.util.Shell.run(Shell.java:455) at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702) at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195) at 
org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:300) at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:81) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Container exited with a non-zero exit code 1 .Failing this attempt.. Failing the application.
检查配置文件yarn-site.xml(注:mapreduce.framework.name 按惯例应配置在 mapred-site.xml 中):
1 <!-- 指定计算模型在yarn上 --> 2 <property> 3 <name>mapreduce.framework.name</name> 4 <value>yarn</value> 5 </property>
由于是在本地(Windows 上的 IDE)直接运行,作业被提交到 Linux 集群的 YARN 后,AM 容器的启动命令与平台不兼容,于是出现上面的 "fg: no job control" 错误,所以计算模型要指定在本地:
1 <!-- 指定计算模型在本地 --> 2 <property> 3 <name>mapreduce.framework.name</name> 4 <value>local</value> 5 </property>
文件修改后,可以运行成功。
运行结果:
1 "C:\Program Files\Java\jdk1.8.0_162\bin\java.exe" "-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2018.1.6\lib\idea_rt.jar=13319:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2018.1.6\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_162\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\resources.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\rt.jar;E:\IdeaWorkspace\hadoop_demo\target\classes;E:\repository3\org\apache\hadoop\hadoop-common\2.5.0\hadoop-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-annotations\2.5.0\hadoop-annotations-2.5.0.jar;C:\Program 
Files\Java\jdk1.8.0_162\lib\tools.jar;E:\repository3\com\google\guava\guava\11.0.2\guava-11.0.2.jar;E:\repository3\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;E:\repository3\org\apache\commons\commons-math3\3.1.1\commons-math3-3.1.1.jar;E:\repository3\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;E:\repository3\commons-httpclient\commons-httpclient\3.1\commons-httpclient-3.1.jar;E:\repository3\commons-codec\commons-codec\1.4\commons-codec-1.4.jar;E:\repository3\commons-io\commons-io\2.4\commons-io-2.4.jar;E:\repository3\commons-net\commons-net\3.1\commons-net-3.1.jar;E:\repository3\commons-collections\commons-collections\3.2.1\commons-collections-3.2.1.jar;E:\repository3\javax\servlet\servlet-api\2.5\servlet-api-2.5.jar;E:\repository3\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar;E:\repository3\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;E:\repository3\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar;E:\repository3\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar;E:\repository3\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;E:\repository3\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar;E:\repository3\javax\xml\bind\jaxb-api\2.2.2\jaxb-api-2.2.2.jar;E:\repository3\javax\xml\stream\stax-api\1.0-2\stax-api-1.0-2.jar;E:\repository3\javax\activation\activation\1.1\activation-1.1.jar;E:\repository3\org\codehaus\jackson\jackson-jaxrs\1.8.3\jackson-jaxrs-1.8.3.jar;E:\repository3\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar;E:\repository3\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar;E:\repository3\asm\asm\3.1\asm-3.1.jar;E:\repository3\tomcat\jasper-compiler\5.5.23\jasper-compiler-5.5.23.jar;E:\repository3\tomcat\jasper-runtime\5.5.23\jasper-runtime-5.5.23.jar;E:\repository3\javax\servlet\jsp\jsp-api\2.1\jsp-api-2.1.jar;E:\repository3\commons-el\commons-el\1.0\commons-el-1.0.jar;E:\repository3\commons-logging\commons-logging\1.1.3\commons-logging-1.1.3.jar;E:\repository3\log4j\log4j\1.2.17\log4j-1.2.17.jar;E:\repository3\net
\java\dev\jets3t\jets3t\0.9.0\jets3t-0.9.0.jar;E:\repository3\org\apache\httpcomponents\httpclient\4.1.2\httpclient-4.1.2.jar;E:\repository3\org\apache\httpcomponents\httpcore\4.1.2\httpcore-4.1.2.jar;E:\repository3\com\jamesmurty\utils\java-xmlbuilder\0.4\java-xmlbuilder-0.4.jar;E:\repository3\commons-lang\commons-lang\2.6\commons-lang-2.6.jar;E:\repository3\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;E:\repository3\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;E:\repository3\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar;E:\repository3\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutils-core-1.8.0.jar;E:\repository3\org\slf4j\slf4j-api\1.7.5\slf4j-api-1.7.5.jar;E:\repository3\org\slf4j\slf4j-log4j12\1.7.5\slf4j-log4j12-1.7.5.jar;E:\repository3\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar;E:\repository3\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar;E:\repository3\org\apache\avro\avro\1.7.4\avro-1.7.4.jar;E:\repository3\com\thoughtworks\paranamer\paranamer\2.3\paranamer-2.3.jar;E:\repository3\org\xerial\snappy\snappy-java\1.0.4.1\snappy-java-1.0.4.1.jar;E:\repository3\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-auth\2.5.0\hadoop-auth-2.5.0.jar;E:\repository3\org\apache\directory\server\apacheds-kerberos-codec\2.0.0-M15\apacheds-kerberos-codec-2.0.0-M15.jar;E:\repository3\org\apache\directory\server\apacheds-i18n\2.0.0-M15\apacheds-i18n-2.0.0-M15.jar;E:\repository3\org\apache\directory\api\api-asn1-api\1.0.0-M20\api-asn1-api-1.0.0-M20.jar;E:\repository3\org\apache\directory\api\api-util\1.0.0-M20\api-util-1.0.0-M20.jar;E:\repository3\com\jcraft\jsch\0.1.42\jsch-0.1.42.jar;E:\repository3\com\google\code\findbugs\jsr305\1.3.9\jsr305-1.3.9.jar;E:\repository3\org\apache\commons\commons-compress\1.4.1\commons-compress-1.4.1.jar;E:\repository3\org\tukaani\xz\1.0\xz-1.0.j
ar;E:\repository3\org\apache\hadoop\hadoop-hdfs\2.5.0\hadoop-hdfs-2.5.0.jar;E:\repository3\commons-daemon\commons-daemon\1.0.13\commons-daemon-1.0.13.jar;E:\repository3\io\netty\netty\3.6.2.Final\netty-3.6.2.Final.jar;E:\repository3\org\apache\hadoop\hadoop-client\2.5.0\hadoop-client-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-app\2.5.0\hadoop-mapreduce-client-app-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-common\2.5.0\hadoop-mapreduce-client-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-client\2.5.0\hadoop-yarn-client-2.5.0.jar;E:\repository3\com\sun\jersey\jersey-client\1.9\jersey-client-1.9.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-server-common\2.5.0\hadoop-yarn-server-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-shuffle\2.5.0\hadoop-mapreduce-client-shuffle-2.5.0.jar;E:\repository3\org\fusesource\leveldbjni\leveldbjni-all\1.8\leveldbjni-all-1.8.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-api\2.5.0\hadoop-yarn-api-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-core\2.5.0\hadoop-mapreduce-client-core-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-common\2.5.0\hadoop-yarn-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-jobclient\2.5.0\hadoop-mapreduce-client-jobclient-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-examples\2.5.0\hadoop-mapreduce-examples-2.5.0.jar;E:\repository3\org\apache\zookeeper\zookeeper\3.4.5\zookeeper-3.4.5.jar;E:\repository3\jline\jline\0.9.94\jline-0.9.94.jar;E:\repository3\org\jboss\netty\netty\3.2.2.Final\netty-3.2.2.Final.jar" com.wordcount.WordCount 2 18/11/22 15:37:44 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 3 18/11/22 15:37:47 INFO Configuration.deprecation: session.id is deprecated. 
Instead, use dfs.metrics.session-id 4 18/11/22 15:37:47 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= 5 18/11/22 15:37:49 WARN mapreduce.JobSubmitter: No job jar file set. User classes may not be found. See Job or Job#setJar(String). 6 18/11/22 15:37:49 INFO input.FileInputFormat: Total input paths to process : 1 7 18/11/22 15:37:50 INFO mapreduce.JobSubmitter: number of splits:1 8 18/11/22 15:37:50 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_local857633983_0001 9 18/11/22 15:37:50 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/staging/YinYichang857633983/.staging/job_local857633983_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring. 10 18/11/22 15:37:50 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/staging/YinYichang857633983/.staging/job_local857633983_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring. 11 18/11/22 15:37:51 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/local/localRunner/YinYichang/job_local857633983_0001/job_local857633983_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring. 12 18/11/22 15:37:51 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/local/localRunner/YinYichang/job_local857633983_0001/job_local857633983_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring. 
13 18/11/22 15:37:51 INFO mapreduce.Job: The url to track the job: http://localhost:8080/ 14 18/11/22 15:37:51 INFO mapreduce.Job: Running job: job_local857633983_0001 15 18/11/22 15:37:51 INFO mapred.LocalJobRunner: OutputCommitter set in config null 16 18/11/22 15:37:51 INFO mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter 17 18/11/22 15:37:51 INFO mapred.LocalJobRunner: Waiting for map tasks 18 18/11/22 15:37:51 INFO mapred.LocalJobRunner: Starting task: attempt_local857633983_0001_m_000000_0 19 18/11/22 15:37:51 INFO util.ProcfsBasedProcessTree: ProcfsBasedProcessTree currently is supported only on Linux. 20 18/11/22 15:37:51 INFO mapred.Task: Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@3a8763db 21 18/11/22 15:37:51 INFO mapred.MapTask: Processing split: hdfs://walloce.one:8020/walloce/data/test.txt:0+173 22 18/11/22 15:37:51 INFO mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer 23 18/11/22 15:37:51 INFO mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584) 24 18/11/22 15:37:51 INFO mapred.MapTask: mapreduce.task.io.sort.mb: 100 25 18/11/22 15:37:51 INFO mapred.MapTask: soft limit at 83886080 26 18/11/22 15:37:51 INFO mapred.MapTask: bufstart = 0; bufvoid = 104857600 27 18/11/22 15:37:51 INFO mapred.MapTask: kvstart = 26214396; length = 6553600 28 map阶段开始执行,,, 29 map阶段结束。。。 30 map阶段开始执行,,, 31 map阶段结束。。。 32 map阶段开始执行,,, 33 map阶段结束。。。 34 map阶段开始执行,,, 35 map阶段结束。。。 36 map阶段开始执行,,, 37 map阶段结束。。。 38 map阶段开始执行,,, 39 map阶段结束。。。 40 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 41 18/11/22 15:37:52 INFO mapreduce.Job: Job job_local857633983_0001 running in uber mode : false 42 18/11/22 15:37:52 INFO mapreduce.Job: map 0% reduce 0% 43 18/11/22 15:37:52 INFO mapred.MapTask: Starting flush of map output 44 18/11/22 15:37:52 INFO mapred.MapTask: Spilling map output 45 18/11/22 15:37:52 INFO mapred.MapTask: bufstart = 0; bufend = 321; 
bufvoid = 104857600 46 18/11/22 15:37:52 INFO mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214252(104857008); length = 145/6553600 47 Combiner阶段开始... 48 Combiner阶段结束。。。 49 Combiner阶段开始... 50 Combiner阶段结束。。。 51 Combiner阶段开始... 52 Combiner阶段结束。。。 53 Combiner阶段开始... 54 Combiner阶段结束。。。 55 Combiner阶段开始... 56 Combiner阶段结束。。。 57 Combiner阶段开始... 58 Combiner阶段结束。。。 59 Combiner阶段开始... 60 Combiner阶段结束。。。 61 Combiner阶段开始... 62 Combiner阶段结束。。。 63 Combiner阶段开始... 64 Combiner阶段结束。。。 65 18/11/22 15:37:52 INFO mapred.MapTask: Finished spill 0 66 18/11/22 15:37:52 INFO mapred.Task: Task:attempt_local857633983_0001_m_000000_0 is done. And is in the process of committing 67 18/11/22 15:37:52 INFO mapred.LocalJobRunner: map 68 18/11/22 15:37:52 INFO mapred.Task: Task 'attempt_local857633983_0001_m_000000_0' done. 69 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Finishing task: attempt_local857633983_0001_m_000000_0 70 18/11/22 15:37:52 INFO mapred.LocalJobRunner: map task executor complete. 71 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Waiting for reduce tasks 72 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Starting task: attempt_local857633983_0001_r_000000_0 73 18/11/22 15:37:52 INFO util.ProcfsBasedProcessTree: ProcfsBasedProcessTree currently is supported only on Linux. 
74 18/11/22 15:37:52 INFO mapred.Task: Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@32eb8b1d 75 18/11/22 15:37:52 INFO mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@5b831a05 76 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: MergerManager: memoryLimit=1291845632, maxSingleShuffleLimit=322961408, mergeThreshold=852618176, ioSortFactor=10, memToMemMergeOutputsThreshold=10 77 18/11/22 15:37:52 INFO reduce.EventFetcher: attempt_local857633983_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events 78 18/11/22 15:37:52 INFO reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local857633983_0001_m_000000_0 decomp: 101 len: 105 to MEMORY 79 18/11/22 15:37:52 INFO reduce.InMemoryMapOutput: Read 101 bytes from map-output for attempt_local857633983_0001_m_000000_0 80 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 101, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->101 81 18/11/22 15:37:52 INFO reduce.EventFetcher: EventFetcher is interrupted.. Returning 82 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied. 
83 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs 84 18/11/22 15:37:52 INFO mapred.Merger: Merging 1 sorted segments 85 18/11/22 15:37:52 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 97 bytes 86 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merged 1 segments, 101 bytes to disk to satisfy reduce memory limit 87 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merging 1 files, 105 bytes from disk 88 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce 89 18/11/22 15:37:52 INFO mapred.Merger: Merging 1 sorted segments 90 18/11/22 15:37:52 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 97 bytes 91 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied. 92 18/11/22 15:37:52 INFO Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords 93 Reduce阶段开始执行... 94 单词I: 5 95 Reduce阶段结束。。。 96 Reduce阶段开始执行... 97 单词am: 5 98 Reduce阶段结束。。。 99 Reduce阶段开始执行... 100 单词an: 5 101 Reduce阶段结束。。。 102 Reduce阶段开始执行... 103 单词for: 5 104 Reduce阶段结束。。。 105 Reduce阶段开始执行... 106 单词hello: 1 107 Reduce阶段结束。。。 108 Reduce阶段开始执行... 109 单词linux: 5 110 Reduce阶段结束。。。 111 Reduce阶段开始执行... 112 单词new: 5 113 Reduce阶段结束。。。 114 Reduce阶段开始执行... 115 单词programer: 5 116 Reduce阶段结束。。。 117 Reduce阶段开始执行... 118 单词world!: 1 119 Reduce阶段结束。。。 120 18/11/22 15:37:52 INFO mapred.Task: Task:attempt_local857633983_0001_r_000000_0 is done. And is in the process of committing 121 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied. 
122 18/11/22 15:37:52 INFO mapred.Task: Task attempt_local857633983_0001_r_000000_0 is allowed to commit now 123 18/11/22 15:37:52 INFO output.FileOutputCommitter: Saved output of task 'attempt_local857633983_0001_r_000000_0' to hdfs://walloce.one:8020/walloce/output/_temporary/0/task_local857633983_0001_r_000000 124 18/11/22 15:37:52 INFO mapred.LocalJobRunner: reduce > reduce 125 18/11/22 15:37:52 INFO mapred.Task: Task 'attempt_local857633983_0001_r_000000_0' done. 126 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Finishing task: attempt_local857633983_0001_r_000000_0 127 18/11/22 15:37:52 INFO mapred.LocalJobRunner: reduce task executor complete. 128 18/11/22 15:37:53 INFO mapreduce.Job: map 100% reduce 100% 129 18/11/22 15:37:53 INFO mapreduce.Job: Job job_local857633983_0001 completed successfully 130 18/11/22 15:37:53 INFO mapreduce.Job: Counters: 38 131 File System Counters 132 FILE: Number of bytes read=576 133 FILE: Number of bytes written=490527 134 FILE: Number of read operations=0 135 FILE: Number of large read operations=0 136 FILE: Number of write operations=0 137 HDFS: Number of bytes read=346 138 HDFS: Number of bytes written=63 139 HDFS: Number of read operations=15 140 HDFS: Number of large read operations=0 141 HDFS: Number of write operations=6 142 Map-Reduce Framework 143 Map input records=6 144 Map output records=37 145 Map output bytes=321 146 Map output materialized bytes=105 147 Input split bytes=110 148 Combine input records=37 149 Combine output records=9 150 Reduce input groups=9 151 Reduce shuffle bytes=105 152 Reduce input records=9 153 Reduce output records=9 154 Spilled Records=18 155 Shuffled Maps =1 156 Failed Shuffles=0 157 Merged Map outputs=1 158 GC time elapsed (ms)=3 159 CPU time spent (ms)=0 160 Physical memory (bytes) snapshot=0 161 Virtual memory (bytes) snapshot=0 162 Total committed heap usage (bytes)=372244480 163 Shuffle Errors 164 BAD_ID=0 165 CONNECTION=0 166 IO_ERROR=0 167 WRONG_LENGTH=0 168 WRONG_MAP=0 169 
WRONG_REDUCE=0 170 File Input Format Counters 171 Bytes Read=173 172 File Output Format Counters 173 Bytes Written=63 174 175 Process finished with exit code 0
由wordcount运行结果可以看出,MapReduce的执行顺序:
1、Map阶段开始
因为map阶段输入的数据是以行为单位,key 为该行在文件中的起始字节偏移量(并不是行序号),有多少行 map() 方法就执行多少次。
2、Combiner阶段开始
map() 全部执行完后,Map 端的输出在溢写(spill)时先按分区和 key 排序,然后开始Combiner阶段;Combiner阶段进行的是每个分区里的数据小聚合(局部聚合),有多少个不同的 key 就执行多少次。
3、Reduce阶段开始
Combiner阶段结束后,Reduce阶段拉取各个 Map 任务的输出,将相同 key 的所有数据进行聚合,得出最终的结果。
记: 心酸的学习历程!