1.准备
事先在hdfs上创建两个目录:
保存上传数据的目录:hdfs://alamps:9000/library/SparkStreaming/data
checkpoint的目录:hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data
------------------------------------------------------
2.源码
package stream; import java.util.Arrays; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.api.java.JavaStreamingContextFactory; import scala.Tuple2; public class SparkStreamingOnHDFS {
public static void main(String[] args) {
/*
* 第一步:配置SparkConf
* 1.至少2条线程:因为Spark Streaming应用程序在运行的时候,至少有一条线程用于不断的循环接收数据,
* 并且至少有一条线程用于处理接收数据(否则无法有线程用于处理数据,随着时间的推移,内存和磁盘都会不堪重负)
* 2.对于集群而言,每个Executor一般肯定不止一个Thread,那对于处理spark streaming的应用程序而言,每个Executor
* 一般分配多少个Core比较合适?根据经验,5个左右的Core是最佳的;
*/
// final SparkConf conf = new SparkConf().setMaster("spark://alamps:7077")
final SparkConf conf = new SparkConf().setMaster("local[2]")
.setAppName("SparkStreamingOnHDFS");
/*
* 第二步:创建SparkStreamingContext,这个是SparkStreaming应用程序所有功能的起始点和程序调度核心
* 1.SparkStreamingContext的构建可以基于SparkConf参数,也可以基于持久化的SparkStreamingContext的内容
* 来恢复过来(典型的场景是Driver崩溃后重新启动,由于Spark Streaming具有连续7*24小时不间断运行的特征,
* 所以需要在Driver重启后继续上一次的状态,此时的状态恢复需要基于曾经的Checkpoint);
* 2.在一个Spark Streaming应用程序中可以创建若干个SparkStreamingContext对象,使用一个SparkStream之前需要把前面
* 运行的SparkStreamingContext对象关闭掉,由此,我们获得一个重大的启发SparkStreaming也只是Spark Core上的一个应用
* 程序而已,只不过Spark Streaming框架运行的话需要Spark工程师写业务逻辑处理代码;
*/
final String checkpointDirectory = "hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data";
JavaStreamingContextFactory factory = new JavaStreamingContextFactory(){ @Override
public JavaStreamingContext create() {
return createContext(checkpointDirectory,conf);
} };
// JavaStreamingContext jsc = new JavaStreamingContext(conf,Durations.seconds(5));
/**
* 可以从失败中恢复Driver,不过还需要指定Driver这个进程运行在Cluster,并且提交应用程序的时候指定--supervise
*/
JavaStreamingContext jsc = JavaStreamingContext.getOrCreate(checkpointDirectory, factory);
/*
* 第三步:创建Spark Streaming输入数据来源input Stream
* 1.数据输入来源可以基于File、HDFS、Flume、Kafka、Socket等;
* 2.在这里我们指定数据来源于网络Socket端口,Spark Streaming连接上该端口并在运行的时候,一直监听该端口的数据
* (当然该端口服务首先必须存在),并且在后续会根据业务需要不断的有数据产生(当然对于Spark Streaming应用程序
* 的运行而言,有无数其处理流程都有一样);
* 3.如果经常在每间隔5秒钟没有数据的话,会不断的启动空的Job其实是会造成调度资源的浪费,因为并没有数据需要发生计算,所以
* 实际的企业级生产环境的代码在具体提交Job前会判断是否有数据,如果没有的话就不再提交Job;
* 4.此处没有Receiver,SparkStreaming应用程序只是按照时间间隔监控目录下每个Batch新增的内容(把新增的)作为RDD的数据来源生成原始RDD
*/
JavaDStream<String> lines = jsc.textFileStream("hdfs://alamps:9000/library/SparkStreaming/data");//此处的是hdfs文件 /*
* 第四步:接下来就像对于RDD编程一样基于DStream进行编程!!!原因是DStream是RDD产生的模板(或者说类),在Spark Streaming具体
* 发生计算前,其实质是把每个Batch的DStream的操作翻译成为对RDD的操作!!!
*对初始的DStream进行Transformation级别的处理,例如map、filter等高阶函数等的编程,来进行具体的数据计算
* 第4.1步:将每一行的字符串拆分成单个的单词
*/
JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { //如果是Scala,由于SAM转换,所以可以写成val words = lines.flatMap { line => line.split(" ")} @Override
public Iterable<String> call(String line) throws Exception {
return Arrays.asList(line.split(" "));
}
}); /*
* 第四步:对初始的DStream进行Transformation级别的处理,例如map、filter等高阶函数等的编程,来进行具体的数据计算
* 第4.2步:在单词拆分的基础上对每个单词实例计数为1,也就是word => (word, 1)
*/
JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() { /**
*
*/
private static final long serialVersionUID = 1L; @Override
public Tuple2<String, Integer> call(String word) throws Exception {
return new Tuple2<String, Integer>(word, );
}
}); /*
* 第四步:对初始的DStream进行Transformation级别的处理,例如map、filter等高阶函数等的编程,来进行具体的数据计算
* 第4.3步:在每个单词实例计数为1基础之上统计每个单词在文件中出现的总次数
*/
JavaPairDStream<String, Integer> wordsCount = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() { //对相同的Key,进行Value的累计(包括Local和Reducer级别同时Reduce) /**
*
*/
private static final long serialVersionUID = 1L; public Integer call(Integer v1, Integer v2) throws Exception {
return v1 + v2;
}
}); /*
* 此处的print并不会直接出发Job的执行,因为现在的一切都是在Spark Streaming框架的控制之下的,对于Spark Streaming
* 而言具体是否触发真正的Job运行是基于设置的Duration时间间隔的
*
* 诸位一定要注意的是Spark Streaming应用程序要想执行具体的Job,对Dtream就必须有output Stream操作,
* output Stream有很多类型的函数触发,类print、saveAsTextFile、saveAsHadoopFiles等,最为重要的一个
* 方法是foraeachRDD,因为Spark Streaming处理的结果一般都会放在Redis、DB、DashBoard等上面,foreachRDD
* 主要就是用来完成这些功能的,而且可以随意的自定义具体数据到底放在哪里!!!
*
*/
wordsCount.print(); /*
* Spark Streaming执行引擎也就是Driver开始运行,Driver启动的时候是位于一条新的线程中的,当然其内部有消息循环体,用于
* 接受应用程序本身或者Executor中的消息;
*/
jsc.start(); jsc.awaitTermination();
jsc.close(); }
/**
*
* @Description:工厂模式创建JavaStreamingContext
* @Author: lujinyong168
* @Date: 2016年5月12日 下午10:01:40
*/
private static JavaStreamingContext createContext(String checkpointDirectory,SparkConf sc){
System.out.println("Creating new context");
JavaStreamingContext ssc = new JavaStreamingContext(sc,Durations.seconds());
ssc.checkpoint(checkpointDirectory);
return ssc;
}
}
3. 通过hdfs dfs -put命令上传文件到hdfs://alamps:9000/library/SparkStreaming/data目录中,可以查看到每隔10秒钟会处理一次,因为我设置的间隔时间是:Durations.seconds(10)。
[root@alamps TestDir]# vi /usr/local/flume/tmp/TestDir/wc1.txt
hadoop hive hbase
spark stream flume
kafka spark sql
strom python
[root@alamps TestDir]#
4.遇到的问题:
[root@alamps TestDir]# hadoop fs -chown hadoop:hadoop hdfs://alamps:9000/library/SparkStreaming/data/
[root@alamps TestDir]# hadoop fs -chown hadoop:hadoop hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data
//
//SLF4J: Class path contains multiple SLF4J bindings.
//SLF4J: Found binding in [jar:file:/home/hadoop/app/spark/lib/spark-assembly-1.2.0-hadoop2.4.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
//SLF4J: Found binding in [jar:file:/home/hadoop/app/spark/lib/spark-examples-1.2.0-hadoop2.4.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
//SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
//SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
//log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
//log4j:WARN Please initialize the log4j system properly.
//log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
//Creating new context
//Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
//18/01/12 23:40:44 INFO SecurityManager: Changing view acls to: hadoop
//18/01/12 23:40:44 INFO SecurityManager: Changing modify acls to: hadoop
//18/01/12 23:40:44 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop); users with modify permissions: Set(hadoop)
//18/01/12 23:40:44 INFO Slf4jLogger: Slf4jLogger started
//18/01/12 23:40:44 INFO Remoting: Starting remoting
//18/01/12 23:40:44 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@alamps:42898]
//18/01/12 23:40:44 INFO Utils: Successfully started service 'sparkDriver' on port 42898.
//18/01/12 23:40:44 INFO SparkEnv: Registering MapOutputTracker
//18/01/12 23:40:44 INFO SparkEnv: Registering BlockManagerMaster
//18/01/12 23:40:44 INFO DiskBlockManager: Created local directory at /tmp/spark-local-20180112234044-511d
//18/01/12 23:40:44 INFO MemoryStore: MemoryStore started with capacity 133.6 MB
//18/01/12 23:40:44 INFO HttpFileServer: HTTP File server directory is /tmp/spark-92d5a559-6100-48c0-8a1d-bcac165fe878
//18/01/12 23:40:44 INFO HttpServer: Starting HTTP Server
//18/01/12 23:40:44 INFO Utils: Successfully started service 'HTTP file server' on port 49358.
//18/01/12 23:40:44 INFO Utils: Successfully started service 'SparkUI' on port 4040.
//18/01/12 23:40:44 INFO SparkUI: Started SparkUI at http://alamps:4040
//18/01/12 23:40:45 INFO AkkaUtils: Connecting to HeartbeatReceiver: akka.tcp://sparkDriver@alamps:42898/user/HeartbeatReceiver
//18/01/12 23:40:45 INFO NettyBlockTransferService: Server created on 54231
//18/01/12 23:40:45 INFO BlockManagerMaster: Trying to register BlockManager
//18/01/12 23:40:45 INFO BlockManagerMasterActor: Registering block manager localhost:54231 with 133.6 MB RAM, BlockManagerId(<driver>, localhost, 54231)
//18/01/12 23:40:45 INFO BlockManagerMaster: Registered BlockManager
//18/01/12 23:40:45 INFO FileInputDStream: Duration for remembering RDDs set to 60000 ms for org.apache.spark.streaming.dstream.FileInputDStream@8ff85d
//18/01/12 23:40:45 INFO ForEachDStream: metadataCleanupDelay = -1
//18/01/12 23:40:45 INFO ShuffledDStream: metadataCleanupDelay = -1
//18/01/12 23:40:45 INFO MappedDStream: metadataCleanupDelay = -1
//18/01/12 23:40:45 INFO FlatMappedDStream: metadataCleanupDelay = -1
//18/01/12 23:40:45 INFO MappedDStream: metadataCleanupDelay = -1
//18/01/12 23:40:45 INFO FileInputDStream: metadataCleanupDelay = -1
//18/01/12 23:40:45 INFO FileInputDStream: Slide time = 10000 ms
//18/01/12 23:40:45 INFO FileInputDStream: Storage level = StorageLevel(false, false, false, false, 1)
//18/01/12 23:40:45 INFO FileInputDStream: Checkpoint interval = null
//18/01/12 23:40:45 INFO FileInputDStream: Remember duration = 60000 ms
//18/01/12 23:40:45 INFO FileInputDStream: Initialized and validated org.apache.spark.streaming.dstream.FileInputDStream@8ff85d
//18/01/12 23:40:45 INFO MappedDStream: Slide time = 10000 ms
//18/01/12 23:40:45 INFO MappedDStream: Storage level = StorageLevel(false, false, false, false, 1)
//18/01/12 23:40:45 INFO MappedDStream: Checkpoint interval = null
//18/01/12 23:40:45 INFO MappedDStream: Remember duration = 10000 ms
//18/01/12 23:40:45 INFO MappedDStream: Initialized and validated org.apache.spark.streaming.dstream.MappedDStream@c3bcdf
//18/01/12 23:40:45 INFO FlatMappedDStream: Slide time = 10000 ms
//18/01/12 23:40:45 INFO FlatMappedDStream: Storage level = StorageLevel(false, false, false, false, 1)
//18/01/12 23:40:45 INFO FlatMappedDStream: Checkpoint interval = null
//18/01/12 23:40:45 INFO FlatMappedDStream: Remember duration = 10000 ms
//18/01/12 23:40:45 INFO FlatMappedDStream: Initialized and validated org.apache.spark.streaming.dstream.FlatMappedDStream@a4d9fe
//18/01/12 23:40:45 INFO MappedDStream: Slide time = 10000 ms
//18/01/12 23:40:45 INFO MappedDStream: Storage level = StorageLevel(false, false, false, false, 1)
//18/01/12 23:40:45 INFO MappedDStream: Checkpoint interval = null
//18/01/12 23:40:45 INFO MappedDStream: Remember duration = 10000 ms
//18/01/12 23:40:45 INFO MappedDStream: Initialized and validated org.apache.spark.streaming.dstream.MappedDStream@e3cfbf
//18/01/12 23:40:45 INFO ShuffledDStream: Slide time = 10000 ms
//18/01/12 23:40:45 INFO ShuffledDStream: Storage level = StorageLevel(false, false, false, false, 1)
//18/01/12 23:40:45 INFO ShuffledDStream: Checkpoint interval = null
//18/01/12 23:40:45 INFO ShuffledDStream: Remember duration = 10000 ms
//18/01/12 23:40:45 INFO ShuffledDStream: Initialized and validated org.apache.spark.streaming.dstream.ShuffledDStream@5c2d0f
//18/01/12 23:40:45 INFO ForEachDStream: Slide time = 10000 ms
//18/01/12 23:40:45 INFO ForEachDStream: Storage level = StorageLevel(false, false, false, false, 1)
//18/01/12 23:40:45 INFO ForEachDStream: Checkpoint interval = null
//18/01/12 23:40:45 INFO ForEachDStream: Remember duration = 10000 ms
//18/01/12 23:40:45 INFO ForEachDStream: Initialized and validated org.apache.spark.streaming.dstream.ForEachDStream@1925bfa
//18/01/12 23:40:46 INFO RecurringTimer: Started timer for JobGenerator at time 1515829250000
//18/01/12 23:40:46 INFO JobGenerator: Started JobGenerator at 1515829250000 ms
//18/01/12 23:40:46 INFO JobScheduler: Started JobScheduler
//18/01/12 23:40:50 INFO FileInputDStream: Finding new files took 5 ms
//18/01/12 23:40:50 INFO FileInputDStream: New files at time 1515829250000 ms:
//
//18/01/12 23:40:50 INFO JobScheduler: Added jobs for time 1515829250000 ms
//18/01/12 23:40:50 INFO JobGenerator: Checkpointing graph for time 1515829250000 ms
//18/01/12 23:40:50 INFO DStreamGraph: Updating checkpoint data for time 1515829250000 ms
//18/01/12 23:40:50 INFO JobScheduler: Starting job streaming job 1515829250000 ms.0 from job set of time 1515829250000 ms
//18/01/12 23:40:50 INFO DStreamGraph: Updated checkpoint data for time 1515829250000 ms
//18/01/12 23:40:50 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:40:50 INFO DAGScheduler: Registering RDD 3 (mapToPair at SparkStreamingOnHDFS.java:87)
//18/01/12 23:40:50 INFO DAGScheduler: Got job 0 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:40:50 INFO DAGScheduler: Final stage: Stage 1(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:40:50 INFO DAGScheduler: Parents of final stage: List(Stage 0)
//18/01/12 23:40:50 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:40:50 INFO DAGScheduler: Submitting Stage 1 (ShuffledRDD[4] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:40:50 INFO CheckpointWriter: Saving checkpoint for time 1515829250000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829250000'
//18/01/12 23:40:50 WARN SizeEstimator: Failed to check whether UseCompressedOops is set; assuming yes
//18/01/12 23:40:50 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=0, maxMem=140142182
//18/01/12 23:40:50 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:40:50 INFO MemoryStore: ensureFreeSpace(1677) called with curMem=2264, maxMem=140142182
//18/01/12 23:40:50 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 1677.0 B, free 133.6 MB)
//18/01/12 23:40:50 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:54231 (size: 1677.0 B, free: 133.6 MB)
//18/01/12 23:40:50 INFO BlockManagerMaster: Updated info of block broadcast_0_piece0
//18/01/12 23:40:50 INFO SparkContext: Created broadcast 0 from broadcast at DAGScheduler.scala:838
//18/01/12 23:40:50 INFO DAGScheduler: Submitting 1 missing tasks from Stage 1 (ShuffledRDD[4] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:40:50 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks
//18/01/12 23:40:50 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 0, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:40:50 INFO Executor: Running task 0.0 in stage 1.0 (TID 0)
//18/01/12 23:40:51 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:40:51 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 20 ms
//18/01/12 23:40:51 INFO Executor: Finished task 0.0 in stage 1.0 (TID 0). 820 bytes result sent to driver
//18/01/12 23:40:51 INFO DAGScheduler: Stage 1 (print at SparkStreamingOnHDFS.java:126) finished in 0.335 s
//18/01/12 23:40:51 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 0) in 308 ms on localhost (1/1)
//18/01/12 23:40:51 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
//18/01/12 23:40:51 INFO DAGScheduler: Job 0 finished: print at SparkStreamingOnHDFS.java:126, took 1.096348 s
//18/01/12 23:40:51 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:40:51 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 0 is 82 bytes
//18/01/12 23:40:51 INFO DAGScheduler: Got job 1 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:40:51 INFO DAGScheduler: Final stage: Stage 3(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:40:51 INFO DAGScheduler: Parents of final stage: List(Stage 2)
//18/01/12 23:40:51 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:40:51 INFO DAGScheduler: Submitting Stage 3 (ShuffledRDD[4] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:40:51 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=3941, maxMem=140142182
//18/01/12 23:40:51 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:40:51 INFO MemoryStore: ensureFreeSpace(1677) called with curMem=6205, maxMem=140142182
//18/01/12 23:40:51 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 1677.0 B, free 133.6 MB)
//18/01/12 23:40:51 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:54231 (size: 1677.0 B, free: 133.6 MB)
//18/01/12 23:40:51 INFO BlockManagerMaster: Updated info of block broadcast_1_piece0
//18/01/12 23:40:51 INFO SparkContext: Created broadcast 1 from getCallSite at DStream.scala:294
//18/01/12 23:40:51 INFO DAGScheduler: Submitting 1 missing tasks from Stage 3 (ShuffledRDD[4] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:40:51 INFO TaskSchedulerImpl: Adding task set 3.0 with 1 tasks
//18/01/12 23:40:51 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 1, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:40:51 INFO Executor: Running task 0.0 in stage 3.0 (TID 1)
//18/01/12 23:40:51 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:40:51 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
//18/01/12 23:40:51 INFO Executor: Finished task 0.0 in stage 3.0 (TID 1). 820 bytes result sent to driver
//18/01/12 23:40:51 INFO DAGScheduler: Stage 3 (print at SparkStreamingOnHDFS.java:126) finished in 0.023 s
//18/01/12 23:40:51 INFO DAGScheduler: Job 1 finished: print at SparkStreamingOnHDFS.java:126, took 0.077437 s
//18/01/12 23:40:51 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 1) in 21 ms on localhost (1/1)
//18/01/12 23:40:51 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool
//-------------------------------------------
//Time: 1515829250000 ms
//-------------------------------------------
//
//18/01/12 23:40:51 INFO JobScheduler: Finished job streaming job 1515829250000 ms.0 from job set of time 1515829250000 ms
//18/01/12 23:40:51 INFO JobScheduler: Total delay: 1.327 s for time 1515829250000 ms (execution: 1.240 s)
//18/01/12 23:40:51 INFO FileInputDStream: Cleared 0 old files that were older than 1515829190000 ms:
//18/01/12 23:40:51 INFO ReceivedBlockTracker: Deleting batches ArrayBuffer()
//18/01/12 23:40:51 INFO JobGenerator: Checkpointing graph for time 1515829250000 ms
//18/01/12 23:40:51 INFO DStreamGraph: Updating checkpoint data for time 1515829250000 ms
//18/01/12 23:40:51 INFO DStreamGraph: Updated checkpoint data for time 1515829250000 ms
//18/01/12 23:40:51 INFO CheckpointWriter: Checkpoint for time 1515829250000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829250000', took 5334 bytes and 1206 ms
//18/01/12 23:40:51 INFO DStreamGraph: Clearing checkpoint data for time 1515829250000 ms
//18/01/12 23:40:51 INFO CheckpointWriter: Saving checkpoint for time 1515829250000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829250000'
//18/01/12 23:40:51 INFO DStreamGraph: Cleared checkpoint data for time 1515829250000 ms
//18/01/12 23:40:51 INFO CheckpointWriter: Checkpoint for time 1515829250000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829250000', took 5330 bytes and 121 ms
//18/01/12 23:40:51 INFO DStreamGraph: Clearing checkpoint data for time 1515829250000 ms
//18/01/12 23:40:51 INFO DStreamGraph: Cleared checkpoint data for time 1515829250000 ms
//18/01/12 23:41:00 INFO FileInputDStream: Finding new files took 1 ms
//18/01/12 23:41:00 INFO FileInputDStream: New files at time 1515829260000 ms:
//
//18/01/12 23:41:00 INFO JobScheduler: Starting job streaming job 1515829260000 ms.0 from job set of time 1515829260000 ms
//18/01/12 23:41:00 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:00 INFO DAGScheduler: Registering RDD 8 (mapToPair at SparkStreamingOnHDFS.java:87)
//18/01/12 23:41:00 INFO DAGScheduler: Got job 2 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:00 INFO DAGScheduler: Final stage: Stage 5(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:00 INFO DAGScheduler: Parents of final stage: List(Stage 4)
//18/01/12 23:41:00 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:00 INFO DAGScheduler: Submitting Stage 5 (ShuffledRDD[9] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:00 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=7882, maxMem=140142182
//18/01/12 23:41:00 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:00 INFO JobScheduler: Added jobs for time 1515829260000 ms
//18/01/12 23:41:00 INFO JobGenerator: Checkpointing graph for time 1515829260000 ms
//18/01/12 23:41:00 INFO DStreamGraph: Updating checkpoint data for time 1515829260000 ms
//18/01/12 23:41:00 INFO DStreamGraph: Updated checkpoint data for time 1515829260000 ms
//18/01/12 23:41:00 INFO MemoryStore: ensureFreeSpace(1679) called with curMem=10146, maxMem=140142182
//18/01/12 23:41:00 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 1679.0 B, free 133.6 MB)
//18/01/12 23:41:00 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:54231 (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:00 INFO BlockManagerMaster: Updated info of block broadcast_2_piece0
//18/01/12 23:41:00 INFO SparkContext: Created broadcast 2 from getCallSite at DStream.scala:294
//18/01/12 23:41:00 INFO DAGScheduler: Submitting 1 missing tasks from Stage 5 (ShuffledRDD[9] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:00 INFO TaskSchedulerImpl: Adding task set 5.0 with 1 tasks
//18/01/12 23:41:00 INFO CheckpointWriter: Saving checkpoint for time 1515829260000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829260000'
//18/01/12 23:41:00 INFO TaskSetManager: Starting task 0.0 in stage 5.0 (TID 2, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:00 INFO Executor: Running task 0.0 in stage 5.0 (TID 2)
//18/01/12 23:41:00 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:00 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
//18/01/12 23:41:00 INFO Executor: Finished task 0.0 in stage 5.0 (TID 2). 820 bytes result sent to driver
//18/01/12 23:41:00 INFO DAGScheduler: Stage 5 (print at SparkStreamingOnHDFS.java:126) finished in 0.014 s
//18/01/12 23:41:00 INFO DAGScheduler: Job 2 finished: print at SparkStreamingOnHDFS.java:126, took 0.059484 s
//18/01/12 23:41:00 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:00 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 1 is 82 bytes
//18/01/12 23:41:00 INFO DAGScheduler: Got job 3 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:00 INFO DAGScheduler: Final stage: Stage 7(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:00 INFO DAGScheduler: Parents of final stage: List(Stage 6)
//18/01/12 23:41:00 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:00 INFO DAGScheduler: Submitting Stage 7 (ShuffledRDD[9] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:00 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=11825, maxMem=140142182
//18/01/12 23:41:00 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:00 INFO TaskSetManager: Finished task 0.0 in stage 5.0 (TID 2) in 14 ms on localhost (1/1)
//18/01/12 23:41:00 INFO TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool
//18/01/12 23:41:00 INFO MemoryStore: ensureFreeSpace(1679) called with curMem=14089, maxMem=140142182
//18/01/12 23:41:00 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 1679.0 B, free 133.6 MB)
//18/01/12 23:41:00 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:54231 (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:00 INFO BlockManagerMaster: Updated info of block broadcast_3_piece0
//18/01/12 23:41:00 INFO SparkContext: Created broadcast 3 from getCallSite at DStream.scala:294
//18/01/12 23:41:00 INFO DAGScheduler: Submitting 1 missing tasks from Stage 7 (ShuffledRDD[9] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:00 INFO TaskSchedulerImpl: Adding task set 7.0 with 1 tasks
//18/01/12 23:41:00 INFO TaskSetManager: Starting task 0.0 in stage 7.0 (TID 3, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:00 INFO Executor: Running task 0.0 in stage 7.0 (TID 3)
//18/01/12 23:41:00 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:00 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
//18/01/12 23:41:00 INFO Executor: Finished task 0.0 in stage 7.0 (TID 3). 820 bytes result sent to driver
//18/01/12 23:41:00 INFO DAGScheduler: Stage 7 (print at SparkStreamingOnHDFS.java:126) finished in 0.014 s
//-------------------------------------------
//Time: 1515829260000 ms
//-------------------------------------------
//
//18/01/12 23:41:00 INFO DAGScheduler: Job 3 finished: print at SparkStreamingOnHDFS.java:126, took 0.078915 s
//18/01/12 23:41:00 INFO JobScheduler: Finished job streaming job 1515829260000 ms.0 from job set of time 1515829260000 ms
//18/01/12 23:41:00 INFO JobScheduler: Total delay: 0.160 s for time 1515829260000 ms (execution: 0.149 s)
//18/01/12 23:41:00 INFO ShuffledRDD: Removing RDD 4 from persistence list
//18/01/12 23:41:00 INFO BlockManager: Removing RDD 4
//18/01/12 23:41:00 INFO TaskSetManager: Finished task 0.0 in stage 7.0 (TID 3) in 26 ms on localhost (1/1)
//18/01/12 23:41:00 INFO TaskSchedulerImpl: Removed TaskSet 7.0, whose tasks have all completed, from pool
//18/01/12 23:41:00 INFO MappedRDD: Removing RDD 3 from persistence list
//18/01/12 23:41:00 INFO FlatMappedRDD: Removing RDD 2 from persistence list
//18/01/12 23:41:00 INFO MappedRDD: Removing RDD 1 from persistence list
//18/01/12 23:41:00 INFO FileInputDStream: Cleared 0 old files that were older than 1515829200000 ms:
//18/01/12 23:41:00 INFO BlockManager: Removing RDD 3
//18/01/12 23:41:00 INFO BlockManager: Removing RDD 2
//18/01/12 23:41:00 INFO BlockManager: Removing RDD 1
//18/01/12 23:41:00 INFO ReceivedBlockTracker: Deleting batches ArrayBuffer()
//18/01/12 23:41:00 INFO JobGenerator: Checkpointing graph for time 1515829260000 ms
//18/01/12 23:41:00 INFO DStreamGraph: Updating checkpoint data for time 1515829260000 ms
//18/01/12 23:41:00 INFO DStreamGraph: Updated checkpoint data for time 1515829260000 ms
//18/01/12 23:41:00 INFO CheckpointWriter: Checkpoint for time 1515829260000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829260000', took 5345 bytes and 234 ms
//18/01/12 23:41:00 INFO DStreamGraph: Clearing checkpoint data for time 1515829260000 ms
//18/01/12 23:41:00 INFO DStreamGraph: Cleared checkpoint data for time 1515829260000 ms
//18/01/12 23:41:00 INFO CheckpointWriter: Saving checkpoint for time 1515829260000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829260000'
//18/01/12 23:41:00 INFO CheckpointWriter: Checkpoint for time 1515829260000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829260000', took 5341 bytes and 67 ms
//18/01/12 23:41:00 INFO DStreamGraph: Clearing checkpoint data for time 1515829260000 ms
//18/01/12 23:41:00 INFO DStreamGraph: Cleared checkpoint data for time 1515829260000 ms
//18/01/12 23:41:10 INFO FileInputDStream: Finding new files took 3 ms
//18/01/12 23:41:10 INFO FileInputDStream: New files at time 1515829270000 ms:
//
//18/01/12 23:41:10 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:10 INFO JobScheduler: Starting job streaming job 1515829270000 ms.0 from job set of time 1515829270000 ms
//18/01/12 23:41:10 INFO DAGScheduler: Registering RDD 13 (mapToPair at SparkStreamingOnHDFS.java:87)
//18/01/12 23:41:10 INFO DAGScheduler: Got job 4 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:10 INFO DAGScheduler: Final stage: Stage 9(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:10 INFO DAGScheduler: Parents of final stage: List(Stage 8)
//18/01/12 23:41:10 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:10 INFO DAGScheduler: Submitting Stage 9 (ShuffledRDD[14] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:10 INFO JobScheduler: Added jobs for time 1515829270000 ms
//18/01/12 23:41:10 INFO JobGenerator: Checkpointing graph for time 1515829270000 ms
//18/01/12 23:41:10 INFO DStreamGraph: Updating checkpoint data for time 1515829270000 ms
//18/01/12 23:41:10 INFO DStreamGraph: Updated checkpoint data for time 1515829270000 ms
//18/01/12 23:41:10 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=15768, maxMem=140142182
//18/01/12 23:41:10 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:10 INFO BlockManager: Removing broadcast 3
//18/01/12 23:41:10 INFO BlockManager: Removing block broadcast_3_piece0
//18/01/12 23:41:10 INFO MemoryStore: Block broadcast_3_piece0 of size 1679 dropped from memory (free 140125829)
//18/01/12 23:41:10 INFO BlockManagerInfo: Removed broadcast_3_piece0 on localhost:54231 in memory (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:10 INFO BlockManagerMaster: Updated info of block broadcast_3_piece0
//18/01/12 23:41:10 INFO BlockManager: Removing block broadcast_3
//18/01/12 23:41:10 INFO MemoryStore: Block broadcast_3 of size 2264 dropped from memory (free 140128093)
//18/01/12 23:41:10 INFO MemoryStore: ensureFreeSpace(1677) called with curMem=14089, maxMem=140142182
//18/01/12 23:41:10 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 1677.0 B, free 133.6 MB)
//18/01/12 23:41:10 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on localhost:54231 (size: 1677.0 B, free: 133.6 MB)
//18/01/12 23:41:10 INFO BlockManagerMaster: Updated info of block broadcast_4_piece0
//18/01/12 23:41:10 INFO SparkContext: Created broadcast 4 from getCallSite at DStream.scala:294
//18/01/12 23:41:10 INFO DAGScheduler: Submitting 1 missing tasks from Stage 9 (ShuffledRDD[14] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:10 INFO TaskSchedulerImpl: Adding task set 9.0 with 1 tasks
//18/01/12 23:41:10 INFO TaskSetManager: Starting task 0.0 in stage 9.0 (TID 4, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:10 INFO Executor: Running task 0.0 in stage 9.0 (TID 4)
//18/01/12 23:41:10 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:10 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
//18/01/12 23:41:10 INFO Executor: Finished task 0.0 in stage 9.0 (TID 4). 820 bytes result sent to driver
//18/01/12 23:41:10 INFO ContextCleaner: Cleaned broadcast 3
//18/01/12 23:41:10 INFO CheckpointWriter: Saving checkpoint for time 1515829270000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829270000'
//18/01/12 23:41:10 INFO DAGScheduler: Stage 9 (print at SparkStreamingOnHDFS.java:126) finished in 0.005 s
//18/01/12 23:41:10 INFO DAGScheduler: Job 4 finished: print at SparkStreamingOnHDFS.java:126, took 0.082960 s
//18/01/12 23:41:10 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:10 INFO TaskSetManager: Finished task 0.0 in stage 9.0 (TID 4) in 11 ms on localhost (1/1)
//18/01/12 23:41:10 INFO TaskSchedulerImpl: Removed TaskSet 9.0, whose tasks have all completed, from pool
//18/01/12 23:41:10 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 2 is 82 bytes
//18/01/12 23:41:10 INFO DAGScheduler: Got job 5 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:10 INFO DAGScheduler: Final stage: Stage 11(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:10 INFO DAGScheduler: Parents of final stage: List(Stage 10)
//18/01/12 23:41:10 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:10 INFO DAGScheduler: Submitting Stage 11 (ShuffledRDD[14] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:10 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=15766, maxMem=140142182
//18/01/12 23:41:10 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:10 INFO MemoryStore: ensureFreeSpace(1677) called with curMem=18030, maxMem=140142182
//18/01/12 23:41:10 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 1677.0 B, free 133.6 MB)
//18/01/12 23:41:10 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on localhost:54231 (size: 1677.0 B, free: 133.6 MB)
//18/01/12 23:41:10 INFO BlockManagerMaster: Updated info of block broadcast_5_piece0
//18/01/12 23:41:10 INFO SparkContext: Created broadcast 5 from getCallSite at DStream.scala:294
//18/01/12 23:41:10 INFO DAGScheduler: Submitting 1 missing tasks from Stage 11 (ShuffledRDD[14] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:10 INFO TaskSchedulerImpl: Adding task set 11.0 with 1 tasks
//18/01/12 23:41:10 INFO TaskSetManager: Starting task 0.0 in stage 11.0 (TID 5, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:10 INFO Executor: Running task 0.0 in stage 11.0 (TID 5)
//18/01/12 23:41:10 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:10 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
//18/01/12 23:41:10 INFO Executor: Finished task 0.0 in stage 11.0 (TID 5). 820 bytes result sent to driver
//18/01/12 23:41:10 INFO TaskSetManager: Finished task 0.0 in stage 11.0 (TID 5) in 8 ms on localhost (1/1)
//18/01/12 23:41:10 INFO TaskSchedulerImpl: Removed TaskSet 11.0, whose tasks have all completed, from pool
//18/01/12 23:41:10 INFO DAGScheduler: Stage 11 (print at SparkStreamingOnHDFS.java:126) finished in 0.003 s
//-------------------------------------------
//Time: 1515829270000 ms
//-------------------------------------------
//
//18/01/12 23:41:10 INFO DAGScheduler: Job 5 finished: print at SparkStreamingOnHDFS.java:126, took 0.135786 s
//18/01/12 23:41:10 INFO JobScheduler: Finished job streaming job 1515829270000 ms.0 from job set of time 1515829270000 ms
//18/01/12 23:41:10 INFO JobScheduler: Total delay: 0.235 s for time 1515829270000 ms (execution: 0.221 s)
//18/01/12 23:41:10 INFO ShuffledRDD: Removing RDD 9 from persistence list
//18/01/12 23:41:10 INFO BlockManager: Removing RDD 9
//18/01/12 23:41:10 INFO MappedRDD: Removing RDD 8 from persistence list
//18/01/12 23:41:10 INFO BlockManager: Removing RDD 8
//18/01/12 23:41:10 INFO FlatMappedRDD: Removing RDD 7 from persistence list
//18/01/12 23:41:10 INFO BlockManager: Removing RDD 7
//18/01/12 23:41:10 INFO BlockManager: Removing broadcast 5
//18/01/12 23:41:10 INFO BlockManager: Removing block broadcast_5_piece0
//18/01/12 23:41:10 INFO MemoryStore: Block broadcast_5_piece0 of size 1677 dropped from memory (free 140124152)
//18/01/12 23:41:10 INFO BlockManagerInfo: Removed broadcast_5_piece0 on localhost:54231 in memory (size: 1677.0 B, free: 133.6 MB)
//18/01/12 23:41:10 INFO BlockManagerMaster: Updated info of block broadcast_5_piece0
//18/01/12 23:41:10 INFO BlockManager: Removing block broadcast_5
//18/01/12 23:41:10 INFO MemoryStore: Block broadcast_5 of size 2264 dropped from memory (free 140126416)
//18/01/12 23:41:10 INFO MappedRDD: Removing RDD 6 from persistence list
//18/01/12 23:41:10 INFO BlockManager: Removing RDD 6
//18/01/12 23:41:10 INFO ContextCleaner: Cleaned broadcast 5
//18/01/12 23:41:10 INFO FileInputDStream: Cleared 0 old files that were older than 1515829210000 ms:
//18/01/12 23:41:10 INFO ReceivedBlockTracker: Deleting batches ArrayBuffer()
//18/01/12 23:41:10 INFO JobGenerator: Checkpointing graph for time 1515829270000 ms
//18/01/12 23:41:10 INFO DStreamGraph: Updating checkpoint data for time 1515829270000 ms
//18/01/12 23:41:10 INFO DStreamGraph: Updated checkpoint data for time 1515829270000 ms
//18/01/12 23:41:10 INFO CheckpointWriter: Checkpoint for time 1515829270000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829270000', took 5352 bytes and 280 ms
//18/01/12 23:41:10 INFO DStreamGraph: Clearing checkpoint data for time 1515829270000 ms
//18/01/12 23:41:10 INFO DStreamGraph: Cleared checkpoint data for time 1515829270000 ms
//18/01/12 23:41:10 INFO CheckpointWriter: Saving checkpoint for time 1515829270000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829270000'
//18/01/12 23:41:10 INFO CheckpointWriter: Checkpoint for time 1515829270000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829270000', took 5348 bytes and 69 ms
//18/01/12 23:41:10 INFO DStreamGraph: Clearing checkpoint data for time 1515829270000 ms
//18/01/12 23:41:10 INFO DStreamGraph: Cleared checkpoint data for time 1515829270000 ms
//18/01/12 23:41:20 INFO FileInputDStream: Finding new files took 2 ms
//18/01/12 23:41:20 INFO FileInputDStream: New files at time 1515829280000 ms:
//
//18/01/12 23:41:20 INFO JobScheduler: Starting job streaming job 1515829280000 ms.0 from job set of time 1515829280000 ms
//18/01/12 23:41:20 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:20 INFO DAGScheduler: Registering RDD 18 (mapToPair at SparkStreamingOnHDFS.java:87)
//18/01/12 23:41:20 INFO DAGScheduler: Got job 6 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:20 INFO DAGScheduler: Final stage: Stage 13(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:20 INFO DAGScheduler: Parents of final stage: List(Stage 12)
//18/01/12 23:41:20 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:20 INFO JobScheduler: Added jobs for time 1515829280000 ms
//18/01/12 23:41:20 INFO JobGenerator: Checkpointing graph for time 1515829280000 ms
//18/01/12 23:41:20 INFO DStreamGraph: Updating checkpoint data for time 1515829280000 ms
//18/01/12 23:41:20 INFO DAGScheduler: Submitting Stage 13 (ShuffledRDD[19] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:20 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=15766, maxMem=140142182
//18/01/12 23:41:20 INFO DStreamGraph: Updated checkpoint data for time 1515829280000 ms
//18/01/12 23:41:20 INFO MemoryStore: Block broadcast_6 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:20 INFO CheckpointWriter: Saving checkpoint for time 1515829280000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829280000'
//18/01/12 23:41:20 INFO MemoryStore: ensureFreeSpace(1679) called with curMem=18030, maxMem=140142182
//18/01/12 23:41:20 INFO MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 1679.0 B, free 133.6 MB)
//18/01/12 23:41:20 INFO BlockManagerInfo: Added broadcast_6_piece0 in memory on localhost:54231 (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:20 INFO BlockManagerMaster: Updated info of block broadcast_6_piece0
//18/01/12 23:41:20 INFO SparkContext: Created broadcast 6 from getCallSite at DStream.scala:294
//18/01/12 23:41:20 INFO DAGScheduler: Submitting 1 missing tasks from Stage 13 (ShuffledRDD[19] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:20 INFO TaskSchedulerImpl: Adding task set 13.0 with 1 tasks
//18/01/12 23:41:20 INFO TaskSetManager: Starting task 0.0 in stage 13.0 (TID 6, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:20 INFO Executor: Running task 0.0 in stage 13.0 (TID 6)
//18/01/12 23:41:20 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:20 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
//18/01/12 23:41:20 INFO Executor: Finished task 0.0 in stage 13.0 (TID 6). 820 bytes result sent to driver
//18/01/12 23:41:20 INFO TaskSetManager: Finished task 0.0 in stage 13.0 (TID 6) in 6 ms on localhost (1/1)
//18/01/12 23:41:20 INFO TaskSchedulerImpl: Removed TaskSet 13.0, whose tasks have all completed, from pool
//18/01/12 23:41:20 INFO DAGScheduler: Stage 13 (print at SparkStreamingOnHDFS.java:126) finished in 0.002 s
//18/01/12 23:41:20 INFO DAGScheduler: Job 6 finished: print at SparkStreamingOnHDFS.java:126, took 0.040562 s
//18/01/12 23:41:20 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:20 INFO BlockManager: Removing broadcast 6
//18/01/12 23:41:20 INFO BlockManager: Removing block broadcast_6_piece0
//18/01/12 23:41:20 INFO MemoryStore: Block broadcast_6_piece0 of size 1679 dropped from memory (free 140124152)
//18/01/12 23:41:20 INFO BlockManagerInfo: Removed broadcast_6_piece0 on localhost:54231 in memory (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:20 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 3 is 82 bytes
//18/01/12 23:41:20 INFO DAGScheduler: Got job 7 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:20 INFO DAGScheduler: Final stage: Stage 15(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:20 INFO DAGScheduler: Parents of final stage: List(Stage 14)
//18/01/12 23:41:20 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:20 INFO DAGScheduler: Submitting Stage 15 (ShuffledRDD[19] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:20 INFO BlockManagerMaster: Updated info of block broadcast_6_piece0
//18/01/12 23:41:20 INFO BlockManager: Removing block broadcast_6
//18/01/12 23:41:20 INFO MemoryStore: Block broadcast_6 of size 2264 dropped from memory (free 140126416)
//18/01/12 23:41:20 INFO ContextCleaner: Cleaned broadcast 6
//18/01/12 23:41:20 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=15766, maxMem=140142182
//18/01/12 23:41:20 INFO MemoryStore: Block broadcast_7 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:20 INFO MemoryStore: ensureFreeSpace(1679) called with curMem=18030, maxMem=140142182
//18/01/12 23:41:20 INFO MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 1679.0 B, free 133.6 MB)
//18/01/12 23:41:20 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on localhost:54231 (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:20 INFO BlockManagerMaster: Updated info of block broadcast_7_piece0
//18/01/12 23:41:20 INFO SparkContext: Created broadcast 7 from getCallSite at DStream.scala:294
//18/01/12 23:41:20 INFO DAGScheduler: Submitting 1 missing tasks from Stage 15 (ShuffledRDD[19] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:20 INFO TaskSchedulerImpl: Adding task set 15.0 with 1 tasks
//18/01/12 23:41:20 INFO TaskSetManager: Starting task 0.0 in stage 15.0 (TID 7, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:20 INFO Executor: Running task 0.0 in stage 15.0 (TID 7)
//18/01/12 23:41:20 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:20 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
//18/01/12 23:41:20 INFO Executor: Finished task 0.0 in stage 15.0 (TID 7). 820 bytes result sent to driver
//18/01/12 23:41:20 INFO TaskSetManager: Finished task 0.0 in stage 15.0 (TID 7) in 7 ms on localhost (1/1)
//18/01/12 23:41:20 INFO TaskSchedulerImpl: Removed TaskSet 15.0, whose tasks have all completed, from pool
//18/01/12 23:41:20 INFO DAGScheduler: Stage 15 (print at SparkStreamingOnHDFS.java:126) finished in 0.001 s
//18/01/12 23:41:20 INFO DAGScheduler: Job 7 finished: print at SparkStreamingOnHDFS.java:126, took 0.084666 s
//-------------------------------------------
//Time: 1515829280000 ms
//-------------------------------------------
//
//18/01/12 23:41:20 INFO JobScheduler: Finished job streaming job 1515829280000 ms.0 from job set of time 1515829280000 ms
//18/01/12 23:41:20 INFO JobScheduler: Total delay: 0.141 s for time 1515829280000 ms (execution: 0.129 s)
//18/01/12 23:41:20 INFO ShuffledRDD: Removing RDD 14 from persistence list
//18/01/12 23:41:20 INFO BlockManager: Removing RDD 14
//18/01/12 23:41:20 INFO MappedRDD: Removing RDD 13 from persistence list
//18/01/12 23:41:20 INFO BlockManager: Removing RDD 13
//18/01/12 23:41:20 INFO FlatMappedRDD: Removing RDD 12 from persistence list
//18/01/12 23:41:20 INFO BlockManager: Removing RDD 12
//18/01/12 23:41:20 INFO MappedRDD: Removing RDD 11 from persistence list
//18/01/12 23:41:20 INFO BlockManager: Removing RDD 11
//18/01/12 23:41:20 INFO FileInputDStream: Cleared 0 old files that were older than 1515829220000 ms:
//18/01/12 23:41:20 INFO ReceivedBlockTracker: Deleting batches ArrayBuffer()
//18/01/12 23:41:20 INFO JobGenerator: Checkpointing graph for time 1515829280000 ms
//18/01/12 23:41:20 INFO DStreamGraph: Updating checkpoint data for time 1515829280000 ms
//18/01/12 23:41:20 INFO DStreamGraph: Updated checkpoint data for time 1515829280000 ms
//18/01/12 23:41:20 INFO CheckpointWriter: Checkpoint for time 1515829280000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829280000', took 5363 bytes and 121 ms
//18/01/12 23:41:20 INFO DStreamGraph: Clearing checkpoint data for time 1515829280000 ms
//18/01/12 23:41:20 INFO DStreamGraph: Cleared checkpoint data for time 1515829280000 ms
//18/01/12 23:41:20 INFO CheckpointWriter: Saving checkpoint for time 1515829280000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829280000'
//18/01/12 23:41:20 INFO CheckpointWriter: Checkpoint for time 1515829280000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829280000', took 5359 bytes and 80 ms
//18/01/12 23:41:20 INFO DStreamGraph: Clearing checkpoint data for time 1515829280000 ms
//18/01/12 23:41:20 INFO DStreamGraph: Cleared checkpoint data for time 1515829280000 ms
//18/01/12 23:41:30 INFO FileInputDStream: Finding new files took 2 ms
//18/01/12 23:41:30 INFO FileInputDStream: New files at time 1515829290000 ms:
//
//18/01/12 23:41:30 INFO JobScheduler: Starting job streaming job 1515829290000 ms.0 from job set of time 1515829290000 ms
//18/01/12 23:41:30 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:30 INFO JobScheduler: Added jobs for time 1515829290000 ms
//18/01/12 23:41:30 INFO JobGenerator: Checkpointing graph for time 1515829290000 ms
//18/01/12 23:41:30 INFO DAGScheduler: Registering RDD 23 (mapToPair at SparkStreamingOnHDFS.java:87)
//18/01/12 23:41:30 INFO DAGScheduler: Got job 8 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:30 INFO DAGScheduler: Final stage: Stage 17(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:30 INFO DAGScheduler: Parents of final stage: List(Stage 16)
//18/01/12 23:41:30 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:30 INFO DStreamGraph: Updating checkpoint data for time 1515829290000 ms
//18/01/12 23:41:30 INFO DAGScheduler: Submitting Stage 17 (ShuffledRDD[24] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:30 INFO DStreamGraph: Updated checkpoint data for time 1515829290000 ms
//18/01/12 23:41:30 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=19709, maxMem=140142182
//18/01/12 23:41:30 INFO CheckpointWriter: Saving checkpoint for time 1515829290000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829290000'
//18/01/12 23:41:30 INFO MemoryStore: Block broadcast_8 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:30 INFO MemoryStore: ensureFreeSpace(1679) called with curMem=21973, maxMem=140142182
//18/01/12 23:41:30 INFO BlockManager: Removing broadcast 7
//18/01/12 23:41:30 INFO BlockManager: Removing block broadcast_7
//18/01/12 23:41:30 INFO MemoryStore: Block broadcast_7 of size 2264 dropped from memory (free 140122473)
//18/01/12 23:41:30 INFO BlockManager: Removing block broadcast_7_piece0
//18/01/12 23:41:30 INFO MemoryStore: Block broadcast_7_piece0 of size 1679 dropped from memory (free 140124152)
//18/01/12 23:41:30 INFO BlockManagerInfo: Removed broadcast_7_piece0 on localhost:54231 in memory (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:30 INFO BlockManagerMaster: Updated info of block broadcast_7_piece0
//18/01/12 23:41:30 INFO MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 1679.0 B, free 133.6 MB)
//18/01/12 23:41:30 INFO BlockManagerInfo: Added broadcast_8_piece0 in memory on localhost:54231 (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:30 INFO BlockManagerMaster: Updated info of block broadcast_8_piece0
//18/01/12 23:41:30 INFO SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:838
//18/01/12 23:41:30 INFO ContextCleaner: Cleaned broadcast 7
//18/01/12 23:41:30 INFO DAGScheduler: Submitting 1 missing tasks from Stage 17 (ShuffledRDD[24] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:30 INFO TaskSchedulerImpl: Adding task set 17.0 with 1 tasks
//18/01/12 23:41:30 INFO TaskSetManager: Starting task 0.0 in stage 17.0 (TID 8, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:30 INFO Executor: Running task 0.0 in stage 17.0 (TID 8)
//18/01/12 23:41:30 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:30 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
//18/01/12 23:41:30 INFO Executor: Finished task 0.0 in stage 17.0 (TID 8). 820 bytes result sent to driver
//18/01/12 23:41:30 INFO TaskSetManager: Finished task 0.0 in stage 17.0 (TID 8) in 6 ms on localhost (1/1)
//18/01/12 23:41:30 INFO TaskSchedulerImpl: Removed TaskSet 17.0, whose tasks have all completed, from pool
//18/01/12 23:41:30 INFO DAGScheduler: Stage 17 (print at SparkStreamingOnHDFS.java:126) finished in 0.007 s
//18/01/12 23:41:30 INFO DAGScheduler: Job 8 finished: print at SparkStreamingOnHDFS.java:126, took 0.047397 s
//18/01/12 23:41:30 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:30 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 4 is 82 bytes
//18/01/12 23:41:30 INFO DAGScheduler: Got job 9 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:30 INFO DAGScheduler: Final stage: Stage 19(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:30 INFO DAGScheduler: Parents of final stage: List(Stage 18)
//18/01/12 23:41:30 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:30 INFO DAGScheduler: Submitting Stage 19 (ShuffledRDD[24] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:30 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=19709, maxMem=140142182
//18/01/12 23:41:30 INFO MemoryStore: Block broadcast_9 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:30 INFO MemoryStore: ensureFreeSpace(1679) called with curMem=21973, maxMem=140142182
//18/01/12 23:41:30 INFO MemoryStore: Block broadcast_9_piece0 stored as bytes in memory (estimated size 1679.0 B, free 133.6 MB)
//18/01/12 23:41:30 INFO BlockManagerInfo: Added broadcast_9_piece0 in memory on localhost:54231 (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:30 INFO BlockManagerMaster: Updated info of block broadcast_9_piece0
//18/01/12 23:41:30 INFO SparkContext: Created broadcast 9 from getCallSite at DStream.scala:294
//18/01/12 23:41:30 INFO DAGScheduler: Submitting 1 missing tasks from Stage 19 (ShuffledRDD[24] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:30 INFO TaskSchedulerImpl: Adding task set 19.0 with 1 tasks
//18/01/12 23:41:30 INFO TaskSetManager: Starting task 0.0 in stage 19.0 (TID 9, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:30 INFO Executor: Running task 0.0 in stage 19.0 (TID 9)
//18/01/12 23:41:30 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:30 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 3 ms
//18/01/12 23:41:30 INFO BlockManager: Removing broadcast 8
//18/01/12 23:41:30 INFO BlockManager: Removing block broadcast_8
//18/01/12 23:41:30 INFO MemoryStore: Block broadcast_8 of size 2264 dropped from memory (free 140120794)
//18/01/12 23:41:30 INFO BlockManager: Removing block broadcast_8_piece0
//18/01/12 23:41:30 INFO MemoryStore: Block broadcast_8_piece0 of size 1679 dropped from memory (free 140122473)
//18/01/12 23:41:30 INFO BlockManagerInfo: Removed broadcast_8_piece0 on localhost:54231 in memory (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:30 INFO BlockManagerMaster: Updated info of block broadcast_8_piece0
//18/01/12 23:41:30 INFO ContextCleaner: Cleaned broadcast 8
//18/01/12 23:41:30 INFO Executor: Finished task 0.0 in stage 19.0 (TID 9). 820 bytes result sent to driver
//18/01/12 23:41:30 INFO TaskSetManager: Finished task 0.0 in stage 19.0 (TID 9) in 9 ms on localhost (1/1)
//18/01/12 23:41:30 INFO TaskSchedulerImpl: Removed TaskSet 19.0, whose tasks have all completed, from pool
//18/01/12 23:41:30 INFO DAGScheduler: Stage 19 (print at SparkStreamingOnHDFS.java:126) finished in 0.000 s
//-------------------------------------------
//Time: 1515829290000 ms
//-------------------------------------------
//
//18/01/12 23:41:30 INFO DAGScheduler: Job 9 finished: print at SparkStreamingOnHDFS.java:126, took 0.044747 s
//18/01/12 23:41:30 INFO JobScheduler: Finished job streaming job 1515829290000 ms.0 from job set of time 1515829290000 ms
//18/01/12 23:41:30 INFO ShuffledRDD: Removing RDD 19 from persistence list
//18/01/12 23:41:30 INFO BlockManager: Removing RDD 19
//18/01/12 23:41:30 INFO JobScheduler: Total delay: 0.114 s for time 1515829290000 ms (execution: 0.098 s)
//18/01/12 23:41:30 INFO MappedRDD: Removing RDD 18 from persistence list
//18/01/12 23:41:30 INFO BlockManager: Removing RDD 18
//18/01/12 23:41:30 INFO FlatMappedRDD: Removing RDD 17 from persistence list
//18/01/12 23:41:30 INFO BlockManager: Removing RDD 17
//18/01/12 23:41:30 INFO MappedRDD: Removing RDD 16 from persistence list
//18/01/12 23:41:30 INFO BlockManager: Removing RDD 16
//18/01/12 23:41:30 INFO FileInputDStream: Cleared 0 old files that were older than 1515829230000 ms:
//18/01/12 23:41:30 INFO ReceivedBlockTracker: Deleting batches ArrayBuffer()
//18/01/12 23:41:30 INFO JobGenerator: Checkpointing graph for time 1515829290000 ms
//18/01/12 23:41:30 INFO DStreamGraph: Updating checkpoint data for time 1515829290000 ms
//18/01/12 23:41:30 INFO DStreamGraph: Updated checkpoint data for time 1515829290000 ms
//18/01/12 23:41:30 INFO CheckpointWriter: Checkpoint for time 1515829290000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829290000', took 5372 bytes and 503 ms
//18/01/12 23:41:30 INFO DStreamGraph: Clearing checkpoint data for time 1515829290000 ms
//18/01/12 23:41:30 INFO DStreamGraph: Cleared checkpoint data for time 1515829290000 ms
//18/01/12 23:41:30 INFO CheckpointWriter: Saving checkpoint for time 1515829290000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829290000'
//18/01/12 23:41:30 INFO CheckpointWriter: Checkpoint for time 1515829290000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829290000', took 5368 bytes and 115 ms
//18/01/12 23:41:30 INFO DStreamGraph: Clearing checkpoint data for time 1515829290000 ms
//18/01/12 23:41:30 INFO DStreamGraph: Cleared checkpoint data for time 1515829290000 ms
//18/01/12 23:41:40 INFO FileInputDStream: Finding new files took 2 ms
//18/01/12 23:41:40 INFO FileInputDStream: New files at time 1515829300000 ms:
//
//18/01/12 23:41:40 INFO JobScheduler: Starting job streaming job 1515829300000 ms.0 from job set of time 1515829300000 ms
//18/01/12 23:41:40 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:40 INFO DAGScheduler: Registering RDD 28 (mapToPair at SparkStreamingOnHDFS.java:87)
//18/01/12 23:41:40 INFO DAGScheduler: Got job 10 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:40 INFO DAGScheduler: Final stage: Stage 21(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:40 INFO DAGScheduler: Parents of final stage: List(Stage 20)
//18/01/12 23:41:40 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:40 INFO JobScheduler: Added jobs for time 1515829300000 ms
//18/01/12 23:41:40 INFO JobGenerator: Checkpointing graph for time 1515829300000 ms
//18/01/12 23:41:40 INFO DStreamGraph: Updating checkpoint data for time 1515829300000 ms
//18/01/12 23:41:40 INFO DStreamGraph: Updated checkpoint data for time 1515829300000 ms
//18/01/12 23:41:40 INFO DAGScheduler: Submitting Stage 21 (ShuffledRDD[29] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:40 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=19709, maxMem=140142182
//18/01/12 23:41:40 INFO CheckpointWriter: Saving checkpoint for time 1515829300000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829300000'
//18/01/12 23:41:40 INFO MemoryStore: Block broadcast_10 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:40 INFO MemoryStore: ensureFreeSpace(1679) called with curMem=21973, maxMem=140142182
//18/01/12 23:41:40 INFO MemoryStore: Block broadcast_10_piece0 stored as bytes in memory (estimated size 1679.0 B, free 133.6 MB)
//18/01/12 23:41:40 INFO BlockManagerInfo: Added broadcast_10_piece0 in memory on localhost:54231 (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:40 INFO ContextCleaner: Cleaned shuffle 3
//18/01/12 23:41:40 INFO BlockManager: Removing broadcast 9
//18/01/12 23:41:40 INFO BlockManager: Removing block broadcast_9
//18/01/12 23:41:40 INFO MemoryStore: Block broadcast_9 of size 2264 dropped from memory (free 140120794)
//18/01/12 23:41:40 INFO BlockManager: Removing block broadcast_9_piece0
//18/01/12 23:41:40 INFO MemoryStore: Block broadcast_9_piece0 of size 1679 dropped from memory (free 140122473)
//18/01/12 23:41:40 INFO BlockManagerInfo: Removed broadcast_9_piece0 on localhost:54231 in memory (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:40 INFO BlockManagerMaster: Updated info of block broadcast_9_piece0
//18/01/12 23:41:40 INFO BlockManagerMaster: Updated info of block broadcast_10_piece0
//18/01/12 23:41:40 INFO SparkContext: Created broadcast 10 from broadcast at DAGScheduler.scala:838
//18/01/12 23:41:40 INFO DAGScheduler: Submitting 1 missing tasks from Stage 21 (ShuffledRDD[29] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:40 INFO TaskSchedulerImpl: Adding task set 21.0 with 1 tasks
//18/01/12 23:41:40 INFO TaskSetManager: Starting task 0.0 in stage 21.0 (TID 10, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:40 INFO Executor: Running task 0.0 in stage 21.0 (TID 10)
//18/01/12 23:41:40 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:40 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
//18/01/12 23:41:40 INFO Executor: Finished task 0.0 in stage 21.0 (TID 10). 820 bytes result sent to driver
//18/01/12 23:41:40 INFO ContextCleaner: Cleaned broadcast 9
//18/01/12 23:41:40 INFO TaskSetManager: Finished task 0.0 in stage 21.0 (TID 10) in 12 ms on localhost (1/1)
//18/01/12 23:41:40 INFO TaskSchedulerImpl: Removed TaskSet 21.0, whose tasks have all completed, from pool
//18/01/12 23:41:40 INFO DAGScheduler: Stage 21 (print at SparkStreamingOnHDFS.java:126) finished in 0.008 s
//18/01/12 23:41:40 INFO DAGScheduler: Job 10 finished: print at SparkStreamingOnHDFS.java:126, took 0.053828 s
//18/01/12 23:41:40 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:40 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 5 is 82 bytes
//18/01/12 23:41:40 INFO DAGScheduler: Got job 11 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:40 INFO DAGScheduler: Final stage: Stage 23(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:40 INFO DAGScheduler: Parents of final stage: List(Stage 22)
//18/01/12 23:41:40 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:40 INFO DAGScheduler: Submitting Stage 23 (ShuffledRDD[29] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:40 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=19709, maxMem=140142182
//18/01/12 23:41:40 INFO MemoryStore: Block broadcast_11 stored as values in memory (estimated size 2.2 KB, free 133.6 MB)
//18/01/12 23:41:40 INFO MemoryStore: ensureFreeSpace(1679) called with curMem=21973, maxMem=140142182
//18/01/12 23:41:40 INFO MemoryStore: Block broadcast_11_piece0 stored as bytes in memory (estimated size 1679.0 B, free 133.6 MB)
//18/01/12 23:41:40 INFO BlockManagerInfo: Added broadcast_11_piece0 in memory on localhost:54231 (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:40 INFO BlockManagerMaster: Updated info of block broadcast_11_piece0
//18/01/12 23:41:40 INFO SparkContext: Created broadcast 11 from getCallSite at DStream.scala:294
//18/01/12 23:41:40 INFO DAGScheduler: Submitting 1 missing tasks from Stage 23 (ShuffledRDD[29] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:40 INFO TaskSchedulerImpl: Adding task set 23.0 with 1 tasks
//18/01/12 23:41:40 INFO TaskSetManager: Starting task 0.0 in stage 23.0 (TID 11, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:40 INFO Executor: Running task 0.0 in stage 23.0 (TID 11)
//18/01/12 23:41:40 INFO BlockManager: Removing broadcast 10
//18/01/12 23:41:40 INFO BlockManager: Removing block broadcast_10
//18/01/12 23:41:40 INFO MemoryStore: Block broadcast_10 of size 2264 dropped from memory (free 140120794)
//18/01/12 23:41:40 INFO BlockManager: Removing block broadcast_10_piece0
//18/01/12 23:41:40 INFO MemoryStore: Block broadcast_10_piece0 of size 1679 dropped from memory (free 140122473)
//18/01/12 23:41:40 INFO BlockManagerInfo: Removed broadcast_10_piece0 on localhost:54231 in memory (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:40 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 0 blocks
//18/01/12 23:41:40 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 5 ms
//18/01/12 23:41:40 INFO Executor: Finished task 0.0 in stage 23.0 (TID 11). 820 bytes result sent to driver
//18/01/12 23:41:40 INFO TaskSetManager: Finished task 0.0 in stage 23.0 (TID 11) in 16 ms on localhost (1/1)
//18/01/12 23:41:40 INFO TaskSchedulerImpl: Removed TaskSet 23.0, whose tasks have all completed, from pool
//18/01/12 23:41:40 INFO BlockManagerMaster: Updated info of block broadcast_10_piece0
//18/01/12 23:41:40 INFO ContextCleaner: Cleaned broadcast 10
//18/01/12 23:41:40 INFO CheckpointWriter: Deleting hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829250000.bk
//-------------------------------------------
//Time: 1515829300000 ms
//-------------------------------------------
//
//18/01/12 23:41:40 INFO DAGScheduler: Stage 23 (print at SparkStreamingOnHDFS.java:126) finished in 0.011 s
//18/01/12 23:41:40 INFO DAGScheduler: Job 11 finished: print at SparkStreamingOnHDFS.java:126, took 0.054029 s
//18/01/12 23:41:40 INFO JobScheduler: Finished job streaming job 1515829300000 ms.0 from job set of time 1515829300000 ms
//18/01/12 23:41:40 INFO ShuffledRDD: Removing RDD 24 from persistence list
//18/01/12 23:41:40 INFO BlockManager: Removing RDD 24
//18/01/12 23:41:40 INFO JobScheduler: Total delay: 0.121 s for time 1515829300000 ms (execution: 0.112 s)
//18/01/12 23:41:40 INFO MappedRDD: Removing RDD 23 from persistence list
//18/01/12 23:41:40 INFO BlockManager: Removing RDD 23
//18/01/12 23:41:40 INFO FlatMappedRDD: Removing RDD 22 from persistence list
//18/01/12 23:41:40 INFO BlockManager: Removing RDD 22
//18/01/12 23:41:40 INFO MappedRDD: Removing RDD 21 from persistence list
//18/01/12 23:41:40 INFO BlockManager: Removing RDD 21
//18/01/12 23:41:40 INFO FileInputDStream: Cleared 0 old files that were older than 1515829240000 ms:
//18/01/12 23:41:40 INFO ReceivedBlockTracker: Deleting batches ArrayBuffer()
//18/01/12 23:41:40 INFO JobGenerator: Checkpointing graph for time 1515829300000 ms
//18/01/12 23:41:40 INFO DStreamGraph: Updating checkpoint data for time 1515829300000 ms
//18/01/12 23:41:40 INFO DStreamGraph: Updated checkpoint data for time 1515829300000 ms
//18/01/12 23:41:40 INFO CheckpointWriter: Checkpoint for time 1515829300000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829300000', took 5374 bytes and 109 ms
//18/01/12 23:41:40 INFO CheckpointWriter: Saving checkpoint for time 1515829300000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829300000'
//18/01/12 23:41:40 INFO DStreamGraph: Clearing checkpoint data for time 1515829300000 ms
//18/01/12 23:41:40 INFO DStreamGraph: Cleared checkpoint data for time 1515829300000 ms
//18/01/12 23:41:40 INFO CheckpointWriter: Deleting hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829250000
//18/01/12 23:41:40 INFO CheckpointWriter: Checkpoint for time 1515829300000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829300000', took 5370 bytes and 114 ms
//18/01/12 23:41:40 INFO DStreamGraph: Clearing checkpoint data for time 1515829300000 ms
//18/01/12 23:41:40 INFO DStreamGraph: Cleared checkpoint data for time 1515829300000 ms
//18/01/12 23:41:50 INFO FileInputDStream: Finding new files took 6 ms
//18/01/12 23:41:50 INFO FileInputDStream: New files at time 1515829310000 ms:
//hdfs://alamps:9000/library/SparkStreaming/data/wc1.txt
//18/01/12 23:41:50 INFO BlockManager: Removing broadcast 11
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_11
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_11 of size 2264 dropped from memory (free 140124737)
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_11_piece0
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_11_piece0 of size 1679 dropped from memory (free 140126416)
//18/01/12 23:41:50 INFO BlockManagerInfo: Removed broadcast_11_piece0 on localhost:54231 in memory (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_11_piece0
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned broadcast 11
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned shuffle 4
//18/01/12 23:41:50 INFO BlockManager: Removing broadcast 4
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_4
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_4 of size 2264 dropped from memory (free 140128680)
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_4_piece0
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_4_piece0 of size 1677 dropped from memory (free 140130357)
//18/01/12 23:41:50 INFO BlockManagerInfo: Removed broadcast_4_piece0 on localhost:54231 in memory (size: 1677.0 B, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_4_piece0
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned broadcast 4
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned shuffle 2
//18/01/12 23:41:50 INFO BlockManager: Removing broadcast 2
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_2
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_2 of size 2264 dropped from memory (free 140132621)
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_2_piece0
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_2_piece0 of size 1679 dropped from memory (free 140134300)
//18/01/12 23:41:50 INFO BlockManagerInfo: Removed broadcast_2_piece0 on localhost:54231 in memory (size: 1679.0 B, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_2_piece0
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned broadcast 2
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned shuffle 1
//18/01/12 23:41:50 INFO BlockManager: Removing broadcast 1
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_1_piece0
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_1_piece0 of size 1677 dropped from memory (free 140135977)
//18/01/12 23:41:50 INFO BlockManagerInfo: Removed broadcast_1_piece0 on localhost:54231 in memory (size: 1677.0 B, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_1_piece0
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_1
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_1 of size 2264 dropped from memory (free 140138241)
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned broadcast 1
//18/01/12 23:41:50 INFO BlockManager: Removing broadcast 0
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_0
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_0 of size 2264 dropped from memory (free 140140505)
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_0_piece0
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_0_piece0 of size 1677 dropped from memory (free 140142182)
//18/01/12 23:41:50 INFO BlockManagerInfo: Removed broadcast_0_piece0 on localhost:54231 in memory (size: 1677.0 B, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_0_piece0
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned broadcast 0
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned shuffle 0
//18/01/12 23:41:50 INFO MemoryStore: ensureFreeSpace(206436) called with curMem=0, maxMem=140142182
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_12 stored as values in memory (estimated size 201.6 KB, free 133.5 MB)
//18/01/12 23:41:50 INFO MemoryStore: ensureFreeSpace(31473) called with curMem=206436, maxMem=140142182
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_12_piece0 stored as bytes in memory (estimated size 30.7 KB, free 133.4 MB)
//18/01/12 23:41:50 INFO BlockManagerInfo: Added broadcast_12_piece0 in memory on localhost:54231 (size: 30.7 KB, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_12_piece0
//18/01/12 23:41:50 INFO SparkContext: Created broadcast 12 from textFileStream at SparkStreamingOnHDFS.java:67
//18/01/12 23:41:50 INFO FileInputFormat: Total input paths to process : 1
//18/01/12 23:41:50 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:50 INFO JobScheduler: Starting job streaming job 1515829310000 ms.0 from job set of time 1515829310000 ms
//18/01/12 23:41:50 INFO DAGScheduler: Registering RDD 34 (mapToPair at SparkStreamingOnHDFS.java:87)
//18/01/12 23:41:50 INFO DAGScheduler: Got job 12 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:50 INFO DAGScheduler: Final stage: Stage 25(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:50 INFO DAGScheduler: Parents of final stage: List(Stage 24)
//18/01/12 23:41:50 INFO JobScheduler: Added jobs for time 1515829310000 ms
//18/01/12 23:41:50 INFO JobGenerator: Checkpointing graph for time 1515829310000 ms
//18/01/12 23:41:50 INFO DStreamGraph: Updating checkpoint data for time 1515829310000 ms
//18/01/12 23:41:50 INFO DStreamGraph: Updated checkpoint data for time 1515829310000 ms
//18/01/12 23:41:50 INFO DAGScheduler: Missing parents: List(Stage 24)
//18/01/12 23:41:50 INFO CheckpointWriter: Saving checkpoint for time 1515829310000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829310000'
//18/01/12 23:41:50 INFO DAGScheduler: Submitting Stage 24 (MappedRDD[34] at mapToPair at SparkStreamingOnHDFS.java:87), which has no missing parents
//18/01/12 23:41:50 INFO CheckpointWriter: Deleting hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829260000.bk
//18/01/12 23:41:50 INFO MemoryStore: ensureFreeSpace(4488) called with curMem=237909, maxMem=140142182
//18/01/12 23:41:50 INFO CheckpointWriter: Checkpoint for time 1515829310000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829310000', took 5415 bytes and 36 ms
//18/01/12 23:41:50 INFO DStreamGraph: Clearing checkpoint data for time 1515829310000 ms
//18/01/12 23:41:50 INFO DStreamGraph: Cleared checkpoint data for time 1515829310000 ms
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_13 stored as values in memory (estimated size 4.4 KB, free 133.4 MB)
//18/01/12 23:41:50 INFO MemoryStore: ensureFreeSpace(3075) called with curMem=242397, maxMem=140142182
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_13_piece0 stored as bytes in memory (estimated size 3.0 KB, free 133.4 MB)
//18/01/12 23:41:50 INFO BlockManagerInfo: Added broadcast_13_piece0 in memory on localhost:54231 (size: 3.0 KB, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_13_piece0
//18/01/12 23:41:50 INFO SparkContext: Created broadcast 13 from getCallSite at DStream.scala:294
//18/01/12 23:41:50 INFO DAGScheduler: Submitting 1 missing tasks from Stage 24 (MappedRDD[34] at mapToPair at SparkStreamingOnHDFS.java:87)
//18/01/12 23:41:50 INFO TaskSchedulerImpl: Adding task set 24.0 with 1 tasks
//18/01/12 23:41:50 INFO TaskSetManager: Starting task 0.0 in stage 24.0 (TID 12, localhost, ANY, 1448 bytes)
//18/01/12 23:41:50 INFO Executor: Running task 0.0 in stage 24.0 (TID 12)
//18/01/12 23:41:50 INFO NewHadoopRDD: Input split: hdfs://alamps:9000/library/SparkStreaming/data/wc1.txt:0+65
//18/01/12 23:41:50 INFO Executor: Finished task 0.0 in stage 24.0 (TID 12). 1896 bytes result sent to driver
//18/01/12 23:41:50 INFO DAGScheduler: Stage 24 (mapToPair at SparkStreamingOnHDFS.java:87) finished in 0.318 s
//18/01/12 23:41:50 INFO DAGScheduler: looking for newly runnable stages
//18/01/12 23:41:50 INFO DAGScheduler: running: Set()
//18/01/12 23:41:50 INFO DAGScheduler: waiting: Set(Stage 25)
//18/01/12 23:41:50 INFO DAGScheduler: failed: Set()
//18/01/12 23:41:50 INFO TaskSetManager: Finished task 0.0 in stage 24.0 (TID 12) in 322 ms on localhost (1/1)
//18/01/12 23:41:50 INFO TaskSchedulerImpl: Removed TaskSet 24.0, whose tasks have all completed, from pool
//18/01/12 23:41:50 INFO DAGScheduler: Missing parents for Stage 25: List()
//18/01/12 23:41:50 INFO DAGScheduler: Submitting Stage 25 (ShuffledRDD[35] at reduceByKey at SparkStreamingOnHDFS.java:104), which is now runnable
//18/01/12 23:41:50 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=245472, maxMem=140142182
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_14 stored as values in memory (estimated size 2.2 KB, free 133.4 MB)
//18/01/12 23:41:50 INFO MemoryStore: ensureFreeSpace(1680) called with curMem=247736, maxMem=140142182
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_14_piece0 stored as bytes in memory (estimated size 1680.0 B, free 133.4 MB)
//18/01/12 23:41:50 INFO BlockManagerInfo: Added broadcast_14_piece0 in memory on localhost:54231 (size: 1680.0 B, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_14_piece0
//18/01/12 23:41:50 INFO SparkContext: Created broadcast 14 from getCallSite at DStream.scala:294
//18/01/12 23:41:50 INFO DAGScheduler: Submitting 1 missing tasks from Stage 25 (ShuffledRDD[35] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:50 INFO TaskSchedulerImpl: Adding task set 25.0 with 1 tasks
//18/01/12 23:41:50 INFO TaskSetManager: Starting task 0.0 in stage 25.0 (TID 13, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:50 INFO Executor: Running task 0.0 in stage 25.0 (TID 13)
//18/01/12 23:41:50 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
//18/01/12 23:41:50 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
//18/01/12 23:41:50 INFO Executor: Finished task 0.0 in stage 25.0 (TID 13). 1048 bytes result sent to driver
//18/01/12 23:41:50 INFO TaskSetManager: Finished task 0.0 in stage 25.0 (TID 13) in 34 ms on localhost (1/1)
//18/01/12 23:41:50 INFO TaskSchedulerImpl: Removed TaskSet 25.0, whose tasks have all completed, from pool
//18/01/12 23:41:50 INFO DAGScheduler: Stage 25 (print at SparkStreamingOnHDFS.java:126) finished in 0.029 s
//18/01/12 23:41:50 INFO DAGScheduler: Job 12 finished: print at SparkStreamingOnHDFS.java:126, took 0.442777 s
//18/01/12 23:41:50 INFO SparkContext: Starting job: print at SparkStreamingOnHDFS.java:126
//18/01/12 23:41:50 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 6 is 147 bytes
//18/01/12 23:41:50 INFO DAGScheduler: Got job 13 (print at SparkStreamingOnHDFS.java:126) with 1 output partitions (allowLocal=true)
//18/01/12 23:41:50 INFO DAGScheduler: Final stage: Stage 27(print at SparkStreamingOnHDFS.java:126)
//18/01/12 23:41:50 INFO DAGScheduler: Parents of final stage: List(Stage 26)
//18/01/12 23:41:50 INFO DAGScheduler: Missing parents: List()
//18/01/12 23:41:50 INFO DAGScheduler: Submitting Stage 27 (ShuffledRDD[35] at reduceByKey at SparkStreamingOnHDFS.java:104), which has no missing parents
//18/01/12 23:41:50 INFO MemoryStore: ensureFreeSpace(2264) called with curMem=249416, maxMem=140142182
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_15 stored as values in memory (estimated size 2.2 KB, free 133.4 MB)
//18/01/12 23:41:50 INFO MemoryStore: ensureFreeSpace(1680) called with curMem=251680, maxMem=140142182
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_15_piece0 stored as bytes in memory (estimated size 1680.0 B, free 133.4 MB)
//18/01/12 23:41:50 INFO BlockManagerInfo: Added broadcast_15_piece0 in memory on localhost:54231 (size: 1680.0 B, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_15_piece0
//18/01/12 23:41:50 INFO SparkContext: Created broadcast 15 from getCallSite at DStream.scala:294
//18/01/12 23:41:50 INFO DAGScheduler: Submitting 1 missing tasks from Stage 27 (ShuffledRDD[35] at reduceByKey at SparkStreamingOnHDFS.java:104)
//18/01/12 23:41:50 INFO TaskSchedulerImpl: Adding task set 27.0 with 1 tasks
//18/01/12 23:41:50 INFO TaskSetManager: Starting task 0.0 in stage 27.0 (TID 14, localhost, PROCESS_LOCAL, 1056 bytes)
//18/01/12 23:41:50 INFO Executor: Running task 0.0 in stage 27.0 (TID 14)
//18/01/12 23:41:50 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
//18/01/12 23:41:50 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
//18/01/12 23:41:50 INFO Executor: Finished task 0.0 in stage 27.0 (TID 14). 1055 bytes result sent to driver
//18/01/12 23:41:50 INFO TaskSetManager: Finished task 0.0 in stage 27.0 (TID 14) in 8 ms on localhost (1/1)
//18/01/12 23:41:50 INFO TaskSchedulerImpl: Removed TaskSet 27.0, whose tasks have all completed, from pool
//18/01/12 23:41:50 INFO DAGScheduler: Stage 27 (print at SparkStreamingOnHDFS.java:126) finished in 0.004 s
//-------------------------------------------
//Time: 1515829310000 ms
//-------------------------------------------
//(stream,1)
//(hive,1)
//(python,1)
//(kafka,1)
//(sql,1)
//(spark,2)
//(hadoop,1)
//(flume,1)
//(strom,1)
//(hbase,1)
//
//18/01/12 23:41:50 INFO DAGScheduler: Job 13 finished: print at SparkStreamingOnHDFS.java:126, took 0.019272 s
//18/01/12 23:41:50 INFO JobScheduler: Finished job streaming job 1515829310000 ms.0 from job set of time 1515829310000 ms
//18/01/12 23:41:50 INFO JobScheduler: Total delay: 0.929 s for time 1515829310000 ms (execution: 0.464 s)
//18/01/12 23:41:50 INFO ShuffledRDD: Removing RDD 29 from persistence list
//18/01/12 23:41:50 INFO BlockManager: Removing RDD 29
//18/01/12 23:41:50 INFO MappedRDD: Removing RDD 28 from persistence list
//18/01/12 23:41:50 INFO BlockManager: Removing RDD 28
//18/01/12 23:41:50 INFO FlatMappedRDD: Removing RDD 27 from persistence list
//18/01/12 23:41:50 INFO BlockManager: Removing RDD 27
//18/01/12 23:41:50 INFO MappedRDD: Removing RDD 26 from persistence list
//18/01/12 23:41:50 INFO BlockManager: Removing RDD 26
//18/01/12 23:41:50 INFO UnionRDD: Removing RDD 0 from persistence list
//18/01/12 23:41:50 INFO BlockManager: Removing RDD 0
//18/01/12 23:41:50 INFO FileInputDStream: Cleared 0 old files that were older than 1515829250000 ms:
//18/01/12 23:41:50 INFO ReceivedBlockTracker: Deleting batches ArrayBuffer()
//18/01/12 23:41:50 INFO JobGenerator: Checkpointing graph for time 1515829310000 ms
//18/01/12 23:41:50 INFO DStreamGraph: Updating checkpoint data for time 1515829310000 ms
//18/01/12 23:41:50 INFO DStreamGraph: Updated checkpoint data for time 1515829310000 ms
//18/01/12 23:41:50 INFO CheckpointWriter: Saving checkpoint for time 1515829310000 ms to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829310000'
//18/01/12 23:41:50 INFO BlockManager: Removing broadcast 15
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_15_piece0
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_15_piece0 of size 1680 dropped from memory (free 139890502)
//18/01/12 23:41:50 INFO BlockManagerInfo: Removed broadcast_15_piece0 on localhost:54231 in memory (size: 1680.0 B, free: 133.6 MB)
//18/01/12 23:41:50 INFO BlockManagerMaster: Updated info of block broadcast_15_piece0
//18/01/12 23:41:50 INFO BlockManager: Removing block broadcast_15
//18/01/12 23:41:50 INFO MemoryStore: Block broadcast_15 of size 2264 dropped from memory (free 139892766)
//18/01/12 23:41:50 INFO ContextCleaner: Cleaned broadcast 15
//18/01/12 23:41:50 INFO CheckpointWriter: Deleting hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829260000
//18/01/12 23:41:50 INFO CheckpointWriter: Checkpoint for time 1515829310000 ms saved to file 'hdfs://alamps:9000/library/SparkStreaming/CheckPoint_data/checkpoint-1515829310000', took 5404 bytes and 39 ms
//18/01/12 23:41:50 INFO DStreamGraph: Clearing checkpoint data for time 1515829310000 ms
//18/01/12 23:41:50 INFO DStreamGraph: Cleared checkpoint data for time 1515829310000 ms