Spark 读取 HDFS 上的文件并将数据写入 HDFS

时间:2024-03-09 11:17:14
原创,未经同意转载,复制的没唧唧
/**
 * Word-count demo: reads a text file from HDFS, counts how often each
 * space-separated token occurs, and writes the (token, count) pairs back
 * to HDFS.
 *
 * The job runs with `spark.master` set to `local`. The input and output
 * locations are hard-coded to `hdfs://m2:9820/README.md` and
 * `hdfs://m2:9820/wcresult`.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
    .set("spark.master", "local")
    .set("spark.app.name", "spark demo")
  val sc = new SparkContext(conf)
  try {
    // Read the input file from HDFS.
    val lines = sc.textFile("hdfs://m2:9820/README.md")
    // Split every line on single spaces and pair each token with a count of 1.
    val tokenPairs = lines.flatMap(_.split(" ")).map((_, 1))
    // Sum the counts per token.
    val counts = tokenPairs.reduceByKey(_ + _)
    // Write the result to HDFS.
    // NOTE(review): presumably fails if the output directory already exists — confirm.
    counts.saveAsTextFile("hdfs://m2:9820/wcresult")
  } finally {
    // Always release the SparkContext, even when the job throws.
    sc.stop()
  }
}