import org.apache.spark._
import SparkContext._
import java.util.{Calendar,Properties,Date,Locale}
import java.text.SimpleDateFormat
/**
 * Spark batch job that aggregates article statistics per subscription account,
 * together with a few small date/time helpers.
 *
 * Date-handling references (getting today's/yesterday's date, converting
 * timestamps to dates, comparing times):
 *   - http://blog.chinaunix.net/uid-25885064-id-3430852.html
 *   - http://blog.csdn.net/springlustre/article/details/47273353
 */
object tongji {

  /** Pattern used for full timestamps, e.g. "2015-07-30 05:00:50". */
  private val DateTimePattern = "yyyy-MM-dd HH:mm:ss"

  /** Pattern used for calendar dates, e.g. "2015-07-30". */
  private val DatePattern = "yyyy-MM-dd"

  // NOTE: SimpleDateFormat is not thread-safe, so every helper below builds its
  // own instance instead of sharing one.

  /**
   * Formats the current moment.
   *
   * @return the current date and time as "yyyy-MM-dd HH:mm:ss"
   */
  def getNowDate(): String =
    new SimpleDateFormat(DateTimePattern).format(new Date())

  /**
   * Formats yesterday's date.
   *
   * @return the date one day before now as "yyyy-MM-dd"
   */
  def getYesterday(): String = {
    val cal = Calendar.getInstance()
    cal.add(Calendar.DATE, -1)
    new SimpleDateFormat(DatePattern).format(cal.getTime)
  }

  /**
   * Parses a timestamp string into a [[java.util.Date]].
   *
   * An access-log style pattern ("dd/MMM/yyyy:HH:mm:ss" with an English
   * locale) was considered earlier; only "yyyy-MM-dd HH:mm:ss" is supported.
   *
   * @param tm timestamp text such as "2015-07-30 05:00:50"
   * @return the parsed date
   * @throws java.text.ParseException if `tm` does not match the pattern
   */
  def strtoDate(tm: String): Date =
    new SimpleDateFormat(DateTimePattern).parse(tm)

  /**
   * Parses a timestamp string into epoch milliseconds.
   *
   * @param tm timestamp text in "yyyy-MM-dd HH:mm:ss" form
   * @return milliseconds since the epoch, as given by `Date.getTime`
   * @throws java.text.ParseException if `tm` does not match the pattern
   */
  def strtoDatetolong(tm: String): Long =
    strtoDate(tm).getTime

  /**
   * Entry point.
   *
   * Expected arguments:
   *   - args(0): Spark master URL
   *   - args(1): application name
   *   - args(2): input path of a tab-separated text file
   *   - args(3): output path for the result text files
   *
   * Metrics written by the job:
   *   - TotalArticle: total number of articles (input lines)
   *   - TotalClick:   total clicks (sum of field 4)
   *   - TotalReadNum: total reads (sum of field 5)
   *   - TotalOpenNum: number of distinct subscription accounts (field 1)
   *   - OpenArticle / OpenClick / OpenReadNum: article count, click sum and
   *     read sum per subscription account
   *
   * The original notes also list averages (AvgArticle, AvgClick, AvgReadNum,
   * AvgOpenArticle, AvgOpenClick, AvgOpenReadNum); they are NOT computed here.
   *
   * With fewer than four arguments a usage hint is printed and nothing runs.
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 4) {
      printUsage(args)
    } else {
      runJob(args(0), args(1), args(2), args(3))
    }
  }

  /** Prints the example invocation plus what was actually received. */
  private def printUsage(args: Array[String]): Unit = {
    println(" spark://192.168.16.119:7077 SparkSubmit_Demo ")
    println(" /wxcontentdb/xrk_wx_articles/part-m-00000")
    println(" /outtxt")
    println(args.length.toString())
    // args may be empty here, so never index it directly.
    args.headOption.foreach(println)
  }

  /**
   * Runs the aggregation and writes one record per subscription account:
   * `((account, articles, clicks, reads), totalArticles, totalAccounts, totalClicks, totalReads)`.
   */
  private def runJob(master: String, appName: String, inputPath: String, outputPath: String): Unit = {
    val conf = new SparkConf()
      .setMaster(master)
      .setAppName(appName)
      .set("spark.executor.memory", "3g")
    val sc = new SparkContext(conf)
    try {
      // Split every line exactly once: (account, clicks, reads).
      // Cached because it feeds both the global totals and the per-account sums.
      val records = sc.textFile(inputPath).map { line =>
        val fields = line.split("\t")
        (fields(1), fields(4).toLong, fields(5).toLong)
      }.cache()

      // Zero-seeded aggregate: unlike reduce, this does not throw on empty input.
      val (totalArticle, totalClick, totalReadNum) = records.aggregate((0L, 0L, 0L))(
        (acc, rec) => (acc._1 + 1L, acc._2 + rec._2, acc._3 + rec._3),
        (left, right) => (left._1 + right._1, left._2 + right._2, left._3 + right._3)
      )

      // One shuffle yields article count, click sum and read sum per account,
      // replacing three separate reduceByKey passes joined together.
      val perAccount = records
        .map(rec => (rec._1, (1, rec._2, rec._3)))
        .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2, a._3 + b._3))
        .cache()

      val totalOpenNum = perAccount.count()

      val txt = perAccount.map { entry =>
        val account = entry._1
        val sums = entry._2
        ((account, sums._1, sums._2, sums._3), totalArticle, totalOpenNum, totalClick, totalReadNum)
      }
      txt.saveAsTextFile(outputPath)
    } finally {
      // Always release the context, even when a stage fails.
      sc.stop()
    }
  }
}