1. map: multiply every element of a collection by 2
2. filter: keep only the even numbers in a collection
3. flatMap: split lines of text into individual words
4. groupByKey: group the scores of each class
5. reduceByKey: compute the total score of each class
6. sortByKey / sortBy: sort student scores
7. join: print each student's scores
8. cogroup: print each student's scores

The class below works through the cogroup case (item 8) end to end; minimal sketches of items 1-7 follow after it.
package sparkcore.java;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
/**
 * Transformation operations in practice
*/
public class TransformationOperation {
    public static void main(String[] args) {
        // The cogroup case below is the one whose output appears at the end of
        // this file; the other transformations are sketched after this class.
        cogroup();
    }

    /**
     * cogroup: print each student's scores
     */
    private static void cogroup() {
        SparkConf conf = new SparkConf().setAppName("cogroup").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // (student id, student name)
        List<Tuple2<Integer, String>> studentList = Arrays.asList(
                new Tuple2<Integer, String>(1, "leo"),
                new Tuple2<Integer, String>(2, "jack"),
                new Tuple2<Integer, String>(3, "tom"));
        // (student id, score): each student has two scores
        List<Tuple2<Integer, Integer>> scoreList = Arrays.asList(
                new Tuple2<Integer, Integer>(1, 100),
                new Tuple2<Integer, Integer>(2, 90),
                new Tuple2<Integer, Integer>(3, 60),
                new Tuple2<Integer, Integer>(1, 70),
                new Tuple2<Integer, Integer>(2, 80),
                new Tuple2<Integer, Integer>(3, 50));

        JavaPairRDD<Integer, String> students = sc.parallelizePairs(studentList);
        JavaPairRDD<Integer, Integer> scores = sc.parallelizePairs(scoreList);

        // cogroup groups both RDDs by key in one pass: for every student id
        // we get (all matching names, all matching scores)
        JavaPairRDD<Integer, Tuple2<Iterable<String>, Iterable<Integer>>> studentScores =
                students.cogroup(scores);

        studentScores.foreach(
                new VoidFunction<Tuple2<Integer, Tuple2<Iterable<String>, Iterable<Integer>>>>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public void call(
                            Tuple2<Integer, Tuple2<Iterable<String>, Iterable<Integer>>> studentScore)
                            throws Exception {
                        System.out.println("student id: " + studentScore._1);
                        System.out.println("student name: " + studentScore._2._1);
                        System.out.println("student score: " + studentScore._2._2);
                        System.out.println("=======================================");
                    }
                });

        sc.close();
// Output:
// student id: 1
// student name: CompactBuffer(leo)
// student score: CompactBuffer(100, 70)
// =======================================
// student id: 3
// student name: CompactBuffer(tom)
// student score: CompactBuffer(60, 50)
// =======================================
// student id: 2
// student name: CompactBuffer(jack)
// student score: CompactBuffer(90, 80)
// =======================================
}
}
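
A minimal sketch of item 1 (map), not recovered from the original file: the class name MapOperation, the sample numbers 1 to 5, and the use of Java 8 lambdas with the Spark Java API are illustrative assumptions (the anonymous Function classes imported above would work the same way).

package sparkcore.java;

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class MapOperation {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("map").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // hypothetical sample data
        JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

        // map: apply the function to every element, producing a new RDD
        JavaRDD<Integer> doubled = numbers.map(n -> n * 2);

        doubled.foreach(n -> System.out.println(n));  // prints the doubled values: 2, 4, 6, 8, 10

        sc.close();
    }
}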
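
Item 2 (filter) under the same assumptions (hypothetical class name and sample data); the predicate keeps an element only when it returns true.

package sparkcore.java;

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class FilterOperation {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("filter").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // hypothetical sample data
        JavaRDD<Integer> numbers = sc.parallelize(
                Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));

        // filter: keep only the even numbers
        JavaRDD<Integer> evenNumbers = numbers.filter(n -> n % 2 == 0);

        evenNumbers.foreach(n -> System.out.println(n));  // 2, 4, 6, 8, 10

        sc.close();
    }
}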
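
Item 3 (flatMap), again with made-up lines of text; this sketch assumes the Spark 2.x Java API, where FlatMapFunction returns a java.util.Iterator (consistent with the Iterator import in the class above).

package sparkcore.java;

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class FlatMapOperation {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("flatMap").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // hypothetical sample lines
        JavaRDD<String> lines = sc.parallelize(
                Arrays.asList("hello you", "hello me", "hello world"));

        // flatMap: each line is split into words, and the resulting
        // iterators are flattened into a single RDD of words
        JavaRDD<String> words = lines.flatMap(
                line -> Arrays.asList(line.split(" ")).iterator());

        words.foreach(word -> System.out.println(word));

        sc.close();
    }
}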
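
Item 4 (groupByKey), sketched with hypothetical (class name, score) pairs: all scores that share a key are collected into one Iterable.

package sparkcore.java;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class GroupByKeyOperation {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("groupByKey").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // hypothetical (class name, score) pairs
        List<Tuple2<String, Integer>> scoreList = Arrays.asList(
                new Tuple2<String, Integer>("class1", 80),
                new Tuple2<String, Integer>("class2", 75),
                new Tuple2<String, Integer>("class1", 90),
                new Tuple2<String, Integer>("class2", 60));
        JavaPairRDD<String, Integer> scores = sc.parallelizePairs(scoreList);

        // groupByKey: every class name is mapped to an Iterable of its scores
        JavaPairRDD<String, Iterable<Integer>> groupedScores = scores.groupByKey();

        groupedScores.foreach(classScores -> {
            System.out.println("class: " + classScores._1);
            for (Integer score : classScores._2) {
                System.out.println(score);
            }
        });

        sc.close();
    }
}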
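
Item 5 (reduceByKey), reusing the same hypothetical score data: rather than materializing the groups, reduceByKey folds the values of each key with the supplied function, here addition, giving each class's total.

package sparkcore.java;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class ReduceByKeyOperation {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("reduceByKey").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // hypothetical (class name, score) pairs
        List<Tuple2<String, Integer>> scoreList = Arrays.asList(
                new Tuple2<String, Integer>("class1", 80),
                new Tuple2<String, Integer>("class2", 75),
                new Tuple2<String, Integer>("class1", 90),
                new Tuple2<String, Integer>("class2", 60));
        JavaPairRDD<String, Integer> scores = sc.parallelizePairs(scoreList);

        // reduceByKey: values with the same key are combined pairwise by summing,
        // so each class ends up with its total score
        JavaPairRDD<String, Integer> totalScores = scores.reduceByKey((v1, v2) -> v1 + v2);

        totalScores.foreach(classScore ->
                System.out.println(classScore._1 + ": " + classScore._2));

        sc.close();
    }
}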
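
Item 6 (sortByKey and sortBy), with made-up (score, name) pairs: sortByKey(false) sorts a pair RDD by its key in descending order, while sortBy sorts a plain RDD by a computed key.

package sparkcore.java;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class SortOperation {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("sort").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // hypothetical (score, student name) pairs
        List<Tuple2<Integer, String>> scoreList = Arrays.asList(
                new Tuple2<Integer, String>(65, "leo"),
                new Tuple2<Integer, String>(50, "tom"),
                new Tuple2<Integer, String>(100, "marry"),
                new Tuple2<Integer, String>(85, "jack"));
        JavaPairRDD<Integer, String> scores = sc.parallelizePairs(scoreList);

        // sortByKey(false): sort by the key (the score) in descending order
        JavaPairRDD<Integer, String> sortedScores = scores.sortByKey(false);
        sortedScores.foreach(t -> System.out.println(t._1 + ": " + t._2));

        // sortBy: sort a plain RDD by a computed key, here the element itself,
        // ascending, into a single partition
        JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(3, 1, 4, 2));
        JavaRDD<Integer> sortedNumbers = numbers.sortBy(n -> n, true, 1);
        sortedNumbers.foreach(n -> System.out.println(n));

        sc.close();
    }
}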
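
Item 7 (join), with hypothetical student and score pairs shaped like the cogroup data above: join emits one combined (name, score) tuple per matching key, whereas cogroup returns the full Iterables for each side.

package sparkcore.java;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class JoinOperation {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("join").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // hypothetical (student id, name) and (student id, score) pairs
        List<Tuple2<Integer, String>> studentList = Arrays.asList(
                new Tuple2<Integer, String>(1, "leo"),
                new Tuple2<Integer, String>(2, "jack"),
                new Tuple2<Integer, String>(3, "tom"));
        List<Tuple2<Integer, Integer>> scoreList = Arrays.asList(
                new Tuple2<Integer, Integer>(1, 100),
                new Tuple2<Integer, Integer>(2, 90),
                new Tuple2<Integer, Integer>(3, 60));
        JavaPairRDD<Integer, String> students = sc.parallelizePairs(studentList);
        JavaPairRDD<Integer, Integer> scores = sc.parallelizePairs(scoreList);

        // join: pair up the name and the score that share the same student id
        JavaPairRDD<Integer, Tuple2<String, Integer>> studentScores = students.join(scores);

        studentScores.foreach(t -> {
            System.out.println("student id: " + t._1);
            System.out.println("student name: " + t._2._1);
            System.out.println("student score: " + t._2._2);
            System.out.println("=======================================");
        });

        sc.close();
    }
}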