基础的 sparkSQL操作

时间:2021-08-25 18:07:02
spark连接mysql操作 数据库jdbc 连接封装
package test.com import org.apache.spark.sql.{DataFrame, SparkSession}
/**
* Created by sx on 2018/5/31.
*/
/**
 * Small helper that reads a MySQL table into a Spark DataFrame over JDBC.
 *
 * NOTE(review): connection settings are hard-coded placeholder values
 * ("ip", root/root) — these should come from configuration or the
 * environment in real deployments, never from source.
 */
object JDBC_db {
  val url = "jdbc:mysql://ip:3306/db"
  // Legacy Connector/J driver class name; newer drivers use
  // com.mysql.cj.jdbc.Driver — kept as-is to preserve behavior.
  val driver = "com.mysql.jdbc.Driver"
  val user = "root"
  val password = "root"

  /**
   * Loads the given table via Spark's JDBC data source.
   *
   * @param sparkSession active session used to build the reader
   * @param table        name of the MySQL table (passed as `dbtable`)
   * @return a lazily-evaluated DataFrame backed by the JDBC source
   *         (despite the name `count`, no counting happens here)
   */
  def count(sparkSession: SparkSession, table: String): DataFrame =
    sparkSession.read
      .format("jdbc")
      .option("url", url)
      .option("driver", driver)
      .option("dbtable", table)
      .option("user", user)
      .option("password", password)
      .load()
}
// 创建类连接 — "create a class for the connection": usage example follows below.
import java.util.logging
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import test.com.JDBC_db

/**
 * Demo entry point: loads two MySQL tables through [[test.com.JDBC_db]],
 * registers them as temp views, and prints the UNION of their rows.
 */
object JDBC_DB {
  // Windows-only workaround so Hadoop libs can locate winutils.exe.
  System.setProperty("hadoop.home.dir", "D:\\hadoop\\hadoop-common-2.2.0-bin-master")
  // Silence Spark's verbose INFO logging; keep warnings and errors.
  Logger.getLogger("org").setLevel(Level.WARN)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").getOrCreate()
    try {
      val sparkDF = JDBC_db.count(spark, "test1")
      sparkDF.createOrReplaceTempView("aa")
      sparkDF.show()

      val sparkDF2 = JDBC_db.count(spark, "test2")
      sparkDF2.createOrReplaceTempView("bb")
      sparkDF2.show()

      // SQL UNION de-duplicates rows; use UNION ALL to keep duplicates.
      spark.sql("select * from aa union select * from bb").show()
    } finally {
      // Release the local Spark context even if a query above fails;
      // the original leaked the session on any exception.
      spark.stop()
    }
  }
}