// Spark MLlib 模型训练 — 分类算法 Multilayer Perceptron Classifier
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{VectorAssembler, StringIndexer}
import org.apache.spark.sql.SparkSession
// End-to-end example: train a Multilayer Perceptron classifier on a tiny in-memory
// dataset, predict on a small test set, and report accuracy.

// Create the SparkSession (local mode, as configured by the "local[*]" master).
val spark = SparkSession.builder()
  .appName("MLPClassifierExample")
  .master("local[*]")
  .getOrCreate()

// Training dataset: first column is the label, the remaining four are features.
val data = spark.createDataFrame(Seq(
  (0.0, 0.0, 0.0, 0.0, 0.0),
  (1.0, 1.0, 1.0, 1.0, 1.0),
  (1.0, 0.0, 1.0, 0.0, 0.0),
  (0.0, 1.0, 0.0, 1.0, 1.0),
  (0.0, 1.0, 1.0, 0.0, 0.0)
)).toDF("label", "feature1", "feature2", "feature3", "feature4")

// Assemble the four feature columns into a single vector column named "features".
val assembler = new VectorAssembler()
  .setInputCols(Array("feature1", "feature2", "feature3", "feature4"))
  .setOutputCol("features")
val trainingData = assembler.transform(data).select("label", "features")

// Network topology: 4 input nodes (one per feature), hidden layers of 5 and 4 nodes,
// and 2 output nodes (one per class; the labels used here are 0.0 and 1.0).
val layers = Array[Int](4, 5, 4, 2)

// Configure the MLP classifier.
val trainer = new MultilayerPerceptronClassifier()
  .setLayers(layers)
  .setLabelCol("label")
  .setFeaturesCol("features")
  .setMaxIter(100) // maximum number of training iterations

// Train the model.
val model = trainer.fit(trainingData)

// Test dataset, same column layout as the training data.
val testData = spark.createDataFrame(Seq(
  (0.0, 1.0, 1.0, 0.0, 0.0),
  (1.0, 0.0, 0.0, 1.0, 1.0)
)).toDF("label", "feature1", "feature2", "feature3", "feature4")

// Keep "label" alongside "features": the evaluator below is configured to read the
// "label" column from the predictions, so dropping it here would make evaluation fail.
val testFeatures = assembler.transform(testData).select("label", "features")

// Run the model on the test rows; this adds the prediction column used below.
val predictions = model.transform(testFeatures)
predictions.show()

// Evaluate the predictions against the true labels using the accuracy metric.
val evaluator = new MulticlassClassificationEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("accuracy")
val accuracy = evaluator.evaluate(predictions)
println(s"Test set accuracy = $accuracy")

// Shut down the SparkSession.
spark.stop()