Spark MLlib模型训练—分类算法Multilayer Perceptron Classifier

时间:2025-04-09 12:53:17
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{VectorAssembler, StringIndexer}
import org.apache.spark.sql.SparkSession

// End-to-end example: train a Multilayer Perceptron classifier on a tiny
// in-memory dataset, predict on a small test set, and report accuracy.

// Create the SparkSession (local mode, using all available cores).
val spark = SparkSession.builder()
  .appName("MLPClassifierExample")
  .master("local[*]")
  .getOrCreate()

// Prepare the training dataset: one label column followed by four numeric features.
val data = spark.createDataFrame(Seq(
  (0.0, 0.0, 0.0, 0.0, 0.0),
  (1.0, 1.0, 1.0, 1.0, 1.0),
  (1.0, 0.0, 1.0, 0.0, 0.0),
  (0.0, 1.0, 0.0, 1.0, 1.0),
  (0.0, 1.0, 1.0, 0.0, 0.0)
)).toDF("label", "feature1", "feature2", "feature3", "feature4")

// Assemble the four feature columns into a single vector column named "features".
val assembler = new VectorAssembler()
  .setInputCols(Array("feature1", "feature2", "feature3", "feature4"))
  .setOutputCol("features")

val trainingData = assembler.transform(data).select("label", "features")

// Network topology: 4 input nodes (one per feature), hidden layers of 5 and 4
// nodes, and 2 output nodes (one per class: labels 0.0 and 1.0).
val layers = Array[Int](4, 5, 4, 2)

// Configure the MLP classifier.
val trainer = new MultilayerPerceptronClassifier()
  .setLayers(layers)
  .setLabelCol("label")
  .setFeaturesCol("features")
  .setMaxIter(100) // maximum number of training iterations

// Train the model.
val model = trainer.fit(trainingData)

// Prepare the test data with the same schema as the training data.
val testData = spark.createDataFrame(Seq(
  (0.0, 1.0, 1.0, 0.0, 0.0),
  (1.0, 0.0, 0.0, 1.0, 1.0)
)).toDF("label", "feature1", "feature2", "feature3", "feature4")

// Keep the "label" column alongside "features": the evaluator below reads
// "label", so selecting only "features" would make evaluate() fail.
val testFeatures = assembler.transform(testData).select("label", "features")

// Run predictions; the model appends its output columns (including "prediction").
val predictions = model.transform(testFeatures)
predictions.show()

// Evaluate the model: compare "prediction" against "label" using accuracy.
val evaluator = new MulticlassClassificationEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("accuracy")

val accuracy = evaluator.evaluate(predictions)
println(s"Test set accuracy = $accuracy")

// Shut down the SparkSession.
spark.stop()