K近邻算法实例红酒分类预测

时间:2023-02-23 11:24:02
import pandas as pd
from sklearn import datasets

# Load the wine dataset that ships with scikit-learn.
wine = datasets.load_wine()

# Wrap the feature matrix (178 rows x 13 columns according to the original
# author's note) and the class labels in DataFrames.
wine_data = pd.DataFrame(wine.data)
wine_target = pd.DataFrame(wine.target)

# Place the labels in front of the features as a column named 'class'.
wine_data.insert(loc=0, column='class', value=wine_target)

# Shuffle all rows (sample(frac=1) draws every row in random order) and
# renumber the index from 0; the renumbering step is optional.
wine = wine_data.sample(frac=1).reset_index(drop=True)

# Reserve the last 10 shuffled rows for a final prediction check.
wine_predict = wine.iloc[-10:].reset_index(drop=True)
# True labels of the reserved rows, to compare against the predictions.
wine_predict_target = wine_predict['class']
# Features of the reserved rows; these are what predict() receives.
wine_predict_feature = wine_predict.drop(columns='class')

# Keep everything except those 10 rows for training and testing.
wine = wine.iloc[:-10]
# The 'class' column is the target; all remaining columns are the features,
# so each row maps 13 feature values to one target.
targets = wine['class']
features = wine.drop(columns='class')


# Split the remaining rows into a training set and a test set,
# holding back 25% of them for testing.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.25,
)

# Standardize the features before classification.
from sklearn.preprocessing import StandardScaler  # standardization scaler

scaler = StandardScaler()

# Fit the scaler on the training features only, so the mean and standard
# deviation come from the data the model is allowed to learn from.
x_train = scaler.fit_transform(x_train)

# Reuse the training statistics for the test set and for the 10 hold-out rows.
# Calling fit_transform here would re-fit the scaler on each set separately,
# putting them on a different scale than the training data (and, for the
# hold-out, estimating mean/std from only 10 rows), which distorts the
# distances KNN relies on.
x_test = scaler.transform(x_test)
wine_predict_feature = scaler.transform(wine_predict_feature)

# Classify with the K-nearest-neighbours algorithm.
from sklearn.neighbors import KNeighborsClassifier

# Build the classifier (5 neighbours, algorithm='auto') and train it on the
# training features and targets; fit() returns the fitted estimator itself.
knn = KNeighborsClassifier(algorithm='auto', n_neighbors=5).fit(x_train, y_train)

# Score the model: predict from x_test, compare with the true y_test and
# report the resulting accuracy.
accuracy = knn.score(x_test, y_test)

# Predict the classes of the 10 hold-out rows.
result = knn.predict(wine_predict_feature)