一、一般模型
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# NOTE: `%matplotlib inline` is an IPython/Jupyter magic, not Python syntax.
# Uncomment it only when running this cell inside a notebook.
# %matplotlib inline

# Load the iris data set: feature matrix `x` and class labels `y`.
iris = load_iris()
x = iris.data
y = iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=0
)

# Standardize the features. The scaler is fitted on the training split only
# and then reused on the test split, so no test statistics leak into training.
sc = StandardScaler()
x_train_std = sc.fit_transform(x_train)
x_test_std = sc.transform(x_test)

# Fit a logistic-regression classifier with the library defaults and predict
# the labels of the held-out samples.
lr = LogisticRegression()
lr.fit(x_train_std, y_train)
y_pred = lr.predict(x_test_std)

# Evaluate the model. This is the accuracy (fraction of correct predictions),
# not the error rate; it equals np.average(y_test == y_pred).
accuracy_score = metrics.accuracy_score(y_test, y_pred)
print(accuracy_score)
结果是(测试集准确率):0.82222222222222219
二、加入正则项(使用带交叉验证的岭分类器 RidgeClassifierCV 选择正则化强度 alpha):
from sklearn.linear_model import RidgeClassifierCV

# Candidate regularization strengths: 10 values log-spaced from 1e-3 to 1e2.
alpha = np.logspace(-3, 2, 10)

# Ridge classifier that picks its alpha from the candidates above using
# 5-fold cross-validation on the (standardized) training split.
ridge_model = RidgeClassifierCV(alphas=alpha, cv=5)
ridge_model.fit(x_train_std, y_train)

# Show the selected alpha explicitly; a bare `ridge_model.alpha_` expression
# in the middle of a cell/script discards the value without displaying it.
print("alpha_ =", ridge_model.alpha_)

# Accuracy of the ridge classifier on the held-out test split.
y_pred_ridge = ridge_model.predict(x_test_std)
accuracy_score = metrics.accuracy_score(y_test, y_pred_ridge)
print(accuracy_score)
结果是(测试集准确率):0.77777777777777779