【文件属性】:
文件名称:模型优化-mbse-overview-incose-30-july-2015
文件大小:5.71MB
文件格式:PDF
更新时间:2021-06-09 16:42:33
数据挖掘
11.9 模型优化
Scikit-learn 中有 GridSearchCV() 函数可以解决参数优化问题
# Model hyperparameter optimization: grid-search a TF-IDF + logistic-regression
# text-classification pipeline and report the best parameter combination.
from sklearn.feature_extraction.text import TfidfVectorizer
# Import from the public package path; the `sklearn.linear_model.logistic`
# submodule path is not available in current scikit-learn releases.
from sklearn.linear_model import LogisticRegression
# GridSearchCV and train_test_split live in `sklearn.model_selection`; the old
# `sklearn.grid_search` and `sklearn.cross_validation` modules were removed.
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
# NOTE(review): these metrics are not used in the visible part of the script;
# kept because later code may rely on them.
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Two-step pipeline: 'vect' turns raw text into TF-IDF features, 'clf' classifies.
pipeline = Pipeline([
    ('vect', TfidfVectorizer(stop_words='english')),
    # The grid below tries both 'l1' and 'l2' penalties. Pin the liblinear
    # solver, which accepts both, so the 'l1' candidates do not fail on
    # releases whose default solver only supports 'l2'.
    ('clf', LogisticRegression(solver='liblinear')),
])

# Search space. Keys use the `<step name>__<parameter>` convention so that
# GridSearchCV can route each value to the right pipeline step.
# 3 * 2 * 4 * 2 * 2 * 2 * 2 * 4 = 1536 candidate combinations.
parameters = {
    'vect__max_df': (0.25, 0.5, 0.75),
    'vect__stop_words': ('english', None),
    'vect__max_features': (2500, 5000, 10000, None),
    'vect__ngram_range': ((1, 1), (1, 2)),
    'vect__use_idf': (True, False),
    'vect__norm': ('l1', 'l2'),
    'clf__penalty': ('l1', 'l2'),
    'clf__C': (0.01, 0.1, 1, 10),
}

# With cv=3 every candidate is fitted three times (1536 * 3 = 4608 fits),
# so n_jobs=-1 is used to spread the work over all available cores.
grid_search = GridSearchCV(
    pipeline,
    parameters,
    n_jobs=-1,
    verbose=1,
    scoring='accuracy',
    cv=3,
)

# `df` is not defined in this snippet; it must be provided by earlier code and
# support df['text'] and df['Target'] (presumably a pandas DataFrame - confirm).
X, y = df['text'], df['Target']
# No random_state is given, so the split (and therefore the scores) can differ
# between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y)
grid_search.fit(X_train, y_train)

# Best mean cross-validated accuracy found by the search.
print('最佳效果: %0.3f' % grid_search.best_score_)
# The original author reported a best score of 0.983 for their run.

print('最优参数组合:')
best_parameters = grid_search.best_estimator_.get_params()
# Only report the parameters that were part of the search, in sorted order.
for param_name in sorted(parameters):
    print('\t%s:%r' % (param_name, best_parameters[param_name]))