文件名称:模型优化-mbse-overview-incose-30-july-2015
文件大小:5.71MB
文件格式:PDF
更新时间:2024-06-29 10:29:13
数据挖掘
# 11.9 Model optimization
# scikit-learn provides GridSearchCV() to solve the parameter-optimization
# problem: it exhaustively evaluates every combination in a parameter grid
# with cross-validation and keeps the best-scoring one.

# Model parameter optimization
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, accuracy_score
# GridSearchCV and train_test_split live in sklearn.model_selection; the old
# sklearn.grid_search / sklearn.cross_validation modules no longer exist.
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline

# Two-step pipeline: TF-IDF text vectorization followed by a logistic
# regression classifier. The step names ('vect', 'clf') are the prefixes used
# in the parameter grid below.
pipeline = Pipeline([
    ('vect', TfidfVectorizer(stop_words='english')),
    # The grid below searches both 'l1' and 'l2' penalties. 'liblinear'
    # supports both, whereas the current default solver ('lbfgs') rejects
    # 'l1'. 'liblinear' was the default solver in older scikit-learn releases.
    ('clf', LogisticRegression(solver='liblinear')),
])

# Search space, keyed as '<step name>__<parameter name>'.
# 3 * 2 * 4 * 2 * 2 * 2 * 2 * 4 = 1536 combinations; with cv=3 that is
# 4608 model fits.
parameters = {
    'vect__max_df': (0.25, 0.5, 0.75),
    'vect__stop_words': ('english', None),
    'vect__max_features': (2500, 5000, 10000, None),
    'vect__ngram_range': ((1, 1), (1, 2)),
    'vect__use_idf': (True, False),
    'vect__norm': ('l1', 'l2'),
    'clf__penalty': ('l1', 'l2'),
    'clf__C': (0.01, 0.1, 1, 10),
}

# n_jobs=-1 uses all available CPU cores; candidates are ranked by accuracy
# under 3-fold cross-validation.
grid_search = GridSearchCV(
    pipeline,
    parameters,
    n_jobs=-1,
    verbose=1,
    scoring='accuracy',
    cv=3,
)

# NOTE(review): `df` is not defined in this snippet; it is presumably a
# DataFrame with a 'text' column (raw documents) and a 'Target' column
# (labels) created earlier -- confirm against the surrounding material.
X, y = df['text'], df['Target']
# Hold out a test split (library-default proportions); the grid search only
# ever sees the training portion.
X_train, X_test, y_train, y_test = train_test_split(X, y)
grid_search.fit(X_train, y_train)

# best_score_ is the mean cross-validated score of the best parameter set.
print('最佳效果: %0.3f' % grid_search.best_score_)
# Example output from the original material: 最佳效果: 0.983
print('最优参数组合:')
best_parameters = grid_search.best_estimator_.get_params()
# Report only the parameters that were part of the search, in sorted order.
for param_name in sorted(parameters.keys()):
    print('\t%s:%r' % (param_name, best_parameters[param_name]))