# ===== 模型选择 (Model selection) =====
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 3 10:10:10 2019
@author: xllix
"""
# R2 Score
def lets_try(train, labels):
    """Compare several regressors on (train, labels) by mean 5-fold CV R^2.

    Parameters
    ----------
    train : array-like of shape (n_samples, n_features)
        Feature matrix.
    labels : array-like of shape (n_samples,)
        Regression targets.

    Returns
    -------
    pandas.DataFrame
        One row per model, single column "R Square Score" (mean CV R^2),
        sorted best-first.  Also draws a bar chart of the scores.
    """
    results = {}

    def test_model(clf):
        # Shuffled 5-fold CV; fixed random_state keeps model scores comparable.
        cv = KFold(n_splits=5, shuffle=True, random_state=45)
        r2 = make_scorer(r2_score)
        r2_val_score = cross_val_score(clf, train, labels, cv=cv, scoring=r2)
        return [r2_val_score.mean()]

    results["Linear"] = test_model(linear_model.LinearRegression())
    results["SVM RBF"] = test_model(svm.SVR())
    # Additional candidates, kept (disabled) from the original article:
    # results["Ridge"] = test_model(linear_model.Ridge())
    # results["Bayesian Ridge"] = test_model(linear_model.BayesianRidge())
    # results["Hubber"] = test_model(linear_model.HuberRegressor())
    # results["Lasso"] = test_model(linear_model.Lasso(alpha=1e-4))
    # results["Bagging"] = test_model(BaggingRegressor())
    # results["RandomForest"] = test_model(RandomForestRegressor())
    # results["AdaBoost"] = test_model(AdaBoostRegressor())
    # results["SVM Linear"] = test_model(svm.SVR(kernel="linear"))

    results = pd.DataFrame.from_dict(results, orient="index")
    results.columns = ["R Square Score"]
    # BUGFIX: DataFrame.sort() was removed in pandas 0.20; sort_values()
    # is the supported replacement (same ordering semantics).
    results = results.sort_values(by="R Square Score", ascending=False)
    results.plot(kind="bar", title="Model Scores")
    axes = plt.gca()
    axes.set_ylim([0.5, 1])
    return results
#————————————————
#版权声明:本文为CSDN博主「DASEason」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
#原文链接:https://blog.csdn.net/qq547276542/article/details/75455629
# ===== SVM 网格调参 (SVM grid-search tuning) =====
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 30 21:26:42 2019
@author: xllix
"""
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import preprocessing
from sklearn import neighbors
from sklearn import svm
from sklearn import tree
from time import time
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
# Load the demo dataset and exhaustively search SVC hyper-parameters.
iris = datasets.load_iris()

# Parameter grid: kernel type, penalty C, and RBF width gamma.
parameters = {'kernel': ('linear', 'rbf'),
              'C': [1, 2, 4],
              'gamma': [0.125, 0.25, 0.5, 1, 2, 4]}
svr = svm.SVC()
clf = GridSearchCV(svr, parameters, n_jobs=-1)  # n_jobs=-1: use all CPU cores
clf.fit(iris.data, iris.target)

# Persist the full cross-validation table.
# BUGFIX: the original wrapped this in open('cv_result.csv', 'w'); a text
# handle without newline='' inserts blank rows on Windows.  Passing the
# path lets pandas manage the file handle correctly itself.
cv_result = pd.DataFrame.from_dict(clf.cv_results_)
cv_result.to_csv('cv_result.csv')

print('The parameters of the best model are: ')
print(clf.best_params_)

# NOTE(review): predicting on the training data over-estimates quality;
# behaviour kept as in the original article.
y_pred = clf.predict(iris.data)
print(classification_report(y_true=iris.target, y_pred=y_pred))
#————————————————
#版权声明:本文为CSDN博主「Young_618」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
#原文链接:https://blog.csdn.net/cymy001/article/details/78578665
# ===== k折交叉验证 (k-fold cross-validation) =====
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 3 08:57:44 2019
@author: xllix
"""
#https://blog.csdn.net/cymy001/article/details/78578665
#首要工作:切分数据集train/validation/test(训练集上再切出来一部分作为验证集,用于评估模型,防止过拟合 )
from sklearn.datasets import load_iris
# BUGFIX: cross_val_score was used below but never imported.
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LogisticRegression

iris = load_iris()
# BUGFIX: the original snippet referenced an undefined `logreg`
# (NameError at runtime); define the estimator it clearly intends.
logreg = LogisticRegression(max_iter=1000)

# Plain (ordered) 5-fold split on train/validation.
kfold = KFold(n_splits=5)
scores = cross_val_score(logreg, iris.data, iris.target, cv=kfold)

# Shuffled 3-fold split: shuffle=True perturbs sample order before
# splitting; random_state=0 makes the permutation reproducible.
kfold = KFold(n_splits=3, shuffle=True, random_state=0)
scores = cross_val_score(logreg, iris.data, iris.target, cv=kfold)
# 网友评论 (reader comments — residue from the scraped blog page)