填补缺失值
import pandas as pd
# Read the raw sheet that contains gaps. header=None keeps the first row as
# data and gives the columns integer labels instead of names.
missing_data_path = r"C:\Users\CGJ\Desktop\example\missing_data.xls"
data = pd.read_excel(missing_data_path, header=None)
# Preview the first ten rows (shown as cell output in a notebook).
data.head(10)

image.png
# Fill the missing values in every column by interpolation.
#
# The previous per-column loop called ``interpolate(inplace=True)`` on
# ``data.iloc[:, i]``. That expression yields a temporary Series, so the
# in-place update is not guaranteed to reach ``data`` (and never does under
# pandas Copy-on-Write). Interpolating the whole frame and rebinding the
# result is unambiguous; DataFrame.interpolate works column by column with the
# same default (linear) method.
data = data.interpolate()
# Preview the first ten rows after filling.
data.head(10)

image.png
对数据进行预测
# Load the modelling table as a plain NumPy array. ``DataFrame.as_matrix()``
# was removed in pandas 1.0; ``to_numpy()`` is its replacement.
data = pd.read_excel(r"C:\Users\CGJ\Desktop\example\model.xls").to_numpy()
# The first three columns are used as features, the fourth as the target.
X, y = data[:, :3], data[:, 3]
from sklearn.model_selection import train_test_split
# No random_state is passed, so the train/test partition (and therefore the
# score below) varies from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)
from sklearn.tree import DecisionTreeClassifier
# Decision tree with default hyper-parameters, fitted on the training split.
dt_clf = DecisionTreeClassifier()
dt_clf.fit(X_train, y_train)
# Mean accuracy on the held-out test split.
dt_clf.score(X_test, y_test)

image.png
import seaborn as sns
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
# Plot the decision tree's confusion matrix on the test split as a heat map.
sns.set()
y_true = y_test
y_pred = dt_clf.predict(X_test)
# Rows are the true classes, columns the predicted classes.
C2 = confusion_matrix(y_true, y_pred)
# fmt="d" prints the integer counts as-is; the default ".2g" format would
# show counts of 100 or more in scientific notation (e.g. 1.2e+02).
sns.heatmap(C2, annot=True, fmt="d")
# Label the axes so the orientation of the matrix is unambiguous.
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.show()

image.png
使用极端随机树（Extra-Trees，随机森林的一种变体）
from sklearn.ensemble import ExtraTreesClassifier
# Train an Extra-Trees ensemble of 1000 trees on the same training split.
# Bootstrap sampling is enabled together with the out-of-bag score, which
# is only available when bootstrap=True.
et_clf = ExtraTreesClassifier(
    oob_score=True,
    bootstrap=True,
    n_estimators=1000,
)
et_clf.fit(X_train, y_train)
# Mean accuracy on the held-out test split.
et_clf.score(X_test, y_test)

image.png
网友评论