728x90
반응형
In [1]:
from sklearn.datasets import fetch_openml

# Fetch the OpenML dataset named "mnist_784" (version 1); later cells read
# its `.data` and `.target` attributes.
mnist = fetch_openml(name='mnist_784', version=1)
In [2]:
from sklearn.model_selection import train_test_split

# Two-stage split: first hold out 10,000 samples as the test set, then take
# another 10,000 from what remains as the validation set. The same seed is
# used for both splits so the partition is reproducible.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    mnist.data,
    mnist.target,
    test_size=10000,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=10000,
    random_state=42,
)
In [3]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
In [4]:
# The four base classifiers, all seeded with 42 for reproducibility.
rnd_clf = RandomForestClassifier(random_state=42, n_estimators=10)
ext_clf = ExtraTreesClassifier(random_state=42, n_estimators=10)
svm_clf = LinearSVC(random_state=42, max_iter=10000)
mlp_clf = MLPClassifier(random_state=42)
In [5]:
# Check performance on the validation set
from sklearn.metrics import accuracy_score
In [7]:
# Train every base classifier in one pass.
# `models` is reused by later cells (validation loop and blender features).
models = [rnd_clf, ext_clf, svm_clf, mlp_clf]
for model in models:
    # Print the estimator first so progress is visible during the slow fits.
    print("훈련 :", model)
    model.fit(X_train, y_train)
In [12]:
# Evaluate every base classifier on the validation set in one pass.
for model in models:
    y_pred = model.predict(X_val)
    print("정확도 :", accuracy_score(y_val, y_pred))
In [29]:
# Voting ensemble: pair each base classifier with the name used to address
# it inside the VotingClassifier.
from sklearn.ensemble import VotingClassifier

named_models = list(zip(
    ("RandomForest", "ExtraTree", "LinearSVM", "MLP"),
    (rnd_clf, ext_clf, svm_clf, mlp_clf),
))
In [30]:
# Build the ensemble from the (name, estimator) pairs; voting mode is left at
# the library default.
voting_clf = VotingClassifier(estimators=named_models)
In [31]:
# Fit the whole ensemble on the training split.
voting_clf.fit(X=X_train, y=y_train)
Out[31]:
In [32]:
# Accuracy of the voting ensemble on the validation split.
voting_clf.score(X=X_val, y=y_val)
Out[32]:
In [33]:
# Validation accuracy of each fitted sub-estimator inside the ensemble.
# VotingClassifier fits its clones on LabelEncoder-encoded targets, so the
# labels must go through the same encoder (`le_`) before scoring; the raw
# labels would not match the clones' integer predictions.
y_val_encoded = voting_clf.le_.transform(y_val)
[model.score(X_val, y_val_encoded) for model in voting_clf.estimators_]
Out[33]:
In [41]:
# Remove the SVM from the ensemble configuration.
# "drop" is the supported marker for disabling an estimator; passing None was
# deprecated in scikit-learn 0.22 and is rejected by later releases.
# This only changes the configuration: an already-fitted ensemble keeps its
# trained copy until it is refit or the copy is removed manually.
voting_clf.set_params(LinearSVM="drop")
Out[41]:
In [42]:
# Display the ensemble's fitted sub-estimators to check whether the SVM is
# still among them.
voting_clf.estimators_
Out[42]:
In [43]:
# The fitted SVM was still present, so remove it from the fitted state by hand.
# It is looked up by name rather than by position: `del estimators_[2]` would
# delete a different estimator (the MLP) if this cell were run a second time.
svm_fitted = voting_clf.named_estimators_.pop("LinearSVM", None)
if svm_fitted is not None:
    # Identity-based filter keeps every other fitted estimator untouched.
    voting_clf.estimators_ = [
        est for est in voting_clf.estimators_ if est is not svm_fitted
    ]
In [44]:
# Re-score the ensemble on the validation split now that the SVM is gone.
voting_clf.score(X=X_val, y=y_val)
Out[44]:
In [45]:
# The SVM was what had been dragging the ensemble's accuracy down.
In [46]:
# Soft voting
# The ensemble is already trained, so just switch the mode from hard to soft
# NOTE(review): soft voting presumably needs class probabilities from every
# remaining estimator — confirm the LinearSVC has been removed before this runs.
voting_clf.voting = 'soft'
In [47]:
# Evaluate again on the validation split, this time with soft voting.
voting_clf.score(X=X_val, y=y_val)
Out[47]:
In [49]:
# Ensemble accuracy on the held-out test split (expected to beat the
# individual classifiers).
voting_clf.score(X=X_test, y=y_test)
Out[49]:
In [50]:
# Test accuracy of each fitted sub-estimator, for comparison with the ensemble.
# The clones inside VotingClassifier were trained on LabelEncoder-encoded
# targets, so the test labels are encoded with the same encoder (`le_`) before
# scoring; the raw labels would not match their integer predictions.
y_test_encoded = voting_clf.le_.transform(y_test)
[model.score(X_test, y_test_encoded) for model in voting_clf.estimators_]
Out[50]:
In [51]:
import numpy as np

# Uninitialised buffer for the blender's training features: one row per
# validation sample, one column per base classifier.
X_val_pred = np.empty(shape=(len(X_val), len(models)), dtype=np.float32)
In [52]:
# Fill column i with the i-th base classifier's predictions on the
# validation set; these columns become the blender's input features.
for i, model in enumerate(models):
    X_val_pred[:, i] = model.predict(X_val)
In [53]:
# Display the prediction matrix that will be used to train the blender.
X_val_pred
Out[53]:
In [54]:
# Blender: a random forest trained on the base classifiers' validation
# predictions. oob_score=True makes `oob_score_` available after fitting.
rnd_forest_blender = RandomForestClassifier(
    random_state=42,
    oob_score=True,
    n_estimators=200,
)
rnd_forest_blender.fit(X_val_pred, y_val)
Out[54]:
In [55]:
# Display the blender's out-of-bag score (enabled via oob_score=True at
# construction).
rnd_forest_blender.oob_score_
Out[55]:
In [63]:
# Build the blender's test-set features the same way as for validation:
# one row per test sample, one column per base classifier.
X_test_pred = np.empty((len(X_test), len(models)), dtype=np.float32)
for i, model in enumerate(models):
    # Column i receives the i-th base classifier's test predictions.
    X_test_pred[:, i] = model.predict(X_test)
In [83]:
# Final stacked prediction: the blender maps base-classifier outputs to labels.
y_pred = rnd_forest_blender.predict(X=X_test_pred)
In [84]:
# Test accuracy of the stacked (blended) ensemble.
accuracy_score(y_true=y_test, y_pred=y_pred)
Out[84]:
728x90
반응형