In [1]:
from sklearn.datasets import make_moons
m = 1000
X_moons, y_moons = make_moons(m,noise=0.1,random_state=42)
In [7]:
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'NanumBarunGothic'
plt.plot(X_moons[y_moons == 1, 0], X_moons[y_moons == 1, 1], 'go', label='Positive')
plt.plot(X_moons[y_moons == 0, 0], X_moons[y_moons == 0, 1], 'r^', label='Negative')
plt.legend()
plt.show()
In [8]:
# Add a bias feature (a column of 1s)
import numpy as np
X_moons_bias = np.c_[np.ones((m, 1)), X_moons]
In [9]:
X_moons_bias[:5]
Out[9]:
In [12]:
# Reshape y_moons into a column vector
y_moons_colvector = y_moons.reshape(-1,1)
In [14]:
y_moons_colvector[:5]
Out[14]:
In [19]:
# Split into training and test sets
test_ratio = 0.2
test_size = int(m * test_ratio)
X_train = X_moons_bias[:-test_size]
X_test = X_moons_bias[-test_size:]
y_train = y_moons_colvector[:-test_size]
y_test = y_moons_colvector[-test_size:]
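make_moons returns the samples already shuffled, so slicing off the last 20% is fine here. If the data were ordered by class, you would shuffle before splitting; a minimal sketch (assuming numpy is imported as np):

# Hypothetical shuffle-before-split, only needed if the data were ordered
shuffle_idx = np.random.permutation(m)
X_shuffled = X_moons_bias[shuffle_idx]
y_shuffled = y_moons_colvector[shuffle_idx]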
In [25]:
# Build a random mini-batch
def minibatch(X_train, y_train, batch_size):
    rnd_indices = np.random.randint(0, len(X_train), batch_size)  # sample batch_size indices (with replacement)
    X_batch = X_train[rnd_indices]
    y_batch = y_train[rnd_indices]
    return X_batch, y_batch
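Note that np.random.randint samples indices with replacement, so a pass over n_batches batches does not necessarily visit every training sample. A sketch of an epoch-based alternative without replacement (not used below):

def minibatch_epoch(X_train, y_train, batch_size):
    # yield batches that cover every sample exactly once, in random order
    perm = np.random.permutation(len(X_train))
    for start in range(0, len(X_train), batch_size):
        idx = perm[start:start + batch_size]
        yield X_train[idx], y_train[idx]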
In [28]:
X_batch, y_batch = minibatch(X_train,y_train,5) # batch_size = 5
X_batch, y_batch
Out[28]:
In [31]:
# Reset the default graph first
import tensorflow as tf
tf.reset_default_graph()
# The moons data has two input features, so the input dimension is 2.
n_inputs = 2
In [33]:
X = tf.placeholder(tf.float32,shape=(None,n_inputs+1))
y = tf.placeholder(tf.float32,shape=(None,1))
theta = tf.Variable(tf.random_uniform([n_inputs + 1,1], -1.0,1.0, seed=42))
logits = tf.matmul(X,theta)
y_proba = 1 / (1+tf.exp(-logits))
In [34]:
y_proba = tf.sigmoid(logits)  # equivalent to the manual formula above
In [35]:
epsilon = 1e-7  # small constant to avoid log(0) in the loss
loss = -tf.reduce_mean(y*tf.log(y_proba + epsilon) + (1-y)*tf.log(1-y_proba + epsilon))
In [38]:
# The built-in loss function does the same thing
loss = tf.losses.log_loss(y,y_proba)
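Note that tf.losses.log_loss takes the labels first and the predicted probabilities second, and it clips the probabilities with its own small epsilon internally, so the manual epsilon trick above is not needed when using it.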
In [40]:
lr = 0.01
optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
train = optimizer.minimize(loss)
In [42]:
# Train the model
n_epochs = 1000
batch_size = 50
n_batches = int(np.ceil(m / batch_size))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = minibatch(X_train, y_train, batch_size)
            sess.run(train, feed_dict={X: X_batch, y: y_batch})
        loss_val = loss.eval({X: X_test, y: y_test})
        if epoch % 100 == 0:
            print("Epoch : {0}, Loss : {1}".format(epoch, loss_val))
    y_proba_val = y_proba.eval(feed_dict={X: X_test, y: y_test})
In [44]:
# Estimated probabilities for the positive class
y_proba_val[:5]
Out[44]:
In [45]:
y_pred = (y_proba_val >= 0.5)
y_pred[:5]
Out[45]:
In [50]:
# Precision and recall
from sklearn.metrics import precision_score, recall_score
print(precision_score(y_test,y_pred))
print(recall_score(y_test,y_pred))
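Precision and recall alone do not show where the errors fall; a confusion matrix and the plain accuracy give a fuller picture. A small sketch using scikit-learn:

from sklearn.metrics import confusion_matrix, accuracy_score
print(confusion_matrix(y_test, y_pred))  # rows: true class, columns: predicted class
print(accuracy_score(y_test, y_pred))    # overall fraction of correct predictions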
In [59]:
# Flatten the column vector into a 1-D boolean mask
y_pred_index = y_pred.reshape(-1)
plt.plot(X_test[y_pred_index, 1], X_test[y_pred_index, 2], 'go', label='Positive')
plt.plot(X_test[~y_pred_index, 1], X_test[~y_pred_index, 2], 'r^', label='Negative')
# ~ inverts the boolean mask, selecting the samples predicted as negative
plt.legend()
plt.show()
In [64]:
# Add polynomial features to improve model performance
X_train_enhanced = np.c_[X_train,
                         np.square(X_train[:, 1]),
                         np.square(X_train[:, 2]),
                         X_train[:, 1] ** 3,
                         X_train[:, 2] ** 3]
X_test_enhanced = np.c_[X_test,
                        np.square(X_test[:, 1]),
                        np.square(X_test[:, 2]),
                        X_test[:, 1] ** 3,
                        X_test[:, 2] ** 3]
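Hand-crafting the squared and cubed terms works, but scikit-learn's PolynomialFeatures can generate all polynomial terms up to degree 3 (including cross terms such as x1*x2), which is a common alternative. A sketch on the two raw features (not used below):

from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=3, include_bias=True)
X_train_poly = poly.fit_transform(X_train[:, 1:])  # skip the hand-added bias column
X_test_poly = poly.transform(X_test[:, 1:])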
In [66]:
X_train_enhanced[:5]
Out[66]:
In [67]:
tf.reset_default_graph()
In [70]:
def logistic_regression(X, y, initializer=None, seed=42, learning_rate=0.01):
    n_inputs_with_bias = int(X.get_shape()[1])
    with tf.name_scope('logistic_regression'):
        with tf.name_scope('model'):
            if initializer is None:
                initializer = tf.random_uniform([n_inputs_with_bias, 1], -1.0, 1.0, seed=seed)
            theta = tf.Variable(initializer)
            logits = tf.matmul(X, theta)
            y_proba = tf.sigmoid(logits)
        with tf.name_scope('train'):
            loss = tf.losses.log_loss(y, y_proba)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
            train = optimizer.minimize(loss)
            loss_summary = tf.summary.scalar('log_loss', loss)
        with tf.name_scope('init'):
            init = tf.global_variables_initializer()
        with tf.name_scope('save'):
            saver = tf.train.Saver()
    return y_proba, loss, train, loss_summary, init, saver
In [77]:
# Helper that builds a log directory name for the TensorBoard summaries
from datetime import datetime

def log_dir(prefix=''):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)
In [80]:
# Build the graph with logistic_regression, and create a FileWriter
# so the TensorBoard summaries go to the log directory
n_inputs = 2 + 4  # 2 original features + 4 added polynomial features
logdir = log_dir("logreg")
X = tf.placeholder(tf.float32, shape=(None,n_inputs + 1))
y = tf.placeholder(tf.float32, shape=(None,1))
y_proba, loss, train,loss_summary, init, saver = logistic_regression(X,y)
file_writer = tf.summary.FileWriter(logdir,tf.get_default_graph())
In [92]:
import os
n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "tmp/my_logreg_model.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_logreg_model"

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        # If a checkpoint file exists, restore the model and load the epoch to resume from
        with open(checkpoint_epoch_path, 'rb') as f:
            start_epoch = int(f.read())
        print('Resuming interrupted training at epoch', start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)
    for epoch in range(start_epoch, n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = minibatch(X_train_enhanced, y_train, batch_size)
            sess.run(train, feed_dict={X: X_batch, y: y_batch})
        loss_val, summary_str = sess.run([loss, loss_summary], feed_dict={X: X_test_enhanced, y: y_test})
        file_writer.add_summary(summary_str, epoch)
        if epoch % 500 == 0:
            print("Epoch : {0}, Loss : {1}".format(epoch, loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, 'wb') as f:
                f.write(b"%d" % (epoch + 1))
    saver.save(sess, final_model_path)
    y_proba_val = y_proba.eval(feed_dict={X: X_test_enhanced, y: y_test})
    os.remove(checkpoint_epoch_path)
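Once the final model is saved, it can be reloaded later for inference without retraining. A minimal sketch, assuming the same graph (the placeholders and the logistic_regression ops) has been constructed:

with tf.Session() as sess:
    saver.restore(sess, final_model_path)
    y_proba_val = y_proba.eval(feed_dict={X: X_test_enhanced, y: y_test})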
In [93]:
y_pred = (y_proba_val >= 0.5)
precision_score(y_test,y_pred)
Out[93]:
In [94]:
recall_score(y_test,y_pred)
Out[94]:
In [95]:
y_pred_idx = y_pred.reshape(-1)  # 1-D boolean mask instead of a column vector
plt.plot(X_test[y_pred_idx, 1], X_test[y_pred_idx, 2], 'go', label='Positive')
plt.plot(X_test[~y_pred_idx, 1], X_test[~y_pred_idx, 2], 'r^', label='Negative')
plt.legend()
plt.show()
# The enhanced features classify the data noticeably better.
In [107]:
# Randomized hyperparameter search
from scipy.stats import reciprocal

n_search_iteration = 10
for search_iteration in range(n_search_iteration):
    batch_size = np.random.randint(1, 100)
    learning_rate = reciprocal(0.0001, 0.1).rvs(random_state=search_iteration)

    n_inputs = 6
    logdir = log_dir('logreg')

    print("Repeat :", search_iteration)
    print("logdir :", logdir)
    print("batch_size :", batch_size)
    print("learning_rate :", learning_rate)
    print("Training :", end="")

    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, shape=(None, n_inputs + 1))
    y = tf.placeholder(tf.float32, shape=(None, 1))
    y_proba, loss, train, loss_summary, init, saver = logistic_regression(
        X, y, learning_rate=learning_rate)
    file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

    n_epochs = 10001
    n_batches = int(np.ceil(m / batch_size))
    final_model_path = "./my_logreg_model_%d" % search_iteration

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(n_epochs):
            for batch_index in range(n_batches):
                X_batch, y_batch = minibatch(X_train_enhanced, y_train, batch_size)
                sess.run(train, feed_dict={X: X_batch, y: y_batch})
            loss_val, summary_str = sess.run([loss, loss_summary],
                                             feed_dict={X: X_test_enhanced, y: y_test})
            file_writer.add_summary(summary_str, epoch)
            if epoch % 500 == 0:
                print(".", end="")
        saver.save(sess, final_model_path)
        print()
        y_proba_val = y_proba.eval(feed_dict={X: X_test_enhanced, y: y_test})
        y_pred = (y_proba_val >= 0.5)
        print("Precision : {0}, Recall : {1}".format(precision_score(y_test, y_pred),
                                                     recall_score(y_test, y_pred)))
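Each search iteration resets the default graph, so to reuse the run with the best precision/recall you would rebuild the matching graph and then restore that run's checkpoint. A sketch, with the winning run index chosen by hand (run 3 here is a hypothetical choice):

best_run = 3  # hypothetical: index of the best-performing run above
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n_inputs + 1))
y = tf.placeholder(tf.float32, shape=(None, 1))
y_proba, loss, train, loss_summary, init, saver = logistic_regression(X, y)
with tf.Session() as sess:
    saver.restore(sess, "./my_logreg_model_%d" % best_run)
    y_proba_val = y_proba.eval(feed_dict={X: X_test_enhanced, y: y_test})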