In [127]:
# load modules
import sys
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
In [128]:
(X_train,y_train),(X_test,y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1,28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1,28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
In [152]:
# Denoising autoencoder: corrupt the inputs with dropout
tf.reset_default_graph()
n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 150
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
dropout_rate = 0.3
training = tf.placeholder_with_default(False,shape=(),name='training')
X = tf.placeholder(tf.float32,shape=[None,n_inputs])
X_drop = tf.layers.dropout(X,dropout_rate,training=training)
hidden1 = tf.layers.dense(X_drop,n_hidden1,activation=tf.nn.relu,name='h1')
hidden2 = tf.layers.dense(hidden1,n_hidden2,activation=tf.nn.relu,name='h2')
hidden3 = tf.layers.dense(hidden2,n_hidden3,activation=tf.nn.relu,name='h3')
outputs = tf.layers.dense(hidden3,n_outputs,activation=tf.nn.relu,name='outputs')
# MSE
reconstruction_loss = tf.reduce_mean(tf.square(outputs-X))
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
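As an aside, dropout is only one way to corrupt the inputs; a denoising autoencoder is often trained with additive Gaussian noise instead. A minimal sketch of just the corruption step (noise_level and the layer name are assumptions, not part of the original code):
# Gaussian-noise corruption instead of dropout (illustrative sketch)
noise_level = 1.0  # assumed hyperparameter
X_noisy = X + noise_level * tf.random_normal(tf.shape(X))
hidden1_noisy = tf.layers.dense(X_noisy, n_hidden1, activation=tf.nn.relu, name='h1_noisy')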
In [130]:
def shuffle_batch(X, y, batch_size):
    # yield shuffled mini-batches of (X, y)
    idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch
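For a quick sanity check of the generator, one can pull a single batch; the shapes follow from the reshape in the loading cell above:
X_b, y_b = next(shuffle_batch(X_train, y_train, 150))
print(X_b.shape, y_b.shape)  # expected: (150, 784) (150,)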
In [137]:
n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # iterate once over the whole training set in shuffled mini-batches
        # (calling next(shuffle_batch(...)) per step would re-permute the data every batch)
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, training: True})
        loss_val = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("Epoch : {0} / Training MSE : {1}".format(epoch, loss_val))
    saver.save(sess, './my_model_denoising_dropout.ckpt')
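The checkpoint also makes it easy to measure reconstruction error on held-out data; a small sketch (evaluating on X_test is an addition, not in the original cell):
with tf.Session() as sess:
    saver.restore(sess, './my_model_denoising_dropout.ckpt')
    loss_test = reconstruction_loss.eval(feed_dict={X: X_test})
    print("Test MSE : {0}".format(loss_test))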
In [184]:
def plot_image(image, shape=[28, 28]):
    plt.imshow(image.reshape(shape), cmap="Greys", interpolation="nearest")
    plt.axis("off")

def show_reconstructed_digits(X, outputs, model_path=None, n_test_digits=2):
    # plot original test digits (left) next to their reconstructions (right)
    with tf.Session() as sess:
        if model_path:
            saver.restore(sess, model_path)
        outputs_val = outputs.eval(feed_dict={X: X_test[:n_test_digits]})

    fig = plt.figure(figsize=(8, 3 * n_test_digits))
    for digit_index in range(n_test_digits):
        plt.subplot(n_test_digits, 2, digit_index * 2 + 1)
        plot_image(X_test[digit_index])
        plt.subplot(n_test_digits, 2, digit_index * 2 + 2)
        plot_image(outputs_val[digit_index])
In [139]:
show_reconstructed_digits(X,outputs,'./my_model_denoising_dropout.ckpt')
In [177]:
tf.reset_default_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150 # coding units (the bottleneck)
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0001
activation = tf.nn.elu
regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
initializer = tf.variance_scaling_initializer()
training = tf.placeholder_with_default(False,shape=(),name='training')
X = tf.placeholder(tf.float32,shape=[None,n_inputs])
X_drop = tf.layers.dropout(X, dropout_rate, training=training)  # dropout_rate (0.3) reused from the cell above
weights1_init = initializer([n_inputs, n_hidden1])
weights2_init = initializer([n_hidden1, n_hidden2])
weights3_init = initializer([n_hidden2, n_hidden3])
weights4_init = initializer([n_hidden3, n_outputs])
weights1 = tf.Variable(weights1_init, dtype=tf.float32, name="weights1")
weights2 = tf.Variable(weights2_init, dtype=tf.float32, name="weights2")
weights3 = tf.Variable(weights3_init, dtype=tf.float32, name="weights3")
weights4 = tf.Variable(weights4_init, dtype=tf.float32, name="weights4")
biases1 = tf.Variable(tf.zeros(n_hidden1), name="biases1")
biases2 = tf.Variable(tf.zeros(n_hidden2), name="biases2")
biases3 = tf.Variable(tf.zeros(n_hidden3), name="biases3")
biases4 = tf.Variable(tf.zeros(n_outputs), name="biases4")
hidden1 = activation(tf.matmul(X_drop, weights1) + biases1)
hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)
hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)
outputs = tf.matmul(hidden3, weights4) + biases4
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
In [178]:
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
with tf.name_scope("phase1"):
phase1_outputs = tf.matmul(hidden1, weights4) + biases4 # hidden2와 hidden3 통과합니다
phase1_reconstruction_loss = tf.reduce_mean(tf.square(phase1_outputs - X))
phase1_reg_loss = regularizer(weights1) + regularizer(weights4)
phase1_loss = phase1_reconstruction_loss + phase1_reg_loss
phase1_training_op = optimizer.minimize(phase1_loss)
with tf.name_scope("phase2"):
phase2_reconstruction_loss = tf.reduce_mean(tf.square(hidden3 - hidden1))
phase2_reg_loss = regularizer(weights2) + regularizer(weights3)
phase2_loss = phase2_reconstruction_loss + phase2_reg_loss
train_vars = [weights2, biases2, weights3, biases3]
phase2_training_op = optimizer.minimize(phase2_loss, var_list=train_vars) # hidden1 동결
In [179]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()
In [180]:
import numpy.random as rnd

training_ops = [phase1_training_op, phase2_training_op]
reconstruction_losses = [phase1_reconstruction_loss, phase2_reconstruction_loss]
n_epochs = [4, 4]
batch_sizes = [150, 150]

with tf.Session() as sess:
    init.run()
    for phase in range(2):
        print("Training phase #{}".format(phase + 1))
        if phase == 1:
            # hidden1 is frozen in phase 2, so its outputs can be computed once and cached
            hidden1_cache = hidden1.eval(feed_dict={X: X_train})
        for epoch in range(n_epochs[phase]):
            n_batches = len(X_train) // batch_sizes[phase]
            for iteration in range(n_batches):
                print("\r{}%".format(100 * iteration // n_batches), end="")
                sys.stdout.flush()
                if phase == 1:
                    # feed cached hidden1 activations directly instead of recomputing them
                    indices = rnd.permutation(len(X_train))
                    hidden1_batch = hidden1_cache[indices[:batch_sizes[phase]]]
                    feed_dict = {hidden1: hidden1_batch}
                    sess.run(training_ops[phase], feed_dict=feed_dict)
                else:
                    X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_sizes[phase]))
                    feed_dict = {X: X_batch}
                    sess.run(training_ops[phase], feed_dict=feed_dict)
            loss_train = reconstruction_losses[phase].eval(feed_dict=feed_dict)
            print("\r{}".format(epoch), "Training MSE:", loss_train)
    saver.save(sess, "./my_model_cache_frozen.ckpt")
    loss_test = reconstruction_loss.eval(feed_dict={X: X_test})
    print("Test MSE:", loss_test)
In [181]:
tf.reset_default_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150
n_outputs = 10
learning_rate = 0.01
l2_reg = 0.0005
activation = tf.nn.elu
regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
initializer = tf.variance_scaling_initializer()
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
y = tf.placeholder(tf.int32, shape=[None])
weights1_init = initializer([n_inputs, n_hidden1])
weights2_init = initializer([n_hidden1, n_hidden2])
weights3_init = initializer([n_hidden2, n_outputs])
weights1 = tf.Variable(weights1_init, dtype=tf.float32, name="weights1")
weights2 = tf.Variable(weights2_init, dtype=tf.float32, name="weights2")
weights3 = tf.Variable(weights3_init, dtype=tf.float32, name="weights3")
biases1 = tf.Variable(tf.zeros(n_hidden1), name="biases1")
biases2 = tf.Variable(tf.zeros(n_hidden2), name="biases2")
biases3 = tf.Variable(tf.zeros(n_outputs), name="biases3")
hidden1 = activation(tf.matmul(X, weights1) + biases1)
hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)
logits = tf.matmul(hidden2, weights3) + biases3
# sparse_softmax_cross_entropy_with_logits returns one loss per example, so reduce to a scalar
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
reg_loss = regularizer(weights1) + regularizer(weights2) + regularizer(weights3)
loss = cross_entropy + reg_loss
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
pretrain_saver = tf.train.Saver([weights1, weights2, biases1, biases2])
saver = tf.train.Saver()
In [182]:
# Classification on top of the reused (pretrained) layers
n_epochs = 4
batch_size = 150
n_labeled_instances = 6000
#training_op = optimizer.minimize(loss, var_list=[weights3, biases3]) # freeze layers 1 and 2 (optional)
with tf.Session() as sess:
    init.run()
    pretrain_saver.restore(sess, "./my_model_cache_frozen.ckpt")
    for epoch in range(n_epochs):
        n_batches = n_labeled_instances // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            indices = rnd.permutation(n_labeled_instances)[:batch_size]
            X_batch, y_batch = X_train[indices], y_train[indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        print("\r{}".format(epoch), "Training accuracy:", accuracy_val, end="\t")
        saver.save(sess, "./my_model_supervised_pretrained.ckpt")
    test_val = accuracy.eval(feed_dict={X: X_test, y: y_test})
    print("Test accuracy:", test_val)