Create a file called train_eval_utils.py, and add the following code:
import os, time, sys
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import tensorflow as tf
from low_level_cnn import net_fn
from data_utils import get_init_data
class ConvNet(object):
    '''
    Build, train, and evaluate a convolutional classifier (TensorFlow 1.x
    graph mode).

    The input pipeline is obtained from get_init_data and the network from
    net_fn; this class adds the loss, the optimizer, the correct-prediction
    count, the TensorBoard summaries, and the training / evaluation loops.
    Call build() before train_eval() or evaluate().
    '''

    def __init__(self, training=True):
        '''
        Store hyper-parameters and create the global step variable.

        training: when True, build() also creates the loss and the
                  training op; it is forwarded to net_fn as is_training.
        '''
        self.lr = 0.001            # learning rate handed to the Adam optimizer
        self.train_batch = 128     # batch sizes forwarded to get_init_data
        self.test_batch = 100
        # Forwarded to net_fn as keep_prob (presumably the dropout keep
        # probability -- confirm against net_fn).
        self.keep_prob = tf.constant(0.75)
        # Passed to minimize() as global_step in optimize().
        self.gstep = tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')
        self.n_classes = 10
        self.skip_step = 100       # log the batch loss every skip_step steps
        # Denominator of the accuracy reported by eval_once(); assumes the
        # test set holds exactly this many examples.
        self.n_test = 10000
        self.training = training

    def loss(self):
        '''
        define loss function
        use softmax cross entropy with logits as the loss function
        compute mean cross entropy, softmax is applied internally

        NOTE: this rebinds self.loss to the loss tensor, shadowing this
        method on the instance, so it can be called only once per instance
        (build() does that).
        '''
        with tf.name_scope('loss'):
            entropy = tf.nn.softmax_cross_entropy_with_logits(labels=self.label, logits=self.logits)
            self.loss = tf.reduce_mean(entropy, name='loss')

    def optimize(self):
        '''
        Define training op
        using Adam optimizer to minimize cost
        '''
        self.opt = tf.train.AdamOptimizer(self.lr).minimize(self.loss, global_step=self.gstep)

    def eval(self):
        '''
        Count the number of right predictions in a batch

        self.accuracy is a count (a sum), not a ratio; eval_once() divides
        the accumulated count by self.n_test. The true class is taken as
        the argmax of self.label along axis 1.
        '''
        with tf.name_scope('predict'):
            preds = tf.nn.softmax(self.logits)
            correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(self.label, 1))
            self.accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

    def summary(self):
        '''
        Create summaries to write on TensorBoard

        The loss summaries are added only in training mode, because the
        loss tensor is created only when self.training is true.
        '''
        with tf.name_scope('summaries'):
            tf.summary.scalar('accuracy', self.accuracy)
            if self.training:
                tf.summary.scalar('loss', self.loss)
                tf.summary.histogram('histogram_loss', self.loss)
            self.summary_op = tf.summary.merge_all()

    def build(self, test_only=False):
        '''
        Build the computation graph

        test_only: forwarded unchanged to get_init_data.
        '''
        self.img, self.label, self.train_init, self.test_init = \
            get_init_data(self.train_batch, self.test_batch, test_only=test_only)
        self.logits = net_fn(self.img, n_classes=self.n_classes,
                             keep_prob=self.keep_prob, is_training=self.training)
        if self.training:
            self.loss()
            self.optimize()
        self.eval()
        self.summary()

    def train_one_epoch(self, sess, saver, writer, epoch, step):
        '''
        Run the training op until the training iterator is exhausted.

        sess:   session in which the graph is run.
        saver:  not used here; kept so existing callers keep working.
        writer: summary writer that receives one summary per step.
        epoch:  epoch index, used only in the log message.
        step:   global step of the first batch of this epoch.

        Returns the step value after the last batch of the epoch.
        '''
        start_time = time.time()
        sess.run(self.train_init)
        total_loss = 0
        n_batches = 0
        tf.logging.info(time.strftime('time:%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
        try:
            # The loop ends when the iterator raises OutOfRangeError.
            while True:
                _, batch_loss, summaries = sess.run([self.opt, self.loss, self.summary_op])
                writer.add_summary(summaries, global_step=step)
                if (step + 1) % self.skip_step == 0:
                    tf.logging.info('Loss at step {0}: {1}'.format(step+1, batch_loss))
                step += 1
                total_loss += batch_loss
                n_batches += 1
        except tf.errors.OutOfRangeError:
            pass
        if n_batches:
            tf.logging.info('Average loss at epoch {0}: {1}'.format(epoch, total_loss/n_batches))
        else:
            # Avoid a ZeroDivisionError when the iterator yields no batch.
            tf.logging.warning('No training batches were produced at epoch {0}'.format(epoch))
        tf.logging.info('train one epoch took: {0} seconds'.format(time.time() - start_time))
        return step

    def eval_once(self, sess, writer=None, step=None):
        '''
        Run one pass over the test iterator and log the accuracy.

        sess:   session in which the graph is run.
        writer: optional summary writer; when given, the summaries of every
                test batch are written at global step `step`.
        step:   global step attached to the written summaries.

        NOTE(review): the graph is built with a single is_training flag, so
        when this is called from train_eval() the network presumably still
        runs in training mode -- confirm against net_fn.
        '''
        start_time = time.time()
        sess.run(self.test_init)
        total_correct_preds = 0
        try:
            # The loop ends when the iterator raises OutOfRangeError.
            while True:
                accuracy_batch, summaries = sess.run([self.accuracy, self.summary_op])
                if writer is not None:
                    writer.add_summary(summaries, global_step=step)
                total_correct_preds += accuracy_batch
        except tf.errors.OutOfRangeError:
            pass
        tf.logging.info('Evaluation took: {0} seconds'.format(time.time() - start_time))
        tf.logging.info('Accuracy : {0} \n'.format(total_correct_preds/self.n_test))

    def train_eval(self, n_epochs=10, save_ckpt=None, restore_ckpt=None):
        '''
        The train function alternates between training one epoch and evaluating

        n_epochs:     number of train / evaluate rounds.
        save_ckpt:    checkpoint path written after the last epoch; nothing
                      is saved when it is None or empty.
        restore_ckpt: checkpoint path restored before training; when given,
                      summaries go to the 'finetune' log directory instead
                      of the 'train' one.
        '''
        if restore_ckpt:
            log_dir = './graphs/convnet/finetune'
        else:
            log_dir = './graphs/convnet/train'
        writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())
        try:
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                saver = tf.train.Saver()
                if restore_ckpt:
                    saver.restore(sess, restore_ckpt)
                # Continue counting from the (possibly restored) global step.
                step = sess.run(self.gstep)
                for epoch in range(n_epochs):
                    step = self.train_one_epoch(sess, saver, writer, epoch, step)
                    self.eval_once(sess, writer, step)
                # save_ckpt defaults to None, which Saver.save cannot handle.
                if save_ckpt:
                    saver.save(sess, save_ckpt)
        finally:
            # Close the event file even when training is interrupted.
            writer.close()
        tf.logging.info("Finish")

    def evaluate(self, restore_ckpt):
        '''
        The evaluating function

        Restores the variables from restore_ckpt and runs eval_once()
        without writing summaries.
        '''
        with tf.Session() as sess:
            saver = tf.train.Saver()
            saver.restore(sess, restore_ckpt)
            self.eval_once(sess)
        tf.logging.info("Finish")
ConvNet is a class that can build graphs and train and evaluate models. It is a
framework created by combining the data utilities, the network definition, and the metrics.
To train and evaluate a model, instantiate ConvNet, call its build method to build the graph,
and then call train_eval to train or evaluate to test. To build the graph for evaluation only,
set the test_only argument of build to True.