Skip to content

Instantly share code, notes, and snippets.

@gidim
Created May 16, 2017 13:12
Show Gist options
  • Save gidim/c9aa710374037f8e4b2fe689264b184e to your computer and use it in GitHub Desktop.
Save gidim/c9aa710374037f8e4b2fe689264b184e to your computer and use it in GitHub Desktop.
A quick example on how to run in-training validation in batches
'''
A Dynamic Recurrent Neural Network (LSTM) implementation example using
TensorFlow library. This example is using a toy dataset to classify linear
sequences. The generated sequences have variable length.
Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
from __future__ import print_function
import tensorflow as tf
import random
# ====================
# TOY DATA GENERATOR
# ====================
class ToySequenceData(object):
""" Generate sequence of data with dynamic length.
This class generate samples for training:
- Class 0: linear sequences (i.e. [0, 1, 2, 3,...])
- Class 1: random sequences (i.e. [1, 3, 10, 7,...])
NOTICE:
We have to pad each sequence to reach 'max_seq_len' for TensorFlow
consistency (we cannot feed a numpy array with inconsistent
dimensions). The dynamic calculation will then be perform thanks to
'seqlen' attribute that records every actual sequence length.
"""
def __init__(self, n_samples=1000, max_seq_len=20, min_seq_len=3,
max_value=1000):
self.data = []
self.labels = []
self.seqlen = []
for i in range(n_samples):
# Random sequence length
len = random.randint(min_seq_len, max_seq_len)
# Monitor sequence length for TensorFlow dynamic calculation
self.seqlen.append(len)
# Add a random or linear int sequence (50% prob)
if random.random() < .5:
# Generate a linear sequence
rand_start = random.randint(0, max_value - len)
s = [[float(i)/max_value] for i in
range(rand_start, rand_start + len)]
# Pad sequence for dimension consistency
s += [[0.] for i in range(max_seq_len - len)]
self.data.append(s)
self.labels.append([1., 0.])
else:
# Generate a random sequence
s = [[float(random.randint(0, max_value))/max_value]
for i in range(len)]
# Pad sequence for dimension consistency
s += [[0.] for i in range(max_seq_len - len)]
self.data.append(s)
self.labels.append([0., 1.])
self.batch_id = 0
def next(self, batch_size):
""" Return a batch of data. When dataset end is reached, start over.
"""
if self.batch_id == len(self.data):
self.batch_id = 0
batch_data = (self.data[self.batch_id:min(self.batch_id +
batch_size, len(self.data))])
batch_labels = (self.labels[self.batch_id:min(self.batch_id +
batch_size, len(self.data))])
batch_seqlen = (self.seqlen[self.batch_id:min(self.batch_id +
batch_size, len(self.data))])
self.batch_id = min(self.batch_id + batch_size, len(self.data))
return batch_data, batch_labels, batch_seqlen
# ==========
# MODEL
# ==========
# Parameters
learning_rate = 0.01
training_iters = 1000000
batch_size = 128
val_batch_size = 128
val_set_size = 5000
val_iters = 1
display_step = 10
# Network Parameters
seq_max_len = 20 # Sequence max length
n_hidden = 64 # hidden layer num of features
n_classes = 2 # linear sequence or not
trainset = ToySequenceData(n_samples=1000, max_seq_len=seq_max_len)
testset = ToySequenceData(n_samples=500, max_seq_len=seq_max_len)
# tf Graph input
x = tf.placeholder("float", [None, seq_max_len, 1])
y = tf.placeholder("float", [None, n_classes])
# A placeholder for indicating each sequence length
seqlen = tf.placeholder(tf.int32, [None])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def dynamicRNN(x, seqlen, weights, biases):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.unstack(x, seq_max_len, 1)
# Define a lstm cell with tensorflow
lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)
# Get lstm cell output, providing 'sequence_length' will perform dynamic
# calculation.
outputs, states = tf.contrib.rnn.static_rnn(lstm_cell, x, dtype=tf.float32,
sequence_length=seqlen)
# When performing dynamic calculation, we must retrieve the last
# dynamically computed output, i.e., if a sequence length is 10, we need
# to retrieve the 10th output.
# However TensorFlow doesn't support advanced indexing yet, so we build
# a custom op that for each sample in batch size, get its length and
# get the corresponding relevant output.
# 'outputs' is a list of output at every timestep, we pack them in a Tensor
# and change back dimension to [batch_size, n_step, n_input]
outputs = tf.stack(outputs)
outputs = tf.transpose(outputs, [1, 0, 2])
# Hack to build the indexing and retrieve the right output.
batch_size = tf.shape(outputs)[0]
# Start indices for each sample
index = tf.range(0, batch_size) * seq_max_len + (seqlen - 1)
# Indexing
outputs = tf.gather(tf.reshape(outputs, [-1, n_hidden]), index)
# Linear activation, using outputs computed above
return tf.matmul(outputs, weights['out']) + biases['out']
pred = dynamicRNN(x, seqlen, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.global_variables_initializer()
test_data = testset.data
test_label = testset.labels
test_seqlen = testset.seqlen
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x, batch_y, batch_seqlen = trainset.next(batch_size)
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
seqlen: batch_seqlen})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y,
seqlen: batch_seqlen})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y,
seqlen: batch_seqlen})
print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
# Once in every 1000 *train* steps we evaluate the validation set (called test here)
if step % 1000 == 0:
val_step = 1
val_acc = 0
while val_step * val_batch_size < val_set_size:
valid_x, valid_y, valid_seqlen = testset.next(val_batch_size)
step_acc = sess.run(accuracy, feed_dict={x: valid_x, y: valid_y,seqlen: valid_seqlen})
val_acc+=step_acc
val_step +=1
val_acc = val_acc / val_step
print("Validation set accuracy: %s" % val_acc)
step += 1
print("Optimization Finished!")
print("Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: test_data, y: test_label,
seqlen: test_seqlen}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment