xh_join = tf.concat(1, [x_step, h]) # Combine the features and hidden state into one tensor
g = tf.matmul(xh_join, W_g)+b_g
u = tf.matmul(x_step, W_u)+b_u
q = tf.matmul(xh_join, W_a)+b_a
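# Clip the exponent term at zero so that exp() never sees a positive argument; the
# numerator and denominator are then rescaled by the same factor each step.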
q_greater = tf.maximum(q, 0.0) # Greater of the exponent term or zero
scale = tf.exp(-q_greater)
a_scale = tf.exp(q-q_greater)
n = tf.mul(n, scale)+tf.mul(tf.mul(u, tf.nn.tanh(g)), a_scale) # Numerically stable update of numerator
d = tf.mul(d, scale)+a_scale # Numerically stable update of denominator
h = activation(tf.div(n, d))
ly = tf.matmul(h, W_o)+b_o
error_step = tf.nn.softmax_cross_entropy_with_logits(ly, y[:,i,:]) # Cross-entropy cost function
error += tf.select(tf.greater(l, i), error_step, tf.zeros([batch_size])) # Include cost from this step only if the sequence length has not been exceeded
##########################################################################################
# Optimizer
##########################################################################################

# Optimizer
#
cost = tf.reduce_mean(tf.div(error, l))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
##########################################################################################
# Train
##########################################################################################

# Operation to initialize session
#
initializer = tf.global_variables_initializer()
# Open session
#
with tf.Session() as session:
    # Initialize variables
    #
    session.run(initializer)

    # Each training session represents one batch
    #
    for iteration in range(num_iterations):

        # Grab a batch of training data
        #
        xs, ls, ys = dp.train.batch(batch_size)
        feed = {x: xs, l: ls, y: ys}

        # Update parameters
        #
        out = session.run((cost, optimizer), feed_dict=feed)
        print("Iteration:", iteration, "Dataset:", "train", "Cost:", out[0]/np.log(2.0))

        # Periodically run model on test data
        #
        if iteration%100 == 0:

            # Grab a batch of test data
            #
            xs, ls, ys = dp.test.batch(batch_size)
            feed = {x: xs, l: ls, y: ys}

            # Run model
            #
            out = session.run(cost, feed_dict=feed)
            print("Iteration:", iteration, "Dataset:", "test", "Cost:", out/np.log(2.0))

    # Save the trained model
After Change
n = tf.zeros([batch_size, num_cells])
d = tf.zeros([batch_size, num_cells])
h = tf.zeros([batch_size, num_cells])
a_max = tf.fill([batch_size, num_cells], -1E38) # Start off with lowest number possible
# Define model
#
error = tf.zeros([batch_size])
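# The initial hidden state is the vector s, broadcast across the batch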
h += activation(tf.expand_dims(s, 0))
for i in range(max_steps):
    x_step = x[:,i,:]
    xh_join = tf.concat(1, [x_step, h]) # Combine the features and hidden state into one tensor
    u = tf.matmul(x_step, W_u)+b_u
    g = tf.matmul(xh_join, W_g)+b_g
    a = tf.matmul(xh_join, W_a)+b_a
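    # Rescale both running sums by exp(a_max-a_newmax) whenever the maximum exponent grows;
    # the ratio n/d is unaffected, and every argument passed to tf.exp() stays at or below
    # zero, so the accumulators cannot overflow.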
    z = tf.mul(u, tf.nn.tanh(g))
    a_newmax = tf.maximum(a_max, a)
    exp_diff = tf.exp(a_max-a_newmax)
    exp_scaled = tf.exp(a-a_newmax)
    n = tf.mul(n, exp_diff)+tf.mul(z, exp_scaled) # Numerically stable update of numerator
    d = tf.mul(d, exp_diff)+exp_scaled # Numerically stable update of denominator
    h = activation(tf.div(n, d))
    a_max = a_newmax
    ly = tf.matmul(h, W_o)+b_o
    error_step = tf.nn.softmax_cross_entropy_with_logits(ly, y[:,i,:]) # Cross-entropy cost function
    error += tf.select(tf.greater(l, i), error_step, tf.zeros([batch_size])) # Include cost from this step only if the sequence length has not been exceeded
##########################################################################################
# Optimizer
##########################################################################################

# Optimizer
#
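# The accumulated cross-entropy of each sequence is divided by that sequence's length,
# so the cost below is the average per-step cost, averaged over the batch.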
cost = tf.reduce_mean(tf.div(error, l))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
##########################################################################################
# Train
##########################################################################################

# Operation to initialize session
#
initializer = tf.global_variables_initializer()
# Open session
#
with tf.Session() as session:
    # Initialize variables
    #
    session.run(initializer)

    # Each training session represents one batch
    #
    for iteration in range(num_iterations):

        # Grab a batch of training data
        #
        xs, ls, ys = dp.train.batch(batch_size)
        feed = {x: xs, l: ls, y: ys}

        # Update parameters
        #
        out = session.run((cost, optimizer), feed_dict=feed)
        print("Iteration:", iteration, "Dataset:", "train", "Cost:", out[0]/np.log(2.0))

        # Periodically run model on test data
        #
        if iteration%100 == 0:

            # Grab a batch of test data
            #
            xs, ls, ys = dp.test.batch(batch_size)
            feed = {x: xs, l: ls, y: ys}

            # Run model
            #
            out = session.run(cost, feed_dict=feed)
            print("Iteration:", iteration, "Dataset:", "test", "Cost:", out/np.log(2.0))

    # Save the trained model
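The revised loop is the standard running-maximum (log-sum-exp) rescaling. As a sanity check, here is a minimal standalone NumPy sketch, not part of the training script above (the data and variable names are made up to mirror the loop), showing that the streaming update yields the same weighted average as a naive one-pass computation:

# Standalone check of the running-maximum update (NumPy only)
import numpy as np

np.random.seed(0)
steps, cells = 50, 4
z_all = np.random.randn(steps, cells)       # stands in for u*tanh(g) at each step
a_all = 5.0*np.random.randn(steps, cells)   # stands in for the exponent terms a

# Naive weighted average computed in one pass
naive = (z_all*np.exp(a_all)).sum(axis=0)/np.exp(a_all).sum(axis=0)

# Streaming update with a running maximum, mirroring the revised TensorFlow loop
n = np.zeros(cells)
d = np.zeros(cells)
a_max = np.full(cells, -1E38)
for z, a in zip(z_all, a_all):
    a_newmax = np.maximum(a_max, a)
    exp_diff = np.exp(a_max-a_newmax)
    exp_scaled = np.exp(a-a_newmax)
    n = n*exp_diff + z*exp_scaled
    d = d*exp_diff + exp_scaled
    a_max = a_newmax

print(np.allclose(naive, n/d))   # expected: True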