diff --git a/.gitignore b/.gitignore index 2c2b574..13688a6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Project folders. +models/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -102,4 +105,4 @@ ENV/ # IntelliJ .idea/ -*.iml \ No newline at end of file +*.iml diff --git a/train.py b/train.py index 48dd10b..d8f609c 100644 --- a/train.py +++ b/train.py @@ -16,7 +16,14 @@ level=logging.DEBUG, stream=sys.stdout) -DATA_DIR = "data/LibriSpeech/" +# Model path. +MODEL_PATH = "./models/model.ckpt" + +# Summary directory. +SUMMARY_PATH = "./logs/" + +# Data directories. +DATA_DIR = "./data/LibriSpeech/" TRAIN_DIR = DATA_DIR + "train-clean-100-wav/" TEST_DIR = DATA_DIR + "test-clean-wav/" DEV_DIR = DATA_DIR + "dev-clean-wav/" @@ -104,27 +111,29 @@ def main(argv): outputs, _ = tf.nn.dynamic_rnn(stack, inputs_placeholder, sequence_length_placeholder, dtype=tf.float32) shape = tf.shape(inputs_placeholder) - batch_s, max_time_steps = shape[0], shape[1] + batch_size, max_time_steps = shape[0], shape[1] # Reshaping to apply the same weights over the time steps. outputs = tf.reshape(outputs, [-1, NUM_HIDDEN]) - weigths = tf.Variable(tf.truncated_normal([NUM_HIDDEN, - NUM_CLASSES], - stddev=0.1)) - biases = tf.Variable(tf.constant(0., shape=[NUM_CLASSES])) + weights = tf.Variable(tf.truncated_normal([NUM_HIDDEN, NUM_CLASSES], stddev=0.1), + name='weights') + bias = tf.Variable(tf.constant(0., shape=[NUM_CLASSES]), + name='bias') # Doing the affine projection. - logits = tf.matmul(outputs, weigths) + biases + logits = tf.matmul(outputs, weights) + bias # Reshaping back to the original shape. - logits = tf.reshape(logits, [batch_s, -1, NUM_CLASSES]) + logits = tf.reshape(logits, [batch_size, -1, NUM_CLASSES]) # Time is major. logits = tf.transpose(logits, (1, 0, 2)) - loss = tf.nn.ctc_loss(labels_placeholder, logits, sequence_length_placeholder) - cost = tf.reduce_mean(loss) + with tf.name_scope('loss'): + loss = tf.nn.ctc_loss(labels_placeholder, logits, sequence_length_placeholder) + cost = tf.reduce_mean(loss) + tf.summary.scalar("loss", cost) optimizer = tf.train.MomentumOptimizer(INITIAL_LEARNING_RATE, 0.9).minimize(cost) @@ -136,6 +145,16 @@ def main(argv): with tf.Session(config=config, graph=graph) as session: logging.debug("Starting TensorFlow session.") + + # Saver op to save and restore all the variables. + saver = tf.train.Saver() + + # Merge all the summaries and write them out. + merged_summary = tf.summary.merge_all() + + # Initializing summary writer for TensorBoard. + summary_writer = tf.summary.FileWriter(SUMMARY_PATH, tf.get_default_graph()) + # Initialize the weights and biases. tf.global_variables_initializer().run() @@ -154,12 +173,12 @@ def main(argv): train_label_error_rate = 0 start_time = time.time() - for batch in range(num_batches_per_epoch): + for step in range(num_batches_per_epoch): # Format batches. - if int(train_num / ((batch + 1) * BATCH_SIZE)) >= 1: - indexes = [i % train_num for i in range(batch * BATCH_SIZE, (batch + 1) * BATCH_SIZE)] + if int(train_num / ((step + 1) * BATCH_SIZE)) >= 1: + indexes = [i % train_num for i in range(step * BATCH_SIZE, (step + 1) * BATCH_SIZE)] else: - indexes = [i % train_num for i in range(batch * BATCH_SIZE, train_num)] + indexes = [i % train_num for i in range(step * BATCH_SIZE, train_num)] batch_train_inputs = train_inputs[indexes] batch_train_sequence_lengths = train_sequence_lengths[indexes] @@ -169,10 +188,13 @@ def main(argv): labels_placeholder: batch_train_targets, sequence_length_placeholder: batch_train_sequence_lengths} - batch_cost, _ = session.run([cost, optimizer], feed) + batch_cost, _, summary = session.run([cost, optimizer, merged_summary], feed) train_cost += batch_cost * BATCH_SIZE train_label_error_rate += session.run(label_error_rate, feed_dict=feed) * BATCH_SIZE + # Write logs at every iteration. + summary_writer.add_summary(summary, current_epoch * num_batches_per_epoch + step) + train_cost /= train_num train_label_error_rate /= train_num @@ -213,6 +235,10 @@ def main(argv): logging.info("Original:\n%s", test_texts[i]) logging.info("Decoded:\n%s", decoded_text) + # Save model weights to disk. + save_path = saver.save(session, MODEL_PATH) + logging.info("Model saved in file: %s", save_path) + if __name__ == '__main__': tf.app.run()