
Commit

Merge pull request #14 from ugnelis/run_saved_model
Added demo.py for running the trained model.
ugnelis authored Oct 22, 2017
2 parents d05a504 + c6c011e commit 3f485eb
Showing 1 changed file with 142 additions and 0 deletions.
demo.py (142 additions, 0 deletions)
@@ -0,0 +1,142 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import sys

import tensorflow as tf

import utils

# Logging configuration.
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG,
stream=sys.stdout)

# Model path.
MODEL_PATH = "./models/model.ckpt"

# Summary directory.
SUMMARY_PATH = "./logs/"

# Data directories.
DATA_DIR = "./data/LibriSpeech/"
TRAIN_DIR = DATA_DIR + "train-clean-100-wav/"
TEST_DIR = DATA_DIR + "test-clean-wav/"
DEV_DIR = DATA_DIR + "dev-clean-wav/"

# Constants.
SPACE_TOKEN = '<space>'
SPACE_INDEX = 0
FIRST_INDEX = ord('a') - 1  # 0 is reserved for the space token.

# Number of features.
NUM_FEATURES = 13

# 26 letters + space + CTC blank label = 28 classes.
NUM_CLASSES = ord('z') - ord('a') + 1 + 1 + 1

# Hyper-parameters.
NUM_EPOCHS = 200
NUM_HIDDEN = 50
NUM_LAYERS = 1
BATCH_SIZE = 1

# Optimizer parameters.
INITIAL_LEARNING_RATE = 1e-2
MOMENTUM = 0.9
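
# Note: the epoch count and the optimizer settings above are training-time
# hyper-parameters; this demo only runs inference, so they are unused here
# (presumably kept for parity with the training script).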


def main(argv):
    # Read the test transcripts and encode them as label sequences.
    test_texts = utils.read_text_files(TEST_DIR)
    test_labels = utils.texts_encoder(test_texts,
                                      first_index=FIRST_INDEX,
                                      space_index=SPACE_INDEX,
                                      space_token=SPACE_TOKEN)
    test_labels = utils.sparse_tuples_from_sequences(test_labels)

    # Read the audio files that pair with the transcripts above.
    test_inputs = utils.read_audio_files(TEST_DIR)
    test_inputs = utils.standardize_audios(test_inputs)
    test_sequence_lengths = utils.get_sequence_lengths(test_inputs)
    test_inputs = utils.make_sequences_same_length(test_inputs, test_sequence_lengths)
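    # Assumption (utils.py is not part of this diff): each element of
    # test_inputs is a [time, NUM_FEATURES] feature matrix (13 features per
    # frame suggests MFCCs), padded to a common length so the whole batch
    # can be fed as one dense tensor.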

    with tf.device('/cpu:0'):
        config = tf.ConfigProto()

        graph = tf.Graph()
        with graph.as_default():
            logging.debug("Starting new TensorFlow graph.")

            # Inputs have shape [batch_size, max_time_steps, NUM_FEATURES].
            inputs_placeholder = tf.placeholder(tf.float32, [None, None, NUM_FEATURES])

            # SparseTensor placeholder required by the ctc_loss op; it is
            # not fed during decoding, so it stays empty in this demo.
            labels_placeholder = tf.sparse_placeholder(tf.int32)

            # 1d array of size [batch_size].
            sequence_length_placeholder = tf.placeholder(tf.int32, [None])

            # Define the LSTM cell.
            cell = tf.contrib.rnn.LSTMCell(NUM_HIDDEN, state_is_tuple=True)

            # Stack RNN cells. (Repeating one cell object is fine while
            # NUM_LAYERS is 1; later TF 1.x releases require a distinct
            # cell instance per layer.)
            stack = tf.contrib.rnn.MultiRNNCell([cell] * NUM_LAYERS,
                                                state_is_tuple=True)

            # Create the recurrent neural network.
            outputs, _ = tf.nn.dynamic_rnn(stack, inputs_placeholder,
                                           sequence_length_placeholder,
                                           dtype=tf.float32)

            shape = tf.shape(inputs_placeholder)
            batch_size, max_time_steps = shape[0], shape[1]

            # Reshape to apply the same weights over every time step.
            outputs = tf.reshape(outputs, [-1, NUM_HIDDEN])

            weights = tf.Variable(tf.truncated_normal([NUM_HIDDEN, NUM_CLASSES], stddev=0.1),
                                  name='weights')
            bias = tf.Variable(tf.constant(0., shape=[NUM_CLASSES]),
                               name='bias')

            # Affine projection onto the character classes.
            logits = tf.matmul(outputs, weights) + bias

            # Reshape back to [batch_size, max_time_steps, NUM_CLASSES].
            logits = tf.reshape(logits, [batch_size, -1, NUM_CLASSES])

            # The CTC ops expect time-major logits: [max_time_steps, batch_size, NUM_CLASSES].
            logits = tf.transpose(logits, (1, 0, 2))

            # CTC greedy decoder: the special case of beam search with a beam width of 1.
            decoded, neg_sum_logits = tf.nn.ctc_greedy_decoder(logits, sequence_length_placeholder)

            with tf.Session(config=config, graph=graph) as session:
                logging.debug("Starting TensorFlow session.")

                # Initialize the weights and biases.
                tf.global_variables_initializer().run()

                # Saver op to save and restore all the variables.
                saver = tf.train.Saver()

                # Restore model weights from the previously saved model.
                saver.restore(session, MODEL_PATH)

                test_feed = {inputs_placeholder: test_inputs,
                             sequence_length_placeholder: test_sequence_lengths}

                # Decoding.
                decoded_outputs = session.run(decoded[0], feed_dict=test_feed)
                dense_decoded = tf.sparse_tensor_to_dense(decoded_outputs,
                                                          default_value=-1).eval(session=session)
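                # dense_decoded has shape [batch_size, max_decoded_length];
                # shorter rows are padded with -1, which the loop below
                # strips before mapping label ids back to text.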
                test_num = test_texts.shape[0]

                for i, sequence in enumerate(dense_decoded):
                    sequence = [s for s in sequence if s != -1]
                    decoded_text = utils.sequence_decoder(sequence)

                    logging.info("Sequence %d/%d", i + 1, test_num)
                    logging.info("Original:\n%s", test_texts[i])
                    logging.info("Decoded:\n%s", decoded_text)


if __name__ == '__main__':
    tf.app.run()
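
A note on the helpers: utils.py is not included in this diff, so the exact behavior of the utils functions is not visible here. The sketch below is a hypothetical reconstruction of sparse_tuples_from_sequences, shown only to illustrate the (indices, values, dense_shape) triple that tf.sparse_placeholder and the CTC ops consume; the repository's actual implementation may differ.

    import numpy as np

    def sparse_tuples_from_sequences(sequences, dtype=np.int32):
        # Pack a batch of integer label sequences into the sparse triple
        # (indices, values, dense_shape) expected by tf.sparse_placeholder.
        indices = []
        values = []
        for n, sequence in enumerate(sequences):
            indices.extend(zip([n] * len(sequence), range(len(sequence))))
            values.extend(sequence)
        indices = np.asarray(indices, dtype=np.int64)
        values = np.asarray(values, dtype=dtype)
        dense_shape = np.asarray([len(sequences), indices.max(axis=0)[1] + 1],
                                 dtype=np.int64)
        return indices, values, dense_shape

With BATCH_SIZE fixed at 1 and the paths above, running the demo assumes a checkpoint already exists at ./models/model.ckpt and WAV files sit under ./data/LibriSpeech/test-clean-wav/; after that it is simply: python demo.py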
