Merge pull request #14 from ugnelis/run_saved_model
Added demo.py for running trained model.
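In short, demo.py rebuilds the LSTM + CTC graph, restores the trained weights from ./models/model.ckpt, runs greedy CTC decoding over the test audio, and logs each original transcript alongside its decoded text. Assuming the checkpoint and the LibriSpeech wav directories referenced below are in place, the script can be run directly as python demo.py (tf.app.run() dispatches to main).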
Showing 1 changed file with 142 additions and 0 deletions.
demo.py
@@ -0,0 +1,142 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
import logging
import sys
import math

import tensorflow as tf

import utils

# Logging configuration.
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.DEBUG,
                    stream=sys.stdout)

# Model path.
MODEL_PATH = "./models/model.ckpt"

# Summary directory.
SUMMARY_PATH = "./logs/"

# Data directories.
DATA_DIR = "./data/LibriSpeech/"
TRAIN_DIR = DATA_DIR + "train-clean-100-wav/"
TEST_DIR = DATA_DIR + "test-clean-wav/"
DEV_DIR = DATA_DIR + "dev-clean-wav/"

# Constants.
SPACE_TOKEN = '<space>'
SPACE_INDEX = 0
FIRST_INDEX = ord('a') - 1  # 0 is reserved for space.

# Number of features.
NUM_FEATURES = 13

# Accounting for the 0th index + space + blank label = 28 characters.
NUM_CLASSES = ord('z') - ord('a') + 1 + 1 + 1
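# With this encoding, index 0 is the space, indices 1-26 map to 'a'-'z',
# and the extra 28th class (index 27) is the CTC blank label, which
# TensorFlow's CTC ops expect as the last class.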

# Hyper-parameters.
NUM_EPOCHS = 200
NUM_HIDDEN = 50
NUM_LAYERS = 1
BATCH_SIZE = 1

# Optimizer parameters.
INITIAL_LEARNING_RATE = 1e-2
MOMENTUM = 0.9


def main(argv):
    # Read the test transcripts and encode them as label sequences.
    test_texts = utils.read_text_files(TEST_DIR)
    test_labels = utils.texts_encoder(test_texts,
                                      first_index=FIRST_INDEX,
                                      space_index=SPACE_INDEX,
                                      space_token=SPACE_TOKEN)
    test_labels = utils.sparse_tuples_from_sequences(test_labels)

    # Read and preprocess the test audio from the same directory as the transcripts.
    test_inputs = utils.read_audio_files(TEST_DIR)
    test_inputs = utils.standardize_audios(test_inputs)
    test_sequence_lengths = utils.get_sequence_lengths(test_inputs)
    test_inputs = utils.make_sequences_same_length(test_inputs, test_sequence_lengths)
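    # After padding, test_inputs should have shape [num_examples, max_time, NUM_FEATURES]
    # to match inputs_placeholder below, while test_sequence_lengths keeps the
    # true (unpadded) length of each example for the CTC decoder.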

    with tf.device('/cpu:0'):
        config = tf.ConfigProto()

        graph = tf.Graph()
        with graph.as_default():
            logging.debug("Starting new TensorFlow graph.")
            inputs_placeholder = tf.placeholder(tf.float32, [None, None, NUM_FEATURES])

            # SparseTensor placeholder required by ctc_loss op.
            labels_placeholder = tf.sparse_placeholder(tf.int32)

            # 1d array of size [batch_size].
            sequence_length_placeholder = tf.placeholder(tf.int32, [None])

            # Defining the cell.
            cell = tf.contrib.rnn.LSTMCell(NUM_HIDDEN, state_is_tuple=True)

            # Stacking rnn cells.
            stack = tf.contrib.rnn.MultiRNNCell([cell] * NUM_LAYERS,
                                                state_is_tuple=True)

            # Creates a recurrent neural network.
            outputs, _ = tf.nn.dynamic_rnn(stack, inputs_placeholder, sequence_length_placeholder,
                                           dtype=tf.float32)

            shape = tf.shape(inputs_placeholder)
            batch_size, max_time_steps = shape[0], shape[1]

            # Reshaping to apply the same weights over the time steps.
            outputs = tf.reshape(outputs, [-1, NUM_HIDDEN])

            weights = tf.Variable(tf.truncated_normal([NUM_HIDDEN, NUM_CLASSES], stddev=0.1),
                                  name='weights')
            bias = tf.Variable(tf.constant(0., shape=[NUM_CLASSES]),
                               name='bias')

            # Doing the affine projection.
            logits = tf.matmul(outputs, weights) + bias

            # Reshaping back to the original shape.
            logits = tf.reshape(logits, [batch_size, -1, NUM_CLASSES])

            # Transposing to time-major order, as the CTC decoder expects.
            logits = tf.transpose(logits, (1, 0, 2))

            # CTC decoder.
            decoded, neg_sum_logits = tf.nn.ctc_greedy_decoder(logits, sequence_length_placeholder)
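            # ctc_greedy_decoder returns a single-element list of SparseTensors:
            # decoded[0] holds the best path with blanks and repeated labels
            # already collapsed; neg_sum_logits holds the corresponding scores.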

            with tf.Session(config=config, graph=graph) as session:
                logging.debug("Starting TensorFlow session.")

                # Initialize the weights and biases.
                tf.global_variables_initializer().run()

                # Saver op to save and restore all the variables.
                saver = tf.train.Saver()

                # Restore model weights from previously saved model.
                saver.restore(session, MODEL_PATH)

                test_feed = {inputs_placeholder: test_inputs,
                             sequence_length_placeholder: test_sequence_lengths}

                # Decoding.
                decoded_outputs = session.run(decoded[0], feed_dict=test_feed)
                dense_decoded = tf.sparse_tensor_to_dense(decoded_outputs, default_value=-1).eval(session=session)
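                # Each row of dense_decoded is padded with -1 (the default_value above)
                # up to the longest decoded sequence in the batch.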
                test_num = test_texts.shape[0]

                for i, sequence in enumerate(dense_decoded):
                    sequence = [s for s in sequence if s != -1]
                    decoded_text = utils.sequence_decoder(sequence)

                    logging.info("Sequence %d/%d", i + 1, test_num)
                    logging.info("Original:\n%s", test_texts[i])
                    logging.info("Decoded:\n%s", decoded_text)


if __name__ == '__main__':
    tf.app.run()