From a23650cc3c1aa32816a1907a7fa34eb29fdeac2c Mon Sep 17 00:00:00 2001 From: nurtas-m Date: Thu, 9 Mar 2017 18:02:43 +0100 Subject: [PATCH 01/10] Provide instructions to reproduce results. Fixed: https://github.com/cmusphinx/g2p-seq2seq/issues/73. Save model regardless eval loss, because of the correctness of the eval loss calculation is questionable. --- README.md | 8 ++++++++ g2p_seq2seq/g2p.py | 10 +++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9f7f9b7e..c4dc9726 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,14 @@ And, if you want to start training from scratch: "--reinit" - Rewrite model in model_folder_path ``` +To reproduce the following results, set: +``` +# for the model with size 64: +--max_steps 80000 +# for the model with size 512: +--max_steps 150000 +``` + #### Word error rate on CMU dictionary data sets System | WER ([CMUdict PRONALSYL 2007](https://sourceforge.net/projects/cmusphinx/files/G2P%20Models/phonetisaurus-cmudict-split.tar.gz)), % | WER ([CMUdict latest\*](https://github.com/cmusphinx/cmudict)), % diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index 8ecf019b..a2fb9f37 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -241,12 +241,12 @@ def train(self): and train_loss > max(prev_train_losses[-3:])): self.session.run(self.model.learning_rate_decay_op) - if (len(prev_valid_losses) > 0 - and eval_loss <= min(prev_valid_losses)): + #if (len(prev_valid_losses) > 0 + # and eval_loss <= min(prev_valid_losses)): # Save checkpoint and zero timer and loss. - self.model.saver.save(self.session, - os.path.join(self.model_dir, "model"), - write_meta_graph=False) + self.model.saver.save(self.session, + os.path.join(self.model_dir, "model"), + write_meta_graph=False) # Stop train if no improvement was seen on validation set # over last 35 times From e2b4b3b8dc835c296ce7d9bf5b8d82970d839a35 Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Fri, 14 Apr 2017 18:18:18 +0200 Subject: [PATCH 02/10] Change stop criteria. Vary the window of the allowable number of epochs without improvement. --- g2p_seq2seq/g2p.py | 88 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index f913953e..ccd03672 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -212,8 +212,9 @@ def train(self): # This is the training loop. step_time, train_loss = 0.0, 0.0 - current_step, num_iter_wo_improve = 0, 0 - prev_train_losses, prev_valid_losses = [], [] + current_step, iter_inx, num_epochs_last_impr, max_num_epochs,\ + num_up_trends, num_down_trends = 0, 0, 0, 2, 0, 0 + prev_train_losses, prev_valid_losses, prev_epoch_valid_losses = [], [], [] num_iter_cover_train = int(sum(train_bucket_sizes) / self.params.batch_size / self.params.steps_per_checkpoint) @@ -232,12 +233,12 @@ def train(self): # Print statistics for the previous steps. train_ppx = math.exp(train_loss) if train_loss < 300 else float('inf') print ("global step %d learning rate %.4f step-time %.2f perplexity " - "%.2f" % (self.model.global_step.eval(self.session), + "%.3f" % (self.model.global_step.eval(self.session), self.model.learning_rate.eval(self.session), step_time, train_ppx)) eval_loss = self.__calc_eval_loss() eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf') - print(" eval: perplexity %.2f" % (eval_ppx)) + print(" eval: perplexity %.3f" % (eval_ppx)) # Decrease learning rate if no improvement was seen on train set # over last 3 times. 
if (len(prev_train_losses) > 2 @@ -246,31 +247,56 @@ def train(self): #if (len(prev_valid_losses) > 0 # and eval_loss <= min(prev_valid_losses)): - # Save checkpoint and zero timer and loss. + # Save checkpoint and zero timer and loss. self.model.saver.save(self.session, os.path.join(self.model_dir, "model"), write_meta_graph=False) - if (len(prev_valid_losses) > 0 - and eval_loss >= min(prev_valid_losses)): - num_iter_wo_improve += 1 - else: - num_iter_wo_improve = 0 - - if num_iter_wo_improve > num_iter_cover_train * 2: - print("No improvement over last %d times. Training will stop after %d" - "iterations if no improvement was seen." - % (num_iter_wo_improve, - num_iter_cover_train - num_iter_wo_improve)) - - # Stop train if no improvement was seen on validation set - # over last 3 epochs. - if num_iter_wo_improve > num_iter_cover_train * 3: - break + # After epoch pass, calculate average epoch loss + # and then make a decision to continue/stop training. + if (iter_inx > 0 + and iter_inx % num_iter_cover_train == 0): + # Calculate average validation loss during the previous epoch + epoch_eval_loss = self.__calc_epoch_loss( + prev_valid_losses[-num_iter_cover_train:]) + if len(prev_epoch_valid_losses) > 0: + print('Previous min epoch eval loss: %f, current epoch eval loss: %f' % + (min(prev_epoch_valid_losses), epoch_eval_loss)) + # Check if there was improvement during last epoch + if (epoch_eval_loss < min(prev_epoch_valid_losses)): + if num_epochs_last_impr > max_num_epochs/1.5: + max_num_epochs = int(1.5 * num_epochs_last_impr) + print('Improved during last epoch.') + prev_min_level = prev_epoch_valid_losses[-1] + num_epochs_last_impr, num_up_trends, num_down_trends = 0, 0, 0 + else: + print('No improvement during last epoch.') + num_epochs_last_impr += 1 + if (prev_epoch_valid_losses[-1] < epoch_eval_loss + and num_up_trends <= num_down_trends): + num_up_trends += 1 + elif (epoch_eval_loss < prev_epoch_valid_losses[-1] + and num_down_trends <= num_up_trends): + num_down_trends += 1 + + print('Num up trends: %d, num down trends: %d' % + (num_up_trends, num_down_trends)) + + print('Number of the epochs passed from the last improvement: %d' + % num_epochs_last_impr) + print('Max allowable number of epochs for improvement: %d' + % max_num_epochs) + + if (num_epochs_last_impr > max_num_epochs + and num_up_trends > 1): + break + + prev_epoch_valid_losses.append(round(epoch_eval_loss, 3)) prev_train_losses.append(train_loss) prev_valid_losses.append(eval_loss) step_time, train_loss = 0.0, 0.0 + iter_inx += 1 print('Training done.') with tf.Graph().as_default(): @@ -316,6 +342,26 @@ def __calc_eval_loss(self): return eval_loss + def __calc_epoch_loss(self, epoch_losses): + """Calculate average loss during the epoch. + + Args: + epoch_losses: list of the losses during the epoch; + + Returns: + average value of the losses during the period; + """ + epoch_loss_sum, loss_num = 0, 0 + for loss in epoch_losses: + if loss < min(epoch_losses) * 1.5: + epoch_loss_sum += loss + loss_num += 1 + if loss_num > 0: + return epoch_loss_sum / loss_num + else: + return float(inf) + + def decode_word(self, word): """Decode input word to sequence of phonemes. 
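For reference, a minimal standalone sketch of the epoch-loss averaging that PATCH 02 introduces: the eval losses recorded at each checkpoint of an epoch are averaged after dropping outliers above 1.5x the epoch minimum. The function and variable names here are illustrative, not taken from the repository, and the fallback uses float('inf') — the committed __calc_epoch_loss returns float(inf), which would raise a NameError if that branch were ever reached.

```
# Sketch of the outlier-filtered epoch loss used as the early-stopping signal.
def average_epoch_loss(checkpoint_losses, allow_excess_min=1.5):
    """Average an epoch's eval losses, ignoring spikes above 1.5x the minimum."""
    if not checkpoint_losses:
        return float('inf')
    floor = min(checkpoint_losses)
    kept = [loss for loss in checkpoint_losses
            if loss < floor * allow_excess_min]
    return sum(kept) / len(kept) if kept else float('inf')

# Example: the spike at 9.0 is discarded, the remaining losses are averaged.
print(average_epoch_loss([2.1, 2.0, 9.0, 1.9]))  # -> 2.0
```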
From 4bb83d83e7a7af5aff54e0c949d4e869a41be9e1 Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Fri, 14 Apr 2017 18:50:56 +0200 Subject: [PATCH 03/10] Minor fixes --- g2p_seq2seq/g2p.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index ccd03672..a57531c2 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -215,9 +215,9 @@ def train(self): current_step, iter_inx, num_epochs_last_impr, max_num_epochs,\ num_up_trends, num_down_trends = 0, 0, 0, 2, 0, 0 prev_train_losses, prev_valid_losses, prev_epoch_valid_losses = [], [], [] - num_iter_cover_train = int(sum(train_bucket_sizes) / - self.params.batch_size / - self.params.steps_per_checkpoint) + num_iter_cover_train = max(1, int(sum(train_bucket_sizes) / + self.params.batch_size / + self.params.steps_per_checkpoint)) while (self.params.max_steps == 0 or self.model.global_step.eval(self.session) <= self.params.max_steps): From 42acc60da3a32ded0cd659f59bf683a6c2b50a45 Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Mon, 17 Apr 2017 14:31:59 +0200 Subject: [PATCH 04/10] Leave only one condition for stop training. --- g2p_seq2seq/g2p.py | 53 +++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index a57531c2..f6d96a80 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -211,13 +211,12 @@ def train(self): for i in xrange(len(train_bucket_sizes))] # This is the training loop. - step_time, train_loss = 0.0, 0.0 - current_step, iter_inx, num_epochs_last_impr, max_num_epochs,\ - num_up_trends, num_down_trends = 0, 0, 0, 2, 0, 0 + step_time, train_loss, window_scale = 0.0, 0.0, 1.5 + current_step, iter_idx, num_epochs_last_impr, max_num_epochs = 0, 0, 0, 2 prev_train_losses, prev_valid_losses, prev_epoch_valid_losses = [], [], [] - num_iter_cover_train = max(1, int(sum(train_bucket_sizes) / - self.params.batch_size / - self.params.steps_per_checkpoint)) + iter_per_epoch = max(1, int(sum(train_bucket_sizes) / + self.params.batch_size / + self.params.steps_per_checkpoint)) while (self.params.max_steps == 0 or self.model.global_step.eval(self.session) <= self.params.max_steps): @@ -254,41 +253,33 @@ def train(self): # After epoch pass, calculate average epoch loss # and then make a decision to continue/stop training. 
- if (iter_inx > 0 - and iter_inx % num_iter_cover_train == 0): + if (iter_idx > 0 + and iter_idx % iter_per_epoch == 0): # Calculate average validation loss during the previous epoch epoch_eval_loss = self.__calc_epoch_loss( - prev_valid_losses[-num_iter_cover_train:]) + prev_valid_losses[-iter_per_epoch:]) if len(prev_epoch_valid_losses) > 0: - print('Previous min epoch eval loss: %f, current epoch eval loss: %f' % + print('Prev min epoch eval loss: %f, curr epoch eval loss: %f' % (min(prev_epoch_valid_losses), epoch_eval_loss)) - # Check if there was improvement during last epoch + # Check if there was an improvement during last epoch if (epoch_eval_loss < min(prev_epoch_valid_losses)): - if num_epochs_last_impr > max_num_epochs/1.5: - max_num_epochs = int(1.5 * num_epochs_last_impr) + if num_epochs_last_impr > max_num_epochs/window_scale: + max_num_epochs = int(window_scale * num_epochs_last_impr) print('Improved during last epoch.') prev_min_level = prev_epoch_valid_losses[-1] - num_epochs_last_impr, num_up_trends, num_down_trends = 0, 0, 0 + num_epochs_last_impr = 0 else: print('No improvement during last epoch.') num_epochs_last_impr += 1 - if (prev_epoch_valid_losses[-1] < epoch_eval_loss - and num_up_trends <= num_down_trends): - num_up_trends += 1 - elif (epoch_eval_loss < prev_epoch_valid_losses[-1] - and num_down_trends <= num_up_trends): - num_down_trends += 1 - - print('Num up trends: %d, num down trends: %d' % - (num_up_trends, num_down_trends)) print('Number of the epochs passed from the last improvement: %d' % num_epochs_last_impr) print('Max allowable number of epochs for improvement: %d' % max_num_epochs) - if (num_epochs_last_impr > max_num_epochs - and num_up_trends > 1): + # Stop training if no improvement was seen during last + # max_num_epochs epochs + if num_epochs_last_impr > max_num_epochs: break prev_epoch_valid_losses.append(round(epoch_eval_loss, 3)) @@ -296,7 +287,7 @@ def train(self): prev_train_losses.append(train_loss) prev_valid_losses.append(eval_loss) step_time, train_loss = 0.0, 0.0 - iter_inx += 1 + iter_idx += 1 print('Training done.') with tf.Graph().as_default(): @@ -325,12 +316,12 @@ def __calc_step_loss(self, train_buckets_scale): def __calc_eval_loss(self): """Run evals on development set and print their perplexity. """ - eval_loss, num_iter_total = 0.0, 0.0 + eval_loss, iter_total = 0.0, 0.0 for bucket_id in xrange(len(self._BUCKETS)): - num_iter_cover_valid = int(math.ceil(len(self.valid_set[bucket_id])/ + iter_per_valid = int(math.ceil(len(self.valid_set[bucket_id])/ self.params.batch_size)) - num_iter_total += num_iter_cover_valid - for batch_id in xrange(num_iter_cover_valid): + iter_total += iter_per_valid + for batch_id in xrange(iter_per_valid): encoder_inputs, decoder_inputs, target_weights =\ self.model.get_eval_set_batch(self.valid_set, bucket_id, batch_id * self.params.batch_size) @@ -338,7 +329,7 @@ def __calc_eval_loss(self): decoder_inputs, target_weights, bucket_id, True) eval_loss += eval_batch_loss - eval_loss = eval_loss/num_iter_total if num_iter_total > 0 else float('inf') + eval_loss = eval_loss/iter_total if iter_total > 0 else float('inf') return eval_loss From 114adfdde5ad2857080b6e2605ef569e22628b20 Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Mon, 17 Apr 2017 16:04:17 +0200 Subject: [PATCH 05/10] Minor fixes. 
--- README.md | 8 -------- g2p_seq2seq/g2p.py | 8 ++++---- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index c4dc9726..9f7f9b7e 100644 --- a/README.md +++ b/README.md @@ -121,14 +121,6 @@ And, if you want to start training from scratch: "--reinit" - Rewrite model in model_folder_path ``` -To reproduce the following results, set: -``` -# for the model with size 64: ---max_steps 80000 -# for the model with size 512: ---max_steps 150000 -``` - #### Word error rate on CMU dictionary data sets System | WER ([CMUdict PRONALSYL 2007](https://sourceforge.net/projects/cmusphinx/files/G2P%20Models/phonetisaurus-cmudict-split.tar.gz)), % | WER ([CMUdict latest\*](https://github.com/cmusphinx/cmudict)), % diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index f6d96a80..61a104d5 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -333,18 +333,18 @@ def __calc_eval_loss(self): return eval_loss - def __calc_epoch_loss(self, epoch_losses): - """Calculate average loss during the epoch. + def __calc_epoch_loss(self, epoch_losses, allow_excess_min=1.5): + """Calculate an average loss without outliers during the epoch. Args: epoch_losses: list of the losses during the epoch; Returns: - average value of the losses during the period; + the average value of the losses without outliers during the period; """ epoch_loss_sum, loss_num = 0, 0 for loss in epoch_losses: - if loss < min(epoch_losses) * 1.5: + if loss < min(epoch_losses) * allow_excess_min: epoch_loss_sum += loss loss_num += 1 if loss_num > 0: From d8b56b09172392661364fe78b092afddca1b2814 Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Wed, 19 Apr 2017 11:31:39 +0200 Subject: [PATCH 06/10] Split checkpoint and stop condition parts. --- g2p_seq2seq/g2p.py | 119 +++++++++++++++++------------------ g2p_seq2seq/seq2seq_model.py | 10 ++- 2 files changed, 61 insertions(+), 68 deletions(-) diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index 61a104d5..1fe1d9f1 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -212,11 +212,12 @@ def train(self): # This is the training loop. step_time, train_loss, window_scale = 0.0, 0.0, 1.5 - current_step, iter_idx, num_epochs_last_impr, max_num_epochs = 0, 0, 0, 2 + current_step, iter_idx, epochs_wo_improve, allow_epochs_wo_improve =\ + 0, 0, 0, 2 prev_train_losses, prev_valid_losses, prev_epoch_valid_losses = [], [], [] - iter_per_epoch = max(1, int(sum(train_bucket_sizes) / - self.params.batch_size / - self.params.steps_per_checkpoint)) + steps_per_epoch = max(1, int(sum(train_bucket_sizes) / + self.params.batch_size)) + iters_per_epoch = int(steps_per_epoch / self.params.steps_per_checkpoint) while (self.params.max_steps == 0 or self.model.global_step.eval(self.session) <= self.params.max_steps): @@ -251,43 +252,43 @@ def train(self): os.path.join(self.model_dir, "model"), write_meta_graph=False) - # After epoch pass, calculate average epoch loss - # and then make a decision to continue/stop training. 
- if (iter_idx > 0 - and iter_idx % iter_per_epoch == 0): - # Calculate average validation loss during the previous epoch - epoch_eval_loss = self.__calc_epoch_loss( - prev_valid_losses[-iter_per_epoch:]) - if len(prev_epoch_valid_losses) > 0: - print('Prev min epoch eval loss: %f, curr epoch eval loss: %f' % - (min(prev_epoch_valid_losses), epoch_eval_loss)) - # Check if there was an improvement during last epoch - if (epoch_eval_loss < min(prev_epoch_valid_losses)): - if num_epochs_last_impr > max_num_epochs/window_scale: - max_num_epochs = int(window_scale * num_epochs_last_impr) - print('Improved during last epoch.') - prev_min_level = prev_epoch_valid_losses[-1] - num_epochs_last_impr = 0 - else: - print('No improvement during last epoch.') - num_epochs_last_impr += 1 - - print('Number of the epochs passed from the last improvement: %d' - % num_epochs_last_impr) - print('Max allowable number of epochs for improvement: %d' - % max_num_epochs) - - # Stop training if no improvement was seen during last - # max_num_epochs epochs - if num_epochs_last_impr > max_num_epochs: - break - - prev_epoch_valid_losses.append(round(epoch_eval_loss, 3)) - prev_train_losses.append(train_loss) prev_valid_losses.append(eval_loss) step_time, train_loss = 0.0, 0.0 - iter_idx += 1 + + # After epoch pass, calculate average epoch loss + # and then make a decision to continue/stop training. + if (len(prev_valid_losses) > iters_per_epoch + and current_step % steps_per_epoch == 0): + # Calculate average validation loss during the previous epoch + epoch_eval_loss = self.__calc_epoch_loss( + prev_valid_losses[-iters_per_epoch:]) + if len(prev_epoch_valid_losses) > 0: + print('Prev min epoch eval loss: %f, curr epoch eval loss: %f' % + (min(prev_epoch_valid_losses), epoch_eval_loss)) + # Check if there was an improvement during last epoch + if (epoch_eval_loss < min(prev_epoch_valid_losses)): + if epochs_wo_improve > allow_epochs_wo_improve/window_scale: + allow_epochs_wo_improve = int(math.ceil(epochs_wo_improve * + window_scale)) + print('Improved during last epoch.') + prev_min_level = prev_epoch_valid_losses[-1] + epochs_wo_improve = 0 + else: + print('No improvement during last epoch.') + epochs_wo_improve += 1 + + print('Number of the epochs passed from the last improvement: %d' + % epochs_wo_improve) + print('Max allowable number of epochs for improvement: %d' + % allow_epochs_wo_improve) + + # Stop training if no improvement was seen during last + # max allowable number of epochs + if epochs_wo_improve > allow_epochs_wo_improve: + break + + prev_epoch_valid_losses.append(epoch_eval_loss) print('Training done.') with tf.Graph().as_default(): @@ -305,8 +306,8 @@ def __calc_step_loss(self, train_buckets_scale): if train_buckets_scale[i] > random_number_01]) # Get a batch and make a step. - encoder_inputs, decoder_inputs, target_weights = self.model.get_batch( - self.train_set, bucket_id) + encoder_inputs, decoder_inputs, target_weights =\ + self.model.get_random_batch(self.train_set, bucket_id) _, step_loss, _ = self.model.step(self.session, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) @@ -316,41 +317,35 @@ def __calc_step_loss(self, train_buckets_scale): def __calc_eval_loss(self): """Run evals on development set and print their perplexity. 
""" - eval_loss, iter_total = 0.0, 0.0 + eval_loss, steps_total = 0.0, 0.0 for bucket_id in xrange(len(self._BUCKETS)): - iter_per_valid = int(math.ceil(len(self.valid_set[bucket_id])/ - self.params.batch_size)) - iter_total += iter_per_valid - for batch_id in xrange(iter_per_valid): + steps_per_bucket = int(math.ceil(len(self.valid_set[bucket_id])/ + self.params.batch_size)) + steps_total += steps_per_bucket + for from_row_idx in xrange(0, steps_per_bucket, self.params.batch_size): encoder_inputs, decoder_inputs, target_weights =\ - self.model.get_eval_set_batch(self.valid_set, bucket_id, - batch_id * self.params.batch_size) + self.model.get_not_random_batch(self.valid_set, bucket_id, + from_row_idx) _, eval_batch_loss, _ = self.model.step(self.session, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) eval_loss += eval_batch_loss - eval_loss = eval_loss/iter_total if iter_total > 0 else float('inf') - return eval_loss + return eval_loss/steps_total if steps_total > 0 else float('inf') - def __calc_epoch_loss(self, epoch_losses, allow_excess_min=1.5): + def __calc_epoch_loss(self, prev_eval_losses, allow_excess_min=1.5): """Calculate an average loss without outliers during the epoch. Args: - epoch_losses: list of the losses during the epoch; + prev_eval_losses: list of the losses during the epoch; Returns: the average value of the losses without outliers during the period; """ - epoch_loss_sum, loss_num = 0, 0 - for loss in epoch_losses: - if loss < min(epoch_losses) * allow_excess_min: - epoch_loss_sum += loss - loss_num += 1 - if loss_num > 0: - return epoch_loss_sum / loss_num - else: - return float(inf) + epoch_losses = [loss for loss in prev_eval_losses + if (loss < (min(prev_eval_losses) * allow_excess_min))] + return sum(epoch_losses) / len(epoch_losses) if len(epoch_losses) > 0\ + else float(inf) def decode_word(self, word): @@ -374,8 +369,8 @@ def decode_word(self, word): bucket_id = min([b for b in xrange(len(self._BUCKETS)) if self._BUCKETS[b][0] > len(token_ids)]) # Get a 1-element batch to feed the word to the model. - encoder_inputs, decoder_inputs, target_weights = self.model.get_batch( - {bucket_id: [(token_ids, [])]}, bucket_id) + encoder_inputs, decoder_inputs, target_weights =\ + self.model.get_random_batch({bucket_id: [(token_ids, [])]}, bucket_id) # Get output logits for the word. _, _, output_logits = self.model.step(self.session, encoder_inputs, decoder_inputs, target_weights, diff --git a/g2p_seq2seq/seq2seq_model.py b/g2p_seq2seq/seq2seq_model.py index 06b57206..722d757a 100644 --- a/g2p_seq2seq/seq2seq_model.py +++ b/g2p_seq2seq/seq2seq_model.py @@ -262,7 +262,7 @@ def step(self, session, encoder_inputs, decoder_inputs, target_weights, return None, outputs[0], outputs[1:] # No gradient norm, loss, outputs. - def get_batch(self, data, bucket_id): + def get_random_batch(self, data, bucket_id): """Get a random batch of data from the specified bucket, prepare for step. To feed data in step(..) it must be a list of batch-major vectors, while @@ -298,7 +298,7 @@ def get_batch(self, data, bucket_id): encoder_inputs, decoder_inputs) - def get_eval_set_batch(self, data, bucket_id, from_row_idx): + def get_not_random_batch(self, data, bucket_id, from_row_idx): """Get a batch from data with rows started with from_row_idx. To feed data in step(..) 
it must be a list of batch-major vectors, while @@ -334,10 +334,8 @@ def get_eval_set_batch(self, data, bucket_id, from_row_idx): decoder_inputs.append([GO_ID] + decoder_input + [PAD_ID] * decoder_pad_size) batch_row_idx += 1 - return self.__create_batch_major_vecs(encoder_size, - decoder_size, - encoder_inputs, - decoder_inputs) + return self.__create_batch_major_vecs(encoder_size, decoder_size, + encoder_inputs, decoder_inputs) def __create_batch_major_vecs(self, encoder_size, decoder_size, From 72f4f7f34cf5580021c01011818f09e6aa253a49 Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Wed, 19 Apr 2017 17:58:56 +0200 Subject: [PATCH 07/10] Move the block with checking the stop criteria to a separate method. --- g2p_seq2seq/g2p.py | 146 ++++++++++++++++++++++++--------------------- 1 file changed, 77 insertions(+), 69 deletions(-) diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index 1fe1d9f1..28b9f445 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -30,7 +30,6 @@ import numpy as np import tensorflow as tf -from tensorflow.core.protobuf import saver_pb2 from g2p_seq2seq import data_utils from g2p_seq2seq import seq2seq_model @@ -211,13 +210,12 @@ def train(self): for i in xrange(len(train_bucket_sizes))] # This is the training loop. - step_time, train_loss, window_scale = 0.0, 0.0, 1.5 - current_step, iter_idx, epochs_wo_improve, allow_epochs_wo_improve =\ - 0, 0, 0, 2 - prev_train_losses, prev_valid_losses, prev_epoch_valid_losses = [], [], [] + step_time, train_loss, allow_excess_min = 0.0, 0.0, 1.5 + current_step, self.epochs_wo_improvement,\ + self.allow_epochs_wo_improvement = 0, 0, 2 + train_losses, eval_losses, epoch_losses = [], [], [] steps_per_epoch = max(1, int(sum(train_bucket_sizes) / self.params.batch_size)) - iters_per_epoch = int(steps_per_epoch / self.params.steps_per_checkpoint) while (self.params.max_steps == 0 or self.model.global_step.eval(self.session) <= self.params.max_steps): @@ -241,54 +239,35 @@ def train(self): print(" eval: perplexity %.3f" % (eval_ppx)) # Decrease learning rate if no improvement was seen on train set # over last 3 times. - if (len(prev_train_losses) > 2 - and train_loss > max(prev_train_losses[-3:])): + if (len(train_losses) > 2 + and train_loss > max(train_losses[-3:])): self.session.run(self.model.learning_rate_decay_op) - #if (len(prev_valid_losses) > 0 - # and eval_loss <= min(prev_valid_losses)): # Save checkpoint and zero timer and loss. self.model.saver.save(self.session, os.path.join(self.model_dir, "model"), write_meta_graph=False) - prev_train_losses.append(train_loss) - prev_valid_losses.append(eval_loss) + train_losses.append(train_loss) + eval_losses.append(eval_loss) step_time, train_loss = 0.0, 0.0 # After epoch pass, calculate average epoch loss # and then make a decision to continue/stop training. 
- if (len(prev_valid_losses) > iters_per_epoch - and current_step % steps_per_epoch == 0): + if (current_step % steps_per_epoch == 0 + and len(eval_losses) > 0): # Calculate average validation loss during the previous epoch - epoch_eval_loss = self.__calc_epoch_loss( - prev_valid_losses[-iters_per_epoch:]) - if len(prev_epoch_valid_losses) > 0: - print('Prev min epoch eval loss: %f, curr epoch eval loss: %f' % - (min(prev_epoch_valid_losses), epoch_eval_loss)) - # Check if there was an improvement during last epoch - if (epoch_eval_loss < min(prev_epoch_valid_losses)): - if epochs_wo_improve > allow_epochs_wo_improve/window_scale: - allow_epochs_wo_improve = int(math.ceil(epochs_wo_improve * - window_scale)) - print('Improved during last epoch.') - prev_min_level = prev_epoch_valid_losses[-1] - epochs_wo_improve = 0 - else: - print('No improvement during last epoch.') - epochs_wo_improve += 1 - - print('Number of the epochs passed from the last improvement: %d' - % epochs_wo_improve) - print('Max allowable number of epochs for improvement: %d' - % allow_epochs_wo_improve) - - # Stop training if no improvement was seen during last - # max allowable number of epochs - if epochs_wo_improve > allow_epochs_wo_improve: - break - - prev_epoch_valid_losses.append(epoch_eval_loss) + eval_losses = [loss for loss in eval_losses + if loss < (min(eval_losses) * allow_excess_min)] + epoch_loss = (sum(eval_losses) / len(eval_losses) + if len(eval_losses) > 0 else float('inf')) + epoch_losses.append(epoch_loss) + + stop_training = self.__should_stop_training(epoch_losses) + if stop_training: + break + + eval_losses = [] print('Training done.') with tf.Graph().as_default(): @@ -297,6 +276,50 @@ def train(self): g2p_model_eval.evaluate(self.test_lines) + def __should_stop_training(self, epoch_losses, window_scale=1.5): + """Check stop training condition. + Because models with different sizes need different number of epochs + for improvement, we implemented stop criteria based on a expanding window + of allowable number of epochs without improvement. Assuming how many + maximum epochs it was needed for the previous improvements, we may increase + allowable number of epochs without improvement. Model will stop training + if number of epochs passed from previous improvement exceed maximal + allowable number. 
+ + Args: + epoch_losses: losses on a validation set during the previous epochs; + + Returns: + True/False: should or should not stop training; + """ + if len(epoch_losses) > 0: + print('Prev min epoch eval loss: %f, curr epoch eval loss: %f' % + (min(epoch_losses[:-1]), epoch_losses[-1])) + # Check if there was an improvement during the last epoch + if epoch_losses[-1] < min(epoch_losses[:-1]): + # Increase window if major part of previous window have been passed + if (self.allow_epochs_wo_improvement < + (self.epochs_wo_improvement * window_scale)): + self.allow_epochs_wo_improvement =\ + int(math.ceil(self.epochs_wo_improvement * window_scale)) + print('Improved during the last epoch.') + self.epochs_wo_improvement = 0 + else: + print('No improvement during the last epoch.') + self.epochs_wo_improvement += 1 + + print('Number of the epochs passed from the last improvement: %d' + % self.epochs_wo_improvement) + print('Max allowable number of epochs for improvement: %d' + % self.allow_epochs_wo_improvement) + + # Stop training if no improvement was seen during last + # max allowable number of epochs + if self.epochs_wo_improvement > self.allow_epochs_wo_improvement: + return True + return False + + def __calc_step_loss(self, train_buckets_scale): """Choose a bucket according to data distribution. We pick a random number in [0, 1] and use the corresponding interval in train_buckets_scale. @@ -317,35 +340,19 @@ def __calc_step_loss(self, train_buckets_scale): def __calc_eval_loss(self): """Run evals on development set and print their perplexity. """ - eval_loss, steps_total = 0.0, 0.0 + eval_loss, steps = 0.0, 0 for bucket_id in xrange(len(self._BUCKETS)): - steps_per_bucket = int(math.ceil(len(self.valid_set[bucket_id])/ - self.params.batch_size)) - steps_total += steps_per_bucket - for from_row_idx in xrange(0, steps_per_bucket, self.params.batch_size): + for from_row in xrange(0, len(self.valid_set[bucket_id]), + self.params.batch_size): encoder_inputs, decoder_inputs, target_weights =\ self.model.get_not_random_batch(self.valid_set, bucket_id, - from_row_idx) - _, eval_batch_loss, _ = self.model.step(self.session, encoder_inputs, - decoder_inputs, target_weights, - bucket_id, True) - eval_loss += eval_batch_loss - return eval_loss/steps_total if steps_total > 0 else float('inf') - - - def __calc_epoch_loss(self, prev_eval_losses, allow_excess_min=1.5): - """Calculate an average loss without outliers during the epoch. - - Args: - prev_eval_losses: list of the losses during the epoch; - - Returns: - the average value of the losses without outliers during the period; - """ - epoch_losses = [loss for loss in prev_eval_losses - if (loss < (min(prev_eval_losses) * allow_excess_min))] - return sum(epoch_losses) / len(epoch_losses) if len(epoch_losses) > 0\ - else float(inf) + from_row) + _, loss, _ = self.model.step(self.session, encoder_inputs, + decoder_inputs, target_weights, + bucket_id, True) + eval_loss += loss + steps += 1 + return eval_loss/steps if steps > 0 else float('inf') def decode_word(self, word): @@ -360,7 +367,8 @@ def decode_word(self, word): # Check if all graphemes attended in vocabulary gr_absent = [gr for gr in word if gr not in self.gr_vocab] if gr_absent: - print("Symbols '%s' are not in vocabulary" % "','".join(gr_absent).encode('utf-8')) + print("Symbols '%s' are not in vocabulary".format( + "','".join(gr_absent).encode('utf-8'))) return "" # Get token-ids for the input word. 
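PATCH 07 factors the stop decision into __should_stop_training: the allowable number of epochs without improvement starts at 2 and is widened to ceil(window_scale * epochs_wo_improvement) whenever an improvement arrives after most of the current window has already been used. Below is a self-contained sketch of that expanding-window patience rule with the same defaults; the class and method names are invented for the example and are not part of the repository.

```
import math

class ExpandingPatience(object):
    """Expanding window of allowable epochs without validation improvement."""

    def __init__(self, allow_epochs=2, window_scale=1.5):
        self.allow_epochs = allow_epochs
        self.window_scale = window_scale
        self.epochs_wo_improvement = 0
        self.best_loss = float('inf')

    def should_stop(self, epoch_loss):
        """Feed one epoch's validation loss; return True when training should stop."""
        if epoch_loss < self.best_loss:
            self.best_loss = epoch_loss
            # Widen the window if this improvement used up most of the old one.
            if self.allow_epochs < self.epochs_wo_improvement * self.window_scale:
                self.allow_epochs = int(math.ceil(
                    self.epochs_wo_improvement * self.window_scale))
            self.epochs_wo_improvement = 0
        else:
            self.epochs_wo_improvement += 1
        return self.epochs_wo_improvement > self.allow_epochs

# Example: once the loss plateaus, training stops after the window is exceeded.
patience = ExpandingPatience()
for loss in [3.0, 2.5, 2.4, 2.41, 2.42, 2.43, 2.44]:
    if patience.should_stop(loss):
        print('stop')
        break
```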
From d4d9630e307ae8ab4010ed03999ad588b821badc Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Thu, 20 Apr 2017 11:51:32 +0200 Subject: [PATCH 08/10] Remove random batch selection. --- g2p_seq2seq/g2p.py | 128 +++++++++++++++++------------------ g2p_seq2seq/seq2seq_model.py | 80 +++++++++++----------- 2 files changed, 103 insertions(+), 105 deletions(-) diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index 28b9f445..d52e798b 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -27,6 +27,7 @@ import math import os import time +import random import numpy as np import tensorflow as tf @@ -125,6 +126,9 @@ def __put_into_buckets(self, source, target): if len(source_ids) < source_size and len(target_ids) < target_size: data_set[bucket_id].append([source_ids, target_ids]) break + + for bucket_id in range(len(self._BUCKETS)): + random.shuffle(data_set[bucket_id]) return data_set @@ -202,13 +206,6 @@ def train(self): train_bucket_sizes = [len(self.train_set[b]) for b in xrange(len(self._BUCKETS))] - train_total_size = float(sum(train_bucket_sizes)) - # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use - # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to - # the size if i-th training bucket, as used later. - train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size - for i in xrange(len(train_bucket_sizes))] - # This is the training loop. step_time, train_loss, allow_excess_min = 0.0, 0.0, 1.5 current_step, self.epochs_wo_improvement,\ @@ -221,53 +218,59 @@ def train(self): <= self.params.max_steps): # Get a batch and make a step. start_time = time.time() - step_loss = self.__calc_step_loss(train_buckets_scale) - step_time += (time.time() - start_time) / self.params.steps_per_checkpoint - train_loss += step_loss / self.params.steps_per_checkpoint - current_step += 1 - - # Once in a while, we save checkpoint, print statistics, and run evals. - if current_step % self.params.steps_per_checkpoint == 0: - # Print statistics for the previous steps. - train_ppx = math.exp(train_loss) if train_loss < 300 else float('inf') - print ("global step %d learning rate %.4f step-time %.2f perplexity " - "%.3f" % (self.model.global_step.eval(self.session), - self.model.learning_rate.eval(self.session), - step_time, train_ppx)) - eval_loss = self.__calc_eval_loss() - eval_ppx = math.exp(eval_loss) if eval_loss < 300 else float('inf') - print(" eval: perplexity %.3f" % (eval_ppx)) - # Decrease learning rate if no improvement was seen on train set - # over last 3 times. - if (len(train_losses) > 2 - and train_loss > max(train_losses[-3:])): - self.session.run(self.model.learning_rate_decay_op) - - # Save checkpoint and zero timer and loss. - self.model.saver.save(self.session, - os.path.join(self.model_dir, "model"), - write_meta_graph=False) - - train_losses.append(train_loss) - eval_losses.append(eval_loss) - step_time, train_loss = 0.0, 0.0 - - # After epoch pass, calculate average epoch loss - # and then make a decision to continue/stop training. 
- if (current_step % steps_per_epoch == 0 - and len(eval_losses) > 0): - # Calculate average validation loss during the previous epoch - eval_losses = [loss for loss in eval_losses - if loss < (min(eval_losses) * allow_excess_min)] - epoch_loss = (sum(eval_losses) / len(eval_losses) - if len(eval_losses) > 0 else float('inf')) - epoch_losses.append(epoch_loss) - - stop_training = self.__should_stop_training(epoch_losses) - if stop_training: - break + for from_row in range(0, max(train_bucket_sizes), self.params.batch_size): + for bucket_id in range(len(self._BUCKETS)): + if from_row <= train_bucket_sizes[bucket_id]: + step_loss = self.__calc_step_loss(bucket_id, from_row) + step_time += (time.time() - start_time) /\ + self.params.steps_per_checkpoint + train_loss += step_loss / self.params.steps_per_checkpoint + current_step += 1 + + # Once in a while, we save checkpoint, print statistics, + # and run evals. + if current_step % self.params.steps_per_checkpoint == 0: + # Print statistics for the previous steps. + train_ppx =\ + math.exp(train_loss) if train_loss < 300 else float('inf') + print ("global step %d learning rate %.4f step-time %.2f " + "perplexity %.3f" % ( + self.model.global_step.eval(self.session), + self.model.learning_rate.eval(self.session), + step_time, train_ppx)) + eval_loss = self.__calc_eval_loss() + eval_ppx =\ + math.exp(eval_loss) if eval_loss < 300 else float('inf') + print(" eval: perplexity %.3f" % (eval_ppx)) + # Decrease learning rate if no improvement was seen on train set + # over last 3 times. + if (len(train_losses) > 2 + and train_loss > max(train_losses[-3:])): + self.session.run(self.model.learning_rate_decay_op) + + # Save checkpoint and zero timer and loss. + self.model.saver.save(self.session, + os.path.join(self.model_dir, "model"), + write_meta_graph=False) + + train_losses.append(train_loss) + eval_losses.append(eval_loss) + step_time, train_loss = 0.0, 0.0 + + # After epoch pass, calculate average validation loss during + # the previous epoch + eval_losses = [loss for loss in eval_losses + if loss < (min(eval_losses) * allow_excess_min)] + epoch_loss = (sum(eval_losses) / len(eval_losses) + if len(eval_losses) > 0 else float('inf')) + epoch_losses.append(epoch_loss) + + # Make a decision to continue/stop training. + stop_training = self.__should_stop_training(epoch_losses) + if stop_training: + break - eval_losses = [] + eval_losses = [] print('Training done.') with tf.Graph().as_default(): @@ -292,7 +295,7 @@ def __should_stop_training(self, epoch_losses, window_scale=1.5): Returns: True/False: should or should not stop training; """ - if len(epoch_losses) > 0: + if len(epoch_losses) > 1: print('Prev min epoch eval loss: %f, curr epoch eval loss: %f' % (min(epoch_losses[:-1]), epoch_losses[-1])) # Check if there was an improvement during the last epoch @@ -320,17 +323,13 @@ def __should_stop_training(self, epoch_losses, window_scale=1.5): return False - def __calc_step_loss(self, train_buckets_scale): + def __calc_step_loss(self, bucket_id, from_row):#train_buckets_scale): """Choose a bucket according to data distribution. We pick a random number in [0, 1] and use the corresponding interval in train_buckets_scale. """ - random_number_01 = np.random.random_sample() - bucket_id = min([i for i in xrange(len(train_buckets_scale)) - if train_buckets_scale[i] > random_number_01]) - # Get a batch and make a step. 
encoder_inputs, decoder_inputs, target_weights =\ - self.model.get_random_batch(self.train_set, bucket_id) + self.model.get_batch(self.train_set, bucket_id, from_row) _, step_loss, _ = self.model.step(self.session, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) @@ -345,8 +344,7 @@ def __calc_eval_loss(self): for from_row in xrange(0, len(self.valid_set[bucket_id]), self.params.batch_size): encoder_inputs, decoder_inputs, target_weights =\ - self.model.get_not_random_batch(self.valid_set, bucket_id, - from_row) + self.model.get_batch(self.valid_set, bucket_id, from_row) _, loss, _ = self.model.step(self.session, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) @@ -365,9 +363,9 @@ def decode_word(self, word): phonemes: decoded phoneme sequence for input word; """ # Check if all graphemes attended in vocabulary - gr_absent = [gr for gr in word if gr not in self.gr_vocab] + gr_absent = set([gr for gr in word if gr not in self.gr_vocab]) if gr_absent: - print("Symbols '%s' are not in vocabulary".format( + print("Symbols '%s' are not in vocabulary" % ( "','".join(gr_absent).encode('utf-8'))) return "" @@ -378,7 +376,7 @@ def decode_word(self, word): if self._BUCKETS[b][0] > len(token_ids)]) # Get a 1-element batch to feed the word to the model. encoder_inputs, decoder_inputs, target_weights =\ - self.model.get_random_batch({bucket_id: [(token_ids, [])]}, bucket_id) + self.model.get_batch({bucket_id: [(token_ids, [])]}, bucket_id, 0) # Get output logits for the word. _, _, output_logits = self.model.step(self.session, encoder_inputs, decoder_inputs, target_weights, diff --git a/g2p_seq2seq/seq2seq_model.py b/g2p_seq2seq/seq2seq_model.py index 722d757a..f84b76a4 100644 --- a/g2p_seq2seq/seq2seq_model.py +++ b/g2p_seq2seq/seq2seq_model.py @@ -262,43 +262,43 @@ def step(self, session, encoder_inputs, decoder_inputs, target_weights, return None, outputs[0], outputs[1:] # No gradient norm, loss, outputs. - def get_random_batch(self, data, bucket_id): - """Get a random batch of data from the specified bucket, prepare for step. - - To feed data in step(..) it must be a list of batch-major vectors, while - data here contains single length-major cases. So the main logic of this - function is to re-index data cases to be in the proper format for feeding. - - Args: - data: a tuple of size len(self.buckets) in which each element contains - lists of pairs of input and output data that we use to create a batch. - bucket_id: integer, which bucket to get the batch for. - - Returns: - The triple (encoder_inputs, decoder_inputs, target_weights) for - the constructed batch that has the proper format to call step(...) later. - """ - encoder_size, decoder_size = self.buckets[bucket_id] - encoder_inputs, decoder_inputs = [], [] - - # Get a random batch of encoder and decoder inputs from data, - # pad them if needed, reverse encoder inputs and add GO to decoder. - for _ in xrange(self.batch_size): - encoder_input, decoder_input = random.choice(data[bucket_id]) - - # Encoder inputs are padded and then reversed. - encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input)) - encoder_inputs.append(list(reversed(encoder_input + encoder_pad))) - - # Decoder inputs get an extra "GO" symbol, and are padded then. 
- decoder_pad_size = decoder_size - len(decoder_input) - 1 - decoder_inputs.append([GO_ID] + decoder_input + - [PAD_ID] * decoder_pad_size) - return self.__create_batch_major_vecs(encoder_size, decoder_size, - encoder_inputs, decoder_inputs) +# def get_random_batch(self, data, bucket_id): +# """Get a random batch of data from the specified bucket, prepare for step. +# +# To feed data in step(..) it must be a list of batch-major vectors, while +# data here contains single length-major cases. So the main logic of this +# function is to re-index data cases to be in the proper format for feeding. +# +# Args: +# data: a tuple of size len(self.buckets) in which each element contains +# lists of pairs of input and output data that we use to create a batch. +# bucket_id: integer, which bucket to get the batch for. +# +# Returns: +# The triple (encoder_inputs, decoder_inputs, target_weights) for +# the constructed batch that has the proper format to call step(...) later. +# """ +# encoder_size, decoder_size = self.buckets[bucket_id] +# encoder_inputs, decoder_inputs = [], [] +# +# # Get a random batch of encoder and decoder inputs from data, +# # pad them if needed, reverse encoder inputs and add GO to decoder. +# for _ in xrange(self.batch_size): +# encoder_input, decoder_input = random.choice(data[bucket_id]) +# +# # Encoder inputs are padded and then reversed. +# encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input)) +# encoder_inputs.append(list(reversed(encoder_input + encoder_pad))) +# +# # Decoder inputs get an extra "GO" symbol, and are padded then. +# decoder_pad_size = decoder_size - len(decoder_input) - 1 +# decoder_inputs.append([GO_ID] + decoder_input + +# [PAD_ID] * decoder_pad_size) +# return self.__create_batch_major_vecs(encoder_size, decoder_size, +# encoder_inputs, decoder_inputs) - def get_not_random_batch(self, data, bucket_id, from_row_idx): + def get_batch(self, data, bucket_id, from_row): """Get a batch from data with rows started with from_row_idx. To feed data in step(..) it must be a list of batch-major vectors, while @@ -316,14 +316,14 @@ def get_not_random_batch(self, data, bucket_id, from_row_idx): """ encoder_size, decoder_size = self.buckets[bucket_id] encoder_inputs, decoder_inputs = [], [] - batch_row_idx = 0 + batch_row = 0 # Get a batch of encoder and decoder inputs from data, # pad them if needed, reverse encoder inputs and add GO to decoder. - while (from_row_idx+batch_row_idx < len(data[bucket_id]) - and batch_row_idx < self.batch_size): + while (from_row + batch_row < len(data[bucket_id]) + and batch_row < self.batch_size): encoder_input, decoder_input =\ - data[bucket_id][from_row_idx+batch_row_idx] + data[bucket_id][from_row + batch_row] # Encoder inputs are padded and then reversed. encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input)) @@ -333,7 +333,7 @@ def get_not_random_batch(self, data, bucket_id, from_row_idx): decoder_pad_size = decoder_size - len(decoder_input) - 1 decoder_inputs.append([GO_ID] + decoder_input + [PAD_ID] * decoder_pad_size) - batch_row_idx += 1 + batch_row += 1 return self.__create_batch_major_vecs(encoder_size, decoder_size, encoder_inputs, decoder_inputs) From 9e22191604a6a02eb41982be588a3538b2b41699 Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Thu, 20 Apr 2017 11:57:18 +0200 Subject: [PATCH 09/10] Remove comments. 
--- g2p_seq2seq/g2p.py | 2 +- g2p_seq2seq/seq2seq_model.py | 36 ------------------------------------ 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index d52e798b..c26090e2 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -323,7 +323,7 @@ def __should_stop_training(self, epoch_losses, window_scale=1.5): return False - def __calc_step_loss(self, bucket_id, from_row):#train_buckets_scale): + def __calc_step_loss(self, bucket_id, from_row): """Choose a bucket according to data distribution. We pick a random number in [0, 1] and use the corresponding interval in train_buckets_scale. """ diff --git a/g2p_seq2seq/seq2seq_model.py b/g2p_seq2seq/seq2seq_model.py index f84b76a4..95b3dcae 100644 --- a/g2p_seq2seq/seq2seq_model.py +++ b/g2p_seq2seq/seq2seq_model.py @@ -262,42 +262,6 @@ def step(self, session, encoder_inputs, decoder_inputs, target_weights, return None, outputs[0], outputs[1:] # No gradient norm, loss, outputs. -# def get_random_batch(self, data, bucket_id): -# """Get a random batch of data from the specified bucket, prepare for step. -# -# To feed data in step(..) it must be a list of batch-major vectors, while -# data here contains single length-major cases. So the main logic of this -# function is to re-index data cases to be in the proper format for feeding. -# -# Args: -# data: a tuple of size len(self.buckets) in which each element contains -# lists of pairs of input and output data that we use to create a batch. -# bucket_id: integer, which bucket to get the batch for. -# -# Returns: -# The triple (encoder_inputs, decoder_inputs, target_weights) for -# the constructed batch that has the proper format to call step(...) later. -# """ -# encoder_size, decoder_size = self.buckets[bucket_id] -# encoder_inputs, decoder_inputs = [], [] -# -# # Get a random batch of encoder and decoder inputs from data, -# # pad them if needed, reverse encoder inputs and add GO to decoder. -# for _ in xrange(self.batch_size): -# encoder_input, decoder_input = random.choice(data[bucket_id]) -# -# # Encoder inputs are padded and then reversed. -# encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input)) -# encoder_inputs.append(list(reversed(encoder_input + encoder_pad))) -# -# # Decoder inputs get an extra "GO" symbol, and are padded then. -# decoder_pad_size = decoder_size - len(decoder_input) - 1 -# decoder_inputs.append([GO_ID] + decoder_input + -# [PAD_ID] * decoder_pad_size) -# return self.__create_batch_major_vecs(encoder_size, decoder_size, -# encoder_inputs, decoder_inputs) - - def get_batch(self, data, bucket_id, from_row): """Get a batch from data with rows started with from_row_idx. From 9fc5caf67223e5341886441506cf00c26bba04f6 Mon Sep 17 00:00:00 2001 From: Nurtas Makhazhanov Date: Thu, 20 Apr 2017 16:04:24 +0200 Subject: [PATCH 10/10] Minor fixes. 
--- g2p_seq2seq/g2p.py | 14 ++++++-------- g2p_seq2seq/seq2seq_model.py | 9 +++++---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/g2p_seq2seq/g2p.py b/g2p_seq2seq/g2p.py index c26090e2..aefcd202 100644 --- a/g2p_seq2seq/g2p.py +++ b/g2p_seq2seq/g2p.py @@ -138,8 +138,8 @@ def prepare_data(self, train_path, valid_path, test_path): print("Preparing G2P data") train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab,\ self.ph_vocab, self.test_lines =\ - data_utils.prepare_g2p_data(self.model_dir, train_path, valid_path, - test_path) + data_utils.prepare_g2p_data(self.model_dir, train_path, valid_path, + test_path) # Read data into buckets and compute their sizes. print ("Reading development and training data.") self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids) @@ -211,8 +211,6 @@ def train(self): current_step, self.epochs_wo_improvement,\ self.allow_epochs_wo_improvement = 0, 0, 2 train_losses, eval_losses, epoch_losses = [], [], [] - steps_per_epoch = max(1, int(sum(train_bucket_sizes) / - self.params.batch_size)) while (self.params.max_steps == 0 or self.model.global_step.eval(self.session) <= self.params.max_steps): @@ -234,10 +232,10 @@ def train(self): train_ppx =\ math.exp(train_loss) if train_loss < 300 else float('inf') print ("global step %d learning rate %.4f step-time %.2f " - "perplexity %.3f" % ( - self.model.global_step.eval(self.session), - self.model.learning_rate.eval(self.session), - step_time, train_ppx)) + "perplexity %.3f" % + (self.model.global_step.eval(self.session), + self.model.learning_rate.eval(self.session), + step_time, train_ppx)) eval_loss = self.__calc_eval_loss() eval_ppx =\ math.exp(eval_loss) if eval_loss < 300 else float('inf') diff --git a/g2p_seq2seq/seq2seq_model.py b/g2p_seq2seq/seq2seq_model.py index 95b3dcae..2917f9ca 100644 --- a/g2p_seq2seq/seq2seq_model.py +++ b/g2p_seq2seq/seq2seq_model.py @@ -99,7 +99,8 @@ def __init__(self, softmax_loss_function = None # Sampled softmax only makes sense if we sample less than vocabulary size. if num_samples > 0 and num_samples < self.target_vocab_size: - w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], dtype=dtype) + w_t = tf.get_variable("proj_w", [self.target_vocab_size, size], + dtype=dtype) w = tf.transpose(w_t) b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype) output_projection = (w, b) @@ -243,7 +244,7 @@ def step(self, session, encoder_inputs, decoder_inputs, target_weights, # Since our targets are decoder inputs shifted by one, we need one more. last_target = self.decoder_inputs[decoder_size].name input_feed[last_target] = np.zeros([len(encoder_inputs[0])], - dtype=np.int32) + dtype=np.int32) # Output feed: depends on whether we do a backward step or not. if not forward_only: @@ -312,14 +313,14 @@ def __create_batch_major_vecs(self, encoder_size, decoder_size, batch_encoder_inputs.append( np.array([encoder_inputs[batch_idx][length_idx] for batch_idx in xrange(len(encoder_inputs))], - dtype=np.int32)) + dtype=np.int32)) # Batch decoder inputs are re-indexed decoder_inputs, we create weights. for length_idx in xrange(decoder_size): batch_decoder_inputs.append( np.array([decoder_inputs[batch_idx][length_idx] for batch_idx in xrange(len(encoder_inputs))], - dtype=np.int32)) + dtype=np.int32)) # Create target_weights to be 0 for targets that are padding. batch_weight = np.ones(len(encoder_inputs), dtype=np.float32)
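Taken together, PATCHES 08-10 replace random batch sampling with a deterministic sweep: each bucket is shuffled once in __put_into_buckets, and an epoch then covers every pair by advancing from_row in steps of batch_size through get_batch. The sketch below illustrates that access pattern on toy data; the helper name is invented for the example, and it walks bucket by bucket for clarity, whereas the committed loop keeps from_row in the outer loop and the buckets in the inner one.

```
import random

def iterate_epoch(buckets, batch_size):
    """Yield (bucket_id, batch) pairs so every example is visited once per epoch."""
    for bucket_id, bucket in enumerate(buckets):
        for from_row in range(0, len(bucket), batch_size):
            yield bucket_id, bucket[from_row:from_row + batch_size]

# Toy buckets of (grapheme_ids, phoneme_ids) pairs.
buckets = [[([1, 2], [3])] * 5, [([1, 2, 3], [4, 5])] * 3]
for bucket in buckets:
    random.shuffle(bucket)           # shuffled once, as in __put_into_buckets
for bucket_id, batch in iterate_epoch(buckets, batch_size=2):
    print(bucket_id, len(batch))     # the last batch of a bucket may be short
```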