from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import numpy as np
import os
import time

# Eager execution (TF 1.x API) so tensors can be inspected with .numpy()
tf.enable_eager_execution()
# Split each sequence into an input (all but the last character) and a
# target (all but the first) for next-character prediction.
def split_input_target(chunk):
    in_txt = chunk[:-1]
    tar_txt = chunk[1:]
    return in_txt, tar_txt
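# Illustration (assumed example, not from the original paste): for the chunk
# "Hello", split_input_target returns ("Hell", "ello"), so at every step the
# model sees a character and is trained to predict the character that follows.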
# Loss function applied across all dimensions of the predictions
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
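# Note on shapes (an assumption added for clarity, not from the original
# paste): labels are integer character ids of shape (batch, seq_len), while
# logits are the Dense layer's raw scores of shape (batch, seq_len,
# vocab_size), hence from_logits=True.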
# tf.keras.Sequential defines the model:
#   Keras Embedding, a trainable lookup table mapping character ids to vectors
#   Keras GRU, the type of RNN used
#   Keras Dense, the output layer (one logit per vocabulary character)
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model
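# Sampling-loop sketch (an assumption in the spirit of the TF text-generation
# tutorial, not part of the original paste; the signature and defaults here
# are illustrative choices): feed a seed string through the batch-size-1 model
# and repeatedly sample the next character from its logits.
def generate_text(model, start_string, char2idx, idx2char, num_generate=500, temperature=1.0):
    # Vectorize the seed string and add a batch dimension
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
    text_generated = []
    model.reset_states()
    for _ in range(num_generate):
        # (1, seq_len, vocab_size) -> (seq_len, vocab_size)
        predictions = tf.squeeze(model(input_eval), 0)
        # Higher temperature -> more surprising text, lower -> more predictable
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # Feed the sampled character back in as the next input
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])
    return start_string + ''.join(text_generated)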
def main():
    # Reference file
    path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
    # Read the file and decode it as UTF-8 text
    text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
    # Print length of text
    print('The length of the text is: ' + str(len(text)) + " characters.")
    # print(text[:250])
    # Collect every unique character in the file and print the count
    vocab = sorted(set(text))
    print(str(len(vocab)) + " unique characters")
    # Map characters to indices and back
    char2idx = {u: i for i, u in enumerate(vocab)}
    idx2char = np.array(vocab)
    # Create an integer representation for every character
    txt_to_int = np.array([char2idx[c] for c in text])
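    # Illustration (assumed, in the style of the commented-out prints above):
    # print('{} ---- mapped to int ----> {}'.format(repr(text[:13]), txt_to_int[:13]))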
    # The model predicts the next character from the previous characters.
    # Input: a sequence of characters; output: the following character.
    # Divide the text into sequences with this maximum length:
    seq_l = 100
    exam_per_epoch = len(text) // (seq_l + 1)
    # Make training examples and targets
    char_dataset = tf.data.Dataset.from_tensor_slices(txt_to_int)
    # for i in char_dataset.take(5):
    #     print(idx2char[i.numpy()])
    # batch converts individual characters to sequences of the desired size
    sequences = char_dataset.batch(seq_l + 1, drop_remainder=True)
    dataset = sequences.map(split_input_target)
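    # Sanity check (assumed, matching the commented-out diagnostics above):
    # inspect the first input/target pair produced by split_input_target.
    # for input_example, target_example in dataset.take(1):
    #     print('Input: ', repr(''.join(idx2char[input_example.numpy()])))
    #     print('Target:', repr(''.join(idx2char[target_example.numpy()])))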
    # Create training batches
    Batch_Size = 64
    Buffer_size = 10000
    dataset = dataset.shuffle(Buffer_size).batch(Batch_Size, drop_remainder=True)
    # Build the model
    # Length of the vocabulary in chars
    vocab_size = len(vocab)
    # Embedding dimension: the size of the trainable lookup table that maps
    # each character id to a vector
    ebd_dim = 256
    # Number of RNN units
    rnn_units = 1024
    test_model = build_model(vocab_size, ebd_dim, rnn_units, Batch_Size)
    # Summary of the model
    test_model.summary()
    # Predictions have shape (Batch_Size, sequence_length, vocab_size)
    for input_example_batch, target_example_batch in dataset.take(1):
        example_batch_predictions = test_model(input_example_batch)
        print(example_batch_predictions.shape)
        # Compute the loss on this example batch
        batch_loss = loss(target_example_batch, example_batch_predictions)
        # Print scalar loss
        print("Scalar loss: ", batch_loss.numpy().mean())
    # Configure the training procedure
    test_model.compile(optimizer='adam', loss=loss)
    # Configure checkpoints
    check_dir = './training_checkpoints'
    check_pf = os.path.join(check_dir, "ckpt_{epoch}")
    check_cb = tf.keras.callbacks.ModelCheckpoint(
        filepath=check_pf,
        save_weights_only=True
    )
    # Execute the training (epochs must be passed by keyword to Model.fit)
    history = test_model.fit(dataset, epochs=10, callbacks=[check_cb])
    # Rebuild the model with batch_size=1 and restore the latest checkpoint
    # so it can generate from a single input sequence
    model = build_model(vocab_size, ebd_dim, rnn_units, batch_size=1)
    model.load_weights(tf.train.latest_checkpoint(check_dir))
    model.build(tf.TensorShape([1, None]))
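    # Usage sketch (assumed, not in the original paste): generate text from
    # the restored model; the seed string and length are arbitrary choices.
    print(generate_text(model, start_string=u"ROMEO: ", char2idx=char2idx, idx2char=idx2char))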
if __name__ == '__main__':
    main()