from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import numpy as np
import os
import time

# Eager execution (TF 1.x API) so tensors can be inspected with .numpy()
tf.enable_eager_execution()
# Split each sequence into an input (all but the last character) and a
# target (all but the first) for next-character prediction.
def split_input_target(chunk):
    in_txt = chunk[:-1]
    tar_txt = chunk[1:]
    return in_txt, tar_txt
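# Illustration (assumed example, not from the original paste): for the chunk
# "Hello", split_input_target returns ("Hell", "ello"), so at every step the
# model sees a character and is trained to predict the character that follows.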
# Loss function applied across all dimensions of the predictions
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
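# Note on shapes (an assumption added for clarity, not from the original
# paste): labels are integer character ids of shape (batch, seq_len), while
# logits are the Dense layer's raw scores of shape (batch, seq_len,
# vocab_size), hence from_logits=True.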
# tf.keras.Sequential defines the model:
#   Keras Embedding, a trainable lookup table mapping character ids to vectors
#   Keras GRU, the type of RNN used
#   Keras Dense, the output layer (one logit per vocabulary character)
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model
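# Sampling-loop sketch (an assumption in the spirit of the TF text-generation
# tutorial, not part of the original paste; the signature and defaults here
# are illustrative choices): feed a seed string through the batch-size-1 model
# and repeatedly sample the next character from its logits.
def generate_text(model, start_string, char2idx, idx2char, num_generate=500, temperature=1.0):
    # Vectorize the seed string and add a batch dimension
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
    text_generated = []
    model.reset_states()
    for _ in range(num_generate):
        # (1, seq_len, vocab_size) -> (seq_len, vocab_size)
        predictions = tf.squeeze(model(input_eval), 0)
        # Higher temperature -> more surprising text, lower -> more predictable
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # Feed the sampled character back in as the next input
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])
    return start_string + ''.join(text_generated)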
def main():
    # Reference file
    path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
    # Read the file and decode it as UTF-8 text
    text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
    # Print length of text
    print('The length of the text is: ' + str(len(text)) + " characters.")
    # print(text[:250])
    # Collect every unique character in the file and print the count
    vocab = sorted(set(text))
    print(str(len(vocab)) + " unique characters")
    # Map characters to indices and back
    char2idx = {u: i for i, u in enumerate(vocab)}
    idx2char = np.array(vocab)
    # Create an integer representation for every character
    txt_to_int = np.array([char2idx[c] for c in text])
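    # Illustration (assumed, in the style of the commented-out prints above):
    # print('{} ---- mapped to int ----> {}'.format(repr(text[:13]), txt_to_int[:13]))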
    # The model predicts the next character from the previous characters.
    # Input: a sequence of characters; output: the following character.
    # Divide the text into sequences with this maximum length:
    seq_l = 100
    exam_per_epoch = len(text) // (seq_l + 1)
    # Make training examples and targets
    char_dataset = tf.data.Dataset.from_tensor_slices(txt_to_int)
    # for i in char_dataset.take(5):
    #     print(idx2char[i.numpy()])
    # batch converts individual characters to sequences of the desired size
    sequences = char_dataset.batch(seq_l + 1, drop_remainder=True)
    dataset = sequences.map(split_input_target)
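    # Sanity check (assumed, matching the commented-out diagnostics above):
    # inspect the first input/target pair produced by split_input_target.
    # for input_example, target_example in dataset.take(1):
    #     print('Input: ', repr(''.join(idx2char[input_example.numpy()])))
    #     print('Target:', repr(''.join(idx2char[target_example.numpy()])))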
    # Create training batches
    Batch_Size = 64
    Buffer_size = 10000
    dataset = dataset.shuffle(Buffer_size).batch(Batch_Size, drop_remainder=True)
    # Build the model
    # Length of the vocabulary in chars
    vocab_size = len(vocab)
    # Embedding dimension: the size of the trainable lookup table that maps
    # each character id to a vector
    ebd_dim = 256
    # Number of RNN units
    rnn_units = 1024
    test_model = build_model(vocab_size, ebd_dim, rnn_units, Batch_Size)
    # Summary of the model
    test_model.summary()
    # Predictions have shape (Batch_Size, sequence_length, vocab_size)
    for input_example_batch, target_example_batch in dataset.take(1):
        example_batch_predictions = test_model(input_example_batch)
        print(example_batch_predictions.shape)
        # Compute the loss on this example batch
        batch_loss = loss(target_example_batch, example_batch_predictions)
        # Print scalar loss
        print("Scalar loss: ", batch_loss.numpy().mean())
    # Configure the training procedure
    test_model.compile(optimizer='adam', loss=loss)
    # Configure checkpoints
    check_dir = './training_checkpoints'
    check_pf = os.path.join(check_dir, "ckpt_{epoch}")
    check_cb = tf.keras.callbacks.ModelCheckpoint(
        filepath=check_pf,
        save_weights_only=True
    )
    # Execute the training (epochs must be passed by keyword to Model.fit)
    history = test_model.fit(dataset, epochs=10, callbacks=[check_cb])
    # Rebuild the model with batch_size=1 and restore the latest checkpoint
    # so it can generate from a single input sequence
    model = build_model(vocab_size, ebd_dim, rnn_units, batch_size=1)
    model.load_weights(tf.train.latest_checkpoint(check_dir))
    model.build(tf.TensorShape([1, None]))
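    # Usage sketch (assumed, not in the original paste): generate text from
    # the restored model; the seed string and length are arbitrary choices.
    print(generate_text(model, start_string=u"ROMEO: ", char2idx=char2idx, idx2char=idx2char))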
if __name__ == '__main__':
    main()