from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import numpy as np
import os
import time

# Needed on TensorFlow 1.x; TensorFlow 2.x runs eagerly by default.
tf.enable_eager_execution()

def split_input_target(chunk):
    # The target is the input shifted one character to the left,
    # e.g. for the chunk "Hello": input "Hell", target "ello".
    in_txt = chunk[:-1]
    tar_txt = chunk[1:]
    return in_txt, tar_txt

# tf.keras.Sequential defines the model:
# Keras Embedding, the input lookup table
# Keras GRU, the type of RNN used
# Keras Dense, the output layer
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model


# Loss function applied across all dimensions of the predictions
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

def main():
    # Reference file
    path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
    # Read the file and decode it as UTF-8 text
    text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
    # Print the length of the text
    print('The length of the text is: ' + str(len(text)) + " characters.")
    # print(text[:250])
    # Collect every unique character in the file
    vocab = sorted(set(text))
    # Print how many there are
    print(str(len(vocab)) + " unique characters")
    # Map characters to indices and back
    char2idx = {u: i for i, u in enumerate(vocab)}
    idx2char = np.array(vocab)
    # Create an integer representation for every character
    txt_to_int = np.array([char2idx[c] for c in text])
    # The model has to predict the next character given the previous characters.
    # Input: sequence of characters
    # Output: following character
    # Divide the text into sequences
    # Maximum sequence length:
    seq_l = 100
    # Number of (input, target) examples one pass over the text yields
    exam_per_epoch = len(text) // (seq_l + 1)

    # Make training examples and targets
    char_dataset = tf.data.Dataset.from_tensor_slices(txt_to_int)
    # for i in char_dataset.take(5):
    #     print(idx2char[i.numpy()])
    # batch converts individual characters to sequences of the desired size.
    sequences = char_dataset.batch(seq_l + 1, drop_remainder=True)
    dataset = sequences.map(split_input_target)
    # Create training batches
    Batch_Size = 64
    Buffer_size = 10000
    dataset = dataset.shuffle(Buffer_size).batch(Batch_Size, drop_remainder=True)
    # Build the model
    # Length of the vocabulary in chars
    vocab_size = len(vocab)
    # The Embedding layer is a trainable lookup table that maps each character's number to a vector with embedding dimensions
    # Embedding dimension
    ebd_dim = 256
    # Number of RNN units
    rnn_units = 1024
    test_model = build_model(vocab_size, ebd_dim, rnn_units, Batch_Size)
    # Summary of the model
    test_model.summary()
    # Predictions have shape (batch_size, sequence_length, vocab_size)
    for input_example_batch, target_example_batch in dataset.take(1):
        example_batch_predictions = test_model(input_example_batch)
        print(example_batch_predictions.shape)
    # Compute the loss on the example batch
    batch_loss = loss(target_example_batch, example_batch_predictions)
    # Print the scalar loss
    print("Scalar loss: ", batch_loss.numpy().mean())
    # Configure the training procedure
    test_model.compile(optimizer='adam', loss=loss)
    # Configure checkpoints
    check_dir = './training_checkpoints'
    check_pf = os.path.join(check_dir, "ckpt_{epoch}")
    check_cb = tf.keras.callbacks.ModelCheckpoint(
        filepath=check_pf,
        save_weights_only=True
    )
    # Execute the training (left commented out here; the restore below assumes
    # checkpoints already exist in check_dir)
    # history = test_model.fit(dataset, epochs=10, callbacks=[check_cb])
    # Rebuild the model with batch size 1 and restore the latest checkpoint for generation
    model = build_model(vocab_size, ebd_dim, rnn_units, batch_size=1)
    model.load_weights(tf.train.latest_checkpoint(check_dir))
    model.build(tf.TensorShape([1, None]))


main()
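

# --- Not part of the original paste: a minimal sketch of the sampling step ---
# The script above restores the trained weights into a batch-size-1 model but never
# samples from it. The loop below is the standard character-sampling idea: feed the
# last predicted character back in and let the stateful GRU carry the context forward.
# The function name, start string, temperature, and num_generate are illustrative
# assumptions, and it would have to be called inside main(), where model, char2idx,
# and idx2char are defined, e.g.:
#     print(generate_text(model, u"ROMEO: ", char2idx, idx2char))
def generate_text(model, start_string, char2idx, idx2char, num_generate=500, temperature=1.0):
    # Encode the start string as integer indices and add a batch dimension.
    input_eval = tf.expand_dims([char2idx[c] for c in start_string], 0)
    generated = []
    # Clear the GRU state before generating a fresh sequence.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Remove the batch dimension: (1, len, vocab_size) -> (len, vocab_size)
        predictions = tf.squeeze(predictions, 0)
        # Lower temperature -> more predictable text; higher -> more surprising text.
        predictions = predictions / temperature
        # Sample the id of the next character from the last time step's logits.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # The predicted character becomes the next input to the model.
        input_eval = tf.expand_dims([predicted_id], 0)
        generated.append(idx2char[predicted_id])
    return start_string + ''.join(generated)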