Advertisement
Guest User

Untitled

a guest
Jun 27th, 2017
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.15 KB | None | 0 0
  1. import tensorflow as tf
  2. import random
  3. import numpy as np
  4. CHARS = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
  5. STRING_LENGTH = 12
  6. num_examples = 10000
  7. # Args:
  8. # n: Number of examples to generate.
  9. # Returns:
  10. # strings_v: numpy array of the form (n, STRING_LENGTH, len(CHARS)). One hot encoding of sequences of text
  11. # strings: Array of actual generated random text:
  12. # uniques_v: numpy array of the form (n, len(CHARS)). One hot encoding of number of unique characters
  13. # uniques: numpy array of length n, number of unique characters for each sequence.
  14. def generate_data(n=num_examples):
  15. chars_to_idx = { c: i for i, c in enumerate(CHARS)}
  16.  
  17. strings_v = np.zeros([n, STRING_LENGTH, len(CHARS)])
  18. strings = [''] * n
  19. uniques = np.zeros(n)
  20. uniques_v = np.zeros([n, len(CHARS)])
  21. for x in range(n):
  22. for y in range(STRING_LENGTH):
  23. random.shuffle(CHARS)
  24. char = CHARS[0]
  25.  
  26. strings_v[x][y][chars_to_idx[char]] = 1
  27. strings[x] += char
  28.  
  29. uniques[x] = len(set(strings[x]))
  30. uniques_v[x][len(set(strings[x])) - 1] = 1
  31.  
  32. return strings_v, strings, uniques_v, uniques
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement