Advertisement
Guest User

Untitled

a guest
Jun 27th, 2017
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.63 KB | None | 0 0
  1. import numpy as np
  2. import tensorflow as tf
  3. from tensorflow.contrib import rnn
  4. import random
  5. #"abc" => "abc"
  6. #"aabbac" => "abc"
  7. #"abacd" => "abcd"
  8. MAX_LENGTH = 6 # Max length of 6
  9. chars = ["a", "b", "c", "d", "e", "f"]
  10. all_chars = chars + [' '] # Space for padding
  11. NUM_EXAMPLES = 50000
  12. # Args:
  13. # n: number of examples to generate
  14. # Returns:
  15. # strings: list of strings that may contain duplicates
  16. # solutions: strings without duplicates
  17. # strings_v: One hot encoding of strings with duplicates (without padding)
  18. # solutions_v: One hot encoding of solutions (with padding)
  19. def generate_data(n=NUM_EXAMPLES):
  20. all_chars_to_idx = { c:i for i, c in enumerate(all_chars) }
  21. strings_v = np.zeros((NUM_EXAMPLES, MAX_LENGTH, len(all_chars)))
  22. solutions_v = np.zeros((NUM_EXAMPLES, MAX_LENGTH, len(all_chars)))
  23.  
  24. strings = [''] * NUM_EXAMPLES
  25. solutions = [''] * NUM_EXAMPLES
  26.  
  27. for i in range(NUM_EXAMPLES):
  28. for l in range(MAX_LENGTH):
  29. char = random.choice(chars) # only sample from valid characters
  30. strings[i] += char
  31. if char not in solutions[i]:
  32. solutions[i] += char
  33.  
  34. # Pad solutions strings
  35. num_missing = MAX_LENGTH - len(solutions[i])
  36. solutions[i] += ' ' * num_missing
  37.  
  38. for x in range(len(strings)):
  39. for y in range(MAX_LENGTH):
  40. string_char = strings[x][y]
  41. strings_v[x][y][all_chars_to_idx[string_char]] = 1
  42.  
  43. solution_char = solutions[x][y]
  44. solutions_v[x][y][all_chars_to_idx[solution_char]] = 1
  45.  
  46. return strings, solutions, strings_v, solutions_v
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement