Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import tensorflow as tf
- import random
- import numpy as np
# Alphabet the random strings are drawn from. A character's position in this
# list is its one-hot index, so the order must stay stable between calls.
CHARS = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
# Number of characters in every generated string.
STRING_LENGTH = 12
# Default number of examples produced by generate_data().
num_examples = 10000


def generate_data(n=num_examples):
    """Generate random strings together with one-hot encoded features/labels.

    Each string consists of STRING_LENGTH characters drawn uniformly (with
    replacement) from CHARS. The label for a string is the number of distinct
    characters it contains.

    Args:
        n: Number of examples to generate.

    Returns:
        A 4-tuple ``(strings_v, strings, uniques_v, uniques)``:

        strings_v: numpy array of shape (n, STRING_LENGTH, len(CHARS)).
            One-hot encoding of every character of every string.
        strings: list of the n generated strings.
        uniques_v: numpy array of shape (n, len(CHARS)). One-hot encoding of
            the number of unique characters; a count of k is stored at
            index k - 1.
        uniques: numpy array of length n holding the number of unique
            characters in each string.
    """
    chars_to_idx = {c: i for i, c in enumerate(CHARS)}
    strings_v = np.zeros([n, STRING_LENGTH, len(CHARS)])
    strings = [''] * n
    uniques = np.zeros(n)
    uniques_v = np.zeros([n, len(CHARS)])
    for x in range(n):
        # random.choice leaves CHARS untouched. The previous
        # random.shuffle(CHARS) reordered the shared global list, so the
        # char -> index mapping differed between successive calls.
        chars = [random.choice(CHARS) for _ in range(STRING_LENGTH)]
        for y, char in enumerate(chars):
            strings_v[x, y, chars_to_idx[char]] = 1
        strings[x] = ''.join(chars)
        num_unique = len(set(chars))
        uniques[x] = num_unique
        # Counts start at 1, so shift by one to get a zero-based index.
        uniques_v[x, num_unique - 1] = 1
    return strings_v, strings, uniques_v, uniques
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement