# Untitled

import tensorflow as tf
import random
import numpy as np
CHARS = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
STRING_LENGTH = 12
num_examples = 10000


def generate_data(n=num_examples):
    """Generate random strings together with their unique-character counts.

    Each string consists of STRING_LENGTH characters drawn independently and
    uniformly at random from CHARS.

    Args:
        n: Number of examples to generate.

    Returns:
        A tuple ``(strings_v, strings, uniques_v, uniques)``:

        strings_v: numpy array of shape (n, STRING_LENGTH, len(CHARS)).
            One-hot encoding of each character; the hot column is the
            character's index in CHARS.
        strings: list of the n generated strings.
        uniques_v: numpy array of shape (n, len(CHARS)). One-hot encoding of
            the number of unique characters; a count of k sets column k - 1.
        uniques: numpy array of length n holding the number of unique
            characters in each string.
    """
    chars_to_idx = {c: i for i, c in enumerate(CHARS)}

    strings_v = np.zeros([n, STRING_LENGTH, len(CHARS)])
    strings = []
    uniques = np.zeros(n)
    uniques_v = np.zeros([n, len(CHARS)])
    for x in range(n):
        # Use random.choice rather than random.shuffle(CHARS): shuffling
        # reordered the module-level list in place, so the character-to-column
        # mapping changed from one call to the next.
        chars = [random.choice(CHARS) for _ in range(STRING_LENGTH)]
        for y, char in enumerate(chars):
            strings_v[x, y, chars_to_idx[char]] = 1
        strings.append(''.join(chars))

        num_unique = len(set(chars))
        uniques[x] = num_unique
        # Counts start at 1, columns at 0.
        uniques_v[x, num_unique - 1] = 1

    return strings_v, strings, uniques_v, uniques