# Word-level lyric GAN. Each song is encoded as a (lines_per_song, words_per_line)
# grid of normalised word indices, and a DCGAN-style generator/discriminator pair is
# trained on those grids. Run as: python <this script> <lyrics folder> <dict name>
import numpy as np
import sys, pickle, os
import tensorflow.keras as krs

lines_per_song = 128                         # rows of the song "image"
words_per_line = 32                          # columns of the song "image"
max_words = lines_per_song * words_per_line  # maximum number of tokens per song
latent_dim = 64                              # size of the generator's noise vector
iterations = 10000
batch_size = 20
save_dir = 'your_dir'                        # where sample songs are written every 100 steps

def turn_word_to_float(data, dictionary):
    """Map a list of word tokens onto a (lines_per_song, words_per_line) grid of floats in [0, 1]."""
    assert len(data) <= max_words
    result = np.zeros(shape=(lines_per_song, words_per_line), dtype=np.float64)
    length = len(dictionary)
    line = 0
    word = 0
    for elem in data:
        if line == lines_per_song:
            # grid is full, ignore the rest of the song
            break
        if elem == "\n":
            # a newline token ends the current line early
            word = -1
            line += 1
        else:
            if elem in dictionary:
                result[line][word] = np.float64(dictionary[elem] / length)
            else:
                print("not found %s" % elem)
        word += 1
        if word == words_per_line:
            word = 0
            line += 1
    return result

def turn_num_array_to_text_array(num_array, reversed_dictionary):
    """Map a grid of floats back to word tokens, appending a newline token after each row."""
    result = []
    length = len(reversed_dictionary)
    # drop a possible trailing channel axis so each element is a plain scalar
    grid = np.asarray(num_array).reshape(lines_per_song, words_per_line)
    for line in grid:
        for elem in line:
            index = int(elem * length)
            if index in reversed_dictionary and elem != 0.0:
                result.append(reversed_dictionary[index])
        result.append("\n")
    return result

def clean_string(data):
    """Lower-case the text and pad punctuation with spaces so tokens are space-separated."""
    data = data.lower()
    data = data.replace("\n", " \n ")
    data = data.replace("\"", " \" ")
    data = data.replace("!", " ! ")
    data = data.replace("?", " ? ")
    data = data.replace(",", " , ")
    data = data.replace("(", " ( ")
    data = data.replace(")", " ) ")
    data = data.replace("{", " { ")
    data = data.replace("}", " } ")
    # collapse runs of spaces left over from the padding above
    data = data.replace("  ", " ")
    data = data.replace("  ", " ")
    data = data.replace("  ", " ")
    return data

def turn_file_to_num_array(filename, dictionary):
    """Read one lyric file and encode it as a grid of normalised word indices."""
    with open(filename, "r") as fo:
        data = fo.read()
    data = clean_string(data).split(" ")
    return turn_word_to_float(data, dictionary)

def grab_dataset_from_folder(folder_to_read, dict_name):
    """Load the word dictionaries and encode every file in the folder.

    Returns the encoded dataset together with the reversed dictionary, which is
    needed later to decode generated grids back into words.
    """
    print("opening dictionaries %s.dict.pickle and %s.rdict.pickle" % (dict_name, dict_name))
    with open('%s.dict.pickle' % dict_name, 'rb') as handle:
        dictionary = pickle.load(handle)
    with open('%s.rdict.pickle' % dict_name, 'rb') as handle:
        reversed_dictionary = pickle.load(handle)
    print("opening folder '%s'" % folder_to_read)
    result = []
    for filename in os.listdir(folder_to_read):
        result.append(turn_file_to_num_array("%s/%s" % (folder_to_read, filename), dictionary))
    return np.array(result), reversed_dictionary

def generate_generator():
    # Maps a latent vector to a (lines_per_song, words_per_line, 1) "song image".
    # The dense/reshape sizes start the feature map at 1/8 of the target resolution
    # so that one strided transpose convolution and two 2x upsamplings end at exactly
    # lines_per_song x words_per_line (128 x 32 here).
    generator_input = krs.Input(shape=(latent_dim,))
    x = krs.layers.Dense(128 * (lines_per_song // 8) * (words_per_line // 8))(generator_input)
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.Reshape((lines_per_song // 8, words_per_line // 8, 128))(x)  # 16 x 4 x 128
    x = krs.layers.Conv2D(256, 5, padding='same')(x)
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.Conv2DTranspose(256, 4, strides=2, padding='same')(x)        # -> 32 x 8
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.UpSampling2D()(x)                                            # -> 64 x 16
    x = krs.layers.Conv2D(256, 5, padding='same')(x)
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.UpSampling2D()(x)                                            # -> 128 x 32
    x = krs.layers.Conv2D(256, 5, padding='same')(x)
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.Conv2D(1, 4, activation='tanh', padding='same')(x)           # -> 128 x 32 x 1
    generator = krs.models.Model(generator_input, x)
    print("-- Generator --")
    generator.summary()
    return generator

def generate_discriminator():
    # Classifies a (lines_per_song, words_per_line, 1) grid as real or generated.
    discriminator_input = krs.layers.Input(shape=(lines_per_song, words_per_line, 1))
    x = krs.layers.Conv2D(128, 3)(discriminator_input)
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.Conv2D(128, 4, strides=2)(x)
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.Conv2D(128, 4, strides=2)(x)
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.Conv2D(128, 4, strides=2)(x)
    x = krs.layers.LeakyReLU()(x)
    x = krs.layers.Flatten()(x)
    x = krs.layers.Dropout(0.4)(x)
    x = krs.layers.Dense(1, activation='sigmoid')(x)
    discriminator = krs.models.Model(discriminator_input, x)
    print("-- Discriminator --")
    discriminator.summary()
    discriminator_optimizer = krs.optimizers.RMSprop(
        lr=0.0008,
        clipvalue=1.0,
        decay=1e-8
    )
    discriminator.compile(
        optimizer=discriminator_optimizer,
        loss='binary_crossentropy'
    )
    # Freeze the discriminator inside the combined GAN model; it still learns through
    # its own train_on_batch call because it was compiled before this flag was set.
    discriminator.trainable = False
    return discriminator
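
# A minimal sketch of how a trained model could be sampled afterwards: rebuild the
# same graph in the same order, load the weights saved as 'gan.h5' by the training
# loop below, and decode one latent vector. The function name sample_song and this
# usage pattern are assumptions; only the file names come from the script itself.
def sample_song(dict_name, weights_path='gan.h5'):
    with open('%s.rdict.pickle' % dict_name, 'rb') as handle:
        reversed_dictionary = pickle.load(handle)
    generator = generate_generator()
    discriminator = generate_discriminator()
    gan_input = krs.Input(shape=(latent_dim,))
    gan = krs.models.Model(gan_input, discriminator(generator(gan_input)))
    gan.load_weights(weights_path)  # restores both sub-models of the combined network
    noise = np.random.normal(size=(1, latent_dim))
    grid = generator.predict(noise)[0]
    return " ".join(turn_num_array_to_text_array(grid, reversed_dictionary))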

if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("usage: %s <lyrics folder> <dict name>" % sys.argv[0])
        sys.exit()
    x_train, reversed_dictionary = grab_dataset_from_folder(sys.argv[1], sys.argv[2])
    # add the channel axis the convolutional networks expect: (n, 128, 32) -> (n, 128, 32, 1)
    x_train = np.expand_dims(x_train, axis=-1)
    print(x_train[0].shape)
    generator = generate_generator()
    discriminator = generate_discriminator()
    # chain generator and (frozen) discriminator into the combined adversarial model
    gan_input = krs.Input(shape=(latent_dim,))
    gan_output = discriminator(generator(gan_input))
    gan = krs.models.Model(gan_input, gan_output)
    gan_optimizer = krs.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=1e-8)
    gan.compile(optimizer=gan_optimizer, loss='binary_crossentropy')
    start = 0
    for step in range(iterations):
        # train the discriminator on a half-generated, half-real batch
        random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
        generated_images = generator.predict(random_latent_vectors)
        stop = start + batch_size
        real_images = x_train[start:stop]
        combined_images = np.concatenate([generated_images, real_images])
        # generated samples are labelled 1, real samples 0, with a little label noise
        labels = np.concatenate([np.ones((batch_size, 1)),
                                 np.zeros((batch_size, 1))])
        labels += 0.05 * np.random.random(labels.shape)
        d_loss = discriminator.train_on_batch(combined_images, labels)
        # train the generator (through the frozen discriminator) to get its samples labelled real
        random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
        misleading_targets = np.zeros((batch_size, 1))
        a_loss = gan.train_on_batch(random_latent_vectors, misleading_targets)
        start += batch_size
        if start > len(x_train) - batch_size:
            start = 0
        if step % 100 == 0:
            gan.save_weights('gan.h5')
            print('discriminator loss:', d_loss)
            print('adversarial loss:', a_loss)
            # decode and save one generated and one real song for inspection
            generated_text = " ".join(turn_num_array_to_text_array(generated_images[0], reversed_dictionary))
            print("---------------------\n%s\n---------------------" % generated_text)
            with open(os.path.join(save_dir, 'generated_song' + str(step) + '.txt'), "w") as out:
                out.write(generated_text)
            real_text = " ".join(turn_num_array_to_text_array(real_images[0], reversed_dictionary))
            print("---------------------\n%s\n---------------------" % real_text)
            with open(os.path.join(save_dir, 'real_song' + str(step) + '.txt'), "w") as out:
                out.write(real_text)