Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build and train a 2-layer LSTM classifier that maps padded word-id
# sequences to article ids, using the TensorFlow 1.x graph-mode API.
import tensorflow as tf

tf.reset_default_graph()

# One-hot table over the distinct article ids; rows are looked up per label.
# NOTE(review): `int_article`, `articleIds`, `RNN_batch_size`, `embeddings`,
# `utils`, and `int_wordsPerInteractionInput` are defined elsewhere in the
# file (not visible here) — confirm before running in isolation.
onehot_encoding_articles = tf.one_hot(int_article, len(set(articleIds)))
n_articles = len(set(articleIds))

# True (unpadded) length of each sequence in the batch; lets dynamic_rnn
# stop unrolling at the real end of each sequence.
seqLENGTH = tf.placeholder(tf.int32, [RNN_batch_size])
# Padded word-id sequences, shape [RNN_batch_size, max_seq_len_in_batch].
inputX = tf.placeholder(tf.int32, [RNN_batch_size, None])
inputs = tf.nn.embedding_lookup(embeddings, inputX)
# Target article id per sequence; converted to a one-hot row via lookup.
inputY = tf.placeholder(tf.int32, [RNN_batch_size])
labels = tf.nn.embedding_lookup(onehot_encoding_articles, inputY)

# Two stacked LSTM layers: 128 units, then 256 units.
rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]
multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
# float64 state — presumably `embeddings` is float64 as well; verify,
# since dynamic_rnn requires matching dtypes.
init_state = multi_rnn_cell.zero_state(RNN_batch_size, tf.float64)
outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                   inputs=inputs,
                                   initial_state=init_state,
                                   sequence_length=seqLENGTH,
                                   dtype=tf.float64)

# Classify from the final hidden state (`.h`) of the top (second) LSTM layer.
dense_layer = tf.layers.dense(state[1].h, 32)
logits = tf.layers.dense(dense_layer, n_articles)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
loss = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer().minimize(loss)
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(1, 5):
        # Only the first 1000 interactions are used — presumably to keep
        # the demo fast; verify against the full pipeline.
        batches = utils.get_batches_rnn(RNN_batch_size, int_wordsPerInteractionInput[:1000], int_article[:1000], len(embeddings) - 1)
        for x, y, size in batches:
            feed = {inputX: x,
                    inputY: y,
                    seqLENGTH: size}
            train_loss, _ = sess.run([loss, optimizer], feed_dict=feed)
        # NOTE(review): original indentation was lost in the paste. Placed
        # per-epoch here; it prints the loss of the LAST batch of the epoch,
        # not a true average, despite the message — confirm placement.
        print("Avg. Training loss: {:.4f}".format(train_loss))
def get_batches_rnn(n_sentences_in_batch, sentences, articleIds, pad_index):
    """Yield ``(x, y, lengths)`` training batches for the RNN.

    Sequences in each batch are right-padded with ``pad_index`` to the
    length of the longest sequence *in that batch* (not globally), so
    short batches carry no unnecessary padding.

    Args:
        n_sentences_in_batch: number of sequences per yielded batch; the
            final batch may be smaller when ``len(sentences)`` is not a
            multiple of this value.
        sentences: list of word-id sequences (lists of ints).
        articleIds: target article id per sequence, aligned with
            ``sentences``.
        pad_index: word id used for padding (e.g. ``len(embeddings) - 1``).

    Yields:
        Tuple of ``np.ndarray``:
        - x: int array ``[batch, max_len_in_batch]`` of padded sequences,
        - y: int array ``[batch]`` of target article ids,
        - lengths: int array ``[batch]`` of original (unpadded) lengths.
    """
    for start in range(0, len(sentences), n_sentences_in_batch):
        seq_batch = sentences[start:start + n_sentences_in_batch]
        id_batch = articleIds[start:start + n_sentences_in_batch]
        max_len = max(len(seq) for seq in seq_batch)
        # Right-pad each sequence to the batch maximum with pad_index.
        padded = [list(seq) + [pad_index] * (max_len - len(seq))
                  for seq in seq_batch]
        lengths = [len(seq) for seq in seq_batch]
        yield np.asarray(padded), np.asarray(id_batch), np.asarray(lengths)
# Smoke-test the batch generator: pull one batch of 3 sequences and print it.
# NOTE(review): `utils`, `int_wordsPerInteractionInput`, `int_article`, and
# `embeddings` must already be defined at this point — they come from
# elsewhere in the file.
batches = utils.get_batches_rnn(3, int_wordsPerInteractionInput, int_article, len(embeddings) - 1)
x, y, size = next(batches)
print(x)     # padded word-id matrix, shape [3, max_len_in_batch]
print(y)     # target article ids, one per sequence
print(size)  # true sequence lengths before padding
- Output:
- [[ 20 0 23342]
- [ 122 8 23342]
- [ 257 243 82]]
- [452 468 521]
- [2 2 3]
Add Comment
Please, Sign In to add comment