import tensorflow as tf

# The two input functions below are assumed to live in load_data_labels.py,
# which the training script further down imports.

# Unlike the TFRecordWriter, the TFRecordReader is symbolic: it only reads
# when the graph is run in a session.
def read_and_decode_single_example(filename_queue):
    reader = tf.TFRecordReader()
    # Read a single serialized example from the filename queue.
    # serialized_example is a scalar Tensor of type string.
    _, serialized_example = reader.read(filename_queue)
    # The serialized example is converted back to actual values.
    # The format of the stored features has to be described here.
    features = tf.parse_single_example(
        serialized_example,
        features={
            # The length of both fields is known in advance; otherwise
            # tf.VarLenFeature could be used instead.
            'click': tf.FixedLenFeature([], tf.int64),
            'title': tf.FixedLenFeature([25], tf.int64)
            # Other fields would go here, e.g.
            # 'data1': tf.FixedLenFeature([], tf.float32)
        })
    # Return the decoded tensors.
    lbl = features['click']
    ttl = features['title']
    return lbl, ttl
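# For reference, a minimal sketch of how matching records could have been
# written in the first place. This helper is an assumption, not part of the
# original pipeline; only the feature names and shapes ('click' as a scalar
# int64, 'title' as 25 int64 word ids) are taken from the parsing spec above.
def write_single_example(writer, click, title_word_ids):
    # writer is a tf.python_io.TFRecordWriter; title_word_ids must hold
    # exactly 25 int64 word ids to match the FixedLenFeature above.
    example = tf.train.Example(features=tf.train.Features(feature={
        'click': tf.train.Feature(int64_list=tf.train.Int64List(value=[click])),
        'title': tf.train.Feature(int64_list=tf.train.Int64List(value=list(title_word_ids))),
    }))
    writer.write(example.SerializeToString())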
def read_batch_data(files, b_s):
    min_after_dequeue = 8
    num_threads = 2
    batch_size = b_s
    # Queue capacity formula recommended by the TensorFlow docs.
    capacity = min_after_dequeue + (num_threads + 2) * batch_size
    # num_epochs=1 adds a local variable; run tf.local_variables_initializer() first.
    filename_queue = tf.train.string_input_producer(files, num_epochs=1)
    c_n_c, tit = read_and_decode_single_example(filename_queue)
    label_batch, title_batch = tf.train.shuffle_batch(
        [c_n_c, tit],
        batch_size=batch_size,
        capacity=capacity,
        num_threads=num_threads,
        min_after_dequeue=min_after_dequeue)
    return label_batch, title_batch
import math
import os
import sys
import subprocess
import pickle
import shutil

import numpy as np
import tensorflow as tf

import load_data_labels

LOG_DIR = './log_dir'
def init_weights(shape, name):
    return tf.Variable(tf.random_normal(shape, stddev=0.01, dtype=tf.float64), name=name)

def init_biases(shape, name):
    return tf.Variable(tf.random_normal(shape, dtype=tf.float64), name=name)
def model(titles, w_h, w_h2, w_o, vocab_size, embd_layer):
    # Layer name scopes give a more readable graph in TensorBoard.
    # NOTE: the biases b_h, b_h2 and b_o are module-level globals that are
    # defined below, before model() is called.
    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        W_em = tf.Variable(embd_layer, name="word_embeddings")
        embed_l = tf.nn.embedding_lookup(W_em, titles)
        # Mean-pool the word vectors of a title into one vector per title
        # (tf.reduce_sum would be an alternative pooling choice).
        embedding = tf.reduce_mean(embed_l, [1])
    with tf.name_scope("layer1"):
        h = tf.nn.relu(tf.add(tf.matmul(embedding, w_h), b_h))
    with tf.name_scope("layer2"):
        h2 = tf.nn.relu(tf.add(tf.matmul(h, w_h2), b_h2))
    with tf.name_scope("layer3"):
        return tf.add(tf.matmul(h2, w_o), b_o)
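# To make the pooling step concrete, a tiny self-contained demo (the table
# and the ids are made-up numbers, not from the real model): three word ids
# per title are looked up in a 4 x 2 embedding table and averaged over the
# word axis, so each title becomes a single 2-dimensional vector.
def _embedding_mean_demo():
    table = tf.constant([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
    ids = tf.constant([[1, 2, 3], [0, 0, 1]])  # shape [2, 3]: 2 titles, 3 ids each
    pooled = tf.reduce_mean(tf.nn.embedding_lookup(table, ids), [1])
    # Evaluated in a session, pooled is approximately
    # [[2., 2.], [0.33, 0.33]] with shape [2, 2].
    return pooled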
def init_word_embedding_with_w2v(w2v_dict, word_map, emb_dim, voc_len):
    # Start from random vectors, then overwrite the rows of words that have
    # a pretrained word2vec vector. The extra row (+1) stays random and can
    # serve as an out-of-vocabulary slot.
    initW = np.random.uniform(-1.0, 1.0, (voc_len + 1, emb_dim))
    for word in word_map:
        vec = w2v_dict.get(word)
        idx = word_map[word]
        if vec is not None:
            initW[idx, :] = vec
    return initW
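# A toy example of the two pickled structures this script expects (the words
# and vectors are illustrative assumptions): word_map assigns each word its
# row index in the embedding matrix, and w2vec maps words to pretrained
# vectors that all share the same length emb_dim.
def _toy_embedding_inputs():
    word_map = {'cat': 0, 'dog': 1}
    w2vec = {'cat': np.array([0.1, -0.2, 0.3]),
             'dog': np.array([0.4, 0.5, -0.6])}
    # init_word_embedding_with_w2v(w2vec, word_map, emb_dim=3, voc_len=2)
    # would return a (3, 3) matrix whose first two rows are the vectors above.
    return word_map, w2vec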
with open('./data/word_map.pickle', 'rb') as word_map_file:
    word_map = pickle.load(word_map_file)
with open('./data/word_2_vec_dict.pickle', 'rb') as w2vec_file:
    w2vec = pickle.load(w2vec_file)
dataset_file = "./data/file000000000000_1000lines.tfrecords"
batch_size = 4
# read_batch_data returns (label_batch, title_batch).
trY, trX = load_data_labels.read_batch_data([dataset_file], batch_size)
# Turn the scalar click labels into one-hot vectors of depth 2.
trY = tf.one_hot(trY, depth=2, axis=-1)
trY = tf.reshape(trY, [batch_size, 2])
print(trY.get_shape())
print(trX.get_shape())
w_h = init_weights([300, 625], "w_h")  # assumes emb_dim == 300 (common word2vec size)
w_h2 = init_weights([625, 625], "w_h2")
w_o = init_weights([625, 2], "w_o")
vocabulary_length = len(w2vec)
# Grab any vector from the dict to find the embedding dimensionality.
any_vector_in_dict = next(iter(w2vec.values()))
emb_dim = len(any_vector_in_dict)
embd_layer = init_word_embedding_with_w2v(w2vec, word_map, emb_dim, vocabulary_length)
b_h = init_biases([625], "b_h")
b_h2 = init_biases([625], "b_h2")
b_o = init_biases([2], "b_o")
tf.summary.histogram("w_h_summar", w_h)
tf.summary.histogram("w_h2_summar", w_h2)
tf.summary.histogram("w_o_summar", w_o)
tf.summary.histogram("embedding_layer", embd_layer)
py_x = model(trX, w_h, w_h2, w_o, vocabulary_length, embd_layer)
with tf.name_scope("cost"):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=trY, logits=py_x))
    train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
    tf.summary.scalar("cost", cost)
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(trY, 1), tf.argmax(py_x, 1))
    acc_op = tf.reduce_mean(tf.cast(correct_pred, "float"))
    tf.summary.scalar("accuracy", acc_op)
with tf.Session() as sess:
    writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    merged = tf.summary.merge_all()
    # num_epochs=1 adds a local variable, and without running queue runners
    # every sess.run below would block forever waiting for input.
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for i in range(10):
            sess.run(train_op)
            summary, acc = sess.run([merged, acc_op])
            writer.add_summary(summary, i)  # Write summary
    finally:
        coord.request_stop()
        coord.join(threads)
        writer.close()
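# After the run, the collected histograms and scalars can be inspected with
# TensorBoard, started from a shell against the LOG_DIR set above:
#   tensorboard --logdir=./log_dir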