import tensorflow as tf

# The two input functions below are assumed to live in load_data_labels.py,
# which the training script further down imports.

# Unlike the TFRecordWriter, the TFRecordReader is symbolic: it only reads
# when the graph is run in a session.
def read_and_decode_single_example(filename_queue):
    reader = tf.TFRecordReader()
    # Read a single serialized example from the filename queue.
    # serialized_example is a scalar Tensor of type string.
    _, serialized_example = reader.read(filename_queue)
    # The serialized example is converted back to actual values.
    # The format of the stored features has to be described here.
    features = tf.parse_single_example(
        serialized_example,
        features={
            # The length of both fields is known in advance; otherwise
            # tf.VarLenFeature could be used instead.
            'click': tf.FixedLenFeature([], tf.int64),
            'title': tf.FixedLenFeature([25], tf.int64)
            # Other fields would go here, e.g.
            # 'data1': tf.FixedLenFeature([], tf.float32)
        })
    # Return the decoded tensors.
    lbl = features['click']
    ttl = features['title']
    return lbl, ttl
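# For reference, a minimal sketch of how matching records could have been
# written in the first place. This helper is an assumption, not part of the
# original pipeline; only the feature names and shapes ('click' as a scalar
# int64, 'title' as 25 int64 word ids) are taken from the parsing spec above.
def write_single_example(writer, click, title_word_ids):
    # writer is a tf.python_io.TFRecordWriter; title_word_ids must hold
    # exactly 25 int64 word ids to match the FixedLenFeature above.
    example = tf.train.Example(features=tf.train.Features(feature={
        'click': tf.train.Feature(int64_list=tf.train.Int64List(value=[click])),
        'title': tf.train.Feature(int64_list=tf.train.Int64List(value=list(title_word_ids))),
    }))
    writer.write(example.SerializeToString())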
def read_batch_data(files, b_s):
    min_after_dequeue = 8
    num_threads = 2
    batch_size = b_s
    # Queue capacity formula recommended by the TensorFlow docs.
    capacity = min_after_dequeue + (num_threads + 2) * batch_size
    # num_epochs=1 adds a local variable; run tf.local_variables_initializer() first.
    filename_queue = tf.train.string_input_producer(files, num_epochs=1)
    c_n_c, tit = read_and_decode_single_example(filename_queue)
    label_batch, title_batch = tf.train.shuffle_batch(
        [c_n_c, tit],
        batch_size=batch_size,
        capacity=capacity,
        num_threads=num_threads,
        min_after_dequeue=min_after_dequeue)
    return label_batch, title_batch
import math
import os
import sys
import subprocess
import pickle
import shutil

import numpy as np
import tensorflow as tf

import load_data_labels

LOG_DIR = './log_dir'
def init_weights(shape, name):
    return tf.Variable(tf.random_normal(shape, stddev=0.01, dtype=tf.float64), name=name)

def init_biases(shape, name):
    return tf.Variable(tf.random_normal(shape, dtype=tf.float64), name=name)
def model(titles, w_h, w_h2, w_o, vocab_size, embd_layer):
    # Layer name scopes give a more readable graph in TensorBoard.
    # NOTE: the biases b_h, b_h2 and b_o are module-level globals that are
    # defined below, before model() is called.
    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        W_em = tf.Variable(embd_layer, name="word_embeddings")
        embed_l = tf.nn.embedding_lookup(W_em, titles)
        # Mean-pool the word vectors of a title into one vector per title
        # (tf.reduce_sum would be an alternative pooling choice).
        embedding = tf.reduce_mean(embed_l, [1])
    with tf.name_scope("layer1"):
        h = tf.nn.relu(tf.add(tf.matmul(embedding, w_h), b_h))
    with tf.name_scope("layer2"):
        h2 = tf.nn.relu(tf.add(tf.matmul(h, w_h2), b_h2))
    with tf.name_scope("layer3"):
        return tf.add(tf.matmul(h2, w_o), b_o)
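# To make the pooling step concrete, a tiny self-contained demo (the table
# and the ids are made-up numbers, not from the real model): three word ids
# per title are looked up in a 4 x 2 embedding table and averaged over the
# word axis, so each title becomes a single 2-dimensional vector.
def _embedding_mean_demo():
    table = tf.constant([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
    ids = tf.constant([[1, 2, 3], [0, 0, 1]])  # shape [2, 3]: 2 titles, 3 ids each
    pooled = tf.reduce_mean(tf.nn.embedding_lookup(table, ids), [1])
    # Evaluated in a session, pooled is approximately
    # [[2., 2.], [0.33, 0.33]] with shape [2, 2].
    return pooled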
def init_word_embedding_with_w2v(w2v_dict, word_map, emb_dim, voc_len):
    # Start from random vectors, then overwrite the rows of words that have
    # a pretrained word2vec vector. The extra row (+1) stays random and can
    # serve as an out-of-vocabulary slot.
    initW = np.random.uniform(-1.0, 1.0, (voc_len + 1, emb_dim))
    for word in word_map:
        vec = w2v_dict.get(word)
        idx = word_map[word]
        if vec is not None:
            initW[idx, :] = vec
    return initW
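# A toy example of the two pickled structures this script expects (the words
# and vectors are illustrative assumptions): word_map assigns each word its
# row index in the embedding matrix, and w2vec maps words to pretrained
# vectors that all share the same length emb_dim.
def _toy_embedding_inputs():
    word_map = {'cat': 0, 'dog': 1}
    w2vec = {'cat': np.array([0.1, -0.2, 0.3]),
             'dog': np.array([0.4, 0.5, -0.6])}
    # init_word_embedding_with_w2v(w2vec, word_map, emb_dim=3, voc_len=2)
    # would return a (3, 3) matrix whose first two rows are the vectors above.
    return word_map, w2vec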
with open('./data/word_map.pickle', 'rb') as word_map_file:
    word_map = pickle.load(word_map_file)
with open('./data/word_2_vec_dict.pickle', 'rb') as w2vec_file:
    w2vec = pickle.load(w2vec_file)
dataset_file = "./data/file000000000000_1000lines.tfrecords"
batch_size = 4
# read_batch_data returns (label_batch, title_batch).
trY, trX = load_data_labels.read_batch_data([dataset_file], batch_size)
# Turn the scalar click labels into one-hot vectors of depth 2.
trY = tf.one_hot(trY, depth=2, axis=-1)
trY = tf.reshape(trY, [batch_size, 2])
print(trY.get_shape())
print(trX.get_shape())
w_h = init_weights([300, 625], "w_h")  # assumes emb_dim == 300 (common word2vec size)
w_h2 = init_weights([625, 625], "w_h2")
w_o = init_weights([625, 2], "w_o")
vocabulary_length = len(w2vec)
# Grab any vector from the dict to find the embedding dimensionality.
any_vector_in_dict = next(iter(w2vec.values()))
emb_dim = len(any_vector_in_dict)
embd_layer = init_word_embedding_with_w2v(w2vec, word_map, emb_dim, vocabulary_length)
b_h = init_biases([625], "b_h")
b_h2 = init_biases([625], "b_h2")
b_o = init_biases([2], "b_o")
tf.summary.histogram("w_h_summar", w_h)
tf.summary.histogram("w_h2_summar", w_h2)
tf.summary.histogram("w_o_summar", w_o)
tf.summary.histogram("embedding_layer", embd_layer)
py_x = model(trX, w_h, w_h2, w_o, vocabulary_length, embd_layer)
with tf.name_scope("cost"):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=trY, logits=py_x))
    train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
    tf.summary.scalar("cost", cost)
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(trY, 1), tf.argmax(py_x, 1))
    acc_op = tf.reduce_mean(tf.cast(correct_pred, "float"))
    tf.summary.scalar("accuracy", acc_op)
with tf.Session() as sess:
    writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    merged = tf.summary.merge_all()
    # num_epochs=1 adds a local variable, and without running queue runners
    # every sess.run below would block forever waiting for input.
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for i in range(10):
            sess.run(train_op)
            summary, acc = sess.run([merged, acc_op])
            writer.add_summary(summary, i)  # Write summary
    finally:
        coord.request_stop()
        coord.join(threads)
        writer.close()
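# After the run, the collected histograms and scalars can be inspected with
# TensorBoard, started from a shell against the LOG_DIR set above:
#   tensorboard --logdir=./log_dir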