# --- load_data_labels.py (input pipeline; imported by the training script below) ---
import tensorflow as tf


def read_and_decode_single_example(filename_queue):
    # Unlike the TFRecordWriter, the TFRecordReader is symbolic.
    reader = tf.TFRecordReader()
    # One can read a single serialized example from a filename;
    # serialized_example is a Tensor of type string.
    _, serialized_example = reader.read(filename_queue)
    # The serialized example is converted back to actual values.
    # One needs to describe the format of the objects to be returned.
    features = tf.parse_single_example(
        serialized_example,
        features={
            # We know the length of both fields. If not,
            # tf.VarLenFeature could be used.
            'click': tf.FixedLenFeature([], tf.int64),
            'title': tf.FixedLenFeature([25], tf.int64)
            # maybe others, e.g. 'data1': tf.FixedLenFeature([], tf.float32)
        })
    # Now return the converted data.
    lbl = features['click']
    ttl = features['title']
    return lbl, ttl


def read_batch_data(files, b_s):
    min_after_dequeue = 8
    num_threads = 2
    batch_size = b_s
    capacity = min_after_dequeue + (num_threads + 2) * batch_size
    filename_queue = tf.train.string_input_producer(files, num_epochs=1)
    c_n_c, tit = read_and_decode_single_example(filename_queue)
    label_batch, title_batch = tf.train.shuffle_batch(
        [c_n_c, tit], batch_size=batch_size, capacity=capacity,
        num_threads=num_threads, min_after_dequeue=min_after_dequeue)
    return label_batch, title_batch

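# For reference, a minimal sketch of the matching writer side: it produces
# records with the same 'click'/'title' layout that read_and_decode_single_example
# parses above. The output path and example values are assumptions for
# illustration, not taken from the original code; the block is guarded so it
# only runs when this module is executed directly.
if __name__ == '__main__':
    writer = tf.python_io.TFRecordWriter('./data/example.tfrecords')  # hypothetical path
    example = tf.train.Example(features=tf.train.Features(feature={
        # 'click': a scalar int64 label; 'title': exactly 25 int64 word ids,
        # matching the FixedLenFeature specs used by the reader.
        'click': tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),
        'title': tf.train.Feature(int64_list=tf.train.Int64List(value=[0] * 25)),
    }))
    writer.write(example.SerializeToString())
    writer.close()
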
# --- training script ---
import math
import os
import sys
import subprocess
import pickle
import load_data_labels
import numpy as np
import tensorflow as tf
import shutil

LOG_DIR = './log_dir'

def init_weights(shape, name):
    return tf.Variable(tf.random_normal(shape, stddev=0.01, dtype=tf.float64), name=name)


def init_biases(shape, name):
    return tf.Variable(tf.random_normal(shape, dtype=tf.float64), name=name)


def model(titles, w_h, w_h2, w_o, vocab_size, embd_layer):
    # Add layer name scopes for better graph visualization.
    # Note: b_h, b_h2 and b_o are module-level variables defined further
    # down; they are looked up when the function is called, after they exist.
    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        W_em = tf.Variable(embd_layer, name="word_embeddings")
        embed_l = tf.nn.embedding_lookup(W_em, titles)
        # Average the word vectors of a title (could be reduce_sum instead).
        embedding = tf.reduce_mean(embed_l, [1])
    with tf.name_scope("layer1"):
        h = tf.nn.relu(tf.add(tf.matmul(embedding, w_h), b_h))
    with tf.name_scope("layer2"):
        h2 = tf.nn.relu(tf.add(tf.matmul(h, w_h2), b_h2))
    with tf.name_scope("layer3"):
        return tf.add(tf.matmul(h2, w_o), b_o)

def init_word_embedding_with_w2v(w2v_dict, word_map, emb_dim, voc_len):
    # Rows for words without a pretrained vector keep their random init.
    initW = np.random.uniform(-1.0, 1.0, (voc_len + 1, emb_dim))
    for word in word_map:
        vec = w2v_dict.get(word)
        idx = word_map[word]
        if vec is not None:
            initW[idx, :] = vec
    return initW

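# A tiny illustration of the inputs this helper expects; the words, vectors
# and indices below are made up for the example:
#
#   toy_w2v = {'cat': np.array([0.1, 0.2, 0.3])}   # word -> pretrained vector
#   toy_word_map = {'cat': 1, 'dog': 2}            # word -> embedding row
#   emb = init_word_embedding_with_w2v(toy_w2v, toy_word_map, emb_dim=3, voc_len=2)
#   # emb.shape == (3, 3); row 1 holds the 'cat' vector, row 2 stays random
#   # because 'dog' has no entry in toy_w2v.
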
with open('./data/word_map.pickle', 'rb') as word_map_file:
    word_map = pickle.load(word_map_file)
with open('./data/word_2_vec_dict.pickle', 'rb') as w2vec_file:
    w2vec = pickle.load(w2vec_file)


dataset_file = "./data/file000000000000_1000lines.tfrecords"
batch_size = 4
trY, trX = load_data_labels.read_batch_data([dataset_file], batch_size)
# Cast the one-hot labels to float64 so they match the dtype of the logits.
trY = tf.one_hot(trY, depth=2, axis=-1, dtype=tf.float64)
trY = tf.reshape(trY, [batch_size, 2])
print(trY.get_shape())
print(trX.get_shape())
# The first weight matrix assumes emb_dim == 300 (computed below).
w_h = init_weights([300, 625], "w_h")
w_h2 = init_weights([625, 625], "w_h2")
w_o = init_weights([625, 2], "w_o")
vocabulary_length = len(w2vec)
any_vector_in_dict = next(iter(w2vec.values()))
emb_dim = len(any_vector_in_dict)
embd_layer = init_word_embedding_with_w2v(w2vec, word_map, emb_dim, vocabulary_length)

b_h = init_biases([625], "b_h")
b_h2 = init_biases([625], "b_h2")
b_o = init_biases([2], "b_o")
tf.summary.histogram("w_h_summar", w_h)
tf.summary.histogram("w_h2_summar", w_h2)
tf.summary.histogram("w_o_summar", w_o)
tf.summary.histogram("embedding_layer", embd_layer)
py_x = model(trX, w_h, w_h2, w_o, vocabulary_length, embd_layer)

with tf.name_scope("cost"):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=trY, logits=py_x))
    train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
    tf.summary.scalar("cost", cost)
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(trY, 1), tf.argmax(py_x, 1))
    acc_op = tf.reduce_mean(tf.cast(correct_pred, "float"))

tf.summary.scalar("accuracy", acc_op)
with tf.Session() as sess:
    writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    merged = tf.summary.merge_all()
    tf.global_variables_initializer().run()
    # num_epochs=1 in string_input_producer creates a local variable.
    tf.local_variables_initializer().run()
    # Start the input-queue threads; without them sess.run would hang.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(10):
        sess.run(train_op)
        summary, acc = sess.run([merged, acc_op])
        writer.add_summary(summary, i)  # Write summary
    coord.request_stop()
    coord.join(threads)
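# The histograms and scalars recorded above can be viewed with TensorBoard,
# pointing it at the LOG_DIR defined at the top of the script:
#
#   tensorboard --logdir=./log_dir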