import os

import tensorflow as tf

name_dataset = 'name.csv'

train_x = []
train_y = []
with open(name_dataset, 'r', encoding='utf-8') as f:
    first_line = True
    for line in f:
        # Skip the CSV header row.
        if first_line is True:
            first_line = False
            continue
        sample = line.strip().split(',')
        if len(sample) == 2:
            train_x.append(sample[0])
            if sample[1] == '男':
                train_y.append([0, 1])  # male
            else:
                train_y.append([1, 0])  # female
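
# Expected format of name.csv (an assumption inferred from the parsing
# above, not stated in the original paste): a header row, then one
# "name,gender" pair per line, e.g.
#
#   name,sex
#   小明,男
#   丽华,女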

max_name_length = max([len(name) for name in train_x])
print("Longest name (in characters): ", max_name_length)
# Cap the sequence length at 8; longer names are truncated below.
max_name_length = 8

# The data has already been shuffled.
# If it had not been, something like this would work (it requires
# `import numpy as np` and array-typed train_x / train_y):
# shuffle_indices = np.random.permutation(np.arange(len(train_y)))
# train_x = train_x[shuffle_indices]
# train_y = train_y[shuffle_indices]

# Vocabulary (see the chatbot exercise): count character frequencies.
counter = 0
vocabulary = {}
for name in train_x:
    counter += 1
    tokens = [word for word in name]
    for word in tokens:
        if word in vocabulary:
            vocabulary[word] += 1
        else:
            vocabulary[word] = 1

# Index 0 is reserved for the padding character ' '; the rest are
# sorted by descending frequency.
vocabulary_list = [' '] + sorted(vocabulary, key=vocabulary.get, reverse=True)
print(len(vocabulary_list))

# Convert each name string to a fixed-length vector of character ids.
vocab = dict([(x, y) for (y, x) in enumerate(vocabulary_list)])
train_x_vec = []
for name in train_x:
    name_vec = []
    for word in name:
        name_vec.append(vocab.get(word))
    # Truncate names longer than max_name_length, then pad with 0 (' ')
    # so every vector matches the placeholder shape [None, input_size].
    name_vec = name_vec[:max_name_length]
    while len(name_vec) < max_name_length:
        name_vec.append(0)
    train_x_vec.append(name_vec)
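
# For illustration (hypothetical indices, since the real ones depend on
# frequency order): if vocab were {' ': 0, '明': 5, '小': 9}, the name
# "小明" would become [9, 5, 0, 0, 0, 0, 0, 0] after padding to length 8.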

#######################################################

input_size = max_name_length
num_classes = 2

batch_size = 64
num_batch = len(train_x_vec) // batch_size

# X holds padded character-id sequences, Y the one-hot gender labels.
X = tf.placeholder(tf.int32, [None, input_size])
Y = tf.placeholder(tf.float32, [None, num_classes])

dropout_keep_prob = tf.placeholder(tf.float32)


def neural_network(vocabulary_size, embedding_size=128, num_filters=128):
    # Embedding layer: map character ids to dense vectors.
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        W = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
        embedded_chars = tf.nn.embedding_lookup(W, X)
        embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
    # Convolution + max-pooling layers, one branch per filter size.
    filter_sizes = [3, 4, 5]
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1))
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]))
            conv = tf.nn.conv2d(embedded_chars_expanded, W, strides=[1, 1, 1, 1], padding="VALID")
            h = tf.nn.relu(tf.nn.bias_add(conv, b))
            pooled = tf.nn.max_pool(h, ksize=[1, input_size - filter_size + 1, 1, 1],
                                    strides=[1, 1, 1, 1], padding='VALID')
            pooled_outputs.append(pooled)

    num_filters_total = num_filters * len(filter_sizes)
    # tf.concat takes the tensor list first in TF >= 1.0.
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
    # Dropout
    with tf.name_scope("dropout"):
        h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob)
    # Output projection to the two gender classes.
    with tf.name_scope("output"):
        W = tf.get_variable("W", shape=[num_filters_total, num_classes],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]))
        output = tf.nn.xw_plus_b(h_drop, W, b)

    return output
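
# Shape walk-through (assuming input_size == 8, embedding_size == 128,
# num_filters == 128): embedded_chars_expanded is [batch, 8, 128, 1];
# each VALID convolution of size k yields [batch, 8 - k + 1, 1, 128],
# which max-pooling collapses to [batch, 1, 1, 128]; concatenating the
# three branches and flattening gives h_pool_flat of shape [batch, 384].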


# Training
def train_neural_network():
    output = neural_network(len(vocabulary_list))

    optimizer = tf.train.AdamOptimizer(1e-3)
    # TF >= 1.0 requires keyword arguments here.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=Y))
    grads_and_vars = optimizer.compute_gradients(loss)
    train_op = optimizer.apply_gradients(grads_and_vars)

    saver = tf.train.Saver(tf.global_variables())
    # The checkpoint directory must exist before saver.save is called.
    os.makedirs('./model', exist_ok=True)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for e in range(201):
            for i in range(num_batch):
                batch_x = train_x_vec[i * batch_size: (i + 1) * batch_size]
                batch_y = train_y[i * batch_size: (i + 1) * batch_size]
                _, loss_ = sess.run([train_op, loss],
                                    feed_dict={X: batch_x, Y: batch_y, dropout_keep_prob: 0.5})
                print(e, i, loss_)
            # Save the model every 50 epochs.
            if e % 50 == 0:
                saver.save(sess, "./model/name2sex.model", global_step=e)


train_neural_network()
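
# ---------------------------------------------------------------
# Inference sketch (not part of the original paste; an assumption
# about how the saved checkpoint could be used). Run in a fresh
# process, after loading the data and rebuilding vocab as above,
# in place of the train_neural_network() call:
#
# def detect_sex(name):
#     name_vec = [vocab.get(w, 0) for w in name][:max_name_length]
#     name_vec += [0] * (max_name_length - len(name_vec))
#     output = neural_network(len(vocabulary_list))
#     saver = tf.train.Saver(tf.global_variables())
#     with tf.Session() as sess:
#         saver.restore(sess, tf.train.latest_checkpoint('./model'))
#         logits = sess.run(output, feed_dict={X: [name_vec], dropout_keep_prob: 1.0})
#         return '男' if logits[0][1] > logits[0][0] else '女'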