Guest User

Untitled

a guest
Jan 23rd, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.81 KB | None | 0 0
  1. from __future__ import absolute_import
  2. from __future__ import division
  3. from __future__ import print_function
  4. import itertools
  5. import os
  6. import matplotlib.pyplot as plt
  7. import numpy as np
  8. import pandas as pd
  9. import tensorflow as tf
  10.  
  11. from sklearn.preprocessing import LabelBinarizer, LabelEncoder
  12. from sklearn.metrics import confusion_matrix
  13.  
  14. from tensorflow import keras
  15. from keras.models import Sequential
  16. from keras.layers import Dense, Activation, Dropout
  17. from keras.preprocessing import text, sequence
  18. from keras import utils
  19. import sys
  20. reload(sys)
  21. sys.setdefaultencoding('utf-8')
  22. print("You have TensorFlow version", tf.__version__)
  23. data = pd.read_csv("dataset_1.csv")
  24.  
  25. data.head()
  26. print(data['tags'].value_counts())
  27. train_size = int(len(data) * .8)
  28. print ("Train size: %d" % train_size)
  29. print ("Test size: %d" % (len(data) - train_size))
  30. train_posts = data['post'][:train_size]
  31. train_tags = data['tags'][:train_size]
  32.  
  33. test_posts = data['post'][train_size:]
  34. test_tags = data['tags'][train_size:]
  35. max_words = 5000
  36. tokenize = text.Tokenizer(num_words=max_words, char_level=False)
  37.  
  38. tokenize.fit_on_texts(train_posts) # only fit on train
  39. x_train = tokenize.texts_to_matrix(train_posts)
  40. x_test = tokenize.texts_to_matrix(test_posts)
  41. encoder = LabelEncoder()
  42. encoder.fit(train_tags)
  43. y_train = encoder.transform(train_tags)
  44. y_test = encoder.transform(test_tags)
  45. num_classes = np.max(y_train) + 1
  46. y_train = utils.to_categorical(y_train, num_classes)
  47. y_test = utils.to_categorical(y_test, num_classes)
  48. print('x_train shape:', x_train.shape)
  49. print('x_test shape:', x_test.shape)
  50. print('y_train shape:', y_train.shape)
  51. print('y_test shape:', y_test.shape)
  52. batch_size = 64
  53. epochs = 5
  54. model = Sequential()
  55. model.add(Dense(512, input_shape=(max_words,)))
  56. model.add(Activation('relu'))
  57. model.add(Dropout(0.5))
  58. model.add(Dense(num_classes))
  59. model.add(Activation('softmax'))
  60.  
  61. model.compile(loss='categorical_crossentropy',
  62. optimizer='adam',
  63. metrics=['accuracy'])
  64. history = model.fit(x_train, y_train,
  65. batch_size=batch_size,
  66. epochs=epochs,
  67. verbose=1,
  68. validation_split=0.1)
  69. score = model.evaluate(x_test, y_test,
  70. batch_size=batch_size, verbose=1)
  71. print('Test score:', score[0])
  72. print('Test accuracy:', score[1])
  73. text_labels = encoder.classes_
  74.  
  75. #for i in range(10):
  76. #prediction = model.predict(np.array([x_test[i]]))
  77. #predicted_label = text_labels[np.argmax(prediction)]
  78. #print(test_posts.iloc[i][:50], "...")
  79. #print('Actual label:' + test_tags.iloc[i])
  80. #print("Predicted label: " + predicted_label + "n")
  81. string='naplata propisanih obveza'
  82. x_test2 = tokenize.sequences_to_matrix(string)
  83. prediction2 = model.predict(np.array(x_test2))
  84. predicted_label2 = text_labels[np.argmax(prediction2)]
  85. print("Predicted label: " + predicted_label2 + "n")
Add Comment
Please, Sign In to add comment