Advertisement
Guest User

Untitled

a guest
Jun 26th, 2019
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.74 KB | None | 0 0
  1. import csv
  2. import numpy as np
  3. import deepcut
  4. from keras.models import Model
  5. from keras.layers import Input, Dense
  6. from keras.utils import to_categorical
  7. import matplotlib.pyplot as plt
  8. from random import shuffle
  9. from IPython.core.debugger import set_trace
  10. from sklearn.metrics import confusion_matrix
  11. import pandas as pd
  12.  
  # ------------------- Bag-of-words counts from a CSV -------------------
# Pipeline: read the CSV, shuffle its rows, concatenate each sentence with
# the one `num_sentense` rows later, tokenise with deepcut, and write a
# (sentence x vocabulary-word) raw count matrix to count_data.csv.


def merge_sentence_pairs(sentences, offset):
    """Return a copy of *sentences* where item i is joined with item i + offset.

    Only indices that have a partner (``i + offset < len(sentences)``) are
    changed; the rest are returned as-is.  Partners are always taken from
    the original, un-merged list.  If ``offset >= len(sentences)`` nothing
    is merged.
    """
    merged = list(sentences)
    for i in range(len(sentences) - offset):
        merged[i] = sentences[i] + sentences[i + offset]
    return merged


def build_vocabulary(tokenized):
    """Return the distinct tokens of *tokenized* (a list of token lists).

    The result is sorted so that the column order of the count matrix is
    reproducible; iterating a plain ``set`` of strings gives an order that
    can change from one interpreter run to the next.
    """
    return sorted(set().union(*tokenized))


def count_matrix(tokenized, vocabulary):
    """Count how often each vocabulary word occurs in each token list.

    Returns a float array of shape ``(len(tokenized), len(vocabulary))``
    where entry ``[i, k]`` is the number of times ``vocabulary[k]`` appears
    in ``tokenized[i]``.  Tokens that are not in *vocabulary* are ignored.
    *vocabulary* is expected to hold distinct words.
    """
    # One dict lookup per token instead of scanning the vocabulary each time.
    column_of = {word: k for k, word in enumerate(vocabulary)}
    matrix = np.zeros((len(tokenized), len(vocabulary)))
    for row, tokens in enumerate(tokenized):
        for token in tokens:
            col = column_of.get(token)
            if col is not None:
                matrix[row, col] += 1
    return matrix


# The guard keeps script / notebook runs unchanged while letting the helpers
# above be imported without touching the filesystem.
if __name__ == "__main__":
    # ------------------------- Read data ------------------------------
    # newline='' is what the csv module asks for so that quoted fields with
    # embedded newlines are parsed correctly; `with` closes the handle.
    with open('new_sample_data_observe.csv', 'r', encoding='utf-8',
              newline='') as file:
        data = list(csv.reader(file))
    shuffle(data)

    # The text of each record is taken from the fifth CSV column.
    sentences = [d[4] for d in data]

    num_sentense = 80
    sentences = merge_sentence_pairs(sentences, num_sentense)

    # Tokenise every sentence, dropping single-space tokens.
    words = [[w for w in deepcut.tokenize(s) if w != ' '] for s in sentences]
    print(words)

    # Only the first `num_sentense` sentences feed the matrix.  Slicing
    # (rather than indexing 0..79) avoids an IndexError when the CSV holds
    # fewer rows than that; the matrix then simply has fewer rows.
    used_words = words[:num_sentense]
    count = [len(tokens) for tokens in used_words]
    set_of_words = build_vocabulary(used_words)
    count_word_in_set = count_matrix(used_words, set_of_words)

    print(count_word_in_set)

    # Rows are raw counts (no normalisation); columns follow `set_of_words`.
    pd_count_word = pd.DataFrame(count_word_in_set)
    pd_count_word.to_csv(r'count_data.csv')
  50. # count_word_in_set = np.asarray(count_word_in_set).T
  51. # pd_word = pd.DataFrame(count_word_in_set,index=set_of_words)
  52. # # pd_word
  53. # pd_word.to_csv(r'data_med.csv')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement