Advertisement
Guest User

save_me

a guest
Aug 19th, 2018
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.81 KB | None | 0 0
  1. import numpy as np
  2. import scipy as sc
  3. import re
  4. from collections import Counter
  5.  
  6. with open('sentences.txt') as f:
  7.     wlist = [None]
  8.     n = 0
  9.     for line in f:
  10.         n += 1
  11.         line = line.lower()
  12.         line = re.split('[^a-z]', line)
  13.         x = list(filter(lambda x: x != '', line))
  14.         wlist += x
  15.    
  16.     uwords = set(wlist)
  17.     d = len(uwords)
  18.     uwords = list(uwords)
  19.     count = 0
  20.     dicti = {}
  21.     for uw in uwords:
  22.         if(count != d-1):
  23.             dicti[count] = uw
  24.             count += 1
  25.    
  26.     arr = np.eye(n, d)
  27.     i = 0
  28.     for line in f:
  29.         line = line.lower()
  30.         line = re.split('[^a-z]', line)
  31.         x = list(filter(lambda x: x != '', line))
  32.         c = Counter(x)
  33.         for j in range(d):
  34.             arr[i][j] = c[dicti[j]]
  35.         i+=1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement