Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import scipy as sc
- import re
- from collections import Counter
def tokenize(line):
    """Return the words of *line* in order of appearance.

    The line is lowercased and split on every character outside ``a-z``;
    the empty fragments produced by adjacent separators are discarded.
    """
    return [word for word in re.split('[^a-z]', line.lower()) if word]


def build_count_matrix(lines):
    """Build a word-occurrence matrix for an iterable of text lines.

    Args:
        lines: Iterable of strings (for example an open text file). Every
            item, including a blank one, becomes one row of the matrix.

    Returns:
        A tuple ``(arr, dicti)``. ``dicti`` maps a column index to its
        word, with words numbered in order of first appearance. ``arr`` is
        a float array with one row per line and one column per distinct
        word; ``arr[i][j]`` is the number of times ``dicti[j]`` occurs in
        line ``i``.
    """
    # Tokenize once and keep the result: a file object can only be iterated
    # a single time, so a second pass over it would yield no lines.
    tokenized = [tokenize(line) for line in lines]

    # dict.fromkeys de-duplicates while keeping first-appearance order, so
    # the column layout is the same on every run (a set would not be).
    vocabulary = list(dict.fromkeys(
        word for words in tokenized for word in words
    ))
    dicti = dict(enumerate(vocabulary))
    column_of = {word: j for j, word in dicti.items()}

    # Start from zeros: cells for words absent from a line must stay 0.
    arr = np.zeros((len(tokenized), len(vocabulary)))
    for i, words in enumerate(tokenized):
        for word, occurrences in Counter(words).items():
            arr[i][column_of[word]] = occurrences
    return arr, dicti


if __name__ == "__main__":
    with open('sentences.txt') as f:
        arr, dicti = build_count_matrix(f)
    # n: number of lines read; d: number of distinct words.
    n, d = arr.shape
    uwords = list(dicti.values())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement