Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build one list of shingle hashes per document found directly under `path`.
# Relies on `glob`, `os`, `np` and the helpers `text2words`, `shingling` and
# `hash_shingles` being available from elsewhere in this file.
# NOTE(review): the pasted source had lost its indentation; the nesting below
# is reconstructed from the data flow between statements -- confirm.
path = "dataset1"
doc_count = 0  # number of files processed (was named `int`, shadowing the builtin)
docHashList = []  # one entry per file: the value returned by hash_shingles

for infile in glob.glob(os.path.join(path, '*')):
    # Read the file as a list of lines; the handle is closed before processing.
    with open(infile, 'r', encoding='utf-8') as f:
        text = f.readlines()
    doc_count += 1

    treatedList = text2words(text)
    # shingling is called with its default arguments only.
    shingleSet = shingling(treatedList)
    hashlist = hash_shingles(shingleSet)

    # Debug output for each document.
    print(shingleSet)
    print("\n-------Hash---------\n")
    print(hashlist)
    print("----------------"*10)

    docHashList.append(hashlist)

# NOTE(review): if the per-document hash lists differ in length this is a
# ragged input; recent NumPy versions raise ValueError instead of building an
# object array -- confirm what hash_shingles returns.
arrayNumPy = np.array(docHashList)
print(arrayNumPy.shape)
print(docHashList)
# TODO: the minhash step was left disabled in the original:
#   minhashing = minhash(arrayNumPy, coefA, coefB, coefC)
print(doc_count)
- -------------------------------------------------------------------------
# Scratch check: convert a nested list with equal-length rows into a NumPy
# array and print it.
lista = [[1, 2, 3], [4, 5, 6]]
b = np.array(lista)
print(b)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement