Advertisement
Guest User

Untitled

a guest
Nov 23rd, 2017
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.12 KB | None | 0 0
  # Build one list of shingle hashes per document found in `path`, then stack
  # the per-document lists into a NumPy array.
  #
  # NOTE(review): the pasted snippet had lost its indentation; the loop body
  # below is reconstructed from the data flow (everything up to the append
  # happens per file, the array is built once afterwards) -- confirm.
  # `text2words`, `shingling` and `hash_shingles` are helpers defined
  # elsewhere; their contracts are not visible from here.
  path = "dataset1"
  docHashList = []

  # sorted(): glob.glob() returns matches in arbitrary order, which would make
  # the order of the entries in docHashList differ between runs/machines.
  for infile in sorted(glob.glob(os.path.join(path, '*'))):
      with open(infile, 'r', encoding='utf-8') as f:
          # A list of lines (not one string) is what gets handed to
          # text2words -- presumably that is what it expects; verify.
          text = f.readlines()

      treatedList = text2words(text)

      # Alternative call kept from the original for reference:
      # shingling(treatedList, 3, ['for', 'the', 'from', 'this'])
      shingleSet = shingling(treatedList)

      hashlist = hash_shingles(shingleSet)
      print(shingleSet)
      print("\n-------Hash---------\n")
      print(hashlist)
      print("----------------"*10)

      docHashList.append(hashlist)

  # NOTE(review): if the per-document hash lists differ in length this is a
  # ragged input, which NumPy does not turn into a 2-D array -- confirm that
  # hash_shingles yields equal-length lists before relying on .shape.
  arrayNumPy = np.array(docHashList)

  print(arrayNumPy.shape)
  print(docHashList)

  # Next step sketched in the original (coefficients are not defined here):
  # minhashing = minhash(arrayNumPy, coefA, coefB, coefC)

  # Number of documents processed. The original tracked this in a variable
  # named `int`, which shadowed the builtin for the rest of the module; the
  # list length is the same value.
  print(len(docHashList))
  39.  
  40.  
  41. -------------------------------------------------------------------------
  42.  
  # Minimal check of how NumPy converts a nested list: two inner lists of
  # three integers each are stacked into a single array and printed.
  lista = [
      [1, 2, 3],
      [4, 5, 6],
  ]
  b = np.array(lista)
  print(b)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement