Advertisement
Guest User

Untitled

a guest
Mar 22nd, 2019
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.58 KB | None | 0 0
  1. import codecs
  2. import time
  3.  
  4. from sklearn.feature_extraction.text import TfidfVectorizer
  5.  
  6. class NipTester:
  7.  
  8.     def load_reference_nips(self):
  9.         self.nips = []
  10.         for line in codecs.open("nips.txt", "r", "utf8"):
  11.             line = line.strip()
  12.             if len(line) > 0:
  13.                 for n in range(0,7):
  14.                     self.nips.append(line)
  15.  
  16.     def init(self):
  17.         self.load_reference_nips()
  18.         self.vect = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), min_df=0.0, max_df=1.0)
  19.         self.tfidf = self.vect.fit_transform(self.nips)
  20.  
  21.     def find_similar(self, nip):
  22.         nip_tfidf = self.vect.transform([nip])
  23.         mat = (self.tfidf * nip_tfidf.T).A
  24.  
  25.         similar = []
  26.         for i in range(0, len(self.nips)):
  27.             sim = float(mat[i][0])
  28.             if sim > 0.6:
  29.                 similar.append((self.nips[i], sim))
  30.         return similar
  31.  
  32.  
  33. test_nips = []
  34. test_nips.append("986o239432")
  35. test_nips.append("986o239.32")
  36.  
  37.  
  38. nipTester = NipTester()
  39.  
  40. start = time.time()
  41. nipTester.init()
  42. print("Loading time: %4.2f s" % (time.time() - start))
  43. print("Size of the reference NIP database: %d" % len(nipTester.nips))
  44.  
  45. loops = 5
  46. searches = 0
  47.  
  48. start = time.time()
  49. for n in range(0, loops):
  50.     for nip in test_nips:
  51.         matches = nipTester.find_similar(nip)
  52.         searches += 1
  53.         for match in matches:
  54.             print("'%s' is similar to '%s' with score of %8.5f" % (nip, match[0], match[1]))
  55.  
  56. print("Searching time: %4.2f s" % (time.time() - start))
  57. print("Number of searches: %d" % searches)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement