Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def simhash(self, texts):
    """Append a 128-bit SimHash-style fingerprint to ``self.shs`` for each text.

    Each text is split on whitespace and every token is hashed with MD5.
    For each of the 128 bit positions, the output bit is "1" when at least
    as many token hashes have a 1 there as have a 0 (ties yield "1"),
    otherwise "0". All tokens carry equal weight; repeated tokens count
    once per occurrence.

    Args:
        texts: Iterable of texts (``str`` or ``bytes``) to fingerprint.

    Returns:
        None. One fingerprint string of "0"/"1" characters is appended to
        ``self.shs`` per text, in input order. A text with no tokens
        contributes an empty string, because there are no bit columns to
        vote on.
    """
    for text in texts:
        # One 128-character bit string per whitespace-separated token.
        token_bits = []
        for word in text.split():
            # hashlib only accepts bytes: encode str tokens, pass bytes through.
            raw = word if isinstance(word, bytes) else word.encode("utf-8")
            token_bits.append(format(int(md5(raw).hexdigest(), 16), "0128b"))
        # zip(*token_bits) walks the bit positions across all tokens;
        # 2 * ones - total equals (ones - zeros), the majority vote.
        fingerprint = "".join(
            "1" if 2 * column.count("1") - len(column) >= 0 else "0"
            for column in zip(*token_bits)
        )
        self.shs.append(fingerprint)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement