Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: UTF-8 -*-
- __author__ = 'Myautsai Pan'
- import sys
- import numpy as np
class SimHash(object):
    """SimHash of a sequence of integer chromaprint hashes.

    Every bit position of the result is decided by a majority vote over the
    input hashes: bit ``i`` is set when strictly more inputs have bit ``i``
    set than clear (a tie leaves the bit clear).

    Instance attributes:
        hash_bits -- width of the resulting hash, in bits.
        value     -- the computed SimHash as a non-negative integer.
    """

    def __init__(self, chromaprint=(), bits=32):
        """Build the SimHash of ``chromaprint``.

        Args:
            chromaprint: iterable of integer hashes. An empty iterable
                yields a value of 0.
            bits: number of low-order bits of each input hash that take
                part in the vote, and the width of the result.
        """
        # Honour the requested width instead of a hard-coded 32.
        self.hash_bits = bits
        self.value = self.cal_hash(chromaprint)

    def cal_hash(self, chromaprint):
        """Return the ``self.hash_bits``-bit SimHash of ``chromaprint``.

        Args:
            chromaprint: iterable of integers (each element must support
                ``>>`` and ``&``).

        Returns:
            An integer whose bit ``i`` is 1 when the +1/-1 vote total for
            position ``i`` is positive.
        """
        votes = [0] * self.hash_bits
        for fp_hash in chromaprint:
            # TODO: validate that each fingerprint element is an integer.
            for i in range(self.hash_bits):
                # A set bit votes +1, a clear bit votes -1.
                votes[i] += 1 if (fp_hash >> i) & 1 else -1

        sim_hash = 0
        for i, vote in enumerate(votes):
            if vote > 0:
                sim_hash |= 1 << i
        return sim_hash

    def __int__(self):
        """Return the hash value as an ``int``."""
        return int(self.value)

    def __long__(self):
        """Return the hash value as a ``long``.

        Python 2 only: ``long`` does not exist on Python 3, where this hook
        is never invoked by the interpreter.
        """
        return long(self.value)

    def __hash__(self):
        """Use the hash value itself as the object's hash."""
        return self.value

    def __str__(self):
        """Return the hash as a binary string, most significant bit first.

        The string is always ``self.hash_bits`` characters long.
        """
        return ''.join(
            str((self.value >> i) & 1)
            for i in reversed(range(self.hash_bits))
        )

    def __repr__(self):
        """Return ``<SimHash BITS  ( VALUE )>`` for debugging."""
        # The spacing reproduces the established output exactly.
        return '<SimHash {0}  ( {1} )>'.format(str(self), int(self))

    def __call__(self):
        """Return the hash value; ``obj()`` is shorthand for ``obj.value``."""
        return self.value
def main(argv):
    """Demo entry point: print the SimHash of a sample fingerprint.

    Args:
        argv: command-line arguments; accepted for the conventional
            ``main`` signature but not used.

    Returns:
        0, which the script guard passes to ``sys.exit``.
    """
    a = SimHash([1, 2, 8, 8, 8, 8, 7])
    # A single parenthesised string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('SimHash a: ' + repr(a))
    return 0
if __name__ == '__main__':
    # Run the demo only when executed as a script; the value returned by
    # main() becomes the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
Add Comment
Please, Sign In to add comment