Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import sys

import numpy as np
# Module-level setup for the signature computation below.
# NOTE(review): `shingleSpace` and `sets` are not defined in this snippet;
# they are presumably provided earlier in the exercise -- confirm.
space = shingleSpace(sets)
# Sorted copy of the shingle space, giving a fixed ordering of its elements.
sortedSpace = sorted(space)
def computeSignature(space, hashes, sets):
    """
    Build the MinHash signature matrix for a collection of shingle sets.

    The shingles in ``space`` are sorted, and the position of a shingle in
    that sorted order is used as its row number. For every hash function and
    every set, the signature entry is the smallest hash value over the row
    numbers of the shingles contained in that set.

    :param space: The ShingleSpace (iterable of all shingles, must be sortable)
    :param hashes: The list of hash objects; each must offer
        ``hashf(row, n_rows)`` returning a number
    :param sets: The list of ShingleSets (anything supporting ``in``)
    :return: The MinHashSignature as an array of shape
        ``(len(hashes), len(sets))``; entries for sets that contain no
        shingle of ``space`` stay at ``sys.maxsize``
    """
    # Start from "infinity" so that any real hash value replaces the entry.
    result = np.full((len(hashes), len(sets)), sys.maxsize)

    ordered_shingles = sorted(space)
    n_rows = len(ordered_shingles)

    for row, shingle in enumerate(ordered_shingles):
        # Columns (set indices) whose set contains the current shingle.
        columns = [col for col, shingle_set in enumerate(sets)
                   if shingle in shingle_set]
        if not columns:
            # No set contains this shingle: nothing to update, skip hashing.
            continue

        for hash_index, hash_obj in enumerate(hashes):
            # The hash depends only on the row, so compute it once per row
            # instead of once per set that contains the shingle.
            hashed_row = hash_obj.hashf(row, n_rows)
            for col in columns:
                result[hash_index][col] = min(hashed_row,
                                              result[hash_index][col])
    return result
# Compute the signature matrix for the module-level inputs.
# NOTE(review): the return value is not stored here; presumably this was run
# in an interactive session that echoes it -- confirm. `hashes` and `sets`
# are not defined in this snippet.
computeSignature(space, hashes, sets)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement