Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sourmash_lib._minhash import hash_murmur
- from sourmash_lib._minhash import MinHash
def test_incorrect_minhash():
    """Compare MinHash's minimums with a pure-Python reference computation.

    The reference slides a window of length ``k`` over ``seq``, takes the
    lexicographically smaller of each k-mer and its reverse complement,
    hashes it with ``hash_murmur``, and keeps the ``maxsize`` smallest
    distinct hash values in ascending order.  The test asserts that
    ``mh.get_mins()`` equals that list.

    NOTE(review): this presumes ``MinHash(maxsize, k)`` keeps at most
    ``maxsize`` hashes of k-mers of length ``k`` and that ``get_mins()``
    returns them as an ascending list -- confirm against sourmash_lib.
    """
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    def revcomp(seq):
        """Return the reverse complement of ``seq``.

        Raises ``KeyError`` for any character other than A, C, G or T.
        """
        return "".join(complement[base] for base in reversed(seq))

    seq = "TACGGAGGACGCACGTCTGTTTTAGCTAGTCAACCTTATTCGCGGCCAGTTT"
    k = 21
    maxsize = 10

    mh = MinHash(maxsize, k)
    mh.add_sequence(seq)

    hashes = set()
    for start in range(len(seq) - k + 1):
        kmer = seq[start:start + k]
        # Hash whichever of the k-mer and its reverse complement sorts
        # first, so both strands contribute the same value.
        hashes.add(hash_murmur(min(kmer, revcomp(kmer))))

    other_mins = sorted(hashes)[:maxsize]

    # Fetch once so the printed and the asserted values are the same object.
    mins = mh.get_mins()

    # Show both lists so a mismatch is visible in the captured test output.
    print()
    print(mins)
    print(other_mins)
    assert mins == other_mins
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement