Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import regex
def _hamming_neighbors(kmer, m, alphabet="ATCG"):
    """Return the set of strings within Hamming distance ``m`` of ``kmer``."""
    n_subs = min(m, len(kmer))
    neighbors = set()
    # Pick ``n_subs`` positions and try every letter there. Because a position
    # may be "substituted" with its own letter, this also covers all smaller
    # distances, including ``kmer`` itself.
    for positions in itertools.combinations(range(len(kmer)), n_subs):
        for letters in itertools.product(alphabet, repeat=n_subs):
            candidate = list(kmer)
            for pos, letter in zip(positions, letters):
                candidate[pos] = letter
            neighbors.add("".join(candidate))
    return neighbors


def _mismatch_spectra(chains, k, neighbor_idx, n_vocab):
    """Build the (len(chains), n_vocab) matrix of mismatch spectra."""
    spectra = np.zeros((len(chains), n_vocab))
    for row, chain in enumerate(chains):
        # A chain is either the sequence string itself (1-D input) or a
        # one-element row wrapping it (2-D, single-column input).
        seq = chain if isinstance(chain, str) else chain[0]
        # ``+ 1`` so the k-mer ending at the last character is counted too.
        for offset in range(len(seq) - k + 1):
            kmer = seq[offset:offset + k]
            # Each occurrence contributes one count to every vocabulary
            # k-mer within ``m`` substitutions. Indices are unique per k-mer,
            # so in-place fancy-index addition is safe.
            spectra[row, neighbor_idx[kmer]] += 1
    return spectra


def mismatch_kernel(chainsA, chainsB, k=3, m=0):
    """Compute the (k, m)-mismatch kernel matrix between two sets of sequences.

    Every sequence is mapped to a vector indexed by the ``4**k`` k-mers over
    the alphabet ``"ATCG"``. Each k-mer occurring in the sequence adds one
    count to every vocabulary k-mer that differs from it in at most ``m``
    positions. The kernel value is the dot product of two such vectors.

    Parameters
    ----------
    chainsA, chainsB : array-like
        Collections of sequences. Each element is either a sequence string
        or a one-element row whose first item is the sequence string (for
        example a single-column 2-D array). A single string is also accepted.
    k : int, default 3
        Length of the k-mers.
    m : int, default 0
        Maximum number of substitutions allowed between a k-mer and the
        features it contributes to. ``m=0`` gives the plain spectrum kernel.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(chainsA), len(chainsB))`` whose entry ``[i, j]``
        is the kernel value between ``chainsA[i]`` and ``chainsB[j]``.

    Raises
    ------
    ValueError
        If ``k < 1`` or ``m < 0``.
    KeyError
        If a sequence contains a k-mer with characters outside ``"ATCG"``.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    if m < 0:
        raise ValueError("m must be a non-negative integer")

    chainsA = np.atleast_1d(chainsA)
    chainsB = np.atleast_1d(chainsB)

    vocab = ["".join(item) for item in itertools.product("ATCG", repeat=k)]
    n_vocab = len(vocab)
    idx = {kmer: position for position, kmer in enumerate(vocab)}

    # For every k-mer, the feature indices it contributes to.
    neighbor_idx = {
        kmer: np.array(
            sorted(idx[nb] for nb in _hamming_neighbors(kmer, m)),
            dtype=np.intp,
        )
        for kmer in vocab
    }

    specA = _mismatch_spectra(chainsA, k, neighbor_idx, n_vocab)
    specB = _mismatch_spectra(chainsB, k, neighbor_idx, n_vocab)
    return np.dot(specA, specB.T)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement