Advertisement
Guest User

Untitled

a guest
Mar 21st, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.20 KB | None | 0 0
import itertools

import numpy as np
import regex

  3. def mismatch_kernel(chainsA, chainsB, k=3, m=0):
  4.     chainsA = np.atleast_1d(chainsA)
  5.     chainsB = np.atleast_1d(chainsB)
  6.  
  7.     NVocab = {}
  8.     vocab = np.array(["".join(item) for item in itertools.product("ATCG", repeat=k)])
  9.     n_vocab = len(vocab)
  10.     idx = dict(zip(vocab, range(len(vocab))))
  11.     all_seq = "".join(vocab)
  12.     for kmer in vocab:
  13.         neighbors = regex.findall("(" + kmer + ")" + "{s<=" + str(m) + "}", all_seq, overlapped=True)
  14.         NVocab[kmer] = list(np.unique(neighbors))
  15.  
  16.     specA = []
  17.     for chain in chainsA:
  18.         spec = np.zeros(n_vocab)
  19.         n = len(chain[0])
  20.         for offset in range(n - k):
  21.             kmer = chain[0][offset: offset + k]
  22.             for nb in NVocab[kmer]:
  23.                 spec[idx[kmer]] += 1
  24.         specA.append(spec)
  25.     specA = np.asarray(specA)
  26.        
  27.     specB = []
  28.     for chain in chainsB:
  29.         spec = np.zeros(n_vocab)
  30.         n = len(chain[0])
  31.         for offset in range(n - k):
  32.             kmer = chain[0][offset: offset + k]
  33.             for nb in NVocab[kmer]:
  34.                 spec[idx[kmer]] += 1
  35.         specB.append(spec)
  36.     specB = np.asarray(specB)
  37.        
  38.     return np.dot(specA, specB.T)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement