Advertisement
zuevv

stepik_bio_414

Oct 19th, 2019
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.23 KB | None | 0 0
  1. import math
  2. from collections import defaultdict
  3.  
  4. def get_ktuples(rmap, k):
  5.     ktuples = defaultdict(list)
  6.     for idx in range(0, len(rmap)-k + 1):
  7.         ktuple = tuple(rmap[idx:idx+k])
  8.         ktuples[ktuple].append(idx)
  9.     return ktuples
  10.  
  11. def feasible_match(rmap1, rmap2):
  12.     mult = 0.02
  13.     res = sum(rmap1) - sum(rmap2)
  14.     if res < 0:
  15.         res *= -1
  16.     std_dev = sum([(mult*elem)**2 for elem in rmap2])
  17.     res /= math.sqrt(std_dev)
  18.     return res
  19.  
  20. def get_locations(ref,query):
  21.     ktuples = defaultdict(list)
  22.     for k in range(len(query)-2,len(query)+1):
  23.         morektuples = get_ktuples(ref,k)
  24.         for key, value in morektuples.items():
  25.             ktuples[key].append(value)
  26.     #print(ktuples)
  27.     c_sigm = 5
  28.     res = set()
  29.     for ktup in ktuples:
  30.         if feasible_match(ktup, query) < 5:
  31.             #print(feasible_match(ktup, query))
  32.             #print(str(ktup) + str(ktuples[ktup]))
  33.             pass
  34.         #print(ktuples[ktup])
  35.         if feasible_match(ktup, query) <= c_sigm:
  36.             for offsets in ktuples[ktup]:
  37.                 #print((offsets, len(ktup)))
  38.                 for offset in offsets:
  39.                     res.add((offset, len(ktup)))
  40.     return sorted(res)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement