Advertisement
Guest User

Untitled

a guest
May 27th, 2019
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.80 KB | None | 0 0
  1. import numpy as np
  2. import math
  3. from collections import Counter
  4. # note that this assumes that all base pairs are upper cased
  5.  
  6. def kmers(coll, k):
  7. n = len(coll)
  8. for i in range(0, n - k + 1):
  9. yield coll[i:i+k]
  10.  
  11. def encode(kmer):
  12. v = 0
  13. for i, c in enumerate(kmer):
  14. v += 4 ** i + {'A': 0, 'C': 1, 'T': 2, 'G' : 3}[c]
  15. return v
  16.  
  17. def mk_vec(coll, k):
  18. v = Counter()
  19. for kmer in kmers(coll, k):
  20. v[kmer] += 1
  21. return v
  22.  
  23. def euclidean_dist(lhs, rhs):
  24. ks = set(lhs.keys()) | set(rhs.keys())
  25. sm = 0
  26. for k in ks:
  27. delta = lhs[k] - rhs[k]
  28. sm += delta ** 2
  29. return math.sqrt(sm)
  30.  
  31. def manhattan_dist(lhs, rhs):
  32. ks = set(lhs.keys()) | set(rhs.keys())
  33. sm = 0
  34. for k in ks:
  35. delta = lhs[k] - rhs[k]
  36. sm += abs(delta)
  37. return sm
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement