Advertisement
Guest User

Untitled

a guest
Feb 13th, 2018
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.94 KB | None | 0 0
  1. from collections import defaultdict, deque, Counter
  2. from itertools import islice
  3. def sliding_window
  4. class sequence():
  5. def _
  6. def sliding_window(seq, n=2):
  7. """Returns a sliding window (of width n) over data from the iterable
  8. s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...
  9. this function courtesy http://stackoverflow.com/questions/6822725/rolling-or-sliding-window-iterator-in-python
  10. """
  11. it = iter(seq)
  12. result = tuple(islice(it, n))
  13. if len(result) == n:
  14. yield result
  15. for elem in it:
  16. result = result[1:] + (elem,)
  17. yield result
  18.  
  19. def get_clumps(genome, k, L, t):
  20. """
  21. Given a genome, returns all (L,t)-clumps of k-mers.
  22. See http://stackoverflow.com/a/26695030/2581969 for some explanation if that doesn't mean anything to you.
  23. """
  24. kmers = KmerSequence(L-k, t)
  25.  
  26. kmers_add = kmers.add #microoptimization
  27. for kmer in sliding_window(genome, k):
  28. kmers_add(kmer)
  29.  
  30. return kmers.clumps
  31.  
  32. class KmerSequence(object):
  33. """
  34. Implementation of a sliding-window (of length :limit) which will keep track of kmers in :clumps that reach a certain :threshold.
  35. """
  36. __slots__ = ['order', 'counts', 'limit', 'clumps', 't']
  37.  
  38. def __init__(self, limit, threshold):
  39. self.order = deque()
  40. self.counts = Counter()
  41. self.limit = limit
  42. self.clumps = set()
  43. self.t = threshold
  44.  
  45. def add(self, kmer):
  46. if len(self.order) > self.limit:
  47. self._remove_oldest()
  48. self._add_one(kmer)
  49.  
  50. def _add_one(self,kmer):
  51. self.order.append(kmer)
  52. new_count = self.counts[kmer] + 1
  53. self.counts[kmer] = new_count
  54.  
  55. if new_count == self.t:
  56. self.clumps.add(kmer)
  57.  
  58. def _remove_oldest(self):
  59. self.counts[self.order.popleft()] -= 1
  60. tt=time.time()
  61. clumps = get_clumps(a, k,L,t)
  62. print(time.time()-tt)
  63. print(clumps)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement