daily pastebin goal
23%
SHARE
TWEET

Untitled

a guest Feb 13th, 2018 65 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import time
from collections import defaultdict, deque, Counter
from itertools import islice
  3. def sliding_window
  4. class sequence():
  5.     def _
  6. def sliding_window(seq, n=2):
  7.     """Returns a sliding window (of width n) over data from the iterable
  8.        s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...                  
  9.        this function courtesy http://stackoverflow.com/questions/6822725/rolling-or-sliding-window-iterator-in-python
  10.     """
  11.     it = iter(seq)
  12.     result = tuple(islice(it, n))
  13.     if len(result) == n:
  14.         yield result    
  15.     for elem in it:
  16.         result = result[1:] + (elem,)
  17.         yield result
  18.  
  19. def get_clumps(genome, k, L, t):
  20.     """
  21.     Given a genome, returns all (L,t)-clumps of k-mers.
  22.     See http://stackoverflow.com/a/26695030/2581969 for some explanation if that doesn't mean anything to you.
  23.     """
  24.     kmers = KmerSequence(L-k, t)
  25.  
  26.     kmers_add = kmers.add #microoptimization
  27.     for kmer in sliding_window(genome, k):
  28.         kmers_add(kmer)
  29.        
  30.     return kmers.clumps
  31.  
  32. class KmerSequence(object):
  33.     """
  34.     Implementation of a sliding-window (of length :limit) which will keep track of kmers in :clumps that reach a certain :threshold.
  35.     """
  36.     __slots__ = ['order', 'counts', 'limit', 'clumps', 't']
  37.  
  38.     def __init__(self, limit, threshold):
  39.         self.order = deque()
  40.         self.counts = Counter()
  41.         self.limit = limit
  42.         self.clumps = set()
  43.         self.t = threshold
  44.  
  45.     def add(self, kmer):
  46.         if len(self.order) > self.limit:
  47.             self._remove_oldest()
  48.         self._add_one(kmer)
  49.  
  50.     def _add_one(self,kmer):
  51.         self.order.append(kmer)
  52.         new_count = self.counts[kmer] + 1
  53.         self.counts[kmer] = new_count
  54.  
  55.         if new_count == self.t:
  56.             self.clumps.add(kmer)
  57.  
  58.     def _remove_oldest(self):
  59.         self.counts[self.order.popleft()] -= 1
  60. tt=time.time()
  61. clumps = get_clumps(a, k,L,t)
  62. print(time.time()-tt)
  63. print(clumps)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top