Advertisement
Kosheen

Gene Expression (solution) Python

Mar 4th, 2019
174
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.22 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. import bisect
  4. import time;
  5. starttime = time.time()
  6.  
  7. genes = []; gene_lowest = 2000000000; gene_highest = 0
  8. reads = []; reads_filtered = 0;
  9. cache = {}
  10.  
  11. def isect(gene_id, read):
  12.     index_b = bisect.bisect_left(genes[gene_id], read[0])
  13.     index_e = bisect.bisect(genes[gene_id], read[1])
  14.     #print ("indexes are [%d, %d]" % (index_b, index_e))
  15.     return (index_e - index_b == 1 or (index_e == index_b and index_b % 2 == 1) or (index_e - index_b > 1))
  16.  
  17. def number_of_intersections(gene_id, reads):
  18.     n = 0
  19.     #print (genes[gene_id])
  20.     #print (reads[0])
  21.     #print (reads[-1])
  22.     #if (genes[gene_id][0] > reads[-1][-1] or genes[gene_id][-1] < reads[0][0]):
  23.     #   #print ('no intersections at all')
  24.     #   return 0
  25.     for r in reads:
  26.         if isect(gene_id, r):
  27.             return 1
  28.     return 0
  29. (n, m) = map(int, input().split())
  30. # reading genes
  31. for i in range(n):
  32.   l = list(map(int, input().split()))
  33.   gene_lowest = l[0] if l[0] < gene_lowest else gene_lowest
  34.   gene_highest = l[-1] if l[1] > gene_highest else gene_highest
  35.   #print ('gene', i, 'has', len(l)/2, 'intervals')
  36.   genes += [l]
  37.  
  38. for i in range(m):
  39.   l = list(map(int, input().split()))
  40.   read = []
  41.   for j in range(len(l) // 2):
  42.     #if (l[j*2] < gene_highest and l[j*2+1] > gene_lowest):
  43.     read += [(l[j*2], l[j*2+1])]
  44.     #else:
  45.     #    reads_filtered +=1
  46.   if (len(read)> 0):
  47.     reads += [read]
  48. #print ("genes\n", genes)
  49. #print ("reads\n", reads)
  50. #print ('reading complete in', time.time() - starttime, 'seconds')
  51. #print ('gene range is %d .. %d, reads filtered %d' % (gene_lowest, gene_highest, reads_filtered))
  52.  
  53. g_isect = {}
  54. for g in range(len(genes)):
  55.     g_isect[g] = 0
  56.  
  57. for read in reads:
  58.     startread = time.time()
  59.     n_of_genes = 0; gene_number = -1
  60.     for g in range(len(genes)):
  61.         n = number_of_intersections(g, read)
  62.         if (n > 0):
  63.             gene_number = g
  64.             n_of_genes += 1
  65.         #print (read, 'has', n, 'intersections in gene #', g, genes[g])
  66.     if (n_of_genes == 1 and gene_number >= 0):
  67.         #print ('adding to gene', gene_number)
  68.         g_isect[gene_number] += 1
  69.     #if (n_of_genes > 1):
  70.     #   print ('Multiple hits for gene ', g)
  71.     #print ('read processing taken %1.2f seconds for %d reads' % ( time.time() - startread, len(read)))
  72.  
  73.  
  74. for g in g_isect:
  75.     print (g_isect[g])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement