Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import bisect
- import time;
- starttime = time.time()
- genes = []; gene_lowest = 2000000000; gene_highest = 0
- reads = []; reads_filtered = 0;
- cache = {}
- def isect(gene_id, read):
- index_b = bisect.bisect_left(genes[gene_id], read[0])
- index_e = bisect.bisect(genes[gene_id], read[1])
- #print ("indexes are [%d, %d]" % (index_b, index_e))
- return (index_e - index_b == 1 or (index_e == index_b and index_b % 2 == 1) or (index_e - index_b > 1))
- def number_of_intersections(gene_id, reads):
- n = 0
- #print (genes[gene_id])
- #print (reads[0])
- #print (reads[-1])
- #if (genes[gene_id][0] > reads[-1][-1] or genes[gene_id][-1] < reads[0][0]):
- # #print ('no intersections at all')
- # return 0
- for r in reads:
- if isect(gene_id, r):
- return 1
- return 0
- (n, m) = map(int, input().split())
- # reading genes
- for i in range(n):
- l = list(map(int, input().split()))
- gene_lowest = l[0] if l[0] < gene_lowest else gene_lowest
- gene_highest = l[-1] if l[1] > gene_highest else gene_highest
- #print ('gene', i, 'has', len(l)/2, 'intervals')
- genes += [l]
- for i in range(m):
- l = list(map(int, input().split()))
- read = []
- for j in range(len(l) // 2):
- #if (l[j*2] < gene_highest and l[j*2+1] > gene_lowest):
- read += [(l[j*2], l[j*2+1])]
- #else:
- # reads_filtered +=1
- if (len(read)> 0):
- reads += [read]
- #print ("genes\n", genes)
- #print ("reads\n", reads)
- #print ('reading complete in', time.time() - starttime, 'seconds')
- #print ('gene range is %d .. %d, reads filtered %d' % (gene_lowest, gene_highest, reads_filtered))
- g_isect = {}
- for g in range(len(genes)):
- g_isect[g] = 0
- for read in reads:
- startread = time.time()
- n_of_genes = 0; gene_number = -1
- for g in range(len(genes)):
- n = number_of_intersections(g, read)
- if (n > 0):
- gene_number = g
- n_of_genes += 1
- #print (read, 'has', n, 'intersections in gene #', g, genes[g])
- if (n_of_genes == 1 and gene_number >= 0):
- #print ('adding to gene', gene_number)
- g_isect[gene_number] += 1
- #if (n_of_genes > 1):
- # print ('Multiple hits for gene ', g)
- #print ('read processing taken %1.2f seconds for %d reads' % ( time.time() - startread, len(read)))
- for g in g_isect:
- print (g_isect[g])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement