Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- from operator import itemgetter
- class Transcript:
- def __init__(self,chromosome,name,exons,cds,strand):
- self.chromosome=chromosome
- self.name=name
- self.strand=strand
- self.exons=exons
- self.cds=cds
- self.introns=self._computeIntrons()
- def __str__(self):
- return("Transcript <%s:%s %s:%s>" %
- (self.name,self.strand,self.chromosome,str(self.exons)))
- def _computeIntrons(self):
- self.exons.sort()
- intronstarts=[x[0] for x in exons[1:]]
- intronends =[x[1] for x in exons[0:-1]]
- introns=zip(intronstarts,intronends)
- return(introns)
- def overlaps(self,pos):
- types=[]
- for exon in self.exons:
- if((pos>exon[0]) & (pos<=exon[1])):
- if((pos>self.cds[0]) & (pos<=self.cds[1])):
- types.append('coding')
- else:
- if(pos>self.cds[1]):
- if(self.strand=='+'):
- types.append("3'-UTR")
- else:
- types.append("5'-UTR")
- if(pos<=self.cds[0]):
- if(self.strand=='+'):
- types.append("5'-UTR")
- else:
- types.append("3'-UTR")
- maxp=max(self.exons,key=itemgetter(1))[1]
- minp=min(self.exons,key=itemgetter(0))[0]
- if((pos>minp) & (pos<=maxp)):
- for exon in self.exons:
- if(((pos>exon[0]-2) & (pos<=exon[0])) |
- ((pos>exon[1]) & (pos<=exon[1]+2))):
- types.append('splice-site')
- for intron in self.introns:
- if((pos>intron[0]) & (pos<=intron[1])):
- if((pos>self.cds[0]) & (pos<=self.cds[1])):
- types.append('intron')
- return(types)
- with open("/Users/sdavis/Downloads/CCDS.hg18 (1).bed",'r') as bedfile:
- bedfile.next()
- for line in bedfile:
- sline = line.strip().split("\t")
- exonstarts=map(int,sline[9].split(",")[0:-1])
- exonends=map(int,sline[10].split(",")[0:-1])
- cds = (int(sline[6]),int(sline[7]))
- exons = zip(exonstarts,exonends)
- t = Transcript(chromosome=sline[2],name=sline[1],exons=exons,
- strand=sline[3],cds=cds)
- for i in range(10):
- overlap=t.overlaps(51063758)
- overlap=t.overlaps(51064000)
- if(len(overlap)<>0):
- print overlap
- print t.introns
Add Comment
Please, Sign In to add comment