Guest User

Untitled

a guest
Jun 20th, 2018
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.53 KB | None | 0 0
  1. #!/usr/bin/env python
  2. from operator import itemgetter
  3.  
  4. class Transcript:
  5. def __init__(self,chromosome,name,exons,cds,strand):
  6. self.chromosome=chromosome
  7. self.name=name
  8. self.strand=strand
  9. self.exons=exons
  10. self.cds=cds
  11. self.introns=self._computeIntrons()
  12.  
  13.  
  14. def __str__(self):
  15. return("Transcript <%s:%s %s:%s>" %
  16. (self.name,self.strand,self.chromosome,str(self.exons)))
  17.  
  18. def _computeIntrons(self):
  19. self.exons.sort()
  20. intronstarts=[x[0] for x in exons[1:]]
  21. intronends =[x[1] for x in exons[0:-1]]
  22. introns=zip(intronstarts,intronends)
  23. return(introns)
  24.  
  25. def overlaps(self,pos):
  26. types=[]
  27. for exon in self.exons:
  28. if((pos>exon[0]) & (pos<=exon[1])):
  29. if((pos>self.cds[0]) & (pos<=self.cds[1])):
  30. types.append('coding')
  31. else:
  32. if(pos>self.cds[1]):
  33. if(self.strand=='+'):
  34. types.append("3'-UTR")
  35. else:
  36. types.append("5'-UTR")
  37. if(pos<=self.cds[0]):
  38. if(self.strand=='+'):
  39. types.append("5'-UTR")
  40. else:
  41. types.append("3'-UTR")
  42. maxp=max(self.exons,key=itemgetter(1))[1]
  43. minp=min(self.exons,key=itemgetter(0))[0]
  44. if((pos>minp) & (pos<=maxp)):
  45. for exon in self.exons:
  46. if(((pos>exon[0]-2) & (pos<=exon[0])) |
  47. ((pos>exon[1]) & (pos<=exon[1]+2))):
  48. types.append('splice-site')
  49. for intron in self.introns:
  50. if((pos>intron[0]) & (pos<=intron[1])):
  51. if((pos>self.cds[0]) & (pos<=self.cds[1])):
  52. types.append('intron')
  53. return(types)
  54.  
  55.  
  56. with open("/Users/sdavis/Downloads/CCDS.hg18 (1).bed",'r') as bedfile:
  57. bedfile.next()
  58. for line in bedfile:
  59. sline = line.strip().split("\t")
  60. exonstarts=map(int,sline[9].split(",")[0:-1])
  61. exonends=map(int,sline[10].split(",")[0:-1])
  62. cds = (int(sline[6]),int(sline[7]))
  63. exons = zip(exonstarts,exonends)
  64. t = Transcript(chromosome=sline[2],name=sline[1],exons=exons,
  65. strand=sline[3],cds=cds)
  66. for i in range(10):
  67. overlap=t.overlaps(51063758)
  68. overlap=t.overlaps(51064000)
  69. if(len(overlap)<>0):
  70. print overlap
  71. print t.introns
Add Comment
Please, Sign In to add comment