Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import sys, urllib2, re
- es="http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=protein&term="+sys.argv[1]
- ef="http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id="
- url=urllib2.urlopen(es)
- match=url.readlines()
- print "<head>"
- print "<style>"
- print "table, th, td {"
- print " border: 1px solid black;"
- print "}"
- print "</style>"
- print "</head>"
- print "<body>"
- print "<table>"
- print "<tr>"
- print "<th>GI number</th>"
- print "<th>ID</th>"
- print "<th>Nucleotide sequence</th>"
- print "</tr>"
- for line in match:
- col1=re.search(r'<Id>(.*)</Id>',line)
- if col1:
- fetch=ef+col1.group(1)+"&rettype=fasta&retmode=xml"
- urlf=urllib2.urlopen(fetch)
- print "<tr>"
- print "<th>"
- print "<a href="http://www.ncbi.nlm.nih.gov/protein/"+col1.group(1)+"">"+col1.group(1)
- print "</a>"
- print "</th>"
- pline="".join(urlf.readlines())
- col2=re.search(r'<TSeq_sequence>(.*)</TSeq_sequence>',pline)
- col3=re.search(r'<TSeq_defline>(.*)</TSeq_defline>',pline)
- if col3:
- print "<td>"
- print col3.group(1)
- print "</td>"
- ltot=(len(col2.group(1))/40)+1
- ln=0
- print "<td>"
- while ln<=ltot:
- bnd1=ln*40
- bnd2=(ln+1)*40
- print col2.group(1)[bnd1:bnd2]
- ln+=1
- print "</tr>"
- print "</body>"
- print "</table>"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement