# Untitled

#!/usr/bin/python
import re
import sys

try:  # Python 2
    import urllib2
    from urllib import quote
except ImportError:  # Python 3: same urlopen API under a different module name
    import urllib.request as urllib2
    from urllib.parse import quote
# Search NCBI's protein database for the term given on the command line and
# print an HTML table (GI number, definition line, sequence) to stdout.
#
# Usage: script.py SEARCH_TERM > results.html

EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
ESEARCH_URL = EUTILS_BASE + "esearch.fcgi?db=protein&term="
EFETCH_URL = EUTILS_BASE + "efetch.fcgi?db=protein&id="
PROTEIN_URL = "https://www.ncbi.nlm.nih.gov/protein/"
WRAP_WIDTH = 40  # sequence characters emitted per output line

# IDs are restricted to digits so they can be pasted into URLs and HTML
# without any further escaping.
ID_RE = re.compile(r"<Id>(\d+)</Id>")
# [^<]* stops at the closing tag, so a match can never run into a later
# element the way the greedy (.*) could.
DEFLINE_RE = re.compile(r"<TSeq_defline>([^<]*)</TSeq_defline>")
SEQUENCE_RE = re.compile(r"<TSeq_sequence>([^<]*)</TSeq_sequence>")

# NOTE(review): the last column is headed "Nucleotide sequence" although the
# queries use db=protein; the text is kept as-is -- confirm the intended label.
PAGE_HEADER = "\n".join([
    "<head>",
    "<style>",
    "table, th, td {",
    "    border: 1px solid black;",
    "}",
    "</style>",
    "</head>",
    "<body>",
    "<table>",
    "<tr>",
    "<th>GI number</th>",
    "<th>ID</th>",
    "<th>Nucleotide sequence</th>",
    "</tr>",
])
# The table is closed before the body so the tags nest correctly.
PAGE_FOOTER = "\n".join(["</table>", "</body>"])


def search_url(term):
    """Return the esearch URL for *term*.

    Characters that are not legal in a URL (spaces, brackets, ...) are
    percent-encoded. '+' and '%' are left alone so a term that was already
    URL-encoded by the caller keeps working unchanged.
    """
    return ESEARCH_URL + quote(term, safe="+%")


def read_text(url):
    """Fetch *url* and return the response body as a native string."""
    response = urllib2.urlopen(url)
    try:
        body = response.read()
    finally:
        # Python 2 responses are not context managers, hence try/finally.
        response.close()
    if not isinstance(body, str):
        # Python 3 returns bytes; Python 2 already returns str.
        body = body.decode("utf-8", "replace")
    return body


def extract_ids(esearch_xml):
    """Return the list of numeric IDs found in an esearch XML response."""
    return ID_RE.findall(esearch_xml)


def parse_fasta_xml(efetch_xml):
    """Return ``(defline, sequence)`` from an efetch FASTA-XML response.

    A field that is absent from the response comes back as ``""``. The text
    is returned exactly as it appears in the XML, i.e. still entity-escaped,
    which is what lets it be written into HTML without re-escaping.
    """
    defline = DEFLINE_RE.search(efetch_xml)
    sequence = SEQUENCE_RE.search(efetch_xml)
    return (
        defline.group(1) if defline else "",
        sequence.group(1) if sequence else "",
    )


def wrap_sequence(sequence, width=WRAP_WIDTH):
    """Split *sequence* into consecutive chunks of at most *width* characters.

    An empty sequence yields an empty list; no empty trailing chunk is
    produced when the length is an exact multiple of *width*.
    """
    return [sequence[i:i + width] for i in range(0, len(sequence), width)]


def render_row(gi, defline, sequence):
    """Return the HTML ``<tr>`` for one protein record, one tag per line."""
    lines = [
        "<tr>",
        "<th>",
        '<a href="' + PROTEIN_URL + gi + '">' + gi,
        "</a>",
        "</th>",
        "<td>",
        defline,
        "</td>",
        "<td>",
    ]
    lines.extend(wrap_sequence(sequence))
    lines.append("</td>")
    lines.append("</tr>")
    return "\n".join(lines)


def main(argv=None):
    """Run the search for ``argv[0]`` and print the HTML table to stdout.

    *argv* defaults to ``sys.argv[1:]``. Returns the process exit status:
    0 on success, 2 when no search term was supplied. Network errors from
    urlopen are not caught and propagate to the caller.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.stderr.write("usage: %s SEARCH_TERM\n" % sys.argv[0])
        return 2

    gi_numbers = extract_ids(read_text(search_url(args[0])))

    print(PAGE_HEADER)
    for gi in gi_numbers:
        record = read_text(EFETCH_URL + gi + "&rettype=fasta&retmode=xml")
        defline, sequence = parse_fasta_xml(record)
        # Rows are printed as they arrive so output streams during the
        # per-record fetches.
        print(render_row(gi, defline, sequence))
    print(PAGE_FOOTER)
    return 0


if __name__ == "__main__":
    sys.exit(main())