Advertisement
Guest User

Untitled

a guest
Nov 24th, 2014
184
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.30 KB | None | 0 0
  1. #!/usr/bin/python
  2. import sys, urllib2, re
  3. es="http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=protein&term="+sys.argv[1]
  4. ef="http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id="
  5. url=urllib2.urlopen(es)
  6. match=url.readlines()
  7. print "<head>"
  8. print "<style>"
  9. print "table, th, td {"
  10. print " border: 1px solid black;"
  11. print "}"
  12. print "</style>"
  13. print "</head>"
  14. print "<body>"
  15. print "<table>"
  16. print "<tr>"
  17. print "<th>GI number</th>"
  18. print "<th>ID</th>"
  19. print "<th>Nucleotide sequence</th>"
  20. print "</tr>"
  21. for line in match:
  22. col1=re.search(r'<Id>(.*)</Id>',line)
  23. if col1:
  24. fetch=ef+col1.group(1)+"&rettype=fasta&retmode=xml"
  25. urlf=urllib2.urlopen(fetch)
  26. print "<tr>"
  27. print "<th>"
  28. print "<a href="http://www.ncbi.nlm.nih.gov/protein/"+col1.group(1)+"">"+col1.group(1)
  29. print "</a>"
  30. print "</th>"
  31. pline="".join(urlf.readlines())
  32. col2=re.search(r'<TSeq_sequence>(.*)</TSeq_sequence>',pline)
  33. col3=re.search(r'<TSeq_defline>(.*)</TSeq_defline>',pline)
  34. if col3:
  35. print "<td>"
  36. print col3.group(1)
  37. print "</td>"
  38. ltot=(len(col2.group(1))/40)+1
  39. ln=0
  40. print "<td>"
  41. while ln<=ltot:
  42. bnd1=ln*40
  43. bnd2=(ln+1)*40
  44. print col2.group(1)[bnd1:bnd2]
  45. ln+=1
  46. print "</tr>"
  47. print "</body>"
  48. print "</table>"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement