Advertisement
Guest User

Untitled

a guest
Dec 6th, 2016
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.98 KB | None | 0 0
  1. url = 'http://www.ebi.ac.uk/ena/data/view/PRJEB2357&display=xml'
  2. project_page = urlopen(url)
  3. soup = BeautifulSoup(project_page, "html.parser")
  4. print soup
  5.  
  6. <db>PUBMED</db>
  7. <id>25101644</id>
  8. </xref_link>
  9. </project_link>
  10. <project_link>
  11. <xref_link>
  12. <db>PUBMED</db>
  13. <id>24509479</id>
  14.  
  15. url = 'http://www.ebi.ac.uk/ena/data/view/PRJEB2357&display=xml'
  16. project_page = urlopen(url)
  17. soup = BeautifulSoup(project_page, "html.parser")
  18. text = soup.text
  19. print text
  20.  
  21. PUBMED
  22. 25101644
  23.  
  24.  
  25.  
  26.  
  27. PUBMED
  28. 24509479
  29.  
  30. url = 'http://www.ebi.ac.uk/ena/data/view/PRJEB2357&display=xml'
  31. project_page = urlopen(url)
  32. soup2 = BeautifulSoup(project_page, "html.parser")
  33. text = soup2.text
  34. text = text.replace('\n', ' ').replace(' ', '') #removes all spaces and linebreaks
  35. PMID = re.findall('PUBMED........', text, flags = 0)
  36. print PMID
  37.  
  38. [u'PUBMED25101644', u'PUBMED24509479']
  39.  
  40. [pubmed.find_next_sibling("ID").get_text()
  41. for pubmed in soup.find_all("DB", text="PUBMED")]
  42.  
  43. soup = BeautifulSoup(project_page, "xml")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement