Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- url = 'http://www.ebi.ac.uk/ena/data/view/PRJEB2357&display=xml'
- project_page = urlopen(url)
- soup = BeautifulSoup(project_page, "html.parser")
- print soup
- <db>PUBMED</db>
- <id>25101644</id>
- </xref_link>
- </project_link>
- <project_link>
- <xref_link>
- <db>PUBMED</db>
- <id>24509479</id>
- url = 'http://www.ebi.ac.uk/ena/data/view/PRJEB2357&display=xml'
- project_page = urlopen(url)
- soup = BeautifulSoup(project_page, "html.parser")
- text = soup.text
- print text
- PUBMED
- 25101644
- PUBMED
- 24509479
- url = 'http://www.ebi.ac.uk/ena/data/view/PRJEB2357&display=xml'
- project_page = urlopen(url)
- soup2 = BeautifulSoup(project_page, "html.parser")
- text = soup2.text
- text = text.replace('\n', ' ').replace(' ', '') #removes all spaces and linebreaks
- PMID = re.findall('PUBMED........', text, flags = 0)
- print PMID
- [u'PUBMED25101644', u'PUBMED24509479']
- [pubmed.find_next_sibling("ID").get_text()
- for pubmed in soup.find_all("DB", text="PUBMED")]
- soup = BeautifulSoup(project_page, "xml")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement