
Untitled
By: a guest on
Aug 12th, 2012 | syntax:
None | size: 0.96 KB | hits: 15 | expires: Never
How do I get raw text with BeautifulSoup?
<link>
www.link1.com
</link>
<link>
www.link2.com
</link>
from BeautifulSoup import BeautifulStoneSoup
# Parse the contents of results2 (defined outside this snippet; presumably the
# XML shown above -- confirm).
soup = BeautifulStoneSoup(results2) #Beautiful Soup
# Collect every <link> element found in the parsed document.
linklist = soup.findAll('link')
# NOTE(review): this prints the whole parsed document, but the sample output
# quoted right after this snippet looks like linklist -- confirm which one
# was intended.
print soup
[<link>www.link1.com</link>,<link>www.link2.com</link>]
[www.link1.com, www.link2.com]
# Read .string from every <link> element so the list holds each tag's text
# instead of the tag itself.
linklist = [el.string for el in soup.findAll('link')]
from bs4 import BeautifulSoup
# Sample document: two <link> elements, each with its URL on a separate line.
xml = """<html><link>
www.link1.com
</link>
<link>
www.link2.com
</link></html>"""
# Parse the sample using the XML feature set.
soup = BeautifulSoup(xml,features="xml")
# Find every <link> element in the document.
linklist = soup.find_all('link')
# Replace each tag with its text. A list comprehension produces a real list on
# both Python 2 and Python 3, whereas map() returns a lazy iterator on
# Python 3. Each string keeps the newlines that surround the URL in the sample;
# call .strip() on it if they are unwanted.
linklist = [tag.string for tag in linklist]
# Find every <link> element in the parsed document.
links = soup.find_all('link')
# Iterate over the result itself: the collection returned by find_all() has no
# .string attribute (only the individual tags do), so looping over
# links.string raised AttributeError.
link_strings = [s.string for s in links]
# Print the text between the first '>' and the following '<' of each item.
# Assumes every item is a string such as "<link>www.example.com</link>".
for l in linklist:
    # The loop body has to be indented. print(...) with a single argument
    # behaves the same on Python 2 and also parses on Python 3.
    print(str(l.split('>')[1].split('<')[0]))
>>> linklist=["<link>www.google.com</link>", "<link>www.yahoo.com</link>"]
>>> for l in linklist:
... print str(l.split('>')[1].split('<')[0])
...
www.google.com
www.yahoo.com