Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape a saved HTML page: collect every <td class="refineItem"> cell and
# reduce each one to the bare text of its label.
import re

from bs4 import BeautifulSoup

# Location of the saved HTML page to parse.
path = "/home/sree/code/scrap/index.html"

# Read the whole page; the context manager closes the handle even if the
# read fails (the original left the file open).
with open(path, "r") as page:
    data = page.read()

soup = BeautifulSoup(data, "lxml")

# All table cells carrying the "refineItem" class.
vals = soup.find_all("td", {"class": "refineItem"})

# Raw strings keep the backslashes intact for the regex engine and avoid
# the invalid-escape-sequence warnings that "\d" / "\(" trigger in plain
# string literals. The pattern text itself is unchanged.
# re_str1: the opening <td>/<label for="AU_<digits>"> markup of a cell.
re_str1 = r'<td class="refineItem" valign="top"><label for="AU_\d+">'
# re_str2: a trailing parenthesised number plus the closing tags.
re_str2 = r"\([\d]+\)</label></td>"

# Compile once; each pattern is applied to every cell.
_leading_markup = re.compile(re_str1)
_trailing_markup = re.compile(re_str2)

# For each cell: render it back to HTML text, drop the leading markup,
# drop the trailing "(N)</label></td>", then trim surrounding whitespace.
update = [
    _trailing_markup.sub("", _leading_markup.sub("", str(val))).strip()
    for val in vals
]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement