Advertisement
sreejith2904

scraping update

Jun 23rd, 2017
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.47 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2.  
  3.  
  4. path = "/home/sree/code/scrap/index.html"
  5. page = open(path,"r")
  6. data = page.read()
  7. soup = BeautifulSoup(data,"lxml")
  8.  
  9. vals = soup.findAll("td", { "class" : "refineItem" })
  10.  
  11.  
  12. import re
  13. re_str1 = '<td class="refineItem" valign="top"><label for="AU_\d+">'
  14. re_str2 = "\([\d]+\)</label></td>"
  15. #re.sub('[ES]', 'a', s)
  16. update = [re.sub(re_str1, '', str(val)) for val in vals]
  17. update = [re.sub(re_str2, '', str(val)).strip() for val in update]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement