Guest User

Untitled

a guest
Jul 19th, 2018
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.29 KB | None | 0 0
  1. def parse(base_url):
  2. base_url = requests.get('https://www.indeed.com/cmp/Google/reviews', timeout=5)
  3. page_content = BeautifulSoup(base_url.content, 'lxml')
  4. containers = page_content.findAll('div', {'class':'cmp-review-container'})
  5. df = pd.DataFrame(columns = ['rating', 'rating_title', 'rating_description',
  6. 'rating_pros', 'rating_cons'])
  7. for item in containers:
  8. try:
  9. rating = item.find('div', {'class': 'cmp-ratingNumber'}).text.replace('n', '')
  10. except:
  11. rating = None
  12. try:
  13. rating_title = item.find('div', {'class': 'cmp-review-title'}).text.replace('n', '')
  14. except:
  15. rating_title = None
  16. try:
  17. rating_description = item.find('span', {'class': 'cmp-review-text'}).text.replace('n', '')
  18. except:
  19. rating_description = None
  20. try:
  21. rating_pros = item.find('div', {'class': 'cmp-review-pro-text'}).text.replace('n', '')
  22. except:
  23. rating_pros = None
  24. try:
  25. rating_cons = item.find('div', {'class': 'cmp-review-con-text'}).text.replace('n', '')
  26. except:
  27. rating_cons = None
  28. df = df.append({'rating': rating, 'rating_title': rating_title, 'rating_description': rating_description,
  29. 'rating_pros': rating_pros, 'rating_cons': rating_cons}, ignore_index=True)
  30. return df
Add Comment
Please, Sign In to add comment