Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def parse(base_url='https://www.indeed.com/cmp/Google/reviews'):
    """Scrape review fields from a page into a DataFrame.

    Fetches ``base_url``, collects every ``div`` with class
    ``cmp-review-container`` and extracts one row per container.

    Args:
        base_url: URL of the page to fetch. Defaults to the Google reviews
            page that was previously hard-coded in the function body.

    Returns:
        A ``pandas.DataFrame`` with the columns ``rating``,
        ``rating_title``, ``rating_description``, ``rating_pros`` and
        ``rating_cons``. A field is ``None`` when its element is absent
        from the container. The frame is empty when no container is found.

    Raises:
        Whatever ``requests.get`` raises (the request uses a 5-second
        timeout); HTTP error statuses are not checked here.
    """
    # Output column -> (tag name, CSS class) of the element holding its text.
    fields = {
        'rating': ('div', 'cmp-ratingNumber'),
        'rating_title': ('div', 'cmp-review-title'),
        'rating_description': ('span', 'cmp-review-text'),
        'rating_pros': ('div', 'cmp-review-pro-text'),
        'rating_cons': ('div', 'cmp-review-con-text'),
    }

    response = requests.get(base_url, timeout=5)
    page_content = BeautifulSoup(response.content, 'lxml')
    containers = page_content.find_all('div', {'class': 'cmp-review-container'})

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for item in containers:
        row = {}
        for column, (tag, css_class) in fields.items():
            node = item.find(tag, {'class': css_class})
            # find() returns None for a missing element; keep None in that
            # case instead of relying on a bare except. Newlines (not the
            # letter "n") are stripped from the extracted text.
            row[column] = None if node is None else node.text.replace('\n', '')
        rows.append(row)

    return pd.DataFrame(rows, columns=list(fields))
Add Comment
Please, Sign In to add comment