Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape help requests ("hulpvragen") from wehelpen.nl.

The script walks the paginated overview, collects the link of every request
tile, then visits each request page and gathers the text of its
``detail-date`` elements into ``inhoud``.

Output: the list of overview URLs, the list of request links, and -- per
request page -- the dates collected so far are printed to stdout.
``wehelpen.csv`` is created (truncated), but row writing is still disabled,
as it was in the original script.
"""
import csv

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.wehelpen.nl"
OVERVIEW_URL = BASE_URL + "/ik-wil-helpen/hulpvragen/"

# Overview pages 1..100, addressed through the ``page`` query parameter.
pagination = [OVERVIEW_URL + "?page=" + str(i) for i in range(1, 101)]
print(pagination)

mijn_links = []
for link in pagination:
    # Fetch and parse *this* overview page. The earlier version parsed one
    # pre-fetched page on every iteration, so it collected the same links
    # 100 times and never looked at pages 2..100.
    mijn_http = requests.get(link)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    mijn_soup = BeautifulSoup(mijn_http.text, "html.parser")
    mijn_content = mijn_soup.find(class_="page_content")
    if mijn_content is None:
        # No content container on this page (e.g. past the last page).
        continue
    for mijn_tile in mijn_content.find_all('a', class_='tile'):
        mijn_links.append(BASE_URL + mijn_tile['href'])
print(mijn_links)

inhoud = []
# The context manager guarantees the file is closed even if a request fails.
# NOTE: 'wb' is the Python 2 convention for csv files; under Python 3 the csv
# module needs text mode (open(..., 'w', newline='')) before rows are written.
with open('wehelpen.csv', 'wb') as csvfile:
    csv_writer = csv.writer(csvfile, delimiter=' ',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    for link in mijn_links:
        nieuwe_http = requests.get(link)
        nieuwe_soup = BeautifulSoup(nieuwe_http.text, "html.parser")

        # Looked up but not exported yet. find_all() returns a list of
        # elements, so each entry needs its own ``.text`` before it can be
        # appended to ``inhoud``.
        post_info = nieuwe_soup.find_all('div', class_='data-item')
        post_titel = nieuwe_soup.find_all('h2', class_='padding-top')
        post_content = nieuwe_soup.find_all('p', class_='padding-top')
        post_date = nieuwe_soup.find_all('div', class_='detail-date')

        inhoud.extend(result.text for result in post_date)

        print(inhoud)
        if post_date:
            # Debug output of the last date element found on this page;
            # guarded so a page without dates no longer raises NameError
            # or prints a stale element from a previous page.
            print(post_date[-1])

        # Row writing is intentionally left disabled, as in the original:
        # csv_writer.writerow(inhoud)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement