Advertisement
Guest User

Untitled

a guest
May 25th, 2015
231
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.46 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import csv
  4.  
  5. mijn_http = requests.get("https://www.wehelpen.nl/ik-wil-helpen/hulpvragen/")
  6. mijn_html = mijn_http.text
  7. mijn_soup = BeautifulSoup(mijn_html)
  8.  
  9. pagination = []
  10. for i in range(1,101):
  11. new_link = "https://www.wehelpen.nl/ik-wil-helpen/hulpvragen/?page=" + str(i)
  12. pagination.append(new_link)
  13. print pagination
  14.  
  15. mijn_links = []
  16. for link in pagination:
  17. mijn_content = mijn_soup.find(class_="page_content")
  18. mijn_tiles = mijn_content.find_all('a', class_='tile')
  19. for mijn_tile in mijn_tiles:
  20. mijn_links.append("https://www.wehelpen.nl" + mijn_tile['href'])
  21. print mijn_links
  22.  
  23. csvfile = open('wehelpen.csv','wb')
  24. csv_writer = csv.writer(csvfile, delimiter=' ',
  25. quotechar='|', quoting=csv.QUOTE_MINIMAL)
  26.  
  27. inhoud = []
  28. for link in mijn_links:
  29. nieuwe_http = requests.get(link)
  30. nieuwe_html = nieuwe_http.text
  31. nieuwe_soup = BeautifulSoup(nieuwe_html)
  32. post_info = nieuwe_soup.find_all('div', class_='data-item')
  33. post_titel = nieuwe_soup.find_all('h2', class_='padding-top')
  34. post_content = nieuwe_soup.find_all('p', class_='padding-top')
  35. post_date = nieuwe_soup.find_all('div', class_='detail-date')
  36. for result in post_date:
  37. result.text
  38. inhoud.append(result.text)
  39. # inhoud.append(post_date.text)
  40. # inhoud.append(post_info.text)
  41. # inhoud.append(post_titel.text)
  42. # inhoud.append(post_content.text)
  43. print inhoud
  44. print result
  45. #csv_writer.writerow(inhoud)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement