Advertisement
Guest User

LachschonCrawler

a guest
Jan 27th, 2016
593
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.96 KB | None | 0 0
  import urllib2
  from bs4 import BeautifulSoup

  # Python 2 script: downloads every page of one lachschon.de forum thread and
  # appends the "post-list" element(s) of each page to lachschon.html.

  # Base URL of the thread; the page number is appended to it.
  THREAD_URL = 'http://www.lachschon.de/forum/thread/show/50195/?page='


  def _fetch_page(page_number):
      """Download one page of the thread and return it as a parsed soup."""
      response = urllib2.urlopen(THREAD_URL + str(page_number))
      try:
          return BeautifulSoup(response, 'html.parser')
      finally:
          # Release the HTTP connection even if parsing fails.
          response.close()


  print("Meddl Loide! Servus und herzlich willkommen beim automatischen Lachschon-Crawler\n")

  first_page = _fetch_page(1)

  # The text of the last <a class="link"> on the first page is used as the
  # total page count (NOTE(review): depends on the site's pagination markup --
  # confirm). If no such link is found, fall back to a single page instead of
  # failing with an IndexError.
  page_links = first_page.find_all("a", class_="link")
  pagecountmax = page_links[-1].getText() if page_links else "1"

  print("Etzala "+pagecountmax+" Seiten voll")

  # "with" guarantees the output file is closed even if a download fails midway.
  with open("lachschon.html", "w") as savefile:
      savefile.write('<meta charset="UTF-8" /><style>li{border: solid 2px black;list-style-type: none;}</style>')

      # "+ 1" so the last page is included; range() excludes its upper bound.
      for page_number in range(1, int(pagecountmax) + 1):
          # Page 1 was already downloaded above; reuse it instead of refetching.
          if page_number == 1:
              soup = first_page
          else:
              soup = _fetch_page(page_number)

          content = soup.find_all(id="post-list")

          # Encode explicitly as UTF-8 (matching the <meta charset> written
          # above); a plain str() on the unicode text would use the ASCII
          # codec and raise UnicodeEncodeError on umlauts.
          page_html = u'\n'.join(unicode(tag) for tag in content)
          savefile.write(page_html.encode('utf-8'))

          print("Speichere Seite "+str(page_number)+"...")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement