Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import urllib, urllib2
- import HTMLParser
- import re
- import time
class JeuALaConParser(HTMLParser.HTMLParser):
    """Collect the text found inside ``<div class="postmsg">`` elements.

    Feed the parser an HTML page with ``feed()``; every text chunk that
    appears inside a ``postmsg`` div (including text inside elements nested
    in it) is appended, decoded to unicode, to ``traitsdesprit``.

    Attributes:
        enregistrage: True while the parser is inside a ``postmsg`` div and
            is therefore recording text.
        traitsdesprit: list of the unicode text chunks recorded so far.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.enregistrage = False
        self.traitsdesprit = []
        # Number of <div> elements currently open, counted from the
        # enclosing "postmsg" div. Lets us tell the closing tag of the
        # message itself from the closing tag of a div nested inside it.
        self._profondeur = 0

    def handle_starttag(self, tag, attributes):
        """Start recording on a ``postmsg`` div; track nested divs."""
        if tag != 'div':
            return
        if self.enregistrage:
            # A div nested inside the message: remember it so that its
            # closing tag does not end the recording prematurely.
            self._profondeur += 1
            return
        for name, value in attributes:
            # ``value`` is None for attributes without a value; a class
            # attribute is a whitespace-separated list of tokens.
            if name == 'class' and value and 'postmsg' in value.split():
                self.enregistrage = True
                self._profondeur = 1
                return

    def handle_endtag(self, tag):
        """Stop recording once the ``postmsg`` div itself is closed."""
        if tag != 'div' or not self.enregistrage:
            return
        self._profondeur -= 1
        if self._profondeur <= 0:
            self._profondeur = 0
            self.enregistrage = False

    def handle_data(self, data):
        """Record ``data`` as unicode when inside a ``postmsg`` div.

        Byte strings are decoded as UTF-8 first, because that decoding
        fails on invalid input, and as ISO-8859-1 otherwise (which accepts
        any byte sequence). Data that is already unicode is stored as is.
        """
        if not self.enregistrage:
            return
        if isinstance(data, bytes):
            try:
                data = data.decode('utf-8')
            except UnicodeDecodeError:
                data = data.decode('iso-8859-1')
        self.traitsdesprit.append(data)
# Optional: route the requests through an authenticating HTTP proxy.
# Uncomment the four lines below and fill in the placeholders to enable it.
#proxy = urllib2.ProxyHandler({'http': 'http://leusername:lemotdepasse@leproxy:leporc'})
#auth = urllib2.HTTPBasicAuthHandler()
#opener = urllib2.build_opener(proxy, auth, urllib2.HTTPHandler)
#urllib2.install_opener(opener)

# Number of pages to download from the thread.
# NOTE(review): the original loop was ``range(1, 92)``, which only fetches
# pages 1..91 although every name here says ninety-two ("nonante-deux")
# pages; treated as an off-by-one. Confirm against the actual thread length.
NOMBRE_DE_PAGES = 92

# Download every page of the thread and harvest the text of its posts.
nonantedeuxpagesdetraitsdesprit = []
for i in range(1, NOMBRE_DE_PAGES + 1):
    moissoneuse = JeuALaConParser()
    unepagedelols = urllib2.urlopen('http://leliendufauxrhum/viewtopic.php?id=323&p=' + str(i))
    try:
        moissoneuse.feed(unepagedelols.read())
    finally:
        # Release the connection even when reading or parsing fails.
        unepagedelols.close()
    nonantedeuxpagesdetraitsdesprit += moissoneuse.traitsdesprit
    # Pause one second between requests.
    time.sleep(1)

# Glue the harvested chunks together, each followed by a single space,
# skipping the chunks made up only of newlines and tabs.
compilationdenonantedeuxpagesdetraitsdesprit = u''.join(
    traitdesprit + u' '
    for traitdesprit in nonantedeuxpagesdetraitsdesprit
    if not re.match('^[\n\t]*$', traitdesprit)
)
print(compilationdenonantedeuxpagesdetraitsdesprit)
Advertisement
Add Comment
Please, Sign In to add comment