Guest User

Vroum

a guest
Sep 29th, 2012
13
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.77 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import urllib, urllib2
  3. import HTMLParser
  4. import re
  5. import time
  6.  
  7. class JeuALaConParser(HTMLParser.HTMLParser):
  8.   def __init__(self):
  9.     HTMLParser.HTMLParser.__init__(self)
  10.     self.enregistrage = False
  11.     self.traitsdesprit = []
  12.  
  13.   def handle_starttag(self, tag, attributes):
  14.     if tag != 'div':
  15.       return
  16.     for name, value in attributes:
  17.       if name == 'class' and value == 'postmsg':
  18.         break
  19.     else:
  20.       return
  21.     self.enregistrage = True
  22.  
  23.   def handle_endtag(self, tag):
  24.     if tag == 'div' and self.enregistrage:
  25.       self.enregistrage = False
  26.  
  27.   def handle_data(self, data):
  28.     if self.enregistrage:
  29.       try:
  30.         tde = data.decode('iso-8859-1')
  31.       except UnicodeEncodeError:
  32.         try:
  33.           tde = data.decode('utf-8')
  34.         except:
  35.           print("Klonk: " + data)
  36.           return        
  37.       self.traitsdesprit.append(tde)  
  38.  
  39. #proxy = urllib2.ProxyHandler({'http': 'http://leusername:lemotdepasse@leproxy:leporc'})
  40. #auth = urllib2.HTTPBasicAuthHandler()
  41. #opener = urllib2.build_opener(proxy, auth, urllib2.HTTPHandler)
  42. #urllib2.install_opener(opener)
  43.  
  44. nonantedeuxpagesdetraitsdesprit = []
  45. for i in range(1, 92):
  46.     moissoneuse = JeuALaConParser()
  47.     unepagedelols =  urllib2.urlopen('http://leliendufauxrhum/viewtopic.php?id=323&p=' + str(i))
  48.     moissoneuse.feed(unepagedelols.read())
  49.     nonantedeuxpagesdetraitsdesprit += moissoneuse.traitsdesprit
  50.     time.sleep(1)
  51.    
  52. compilationdenonantedeuxpagesdetraitsdesprit = u''
  53. for traitdesprit in nonantedeuxpagesdetraitsdesprit:
  54.     if re.match('^[\n\t]*$', traitdesprit):
  55.         continue    
  56.     compilationdenonantedeuxpagesdetraitsdesprit += traitdesprit + u' '
  57.    
  58. print(compilationdenonantedeuxpagesdetraitsdesprit)
Advertisement
Add Comment
Please, Sign In to add comment