Advertisement
Guest User

Untitled

a guest
Aug 22nd, 2013
191
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.17 KB | None | 0 0
  1. from urllib2 import urlopen
  2.  
  class Item():
      """A database entry identified by ``id`` plus the comments scraped for it."""

      # Class-level defaults; both start unset and are assigned per instance
      # once a page has been parsed.
      id = comments = None
  6.  
  7.  
  class Comment():
      """A single user comment: its author, its timestamp and its text."""

      # Class-level defaults; each field starts unset and is overwritten on
      # the instance as the comment is parsed.
      user = timestamp = comment = None
  12.  
  # Markup that wraps the "user @ timestamp" header of each comment.
  user_token_start, user_token_end = (
      "<span style='text-decoration: underline; font-size: 110%'>",
      "</span>",
  )

  # Markup that wraps the body text of each comment.
  comment_token_start, comment_token_end = (
      "<div style='margin-bottom: 20px'>",
      "</div>",
  )
  18.  
  def parse_response(id, response):
      """Parse the comments out of one item page and return them as an Item.

      Args:
          id: Identifier of the item the page belongs to; stored on the result.
          response: The page's HTML as a string.

      Returns:
          An Item whose ``id`` is *id* and whose ``comments`` is a list of
          Comment objects in the order they appear on the page. The list is
          empty when the page has no ``<legend>Comments</legend>`` section.

      Raises:
          IndexError: if a comment header lacks the ``' @ '`` separator or is
              not followed by the comment body marker.
      """
      item = Item()
      item.id = id
      item.comments = []

      # A page without a comments section is a normal case, not an error:
      # report it as an item with no comments instead of raising IndexError.
      around_legend = response.split('<legend>Comments</legend>')
      if len(around_legend) < 2:
          return item
      section = around_legend[1].split('</table>')[0]

      # Everything before the first user marker is preamble, so skip piece 0.
      for chunk in section.split(user_token_start)[1:]:
          # The header reads "<user> @ <timestamp>"; split it only once.
          header_parts = chunk.split(user_token_end)[0].split(' @ ')
          body = chunk.split(comment_token_start)[1].split(comment_token_end)[0]

          com = Comment()
          com.user = header_parts[0].strip()
          com.timestamp = header_parts[1].strip()
          com.comment = body.strip()
          item.comments.append(com)

      # The parsed item was previously built and then discarded.
      return item
  38.  
  # URL template for an item page; %i is replaced by the numeric item id.
  BASE_URL = 'http://auno.org/ao/db.php?id=%i'


  def crawl(first_id=1, stop_id=300000):
      """Fetch and parse the item page for every id in [first_id, stop_id).

      Args:
          first_id: First item id to request (inclusive).
          stop_id: Id at which to stop (exclusive).

      A request that fails with an I/O error is reported on stderr and
      skipped, so one bad id does not abort the rest of the crawl.
      """
      import sys  # local import: only needed here, for error reporting

      for item_id in range(first_id, stop_id):
          try:
              response = urlopen(BASE_URL % item_id)
              try:
                  page = response.read()
              finally:
                  # Release the connection even when reading fails part-way.
                  response.close()
          except IOError as err:
              # urllib's URL and HTTP errors are IOError subclasses.
              sys.stderr.write('skipping id %i: %s\n' % (item_id, err))
              continue
          parse_response(item_id, page)


  # Run the crawl only when executed as a script, not on import.
  if __name__ == '__main__':
      crawl()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement