Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from contextlib import closing
from urllib2 import urlopen
class Item(object):
    """One scraped database entry: its id and the comments found on its page.

    Both fields are plain public attributes. They default to ``None`` so an
    instance can be created empty and filled in afterwards.
    """

    # Class-level defaults are kept so ``Item.id`` / ``Item.comments`` remain
    # readable on the class itself, as they were before a constructor existed.
    id = None
    comments = None

    def __init__(self, id=None, comments=None):
        """Create an item, optionally pre-populated.

        Args:
            id: Identifier of the entry (``None`` when not yet known).
            comments: Comments attached to the entry (``None`` when not yet
                parsed).
        """
        self.id = id
        self.comments = comments

    def __repr__(self):
        return "Item(id=%r, comments=%r)" % (self.id, self.comments)
class Comment(object):
    """A single user comment: who wrote it, when, and the comment text.

    All three fields are plain public attributes. They default to ``None``
    so an instance can be created empty and filled in afterwards.
    """

    # Class-level defaults are kept so the attributes remain readable on the
    # class itself, as they were before a constructor existed.
    user = None
    timestamp = None
    comment = None

    def __init__(self, user=None, timestamp=None, comment=None):
        """Create a comment, optionally pre-populated.

        Args:
            user: Name of the poster.
            timestamp: Posting time, kept exactly as supplied (no parsing or
                conversion is done here).
            comment: Body text of the comment.
        """
        self.user = user
        self.timestamp = timestamp
        self.comment = comment

    def __repr__(self):
        return "Comment(user=%r, timestamp=%r, comment=%r)" % (
            self.user,
            self.timestamp,
            self.comment,
        )
# Literal HTML fragments used to cut a page apart. The "user" pair wraps the
# "<name> @ <timestamp>" header of each comment; the "comment" pair wraps the
# comment body that follows that header.
user_token_start = "<span style='text-decoration: underline; font-size: 110%'>"
user_token_end = "</span>"
comment_token_start = "<div style='margin-bottom: 20px'>"
comment_token_end = "</div>"


def parse_response(id, response):
    """Extract the comments of one item page into an ``Item``.

    The comments section is the text between the first
    ``<legend>Comments</legend>`` marker and the next ``</table>``. Inside
    it, every occurrence of ``user_token_start`` begins one comment.

    Args:
        id: Identifier to store on the returned item.
        response: HTML of the page as a string.

    Returns:
        An ``Item`` with ``id`` set and ``comments`` set to a list of
        ``Comment`` objects in page order. The list is empty when the page
        has no ``<legend>Comments</legend>`` marker.

    Raises:
        IndexError: If a comment inside the section lacks the `` @ ``
            separator or the comment-body ``<div>``; such markup is treated
            as an unexpected page layout rather than skipped silently.
    """
    item = Item()
    item.id = id
    item.comments = []

    around_legend = response.split('<legend>Comments</legend>')
    if len(around_legend) < 2:
        # No comments section on this page: nothing to parse.
        return item
    section = around_legend[1].split('</table>')[0]

    # The text before the first user token is section preamble, not a comment.
    for chunk in section.split(user_token_start)[1:]:
        header_fields = chunk.split(user_token_end)[0].split(' @ ')
        com = Comment()
        com.user = header_fields[0].strip()
        com.timestamp = header_fields[1].strip()
        body = chunk.split(comment_token_start)[1]
        com.comment = body.split(comment_token_end)[0].strip()
        item.comments.append(com)

    # Hand the result back to the caller; previously it was built and dropped.
    return item
# URL template for one item page; "%i" is filled with the integer item id.
BASE_URL = 'http://auno.org/ao/db.php?id=%i'

# Fetch and parse every id from 1 up to and including 299999, one at a time.
# The loop variable is deliberately not called "id" so the builtin stays
# usable at module scope after the loop.
for item_id in range(1, 300000):
    # closing() releases the connection even if read() raises, so a long
    # crawl does not accumulate open sockets.
    with closing(urlopen(BASE_URL % item_id)) as response:
        page = response.read()
    # The value returned by parse_response is not used here.
    parse_response(item_id, page)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement