# Untitled

from urllib2 import urlopen

class Item:
    """One database item and the comments scraped from its page."""

    # id: identifier of the item; None until assigned.
    # comments: list of Comment objects; None until the page has been parsed.
    id = comments = None


class Comment:
    """A single user comment attached to an item."""

    # user: name shown before the ' @ ' separator in the comment header.
    # timestamp: text shown after the ' @ ' separator (kept as a string).
    # comment: body text of the comment.
    user = timestamp = comment = None

# Markup that brackets the "<user> @ <timestamp>" header of each comment.
user_token_start, user_token_end = (
    "<span style='text-decoration: underline; font-size: 110%'>",
    "</span>",
)

# Markup that brackets the body text of each comment.
comment_token_start, comment_token_end = (
    "<div style='margin-bottom: 20px'>",
    "</div>",
)

def parse_response(id, response):
    """Extract the comments of one item page into an ``Item``.

    The comments section is the text between ``<legend>Comments</legend>``
    and the following ``</table>``. Inside it, every comment starts with
    ``user_token_start``, followed by a ``<user> @ <timestamp>`` header that
    ends at ``user_token_end`` and a body wrapped in ``comment_token_start``
    / ``comment_token_end``.

    Args:
        id: Identifier of the item; stored unchanged on the result.
        response: HTML text of the item page.

    Returns:
        An ``Item`` whose ``id`` is ``id`` and whose ``comments`` is a list
        of ``Comment`` objects in page order. The list is empty when the
        page has no comments section.

    Raises:
        IndexError: If a comment header lacks the ``' @ '`` separator or a
            comment has no ``comment_token_start`` marker.
    """
    item = Item()
    item.id = id
    item.comments = []

    sections = response.split('<legend>Comments</legend>')
    if len(sections) < 2:
        # No comments section on this page: nothing to parse.
        return item
    comments_html = sections[1].split('</table>')[0]

    # The chunk before the first user marker is preamble, not a comment.
    for chunk in comments_html.split(user_token_start)[1:]:
        header = chunk.split(user_token_end)[0].split(' @ ')

        parsed = Comment()
        parsed.user = header[0].strip()
        parsed.timestamp = header[1].strip()
        body = chunk.split(comment_token_start)[1]
        parsed.comment = body.split(comment_token_end)[0].strip()
        item.comments.append(parsed)

    return item

# URL template of an item page; %i is filled with the numeric item id.
BASE_URL = 'http://auno.org/ao/db.php?id=%i'

# Fetch and parse every item page in the id range, one request at a time.
# NOTE: the value returned by parse_response is not used here.
for item_id in range(1, 300000):
    response = urlopen(BASE_URL % item_id)
    try:
        parse_response(item_id, response.read())
    finally:
        # Close explicitly so a failed parse does not leave the connection
        # open; urllib2 responses cannot be used in a ``with`` statement.
        response.close()