Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import time
- import json
- from random import randrange
- import urllib2
- from HTMLParser import HTMLParser
- h = HTMLParser()  # shared parser; Trivia calls h.unescape() to decode HTML entities in API text
- trivia_url = 'https://opentdb.com/api.php'  # question endpoint requested by crawl_trivia_api
- token_url = 'https://opentdb.com/api_token.php?command=request'  # endpoint start_crawler reads the session token from
- file_name = 'trivia.txt'  # path of the output file the questions are written to
- questions_per_request = 50  # sent as the "amount" query parameter of every question request
- f = open(file_name,"w")  # opened in write mode at import time (truncates any existing file); closed by crawl_trivia_api
- # Trivia class for convenience
class Trivia(object):
    """One multiple-choice question, ready to be serialized to the output file.

    All text is HTML-unescaped and UTF-8 encoded, and the correct answer is
    inserted among the incorrect options at a random position.

    Attributes:
        question: The unescaped, UTF-8 encoded question text.
        options: Every answer choice (incorrect ones plus the correct one).
        answer_index: Zero-based position of the correct answer in ``options``.
    """

    def __init__(self, question, answer, options):
        """Build a question from raw text fields.

        Args:
            question: Question text, possibly containing HTML entities.
            answer: Text of the correct answer.
            options: List of incorrect answer texts.
        """
        self.question = self._clean(question)
        # One extra slot so the answer may also land after the last option.
        self.answer_index = self.make_random_index(len(options) + 1)
        self.options = [self._clean(opt) for opt in options]
        self.options.insert(self.answer_index, self._clean(answer))

    @staticmethod
    def _clean(text):
        """Return ``text`` with HTML entities decoded, encoded as UTF-8."""
        return h.unescape(text).encode("utf-8")

    def make_random_index(self, range):
        """Return a random integer ``i`` with ``0 <= i < range``.

        The parameter name shadows the builtin ``range``; it is kept as-is so
        existing keyword callers keep working.
        """
        return randrange(0, range)

    @property
    def option_string(self):
        """All options on one line, each rendered as ``(N) text `` (1-based)."""
        # join + enumerate instead of growing a string inside an index loop;
        # the trailing space after every option is part of the file format.
        return ''.join(
            '(%d) %s ' % (number, option)
            for number, option in enumerate(self.options, 1)
        )

    @property
    def file_string(self):
        """Four-line record: question, options, answer text, 1-based answer number."""
        return '%s\n%s\n%s\n%s\n' % (
            self.question,
            self.option_string,
            self.options[self.answer_index],
            self.answer_index + 1,
        )
def start_crawler():
    """Request a session token and start crawling with it.

    Loads the JSON document served at ``token_url``, reads its ``token``
    field and hands it to ``crawl_trivia_api``.
    """
    # Function-scope import keeps this block self-contained. closing() is
    # needed because urllib2 responses are not context managers on Python 2.
    from contextlib import closing

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Getting session token..')
    with closing(urllib2.urlopen(token_url)) as response:
        token_data = json.load(response)
    token = token_data['token']
    print('Got token: %s' % token)
    print('Starting to crawl API..')
    crawl_trivia_api(token)
def crawl_trivia_api(token):
    """Download every question available to ``token`` and write them to ``f``.

    Requests ``questions_per_request`` questions at a time until the API
    answers with response code 4, writing one ``Trivia.file_string`` record
    per question. The module-level file ``f`` is closed when the crawl ends,
    whether it finishes normally or with an error.

    Args:
        token: Session token sent as the ``token`` query parameter.

    Raises:
        RuntimeError: If the API answers with a response code other than 0
            (success, per the Open Trivia DB documentation) or 4. Retrying
            blindly on such codes would never terminate.
    """
    # Function-scope import keeps this block self-contained. closing() is
    # needed because urllib2 responses are not context managers on Python 2.
    from contextlib import closing

    url = "%s?amount=%s&token=%s" % (trivia_url, questions_per_request, token)
    # Running total of records written by this crawl. This replaces
    # re-reading the output file after every batch, which leaked a file
    # handle each time and undercounted because ``f`` is still buffered.
    question_count = 0
    try:
        # A loop rather than self-recursion, so a long crawl cannot exhaust
        # the interpreter's recursion limit.
        while True:
            with closing(urllib2.urlopen(url)) as response:
                data = json.load(response)

            code = data['response_code']
            # response code 4 means there are no more questions!
            if code == 4:
                print('No more questions!')
                return
            if code != 0:
                raise RuntimeError(
                    'Unexpected response code from trivia API: %s' % code)

            print('Got more questions..')
            for result in data['results']:
                trivia = Trivia(
                    result['question'],
                    result['correct_answer'],
                    result['incorrect_answers'],
                )
                f.write(trivia.file_string)
                question_count += 1
            print('Questions saved! Now at %d questions!' % question_count)

            # Brief pause between consecutive requests.
            time.sleep(0.1)
    finally:
        # Flushes buffered records and releases the handle even on failure.
        f.close()
- start_crawler()  # top-level call: the crawl starts as soon as this file is executed or imported
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement