Advertisement
Guest User

Untitled

a guest
Sep 26th, 2017
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.31 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. import time
  4. import json
  5. from random import randrange
  6. import urllib2
  7. from HTMLParser import HTMLParser
  8.  
  9. h = HTMLParser()  # shared parser instance; Trivia calls h.unescape() on question/answer/option text
  10. trivia_url = 'https://opentdb.com/api.php'  # question endpoint requested by crawl_trivia_api
  11. token_url = 'https://opentdb.com/api_token.php?command=request'  # requested by start_crawler to obtain a session token
  12. file_name = 'trivia.txt'  # path of the output file the questions are written to
  13. questions_per_request = 50  # sent as the `amount` query parameter on every question request
  14.  
  15. f = open(file_name,"w")  # output handle opened in write mode at module load; written to and closed inside crawl_trivia_api
  16.  
  17. # Trivia class for convenience
  18. class Trivia(object):
  19.  
  20.     def __init__(self, question, answer, options):
  21.         # save the question
  22.         self.question = h.unescape(question).encode("utf-8")
  23.  
  24.         # generate a random index for answer
  25.         self.answer_index = self.make_random_index(len(options) + 1)
  26.  
  27.         # make list of options and insert correct answer at generated index
  28.         self.options = [h.unescape(opt).encode("utf-8") for opt in options]
  29.         self.options.insert(self.answer_index, h.unescape(answer).encode("utf-8"))
  30.  
  31.     def make_random_index(self, range):
  32.         return randrange(0, range)
  33.  
  34.     @property
  35.     def option_string(self):
  36.         option_string = ''
  37.  
  38.         for x in range(0, len(self.options)):
  39.             option = '(%d) %s ' % (x + 1, self.options[x])
  40.             option_string = '%s%s' % (option_string, option)
  41.  
  42.         return option_string
  43.  
  44.     @property
  45.     def file_string(self):
  46.         return ('%s\n%s\n%s\n%s\n') % (self.question, self.option_string, self.options[self.answer_index], self.answer_index + 1)
  47.  
  48. def start_crawler():
  49.     print 'Getting session token..'
  50.  
  51.     token_data = json.load(urllib2.urlopen(token_url))
  52.     token = token_data['token']
  53.     print 'Got token: %s' % token
  54.  
  55.     print 'Starting to crawl API..'
  56.     crawl_trivia_api(token)
  57.  
  58. def crawl_trivia_api(token):
  59.     data = json.load(urllib2.urlopen("%s?amount=%s&token=%s" % (trivia_url, questions_per_request, token)))
  60.  
  61.     # response code 4 means there are no more questions!
  62.     if data['response_code'] == 4:
  63.         print 'No more questions!'
  64.         f.close()
  65.         return
  66.  
  67.     print 'Got more questions..'
  68.     results = data['results']
  69.  
  70.     for result in results:
  71.         question = result['question']
  72.         answer = result['correct_answer']
  73.         options = result['incorrect_answers']
  74.         trivia = Trivia(question, answer, options)
  75.         f.write(trivia.file_string)
  76.  
  77.     question_count = sum(1 for line in open(file_name))
  78.     question_count = question_count / 4
  79.     print 'Questions saved! Now at %d questions!' % question_count
  80.  
  81.     # recursion!!
  82.     time.sleep(0.1)
  83.     crawl_trivia_api(token)
  84.  
  85.  
  86. start_crawler()  # script entry point: runs the crawl as soon as this module is executed
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement