Advertisement
Pepijn

Python: PMC Contest Info

Oct 5th, 2017
31
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.99 KB | None | 0 0
  1. ######################################
  2. # TODO:                              #
  3. # - Add ordering by entry            #
  4. ######################################
  5.  
  6. import urllib.request
  7. import sys
  8. import time
  9.  
  10. user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
  11. headers = {'User-Agent':user_agent,}
  12.  
  13. base_url = "https://www.planetminecraft.com"
  14. url = base_url + "/contests/?p="
  15.  
  16. #Defining defaults
  17. default_max = 1
  18. orders = ["type","date"]
  19. default_order = "type"
  20. default_output = "output.txt"
  21.  
  22. def error_handler(error):
  23.     print (error)
  24.     log = open("error_log.txt","w")
  25.     log.write(str(error))
  26.     log.close()
  27.  
  28. def get_page(url):
  29.     request = urllib.request.Request(url,None,headers)
  30.     response = urllib.request.urlopen(request)
  31.     byte_page = response.readlines()
  32.     page = [byte.decode("utf-8") for byte in byte_page]
  33.     return page
  34.  
  35. def page_exists(url):
  36.         page = get_page(url)
  37.         for line in page:
  38.             if ("Blast! No contests match your query." in line):
  39.                 return False
  40.         return True
  41.  
  42. def get_entries(page):
  43.     for line in page:
  44.         if ("num_results" in line):
  45.             search_start = line.find("stat") + 1
  46.             entries_index_start = line.find("stat",search_start) + 7
  47.             entries_index_end = line.find("pagination") - 23
  48.             entries = line[entries_index_start:entries_index_end]
  49.             return entries
  50.  
  51. def get_year(page):
  52.     for line in page:
  53.         if ("entry-preview" in line):
  54.             line_number = page.index(line) + 1
  55.             search_line = page[line_number]
  56.             entries_index_end = search_line.find("title") - 2
  57.             url = search_line[9:entries_index_end]
  58.             entry_page = get_page(base_url + url)
  59.             for entry_line in entry_page:
  60.                 if ("Posted on" in entry_line):
  61.                     entry_index_start = entry_line.find(">") + 1
  62.                     entry_index_end = entry_line.find("</")
  63.                     date = entry_line[entry_index_start:entry_index_end]
  64.                     year = date.split("/")[2]
  65.                     return year
  66.  
  67. def get_type(page):
  68.     for line in page:
  69.         if ("sub_lrg sub_" in line):
  70.             type_index_start = line.find("sub_lrg sub_") + 12
  71.             type_index_end = line.find("style") - 3
  72.             contest_type = line[type_index_start:type_index_end]
  73.             return contest_type
  74.  
  75. def get_info(url,contest_dict):
  76.     page = get_page(url)
  77.     for line in page:
  78.         if ("contest_info" in line):
  79.             contest_info = line[34:-6]
  80.             contest_info_split = contest_info.split(":",1)
  81.             month = contest_info_split[0][:-1]
  82.             contest_name_pre = contest_info_split[1][1:].replace("\"","").replace(":","").replace("-","").replace("  "," ")
  83.             if ("#" in contest_name_pre):
  84.                 end_index = contest_name_pre.find("#")
  85.                 contest_name = contest_name_pre[:end_index].strip()
  86.             else:
  87.                 contest_name = contest_name_pre
  88.             line_number = page.index(line)
  89.             url_line = page[line_number + 2]
  90.             url_index_end = url_line.find("title") - 2
  91.             contest_url = base_url + url_line[9:url_index_end] + "entries"
  92.             contest_page = get_page(contest_url)
  93.             entries = get_entries(contest_page)
  94.             year = get_year(contest_page)
  95.             contest_type = get_type(contest_page)
  96.             if ((contest_type == "project") and ("Solo" in contest_name)):
  97.                 contest_type = "solo project"
  98.             out = "%s %s - %s (%s entries)\n" % (month,year,contest_name,entries)
  99.             if (order == "type"):
  100.                 if contest_type in contest_dict:
  101.                     contest_dict[contest_type].append(out)
  102.                 else:
  103.                     contest_dict[contest_type] = [out]
  104.             if (order == "date"):
  105.                 if year in contest_dict:
  106.                     contest_dict[year].append(out)
  107.                 else:
  108.                     contest_dict[year] = [out]
  109.     return contest_dict
  110.  
  111. #Setting default inputs        
  112. max_page = default_max
  113. order = default_order
  114. output_file = default_output
  115.  
  116. #Max pages input
  117. max_page_input = input("Max pages (default=1): ")
  118. if (max_page_input == ""):
  119.     print ("Defaulting to \"1\"")
  120. else:
  121.     try:
  122.         max_page = int(max_page_input)
  123.     except ValueError:
  124.         error_handler("Error: Page limit must be an integer.")
  125.         sys.exit()
  126.     check_url = url + str(max_page)
  127.     if (page_exists(check_url) == False):
  128.         error_handler("Error: Page limit is too high.")
  129.         sys.exit()
  130.  
  131. #Ordering input
  132. order_input = input("Ordering (\"type or date\"): ").lower()
  133. if (order_input == ""):
  134.     print ("Defaulting to \"type\"")
  135. elif (order_input in orders):
  136.     order = order_input
  137. else:
  138.     error_handler("Error: Invalid ordering. Defaulting to \"type\"")
  139.  
  140. #Output file name input
  141. file_input = input("Output file name: ").lower()
  142. if (file_input == ""):
  143.     print ("Defaulting to \"output.txt\"")
  144. else:
  145.     file_split = file_input.split(".")
  146.     if ((len(file_split) > 1) and (file_split[-1] == "txt")):
  147.         output_file = file_input
  148.     else:
  149.         output_file = file_input + ".txt"  
  150.  
  151. #Main
  152. output = open(output_file,"w")
  153. input_dict = {}
  154. try:
  155.     for page in range(1,max_page+1):
  156.         output_dict = get_info(url + str(page),input_dict)
  157.         input_dict = output_dict
  158.         time.sleep(5)
  159.     for order_key in output_dict:
  160.         order = order_key.capitalize()
  161.         output.write("%s contests:\n" % (order))
  162.         for contest in output_dict[order_key]:
  163.             output.write("    " + contest)
  164.     output.close()
  165. except Exception as e:
  166.     error_handler(e)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement