Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ######################################
- # TODO: #
- # - Add ordering by entry #
- ######################################
- import urllib.request
- import sys
- import time
# Request configuration: a browser-like User-Agent header and the site URLs.
user_agent = (
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) '
    'Gecko/2009021910 Firefox/3.0.7'
)
headers = dict([('User-Agent', user_agent)])
base_url = "https://www.planetminecraft.com"
# Contest listing URL; the page number is appended to it.
url = "%s/contests/?p=" % base_url

# Defaults used when the user leaves a prompt empty.
default_max = 1
orders = ["type", "date"]
default_order = orders[0]
default_output = "output.txt"
def error_handler(error):
    """Print an error and record it in ``error_log.txt``.

    The log file is opened in write mode, so it only ever holds the most
    recently reported error.

    Args:
        error: Exception or message to report; it is stringified for the log.
    """
    print(error)
    # The context manager closes the handle even if the write itself fails.
    with open("error_log.txt", "w") as log:
        log.write(str(error))
def get_page(url):
    """Download *url* and return its body as a list of text lines.

    The request is sent with the module-level ``headers``.

    Args:
        url: Absolute URL to fetch.

    Returns:
        list: The response lines decoded as UTF-8, in document order.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If a line is not valid UTF-8.
    """
    request = urllib.request.Request(url, None, headers)
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(request) as response:
        byte_page = response.readlines()
    return [raw_line.decode("utf-8") for raw_line in byte_page]
def page_exists(url):
    """Report whether the listing page at *url* contains any contests.

    The page is fetched with ``get_page``; it counts as missing when any of
    its lines carries the site's "no contests" message.

    Args:
        url: URL of the contest listing page to probe.

    Returns:
        bool: False if the "no contests" message is present, True otherwise.
    """
    no_results_marker = "Blast! No contests match your query."
    return not any(no_results_marker in row for row in get_page(url))
def get_entries(page):
    """Extract the entry count text from a contest's entries page.

    The value is sliced out of the first well-formed line containing
    "num_results": it starts 7 characters after the second occurrence of
    "stat" and ends 23 characters before "pagination". These fixed offsets
    are tied to the site's markup (presumably as it was when the script was
    written - verify against the live page).

    Args:
        page: Page content as a list of text lines.

    Returns:
        The extracted text, or None when no line provides all the markers.
    """
    for line in page:
        if "num_results" not in line:
            continue
        first_stat = line.find("stat")
        second_stat = line.find("stat", first_stat + 1)
        pagination = line.find("pagination")
        # str.find returns -1 for a missing marker; slicing with that would
        # silently yield garbage, so skip lines that lack any marker.
        if first_stat == -1 or second_stat == -1 or pagination == -1:
            continue
        return line[second_stat + 7:pagination - 23]
    return None
def get_year(page):
    """Determine a contest's year from the posting date of one of its entries.

    For each line containing "entry-preview", the following line is taken to
    hold the entry link. That entry page is downloaded and its "Posted on"
    line is parsed as a slash-separated date whose third field is the year.

    Args:
        page: The contest's entries page as a list of text lines.

    Returns:
        The year as a string, or None when no entry yields a usable date.
    """
    # enumerate gives the real position of each line; page.index(line) would
    # resolve duplicated lines to their first occurrence.
    for line_number, line in enumerate(page):
        if "entry-preview" not in line:
            continue
        if line_number + 1 >= len(page):
            # The marker is on the last line, so there is no link line to read.
            break
        link_line = page[line_number + 1]
        entry_url = link_line[9:link_line.find("title") - 2]
        entry_page = get_page(base_url + entry_url)
        for entry_line in entry_page:
            if "Posted on" not in entry_line:
                continue
            date_start = entry_line.find(">") + 1
            date_end = entry_line.find("</")
            date_fields = entry_line[date_start:date_end].split("/")
            # Ignore text that is not a three-field date instead of raising.
            if len(date_fields) > 2:
                return date_fields[2]
    return None
def get_type(page):
    """Extract the contest type from a contest page.

    The type is the text following the "sub_lrg sub_" marker, up to 3
    characters before the first "style" on the same line.

    Args:
        page: Page content as a list of text lines.

    Returns:
        The contest type text, or None when no line provides both markers.
    """
    marker = "sub_lrg sub_"
    for line in page:
        marker_index = line.find(marker)
        if marker_index == -1:
            continue
        style_index = line.find("style")
        # Without the closing marker the slice bounds would be meaningless.
        if style_index == -1:
            continue
        return line[marker_index + len(marker):style_index - 3]
    return None
def get_info(url, contest_dict):
    """Collect every contest on one listing page into *contest_dict*.

    Each line containing "contest_info" is read as "<month> : <name>". The
    contest link is taken from the line two below it, and the contest's
    "entries" page is downloaded to obtain the entry count, year and type.
    The formatted summary line is grouped under the contest type or the year,
    depending on the module-level ``order`` setting.

    Args:
        url: URL of the contest listing page.
        contest_dict: Mapping of group key to a list of summary lines; it is
            updated in place.

    Returns:
        dict: The same *contest_dict* object, after the update.
    """
    page = get_page(url)
    # enumerate gives the real position of each line; page.index(line) would
    # attach the first contest's link to any later, identical line.
    for line_number, line in enumerate(page):
        if "contest_info" not in line:
            continue
        # Fixed offsets strip the markup around the text (tied to the site's
        # HTML; presumably correct for the layout the script was written for).
        contest_info = line[34:-6]
        month_part, separator, name_part = contest_info.partition(":")
        if not separator or line_number + 2 >= len(page):
            # Not a "<month> : <name>" line, or no link line follows it.
            continue
        month = month_part[:-1]
        # Drop quotes, colons and dashes, then collapse the double spaces
        # that removing them leaves behind. The pasted source showed a no-op
        # single-space replace here, presumably a collapsed double space.
        contest_name_pre = (
            name_part[1:]
            .replace("\"", "")
            .replace(":", "")
            .replace("-", "")
            .replace("  ", " ")
        )
        if "#" in contest_name_pre:
            # Cut the name at the "#" so any numbering suffix is dropped.
            contest_name = contest_name_pre[:contest_name_pre.find("#")].strip()
        else:
            contest_name = contest_name_pre
        url_line = page[line_number + 2]
        url_index_end = url_line.find("title") - 2
        contest_url = base_url + url_line[9:url_index_end] + "entries"
        contest_page = get_page(contest_url)
        entries = get_entries(contest_page)
        year = get_year(contest_page)
        contest_type = get_type(contest_page)
        if contest_type == "project" and "Solo" in contest_name:
            contest_type = "solo project"
        out = "%s %s - %s (%s entries)\n" % (month, year, contest_name, entries)
        if order == "type":
            contest_dict.setdefault(contest_type, []).append(out)
        if order == "date":
            contest_dict.setdefault(year, []).append(out)
    return contest_dict
#Setting default inputs
max_page = default_max
order = default_order
output_file = default_output

#Max pages input
max_page_input = input("Max pages (default=1): ")
if max_page_input == "":
    print("Defaulting to \"1\"")
else:
    try:
        max_page = int(max_page_input)
    except ValueError:
        error_handler("Error: Page limit must be an integer.")
        sys.exit(1)
    # range(1, max_page + 1) would be empty for anything below 1.
    if max_page < 1:
        error_handler("Error: Page limit must be at least 1.")
        sys.exit(1)
    check_url = url + str(max_page)
    if not page_exists(check_url):
        error_handler("Error: Page limit is too high.")
        sys.exit(1)

#Ordering input
order_input = input("Ordering (\"type or date\"): ").lower()
if order_input == "":
    print("Defaulting to \"type\"")
elif order_input in orders:
    order = order_input
else:
    error_handler("Error: Invalid ordering. Defaulting to \"type\"")

#Output file name input
file_input = input("Output file name: ").lower()
if file_input == "":
    print("Defaulting to \"output.txt\"")
elif file_input.endswith(".txt"):
    output_file = file_input
else:
    output_file = file_input + ".txt"

#Main
input_dict = {}
try:
    for page in range(1, max_page + 1):
        input_dict = get_info(url + str(page), input_dict)
        # Pause between listing pages; no pause is needed after the last one.
        if page < max_page:
            time.sleep(5)
    # Open the output only once scraping succeeded, so a failed run does not
    # truncate an existing file; "with" closes it even if a write fails.
    with open(output_file, "w") as output:
        for order_key, contests in input_dict.items():
            # The key is None when the year or type could not be determined.
            # A separate name is used so the global "order" is not clobbered.
            heading = "Unknown" if order_key is None else order_key.capitalize()
            output.write("%s contests:\n" % (heading))
            for contest in contests:
                output.write(" " + contest)
except Exception as e:
    # Top-level boundary: log whatever went wrong and exit with a failure code.
    error_handler(e)
    sys.exit(1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement