Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# The input file is essentially a copy-paste of the text version of the webpage:
# http://dfat.gov.au/about-us/our-people/homs/pages/australian-ambassadors-and-other-representatives.aspx
# Ctrl-A, then Ctrl-C, then Ctrl-V into a file list.txt, making sure to remove
# all lines except the list.
# (Blank lines between names/countries are removed during execution of this script.)

# Prefixes stripped from a post description to leave only the country name.
AMBASSADOR_PREFIX = "Ambassador to "
HIGH_COMMISSIONER_PREFIX = "High Commissioner to "


def dedupe_lines(lines):
    """Return the useful lines of the pasted list, without trailing newlines.

    A line consisting solely of a newline is dropped together with the line
    that follows it (the script treats that following line as a duplicate).

    NOTE(review): the pasted source lost its indentation; this assumes the
    single-step increment belongs to the non-blank branch -- confirm against
    a real copy of the page.
    """
    cleaned = []
    i = 0
    while i < len(lines):
        if lines[i] == "\n":
            i += 2  # skip the blank line and the line right after it
        else:
            cleaned.append(lines[i].rstrip("\n"))
            i += 1
    return cleaned


def pair_entries(entries):
    """Group consecutive lines into (post, person) tuples.

    The input alternates person line, then post line ("title + country").
    A trailing unpaired line is ignored.
    """
    return list(zip(entries[1::2], entries[0::2]))


def build_rows(pairs):
    """Convert (post, person) pairs into (country, title, name) rows.

    Posts mentioning "Consul" or "Deputy" are skipped. Anything after the
    first ", " in the post (e.g. ", located in ...") is discarded before the
    country is extracted. The first word of the person line is removed as an
    honorific ("Mr", "Ms", ...); this has some false positives. A person line
    with no space is kept whole instead of being reduced to an empty string.
    """
    rows = []
    for post, person in pairs:
        if "Consul" in post or "Deputy" in post:
            continue

        place = post.partition(", ")[0]
        _honorific, sep, rest = person.partition(" ")
        name = rest if sep else person

        if "Ambassador" in post:
            # Assumes the post starts with the prefix, as on the source page.
            rows.append((place[len(AMBASSADOR_PREFIX):], "Ambassador", name))
        elif "High Commissioner" in post:
            rows.append(
                (place[len(HIGH_COMMISSIONER_PREFIX):], "High Commissioner", name)
            )
        else:  # Edge case: unrecognised post, keep the full text as the country
            rows.append((post, "", name))
    return rows


def render_rows(rows):
    """Return the wikipedia table markup for *rows* as a single string.

    Each row carries its own person name, so every table line shows the
    representative that belongs to that country.
    """
    chunks = []
    for cnty, title, name in rows:
        chunks.append("|-\n")
        chunks.append(
            "| [[" + cnty + "]] | [[" + title + " of Australia to " + cnty
            + "|" + title + "]] | " + name + "\n"
        )
    return "".join(chunks)


def main(in_path="list.txt", out_path="list2.txt"):
    """Read the pasted list from *in_path* and write table markup to *out_path*."""
    with open(in_path, "r") as file_in:
        lines = file_in.readlines()

    markup = render_rows(build_rows(pair_entries(dedupe_lines(lines))))

    # The context manager guarantees the output is flushed and closed.
    with open(out_path, "w") as file_out:
        file_out.write(markup)
    # Further modifications are necessary, but are mostly case-by-case


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement