Advertisement
Guest User

Untitled

a guest
Apr 22nd, 2018
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.86 KB | None | 0 0
  1. # File_in is essentially copy-paste of the text version of the webpage:
  2. # http://dfat.gov.au/about-us/our-people/homs/pages/australian-ambassadors-and-other-representatives.aspx
  3. # Ctrl-A, then Ctrl-C, then Ctrl-V into a file list.txt, making sure to remove all lines except the list
  4. # (Whitespaces between names/countries will be removed during execution of this script)
  5. file_in  = open("list.txt", "r")
  6. file_out = open("list2.txt", "w")
  7.  
  8. lists  = [line for line in file_in]
  9.  
  10. # Removes duplicates in input file, and organises data
  11. lists2 = []
  12.  
  13. i = 0
  14. while i < len(lists):
  15.     if lists[i] == "\n":
  16.         i += 2
  17.     else:
  18.         lists2.append(lists[i].rstrip("\n"))
  19.         i += 1
  20.  
  21. # Organises data into (title+country, person) format
  22. lists3 = []
  23.  
  24. i = 0
  25. while i < len(lists2)-1:
  26.     lists3.append((lists2[i+1],lists2[i]))
  27.     i += 2
  28.  
  29. # Organises data into (country, title, person) format
  30. lists4 = []
  31. for country, person in lists3:
  32.     # Removes instances of " , located in "
  33.     country2 = country.partition(", ")[0]
  34.     # Remove title (e.g. "Mr", "Ms") from person. Has some false positives
  35.     person2 = person.partition(" ")[2]
  36.  
  37.     if "Consul" in country or "Deputy" in country:
  38.         continue
  39.     elif "Ambassador" in country:
  40.         cnty = country2[14:] # Remove "Ambassador to "
  41.         lists4.append((cnty, "Ambassador", person))
  42.     elif "High Commissioner" in country:
  43.         cnty = country2[21:] # Remove "High Commissioner to "
  44.         lists4.append((cnty, "High Commissioner", person))
  45.     else: # Edge case
  46.         lists4.append((country, "", person))
  47.  
  48. # Outputs wikipedia markup
  49. for cnty, title, person in lists4:
  50.     file_out.write("|-\n")
  51.     file_out.write("| [[" + cnty + "]] | [[" + title + " of Australia to " + cnty + "|" + title + "]] | " + person2 + "\n")
  52.  
  53. # Further modifications are necessary, but are mostly case-by-case
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement