daily pastebin goal
7%
SHARE
TWEET

Untitled

a guest Feb 14th, 2018 65 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python
  2. import re
  3.        
  4. def gen_indexes(txt):    
  5.     indexes=[]
  6.     for index, line in enumerate(txt):
  7.         if line == '':
  8.             indexes.append(index-1)
  9.             indexes.append(index)
  10.             indexes.append(index+1)
  11.         else:
  12.             pass
  13.     return indexes
  14.  
  15. def gen_clean_txt(txt):
  16.     indexes = gen_indexes(txt)
  17.     return [line for index,line in enumerate(txt) if index not in indexes]
  18.  
  19. def gen_list_airports(txt):
  20.     re_coords = r'.*\d+ \d+ \d+[NS]\/\d+ \d+ \d+[WE]$'
  21.     re_uf = r'.*[A-Z]{2} [A-Z]{4}.*' #re matching UF and airport symbol
  22.     re_a = r'(.+)/ (.+), [A-Z]{2} [A-Z]{4}' #re matching the entire title (ie CITY NAME / Airport Name, UF CODE)
  23.  
  24.     airports = []
  25.     for index, line in enumerate(txt):
  26.         m1 = re.match(re_coords, line) #matches the coordinate in the title line
  27.         m2 = re.match(re_uf,line) #matches the uf airport in the title line
  28.         if m1 is not None and m2 is not None: #if both are a match, title page
  29.             airports.append(index)
  30.         elif m1 is not None and m2 is None: #if coords are a match but uf airport isnt look into the next line
  31.             m2 = re.match(re_uf, txt[index+1])
  32.             if m2 is not None:
  33.                 airports.append(index)
  34.         else:
  35.             'seek jesus'
  36.            
  37.     blocks = [ txt[airports[index]:airports[index+1]] for index in range(len(airports)-1)]
  38.     return blocks
  39.  
  40. with open('ch3-pdfgrep.txt', 'r') as f:
  41.     txt= [line.strip('\n') for line in f.readlines()]
  42. txt = gen_clean_txt(txt)
  43. airports = gen_list_airports(txt)
RAW Paste Data
Top