# Untitled

1. #!/usr/bin/env python
2. import re
3.
def gen_indexes(txt):
    """Return the indexes of every blank line and of its two neighbours.

    For each element of ``txt`` that equals the empty string, three
    consecutive positions are recorded in order: the one before it, its
    own, and the one after it.

    The result is not de-duplicated and is not clipped to the bounds of
    ``txt``: a blank first line contributes ``-1`` and a blank last line
    contributes ``len(txt)``.
    """
    marked = []
    for position, line in enumerate(txt):
        if line == '':
            marked.extend((position - 1, position, position + 1))
    return marked
14.
def gen_clean_txt(txt):
    """Return ``txt`` without blank lines and without their adjacent lines.

    The positions to drop come from ``gen_indexes(txt)``: every blank line
    plus the line immediately before and immediately after it. The
    remaining lines are returned as a new list in their original order;
    ``txt`` itself is not modified.
    """
    # gen_indexes returns a list with duplicates; converting it to a set
    # once makes each membership test O(1) instead of a linear scan, so
    # the whole pass is linear in len(txt) rather than quadratic.
    dropped = set(gen_indexes(txt))
    return [line for index, line in enumerate(txt) if index not in dropped]
18.
def gen_list_airports(txt):
    """Split ``txt`` into one block of lines per airport entry.

    A line starts a new airport entry when it matches the coordinate
    pattern and either

    * the same line also matches the "UF CODE" pattern (two capital
      letters, a space, four capital letters), or
    * the same line does not, but the line right after it does (title
      wrapped over two lines).

    The title is expected to look like
    ``CITY NAME / Airport Name, UF CODE <coordinates>``.

    Args:
        txt: Sequence of text lines (must support ``len`` and slicing).

    Returns:
        A list of blocks, each a slice of ``txt`` running from one title
        line up to (but excluding) the next title line. The block of the
        last airport runs to the end of ``txt``. Lines before the first
        title line are not part of any block. Returns ``[]`` when no title
        line is found.
    """
    # NOTE(review): the trailing `\$` matches a literal dollar sign, not
    # end-of-line. Kept unchanged because the input format is not visible
    # here -- confirm against the data before changing it to `$`.
    coords_re = re.compile(r'.*\d+ \d+ \d+[NS]\/\d+ \d+ \d+[WE]\$')
    # Matches the UF (state) abbreviation followed by the airport code.
    uf_re = re.compile(r'.*[A-Z]{2} [A-Z]{4}.*')

    total = len(txt)
    starts = []
    for index, line in enumerate(txt):
        if coords_re.match(line) is None:
            continue
        if uf_re.match(line) is not None:
            # Coordinates and UF/code on the same line: a title line.
            starts.append(index)
        elif index + 1 < total and uf_re.match(txt[index + 1]) is not None:
            # UF/code wrapped onto the following line. The bounds check
            # avoids an IndexError when the coordinates are on the very
            # last line of the input.
            starts.append(index)

    # Pair every start with the next start; the final block ends at the
    # end of the text so the last airport is not silently dropped.
    ends = starts[1:] + [total]
    return [txt[start:end] for start, end in zip(starts, ends)]
39.
# Read the pdfgrep dump, removing the newline characters from each line.
with open('ch3-pdfgrep.txt', 'r') as f:
    txt = [raw.strip('\n') for raw in f]

# Drop blank lines together with the lines directly around them.
txt = gen_clean_txt(txt)

# Group the remaining lines into one block per airport entry.
airports = gen_list_airports(txt)
