Untitled

#!/usr/bin/env python
import re

def gen_indexes(txt):
    """Return the positions of every empty line together with its neighbours.

    For each element of *txt* equal to ``''`` the result contains, in order,
    the position before it, its own position, and the position after it.
    No clamping or de-duplication is performed, so the list may contain
    ``-1``, ``len(txt)``, and repeated values.
    """
    blank_positions = (pos for pos, content in enumerate(txt) if content == '')
    return [pos + offset for pos in blank_positions for offset in (-1, 0, 1)]

def gen_clean_txt(txt):
    """Return the lines of *txt* with blank lines and their neighbours removed.

    The positions to drop come from ``gen_indexes``: every empty line plus
    the line directly before and directly after it. The input is not
    modified; a new list is returned.
    """
    # gen_indexes returns a list with duplicates; a set makes each membership
    # test O(1) instead of scanning that list once per line.
    dropped = set(gen_indexes(txt))
    return [line for index, line in enumerate(txt) if index not in dropped]

def gen_list_airports(txt):
    """Split *txt* into one block of lines per airport.

    A line starts a new airport block when it ends with a coordinate pair
    (``DD MM SS[NS]/DDD MM SS[WE]``) and the "UF CODE" pair (two capital
    letters, a space, four capital letters) appears either on that same line
    or on the line immediately after it. The full title is expected to look
    like ``CITY NAME / Airport Name, UF CODE``.

    Returns a list of blocks, each block being the slice of *txt* from one
    title line up to (but not including) the next title line. The last block
    runs to the end of *txt*. Lines before the first title are not part of
    any block; an input with no title lines yields an empty list.
    """
    # Compile once instead of re-resolving the patterns on every line.
    re_coords = re.compile(r'.*\d+ \d+ \d+[NS]\/\d+ \d+ \d+[WE]$')
    re_uf = re.compile(r'.*[A-Z]{2} [A-Z]{4}.*')  # UF and airport symbol

    total = len(txt)
    starts = []
    for index, line in enumerate(txt):
        if re_coords.match(line) is None:
            continue
        if re_uf.match(line) is not None:
            # Coordinates and UF/airport code share the title line.
            starts.append(index)
        elif index + 1 < total and re_uf.match(txt[index + 1]) is not None:
            # Title wrapped: the UF/airport code is on the following line.
            # The bounds check avoids an IndexError when the coordinate line
            # is the very last line of the text.
            starts.append(index)

    # Each block ends where the next one starts; the final block ends at the
    # end of the text so the last airport is not lost.
    ends = starts[1:] + [total]
    return [txt[start:end] for start, end in zip(starts, ends)]

# Read the pdfgrep dump line by line, dropping the newline characters.
with open('ch3-pdfgrep.txt', 'r') as f:
    txt = [line.strip('\n') for line in f]

# Remove blank lines (and their neighbours), then group the rest per airport.
txt = gen_clean_txt(txt)
airports = gen_list_airports(txt)