# Untitled

import csv
import re

try:
    from collections.abc import Iterable
except ImportError:  # Python 2: the ABCs are only available from `collections`
    from collections import Iterable

def flatten(items):
    """Lazily yield the leaves of an arbitrarily nested iterable, depth-first.

    Any element that is itself iterable is descended into, except `str` and
    `bytes`, which are yielded whole. Order is the same as a left-to-right,
    depth-first walk of `items`.

    An explicit stack of iterators is used instead of one generator per nesting
    level, so nesting depth is not bounded by the interpreter's recursion limit
    and each leaf is yielded once rather than re-yielded through every level.
    """
    stack = [iter(items)]
    while stack:
        try:
            x = next(stack[-1])
        except StopIteration:
            # Innermost iterable is exhausted; resume its parent.
            stack.pop()
            continue
        if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
            stack.append(iter(x))
        else:
            yield x

class NameNotFoundException(Exception):
    """Raised if no suitable name can be found."""

# Whole words that count as a single edit when added to or removed from a
# name, both bare (e.g. "county") and in "<word> of" form (e.g. "county of").
SPECIAL_EDITS = [
    "district",
    "county",
    "city",
    "township",
    "village",
]

class Correctable():
    """Spelling corrector over a fixed dictionary of canonical names.

    Names are indexed by their lowercase form, so lookups are
    case-insensitive; results are returned with the capitalization that was
    supplied at construction time.
    """

    def __init__(self, names):
        """`names` is a dictionary mapping name to prior probability."""
        # Keyed by lowercase name; each value is [canonical spelling, prior].
        self.names = {name.lower(): [name, p] for name, p in names.items()}

    def exact_match(self, name):
        """Return name (w/ proper capitalization) if it is already canonical; None otherwise."""
        entry = self.names.get(name.lower())
        return entry[0] if entry is not None else None

    def correction(self, name):
        """Return the most probable correction for name.

        Raises NameNotFoundException if no known name is within two edits.
        """
        best = max(self._candidates(name), key=self._p)
        return self.names[best][0]

    def _p(self, name):
        """Return the prior probability of `name` (a lowercase dictionary key)."""
        return self.names[name][1]

    def _candidates(self, name, depth=0):
        """Return the set of known names closest to `name`.

        Tries edit distance `depth` first and widens one edit at a time up to
        distance 2, returning the first non-empty set of dictionary keys.
        Raises NameNotFoundException if nothing is found.
        """
        # Dictionary keys are lowercase, so search from the lowercase form;
        # the original spelling is kept for the error message.
        lowered = name.lower()
        while True:
            found = self._known(self._edits_within(lowered, depth))
            if found:
                return found
            if depth >= 2:
                raise NameNotFoundException("Unable to find canonical name for: " + name)
            depth += 1

    def _known(self, names):
        """Return the subset of `names` that appear in the dictionary of names."""
        return {w for w in names if w in self.names}

    def _edits_within(self, name, max_dist=2):
        """Return all strings reachable by applying `max_dist` edits to `name`."""
        return self._edits_recur([name], max_dist)

    def _edits_recur(self, partials, stop):
        """Apply `_edits` to every string in `partials`, `stop` times over.

        Results are de-duplicated after each round so that identical strings
        are not expanded again in the next one. With `stop == 0` the input is
        returned unchanged.
        """
        for _ in range(stop):
            partials = {edit for n in partials for edit in self._edits(n)}
        return partials

    def _edits(self, name):
        """Return all strings that are one edit away from `name`.

        An edit is a single-letter delete, transpose, replace or insert, or
        adding/removing one of SPECIAL_EDITS as a trailing word ("x county")
        or as a leading "<word> of" phrase ("county of x").
        """
        letters = 'abcdefghijklmnopqrstuvwxyz'
        splits = [(name[:i], name[i:]) for i in range(len(name) + 1)]
        edits = set()
        for head, tail in splits:
            if tail:
                edits.add(head + tail[1:])  # delete
                edits.update(head + c + tail[1:] for c in letters)  # replace
            if len(tail) > 1:
                edits.add(head + tail[1] + tail[0] + tail[2:])  # transpose
            edits.update(head + c + tail for c in letters)  # insert
        for s in SPECIAL_EDITS:
            edits.add(self._add_or_remove_final(name, s))
            edits.add(self._add_or_remove_initial(name, s + " of"))
        return list(edits)

    def _add_or_remove_final(self, name, string):
        """Strip `string` from the end of `name` if present; otherwise append it.

        When stripping, any whitespace immediately before `string` is removed
        too. When appending, a single space separates `name` and `string`.
        """
        reg = r"\s*" + re.escape(string) + r"$"
        if re.search(reg, name):
            return re.sub(reg, "", name)
        return name + " " + string

    def _add_or_remove_initial(self, name, string):
        """Strip `string` from the start of `name` if present; otherwise prepend it.

        When stripping, any whitespace immediately after `string` is removed
        too. When prepending, a single space separates `string` and `name`.
        """
        reg = r"^" + re.escape(string) + r"\s*"
        if re.search(reg, name):
            return re.sub(reg, "", name)
        return string + " " + name