"ing" isolater

# -*- coding: utf-8 -*-
import codecs
import io
from re import compile

def iter_sentences(LISO=None):
    """Yield linked sentence pairs read from ``sentences.csv`` and ``links.csv``.

    ``sentences.csv`` is read as tab-separated ``id, iso, sentence`` rows and
    ``links.csv`` as tab-separated ``from_id, to_id`` rows.  Both files are
    decoded as UTF-8 and resolved relative to the current working directory.

    Args:
        LISO: optional iterable of language codes.  When given and non-empty,
            only sentences whose ``iso`` field is in it are kept; otherwise
            every sentence is kept.

    Yields:
        A two-element list of ``(iso, sentence)`` tuples for every link whose
        two ends were both kept.  The list is sorted, so its order does not
        depend on the direction of the link.

    Raises:
        AssertionError: if a sentence id repeats one that was already kept.
        ValueError: if a row does not have the expected number of fields or
            an id field is not an integer.
    """
    wanted_isos = set(LISO) if LISO else None

    sentences_by_id = {}
    # io.open only breaks lines on \n, \r and \r\n.  Iterating a codecs.open
    # stream also breaks them on characters such as U+2028 or U+0085, which
    # may appear inside a sentence and would split one row into two.
    # The ``with`` block guarantees the file handle is closed.
    with io.open('sentences.csv', 'r', encoding='utf-8') as f_sentences:
        for line in f_sentences:
            sentence_id, iso, sentence = line.split('\t')
            sentence_id = int(sentence_id)
            assert sentence_id not in sentences_by_id

            if wanted_isos and iso not in wanted_isos:
                continue

            sentences_by_id[sentence_id] = (iso, sentence.strip())

    # The links file stays open while the generator is suspended and is
    # closed when the generator is exhausted or closed.
    with io.open('links.csv', 'r', encoding='utf-8') as f_links:
        for line in f_links:
            from_id, to_id = line.split('\t')
            from_id = int(from_id)
            to_id = int(to_id)

            # Skip links that touch a sentence that was filtered out.
            if from_id not in sentences_by_id or to_id not in sentences_by_id:
                continue

            yield sorted((sentences_by_id[from_id],
                          sentences_by_id[to_id]))

# A space, a lower-case "be" form, a space, then a purely alphabetic word
# ending in "ing".  The class is [A-Za-z] rather than [A-z]: the latter also
# covers the ASCII characters between 'Z' and 'a' ([ \ ] ^ _ `).
ing_re = compile(r" (is|is not|isn't|are|are not|aren't|we're) ([A-Za-z]*ing\b)")

# Words ending in "ing" that are never reported by get_ing().
_ING_EXCLUDED = frozenset((
    'anything',
    'nothing',
    'something',
    'interesting',
    'refreshing',
    'lacking',
    'being',
    'boring',
    'willing',
    'unwilling',
    'tiring',
    'surprising',
    'disappointing',
    'amazing',
    'amusing',
    'annoying',
))


def get_ing(s):
    """Return the "-ing" word following a "be" form in *s*, or None.

    Only the first match of ``ing_re`` in *s* is considered.

    Args:
        s: the English sentence to inspect.

    Returns:
        The matched word exactly as it appears in *s*, or None when there is
        no match, when the word is 'going' and *s* contains 'going to', when
        the word is 'getting' and *s* contains 'is getting', or when the word
        is in the exclusion list.
    """
    match = ing_re.search(s)
    if match is None:
        return None

    word = match.group(2)
    if word == 'going' and 'going to' in s:
        return None
    if word == 'getting' and 'is getting' in s:
        return None
    if word in _ING_EXCLUDED:
        return None
    return word

# Substrings whose presence makes is_teiru() accept a sentence: ている /
# でいる style endings and their variants, one per line.
# NOTE(review): ています and でいます each appear twice, and ていません has
# no でいません counterpart -- possibly a copy/paste slip; confirm before
# changing the list.
LTeiru = u'''
てる
ている
ていた
ていて
ていない
でいる
でいた
でいて
でいない
ています
ていました
ています
でいます
でいました
でいます
てきて
てきた
ております
ていません
てます
'''.strip().splitlines()


def is_teiru(s):
    """Return True when *s* contains an LTeiru entry and no blocked form.

    A sentence containing できている or られている is always rejected, even
    if it also contains one of the LTeiru entries.
    """
    if u'できている' in s or u'られている' in s:
        return False

    for ending in LTeiru:
        if ending in s:
            return True
    return False

def print_relevant():
    """Print and save English "-ing" sentences lacking a teiru-style pair.

    Walks the sentence pairs from ``iter_sentences(['jpn', 'eng'])``.  A pair
    is kept when ``get_ing`` finds a word in the first sentence and either
    ``is_teiru`` rejects the second sentence or the second sentence contains
    かけている / 掛けている.  Each kept pair is printed (encoded as
    shift-jis with unencodable characters replaced) and, once the scan is
    over, written to ``out.txt`` as UTF-8 ``word<TAB>eng<TAB>jpn`` rows
    ordered by lower-cased word, with exact duplicate rows dropped.
    """
    # Switch for reporting each "-ing" word only once.  It is hard-wired
    # off, so seen_words never gains an entry.
    track_seen_words = False
    seen_words = set()
    examples_by_word = {}

    def record(word, eng, jpn):
        # Group under the lower-cased word but echo it as it was matched.
        examples_by_word.setdefault(word.lower(), []).append((eng, jpn))
        message = '%s: %s %s' % (word, eng, jpn)
        print(message.encode('shift-jis', 'replace'))

    for (_, eng), (_, jpn) in iter_sentences(['jpn', 'eng']):
        word = get_ing(eng)
        if not word:
            continue

        unseen_without_teiru = word not in seen_words and not is_teiru(jpn)
        has_kakeru = u'かけている' in jpn or u'掛けている' in jpn
        if unseen_without_teiru or has_kakeru:
            record(word, eng, jpn)

        if track_seen_words:
            seen_words.add(word)

    written_rows = set()
    with codecs.open('out.txt', 'wb', 'utf-8') as f_out:
        for word in sorted(examples_by_word):
            for eng, jpn in examples_by_word[word]:
                row = '%s\t%s\t%s\n' % (word, eng, jpn)
                if row not in written_rows:
                    written_rows.add(row)
                    f_out.write(row)

# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    print_relevant()