import codecs
def file2ms( path, ws=None, bs=[u'記号'] ): # bs filters punctuation
    """Return the distinct morphemes found in the UTF-8 text file at *path*.

    The file is read line by line; each line is stripped of surrounding
    whitespace and handed to ``getMorphemes`` together with one shared
    analyzer obtained from ``mecab(None)``. The results of all lines are
    merged into a set, so duplicates are dropped.

    Parameters:
        path: path of the text file to analyse; decoded as UTF-8.
        ws:   forwarded unchanged to ``getMorphemes``
              (presumably a whitelist filter -- confirm against getMorphemes).
        bs:   forwarded unchanged to ``getMorphemes``; per the original
              comment the default filters out punctuation.

    Returns:
        A list of the unique morphemes, in arbitrary (set) order.
    """
    # NOTE: the list default of ``bs`` is shared between calls. It is never
    # mutated in this function and is kept as-is so that getMorphemes keeps
    # receiving exactly the same object type as before.

    # ``with`` guarantees the file is closed even if decoding fails midway.
    with codecs.open( path, 'r', 'utf-8' ) as f:
        inp = f.readlines()

    mcb = mecab(None)
    s = set()
    try:
        for i in inp:
            s.update( getMorphemes( mcb, i.strip(), ws, bs ) )
    finally:
        # Always release the analyzer, even when getMorphemes raises.
        mcb.terminate()
    return list(s)