Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import sys

from text.classifiers import NaiveBayesClassifier
# Utility for coloring
def col(line, color):
    """Return ``line`` wrapped in an ANSI color escape sequence.

    Args:
        line: The value to colorize; it is rendered with ``%s``, so any
            object with a string form is accepted.
        color: The SGR color code placed in the opening escape sequence
            (this file uses 31, 32 and 93).

    Returns:
        The string ``ESC[<color>m<line>ESC[0m``, where the trailing
        ``ESC[0m`` resets terminal attributes.
    """
    return '\033[%sm%s\033[0m' % (color, line)
# Quick util for pretty command line output
def ln(metric, value, space=40):
    """Print one ``- <metric>`` row followed by the colorized ``value``.

    Args:
        metric: Label for the row; it is printed as ``- <metric>``.
        value: The result to show. It is colored with code 31 when it
            equals ``'bullshit'`` and with code 32 otherwise.
        space: Width the label is left-aligned and space-padded to.
    """
    # Compare by equality: `is` tests object identity, which only matches
    # an equal string by accident of interning.
    color = 31 if value == 'bullshit' else 32
    label = '{0: <{1}}'.format('- %s' % metric, space)
    print(label + col(value, color))
# Accumulates (text, label) training pairs; trainer() appends to it.
train = []


# Train dat shit
def trainer(content, directory):
    """Append one ``(file_text, directory)`` pair per file in ``directory``.

    The directory name itself is used as the label for every file read from
    it. Entries named ``.DS_Store`` are skipped. When ``-d`` is present in
    ``sys.argv`` the directory listing and each file name are printed.

    Args:
        content: List to extend; it is mutated in place.
        directory: Path of the directory to read, also used as the label.

    Returns:
        The same ``content`` list that was passed in.
    """
    debug = '-d' in sys.argv
    # List the directory once so the debug output and the loop agree.
    names = os.listdir(directory)
    if debug:
        print(names)
    for name in names:
        if name == '.DS_Store':
            continue
        if debug:
            print(name)
        with open(os.path.join(directory, name)) as f:
            text = f.read()
        content.append((text, directory))
    return content
# Build the training set: each directory name doubles as the class label.
train = trainer(train, 'legit')
train = trainer(train, 'bullshit')
cl = NaiveBayesClassifier(train)
# NOTE: accuracy is computed on the same data the classifier was trained
# on, so this figure is not a held-out estimate.
print('Accuracy: %s' % col(cl.accuracy(train), 93))
print('Features:')
# show_informative_features() prints its own table and returns None, so
# wrapping it in print emitted a stray "None" line.
cl.show_informative_features(40)
# Run ze trials: classify every file in 'trials' and print one row per file.
for name in os.listdir('trials'):
    if name == '.DS_Store':
        continue
    with open(os.path.join('trials', name)) as f:
        text = f.read()
    ln(name, cl.classify(text))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement