Advertisement
Guest User

Simple Bayesian classifier

a guest
Sep 13th, 2014
1,588
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.14 KB | None | 0 0
import os
import sys

from text.classifiers import NaiveBayesClassifier
  4.  
  5.  
  6. # Utility for coloring
  7. def col(line, color):
  8.     return '\033[%sm%s\033[0m' % (color, line)
  9.  
  10. # Quick util for pretty command line output
  11. def ln(metric, value, space=40):
  12.     color = 31 if value is 'bullshit' else 32
  13.     s = '{0: <' + str(space) + '}'
  14.     print s.format('- %s' % metric) + col(value, color)
  15.  
  16. train = []
  17.  
  18. # Train dat shit
  19. def trainer(content, directory):
  20.    
  21.     if '-d' in sys.argv: print os.listdir(directory)
  22.  
  23.     for file in os.listdir(directory):
  24.         if file == '.DS_Store': continue
  25.         if '-d' in sys.argv: print file
  26.        
  27.         with open('%s/%s' % (directory, file)) as f: r = f.read()
  28.         content.append( (r, directory) )
  29.  
  30.     return content
  31.  
  32. train = trainer(train, 'legit')
  33. train = trainer(train, 'bullshit')
  34.  
  35. cl = NaiveBayesClassifier(train)
  36.  
  37. print 'Accuracy: %s' % col(cl.accuracy(train), 93)
  38. print 'Features:'
  39. print cl.show_informative_features(40)
  40.  
  41. # Run ze trials
  42. for file in os.listdir('trials'):
  43.     if file == '.DS_Store': continue
  44.     with open('trials/' + file) as f: r = f.read()
  45.     ln(file, cl.classify(r))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement