Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn.svm import LinearSVC
- from sklearn.ensemble import AdaBoostClassifier
- from sklearn.naive_bayes import MultinomialNB, BernoulliNB
- from sklearn.linear_model import RidgeClassifier
- from sklearn.linear_model import PassiveAggressiveClassifier
- from sklearn.linear_model import Perceptron
- from sklearn.neighbors import NearestCentroid
- from sklearn.feature_selection import SelectFromModel
# Display labels for the benchmarked models. Order matters: each label is
# paired positionally with the estimator at the same index in `classifiers`.
names = [
    "Logistic Regression",
    "Linear SVC",
    "LinearSVC with L1-based feature selection",
    "Multinomial NB",
    "Bernoulli NB",
    "Ridge Classifier",
    "AdaBoost",
    "Perceptron",
    "Passive-Aggresive",  # (sic) spelling kept verbatim: this label is emitted in results
    "Nearest Centroid",
]

# One estimator per label above, in the same order.
classifiers = [
    LogisticRegression(),
    LinearSVC(),
    # Two-stage model: an L1-penalised LinearSVC drives feature selection,
    # then an L2-penalised LinearSVC classifies on the selected features.
    Pipeline([
        ("feature_selection", SelectFromModel(LinearSVC(penalty="l1", dual=False))),
        ("classification", LinearSVC(penalty="l2")),
    ]),
    MultinomialNB(),
    BernoulliNB(),
    RidgeClassifier(),
    AdaBoostClassifier(),
    Perceptron(),
    PassiveAggressiveClassifier(),
    NearestCentroid(),
]

# Materialise the pairs as a list. On Python 3 a bare zip() is a one-shot
# iterator; since this object is used as a default argument of
# classifier_comparator, an iterator would be exhausted after the first call
# and every later call would silently evaluate nothing. On Python 2 zip()
# already returns a list, so this is a no-op there.
zipped_clf = list(zip(names, classifiers))

# Shared TF-IDF vectorizer used as the default by classifier_comparator.
tvec = TfidfVectorizer()
def classifier_comparator(vectorizer=tvec, n_features=10000, stop_words=None,
                          ngram_range=(1, 1), classifier=zipped_clf):
    """Evaluate each named classifier behind the same text vectorizer.

    For every ``(name, estimator)`` pair, a two-step ``Pipeline``
    (vectorizer -> classifier) is built and handed to ``accuracy_summary``
    together with the module-level ``x_train``, ``y_train``,
    ``x_validation`` and ``y_validation`` data.

    Args:
        vectorizer: Vectorizer placed as the first pipeline step. It is
            reconfigured IN PLACE via ``set_params`` on every call, so the
            object passed in (or the shared default) keeps the new settings.
        n_features: Value forwarded to the vectorizer's ``max_features``.
        stop_words: Value forwarded to the vectorizer's ``stop_words``.
        ngram_range: Value forwarded to the vectorizer's ``ngram_range``.
        classifier: Iterable of ``(name, estimator)`` pairs to evaluate.

    Returns:
        A list of ``(name, clf_accuracy, tt_time)`` tuples, one per
        classifier, in iteration order. The last two items are whatever
        ``accuracy_summary`` returns (presumably validation accuracy and
        train/test time -- confirm against its definition).
    """
    result = []
    # The same vectorizer instance is shared by every pipeline below, so it is
    # configured once up front rather than per classifier.
    vectorizer.set_params(stop_words=stop_words, max_features=n_features, ngram_range=ngram_range)
    for n, c in classifier:
        checker_pipeline = Pipeline([
            ('vectorizer', vectorizer),
            ('classifier', c)
        ])
        # Parenthesised single-argument print behaves identically on
        # Python 2 and Python 3 (the bare print statement is a syntax
        # error on Python 3).
        print("Validation result for {}".format(n))
        print(c)
        clf_accuracy, tt_time = accuracy_summary(checker_pipeline, x_train, y_train, x_validation, y_validation)
        result.append((n, clf_accuracy, tt_time))
    return result
# Run the full classifier comparison with n-grams of length 1 through 3 and
# the vectorizer's feature cap raised to 100,000.
trigram_result = classifier_comparator(ngram_range=(1, 3), n_features=100000)
Add Comment
Please, Sign In to add comment