Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Test classifier on the full test dataset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Only transform() is called here (no fitting on test data); vectorizer and transformer\n",
- "# are presumably fitted on the training corpus in an earlier cell - TODO confirm.\n",
- "test_counts = vectorizer.transform(test_corpus)\n",
- "test_tfidf = transformer.transform(test_counts)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.944"
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Score the classifier on the transformed test set (mean accuracy, assuming a scikit-learn classifier).\n",
- "classifier.score(test_tfidf, test_labels)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Find Informative Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 43,
- "metadata": {},
- "outputs": [],
- "source": [
- "n = 20  # number of top-weighted features to show per author\n",
- "# Array mapping from feature integer indices to feature name.\n",
- "# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2, so prefer\n",
- "# get_feature_names_out() when the installed version provides it.\n",
- "if hasattr(vectorizer, \"get_feature_names_out\"):\n",
- "    feature_names = vectorizer.get_feature_names_out()\n",
- "else:\n",
- "    feature_names = vectorizer.get_feature_names()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 44,
- "metadata": {
- "scrolled": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "AaronPressman 's Top 20 features \n",
- "\n",
- "products -6.843221656677828\n",
- "commerce -6.810211392169265\n",
- "phone -6.810211392169265\n",
- "administration -6.759908463882919\n",
- "securities -6.72589537844437\n",
- "federal -6.669434280670851\n",
- "court -6.656228165206716\n",
- "computer -6.648670161880032\n",
- "credit -6.59269344152553\n",
- "new -6.466038750420433\n",
- "companies -6.421187855927672\n",
- "banks -6.282188052280851\n",
- "congress -6.1691369178693325\n",
- "'s -5.874179559976962\n",
- "internet -5.48040317700573\n",
- "said -5.33729863768842\n",
- "'' -5.271277381900798\n",
- "`` -5.260227520807157\n",
- ". -4.229822040487652\n",
- ", -4.030488993297016\n",
- "\n",
- "\n",
- "AlanCrosby 's Top 20 features \n",
- "\n",
- "billion -6.651115217637997\n",
- "elections -6.56600473481026\n",
- "analysts -6.555871417225831\n",
- "match -6.527893548285615\n",
- "coalition -6.512588487497869\n",
- "week -6.388919463309094\n",
- "points -6.371236393412705\n",
- "index -6.340860179321937\n",
- "senate -6.295387040925007\n",
- "round -6.170196671090114\n",
- "( -6.150627605586323\n",
- ") -6.150627605586323\n",
- "percent -6.077633643022997\n",
- "market -6.022847352516765\n",
- "`` -5.770879251421359\n",
- "'' -5.745310055668684\n",
- "said -5.60866477446716\n",
- "'s -5.504211244742248\n",
- ". -4.294466466066362\n",
- ", -3.931324802675596\n",
- "\n",
- "\n",
- "AlexanderSmith 's Top 20 features \n",
- "\n",
- "cable -6.5614732431634435\n",
- "year -6.529223847082791\n",
- "investment -6.478300823188887\n",
- "british -6.463011028791085\n",
- "; -6.443924532066207\n",
- "$ -6.437760325964971\n",
- ") -6.38363052460965\n",
- "company -6.38363052460965\n",
- "( -6.3761314468987695\n",
- "group -6.3761314468987695\n",
- "percent -6.318081205830778\n",
- "& -6.296556432207335\n",
- "amp -6.296556432207335\n",
- "million -6.283439795017349\n",
- "'s -5.456188187197981\n",
- "`` -5.351676730265064\n",
- "'' -5.338355645441447\n",
- "said -5.139780496405044\n",
- ". -4.199710987553583\n",
- ", -4.107476056200915\n",
- "\n",
- "\n",
- "BenjaminKangLim 's Top 20 features \n",
- "\n",
- "( -6.818371181790302\n",
- ") -6.818371181790302\n",
- "state -6.79976981870553\n",
- "links -6.764806000459541\n",
- "dan -6.724913852417953\n",
- "people -6.710306847324062\n",
- "years -6.6989899973268265\n",
- "sentence -6.6316354347067294\n",
- "... -6.582288783415159\n",
- "government -6.495999271657508\n",
- "court -6.265417549885052\n",
- "party -6.18776190871761\n",
- "'' -5.5625764634394805\n",
- "`` -5.551799132659087\n",
- "beijing -5.454730907587863\n",
- "china -5.256907777517354\n",
- "said -5.081614300033448\n",
- "'s -5.068306343512768\n",
- ". -4.181346344826704\n",
- ", -4.087015543226064\n",
- "\n",
- "\n",
- "BernardHickey 's Top 20 features \n",
- "\n",
- "australian -6.496200125113844\n",
- "analysts -6.4895887628902855\n",
- "bank -6.465451563584756\n",
- "australia -6.330408486415948\n",
- "( -6.294269155165459\n",
- ") -6.294269155165459\n",
- "corp -6.252254460517263\n",
- "million -5.955480959210123\n",
- "profit -5.933900175980197\n",
- "year -5.921578211989878\n",
- "murdoch -5.905439558869782\n",
- "percent -5.851842538640983\n",
- "news -5.846674401805445\n",
- "$ -5.483310978602539\n",
- "'s -5.355618305216927\n",
- "`` -5.330694412171886\n",
- "'' -5.315426649102102\n",
- "said -4.8155626174681165\n",
- ". -4.2475736088130445\n",
- ", -4.224915439286149\n",
- "\n",
- "\n"
- ]
- }
- ],
- "source": [
- "n = 20\n",
- "# Print the n highest-weighted features for each of the first five encoded author labels.\n",
- "# NOTE(review): if classifier is a naive Bayes model, coef_ was removed in scikit-learn 1.1\n",
- "# and feature_log_prob_ holds the same values - confirm against the training cell.\n",
- "for i in range(5):\n",
- "    # inverse_transform is given a one-element list, so unwrap the single decoded name.\n",
- "    author = label_encoder.inverse_transform([i])[0]\n",
- "    print(author, \"'s Top %s features \\n\" % n)\n",
- "    # Ascending sort of (weight, feature) pairs; the last n entries are the largest weights,\n",
- "    # so the strongest feature is printed last.\n",
- "    topn = sorted(zip(classifier.coef_[i], feature_names))[-n:]\n",
- "    for coef, feature in topn:\n",
- "        print(feature, coef)\n",
- "    print(\"\\n\")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement