# Buggy naive classification with NLTK

import nltk.classify.util,os,sys
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import stopwords
from nltk.tokenize  import word_tokenize,RegexpTokenizer
import re

# Matches anything that looks like a markup tag: "<", one or more non-">" chars, ">".
TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Return *text* with every substring matching ``TAG_RE`` deleted."""
    return re.sub(TAG_RE, '', text)

def word_feats(words):
    """Build a presence-feature dict: each distinct item of *words* maps to True.

    Keys appear in first-occurrence order; duplicates collapse to one entry.
    """
    return dict.fromkeys(words, True)

def feature_extractor(sentiment):
    """Collect the vocabulary of every review under ``train/<sentiment>/``.

    Each entry of the directory is read as UTF-8 text, stripped of tag-like
    markup with ``remove_tags``, split into runs of word characters, and the
    resulting tokens are merged into a single dict via ``word_feats``.

    Args:
        sentiment: Name of the sub-directory of ``train/`` to read (the
            script calls this with ``"pos"`` and ``"neg"``).

    Returns:
        A dict mapping every distinct token found in any file to ``True``.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
    """
    path = os.path.join("train", sentiment)
    # The tokenizer does not depend on the file, so build it once.
    tokenizer = RegexpTokenizer(r"\w+")
    feats = {}
    # os.listdir returns entries in arbitrary order; sorting makes the dict's
    # insertion order (and anything derived from it) reproducible across runs.
    for name in sorted(os.listdir(path)):
        # ``with`` closes the handle even if reading or decoding fails.
        with open(os.path.join(path, name), "r", encoding='utf-8') as f:
            review = remove_tags(f.read())
        # NOTE(review): stop words are NOT filtered. A stop-word list used to
        # be loaded here on every iteration but was never applied to the tokens.
        feats.update(word_feats(tokenizer.tokenize(review)))
    return feats

def _labeled_split(feats, label, train_count):
    """Turn each key of *feats* into a ``({'word': key}, label)`` sample.

    Returns a ``(train, test)`` pair: the first *train_count* samples and
    the remaining ones, in the dict's iteration order.
    """
    samples = [({'word': word}, label) for word in feats]
    return samples[:train_count], samples[train_count:]


# Extract the vocabulary of each class and dump it to a text file for
# inspection. ``with`` guarantees the dumps are flushed and closed.
posative_feat = feature_extractor("pos")
with open("posFeat.txt", "w", encoding='utf-8') as p:
    p.write(str(posative_feat))
negative_feat = feature_extractor("neg")
with open("negFeat.txt", "w", encoding='utf-8') as n:
    n.write(str(negative_feat))

# Three quarters of each class go to training, the rest to testing.
plength = len(posative_feat) * 3 // 4
nlength = len(negative_feat) * 3 // 4
totalLength = plength + nlength

# Each class is split by its OWN training size (the negative class must use
# nlength, not plength), and exactly that many samples land in the train set.
pos_train, pos_test = _labeled_split(posative_feat, 'pos', plength)
neg_train, neg_test = _labeled_split(negative_feat, 'neg', nlength)
trainFeatList = pos_train + neg_train
testFeatList = pos_test + neg_test

# NOTE(review): every sample is a single word, and a word present in both
# vocabularies appears once under each label — confirm this is the intended
# training setup rather than one feature set per review.
classifier = NaiveBayesClassifier.train(trainFeatList)
print(nltk.classify.util.accuracy(classifier, testFeatList))
classifier.show_most_informative_features()