Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Train a TF-IDF + linear SVM (one-vs-rest) text classifier on the rows of
# news_train.txt, predict a category for every row of news_test.txt, and
# write one predicted category per line to result.txt.
import csv

TRAIN_PATH = 'news_train.txt'
TEST_PATH = 'news_test.txt'
RESULT_PATH = 'result.txt'


def _read_rows(path):
    """Return every tab-separated row of *path* as a list of string fields."""
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(path, 'r', encoding='utf-8', newline='') as handle:
        return list(csv.reader(handle, delimiter='\t'))


def load_training_data(path=TRAIN_PATH):
    """Load the labelled training set.

    Column 0 of each row is used as the category label; columns 1 and 2 are
    joined with a single space to form the text of the sample.

    Returns:
        A ``(categories, texts)`` pair of equal-length lists, with one entry
        per row actually present in the file (no fixed row count is assumed).

    Raises:
        IndexError: if a row has fewer than three columns.
    """
    rows = _read_rows(path)
    categories = [row[0] for row in rows]
    texts = [row[1] + ' ' + row[2] for row in rows]
    return categories, texts


def load_test_data(path=TEST_PATH):
    """Load the unlabelled test set.

    Columns 0 and 1 of each row are joined with a single space.

    Returns:
        A list of texts, one per row actually present in the file.

    Raises:
        IndexError: if a row has fewer than two columns.
    """
    return [row[0] + ' ' + row[1] for row in _read_rows(path)]


def build_predictor():
    """Return the unfitted pipeline: token counts -> TF-IDF -> one-vs-rest LinearSVC."""
    # Imported here rather than at module level so the data-loading helpers
    # stay usable (and importable) without scikit-learn being loaded.
    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
    from sklearn.multiclass import OneVsRestClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.svm import LinearSVC

    return Pipeline([
        ('vectorizer', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', OneVsRestClassifier(LinearSVC())),
    ])


def main():
    """Fit the classifier on the training file and write test predictions."""
    train_categories, train_texts = load_training_data()
    test_texts = load_test_data()

    predictor = build_predictor()
    predictor.fit(train_texts, train_categories)
    result = predictor.predict(test_texts)

    # One predicted category per line, in the same order as the test rows.
    with open(RESULT_PATH, 'w', encoding='utf-8') as pred_result:
        pred_result.writelines(element + '\n' for element in result)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement