Advertisement
Guest User

Untitled

a guest
Jan 23rd, 2017
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.08 KB | None | 0 0
  1. import csv
  2. with open ('news_train.txt', 'r', encoding='utf-8') as news_train:
  3.     train=list(csv.reader(news_train, delimiter='\t'))
  4.  
  5. trainCat=[None]*60000
  6. trainNews=[None]*60000
  7. i=0
  8. for element in train:
  9.     trainCat[i]=train[i][0]
  10.     trainNews[i]=train[i][1]+' '+train[i][2]
  11.     i=i+1
  12.  
  13. with open ('news_test.txt', 'r', encoding='utf-8') as news_test:
  14.     test=list(csv.reader(news_test, delimiter='\t'))  
  15.  
  16. testNews=[None]*15000
  17. i=0
  18. for element in test:
  19.     testNews[i]=test[i][0]+' '+test[i][1]
  20.     i=i+1
  21.  
  22. from sklearn.svm import LinearSVC
  23. from sklearn.pipeline import Pipeline
  24. from sklearn.multiclass import OneVsRestClassifier
  25. from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
  26.  
  27. predictor=Pipeline([
  28.         ('vectorizer', CountVectorizer()),
  29.         ('tfidf', TfidfTransformer()),
  30.         ('clf', OneVsRestClassifier(LinearSVC())),
  31.     ])
  32. predictor.fit(trainNews, trainCat)
  33. result=predictor.predict(testNews)
  34.  
  35. with open ('result.txt', 'w', encoding='utf-8') as predResult:
  36.     for element in result:
  37.         predResult.write(element+'\n')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement