Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
function ypred = predict(Xtrain, ytrain, Xtest)
%PREDICT Multi-label centroid classifier built on TF*IDF weights.
%   ypred = predict(Xtrain, ytrain, Xtest)
%
%   Inputs
%     Xtrain - docs-by-terms matrix (one row per training document, one
%              column per term). Presumably non-negative term counts --
%              TODO confirm with the caller.
%     ytrain - docs-by-classes label matrix; an entry > 0.5 marks the
%              document as a member of that class.
%     Xtest  - matrix with one row per test document and the same term
%              columns as Xtrain.
%
%   Output
%     ypred  - size(Xtest,1)-by-classes matrix of 0/1 flags. Every row has
%              at least one flag set.
%
%   Depends on the external function cossim(a, b), called with two column
%   vectors.

docs    = size(Xtrain, 1);
terms   = size(Xtrain, 2);
classes = size(ytrain, 2);

%% Classic TF*IDF matrix
fprintf('computing TF*IDF... '); tic;

% Term frequency: scale every row to sum to 1. Rows whose total is zero
% (documents with no terms) are left as all zeros instead of turning into
% NaN through 0/0 and contaminating the class centroids.
rowTotals = full(sum(Xtrain, 2));
rowTotals(rowTotals == 0) = 1;
tf = bsxfun(@rdivide, Xtrain, rowTotals);

% Inverse document frequency: log(docs / number of documents containing the
% term); terms that occur in no document get weight 0.
docFreq = full(sum(Xtrain > 0, 1));
idf = zeros(1, terms);
seen = docFreq > 0;
idf(seen) = log(docs ./ docFreq(seen));

TFIDF = bsxfun(@times, tf, idf);
clear tf;   % release the intermediate matrix early
toc;

%% Class centroids
fprintf('computing centroids... '); tic;
centroids = zeros(classes, terms);
for k = 1:classes
    members = ytrain(:, k) > 0.5;
    % Sum explicitly along dimension 1: when a class has exactly one
    % document, TFIDF(members,:) is a row vector and a plain sum() would
    % collapse it to a scalar.
    % A class with no members still yields a NaN centroid (0/0).
    centroids(k, :) = sum(TFIDF(members, :), 1) / sum(members);
end
centroids = sparse(centroids);
clear TFIDF;   % release the training matrix before classification
toc;

%% Assign test vectors to the class centroids that are close by cosine measure
% NOTE(review): the cossim result is treated as a distance here (smaller is
% closer: "< threshold" and min). If cossim actually returns a similarity,
% both comparisons are inverted -- confirm against cossim's definition.
% NOTE(review): test rows are compared without IDF weighting while the
% centroids live in TF*IDF space -- confirm this is intended.
threshold = 0.825;
fprintf('classifying... '); tic;
numTest = size(Xtest, 1);
ypred = zeros(numTest, classes);
dists = zeros(classes, 1);
for i = 1:numTest
    for k = 1:classes
        dists(k) = cossim(Xtest(i, :)', centroids(k, :)');
    end
    ypred(i, dists < threshold) = 1;
    [~, nearest] = min(dists);
    ypred(i, nearest) = 1;   % always assign at least one class
end
toc;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement