Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
% Ridge regression on Yelp review data, solved with CVX.
%
% Reads a Matrix Market file whose first column holds the rating of each
% review and whose remaining columns hold the review features, splits the
% rows 70/30 into training and test sets, fits
%     minimize ||X*w + v - y||^2 + lambda*||w||^2
% on the training rows, and reports the root-mean-squared error on both sets.
%
% Workspace variables produced: w, v, trainRMS, testRMS (plus the
% intermediate split variables).

% read in the data
A = mmread('data/yelpData.mtx');

% extract the ratings of each review (first column), then drop that column
% so A holds only the features
ratings = full(A(:,1));
A(:,1) = [];

% problem dimensions are taken from the data itself rather than assumed
numData = size(A, 1);       % number of reviews (rows)
numFeatures = size(A, 2);   % number of features left after removing ratings

% split into test/train with a random 70/30 row permutation
% (original note: should be replaced with a better Julia way of doing it)
data = randperm(numData);
ind = floor(numData*0.7);
training = data(1:ind);
test = data(ind+1:end);
trainReviews = A(training,:);
trainRatings = ratings(training,:);
testReviews = A(test,:);
testRatings = ratings(test,:);

% regularization weight; the original author guessed 100 should work
lambda = 100;

% Original note: CVX fails here; matrix stuffing was used in the actual
% code, but a direct least-squares solve should be better.
% The intercept v is left out of the penalty term, so only w is shrunk.
cvx_begin
    variables w(numFeatures) v
    minimize( sum_square(trainReviews*w + v - trainRatings) + lambda*sum_square(w) )
cvx_end

% calculate root mean squared error for train/test
yhat = trainReviews*w + v;
trainRMS = sqrt(mean((trainRatings - yhat).^2));
yhat2 = testReviews*w + v;
testRMS = sqrt(mean((testRatings - yhat2).^2));
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement