Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package playground;
- import java.io.IOException;
- import java.net.URISyntaxException;
- import java.nio.file.Files;
- import java.nio.file.Paths;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import java.util.function.UnaryOperator;
- import java.util.stream.Stream;
- import org.apache.mahout.cf.taste.common.TasteException;
- import org.apache.mahout.cf.taste.eval.DataModelBuilder;
- import org.apache.mahout.cf.taste.eval.IRStatistics;
- import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
- import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
- import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
- import org.apache.mahout.cf.taste.impl.common.FastIDSet;
- import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
- import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
- import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
- import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
- import org.apache.mahout.cf.taste.model.DataModel;
- import org.apache.mahout.cf.taste.model.PreferenceArray;
- import org.apache.mahout.cf.taste.recommender.Recommender;
- import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
- import org.junit.Assert;
- import org.junit.Test;
- public class IRStatisticsOnBooleanDataTest {
- private static final String RATINGS_DATA_FILE_RELATIVE_PATH = "ratings.csv";
- private final UnaryOperator<List<Long>> ascendingSorter = longs -> {
- List<Long> sorted = new ArrayList<>(longs);
- Collections.sort(sorted);
- return sorted;
- };
- private final UnaryOperator<List<Long>> descendingSorter = longs -> {
- List<Long> sorted = new ArrayList<>(longs);
- Collections.sort(sorted, Collections.reverseOrder());
- return sorted;
- };
- @Test
- public void testSamePrecisionRecall() throws IOException, URISyntaxException, TasteException {
- Map<Long, List<Long>> data = readData();
- GenericBooleanPrefDataModel dataModelAscending = buildGenericBooleanPrefDataModel(data, ascendingSorter);
- GenericBooleanPrefDataModel dataModelDescending = buildGenericBooleanPrefDataModel(data, descendingSorter);
- int at = 5;
- double evaluationPercentage = 1.0;
- Assert.assertTrue(
- samePrecisionRecall(
- evaluate(dataModelAscending, at, evaluationPercentage),
- evaluate(dataModelDescending, at, evaluationPercentage)));
- }
- private boolean samePrecisionRecall(IRStatistics s1, IRStatistics s2) {
- if (s1 == null && s2 != null) {
- return false;
- }
- if (s1 != null && s2 == null) {
- return false;
- }
- if (s1 == null && s2 == null) {
- return true;
- }
- return s1.getPrecision() == s2.getPrecision() &&
- s1.getRecall() == s2.getRecall();
- }
- private IRStatistics evaluate(DataModel dataModel, int at, double evaluationPercentage) throws TasteException {
- RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();
- RecommenderBuilder recommenderBuilder = new RecommenderBuilder() {
- @Override
- public Recommender buildRecommender(DataModel dataModel) throws TasteException {
- ItemSimilarity itemSimilarity = new TanimotoCoefficientSimilarity(dataModel);
- return new GenericBooleanPrefItemBasedRecommender(dataModel, itemSimilarity);
- }
- };
- DataModelBuilder dataModelBuilder = new DataModelBuilder() {
- @Override
- public DataModel buildDataModel(FastByIDMap<PreferenceArray> trainingData) {
- return new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(trainingData));
- }
- };
- return evaluator.evaluate(
- recommenderBuilder,
- dataModelBuilder,
- dataModel,
- null, // IDRescorer
- at,
- GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD,
- evaluationPercentage);
- }
- /*
- * Builds a GenericBooleanPrefDataModel using the data in the given Map<Long, List<Long>>,
- * where keys are userIds, and values are the itemsIds associated with that userId.
- * Uses the given UnaryOperator to sort itemIds associated to a userId.
- */
- private GenericBooleanPrefDataModel buildGenericBooleanPrefDataModel(
- Map<Long, List<Long>> data,
- UnaryOperator<List<Long>> itemsSorter) {
- FastByIDMap<FastIDSet> userData = new FastByIDMap<>(data.size());
- for (Map.Entry<Long, List<Long>> entry : data.entrySet()) {
- long userId = entry.getKey();
- List<Long> itemIds = entry.getValue();
- List<Long> sortedItemIds = itemsSorter.apply(itemIds);
- FastIDSet userItems = new FastIDSet(sortedItemIds.size());
- for (Long itemId : sortedItemIds) {
- userItems.add(itemId);
- }
- userData.put(userId, userItems);
- }
- return new GenericBooleanPrefDataModel(userData);
- }
- /*
- * Reads the "ratings.csv" file from the MovieLens dataset, and returns a Map<Long, List<Long>>
- * where keys are userIds, and values are the itemsIds associated with that userId.
- */
- private Map<Long, List<Long>> readData() throws IOException, URISyntaxException {
- try (
- Stream<String> lines = Files.lines(
- Paths.get(
- this.getClass()
- .getClassLoader()
- .getResource(RATINGS_DATA_FILE_RELATIVE_PATH)
- .toURI()))) {
- Map<Long, List<Long>> data = new HashMap<>();
- lines.skip(1)
- .forEach(line -> {
- String[] tokens = line.split(",");
- long userId = Long.parseLong(tokens[0]);
- long itemId = Long.parseLong(tokens[1]);
- if (!data.containsKey(userId)) {
- data.put(userId, new ArrayList<>());
- }
- data.get(userId).add(itemId);
- });
- return data;
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement