Advertisement
Guest User

Untitled

a guest
Jan 18th, 2017
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.41 KB | None | 0 0
  1. package playground;
  2.  
  3. import java.io.IOException;
  4. import java.net.URISyntaxException;
  5. import java.nio.file.Files;
  6. import java.nio.file.Paths;
  7. import java.util.ArrayList;
  8. import java.util.Collections;
  9. import java.util.HashMap;
  10. import java.util.List;
  11. import java.util.Map;
  12. import java.util.function.UnaryOperator;
  13. import java.util.stream.Stream;
  14.  
  15. import org.apache.mahout.cf.taste.common.TasteException;
  16. import org.apache.mahout.cf.taste.eval.DataModelBuilder;
  17. import org.apache.mahout.cf.taste.eval.IRStatistics;
  18. import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
  19. import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
  20. import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
  21. import org.apache.mahout.cf.taste.impl.common.FastIDSet;
  22. import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
  23. import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
  24. import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
  25. import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
  26. import org.apache.mahout.cf.taste.model.DataModel;
  27. import org.apache.mahout.cf.taste.model.PreferenceArray;
  28. import org.apache.mahout.cf.taste.recommender.Recommender;
  29. import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
  30. import org.junit.Assert;
  31. import org.junit.Test;
  32.  
  33. public class IRStatisticsOnBooleanDataTest {
  34.  
  35. private static final String RATINGS_DATA_FILE_RELATIVE_PATH = "ratings.csv";
  36.  
  37. private final UnaryOperator<List<Long>> ascendingSorter = longs -> {
  38. List<Long> sorted = new ArrayList<>(longs);
  39. Collections.sort(sorted);
  40. return sorted;
  41. };
  42.  
  43. private final UnaryOperator<List<Long>> descendingSorter = longs -> {
  44. List<Long> sorted = new ArrayList<>(longs);
  45. Collections.sort(sorted, Collections.reverseOrder());
  46. return sorted;
  47. };
  48.  
  49. @Test
  50. public void testSamePrecisionRecall() throws IOException, URISyntaxException, TasteException {
  51. Map<Long, List<Long>> data = readData();
  52.  
  53. GenericBooleanPrefDataModel dataModelAscending = buildGenericBooleanPrefDataModel(data, ascendingSorter);
  54.  
  55. GenericBooleanPrefDataModel dataModelDescending = buildGenericBooleanPrefDataModel(data, descendingSorter);
  56.  
  57. int at = 5;
  58. double evaluationPercentage = 1.0;
  59. Assert.assertTrue(
  60. samePrecisionRecall(
  61. evaluate(dataModelAscending, at, evaluationPercentage),
  62. evaluate(dataModelDescending, at, evaluationPercentage)));
  63. }
  64.  
  65. private boolean samePrecisionRecall(IRStatistics s1, IRStatistics s2) {
  66. if (s1 == null && s2 != null) {
  67. return false;
  68. }
  69. if (s1 != null && s2 == null) {
  70. return false;
  71. }
  72. if (s1 == null && s2 == null) {
  73. return true;
  74. }
  75. return s1.getPrecision() == s2.getPrecision() &&
  76. s1.getRecall() == s2.getRecall();
  77. }
  78.  
  79. private IRStatistics evaluate(DataModel dataModel, int at, double evaluationPercentage) throws TasteException {
  80. RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();
  81.  
  82. RecommenderBuilder recommenderBuilder = new RecommenderBuilder() {
  83.  
  84. @Override
  85. public Recommender buildRecommender(DataModel dataModel) throws TasteException {
  86. ItemSimilarity itemSimilarity = new TanimotoCoefficientSimilarity(dataModel);
  87. return new GenericBooleanPrefItemBasedRecommender(dataModel, itemSimilarity);
  88. }
  89. };
  90.  
  91. DataModelBuilder dataModelBuilder = new DataModelBuilder() {
  92.  
  93. @Override
  94. public DataModel buildDataModel(FastByIDMap<PreferenceArray> trainingData) {
  95. return new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(trainingData));
  96. }
  97. };
  98.  
  99. return evaluator.evaluate(
  100. recommenderBuilder,
  101. dataModelBuilder,
  102. dataModel,
  103. null, // IDRescorer
  104. at,
  105. GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD,
  106. evaluationPercentage);
  107. }
  108.  
  109. /*
  110. * Builds a GenericBooleanPrefDataModel using the data in the given Map<Long, List<Long>>,
  111. * where keys are userIds, and values are the itemsIds associated with that userId.
  112. * Uses the given UnaryOperator to sort itemIds associated to a userId.
  113. */
  114. private GenericBooleanPrefDataModel buildGenericBooleanPrefDataModel(
  115. Map<Long, List<Long>> data,
  116. UnaryOperator<List<Long>> itemsSorter) {
  117. FastByIDMap<FastIDSet> userData = new FastByIDMap<>(data.size());
  118. for (Map.Entry<Long, List<Long>> entry : data.entrySet()) {
  119. long userId = entry.getKey();
  120. List<Long> itemIds = entry.getValue();
  121. List<Long> sortedItemIds = itemsSorter.apply(itemIds);
  122. FastIDSet userItems = new FastIDSet(sortedItemIds.size());
  123. for (Long itemId : sortedItemIds) {
  124. userItems.add(itemId);
  125. }
  126. userData.put(userId, userItems);
  127. }
  128. return new GenericBooleanPrefDataModel(userData);
  129. }
  130.  
  131. /*
  132. * Reads the "ratings.csv" file from the MovieLens dataset, and returns a Map<Long, List<Long>>
  133. * where keys are userIds, and values are the itemsIds associated with that userId.
  134. */
  135. private Map<Long, List<Long>> readData() throws IOException, URISyntaxException {
  136. try (
  137. Stream<String> lines = Files.lines(
  138. Paths.get(
  139. this.getClass()
  140. .getClassLoader()
  141. .getResource(RATINGS_DATA_FILE_RELATIVE_PATH)
  142. .toURI()))) {
  143.  
  144. Map<Long, List<Long>> data = new HashMap<>();
  145.  
  146. lines.skip(1)
  147. .forEach(line -> {
  148. String[] tokens = line.split(",");
  149. long userId = Long.parseLong(tokens[0]);
  150. long itemId = Long.parseLong(tokens[1]);
  151. if (!data.containsKey(userId)) {
  152. data.put(userId, new ArrayList<>());
  153. }
  154. data.get(userId).add(itemId);
  155. });
  156.  
  157. return data;
  158. }
  159. }
  160.  
  161. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement