Advertisement
Guest User

Untitled

a guest
May 3rd, 2014
246
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 9.92 KB | None | 0 0
  1. package cz.cvut.fit.bouchja1.mi_dip.rest.client.alg.impl;
  2.  
  3. import com.google.common.collect.Lists;
  4. import cz.cvut.fit.bouchja1.mi_dip.rest.client.alg.IAlgorithm;
  5. import cz.cvut.fit.bouchja1.mi_dip.rest.client.domain.output.OutputDocument;
  6. import cz.cvut.fit.bouchja1.mi_dip.rest.client.helper.AlgorithmEndpointHelper;
  7. import cz.cvut.fit.bouchja1.mi_dip.rest.client.solr.AlgorithmSolrService;
  8. import cz.cvut.fit.bouchja1.mi_dip.rest.client.util.Util;
  9. import java.util.ArrayList;
  10. import java.util.Collection;
  11. import java.util.HashSet;
  12. import java.util.Iterator;
  13. import java.util.List;
  14. import java.util.Map;
  15. import java.util.Set;
  16. import javax.ws.rs.core.GenericEntity;
  17. import javax.ws.rs.core.Response;
  18. import org.apache.commons.logging.Log;
  19. import org.apache.commons.logging.LogFactory;
  20. import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
  21. import org.apache.mahout.cf.taste.impl.common.FastIDSet;
  22. import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
  23. import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
  24. import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefUserBasedRecommender;
  25. import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
  26. import org.apache.mahout.cf.taste.model.DataModel;
  27. import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
  28. import org.apache.mahout.cf.taste.recommender.RecommendedItem;
  29. import org.apache.mahout.cf.taste.recommender.Recommender;
  30. import org.apache.mahout.cf.taste.similarity.UserSimilarity;
  31. import org.apache.solr.client.solrj.SolrQuery;
  32. import org.apache.solr.client.solrj.SolrServerException;
  33. import org.apache.solr.client.solrj.impl.HttpSolrServer;
  34. import org.apache.solr.client.solrj.response.QueryResponse;
  35. import org.apache.solr.common.SolrDocument;
  36. import org.apache.solr.common.SolrDocumentList;
  37.  
  38. /**
  39.  *
  40.  * @author jan
  41.  */
  42. public class AlgorithmUserBasedCf implements IAlgorithm {
  43.     private final Log logger = LogFactory.getLog(getClass());
  44.    
  45.     private static final String ALGORITHM_NAME = "usercf";
  46.     private String id;
  47.  
  48.     private HttpSolrServer server;
  49.    
  50.     private String coreId;
  51.     private String groupId;
  52.     private String userId;
  53.     private String limit;
  54.        
  55.     public AlgorithmUserBasedCf(Map<String, String> algorithmParams) {
  56.         this.coreId = algorithmParams.get("coreId");
  57.         this.groupId = algorithmParams.get("groupId");
  58.         this.userId = algorithmParams.get("userId");
  59.         this.limit = algorithmParams.get("limit");
  60.         this.id = ALGORITHM_NAME;
  61.     }    
  62.  
  63.     @Override
  64.     public Response recommend(AlgorithmSolrService algorithmSolrService, AlgorithmEndpointHelper helper) {        
  65.         Response resp;
  66.         List<OutputDocument> docs = new ArrayList<OutputDocument>();
  67.         if (algorithmSolrService.getSolrService().isServerCoreFromPool(coreId)) {
  68.             int limitToQuery = Util.getCountOfElementsToBeReturned(limit);
  69.             try {
  70.                 docs = getRecommendationByUserBasedCf(coreId, userId, groupId, limitToQuery, algorithmSolrService);
  71.                 resp = Response.ok(
  72.                         new GenericEntity<List<OutputDocument>>(Lists.newArrayList(docs)) {
  73.                 }).build();
  74.             } catch (SolrServerException ex) {
  75.                 logger.error(ex);
  76.                 resp = helper.getServerError(ex.getMessage());
  77.             }
  78.         } else {
  79.             //vratit odpoved, ze takovy core-id tam neexistuje
  80.             resp = helper.getBadRequestResponse("You filled bad or non-existing {core-id}.");
  81.         }
  82.         return resp;  
  83.     }
  84.    
  85.     private List<OutputDocument> getRecommendationByUserBasedCf(String coreId, String userId, String groupId, int limit, AlgorithmSolrService algorithmSolrService) throws SolrServerException {
  86.         this.server = algorithmSolrService.getSolrService().getServerFromPool(coreId);
  87.         List<OutputDocument> docs = new ArrayList<OutputDocument>();
  88.        
  89.         /*
  90.          * We need userIds and their articles - userid field represents the unique
  91.          * user identification and articleList is a list of article ids.
  92.          * So if we execute a Solr query then we get a response like this
  93.          */
  94.         Set<Integer> userIdsSet = new HashSet<Integer>();
  95.         List<SolrDocument> docsToReturn = new ArrayList<SolrDocument>();
  96.  
  97.         try {
  98.             SolrQuery testQuery = new SolrQuery();
  99.             // Potrebuju projet vsechny dokumenty v indexu a od kazdyho ulozit do Set userId
  100.             testQuery.setQuery("id:*");
  101.             testQuery.setRows(0);
  102.             QueryResponse response = server.query(testQuery);
  103.  
  104.             fillUserIdsSet(response, userIdsSet);
  105.  
  106.             //nyni mam tedy userIds a mohu se dotazovat a ziskavat jejich articles
  107.  
  108.             //Mahout Map and Set implementations
  109.             FastByIDMap<FastIDSet> userData = new FastByIDMap<FastIDSet>();
  110.  
  111.             createUserData(response, userIdsSet, userData);
  112.  
  113.             DataModel model = new GenericBooleanPrefDataModel(userData);
  114.  
  115.             //After create a DataModel object we are able to build a recommender:
  116.             /*
  117.              * The LogLikelihoodSimilarity class does not need preferences values.
  118.              * Other similarity metrics like Euclidean distance and Pearson
  119.              * correlation throw IllegalArgumentException for boolean preferences.
  120.              */
  121.             UserSimilarity similarity = new LogLikelihoodSimilarity(model);
  122.  
  123.             // user-based recommender - considering 2-nearest neighboors, given log-likelihood similarity
  124.             /*
  125.              * Nearest neighborhood means that recommended items for some user will
  126.              * be calculated according to the (log-likelihood) similarity between this user and users contained in model.
  127.              */
  128.             UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);
  129.             long[] sousedi = neighborhood.getUserNeighborhood(Long.parseLong(userId));
  130.  
  131.             for (int i = 0; i < sousedi.length; i++) {
  132.                 System.out.println("soused: " + sousedi[i]);
  133.             }
  134.  
  135.             //GenericBooleanPrefUserBasedRecommender = the appropriate recommender for boolean preferences
  136.             Recommender recommender = new GenericBooleanPrefUserBasedRecommender(model, neighborhood, similarity);
  137.  
  138.             // recommended items for specific user (one item recommended)
  139.             List<RecommendedItem> recommendedItems = recommender.recommend(Long.parseLong(userId), limit);
  140.  
  141.             processRecommendedItems(response, docsToReturn, recommendedItems);
  142.            
  143.             //NEJAK NAPLNIT TY DOCS
  144.  
  145.         } catch (Exception e) {
  146.             e.printStackTrace();
  147.             //return Response.status(500).entity("error : " + e.toString()).build();
  148.         }
  149.        
  150.         return docs;        
  151.     }
  152.    
  153.     private void fillUserIdsSet(QueryResponse response, Set<Integer> userIdsSet) throws SolrServerException {
  154.         int numFound = (int) response.getResults().getNumFound();
  155.         SolrQuery query = new SolrQuery();
  156.         query.setQuery("id:*");
  157.         query.setFields("userId");
  158.  
  159.         for (int i = 0; i < numFound; i = i + 50) {
  160.             query.setStart(i);
  161.             query.setRows(50);
  162.             response = server.query(query);
  163.             SolrDocumentList results = response.getResults();
  164.             for (int j = 0; j < results.size(); j++) {
  165.                 Collection<Object> userIdsInDoc = results.get(j).getFieldValues("userId");
  166.                 if (userIdsInDoc != null) {
  167.                     Iterator<Object> userIdsInDocIterator = userIdsInDoc.iterator();
  168.                     while (userIdsInDocIterator.hasNext()) {
  169.                         Integer tempUserId = (Integer) userIdsInDocIterator.next();
  170.                         userIdsSet.add(tempUserId);
  171.                     }
  172.                 }
  173.             }
  174.         }
  175.     }
  176.    
  177.     private void createUserData(QueryResponse response, Set<Integer> userIdsSet, FastByIDMap<FastIDSet> userData) throws SolrServerException {
  178.         SolrQuery cfQuery = new SolrQuery();
  179.         cfQuery.setRows(Integer.MAX_VALUE);
  180.         cfQuery.setFields("id");
  181.         //nyni ziskej ty dvojice uzivatelId - articleId
  182.         Iterator<Integer> userSetIterator = userIdsSet.iterator();
  183.         while (userSetIterator.hasNext()) {
  184.             Long userRelatedId = userSetIterator.next().longValue();
  185.             cfQuery.setQuery("userId:" + userRelatedId);
  186.             response = server.query(cfQuery);
  187.             SolrDocumentList results = response.getResults();
  188.             long[] itemValues = new long[results.size()];
  189.             //List<Integer> itemValues = new ArrayList<Integer>();
  190.  
  191.             for (int j = 0; j < results.size(); j++) {
  192.                 SolrDocument d = results.get(j);
  193.                 //itemValues.add((Integer)d.getFieldValue("id"));
  194.                 itemValues[j] = Long.parseLong(d.getFieldValue("id") + "");
  195.             }
  196.             userData.put(userRelatedId, new FastIDSet(itemValues));
  197.         }
  198.     }
  199.    
  200.     private void processRecommendedItems(QueryResponse response, List<SolrDocument> docsToReturn, List<RecommendedItem> recommendedItems) throws SolrServerException {
  201.         System.out.println("Recommended items for user# " + userId);
  202.  
  203.         SolrQuery queryReturnArticles = new SolrQuery();
  204.  
  205.         for (RecommendedItem recommendedItem : recommendedItems) {
  206.             System.out.println(recommendedItem.getItemID());
  207.             queryReturnArticles.setQuery("id:" + recommendedItem.getItemID());
  208.             response = server.query(queryReturnArticles);
  209.             SolrDocumentList results = response.getResults();
  210.             for (int i = 0; i < results.size(); i++) {
  211.                 System.out.println(results.get(i));
  212.                 docsToReturn.add(results.get(i));
  213.             }
  214.         }
  215.     }    
  216.  
  217.     public String getId() {
  218.         return id;
  219.     }
  220.    
  221.    
  222. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement