Advertisement
Guest User

Untitled

a guest
Mar 23rd, 2019
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 1.89 KB | None | 0 0
  1.     private PostingsList getTfidfQuery(Query query) {
  2.  
  3.         /** Build query vector */
  4.         ArrayList<Double> q = new ArrayList<>();
  5.  
  6.         HashMap<String, Integer> tkns = new HashMap<>();
  7.         for (int i = 0; i < query.queryterm.size(); i++) {
  8.             if (!tkns.containsKey(query.queryterm.get(i).term))
  9.                 q.add(query.queryterm.get(i).weight);
  10.             else {
  11.                 int idx = tkns.get(query.queryterm.get(i).term);
  12.                 q.set(idx, q.get(idx) + query.queryterm.get(i).weight);
  13.             }
  14.         }
  15.  
  16.         ArrayList<TokenIndexData> postingsLists = getPostingsLists(query);
  17.         HashMap<Integer, PostingsEntry> scores = new HashMap<>();
  18.         HashMap<Integer, Double> denom = new HashMap<>();
  19.  
  20.         int i = 0;
  21.         for (TokenIndexData pt: postingsLists) {
  22.             PostingsList pl = pt.postingsList;
  23.  
  24.             for (PostingsEntry pe: pl) {
  25.  
  26.                 double tfidf = tfidf(pe, pl);
  27.  
  28.                 double score = q.get(i) * tfidf;
  29.  
  30.                 if (!scores.containsKey(pe.docID)) {
  31.                     scores.put(pe.docID, new PostingsEntry(pe.docID, score));
  32.                 }
  33.                 else {
  34.                     scores.get(pe.docID).score += score;
  35.                 }
  36.             }
  37.             i++;
  38.         }
  39.  
  40.         PostingsList results = new PostingsList();
  41.  
  42.         /** Normalize score */
  43.         for (int docID: scores.keySet()) {
  44.             PostingsEntry pe = scores.get(docID);
  45.  
  46.             pe.score /= Index.docLengths.get(pe.docID);
  47.  
  48.             results.add(pe);
  49.         }
  50.        
  51.         Collections.sort(results);
  52.  
  53.         return results;
  54.     }
  55.  
  56.     private double tfidf(PostingsEntry pe, PostingsList pl) {
  57.         double tf = pe.getOccurences();
  58.         double idf = Math.log(Index.docNames.size() / pl.size());
  59.  
  60.         double tfidf = tf * idf;
  61.         return tfidf;
  62.     }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement