Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 2.74 KB | None | 0 0
  1. import java.io.BufferedReader;
  2. import java.io.BufferedWriter;
  3. import java.io.File;
  4. import java.io.FileNotFoundException;
  5. import java.io.FileReader;
  6. import java.io.FileWriter;
  7. import java.io.IOException;
  8. import java.util.ArrayList;
  9. import java.util.Arrays;
  10. import java.util.HashMap;
  11. import java.util.List;
  12. import java.util.Map;
  13. import java.util.Scanner;
  14. import java.util.TreeMap;
  15.  
  16. /**
  17.  *
  18.  * @author Pedro
  19.  */
  20. public class Indexer {
  21.  
  22.     private Map<String, BufferedWriter> buffers = new HashMap<>();
  23.  
  24.     public Indexer() throws IOException {
  25.         buffers.put("a", new BufferedWriter(new FileWriter("a.txt")));
  26.         buffers.put("bc", new BufferedWriter(new FileWriter("bc.txt")));
  27.         buffers.put("def", new BufferedWriter(new FileWriter("def.txt")));
  28.         buffers.put("ghijk", new BufferedWriter(new FileWriter("ghijk.txt")));
  29.         buffers.put("lmnp", new BufferedWriter(new FileWriter("lmnp.txt")));
  30.         buffers.put("pqrs", new BufferedWriter(new FileWriter("pqrs.txt")));
  31.         buffers.put("tuv", new BufferedWriter(new FileWriter("tuv.txt")));
  32.         buffers.put("wxyz", new BufferedWriter(new FileWriter("wxyz.txt")));
  33.     }
  34.  
  35.     public void index() throws FileNotFoundException, IOException {
  36.         Map<String, Map<Integer, Integer>> invertedIndex = new TreeMap<>();
  37.         BufferedReader sc = new BufferedReader(new FileReader("tokens.txt"));
  38.         BufferedWriter writer;
  39.         String line;
  40.  
  41.         for (String key : buffers.keySet()) {
  42.             writer = buffers.get(key);
  43.             invertedIndex.clear();
  44.             while ((line = sc.readLine()) != null) {
  45.                 String[] splitedLine = line.split(":");
  46.                 int docID = Integer.parseInt(splitedLine[0]);
  47.                 String second = splitedLine[1].replaceAll("[^a-zA-Z0-9 ]", "");
  48.                 List<String> tokens = new ArrayList<>(Arrays.asList(second.split(" ")));
  49.  
  50.                 for (String cnsmr : tokens) {
  51.  
  52.                     if (invertedIndex.containsKey(cnsmr)) {
  53.                         Map<Integer, Integer> temp = invertedIndex.get(cnsmr);
  54.                         if (temp.containsKey(docID)) {
  55.                             int cont = temp.get(docID);
  56.                             invertedIndex.get(cnsmr).put(docID, ++cont);
  57.                         } else {
  58.                             invertedIndex.get(cnsmr).put(docID, 1);
  59.                         }
  60.                     } else {
  61.                         Map<Integer, Integer> temp = new HashMap<>();
  62.                         temp.put(docID, 1);
  63.                         invertedIndex.put(cnsmr, temp);
  64.                     }
  65.                  
  66.                 }
  67.                
  68.                
  69.                
  70.  
  71.             }
  72.  
  73.         }
  74.  
  75.     }
  76. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement