Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- import java.util.Scanner;
- import java.util.TreeMap;
- /**
- *
- * @author Pedro
- */
- public class Indexer {
- private Map<String, BufferedWriter> buffers = new HashMap<>();
- public Indexer() throws IOException {
- buffers.put("a", new BufferedWriter(new FileWriter("a.txt")));
- buffers.put("bc", new BufferedWriter(new FileWriter("bc.txt")));
- buffers.put("def", new BufferedWriter(new FileWriter("def.txt")));
- buffers.put("ghijk", new BufferedWriter(new FileWriter("ghijk.txt")));
- buffers.put("lmnp", new BufferedWriter(new FileWriter("lmnp.txt")));
- buffers.put("pqrs", new BufferedWriter(new FileWriter("pqrs.txt")));
- buffers.put("tuv", new BufferedWriter(new FileWriter("tuv.txt")));
- buffers.put("wxyz", new BufferedWriter(new FileWriter("wxyz.txt")));
- }
- public void index() throws FileNotFoundException, IOException {
- Map<String, Map<Integer, Integer>> invertedIndex = new TreeMap<>();
- BufferedReader sc = new BufferedReader(new FileReader("tokens.txt"));
- BufferedWriter writer;
- String line;
- for (String key : buffers.keySet()) {
- writer = buffers.get(key);
- invertedIndex.clear();
- while ((line = sc.readLine()) != null) {
- String[] splitedLine = line.split(":");
- int docID = Integer.parseInt(splitedLine[0]);
- String second = splitedLine[1].replaceAll("[^a-zA-Z0-9 ]", "");
- List<String> tokens = new ArrayList<>(Arrays.asList(second.split(" ")));
- for (String cnsmr : tokens) {
- if (invertedIndex.containsKey(cnsmr)) {
- Map<Integer, Integer> temp = invertedIndex.get(cnsmr);
- if (temp.containsKey(docID)) {
- int cont = temp.get(docID);
- invertedIndex.get(cnsmr).put(docID, ++cont);
- } else {
- invertedIndex.get(cnsmr).put(docID, 1);
- }
- } else {
- Map<Integer, Integer> temp = new HashMap<>();
- temp.put(docID, 1);
- invertedIndex.put(cnsmr, temp);
- }
- }
- }
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement