Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package com.company;
- import java.io.*;
- import java.nio.file.Files;
- import java.nio.file.Paths;
- import java.util.*;
- public class InvertedIndex {
- public static Map<String, List<Coordinates>> invertedIndex = new TreeMap<>();
- public static Map<String, Double> IDF = new TreeMap<>();
- public static String text;
- public static Map<String, List<String>> words=new TreeMap<>();
- public static File[] files;
- public static void setValue (String var, int value, String name) {
- if (invertedIndex.containsKey(var)) {
- if (invertedIndex.get(var).get(invertedIndex.get(var).size()-1).getName().equals(name)){
- invertedIndex.get(var).get(invertedIndex.get(var).size()-1).add(value);
- }
- else {
- Coordinates otherCoordinates = new Coordinates();
- otherCoordinates.fileName=name;
- otherCoordinates.add(value);
- invertedIndex.get(var).add(otherCoordinates);
- }
- } else {
- List<Coordinates> newList = new ArrayList<>();
- Coordinates newCoordinates = new Coordinates();
- newCoordinates.fileName=name;
- newCoordinates.add(value);
- newList.add(newCoordinates);
- invertedIndex.put(var, newList);
- }
- }
- public static void main() throws IOException {
- File file = new File("E:\\SomeDir");
- if(file.isDirectory()) {
- files = file.listFiles();
- if (files != null) {
- for (File f : files) {
- try {
- text = new String(Files.readAllBytes(Paths.get(new File(f.getCanonicalPath()).toURI())));
- List<String> text2 = Arrays.asList(
- text
- .replaceAll("[^a-zA-Zа-яА-Я1-9-]", " ")
- .toLowerCase()
- .split("\\s+")
- );
- words.put(f.getName(),text2);
- for (int number = 0; number < words.get(f.getName()).size(); number++) {
- setValue(words.get(f.getName()).get(number), number, f.getName());
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- for(Map.Entry<String, List<Coordinates>> item: invertedIndex.entrySet()){
- System.out.print(item.getKey()+" ");
- for (Coordinates coordinate : item.getValue()){
- coordinate.get();
- }
- System.out.println();
- }
- System.out.println("IDF: ");
- // Заполняем IDF
- for(String item2: invertedIndex.keySet()) {
- IDF.put(item2, Math.log10(words.size() / invertedIndex.get(item2).size()));
- System.out.println(IDF.get(item2));
- }
- }
- else {
- System.out.println("Здесь нет файлов");
- }
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement