Advertisement
Guest User

Untitled

a guest
Nov 18th, 2019
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 10.77 KB | None | 0 0
  1. import java.io.IOException;
  2. import java.nio.file.Path;
  3. import java.util.ArrayList;
  4. import java.util.Collection;
  5. import java.util.Collections;
  6. import java.util.HashMap;
  7. import java.util.Map;
  8. import java.util.Set;
  9. import java.util.TreeMap;
  10. import java.util.TreeSet;
  11.  
  12. /**
  13.  * This is inverted index data structure that maps words to a map to locate where the word is in file.
  14.  * @author Alex Kowalczuk
  15.  * @class Software Developer 212 at University of San Francisco (Fall 2019)
  16.  */
  17. public class InvertedIndex {
  18.    
  19.     /**
  20.  * This declare TreeMap for my inverted index
  21.  */
  22. private final TreeMap<String, TreeMap<String, TreeSet<Integer>>> invertedIndex;
  23.  
  24. /**
  25.  * This declare new TreeMap for my count function
  26.  */
  27. private final TreeMap<String, Integer> counts;
  28.  
  29. /**
  30.  * This is constructor to the inverted index class.
  31.  */
  32. public InvertedIndex() {
  33.     this.invertedIndex = new TreeMap<>();
  34.     this.counts = new TreeMap<>();
  35. }
  36.  
  37. /**
  38.  * This function adds the array of words.
  39.  * @param other inverted index
  40.  */
  41. public void addAll(InvertedIndex other) {
  42.     for (String key : other.invertedIndex.keySet()) {
  43.         if (this.invertedIndex.containsKey(key) == false) {
  44.             this.invertedIndex.put(key, other.invertedIndex.get(key));
  45.         } else {
  46.             for (String path: other.invertedIndex.get(key).keySet()) {
  47.                 if (this.invertedIndex.get(key).containsKey(path) == false) {
  48.                     this.invertedIndex.get(key).put(path, other.invertedIndex.get(key).get(path));
  49.                 } else {
  50.                     this.invertedIndex.get(key).get(path).addAll(other.invertedIndex.get(key).get(path));
  51.                 }
  52.                
  53.             }
  54.         }
  55.     }
  56.     for (String path: other.counts.keySet()) {
  57.         if (this.counts.containsKey(path) == false) {
  58.             this.counts.put(path, other.counts.get(path));
  59.         } else {
  60.             this.counts.put(path, Math.max(this.counts.get(path), other.counts.get(path)));
  61.            
  62.         }
  63.     }
  64. }
  65.  
  66. /**
  67.  * This function adds the array of words at once with default start at position 1
  68.  * @param path path
  69.  * @param element array of elements to add
  70.  */
  71. public void addAll(String path, String[] element) {
  72.     addAll(path, element, 1);
  73. }
  74.  
  75. /**
  76.  * This function adds the array of words at once with provided start position.
  77.  * @param path path
  78.  * @param element array of elements to add
  79.  * @param start starting position
  80.  */
  81. public void addAll(String path, String[] element, int start) {
  82.     for (String word: element) {
  83.         addEntry(word, path, start++);
  84.     }
  85. }
  86.  
  87.  
  88. /**
  89.  * This function Add Entry asserts important files and make sure its postition is right.
  90.  * @param element
  91.  * @param path
  92.  * @param position
  93.  * @return true if the data structure was modified as a result of add()
  94.  */
  95. public boolean addEntry(String element, String path, int position) {
  96.     invertedIndex.putIfAbsent(element, new TreeMap<String, TreeSet<Integer>>());
  97.     invertedIndex.get(element).putIfAbsent(path, new TreeSet<Integer>());
  98.     boolean added = this.invertedIndex.get(element).get(path).add(position);
  99.     this.counts.putIfAbsent(path, position);
  100.    
  101.     if (position > counts.get(path)) {
  102.         this.counts.put(path, position);
  103.     }
  104.     return added;
  105. }
  106.  
  107. /**
  108.  * This function return unmodifiable map of counts.
  109.  * @return num of counts which is unmodifiable Map
  110.  */
  111. public Map<String, Integer> getCounts() {
  112.     return Collections.unmodifiableMap(counts);
  113. }
  114.  
  115. /**
  116.  * This function print invertedIndex in a Json format to the output file.
  117.  * @param outFile that uses SimpleJsonWriter to print out our file
  118.  * @throws IOException
  119.  */
  120. public void printIndex(Path outFile) throws IOException {
  121.         SimpleJsonWriter.asDoubleNested(invertedIndex, outFile);
  122. }
  123.  
  124. /**
  125.  * This function check if the map contains the specific word.
  126.  * @param word to look in
  127.  * @return true is is passed
  128.  */
  129. public boolean contains(String word) {
  130.     return invertedIndex.containsKey(word);
  131. }
  132.  
  133. /**
  134.  * This function check if the map has the specific word and if word contain path.
  135.  * @param word to check if have it
  136.  * @param path to check if word has path
  137.  * @return true if has, false if not
  138.  */
  139. public boolean contains(String word, String path) {
  140.     return contains(word) ? invertedIndex.get(word).containsKey(path) : false;
  141. }
  142.  
  143. /**
  144.  * This function check if the map contains the specific word, path and index.
  145.  * @param word check if the map contains the word.
  146.  * @param path check if the map contains the path.
  147.  * @param index check if the map contains the index.
  148.  * @return true/false
  149.  */
  150. public boolean contains(String word, String path, int index) {
  151.     return contains(word, path) ? invertedIndex.get(word).get(path).contains(index) : false;
  152. }
  153.  
  154. /**
  155.  * This function returns how many words is in inverted index.
  156.  * @return size of inverted index.
  157.  */
  158. public int size() {
  159.     return invertedIndex.size();
  160. }
  161.  
  162. /**
  163.  * This function returns how many paths are found in word.
  164.  * @param word to check
  165.  * @return amount of paths, return 0 if not found
  166.  */
  167. public int size(String word) {
  168.     return contains(word) ? invertedIndex.get(word).size() : 0;
  169. }
  170.  
  171. /**
  172.  * This function get word as unmodifiable set of words if null will return empty set.
  173.  * @return unmodifiable set of words.
  174.  */
  175. public Set<String> getWords() {
  176.     return Collections.unmodifiableSet(this.invertedIndex.keySet());
  177. }
  178.  
  179. /**
  180.  * This function gets unmodifiable location of our inverted index.
  181.  * @param word we check location.
  182.  * @return unmodifiable set of word locations.
  183.  */
  184. public Set<String> getLocations(String word) {
  185.     if (this.invertedIndex.get(word) == null) {
  186.         return Collections.emptySet();
  187.     } else {
  188.         return Collections.unmodifiableSet(this.invertedIndex.get(word).keySet());
  189.     }
  190. }
  191.  
  192. /**
  193.  * This function gets unmodifiable position of our inverted index if null will return empty set.
  194.  * @param word we use
  195.  * @param location we look
  196.  * @return unmodifiable set of position.
  197.  */
  198. public Set<Integer> getPositions(String word, String location) {
  199.     if (this.invertedIndex.get(word).get(location) == null) {
  200.         return Collections.emptySet();
  201.     } else {
  202.         return Collections.unmodifiableSet(this.invertedIndex.get(word).get(location));
  203.     }
  204. }
  205.  
  206. /**
  207.  * This is search function that is calling partial or exact function to make search
  208.  * @param queries
  209.  * @param exact
  210.  * @return exact / partial search
  211.  */
  212. public ArrayList<SearchResult> search(Collection<String> queries, boolean exact) {
  213.         return exact ? exactSearch(queries) : partialSearch(queries);
  214.  }
  215.  
  216. /**
  217.  * This function process exact search
  218.  * @param queries to search
  219.  * @return results of the search
  220.  */
  221. public ArrayList<SearchResult> exactSearch(Collection<String> queries) {
  222.     ArrayList<SearchResult> results = new ArrayList<>();
  223.     HashMap<String, SearchResult> track = new HashMap<>();
  224.    
  225.         for (String query : queries) {
  226.             if (invertedIndex.containsKey(query)) {
  227.                 searchHelper(results, query, track);
  228.             }
  229.         }
  230.         Collections.sort(results);
  231.         return results;
  232.   }
  233.  
  234. /**
  235.  * This function process partial search
  236.  * @param queries to search
  237.  * @return results of the search
  238.  */
  239. public ArrayList<SearchResult> partialSearch(Collection<String> queries) {
  240.     ArrayList<SearchResult> results = new ArrayList<>();
  241.     HashMap<String, SearchResult> track = new HashMap<>();
  242.  
  243.     for (String query : queries) {
  244.         for (String word : this.invertedIndex.tailMap(query).keySet()) {
  245.             if (word.startsWith(query)) {
  246.                 searchHelper(results, word, track);
  247.             } else {
  248.                 break;
  249.             }
  250.         }
  251.     }
  252.     Collections.sort(results);
  253.     return results;
  254. }
  255.  
  256. /**
  257.  * This function is a helper method used in search methods.
  258.  * @param results list of objects.
  259.  * @param word to searched.
  260.  * @param track for updating and track counts and scores.
  261.  */
  262. private void searchHelper(ArrayList<SearchResult> results, String word, Map<String, SearchResult> track) {
  263.     for (String location : this.invertedIndex.get(word).keySet()) {
  264.         if (track.containsKey(location)) {
  265.             track.get(location).updateCount(word);
  266.         } else {
  267.             SearchResult result = new SearchResult(location);
  268.             result.updateCount(word);
  269.             track.put(location, result);
  270.             results.add(result);
  271.         }
  272.     }
  273. }
  274.  
  275. /**
  276.  * This function is inner SearchResult class that implements Comparable.
  277.  */
  278. public class SearchResult implements Comparable<SearchResult>{
  279.  
  280. /**
  281.  * This will hold the location of the search result.
  282.  */
  283. private final String location;
  284.  
  285. /**
  286.  * This will hold the count of the search result.
  287.  */
  288. private int count;
  289.  
  290. /**
  291.  * This will hold the score of the search result.
  292.  */
  293. private double score;
  294.  
  295. /**
  296.  * This function is Constructor for SearchResult objects.
  297.  * @param location to construct
  298.  *
  299.  */
  300. public SearchResult(String location) {
  301.     this.location = location;
  302.     this.count = 0;
  303.     this.score = 0;
  304. }
  305.  
  306. /**
  307.  * This function is to debug constructor.
  308.  * @param location set
  309.  * @param count set
  310.  * @param score set
  311.  */
  312. public SearchResult(String location, int count, double score) {
  313.     this.location = location;
  314.     this.count = count;
  315.     this.score = score;
  316. }
  317.  
  318. /**
  319.  * This function is to update the count
  320.  * @param word to update
  321.  */
  322. private void updateCount(String word) {
  323.     this.count += invertedIndex.get(word).get(this.location).size();
  324.     this.score = (double) this.count / counts.get(this.location);
  325. }
  326.  
  327. /**
  328.  * This function is getter for the count data member.
  329.  * @return the count data member
  330.  */
  331. public int getCount() {
  332.     return this.count;
  333. }
  334.  
  335. /**
  336.  * This function is getter for the score data member.
  337.  * @return the score
  338.  */
  339. public double getScore() {
  340.     return this.score;
  341. }
  342.  
  343. /**
  344.  * This function check if another results location is the same as this ones.
  345.  * @param other location
  346.  * @return true if same;
  347.  */
  348. public boolean sameLocation(SearchResult other) {
  349.     return this.location.compareTo(other.location) == 0;
  350. }
  351.  
  352. @Override
  353. public String toString() {
  354.     String out = "";
  355.  
  356. out += "\"where\": ";
  357. out += "\"" + this.location + "\",\n";
  358.  
  359. out += "\"count\": ";
  360. out += "\"" + this.count + "\",\n";
  361.  
  362. out += "\"score\": ";
  363. out += "\"" + this.score + "\"";
  364.  
  365.     return out;
  366. }
  367.  
  368. /**
  369.  * @return This function formats string to write into file for location.
  370.  */
  371. public String getWhereString() {
  372.     return ("\"where\": " + "\"" + this.location + "\",");
  373. }
  374.  
  375. /**
  376.  * @return This function formats string to write into file for count.
  377.  */
  378. public String getCountString() {
  379.     return ("\"count\": " + this.count + ",");
  380. }
  381.  
  382. /**
  383.  * @return This function formats string to write into file for location.
  384.  */
  385. public String getScoreString() {
  386.     return ("\"score\": " + String.format("%.8f", this.score));
  387.         }
  388.  
  389.         @Override
  390.         public int compareTo(SearchResult o) {
  391.             double scoreDiff = this.score - o.score;
  392.  
  393.             if(scoreDiff != 0) {
  394.                 return scoreDiff > 0 ? -1 : 1;
  395.             } else {
  396.                 int countDiff = this.count - o.count;
  397.  
  398.                 if(countDiff != 0) {
  399.                     return countDiff> 0 ? -1 : 1;
  400.                 } else {
  401.                     return (this.location.toLowerCase().compareTo(o.location.toLowerCase()));
  402.                 }
  403.             }
  404.         }
  405.     }
  406.    
  407. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement