// Untitled

import java.io.*;
import java.util.*;

public class Assignment1 {

    public static void main(String[] args) {
        try {
            System.out.print("Enter name of a directory> ");
            Scanner scan = new Scanner(System.in);
            File dir = new File(scan.nextLine());
            File[] fileList = dir.listFiles();

            //stop words array
            List<String> stopWords = Arrays.asList("i", "was", "the", "and", "am", "an", "it", "is", "a", "of", "&", "for", "this", "in", "with");

            //stemmer stuff
            Stemmer s = new Stemmer();

            String filePath = dir.toString();

            //Map<String, String> wordanDoc = new HashMap<String, String>();
            Map<String, String> invIndex = new HashMap<String, String>();
            Set<String> allWords= new HashSet<String>();
            for(File f: fileList) {

                Scanner sc = new Scanner(f);

                while(sc.hasNextLine()) {
                    StringTokenizer st = new StringTokenizer(sc.nextLine());
                    while (st.hasMoreTokens()) {
                        String word = st.nextToken();

                        //file location testing purpose: C:\Users\brobs\OneDrive\Desktop\Classes\taglines\taglines
                        //       /home/brobs0111/Documents/taglines/
                        ///     /home/brobs0111/Documents/alldocs/


                        //replace all punc besides hyphens and forward slashes and convert all to lower case
                        word = word.replaceAll("[^\\w\\/\\+\\-]","").toLowerCase();

                        //if the word is a stop word it breaks out of loop
                        if(stopWords.contains(word) )
                            continue;

                        //the current file path is saved to docName then everything besides the name is removed
                        String docName=f.getPath()
                                .replace(filePath,"")
                                .replace("/","")
                                .replace(".txt","");


                        //if(invIndex.containsValue(word)){
                         //   String temp = wordanDoc.get(word).concat("," + docName);
                         //   wordanDoc.put(word, temp);
                        //}else{
                        //    wordanDoc.put(word, docName);
                        //}
                        allWords.add(word);
                        // adding to stemmer
                        for (int i=0; i<word.length(); i++)
                        {  if(!word.contains("-"))
                            if (Character.isLetter(word.charAt(i))) {
                                s.add(word.charAt(i));
                            }
                        }
                        //prevent hyphened words from being stemmed
                        if(!word.contains("-")) {
                            s.stem();
                            if(invIndex.containsKey(s.toString())){
                                String temp = invIndex.get(s.toString()).concat("," + docName);
                                invIndex.put(s.toString(), temp);}
                            else invIndex.put(s.toString(), docName);
                        }
                        //adds unstemmed word to wordIndex
                        else {
                            if(invIndex.containsKey(word)){
                                String temp = invIndex.get(word).concat("," + docName);
                            invIndex.put(word, temp);}
                            else invIndex.put(word, docName);

                        }
                    }
                }
            }

            //Map<String, String> sorted= new TreeMap<>();
            //sorted.putAll(wordanDoc);
            //for(Map.Entry<String, String> entry: sorted.entrySet())
              //  System.out.println("Word: " + entry.getKey()+ "   DocID's<" + entry.getValue() + ">");

            //wordanDoc.entrySet().forEach(entry->{
            //    System.out.println("Word: "+entry.getKey() + "|| DocID <" + entry.getValue()+"> ");
            //});

            invIndex.entrySet().forEach(entry->{
                System.out.println("Word: "+entry.getKey() + "|| DocID <" + entry.getValue()+"> ");
            });
        }

        catch(Exception e) {
            System.out.println("Error: " + e.toString());
        }
    }
}