- Fast loading of elements into array/list with fixed index without duplicates
import static java.lang.Math.log;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
/**
 * Builds a term-frequency table for the regular files found directly inside
 * one directory.
 *
 * Every regular file is treated as one document.  Each line is split on
 * non-word characters and every non-empty token is a term.  After
 * {@link #start()} has run, tf.get(t)[d] holds how often term number t
 * (position in termVector) occurs in document number d (position in
 * fileNameVector).
 */
class DocumentRepresentation
{
    private String dirPath;
    private ArrayList<String> fileNameVector;
    private ArrayList<String> termVector;
    /* term -> its position in termVector/tf; avoids a linear indexOf scan per token */
    private Map<String, Integer> termIndex;
    private ArrayList<Integer[]> tf; /* store it in natural 2d array */
    private Integer df[]; /* do normal 1d array */
    private Double idf[]; /* do normal 1d array */
    private Double tfIdf[][]; /* do normal 2d array */

    /**
     * @param dirPath path of the directory whose files are the documents
     */
    DocumentRepresentation (String dirPath)
    {
        this.dirPath = dirPath;
        fileNameVector = new ArrayList<String> ();
        termVector = new ArrayList<String> ();
        termIndex = new HashMap<String, Integer> ();
        tf = new ArrayList<Integer[]> ();
    }

    /* Later separate the internal works */
    /**
     * Lists the directory, prints the document names, then scans every
     * document and fills the term-frequency table.  Each scanned term is
     * echoed to standard output.
     *
     * A document that cannot be opened or read is reported on standard
     * output and skipped; its counts stay at whatever was read before the
     * failure.
     *
     * @return 0 on success, -1 when dirPath is not a directory or cannot be
     *         listed
     */
    public int start ()
    {
        File fileDir = new File (dirPath);
        if (!fileDir.isDirectory ())
        {
            return -1;
        }
        File fileList[] = fileDir.listFiles ();
        /* listFiles () yields null when the directory cannot be read */
        if (fileList == null)
        {
            return -1;
        }

        /* Start from a clean slate so that a second call does not double count */
        fileNameVector.clear ();
        termVector.clear ();
        termIndex.clear ();
        tf.clear ();

        /*
         * Keep the File objects of the regular files alongside their names:
         * indexing back into fileList would be off as soon as the listing
         * contains a sub-directory.
         */
        ArrayList<File> documents = new ArrayList<File> ();
        for (File entry : fileList)
        {
            if (entry.isFile ())
            {
                documents.add (entry);
                fileNameVector.add (entry.getName ());
            }
        }
        int fileCount = documents.size ();

        for (int i = 0; i < fileCount; i++)
        {
            System.out.print ("Name " + (i + 1) + ": " + fileNameVector.get (i) + "\n");
        }

        /* Scan the term frequencies into the tf table, one document at a time */
        for (int i = 0; i < fileCount; i++)
        {
            try
            {
                scanDocument (documents.get (i), i, fileCount);
            }
            /* Reported only; the remaining documents are still processed */
            catch (IOException e)
            {
                System.out.println (e);
            }
        }
        return 0;
    }

    /**
     * Returns how often a term occurred in a document during the last
     * {@link #start()} run.
     *
     * @param term     the term to look up
     * @param fileName name (without directory) of the document
     * @return the recorded count, or 0 when the term or the document is unknown
     */
    public int getTermFrequency (String term, String fileName)
    {
        Integer row = termIndex.get (term);
        int column = fileNameVector.indexOf (fileName);
        if (row == null || column < 0)
        {
            return 0;
        }
        return tf.get (row.intValue ())[column].intValue ();
    }

    /* Reads one document line by line and counts each of its terms; always closes the reader. */
    private void scanDocument (File document, int docIndex, int fileCount) throws IOException
    {
        BufferedReader reader = new BufferedReader (new FileReader (document));
        try
        {
            String line;
            while ((line = reader.readLine ()) != null)
            {
                String words[] = line.split ("[\\W]");
                for (int j = 0; j < words.length; j++)
                {
                    /* split leaves empty strings between adjacent separators and for blank lines */
                    if (words[j].length () == 0)
                    {
                        continue;
                    }
                    countTerm (words[j], docIndex, fileCount);
                    System.out.println (words[j]);
                }
            }
        }
        finally
        {
            reader.close ();
        }
    }

    /* Adds one occurrence of term for the given document, registering the term on first sight. */
    private void countTerm (String term, int docIndex, int fileCount)
    {
        Integer row = termIndex.get (term);
        if (row != null)
        {
            /* increase the tf count */
            tf.get (row.intValue ())[docIndex]++;
            return;
        }

        Integer counts[] = new Integer [fileCount];
        for (int k = 0; k < fileCount; k++)
        {
            counts[k] = 0;
        }
        counts[docIndex] = 1;

        /* The current size is the position the new term is about to occupy */
        termIndex.put (term, termVector.size ());
        termVector.add (term);
        tf.add (counts);
    }
}
- class DocumentRepresentationTest
- {
- public static void main (String args[])
- {
- DocumentRepresentation docSet = new DocumentRepresentation (args[0]);
- docSet.start ();
- System.out.print ("n");
- }
- }
- LinkedHashMap<Item,Integer> map = new LinkedHashMap<Item,Integer>(); // maps each item to its fixed index; `Item` is not declared here (placeholder for the element type, e.g. String for terms -- confirm)
- Integer index = map.get(item); // keyed lookup in place of a linear indexOf scan; null means `item` has no entry yet
- if (index != null) {
- // already in the map; use `index'
- } else {
- // not in the map
- }
- if (!map.containsKey(item)) { // insert only when absent, so an item already present keeps its index
- map.put(item, map.size()); // the current size becomes the new item's index: 0, 1, 2, ...
- }
- for (Entry<Item,Integer> e : map.entrySet()) { // presumably java.util.Map.Entry (import not shown); LinkedHashMap iterates in insertion order per the JDK docs
- Item item = e.getKey();
- int index = e.getValue(); // the index assigned when the item was first put
- ...
- }