Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 2nd, 2012  |  syntax: None  |  size: 3.50 KB  |  hits: 17  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Fast loading of elements into array/list with fixed index without duplicates
  import java.io.*;
  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.Map;
  import static java.lang.Math.log;
  5.  
  6.  
  /**
   * Builds a term-frequency table for the regular files found directly inside
   * one directory.
   *
   * Each distinct term gets a fixed row index the first time it is seen; the
   * row holds one counter per document.  Term lookup goes through a hash map,
   * so indexing a word costs O(1) on average instead of a linear scan of the
   * term list.
   */
  class DocumentRepresentation
  {
    private String dirPath;
    /* Names of the indexed documents; position == document (column) index. */
    private ArrayList<String> fileNameVector;
    /* Distinct terms in first-seen order; position == term (row) index. */
    private ArrayList<String> termVector;
    /* term -> its row index in termVector / tf (replaces termVector.indexOf). */
    private Map<String, Integer> termIndex;
    /* tf.get (row)[doc] == occurrences of term `row' in document `doc'. */
    private ArrayList<int[]> tf;
    /* The three tables below are declared for later stages and are not filled in yet. */
    private Integer df[]; /* do normal 1d array */
    private Double idf[]; /* do normal 1d array */
    private Double tfIdf[][]; /* do normal 2d array */

    /**
     * @param dirPath path of the directory whose files will be indexed by start ()
     */
    DocumentRepresentation (String dirPath)
    {
      this.dirPath = dirPath;
      fileNameVector = new ArrayList<String> ();
      termVector = new ArrayList<String> ();
      termIndex = new HashMap<String, Integer> ();
      tf = new ArrayList<int[]> ();
    }

    /* Later separate the internal works */
    /**
     * Lists the regular files of the directory, prints their names, then reads
     * every file and counts its terms.  Every counted term is echoed to
     * standard output.  A file that cannot be opened or read is reported on
     * standard output and skipped; it does not abort the run.
     *
     * @return 0 on success, -1 if dirPath is not a directory or cannot be listed
     */
    public int start ()
    {
      File fileDir = new File (dirPath);
      if (!fileDir.isDirectory ())
      {
        return -1;
      }

      /* listFiles () returns null when the directory cannot be read. */
      File fileList[] = fileDir.listFiles ();
      if (fileList == null)
      {
        return -1;
      }

      /* Start from a clean slate so that a second call does not double count. */
      fileNameVector.clear ();
      termVector.clear ();
      termIndex.clear ();
      tf.clear ();

      /*
       * Keep the File objects of the regular files only.  Document index i
       * must refer to the same file in both lists, so sub-directories are
       * dropped here rather than indexing into the raw listing later.
       */
      ArrayList<File> documents = new ArrayList<File> ();
      for (int i = 0; i < fileList.length; i++)
      {
        if (fileList[i].isFile ())
        {
          documents.add (fileList[i]);
          fileNameVector.add (fileList[i].getName ());
        }
      }

      int fileCount = documents.size ();
      for (int i = 0; i < fileCount; i++)
      {
        System.out.print ("Name " + (i + 1) + ": " + fileNameVector.get (i) + "\n");
      }

      /* Scan the term frequencies into the tf table, one document at a time. */
      for (int i = 0; i < fileCount; i++)
      {
        try
        {
          countTerms (documents.get (i), i, fileCount);
        }
        /* Covers FileNotFoundException too; report and go on with the next file. */
        catch (IOException e)
        {
          System.out.println (e);
        }
      }

      return 0;
    }

    /* Reads one document and adds its term occurrences to column docIndex of tf. */
    private void countTerms (File document, int docIndex, int fileCount) throws IOException
    {
      /* NOTE: FileReader decodes with the platform default charset. */
      BufferedReader reader = new BufferedReader (new FileReader (document));
      try
      {
        String line;
        while ((line = reader.readLine ()) != null)
        {
          String words[] = line.split ("[\\W]");

          for (int j = 0; j < words.length; j++)
          {
            String word = words[j];

            /* Adjacent separators ("foo, bar") yield empty tokens; they are not terms. */
            if (word.length () == 0)
            {
              continue;
            }

            Integer row = termIndex.get (word);
            if (row != null)
            {
              /* increase the tf count */
              tf.get (row.intValue ())[docIndex]++;
            }
            else
            {
              /* New term: its row index is the current number of terms. */
              termIndex.put (word, Integer.valueOf (termVector.size ()));
              termVector.add (word);

              int counts[] = new int [fileCount]; /* zero-initialised */
              counts[docIndex] = 1;
              tf.add (counts);
            }

            System.out.println (word);
          }
        }
      }
      finally
      {
        /* Always release the file handle, even when reading fails. */
        reader.close ();
      }
    }
  }
  117.  
  118. class DocumentRepresentationTest
  119. {
  120.   public static void main (String args[])
  121.   {
  122.     DocumentRepresentation docSet = new DocumentRepresentation (args[0]);
  123.     docSet.start ();
  124.     System.out.print ("n");
  125.   }
  126. }
  127.        
  128. LinkedHashMap<Item,Integer> map = new LinkedHashMap<Item,Integer>(); // item -> index assigned when the item is first inserted
  129.        // Lookup: get() returns null for an item that has no index yet.
  130. Integer index = map.get(item);
  131. if (index != null) {
  132.   // already in the map; use `index'
  133. } else {
  134.   // not in the map
  135. }
  136.        // Insert without duplicates: map.size() is the next unused index (0, 1, 2, ...) as long as nothing is removed.
  137. if (!map.containsKey(item)) {
  138.   map.put(item, map.size());
  139. }
  140.        // Walk all (item, index) pairs; a LinkedHashMap iterates in insertion order.
  141. for (Entry<Item,Integer> e : map.entrySet()) {
  142.   Item item = e.getKey();
  143.   int index = e.getValue();
  144.   ...
  145. }