document.write('
Data hosted with ♥ by Pastebin.com - Download Raw - See Original
  1. import java.io.BufferedReader;
  2. import java.io.File;
  3. import java.io.FileReader;
  4. import java.io.IOException;
  5. import java.util.ArrayList;
  6. import java.util.Collections;
  7. import java.util.List;
  8.  
  /**
   * A labeled text document, held in memory as the ordered list of
   * whitespace-separated tokens read from a file.
   *
   * <p>The label and the word list are fixed at construction time; only the
   * optional GUID can be changed afterwards.
   */
  public class Document {
      /** Regex that separates tokens: one or more whitespace characters. */
      private static final String TOKEN_SEPARATOR = "\\s+";

      private final String label;
      private final List<String> words;
      private String guid;

      /**
       * Reads {@code file} line by line and stores every non-empty
       * whitespace-separated token, in file order.
       *
       * <p>The file is decoded by {@link FileReader}, i.e. with the JVM's
       * default charset.
       *
       * @param file  the text file to tokenize
       * @param label the class label attached to this document
       * @throws IOException if the file cannot be opened or read
       */
      public Document(File file, String label) throws IOException {
          this.label = label;
          this.words = new ArrayList<>();

          // try-with-resources guarantees the reader is closed even when
          // readLine() fails part-way through the file.
          try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
              String line;
              while ((line = reader.readLine()) != null) {
                  for (String token : line.split(TOKEN_SEPARATOR)) {
                      // Blank lines and leading whitespace make split() emit
                      // an empty token; those are not words, so skip them.
                      if (!token.isEmpty()) {
                          words.add(token);
                      }
                  }
              }
          }
      }

      /**
       * Sets the identifier associated with this document.
       *
       * @param guid the identifier to store; may be {@code null}
       */
      public void setGUID(String guid) {
          this.guid = guid;
      }

      /**
       * Returns the identifier set through {@link #setGUID(String)}.
       *
       * @return the stored identifier, or {@code null} if none has been set
       */
      public String getGUID() {
          return this.guid;
      }

      /**
       * Returns the label given at construction time.
       *
       * <p>Recall that this is used in SpamClassifier.lbj.
       *
       * @return this document's label
       */
      public String getLabel() {
          return label;
      }

      /**
       * Returns the tokens of this document in file order.
       *
       * <p>As with {@link #getLabel()}, this is used in SpamClassifier.lbj.
       *
       * @return a read-only view of the word list; attempts to modify it throw
       *         {@link UnsupportedOperationException}
       */
      public List<String> getWords() {
          return Collections.unmodifiableList(words);
      }

      /**
       * Returns the label followed by the word list, separated by {@code ", "}.
       */
      @Override
      public String toString() {
          return label + ", " + words;
      }
  }
');