NoobsDeSroobs

Fuzzy logic is hard, it seems.

Apr 26th, 2016
448
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 3.99 KB | None | 0 0
  1.  
  2.    
  3.     /**
  4.      * Constructs the prior distribution for the classifier
  5.      *
  6.      * @param store the message store
  7.      */
  8.     private void constructPrior(MessageStore store) {  
  9.         prior = new HashMap<Integer,Double>();
  10.         IInvertedIndex[]  index = store.getIndexes();
  11.         double total = 0;
  12.         classLen = store.getIndexes().length;
  13.         for (int classNumber = 0 ; classNumber < store.getIndexes().length ; classNumber++) {
  14.             IInvertedIndex ii = index[classNumber];
  15.             total += ii.getDocumentStore().size();
  16.         }
  17.  
  18.         for (int classNumber = 0 ; classNumber < store.getIndexes().length ; classNumber++) {
  19.             double numDocsInClass = index[classNumber].getDocumentStore().size();
  20.             double localPrior = numDocsInClass/total;
  21.             prior.put(classNumber, localPrior);
  22.         }
  23.     }
  24.    
  25.    
  26.    
  27.     /**
  28.      * Constructs the likelihood distribution for the classifier
  29.      *
  30.      * @param store the message store
  31.      */
  32.     private void constructLikelihood(MessageStore store) {
  33.         likelihood = new HashMap<Integer,Map<Integer,Double>>();
  34.        
  35.         for (int classNumber = 0 ; classNumber < store.getIndexes().length ; classNumber++) {
  36.             likelihood.put(classNumber, new HashMap<Integer,Double>());
  37.             double allTermsInClass = 0;
  38.             IInvertedIndex localIndex = store.getIndexes()[classNumber];           
  39.             ILexicon localLexicon = localIndex.getLexicon();
  40.             for (int i = 0; i < localIndex.getDocumentStore().size(); i++) {
  41.                     allTermsInClass+=localIndex.getDocumentStore().getDocument(i).getNormalizedLength();
  42.             }
  43.             allTermsInClass+=globalLexicon.size();
  44.            
  45.  
  46.             HashMap<Integer, Double> localLikelihood = new HashMap<>();
  47.             for (Iterator<String> iterator =globalLexicon.iterator(); iterator.hasNext();) {
  48.                 String str = (String) iterator.next();
  49.  
  50.                 int termID = localLexicon.lookup(str);
  51.                 int globalTermID = globalLexicon.lookup(str);
  52.                 if(termID == ILexicon.INVALID){
  53.                     double likelihood = Math.log(1/allTermsInClass);
  54.                     localLikelihood.put(globalTermID, likelihood);
  55.                 }else{
  56.                     PostingList pl = localIndex.getPostingList(termID);
  57.                     double Tct = 0;
  58.                     for (int i = 0; i < pl.size(); i++) {
  59.                         Tct+=pl.getPosting(i).getOccurrenceCount();
  60.                     }
  61.                     double likelihood = (Tct+1)/allTermsInClass;
  62.                     localLikelihood.put(globalTermID, likelihood);
  63.                 }
  64.             }
  65.             likelihood.put(classNumber, localLikelihood);
  66.         }  
  67.     }
  68.    
  69.  
  70.     /**
  71.      * Classifies the document into one of the possible classes, given
  72.      * its content.  The returned value is the class number for the class
  73.      * which has the highest probability given the document content.
  74.      *
  75.      * @param documentContent the document content (already normalized and tokenized)
  76.      * @return the class with highest probability
  77.      */
  78.     public int classify (List<IToken> documentContent) {
  79.        
  80.         Sieve<Integer, Double> posterior = new Sieve<Integer, Double>(1);
  81.         /** Prior class distribution P(c) */
  82.         //Map<Integer,Double> prior;
  83.         Integer[] priorClassIDs = new Integer[prior.size()];
  84.         priorClassIDs = prior.keySet().toArray(priorClassIDs);
  85.         for (int classNumber = 0 ; classNumber < classLen ; classNumber++) {
  86.             double priorScore = Math.abs(prior.get(classNumber).doubleValue());
  87.             if(priorScore > 0){
  88.                 priorScore = Math.log(priorScore);
  89.             }
  90.            
  91.             HashMap<Integer, Double> likelihoodScores = (HashMap<Integer, Double>)likelihood.get(classNumber);
  92.             double likelihoodScore = priorScore;
  93.             for (Iterator<IToken> iterator = documentContent.iterator(); iterator.hasNext();) {
  94.                 IToken docTerm = (IToken) iterator.next();
  95.                 int globalTermID = globalLexicon.lookup(docTerm.getValue());
  96.                 if(globalTermID == ILexicon.INVALID){
  97.                     System.out.println("The fucking word is not in the global dictionary,");
  98.                     continue;
  99.                 }
  100.  
  101.                 double subScore = Math.abs(likelihoodScores.get(globalTermID));
  102.                
  103.                 if(subScore>0){
  104.                     likelihoodScore += Math.abs(Math.log(subScore));
  105.                 }
  106.             }
  107.             double totalClassScore = likelihoodScore;
  108.             System.out.println(totalClassScore);
  109.             posterior.sift(classNumber, totalClassScore);
  110.         }
  111.         return posterior.iterator().next().data;
  112.     }
Advertisement
Add Comment
Please, Sign In to add comment