Don't like ads? PRO users don't see any ads ;-)
Guest

Classifier

By: a guest on May 1st, 2010  |  syntax: VB.NET  |  size: 3.10 KB  |  hits: 114  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Imports System
  2. Imports System.Collections.Generic
  3. Imports System.Text
  4.  
  Namespace BayesClassifier
      ''' <summary>
      ''' Scores a text against a set of taught categories. Each category
      ''' accumulates phrase counts through the Teach* methods, and
      ''' <see cref="Classify"/> returns one log-scale score per category.
      ''' </summary>
      Public Class Classifier
          Implements BayesClassifier.IClassifier

          ''' <summary>
          ''' Stand-in count used when a category has never seen a phrase, so
          ''' that Math.Log is not handed a zero numerator.
          ''' </summary>
          Private Const UnseenPhraseCount As Double = 0.01

          ' Taught categories, keyed by the category name passed to the Teach* methods.
          Private ReadOnly m_Categories As SortedDictionary(Of String, ICategory)

          ' Word exclusion list handed to every category this classifier creates.
          Private ReadOnly m_ExcludedWords As ExcludedWords

          ''' <summary>
          ''' Creates a classifier with no categories and an exclusion list
          ''' initialised through ExcludedWords.InitDefault.
          ''' </summary>
          Public Sub New()
              m_Categories = New SortedDictionary(Of String, ICategory)()
              m_ExcludedWords = New ExcludedWords()
              m_ExcludedWords.InitDefault()
          End Sub

          ''' <summary>
          ''' Sums TotalWords over every registered category.
          ''' </summary>
          ''' <returns>The combined word total; 0 when no category exists.</returns>
          Private Function CountTotalWordsInCategories() As Integer
              Dim total As Integer = 0
              ' Iterate through the interface: TotalWords is an ICategory member,
              ' so there is no need to downcast to the concrete Category type.
              For Each cat As ICategory In m_Categories.Values
                  total += cat.TotalWords
              Next
              Return total
          End Function

          ''' <summary>
          ''' Returns the category registered under <paramref name="cat"/>,
          ''' creating and registering a new Category when none exists yet.
          ''' </summary>
          ''' <param name="cat">Category name used as the dictionary key.</param>
          ''' <returns>The existing or newly created category.</returns>
          Private Function GetOrCreateCategory(ByVal cat As String) As ICategory
              ' Typed local: TryGetValue writes straight into it and no implicit
              ' Object-to-ICategory narrowing happens on return.
              Dim existing As ICategory = Nothing
              If m_Categories.TryGetValue(cat, existing) Then
                  Return existing
              End If

              Dim created As ICategory = New Category(cat, m_ExcludedWords)
              m_Categories.Add(cat, created)
              Return created
          End Function

          ''' <summary>
          ''' Forwards <paramref name="phrases"/> to the named category,
          ''' creating the category first if necessary.
          ''' </summary>
          ''' <param name="cat">Category name.</param>
          ''' <param name="phrases">Phrases passed to the category's TeachPhrases.</param>
          Public Sub TeachPhrases(ByVal cat As String, ByVal phrases As String()) Implements IClassifier.TeachPhrases
              GetOrCreateCategory(cat).TeachPhrases(phrases)
          End Sub

          ''' <summary>
          ''' Forwards the reader to the named category's TeachCategory,
          ''' creating the category first if necessary.
          ''' </summary>
          ''' <param name="cat">Category name.</param>
          ''' <param name="tr">Reader passed to the category's TeachCategory.</param>
          Public Sub TeachCategory(ByVal cat As String, ByVal tr As System.IO.TextReader) Implements IClassifier.TeachCategory
              GetOrCreateCategory(cat).TeachCategory(tr)
          End Sub

          ''' <summary>
          ''' Scores the text read from <paramref name="tr"/> against every
          ''' taught category.
          ''' </summary>
          ''' <param name="tr">Reader supplying the text to classify.</param>
          ''' <returns>
          ''' A dictionary keyed by category name. Each value is the sum, over
          ''' the phrases enumerated from the input, of Log(count / TotalWords)
          ''' for that category (with <see cref="UnseenPhraseCount"/> standing in
          ''' for a zero count), plus Log(TotalWords / combined word total).
          ''' </returns>
          Public Function Classify(ByVal tr As System.IO.StreamReader) As Dictionary(Of String, Double) Implements IClassifier.Classify
              ' Start every category at zero so the accumulation below can use +=.
              Dim score As New Dictionary(Of String, Double)()
              For Each entry As KeyValuePair(Of String, ICategory) In m_Categories
                  score.Add(entry.Value.Name, 0.0R)
              Next

              ' Load the input into a throwaway, unnamed category so it is broken
              ' into phrases by the same machinery used for teaching.
              Dim words_in_file As New EnumerableCategory("", m_ExcludedWords)
              words_in_file.TeachCategory(tr)

              For Each kvp1 As KeyValuePair(Of String, PhraseCount) In words_in_file
                  Dim pc_in_file As PhraseCount = kvp1.Value
                  For Each kvp As KeyValuePair(Of String, ICategory) In m_Categories
                      Dim cat As ICategory = kvp.Value
                      Dim count As Integer = cat.GetPhraseCount(pc_in_file.RawPhrase)
                      If 0 < count Then
                          score(cat.Name) += System.Math.Log(CDbl(count) / CDbl(cat.TotalWords))
                      Else
                          score(cat.Name) += System.Math.Log(UnseenPhraseCount / CDbl(cat.TotalWords))
                      End If

                      ' Concatenate with & and an explicit ToString: using + between
                      ' a String and a Double makes VB attempt numeric addition,
                      ' which throws InvalidCastException for non-numeric text.
                      System.Diagnostics.Trace.WriteLine(pc_in_file.RawPhrase.ToString() & "(" & cat.Name & ")" & score(cat.Name).ToString())
                  Next
              Next

              ' The combined total does not change while scoring, so compute it once.
              Dim totalWords As Double = CDbl(Me.CountTotalWordsInCategories())
              For Each kvp As KeyValuePair(Of String, ICategory) In m_Categories
                  Dim cat As ICategory = kvp.Value
                  ' Weight each category by its share of all taught words.
                  score(cat.Name) += System.Math.Log(CDbl(cat.TotalWords) / totalWords)
              Next
              Return score
          End Function
      End Class
  End Namespace