Imports System
Imports System.Collections.Generic
Imports System.Text
Namespace BayesClassifier
''' <summary>
''' Classifier that keeps a set of named categories, lets callers teach
''' phrases to each category, and scores an input text against every
''' known category.
''' </summary>
Public Class Classifier
    Implements BayesClassifier.IClassifier

    ' Known categories, keyed by the category name passed to the Teach* methods.
    Private m_Categories As SortedDictionary(Of String, ICategory)

    ' Excluded-word list shared by every category created by this classifier.
    Private m_ExcludedWords As ExcludedWords

    ''' <summary>
    ''' Creates a classifier with no categories and an excluded-word list
    ''' initialised through <c>ExcludedWords.InitDefault</c>.
    ''' </summary>
    Public Sub New()
        m_Categories = New SortedDictionary(Of String, ICategory)()
        m_ExcludedWords = New ExcludedWords()
        m_ExcludedWords.InitDefault()
    End Sub

    ''' <summary>
    ''' Sums <c>TotalWords</c> over every registered category.
    ''' </summary>
    ''' <returns>The combined word count of all categories (0 when there are none).</returns>
    Private Function CountTotalWordsInCategories() As Integer
        Dim total As Integer = 0
        ' Iterate through the interface: the dictionary stores ICategory, so
        ' narrowing to the concrete Category type could fail for other implementations.
        For Each cat As ICategory In m_Categories.Values
            total += cat.TotalWords
        Next
        Return total
    End Function

    ''' <summary>
    ''' Returns the category registered under <paramref name="cat"/>, creating
    ''' and registering a new <c>Category</c> when none exists yet.
    ''' </summary>
    ''' <param name="cat">Name of the category to look up or create.</param>
    ''' <returns>The existing or newly created category.</returns>
    Private Function GetOrCreateCategory(ByVal cat As String) As ICategory
        Dim c As ICategory = Nothing
        If Not m_Categories.TryGetValue(cat, c) Then
            c = New Category(cat, m_ExcludedWords)
            m_Categories.Add(cat, c)
        End If
        Return c
    End Function

    ''' <summary>
    ''' Teaches the given phrases to the named category, creating the
    ''' category if it does not exist.
    ''' </summary>
    ''' <param name="cat">Name of the category to teach.</param>
    ''' <param name="phrases">Phrases forwarded to the category's <c>TeachPhrases</c>.</param>
    Public Sub TeachPhrases(ByVal cat As String, ByVal phrases As String()) Implements IClassifier.TeachPhrases
        GetOrCreateCategory(cat).TeachPhrases(phrases)
    End Sub

    ''' <summary>
    ''' Teaches the content of a text reader to the named category, creating
    ''' the category if it does not exist.
    ''' </summary>
    ''' <param name="cat">Name of the category to teach.</param>
    ''' <param name="tr">Reader forwarded to the category's <c>TeachCategory</c>.</param>
    Public Sub TeachCategory(ByVal cat As String, ByVal tr As System.IO.TextReader) Implements IClassifier.TeachCategory
        GetOrCreateCategory(cat).TeachCategory(tr)
    End Sub

    ''' <summary>
    ''' Scores the text read from <paramref name="tr"/> against every known category.
    ''' </summary>
    ''' <remarks>
    ''' For each phrase found in the input and each category, the natural log of
    ''' (phrase count in category / category total words) is added to that
    ''' category's score; a count of 0.01 is substituted when the category has
    ''' never seen the phrase. Finally the log of (category total words / total
    ''' words of all categories) is added to each score.
    ''' </remarks>
    ''' <param name="tr">Reader supplying the text to classify.</param>
    ''' <returns>A dictionary mapping each category name to its accumulated log score.</returns>
    Public Function Classify(ByVal tr As System.IO.StreamReader) As Dictionary(Of String, Double) Implements IClassifier.Classify
        Dim score As New Dictionary(Of String, Double)()
        For Each cat As KeyValuePair(Of String, ICategory) In m_Categories
            score.Add(cat.Value.Name, 0.0R)
        Next

        ' Collect the phrases of the input in a scratch, unnamed category.
        Dim words_in_file As New EnumerableCategory("", m_ExcludedWords)
        words_in_file.TeachCategory(tr)

        For Each kvp1 As KeyValuePair(Of String, PhraseCount) In words_in_file
            Dim pc_in_file As PhraseCount = kvp1.Value
            For Each kvp As KeyValuePair(Of String, ICategory) In m_Categories
                Dim cat As ICategory = kvp.Value
                Dim count As Integer = cat.GetPhraseCount(pc_in_file.RawPhrase)
                If 0 < count Then
                    score(cat.Name) += System.Math.Log(CDbl(count) / CDbl(cat.TotalWords))
                Else
                    ' Phrase unseen in this category: use 0.01 instead of 0 so the log stays finite.
                    score(cat.Name) += System.Math.Log(0.01 / CDbl(cat.TotalWords))
                End If
                ' Use & (string concatenation) throughout: "+" between a String and a
                ' Double makes VB attempt numeric addition and fail at run time.
                System.Diagnostics.Trace.WriteLine(pc_in_file.RawPhrase.ToString() & "(" & cat.Name & ")" & score(cat.Name))
            Next
        Next

        ' The grand total does not change while scoring, so compute it once.
        Dim totalWords As Double = CDbl(Me.CountTotalWordsInCategories())
        For Each kvp As KeyValuePair(Of String, ICategory) In m_Categories
            Dim cat As ICategory = kvp.Value
            score(cat.Name) += System.Math.Log(CDbl(cat.TotalWords) / totalWords)
        Next

        Return score
    End Function
End Class
End Namespace