Imports System.Text.RegularExpressions
Namespace BayesClassifier
''' <summary>
''' Accumulates phrase statistics for a single named category: how many times
''' each phrase has been taught and how many phrases have been taught in total.
''' </summary>
Public Class Category
    Implements ICategory

    ''' <summary>
    ''' Tokenizer used by <see cref="TeachCategory"/>: captures one run of word
    ''' characters and consumes any non-word characters that follow it.
    ''' Shared so the compiled regex is built once rather than on every call.
    ''' </summary>
    Private Shared ReadOnly ms_WordRegEx As New Regex("(\w+)\W*", RegexOptions.Compiled)

    ''' <summary>Matches a single non-word character.</summary>
    Private Shared ReadOnly ms_PhraseRegEx As New Regex("\W", RegexOptions.Compiled)

    ''' <summary>
    ''' Taught phrases, keyed by the phrase with all non-word characters removed
    ''' (see <see cref="DePhrase"/>).
    ''' </summary>
    Protected m_Phrases As System.Collections.Generic.SortedDictionary(Of String, PhraseCount)

    Private m_TotalWords As Integer
    Private ReadOnly m_Name As String
    Private ReadOnly m_Excluded As ExcludedWords

    ''' <summary>Gets the category name supplied to the constructor.</summary>
    Public ReadOnly Property Name() As String Implements ICategory.Name
        Get
            Return m_Name
        End Get
    End Property

    ''' <summary>
    ''' Gets the number of phrases taught since construction or the last
    ''' <see cref="Reset"/>; excluded phrases are not counted.
    ''' </summary>
    Public ReadOnly Property TotalWords() As Integer Implements ICategory.TotalWords
        Get
            Return m_TotalWords
        End Get
    End Property

    ''' <summary>Creates an empty category.</summary>
    ''' <param name="cat">Name of the category.</param>
    ''' <param name="excluded">
    ''' Phrases to ignore while teaching; may be Nothing, in which case nothing is excluded.
    ''' </param>
    Public Sub New(ByVal cat As String, ByVal excluded As ExcludedWords)
        m_Phrases = New System.Collections.Generic.SortedDictionary(Of String, PhraseCount)()
        m_Excluded = excluded
        m_Name = cat
    End Sub

    ''' <summary>Returns how many times a phrase has been taught to this category.</summary>
    ''' <param name="phrase">
    ''' Phrase to look up. It is normalized with <see cref="DePhrase"/> before the
    ''' lookup, because that is how <see cref="TeachPhrase"/> builds its keys.
    ''' </param>
    ''' <returns>The recorded count, or 0 when the phrase is unknown.</returns>
    Public Function GetPhraseCount(ByVal phrase As String) As Integer Implements ICategory.GetPhraseCount
        Dim found As PhraseCount = Nothing
        ' Normalizing is a no-op for keys that are already free of non-word
        ' characters, so callers passing stored keys see no difference.
        If m_Phrases.TryGetValue(DePhrase(phrase), found) Then
            Return found.Count
        End If
        Return 0
    End Function

    ''' <summary>Forgets everything taught so far.</summary>
    Public Sub Reset() Implements ICategory.Reset
        m_TotalWords = 0
        m_Phrases.Clear()
    End Sub

    ''' <summary>Gets the underlying phrase dictionary.</summary>
    Private ReadOnly Property Phrases() As System.Collections.Generic.SortedDictionary(Of String, PhraseCount)
        Get
            Return m_Phrases
        End Get
    End Property

    ''' <summary>
    ''' Reads the text line by line until the reader is exhausted and teaches
    ''' every run of word characters as a separate phrase.
    ''' </summary>
    ''' <param name="reader">Source of the training text.</param>
    ''' <exception cref="System.ArgumentNullException"><paramref name="reader"/> is Nothing.</exception>
    Public Sub TeachCategory(ByVal reader As System.IO.TextReader) Implements ICategory.TeachCategory
        If reader Is Nothing Then
            Throw New System.ArgumentNullException("reader")
        End If

        Dim line As String = reader.ReadLine()
        While line IsNot Nothing
            Dim m As Match = ms_WordRegEx.Match(line)
            While m.Success
                TeachPhrase(m.Groups(1).Value)
                m = m.NextMatch()
            End While
            line = reader.ReadLine()
        End While
    End Sub

    ''' <summary>Teaches each element of the array via <see cref="TeachPhrase"/>.</summary>
    ''' <param name="words">Phrases to teach.</param>
    ''' <exception cref="System.ArgumentNullException"><paramref name="words"/> is Nothing.</exception>
    Public Sub TeachPhrases(ByVal words As String()) Implements ICategory.TeachPhrases
        If words Is Nothing Then
            Throw New System.ArgumentNullException("words")
        End If

        For Each word As String In words
            TeachPhrase(word)
        Next
    End Sub

    ''' <summary>
    ''' Records one occurrence of a phrase, unless the exclusion list rejects it.
    ''' </summary>
    ''' <param name="rawPhrase">
    ''' Phrase as it appeared in the input. The exclusion check sees this raw form;
    ''' the dictionary key is the phrase with non-word characters removed.
    ''' </param>
    Public Sub TeachPhrase(ByVal rawPhrase As String) Implements ICategory.TeachPhrase
        If (m_Excluded IsNot Nothing) AndAlso (m_Excluded.IsExcluded(rawPhrase)) Then
            Exit Sub
        End If

        Dim key As String = DePhrase(rawPhrase)
        Dim entry As PhraseCount = Nothing
        If Not m_Phrases.TryGetValue(key, entry) Then
            ' First sighting: remember the raw spelling alongside the count.
            entry = New PhraseCount(rawPhrase)
            m_Phrases.Add(key, entry)
        End If

        ' NOTE(review): this update only reaches the dictionary if PhraseCount
        ' is a reference type (Class) - confirm against its declaration.
        entry.Count += 1
        m_TotalWords += 1
    End Sub

    ''' <summary>Tells whether the string contains at least one non-word character.</summary>
    ''' <param name="s">String to inspect.</param>
    ''' <returns>True when a non-word character is present; otherwise False.</returns>
    Public Shared Function CheckIsPhrase(ByVal s As String) As Boolean
        Return ms_PhraseRegEx.IsMatch(s)
    End Function

    ''' <summary>Removes every non-word character from the string.</summary>
    ''' <param name="s">String to normalize.</param>
    ''' <returns>The input with all non-word characters stripped.</returns>
    Public Shared Function DePhrase(ByVal s As String) As String
        Return ms_PhraseRegEx.Replace(s, "")
    End Function
End Class
End Namespace