Don't like ads? PRO users don't see any ads ;-)
Guest

Category

By: a guest on May 1st, 2010  |  syntax: VB.NET  |  size: 3.51 KB  |  hits: 120  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Imports System.Text.RegularExpressions
  2.  
  Namespace BayesClassifier

      ''' <summary>
      ''' A single category of the classifier: keeps a count for every phrase it
      ''' has been taught and a running total of all taught phrases.
      ''' Phrases are stored under a normalized key (see <see cref="DePhrase"/>),
      ''' i.e. with every non-word character stripped.
      ''' </summary>
      Public Class Category
          Implements ICategory

          ' Tokenizer used by TeachCategory: group 1 captures a run of word characters.
          ' Shared and built once; constructing a Compiled regex per call is expensive.
          Private Shared ReadOnly ms_WordRegEx As New Regex("(\w+)\W*", RegexOptions.Compiled)

          ' Matches any single non-word character; used to detect and normalize phrases.
          Private Shared ReadOnly ms_PhraseRegEx As New Regex("\W", RegexOptions.Compiled)

          ' Normalized phrase -> occurrence counter.
          Protected m_Phrases As System.Collections.Generic.SortedDictionary(Of String, PhraseCount)
          ' Number of phrases taught since construction or the last Reset().
          Private m_TotalWords As Integer
          Private m_Name As String
          ' Optional exclusion list; Nothing means no phrase is excluded.
          Private m_Excluded As ExcludedWords

          ''' <summary>Name given to this category at construction.</summary>
          Public ReadOnly Property Name() As String Implements ICategory.Name
              Get
                  Return m_Name
              End Get
          End Property

          ''' <summary>
          ''' Total number of phrases taught (every non-excluded TeachPhrase call
          ''' counts once, including repeats of the same phrase).
          ''' </summary>
          Public ReadOnly Property TotalWords() As Integer Implements ICategory.TotalWords
              Get
                  Return m_TotalWords
              End Get
          End Property

          ''' <summary>Creates an empty category.</summary>
          ''' <param name="cat">Category name, returned by <see cref="Name"/>.</param>
          ''' <param name="excluded">
          ''' Exclusion list consulted by <see cref="TeachPhrase"/>; may be Nothing.
          ''' </param>
          Public Sub New(ByVal cat As String, ByVal excluded As ExcludedWords)
              ' Fully qualified like every other use in this file, so the class does
              ' not depend on a project-level import of System.Collections.Generic.
              m_Phrases = New System.Collections.Generic.SortedDictionary(Of String, PhraseCount)()
              m_Excluded = excluded
              m_Name = cat
          End Sub

          ''' <summary>
          ''' Returns how many times <paramref name="phrase"/> has been taught.
          ''' </summary>
          ''' <param name="phrase">
          ''' Phrase to look up. It is normalized with <see cref="DePhrase"/> first,
          ''' because that is the key TeachPhrase stores entries under; without this
          ''' a phrase containing spaces or punctuation could never be found.
          ''' </param>
          ''' <returns>The stored count, or 0 when the phrase is unknown.</returns>
          Public Function GetPhraseCount(ByVal phrase As String) As Integer Implements ICategory.GetPhraseCount
              Dim found As PhraseCount = Nothing
              If m_Phrases.TryGetValue(DePhrase(phrase), found) Then
                  Return found.Count
              End If
              Return 0
          End Function

          ''' <summary>Forgets everything taught so far (the name and exclusion list are kept).</summary>
          Public Sub Reset() Implements ICategory.Reset
              m_TotalWords = 0
              m_Phrases.Clear()
          End Sub

          ''' <summary>Internal accessor for the phrase table.</summary>
          Private ReadOnly Property Phrases() As System.Collections.Generic.SortedDictionary(Of String, PhraseCount)
              Get
                  Return m_Phrases
              End Get
          End Property

          ''' <summary>
          ''' Reads <paramref name="reader"/> line by line to its end and teaches
          ''' every run of word characters found as a separate phrase.
          ''' </summary>
          ''' <param name="reader">Text source; it is read but not closed here.</param>
          ''' <exception cref="System.ArgumentNullException">reader is Nothing.</exception>
          Public Sub TeachCategory(ByVal reader As System.IO.TextReader) Implements ICategory.TeachCategory
              If reader Is Nothing Then
                  Throw New System.ArgumentNullException("reader")
              End If

              Dim line As String = reader.ReadLine()
              While line IsNot Nothing
                  For Each m As Match In ms_WordRegEx.Matches(line)
                      TeachPhrase(m.Groups(1).Value)
                  Next
                  line = reader.ReadLine()
              End While
          End Sub

          ''' <summary>Teaches each element of <paramref name="words"/> in order.</summary>
          ''' <exception cref="System.ArgumentNullException">words is Nothing.</exception>
          Public Sub TeachPhrases(ByVal words As String()) Implements ICategory.TeachPhrases
              If words Is Nothing Then
                  Throw New System.ArgumentNullException("words")
              End If

              For Each word As String In words
                  TeachPhrase(word)
              Next
          End Sub

          ''' <summary>
          ''' Records one occurrence of <paramref name="rawPhrase"/>, unless the
          ''' exclusion list rejects it.
          ''' </summary>
          ''' <param name="rawPhrase">
          ''' Phrase as written. The exclusion check sees this raw text; the table
          ''' key is its normalized form, and the raw text of the first occurrence
          ''' is handed to the new PhraseCount.
          ''' </param>
          Public Sub TeachPhrase(ByVal rawPhrase As String) Implements ICategory.TeachPhrase
              If (m_Excluded IsNot Nothing) AndAlso (m_Excluded.IsExcluded(rawPhrase)) Then
                  Exit Sub
              End If

              Dim key As String = DePhrase(rawPhrase)
              Dim entry As PhraseCount = Nothing
              If Not m_Phrases.TryGetValue(key, entry) Then
                  entry = New PhraseCount(rawPhrase)
                  m_Phrases.Add(key, entry)
              End If

              ' NOTE(review): assumes PhraseCount is a reference type, so this
              ' increment updates the entry held by the dictionary - confirm.
              entry.Count += 1
              m_TotalWords += 1
          End Sub

          ''' <summary>
          ''' True when <paramref name="s"/> contains at least one non-word
          ''' character (whitespace, punctuation, ...).
          ''' </summary>
          Public Shared Function CheckIsPhrase(ByVal s As String) As Boolean
              Return ms_PhraseRegEx.IsMatch(s)
          End Function

          ''' <summary>
          ''' Returns <paramref name="s"/> with every non-word character removed;
          ''' this is the key format of the phrase table.
          ''' </summary>
          Public Shared Function DePhrase(ByVal s As String) As String
              Return ms_PhraseRegEx.Replace(s, "")
          End Function
      End Class

  End Namespace