Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def train(self, to_exclude, corpus: TextCorpus):
    """Build n-gram successor counts and probability tables from a corpus.

    Every text in ``corpus`` is POS-tagged with ``pos_tag``.  Each tagged
    token whose tag is not in ``to_exclude`` is counted as a successor of
    the current context window and then shifted into that window.  Once a
    text is exhausted, ``self.STOP_WORD`` is counted as a successor of the
    final context.

    Args:
        to_exclude: Container of POS tags; tokens carrying one of these
            tags are neither counted nor shifted into the context window.
        corpus: Iterable of texts, each in whatever form ``pos_tag``
            accepts (presumably a list of word tokens -- confirm against
            the caller).

    Side effects:
        Sets ``self._table`` to the mapping ``context tuple -> Counter`` of
        successors, and ``self._probabilities`` to a mapping from the same
        context tuples to ``ProbabilityTable(counts)``.
    """
    table = defaultdict(Counter)
    # The context window starts out filled with STOP_WORD markers.
    words = NGram(self.STOP_WORD for _ in range(self._ngram_size))
    # NOTE(review): the window is not reset between texts, so the first
    # tokens of a text are counted after the tail of the previous one.
    # Kept as in the original -- confirm this is intended.
    for text in corpus:
        tagged_text = pos_tag(text)
        # Iterate the *tagged* tokens: the tag lookup below is only
        # meaningful on pos_tag output, not on the raw text.
        for tagged_word in tagged_text:
            # Assumes (word, tag) pairs, as the original indexing implies.
            tag = tagged_word[1]
            if tag in to_exclude:
                continue
            table[tuple(words)][tagged_word] += 1
            # Slide the window forward by one token.
            words.popleft()
            words.append(tagged_word)
        # NOTE(review): recorded once per text; the original nesting of
        # this statement could not be recovered -- confirm.
        table[tuple(words)][self.STOP_WORD] += 1
    self._table = table
    self._probabilities = {
        context: ProbabilityTable(counts)
        for context, counts in table.items()
    }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement