1. # Create "p", which represents probabilities P(word|newsgroup) = P(a randomly selected document from given newsgroup contains given word)
2. for each newsgroup:
3.     ...
4.     # How often each word was seen in documents of this newsgroup
5.     train_word_counts[newsgroup] = train[newsgroup]['wordID'].value_counts()
6.     # Count documents + pseudocount +1 per word for 53975 words
7.     count_docs = len(train[newsgroup]['docID'].unique()) + 53975
8.     # P(newsgroup,word) as explained at the top. All
9.     p[newsgroup] = train_word_counts[newsgroup].add(1) / count_docs
10.     # Sort by wordId and fill pseudovalue for nonexisting words
11.     p[newsgroup] = pd.DataFrame(data={'fraction': p[newsgroup]}, index=p[newsgroup].index)
12.     p[newsgroup].sort_index(inplace=True)
13.     p = pd.DataFrame(p, index=range(1,53976), columns=['fraction'])
14.     # Pseudocount for unseen words
15.     p = p.fillna(1 / count_docs)
16.
17.
18. # Classify rows, which are wordID-docID combinations, representing "this word is present in this document".
19. # We will write our results in DataFrame "b", where rows represent documents and columns represent newsgroups
20. # First, we initialize every column with the log of the prior probability of its newsgroup
21. # I'm omitting the prior calculations, because my results don't change materially even when all priors are set to log(1/20)
22. for each row of data we want to classify:
23.     for each newsgroup:
24.         docID = row
25.         wordID = row
26.         # Val represents how likely this document belongs to this newsgroup, before dealing with this current row
27.         val = b.at[docID, newsgroup]
28.         # P(word|newsgroup)
29.         wordFraction = p[newsgroup].iat[wordID-1, 0]
30.         # Summing up logarithmized probabilities produces the same results as multiplying normal probabilities
31.         val += math.log(wordFraction)
32.         b.set_value(docID, newsgroup, val)
33.
34. # For each doc, pick the newsgroup with max val.
