# Untitled

import tokenize

# Labelled training sentences read from the corpus, one list per class.
medicine = []  # positive
other = []  # negative

# Keyword features with a zero count each; this dict is the template that the
# per-class count tables are copied from.
Features = dict.fromkeys(
    (
        "Verband",
        "Struktur",
        "Lehre",
        "Körper",
        "Teilgebiet",
        "Person",
        "Krankheit",
        "Medizin",
        "Sinne",
    ),
    0,
)

# The first labelled sentence in the corpus is held out as the test sample.
testData = ""
testResult = 0  # label of the held-out sentence: 1 (medicine) or -1 (other)

# Label announced by the most recent header line; 0 means "no header pending".
pending_label = 0

# The context manager closes the file even if parsing raises.
with open("corpus.txt", "r", encoding="utf8") as corpus:
    for line in corpus:
        # A header line only announces the class of the line that follows it.
        if line == "medicine (+1)\n":
            pending_label = 1
            continue
        if line == "other (-1)\n":
            pending_label = -1
            continue

        # Lines that are not preceded by a header are ignored.
        if pending_label == 0:
            continue

        if testData == "":
            # Very first labelled sentence: keep it aside for classification.
            testData = line
            testResult = pending_label
        elif pending_label == 1:
            medicine.append(line)
        else:
            other.append(line)

        # Each header applies to exactly one following line.
        pending_label = 0

# Per-class keyword hit counts, both starting from the zeroed feature table.
FeaturesPos = dict(Features)
FeaturesNeg = dict(Features)

# A "hit" is a whitespace-separated word that contains the keyword as a
# substring; a single word may score for several keywords.
for hit_counts, sentences in ((FeaturesPos, medicine), (FeaturesNeg, other)):
    for sentence in sentences:
        for word in sentence.split():
            for keyword in Features:
                if keyword in word:
                    hit_counts[keyword] += 1

# Total number of keyword hits observed in each class.
PosCount = sum(FeaturesPos.values())
NegCount = sum(FeaturesNeg.values())

AllCount = PosCount + NegCount

# Class priors, taken as each class's share of all keyword hits.
# NOTE(review): these are based on keyword-hit totals, not on the number of
# training sentences per class -- confirm this is the intended estimate.
if AllCount:
    PosProb = PosCount / AllCount
    NegProb = NegCount / AllCount
else:
    # No keyword occurred in any training sentence; use uniform priors
    # instead of failing with ZeroDivisionError.
    PosProb = NegProb = 0.5

# Add-one (Laplace) smoothed likelihood of each keyword within each class:
# (hits of the keyword + 1) / (all hits in the class + number of keywords).
# The denominator term is derived from the feature table so it stays correct
# if keywords are added or removed.
vocabulary_size = len(Features)

SinglePoss = {
    feature: (FeaturesPos[feature] + 1) / (PosCount + vocabulary_size)
    for feature in Features
}

SingleNegs = {
    feature: (FeaturesNeg[feature] + 1) / (NegCount + vocabulary_size)
    for feature in Features
}


# Score the held-out sentence under both classes.
tokenized = testData.split()

# Each score starts at the class prior and is multiplied by the class's
# keyword likelihood once for every (word, keyword) substring hit.
testProbPos = PosProb
testProbNeg = NegProb
for word in tokenized:
    for keyword in Features:
        if keyword in word:
            testProbPos *= SinglePoss[keyword]
            testProbNeg *= SingleNegs[keyword]

print("Medicine: " + str(testProbPos))
print("Other: " + str(testProbNeg))

# Medicine wins only on a strictly larger score; ties go to Other.
print(
    "Normal Naive Bayes: Medicine"
    if testProbPos > testProbNeg
    else "Normal Naive Bayes: Other"
)

# Dump the raw per-class keyword counts for inspection.
for count_table in (FeaturesPos, FeaturesNeg):
    print(count_table)