Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Toy Naive Bayes classifier that labels a German text as "medicine" or
# "other" using a fixed list of feature words.
#
# Expected layout of corpus.txt: a label line ("medicine (+1)" or
# "other (-1)") followed by one line of text.  The first labelled text is
# held out as the test sample; every later one is training data.
import tokenize  # not used below; kept because the original script imported it

# Feature words, in the order they are reported.  A token counts as a hit
# when it *contains* the word (substring match), so compounds such as
# "Medizinstudent" match "Medizin".
FEATURE_WORDS = (
    "Verband",
    "Struktur",
    "Lehre",
    "Körper",
    "Teilgebiet",
    "Person",
    "Krankheit",
    "Medizin",
    "Sinne",
)

# Exact label lines (including the trailing newline) mapped to class ids.
LABEL_LINES = {
    "medicine (+1)\n": 1,
    "other (-1)\n": -1,
}


def parse_corpus(lines):
    """Split labelled corpus lines into training sets and one test sample.

    Args:
        lines: iterable of text lines, each ending in a newline as produced
            by iterating over a text file.

    Returns:
        A tuple ``(medicine, other, test_data, test_result)``: the positive
        training texts, the negative training texts, the first labelled text
        (held out for testing, "" if none), and that text's label
        (1, -1, or 0 if no test sample was found).
    """
    medicine = []
    other = []
    test_data = ""
    test_result = 0
    pending = 0  # label announced by the previous line; 0 means "none"
    for line in lines:
        label = LABEL_LINES.get(line)
        if label is not None:
            pending = label
            continue
        if pending == 0:
            # Text that does not directly follow a label line is ignored.
            continue
        if test_data == "":
            test_data, test_result = line, pending
        elif pending == 1:
            medicine.append(line)
        else:
            other.append(line)
        pending = 0
    return medicine, other, test_data, test_result


def count_features(documents):
    """Count, per feature word, how many tokens in *documents* contain it.

    Returns a plain dict keyed in ``FEATURE_WORDS`` order so that printing
    it gives a stable layout.
    """
    counts = dict.fromkeys(FEATURE_WORDS, 0)
    for document in documents:
        for token in document.split():
            for feature in FEATURE_WORDS:
                if feature in token:
                    counts[feature] += 1
    return counts


def class_priors(pos_count, neg_count):
    """Return ``(P(medicine), P(other))`` from the two feature-hit totals.

    NOTE(review): the priors are derived from feature-hit totals rather than
    from the number of training texts per class; kept as the script had it.
    When no feature occurs anywhere, uniform priors are returned instead of
    dividing by zero.
    """
    total = pos_count + neg_count
    if total == 0:
        return 0.5, 0.5
    return pos_count / total, neg_count / total


def smoothed_likelihoods(counts):
    """Return add-one (Laplace) smoothed probabilities for each feature.

    The denominator adds the number of features, so it stays correct when
    the feature list is edited.
    """
    total = sum(counts.values())
    vocabulary_size = len(counts)
    return {
        feature: (count + 1) / (total + vocabulary_size)
        for feature, count in counts.items()
    }


def score(text, prior, likelihoods):
    """Return *prior* multiplied by the likelihood of every feature hit in *text*."""
    result = prior
    for token in text.split():
        for feature, likelihood in likelihoods.items():
            if feature in token:
                result *= likelihood
    return result


def main():
    """Train on corpus.txt, classify its held-out sample and print the result."""
    # The context manager closes the file even if parsing fails.
    with open("corpus.txt", "r", encoding="utf8") as corpus:
        medicine, other, test_data, _test_result = parse_corpus(corpus)

    features_pos = count_features(medicine)
    features_neg = count_features(other)

    pos_prob, neg_prob = class_priors(
        sum(features_pos.values()), sum(features_neg.values())
    )
    test_prob_pos = score(test_data, pos_prob, smoothed_likelihoods(features_pos))
    test_prob_neg = score(test_data, neg_prob, smoothed_likelihoods(features_neg))

    print("Medicine: " + str(test_prob_pos))
    print("Other: " + str(test_prob_neg))
    # Ties go to "Other", as in the original comparison.
    if test_prob_pos > test_prob_neg:
        print("Normal Naive Bayes: Medicine")
    else:
        print("Normal Naive Bayes: Other")
    print(features_pos)
    print(features_neg)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement