Advertisement
Guest User

Untitled

a guest
Nov 22nd, 2017
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.41 KB | None | 0 0
  1. import tokenize
  2.  
  3. corpus = open("corpus.txt", "r", encoding='utf8')
  4.  
  5. medicine = [] #positive
  6. other = [] #negative
  7.  
  8. Features = {}
  9.  
  10. Features["Verband"] = 0
  11. Features["Struktur"] = 0
  12. Features["Lehre"] = 0
  13. Features["Körper"] = 0
  14. Features["Teilgebiet"] = 0
  15. Features["Person"] = 0
  16. Features["Krankheit"] = 0
  17. Features["Medizin"] = 0
  18. Features["Sinne"] = 0
  19.  
  20. testData = ""
  21. testResult = 0
  22.  
  23. next = 0
  24. for line in corpus:
  25.  
  26.     if line == "medicine (+1)\n":
  27.         next = 1
  28.         continue
  29.     elif line == "other (-1)\n":
  30.         next = -1
  31.         continue
  32.  
  33.     if next != 0 and testData == "":
  34.         testData = line
  35.         testResult = next
  36.         next = 0
  37.         continue
  38.  
  39.     if next == 1:
  40.         medicine.append(line)
  41.         next = 0
  42.     elif next == -1:
  43.         other.append(line)
  44.         next = 0
  45.  
  46. FeaturesPos = Features.copy()
  47. FeaturesNeg = Features.copy()
  48.  
  49. for training in medicine:
  50.     tokenized = training.split()
  51.  
  52.     for token in tokenized:
  53.         for feature in Features:
  54.             if token.find(feature) != -1:
  55.                 FeaturesPos[feature] += 1
  56.  
  57. for training in other:
  58.     tokenized = training.split()
  59.  
  60.     for token in tokenized:
  61.         for feature in Features:
  62.             if token.find(feature) != -1:
  63.                 FeaturesNeg[feature] += 1
  64.  
  65. PosCount = 0
  66. for feature in FeaturesPos:
  67.     PosCount += FeaturesPos[feature]
  68.  
  69. NegCount = 0
  70. for feature in FeaturesNeg:
  71.     NegCount += FeaturesNeg[feature]
  72.  
  73. AllCount = PosCount + NegCount
  74.  
  75. PosProb = PosCount / AllCount
  76. NegProb = NegCount / AllCount
  77.  
  78. SinglePoss = Features.copy()
  79. SingleNegs = Features.copy()
  80.  
  81. for feature in SinglePoss:
  82.     SinglePoss[feature] = (FeaturesPos[feature] + 1) / (PosCount + 9)
  83.  
  84. for feature in SingleNegs:
  85.     SingleNegs[feature] = (FeaturesNeg[feature] + 1) / (NegCount + 9)
  86.  
  87.  
  88. tokenized = testData.split()
  89.  
  90. testProbPos = PosProb
  91. for token in tokenized:
  92.     for feature in Features:
  93.         if token.find(feature) != -1:
  94.             testProbPos *= SinglePoss[feature]
  95.  
  96. testProbNeg = NegProb
  97. for token in tokenized:
  98.     for feature in Features:
  99.         if token.find(feature) != -1:
  100.             testProbNeg *= SingleNegs[feature]
  101.  
  102. print("Medicine: " + str(testProbPos))
  103. print("Other: " + str(testProbNeg))
  104.  
  105. if testProbPos > testProbNeg:
  106.     print("Normal Naive Bayes: Medicine")
  107. else:
  108.     print("Normal Naive Bayes: Other")
  109.  
  110. print(FeaturesPos)
  111. print(FeaturesNeg)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement