Advertisement
Guest User

Untitled

a guest
Jan 23rd, 2019
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.08 KB | None | 0 0
  1. class Ngram ():
  2. children = [] # these are class variables
  3. name = ""
  4. count = 0
  5. probability = 0
  6.  
  7. def __init__(self, name): # called the constructor, allocates memory for the class
  8. self.name = name
  9. self.children = []
  10. self.count = 0
  11. self.probability = 0
  12.  
  13. def calc_prob(self):
  14. child_count=0 #the total number of child occurances
  15. for n in self.children: # self.chidlren is basically a list of all the children
  16. child_count+= n.count #the total sum of all the children, aka the number following the word on the worksheet
  17. for n in self.children:
  18. n.probability= n.count / child_count
  19. n.calc_prob() # basically means that it'll repeat so long as there are children
  20.  
  21.  
  22. def set_count(self, count):
  23. self.count = count
  24.  
  25. def increment_count(self):
  26. self.count += 1
  27.  
  28. def add_child(self, child):
  29. self.children.append(child)
  30.  
  31. def get_child(self, name):
  32. for ng in self.children:
  33. if ng.name == name:
  34. return ng
  35.  
  36. return None
  37.  
  38. def find_children(self, names):
  39. nms = names.split() # split on space
  40.  
  41. node = self
  42.  
  43. for n in nms:
  44. node = node.get_child(n)
  45.  
  46. if node == None:
  47. break;
  48.  
  49. return node
  50.  
  51. def construct(self, data, length):
  52. # changes all words to lowercase
  53. # removes 's
  54. # removes '
  55. # removes ,
  56. # removes .
  57. words = data.lower()\
  58. .replace("'s","")\
  59. .replace("'","") \
  60. .replace(",","") \
  61. .replace(".","") \
  62. .replace("-","") \
  63. .replace("\"","").split()
  64.  
  65. for lower_lim in range(len(words)+1-length):
  66.  
  67. word_string = words[lower_lim]
  68.  
  69. n = self.find_children(word_string)
  70.  
  71. if n is None:
  72. n = Ngram(words[lower_lim])
  73. n.set_count(1)
  74.  
  75. self.add_child(n)
  76. else:
  77. n.increment_count()
  78.  
  79.  
  80.  
  81. node = n
  82.  
  83. for upper_lim in range(1, length):
  84. word_string += " " + words[lower_lim+upper_lim]
  85. n2 = self.find_children(word_string)
  86.  
  87. if n2 is None:
  88. n2 = Ngram(words[lower_lim+upper_lim])
  89. n2.set_count(1)
  90.  
  91. node.add_child(n2)
  92. else:
  93. n2.increment_count()
  94.  
  95. node = n2
  96.  
  97. def print_index(self, i):
  98. print(" "*i, self.name, "(", self.count, ")", "probability:",self.probability) #" "*i indents it according to depth
  99.  
  100. for c in self.children:
  101. c.print_index(i+1)
  102.  
  103. def print(self):
  104. self.print_index(0)
  105.  
  106. n= Ngram("root")
  107.  
  108. paragraph=""
  109. with open("sogstory","r") as f:
  110. for line in f:
  111. paragraph+=line
  112.  
  113. n.construct(paragraph,2)
  114. n.calc_prob()
  115. n.print()
  116. print("dogprob",n.get_child("the").get_child("dog").probability) #prints the probability of "the dog" happens
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement