• API
• FAQ
• Tools
• Archive
SHARE
TWEET Untitled a guest Jan 23rd, 2019 43 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
class Ngram:
    """A node in a tree of word sequences, used to count n-grams.

    Each node stores one word (``name``), how many times the word sequence
    leading from the root to this node was seen (``count``), and the share of
    its parent's child occurrences that it accounts for (``probability``).
    The root node is an ordinary ``Ngram`` whose own name and count are not
    used by the counting logic.
    """

    # Class-level defaults kept for backward compatibility; __init__ rebinds
    # every one of them per instance, so the list below is never shared by
    # instances created through the constructor.
    children = []
    name = ""
    count = 0
    probability = 0

    def __init__(self, name):
        """Create a node called ``name`` with no children and a zero count."""
        self.name = name
        self.children = []
        self.count = 0
        self.probability = 0

    def calc_prob(self):
        """Set ``probability`` on every descendant of this node.

        A child's probability is its count divided by the summed counts of
        all children of the same parent. This node's own probability is left
        untouched.
        """
        total = sum(child.count for child in self.children)
        for child in self.children:
            # Guard against children that all have a zero count, which would
            # otherwise raise ZeroDivisionError.
            child.probability = child.count / total if total else 0
            child.calc_prob()

    def set_count(self, count):
        """Overwrite this node's occurrence count."""
        self.count = count

    def increment_count(self):
        """Add one to this node's occurrence count."""
        self.count += 1

    def add_child(self, child):
        """Append ``child`` to this node's list of children."""
        self.children.append(child)

    def get_child(self, name):
        """Return the first direct child called ``name``, or None."""
        for child in self.children:
            if child.name == name:
                return child
        return None

    def find_children(self, names):
        """Follow a whitespace-separated path of words down from this node.

        Returns the node reached after the last word, or None as soon as one
        word on the path has no matching child. An empty ``names`` returns
        this node itself.
        """
        node = self
        for word in names.split():
            node = node.get_child(word)
            if node is None:
                return None
        return node

    def construct(self, data, length):
        """Count every run of ``length`` consecutive words in ``data``.

        The text is lower-cased, "'s" is removed, and the characters
        ' , . - " are deleted before splitting on whitespace. Each window of
        ``length`` words is then recorded as a path below this node: missing
        nodes are created with a count of 1 and existing ones are
        incremented. Text with fewer than ``length`` words adds nothing.

        Raises:
            ValueError: if ``length`` is smaller than 1.
        """
        if length < 1:
            raise ValueError("length must be at least 1")

        # "'s" has to go first, while the apostrophe is still present.
        cleaned = data.lower().replace("'s", "")
        words = cleaned.translate(str.maketrans("", "", "',.-\"")).split()

        for start in range(len(words) + 1 - length):
            node = self
            for word in words[start:start + length]:
                child = node.get_child(word)
                if child is None:
                    child = Ngram(word)
                    child.set_count(1)
                    node.add_child(child)
                else:
                    child.increment_count()
                node = child

    def print_index(self, i):
        """Print this node and its subtree, indented by depth ``i``."""
        print(" "*i, self.name, "(", self.count, ")", "probability:",self.probability)

        for child in self.children:
            child.print_index(i + 1)

    def print(self):
        """Print the whole subtree rooted at this node."""
        self.print_index(0)
# Build a bigram tree from the story text, then report how often "dog"
# follows "the" relative to every word that follows "the".
n = Ngram("root")

with open("sogstory", "r") as f:
    paragraph = f.read()

n.construct(paragraph, 2)
n.calc_prob()
n.print()

# find_children returns None when either word is missing, so a story without
# "the dog" reports 0 instead of failing on None.get_child(...).
the_dog = n.find_children("the dog")
print("dogprob", the_dog.probability if the_dog is not None else 0)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top