Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class FeatureStats(object):
    """Statistics of convolution-filter responses for each n-gram of a word.

    ``self.data`` is a list of ``(ngram, features)`` pairs, one pair per
    sliding window of ``net.filter_height`` characters over the word, where
    ``features`` holds the filter responses for that window.
    """

    def __init__(self, word, ld, net):
        """Run the network's convolution on *word* and pair n-grams with responses.

        word: the word to analyse.
        ld:   object providing ``word2inds(word)`` and ``inds2word(inds)``.
        net:  object providing ``getConvFunction()`` and ``filter_height``.
        """
        f = net.getConvFunction()
        inds = ld.word2inds(word)
        height = net.filter_height
        # One window per position where a filter of this height fits.
        windows = [ld.inds2word(inds[i:i + height])
                   for i in range(len(inds) + 1 - height)]
        features = f([[inds]])
        shape = features.shape
        # NOTE(review): assumes axes 1 and 2 of the conv output are
        # (filters, windows), so that after the transpose each row lines up
        # with one window -- confirm against the network definition.
        features = features.reshape((shape[1], shape[2])).transpose()
        # list() keeps self.data reusable and sortable on Python 3 as well.
        self.data = list(zip(windows, features))

    def thresholdStats(self, threshold):
        """Return ``[(ngram, count)]``: responses per window that are >= threshold.

        ``threshold=None`` counts every response. This spells out what the
        Python 2 number-vs-None comparison did implicitly, instead of
        raising ``TypeError`` on Python 3.
        """
        res = []
        for ngram, feats in self.data:
            if threshold is None:
                passed = len(feats)
            else:
                passed = sum(1 for value in feats if value >= threshold)
            res.append((ngram, passed))
        return res

    def normStats(self):
        """Return ``[(ngram, norm)]`` with the Euclidean norm of each window's responses."""
        return [(ngram, np.linalg.norm(feats)) for ngram, feats in self.data]

    def maxStats(self):
        """Return ``[(ngram, wins)]``: how many filters peak at each window.

        For every filter the window with the largest response gets one win;
        ties go to the earliest window. Windows are tallied by position, so
        a repeated n-gram is reported once per occurrence, like the other
        statistics. ``self.data`` is left in its original order.
        """
        if not self.data:
            return []
        # Rows are windows, columns are filters.
        responses = np.asarray([feats for _, feats in self.data])
        winners = np.argmax(responses, axis=0)
        wins = np.bincount(winners, minlength=len(self.data))
        return [(pair[0], int(won)) for pair, won in zip(self.data, wins)]

    def printStats(self, metrics='threshold', threshold=None, ordered=True,
                   mask="%s - %d"):
        """Print one line per n-gram for the chosen statistic.

        metrics:   ``'threshold'``, ``'norm'`` or ``'max'``.
        threshold: cut-off passed to ``thresholdStats`` (``'threshold'`` only).
        ordered:   when true, print the largest values first.
        mask:      ``%``-format string applied to ``(ngram, value)``.

        Raises ``ValueError`` for an unknown ``metrics`` name.
        """
        if metrics == 'threshold':
            pairs = self.thresholdStats(threshold)
        elif metrics == 'norm':
            pairs = self.normStats()
        elif metrics == 'max':
            pairs = self.maxStats()
        else:
            raise ValueError("unknown metrics: %r" % (metrics,))
        if ordered:
            # Each *Stats method returns a fresh list, so sorting in place
            # does not disturb self.data.
            pairs.sort(key=lambda pair: pair[1], reverse=True)
        for ngram, value in pairs:
            print(mask % (ngram, value))
def test(word, threshold=0.04):
    """Print the threshold and norm n-gram statistics for *word*.

    word:      the word to analyse; it is echoed before the statistics.
    threshold: cut-off used for the threshold statistic and shown in its
               header, so the printed value always matches the one applied.

    Uses the module-level ``letterDict`` and ``network`` objects.
    """
    print("")
    print("")
    print(word)
    print("")
    print("Максимальные нграммы по количеству фильтров прошедших порог %s" % threshold)
    # Build the statistics once: both reports read the same data, and
    # printStats only sorts a fresh list, never the stored one.
    stats = FeatureStats(word, letterDict, network)
    stats.printStats(metrics='threshold', threshold=threshold)
    print("")
    print("Максимальные нграммы по норме")
    stats.printStats(metrics='norm', mask="%s - %.3f")
    print("")
    print("")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement