Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import modshogun as sg
class Own_kernel(sg.CustomKernel):
    """Abstract base class for hand-written Shogun kernels.

    Subclasses override ``kernel_function`` with the pairwise similarity;
    the constructor precomputes the train-vs-train Gram matrix and
    ``set_test`` swaps in the train-vs-test matrix before prediction.
    """

    def __init__(self, train_data, *kernel_args):
        sg.CustomKernel.__init__(self)
        self.kernel_function_args = kernel_args
        # Upper triangle (diagonal included) of the symmetric training
        # kernel matrix, flattened row by row.
        triangle = [
            self.kernel_function(left, right, *kernel_args)
            for idx, left in enumerate(train_data)
            for right in train_data[idx:]
        ]
        self.set_triangle_kernel_matrix_from_triangle(
            np.array(triangle, dtype=np.float64))
        self.train_data = train_data

    def kernel_function(self, a, b, *kernel_args):
        """Pairwise similarity of two samples; must be overridden."""
        raise NotImplementedError

    def set_test(self, test_data):
        """Replace the kernel matrix with the full train x test Gram matrix."""
        args = self.kernel_function_args
        full = [
            [self.kernel_function(train_item, test_item, *args)
             for test_item in test_data]
            for train_item in self.train_data
        ]
        self.set_full_kernel_matrix_from_full(np.array(full, dtype=np.float64))

    def get_name(self):
        return 'Own_kernel'

    def get_kernel_type(self):
        return sg.K_UNKNOWN
class Own_word_kernel(Own_kernel):
    """Word kernel: sums per-property similarities over ``prop_list``.

    ``similarity_dict`` maps a property name to a similarity callable
    ``(p1, p2) -> number``; properties without an entry fall back to
    exact-match scoring (1 if equal, else 0).
    """

    def __init__(self, train_data, prop_list, similarity_dict=None):
        # BUG FIX: was ``super(self.__class__, self).__init__(...)``, which
        # recurses infinitely as soon as this class is subclassed (the
        # subclass's ``self.__class__`` resolves back to itself). Name the
        # class explicitly instead.
        super(Own_word_kernel, self).__init__(train_data, prop_list,
                                              similarity_dict)

    def kernel_function(self, w1, w2, prop_list, similarity_dict):
        """Sum the similarity of ``w1`` and ``w2`` over each property in
        ``prop_list``, using ``similarity_dict`` overrides where present."""
        similarity_dict = similarity_dict or {}
        # Hoisted out of the loop: the default exact-match similarity was
        # previously rebuilt as a fresh lambda on every iteration.
        exact_match = lambda p1, p2: 1 if p1 == p2 else 0
        result = 0
        for prop in prop_list:
            similarity = similarity_dict.get(prop, exact_match)
            result += similarity(w1.properties[prop], w2.properties[prop])
        return result

    def get_name(self):
        return 'Own_word_kernel'

    def get_kernel_type(self):
        return sg.K_LINEAR
class Kernel_action_learner(Learner):
    """SVM learner that tags individual words as ACTION entities.

    Training flattens all sentences into one word list, labels each word via
    ``label_from_properties(TYPE, ACTION)``, and fits ``svm_class`` on the
    given custom kernel. ``apply`` annotates a sentence in place.
    """

    def __init__(self, sentence_list, svm_class=sg.SVMLight, C=1,
                 kernel=Own_word_kernel, kernel_args=()):
        words = [w for s in sentence_list for w in s.word_list]
        if not words:
            # ValueError is more precise than the bare Exception raised
            # before, and existing ``except Exception`` handlers still
            # catch it (backward compatible).
            raise ValueError("Can't train learner on empty dataset!")
        labels = sg.BinaryLabels(np.array(
            [w.label_from_properties(TYPE, ACTION) for w in words],
            dtype=np.float64))
        k = kernel(words, (WORD, POS), *kernel_args)
        svm = svm_class(C, k, labels)
        svm.train()
        self.kernel, self.svm = k, svm

    def apply(self, s):
        """Classify every word of sentence ``s``; register each positive hit
        as a new 'T<n>' ACTION entity on the sentence.

        Returns None — all changes are made in place on ``s``.
        """
        words = s.word_list
        self.kernel.set_test(words)
        result = self.svm.apply()
        for i, w in enumerate(words):
            if result.get_label(i) == 1:
                sentence = w.sentence
                # Next free 'T<n>' id; ``default=0`` replaces the old
                # ``+ [0]`` sentinel list passed to max().
                entity_index = 1 + max(
                    (int(t[1:]) for t in sentence.id_dict if t[:1] == 'T'),
                    default=0)
                d = {'T' + str(entity_index): {TYPE: ACTION,
                                               LOC: [sentence.span_tokens[i]],
                                               TEXT: w[WORD]}}
                sentence._populate_id_dict([d])
        return None  # all the changes are in-place

    def compare(self, sentence_list):
        """
        given a list of fully annotated sentences (test set), strip bare, run tests, and compare.
        returns: two lists: predicted labels, ground truth labels
        """
        test_labels, truth_labels = [], []
        for s in sentence_list:
            s_test = s.copy_for_testing()
            self.apply(s_test)
            for w, w_test in zip(s.word_list, s_test.word_list):
                truth_labels.append(w.label_from_properties(TYPE, ACTION))
                test_labels.append(w_test.label_from_properties(TYPE, ACTION))
            # TODO add test: if both action with different tag nr. => realign
        return test_labels, truth_labels

    @classmethod
    def test(cls, training_list, test_list, *init_args, **init_kwargs):
        """
        run full test: train learner, and compare test results with ground truth
        training_list, test_list: lists of sentence objects
        returns: two lists: predicted labels, ground truth labels
        """
        a_learner = cls(training_list, *init_args, **init_kwargs)
        return a_learner.compare(test_list)
def timetest(learner, number=100):
    """Benchmark ``number`` train/evaluate rounds of ``learner``.

    Prints each round's F1 score and wall-clock time, and returns the list
    of per-round durations in seconds.
    """
    import time

    np.random.seed(42)  # reproducible train/test splits across runs
    durations = []
    for iteration in range(number):
        started = time.time()
        outcome = test_learner_fraction(data, learner, fraction_test=0.1,
                                        iterations=25, verbose=True)
        print(outcome.get_F1())
        durations.append(time.time() - started)
        print("Iteration {}: {:.2f}s".format(iteration, durations[-1]))
    return durations
# Runs the full timing benchmark as a module-level side effect; ``own`` holds
# the per-iteration durations. NOTE(review): this executes at import time —
# presumably intended for interactive use; confirm before importing this
# module elsewhere.
own = timetest(Kernel_action_learner)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement