Advertisement
Guest User

Untitled

a guest
Apr 3rd, 2013
16
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.67 KB | None | 0 0
  1. import numpy as np
  2. import modshogun as sg
  3.  
  4. class Own_kernel(sg.CustomKernel):
  5.     """
  6.        Abstract base class for own kernels
  7.    """
  8.     def __init__(self, train_data, *kernel_args):
  9.         sg.CustomKernel.__init__(self)
  10.         self.kernel_function_args = kernel_args
  11.         triangle = []
  12.         for i, d1 in enumerate(train_data):
  13.             for d2 in train_data[i:]:
  14.                 triangle.append(self.kernel_function(d1, d2, *kernel_args))
  15.         self.set_triangle_kernel_matrix_from_triangle(np.array(triangle, dtype = np.float64))
  16.         self.train_data = train_data
  17.  
  18.     def kernel_function(self, a, b, *kernel_args):
  19.         raise NotImplementedError
  20.  
  21.     def set_test(self, test_data):
  22.         matrix = []
  23.         kernel_args = self.kernel_function_args
  24.         for d1 in self.train_data:
  25.             row = []
  26.             for d2 in test_data:
  27.                 row.append(self.kernel_function(d1, d2, *kernel_args))
  28.             matrix.append(row)
  29.         self.set_full_kernel_matrix_from_full(np.array(matrix, dtype=np.float64))
  30.  
  31.     def get_name(self):
  32.         return 'Own_kernel'
  33.  
  34.     def get_kernel_type(self):
  35.         return sg.K_UNKNOWN
  36.  
  37.  
  38. class Own_word_kernel(Own_kernel):
  39.     def __init__(self, train_data, prop_list, similarity_dict=None):
  40.         super(self.__class__, self).__init__(train_data, prop_list, similarity_dict)
  41.  
  42.     def kernel_function(self, w1, w2, prop_list, similarity_dict):
  43.         similarity_dict = {} if not similarity_dict else similarity_dict
  44.         result = 0
  45.         for prop in prop_list:
  46.             similarity = similarity_dict[prop] if prop in similarity_dict else lambda p1, p2: 1 if p1 == p2 else 0
  47.             result += similarity(w1.properties[prop], w2.properties[prop])
  48.         return result
  49.  
  50.     def get_name(self):
  51.         return 'Own_word_kernel'
  52.  
  53.     def get_kernel_type(self):
  54.         return sg.K_LINEAR
  55.  
  56. class Kernel_action_learner(Learner):
  57.  
  58.     def __init__(self, sentence_list, svm_class = sg.SVMLight, C = 1, kernel = Own_word_kernel, kernel_args = ()):
  59.         words = []
  60.         for s in sentence_list:
  61.             words.extend(s.word_list)
  62.         if not words:
  63.             raise Exception("Can't train learner on empty dataset!")
  64.         labels = []
  65.         for w in words:
  66.             labels.append(w.label_from_properties(TYPE, ACTION))
  67.         labels = sg.BinaryLabels(np.array(labels, dtype = np.float64))
  68.         k = kernel(words, (WORD, POS), *kernel_args)
  69.         svm = svm_class(C, k, labels)
  70.         svm.train()
  71.         self.kernel, self.svm = k, svm
  72.  
  73.     def apply(self, s):
  74.         words = s.word_list
  75.         self.kernel.set_test(words)
  76.         result = self.svm.apply()
  77.         for i, w in enumerate(words):
  78.             if result.get_label(i) == 1:
  79.                 sentence = w.sentence
  80.                 entity_index = max([int(t[1:]) for t in sentence.id_dict if t[0] == 'T'] + [0]) + 1
  81.                 d = {'T' + str(entity_index): {TYPE: ACTION, LOC: [sentence.span_tokens[i]], TEXT: w[WORD]}}
  82.                 sentence._populate_id_dict([d])
  83.         return None # all the changes are in-place
  84.  
  85.     def compare(self, sentence_list):
  86.         """
  87.            given a list of fully annotated sentences (test set), strip bare, run tests, and compare.
  88.            returns: two lists: predicted labels, ground truth labels
  89.        """
  90.         test_labels, truth_labels = [], []
  91.         for s in sentence_list:
  92.             s_test = s.copy_for_testing()
  93.             self.apply(s_test)
  94.             for w, w_test in zip(s.word_list, s_test.word_list):
  95.                 truth_labels.append(w.label_from_properties(TYPE, ACTION))
  96.                 test_labels.append(w_test.label_from_properties(TYPE, ACTION))
  97.                 # TODO add test: if both action with different tag nr. => realign
  98.         return test_labels, truth_labels
  99.  
  100.     @classmethod
  101.     def test(cls, training_list, test_list, *init_args, **init_kwargs):
  102.         """
  103.            run full test: train learner, and compare test results with ground truth
  104.            training_list, test_list: lists of sentence objects
  105.            returns: two lists: predicted labels, ground truth labels
  106.        """
  107.         a_learner = cls(training_list, *init_args, **init_kwargs)
  108.         return a_learner.compare(test_list)
  109.  
  110.  
  111. def timetest(learner, number=100):
  112.     import time
  113.     np.random.seed(42)
  114.     results=[]
  115.     for i in range(number):
  116.         t = time.time()
  117.         print(test_learner_fraction(data, learner, fraction_test=0.1, iterations=25, verbose=True).get_F1())
  118.         results.append(time.time() - t)
  119.         print("Iteration {}: {:.2f}s".format(i,results[-1]))
  120.     return results
  121.  
  122.  
# Module-level benchmark: runs timetest with its default number of iterations
# as soon as this file is executed or imported; `own` holds the list of
# per-iteration durations it returns.
own = timetest(Kernel_action_learner)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement