Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import division, print_function
- from data_parser import *
- from data_struct import *
- from learners import *
- import nltk
- import modshogun as sg
- import numpy as np
- from main import test_learner_fraction
# Absolute locations of the annotated corpora read by this script.
training_dir = '/home/thomas/Dropbox/PhD/GRN_Task/Data/Training/'
development_dir = '/home/thomas/Dropbox/PhD/GRN_Task/Data/Development/'
# Alternative data directory; only referenced from a commented-out loop header.
mini_dir = '/home/thomas/Dropbox/PhD/GRN_Task/Data/mini_subset/'

# Seed used for testing consistency (fed to numpy's RNG before the experiment).
seed = 42
class Own_director_kernel(sg.DirectorKernel):
    """
    Abstract base class for own kernels.

    Shogun is only handed index-addressed dummy features; the real Python
    objects are kept on the instance as ``self.l`` (left-hand side) and
    ``self.r`` (right-hand side) and are looked up by index whenever a
    kernel value is requested.

    Subclasses must override ``true_kernel_function``.
    """

    def __init__(self, train_data=None, *kernel_args):
        """
        train_data: optional sequence of training objects. When given, the
            kernel is initialised with it on both sides immediately;
            otherwise ``init`` has to be called before the kernel is used.
        kernel_args: extra positional arguments appended to every call of
            ``true_kernel_function``.
        """
        sg.DirectorKernel.__init__(self)
        self.kernel_function_args = kernel_args
        # Defined up front so that an uninitialised kernel fails with the
        # explicit error in kernel_function rather than an AttributeError.
        self.l, self.r = None, None
        if train_data is not None:
            self.init(train_data, train_data)

    def true_kernel_function(self, a, b, *args):
        """
        Compute the kernel value for the objects a and b.
        args: the kernel_args given at construction time.
        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def kernel_function(self, idx_a, idx_b):
        """
        Look up the objects at position idx_a (left) and idx_b (right) and
        return their kernel value.
        raises: Exception if init() has not been called yet.
        """
        # Explicit None checks: truthiness would reject an (initialised but)
        # empty dataset and is ambiguous for array-like containers.
        if self.l is None or self.r is None:
            raise Exception("Kernel {} needs to be initialised first!".format(self.get_name()))
        return self.true_kernel_function(self.l[idx_a], self.r[idx_b], *self.kernel_function_args)

    def init(self, l, r):
        """
        Register l and r as left/right-hand side data and initialise the
        underlying Shogun kernel with dummy features of matching sizes.
        """
        self.l, self.r = l, r
        l_features = sg.DummyFeatures(len(l))
        # Identity, not equality: '==' would compare the containers element
        # by element just to decide whether the features can be shared.
        r_features = l_features if l is r else sg.DummyFeatures(len(r))
        sg.DirectorKernel.init(self, l_features, r_features)

    def set_test(self, test_data):
        """
        Keep the current left-hand side and use test_data as right-hand side.
        """
        self.init(self.l, test_data)

    def get_name(self):
        return 'Own_director_kernel'

    def get_kernel_type(self):
        return sg.K_UNKNOWN


class Own_director_word_kernel(Own_director_kernel):
    """
    Kernel on word objects: sums, over a list of properties, a per-property
    similarity between the two words' values (exact match unless a custom
    similarity function is supplied for that property).
    """

    def __init__(self, prop_list, similarity_dict=None):
        """
        prop_list: iterable of property keys to compare.
        similarity_dict: optional mapping property -> function(p1, p2)
            returning a similarity value; properties without an entry are
            compared by equality (1 if equal, 0 otherwise).
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # endlessly once this class is subclassed.
        # No training data yet (None); prop_list and similarity_dict become the
        # extra arguments kernel_function forwards to true_kernel_function.
        super(Own_director_word_kernel, self).__init__(None, prop_list, similarity_dict)
        self.prop_list = prop_list
        self.similarity_dict = similarity_dict

    @staticmethod
    def _exact_match(p1, p2):
        """Default similarity: 1 if both property values are equal, else 0."""
        return 1 if p1 == p2 else 0

    def true_kernel_function(self, w1, w2, prop_list, similarity_dict):
        """
        w1, w2: word objects exposing a ``properties`` mapping.
        returns: sum of the per-property similarities over prop_list.
        """
        similarity_dict = similarity_dict or {}
        result = 0
        for prop in prop_list:
            similarity = similarity_dict.get(prop, self._exact_match)
            result += similarity(w1.properties[prop], w2.properties[prop])
        return result

    def get_name(self):
        return 'Own_director_word_kernel'

    def get_kernel_type(self):
        return sg.K_LINEAR
class Director_kernel_action_learner(Learner):
    """
    Binary word classifier: an SVM over a director kernel that decides, for
    every word of a sentence, whether it carries the (TYPE, ACTION) label.
    """

    def __init__(self, sentence_list, svm_class=sg.SVMLight, C=1, kernel=Own_director_word_kernel, kernel_args=()):
        """
        Train on all words of the given sentences.
        sentence_list: sentence objects exposing ``word_list``
        svm_class: classifier class, called as svm_class(C, kernel, labels)
        kernel: kernel class, called as kernel((WORD, POS), *kernel_args)
        raises: Exception if the sentences contain no words at all
        """
        training_words = [word for sentence in sentence_list for word in sentence.word_list]
        if not training_words:
            raise Exception("Can't train learner on empty dataset!")

        raw_labels = [word.label_from_properties(TYPE, ACTION) for word in training_words]
        targets = sg.BinaryLabels(np.array(raw_labels, dtype=np.float64))

        word_kernel = kernel((WORD, POS), *kernel_args)
        word_kernel.init(training_words, training_words)

        classifier = svm_class(C, word_kernel, targets)
        classifier.train()

        self.kernel = word_kernel
        self.svm = classifier

    def apply(self, s):
        """
        Classify every word of sentence s and, for each word predicted as
        positive, register a new 'T<n>' action entry on the word's sentence.
        The sentence is modified in place; nothing is returned.
        """
        candidates = s.word_list
        self.kernel.set_test(candidates)
        predictions = self.svm.apply()
        for position, word in enumerate(candidates):
            if predictions.get_label(position) != 1:
                continue
            owner = word.sentence
            # Next free number among the existing 'T...' ids (1 if there are none).
            used_numbers = [int(key[1:]) for key in owner.id_dict if key[0] == 'T']
            next_number = max(used_numbers + [0]) + 1
            annotation = {TYPE: ACTION, LOC: [owner.span_tokens[position]], TEXT: word[WORD]}
            owner._populate_id_dict([{'T' + str(next_number): annotation}])

    def compare(self, sentence_list):
        """
        Given fully annotated sentences (test set): make a stripped copy of
        each one, run the learner on the copy and collect the labels word by
        word.
        returns: two lists: predicted labels, ground truth labels
        """
        predicted, expected = [], []
        for annotated in sentence_list:
            stripped = annotated.copy_for_testing()
            self.apply(stripped)
            for gold_word, guessed_word in zip(annotated.word_list, stripped.word_list):
                expected.append(gold_word.label_from_properties(TYPE, ACTION))
                predicted.append(guessed_word.label_from_properties(TYPE, ACTION))
                # TODO add test: if both action with different tag nr. => realign
        return predicted, expected

    @classmethod
    def test(cls, training_list, test_list, *init_args, **init_kwargs):
        """
        Full test run: train a fresh learner on training_list, then compare
        its predictions on test_list with the ground truth.
        training_list, test_list: lists of sentence objects
        init_args, init_kwargs: forwarded to the constructor
        returns: two lists: predicted labels, ground truth labels
        """
        trained = cls(training_list, *init_args, **init_kwargs)
        return trained.compare(test_list)
# --- Experiment script -------------------------------------------------------
# Parse every file of the training and development directories.
data = []
for d in (training_dir, development_dir):
    # alternative data source: for d in (mini_dir, mini_dir + 'test/'):
    for f in get_dir_namelist(d):
        data.append(parse_file(f, d))

np.random.seed(seed)

# One list of scores per metric; each key K is read through the evaluation
# object's get_<K>() accessor.
results = {"F1": [], "precision": [], "accuracy": [], "recall": []}
for i in range(100):
    # Keep the evaluation object itself: every metric below is read from it,
    # so it must not be reduced to the F1 value here.
    # NOTE(review): assumes test_learner_fraction returns an object exposing
    # get_F1/get_precision/get_accuracy/get_recall - confirm against main.
    m = test_learner_fraction(data, Director_kernel_action_learner, fraction_test=0.1, iterations=25, verbose=False)
    for x in results:
        results[x].append(getattr(m, 'get_' + x)())

print("Averages:")
# items(), not itervalues(): both the metric name and its scores are needed.
for x, r in results.items():
    print("{:>11}: {}s".format(x, sum(r) / len(r)))

# Explicit names instead of a star import, so the plotting module does not
# flood (and possibly overwrite names in) the module namespace.
from pylab import plot, legend, grid, show

plotted = []
for x, r in results.items():
    plot(r, '-')
    plotted.append(x)  # legend entries in exactly the order the curves were drawn
legend(plotted)
grid(True)
show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement