Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def match(self, node):
    """Decide whether ``node`` starts an entity that no tree node spans.

    The first child of ``node`` must be a leaf word whose ``entity`` tag
    starts with ``'B-'`` (presumably a BIO-style "begin" tag -- confirm
    against the tagger).  The match is rejected when ``node`` or any
    non-root ancestor already reports itself as an entity of that type.
    The node holding the last word of the entity span must then have a
    sibling whose label is an adjunct and whose first word is labelled
    ``'IN'``.

    Progress is traced to stdout with ``print``; the single-argument call
    form is used so the output is the same on Python 2 and Python 3.

    Args:
        node: tree node to test; only its first child is inspected as the
            candidate entity-initial word.

    Returns:
        True when every check above passes, False otherwise.
    """
    word = node.child(0)
    if not word.isLeaf():
        return False
    print('word: %s' % word)

    # Only a word that opens an entity can match.  Leaves without any tag
    # (the scan at the bottom tests ``not leaf.entity``) are rejected here
    # instead of failing on ``None.startswith``.
    tag = word.entity
    if not tag or not tag.startswith('B-'):
        return False
    entity_type = tag[2:]
    print('entity_type: %s' % entity_type)

    # Climb from ``node`` towards the root (the root itself is not tested);
    # a node already marked as this entity type means the entity is spanned.
    print('Finding root node: %s' % word)
    ancestor = node
    while not ancestor.isRoot():
        print('node.label in while loop: %s, %s' % (ancestor.label, ancestor))
        if ancestor.isEntity(entity_type):
            print('node %s is an entity' % word)
            print('returning false')
            return False
        ancestor = ancestor.parent()

    entity_words = self._getSpan(word)
    print('entity_words: %s' % ([w.text for w in entity_words]))
    last_word = entity_words[-1]
    print('last_word: %s' % last_word)
    last_word_node = last_word.parent()
    print('last_word_node: %s' % last_word_node)

    # The node holding the last entity word must be followed by an adjunct
    # whose first word carries the 'IN' label.
    sibling = last_word_node.sibling()
    print('sibling: %s' % sibling)
    if not sibling:
        return False
    if not sibling.label.is_adjunct:
        return False
    modifier = sibling.getWord(0)
    if modifier.label != 'IN':
        return False

    print('\n<working1>')
    # Scan the subtree two levels above the first entity word for leaves
    # that carry no entity tag.
    # NOTE(review): both exits of this scan return True, so it currently
    # only changes the debug output -- looks like work in progress; confirm
    # what an untagged leaf is supposed to mean for the result.
    entity_root = word.parent().parent()
    print('entity_root: %s' % entity_root)
    entity_nodes = entity_root.depthList()
    print('entity_nodes:')
    for candidate in entity_nodes:
        if candidate.isLeaf() and not candidate.entity:
            print('not an entity???')
            return True
    print('</working1>\n')
    return True
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement