Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Builds mention phrases from per-token NER tags.
 *
 * Every maximal run of tokens that starts at a non-"O" tag and continues over
 * tags whose category begins with "I" or "L" becomes one [[Phrase]]. A tag
 * whose category begins with "U" always yields a single-token phrase.
 *
 * NOTE(review): the declared prerequisite is `BioTacNerTag`, yet the loop also
 * recognises "U"/"L" prefixes (BILOU-style). The entity type after the prefix
 * is never compared, so e.g. "B-PER I-ORG" is grouped into one phrase —
 * confirm this leniency is intended.
 */
object TacNERPhraseFinder extends MentionPhraseFinder {
  def prereqAttrs = Seq(classOf[BioTacNerTag])

  /**
   * Extracts phrases from every sentence of `document`, runs
   * `DeterministicNounPhraseTypeLabeler.process` on each of them, and stores
   * them on the document as a `PhraseList` attribute.
   *
   * @param document the document whose sentences carry `nerTag` on each token
   * @return all phrases found, in sentence order
   */
  def apply(document: Document): Iterable[Phrase] = {
    // Token offset of the current sentence relative to its section. It is
    // rebuilt by summing sentence lengths and reset at each section's first
    // sentence.
    // NOTE(review): this assumes sentences tile the section without gaps;
    // verify, or switch to the sentence's own start offset if one is exposed.
    var sentenceSectionStart = 0

    val allPhrases = document.sentences.flatMap { sentence =>
      if (sentence.indexInSection == 0) sentenceSectionStart = 0

      val phrases = mutable.ArrayBuffer[Phrase]()
      var i = 0
      while (i < sentence.length) {
        val category = sentence(i).nerTag.categoryValue
        if (category != "O") {
          // Scan forward to the first token that no longer continues the entity.
          var j = i + 1
          if (!category.startsWith("U")) {
            while (j < sentence.length && {
              val next = sentence(j).nerTag.categoryValue
              next.startsWith("I") || next.startsWith("L")
            }) {
              j += 1
            }
          }
          val entityLen = j - i
          // `i` is sentence-relative; Phrase wants a section-relative start.
          // The fourth argument is the offset of the phrase's last token
          // (presumably used as the head-token offset — confirm against Phrase).
          phrases += new Phrase(sentence.section, sentenceSectionStart + i, entityLen, entityLen - 1)
          i = j
        } else {
          i += 1
        }
      }

      sentenceSectionStart += sentence.length
      phrases
    }

    allPhrases.foreach(DeterministicNounPhraseTypeLabeler.process)
    document.attr += new PhraseList(allPhrases)
    allPhrases
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement