Advertisement
Guest User

Untitled

a guest
Jul 30th, 2014
213
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 1.82 KB | None | 0 0
  /**
   * Mention phrase finder driven by the NER tag attached to each token.
   *
   * Every sentence of the document is scanned left to right. A token whose tag
   * category is not "O" opens a phrase; unless that category starts with "U",
   * the phrase is extended over the following tokens whose category starts with
   * "I" or "L". Each phrase is positioned relative to the sentence's section,
   * with its head token offset set to the last token of the span.
   *
   * NOTE(review): `prereqAttrs` declares `BioTacNerTag`, yet the scan also
   * recognises the "U" and "L" prefixes - presumably so the same code works
   * with a BILOU encoding (`Seq(classOf[BilouTacNerTag])`); confirm which
   * encoding is actually in use.
   */
  object TacNERPhraseFinder extends MentionPhraseFinder {
    def prereqAttrs = Seq(classOf[BioTacNerTag])

    /**
     * Collects the NER-tagged phrases of `document`, passes each one to
     * `DeterministicNounPhraseTypeLabeler.process`, adds a `PhraseList` holding
     * them to `document.attr`, and returns them.
     *
     * @param document the document whose sentences are scanned
     * @return all phrases found, in sentence order
     */
    def apply(document: Document):Iterable[Phrase] = {
      // Running count of the tokens in the sentences already visited in the
      // current section; reset whenever a sentence reports indexInSection == 0.
      // NOTE(review): this assumes the sentences of a section are visited in
      // order and cover its tokens without gaps - TODO confirm.
      var sectionOffset = 0
      val found = document.sentences.flatMap { sentence =>
        if (sentence.indexInSection == 0) sectionOffset = 0

        // True when the token at `k` carries a tag that continues an open phrase.
        def continuesEntity(k: Int): Boolean = {
          val category = sentence(k).nerTag.categoryValue
          category.startsWith("I") || category.startsWith("L")
        }

        val collected = mutable.ArrayBuffer[Phrase]()
        var pos = 0
        while (pos < sentence.length) {
          val category = sentence(pos).nerTag.categoryValue
          if (category == "O") {
            pos += 1
          } else {
            // Exclusive end of the phrase, as an index within this sentence.
            val end =
              if (category.startsWith("U")) pos + 1
              else {
                var k = pos + 1
                while (k < sentence.length && continuesEntity(k)) k += 1
                k
              }
            val spanLength = end - pos
            // Translate the sentence-relative start into a section-relative one.
            val startInSection = sectionOffset + pos
            // NOTE(review): this temporary span is only read for its length,
            // which presumably equals spanLength - TODO confirm before simplifying.
            val span = new TokenSpan(sentence.section, startInSection, spanLength)
            collected += new Phrase(sentence.section, startInSection, spanLength, span.length - 1)
            pos = end
          }
        }
        sectionOffset += sentence.length
        collected
      }
      found.foreach(phrase => DeterministicNounPhraseTypeLabeler.process(phrase))
      document.attr += new PhraseList(found)
      found
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement