Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env ruby
- # -*- coding: utf-8; mode: ruby -*-
- require 'test/unit'
- require 'standoff'
- require 'pp'
- require 'stringio'
- require 'tempfile'
# Test-only helpers added to StandOff: array-returning wrappers around its
# block-based each_containing_annotation / each_contained_annotation iterators.
class StandOff
  # Returns an Array of every value that each_containing_annotation(x) yields,
  # in yield order.
  def containings(x)
    [].tap do |found|
      each_containing_annotation(x) { |annotation| found << annotation }
    end
  end

  # Returns an Array of every value that each_contained_annotation(x) yields,
  # in yield order.
  def containeds(x)
    [].tap do |found|
      each_contained_annotation(x) { |annotation| found << annotation }
    end
  end
end
# Unit tests for StandOff and Annotation (presumably both provided by the
# 'standoff' library required at the top of this file).
#
# Heredoc fixtures are annotation listings, one "start end tag" entry per line;
# their bodies stay at column 0 because plain <<'EOD' heredocs keep leading
# whitespace.
class TC_StandOff < Test::Unit::TestCase
  # Scratch file created once for the class; used by test_write_annotation.
  TMP = Tempfile.new('tmp.standoff')

  # Tag => substring of the '1234' fixture body that the tag is expected to cover.
  LETTER_ANNON = {
    'one'   => '1',
    'two'   => '2',
    'three' => '3',
    'four'  => '4'
  }.freeze

  # Builds two fixtures: @so (body '1234', annotations loaded through
  # read_annotation_file) and @so2 (body 'the girl with red eyes', annotations
  # handed to the constructor as an IO).
  def setup
    @so = StandOff.new('1234')
    @so.read_annotation_file(StringIO.new(<<'EOD'))
0 3 hundread-and-twenty-three
0 2 twelve
0 1 one
1 3 twenty-three
1 2 two
2 4 thirty-four
2 3 three
3 4 Four
3 4 four
EOD
    s = <<'EOD'
0 22 ptb_article id="mydoc"
0 22 NP
4 8 NN
4 8 synonym
14 17 JJ
14 17 synonym
18 22 NN
18 22 synonym
EOD
    @so2 = StandOff.new('the girl with red eyes', StringIO.new(s))
  end

  # add_annotations on top of read_annotation_file must yield the merged
  # listing in the order spelled out in the expected heredoc.
  def test_join
    s1 = <<'EOD'
0 22 ptb_article id="mydoc"
0 22 NP
4 8 NN
14 17 JJ
18 22 NN
EOD
    s2 = <<'EOD'
4 8 synonym
14 17 synonym
18 22 synonym
EOD
    expected = <<'EOD'
0 22 ptb_article id="mydoc"
0 22 NP
4 8 NN
4 8 synonym
14 17 JJ
14 17 synonym
18 22 NN
18 22 synonym
EOD
    so = StandOff.new
    so.read_annotation_file(StringIO.new(s1))
    so.add_annotations(StringIO.new(s2))
    assert_equal(expected, so.annotations.map { |x| x.to_s + "\n" }.join)
  end

  # substr accepts either (start, end) offsets or an Annotation.
  def test_substr
    assert_equal('123', @so.substr(0, 3))
    assert_equal('23', @so.substr(1, 3))
    assert_equal('123', @so.substr(Annotation.new(0, 3)))
  end

  # matching_disjoint_annotations / each_matching_disjoint_annotation with a
  # tag regexp, optionally restricted by a second Annotation argument.
  def test_matching_annotations
    assert_equal([Annotation.new(0, 1, 'a')],
                 StandOff.new('', [Annotation.new(0, 1, 'A'),
                                   Annotation.new(0, 1, 'a'),
                                   Annotation.new(0, 1, 'b')]).
                   matching_disjoint_annotations(/^[a-z]$/))
    assert_equal([Annotation.new(0, 0, '0'),
                  Annotation.new(1, 2, 'b'),
                  Annotation.new(2, 3, 'c')],
                 StandOff.new('', [Annotation.new(0, 3, '-'),
                                   Annotation.new(0, 0, '0'),
                                   Annotation.new(1, 2, 'b'),
                                   Annotation.new(2, 3, 'c__'),
                                   Annotation.new(2, 3, 'c'),
                                   Annotation.new(3, 4, 'd')]).
                   matching_disjoint_annotations(/^.$/, Annotation.new(0, 3, '-')))

    regexp = Regexp.new('^(' + LETTER_ANNON.keys.join('|') + ')$')
    expected = Set.new(LETTER_ANNON.to_a)
    assert_equal(expected,
                 Set.new(@so.matching_disjoint_annotations(regexp).map { |x| [x.tag, @so.substr(x)] }))

    # The iterator form yields a second value that this test ignores.
    s = Set.new
    @so.each_matching_disjoint_annotation(regexp) do |a, _|
      s << [a.tag, @so.substr(a)]
    end
    assert_equal(expected, s)
  end

  # annotations called with a block is expected to return only the
  # annotations for which the block is truthy.
  def test_annotations_select
    so = StandOff.new('', [Annotation.new(0, 2, ''),
                           Annotation.new(0, 1, ''),
                           Annotation.new(0, 0, '')])
    short = so.annotations { |a| a.end - a.start <= 1 }
    assert_equal([Annotation.new(0, 1, ''), Annotation.new(0, 0, '')], short)
  end

  # enclosing_annotation for Annotation and [start, end(, tag)] regions, with
  # and without the optional second argument; nil when nothing encloses.
  def test_enclosing_annotation
    assert_equal(Annotation.new(0, 1, 'b'),
                 StandOff.new('', [Annotation.new(0, 1, 'a'),
                                   Annotation.new(0, 1, 'b'),
                                   Annotation.new(0, 1, 'c')]).enclosing_annotation(Annotation.new(0, 1, 'c')))
    assert_nil(StandOff.new('', [Annotation.new(0, 1, 'a'),
                                 Annotation.new(0, 1, 'b'),
                                 Annotation.new(0, 1, 'c')]).enclosing_annotation(Annotation.new(0, 100, 'd')))
    assert_equal(Annotation.new(0, 2, 'twelve'),
                 @so.enclosing_annotation([0, 1]))
    assert_equal(Annotation.new(2, 4, 'thirty-four'),
                 @so.enclosing_annotation([2, 3]))
    assert_equal(Annotation.new(1, 3, 'twenty-three'),
                 @so.enclosing_annotation([2, 3], 2))
    assert_nil(@so.enclosing_annotation([2, 4]))
    assert_equal(Annotation.new(3, 4, 'Four'),
                 @so.enclosing_annotation([3, 4, 'four']))
  end

  # near_annotations with distance 0 returns the annotations sharing the
  # queried span, in the order asserted below.
  def test_near
    so = StandOff.new('123', StringIO.new(<<'EOD'))
1 3 twenty-three
1 2 two
1 2 Two
1 2 TWO
2 3 three
EOD
    assert_equal([Annotation.new(1, 2, 'TWO'),
                  Annotation.new(1, 2, 'Two'),
                  Annotation.new(1, 2, 'two')],
                 so.near_annotations(Annotation.new(1, 2, 'two'), 0))
  end

  # subset(region) returns a StandOff whose annotations lie inside region.
  def test_subset
    assert_equal([Annotation.new(1, 3, 'twenty-three'),
                  Annotation.new(1, 2, 'two'),
                  Annotation.new(2, 3, 'three')],
                 @so.subset([1, 3, 'twenty-three']).annotations)
    assert_equal(@so2.annotations, @so2.subset(@so2.annotations[0]).annotations)
  end

  # Array#bsearch_range / #bsearch_lower_boundary (not core Ruby; presumably
  # supplied through 'standoff') on plain arrays and on the annotation list.
  def test_bsearch
    assert_equal((1...3), [1, 2, 2, 3, 4].bsearch_range { |x| x <=> 2 })
    assert_equal((0...0), [1, 2, 2, 3, 4].bsearch_range { |x| x <=> 0 })
    assert_equal((4...4), [1, 2, 2, 3, 4].bsearch_range { |x| x <=> 3.5 })
    assert_equal((5...5), [1, 2, 2, 3, 4].bsearch_range { |x| x <=> 5 })
    assert_equal((4...5), [1, 2, 2, 3, 4].bsearch_range { |x| x <=> 4 })
    assert_equal((0...0), [].bsearch_range { |x| x <=> 2 })
    assert_equal((2...2), [[1, 2], [2, 1], [5, 6], [5, 1]].bsearch_range { |x| x[0] <=> 4 })
    assert_equal(Annotation.new(2, 4, 'thirty-four'),
                 @so.annotations[@so.annotations.bsearch_lower_boundary { |x| [x.start, -x.end] <=> [2, -@so.body.length - 1] }])
    assert_equal(Annotation.new(3, 4, 'Four'),
                 @so.annotations[@so.annotations.bsearch_lower_boundary { |x| [x.start, -x.end] <=> [2, 0] }])
    assert_equal(0,
                 @so.annotations.bsearch_lower_boundary { |x| [x.start, -x.end] <=> [0, -@so.body.length - 1] })
  end

  # Adding annotations one at a time in reverse order must end up with the
  # same annotation list as the fixture.
  def test_add_annotation
    so = StandOff.new
    @so.annotations.reverse_each do |a|
      so.add_annotation(a, false)
    end
    assert_equal(@so.annotations,
                 so.annotations)
  end

  # following_annotations / preceding_annotations on the '1234' fixture.
  def test_following_preceding
    assert_equal([Annotation.new(2, 3, 'three'), Annotation.new(3, 4, 'four')],
                 @so.following_annotations([0, 2], 2))
    assert_equal([Annotation.new(2, 3, 'three')],
                 @so.following_annotations([0, 2], 1))
    assert_equal([Annotation.new(3, 4, 'four')],
                 @so.following_annotations([0, 3], 1))
    assert_equal([],
                 @so.following_annotations([0, 4], 1))
    # Asking for two followers equals chaining two single-step lookups.
    assert_equal([@so.following_annotations([0, 2], 1)[0],
                  @so.following_annotations(@so.following_annotations([0, 2], 1)[0], 1)[0]],
                 @so.following_annotations([0, 2], 2))
    assert_equal([Annotation.new(0, 1, 'one'), Annotation.new(1, 2, 'two'), Annotation.new(2, 3, 'three')],
                 @so.preceding_annotations([3, 4], 3))
    assert_equal([Annotation.new(0, 1, 'one'), Annotation.new(1, 2, 'two'), Annotation.new(2, 3, 'three')],
                 @so.preceding_annotations(@so.annotations.last, 3))
    assert_equal([], @so.preceding_annotations([0, 3]))
    assert_equal([], @so.following_annotations([2, 4]))
    assert_equal([Annotation.new(2, 3, 'three'), Annotation.new(3, 4, 'four')],
                 @so.preceding_annotations([4, 4], 2))
  end

  # Same lookups on @so2, where each span carries two tags.
  def test_following_preceding2
    assert_equal([Annotation.new(4, 8, 'synonym')],
                 @so2.preceding_annotations([14, 17], 3))
    assert_equal([Annotation.new(4, 8, 'synonym'), Annotation.new(14, 17, 'synonym')],
                 @so2.preceding_annotations([18, 22], 3))
    assert_equal([], @so2.preceding_annotations([4, 8], 3))
    assert_equal([Annotation.new(14, 17, 'synonym'), Annotation.new(18, 22, 'synonym')],
                 @so2.following_annotations([4, 8], 3))
  end

  # Placeholder: intentionally has no assertions yet (see TODO).
  def test_following_preceding3
    # TODO: support the case where an empty annotation is preceding
    # so = StandOff.new(@so.body)
    # so.add_annotations(@so.annotations)
    # so.add_annotation(Annotation.new(0,0,'BEGIN'))
    # so.add_annotation(Annotation.new(4,4,'END'))
    # assert_equal([Annotation.new(0,0,'BEGIN')],
    # so.preceding_annotations([0,1],2))
  end

  # following_annotations on a longer body with several same-span tags.
  def test_following_preceding4
    so = StandOff.new('the girl with red eyes , girl and chicks')
    so.read_annotation_file(StringIO.new(<<'EOD'))
0 40 ptb_article id="mydoc"
0 22 S
0 22 NP-SBJ-1
4 8 NN
4 8 NN2
14 17 JJ
14 17 JJ2
18 22 NN
18 22 NN2
25 29 NN
34 40 NN
EOD
    assert_equal([Annotation.new(14, 17, 'JJ2'),
                  Annotation.new(18, 22, 'NN2'),
                  Annotation.new(25, 29, 'NN')], so.following_annotations([4, 8], 3))
  end

  # containeds / containings (the helpers added to StandOff in this file) on @so2.
  def test_contain2
    assert_equal([Annotation.new(4, 8, 'NN'),
                  Annotation.new(4, 8, 'synonym')],
                 @so2.containeds([0, 16]))
    assert_equal([Annotation.new(14, 17, 'JJ'),
                  Annotation.new(14, 17, 'synonym')],
                 @so2.containeds([14, 18]))
    assert_equal([Annotation.new(0, 22, 'ptb_article id="mydoc"'),
                  Annotation.new(0, 22, 'NP')],
                 @so2.containings([14, 18]))
    # assert_equal([Annotation.new(14,17,'synonym')],
    # @so2.containeds([14,17,'synonym']))
    # assert_equal([Annotation.new(0,22,'ptb_article id="mydoc"'),
    # Annotation.new(0,22,'NP')],
    # @so2.containings([14,17,'JJ']))
  end

  # each_contained_annotation yields the annotations inside [1,3], the region
  # itself included.
  def test_contained_annotations
    s = []
    @so.each_contained_annotation([1, 3]) do |a|
      s << a
    end
    assert_equal([Annotation.new(1, 3, 'twenty-three'),
                  Annotation.new(1, 2, 'two'),
                  Annotation.new(2, 3, 'three')],
                 s)
  end

  # each_containing_annotation / containings for array and Annotation regions.
  def test_containing_annotations
    ans1 = [Annotation.new(0, 3, 'hundread-and-twenty-three'),
            Annotation.new(0, 2, 'twelve'),
            Annotation.new(0, 1, 'one')]
    assert_equal(ans1,
                 @so.containings([0, 1]))
    s = []
    @so.each_containing_annotation([0, 1]) do |a|
      s << a
    end
    assert_equal(ans1, s)
    [[2, 3], Annotation.new(2, 3)].each do |region|
      assert_equal([Annotation.new(0, 3, 'hundread-and-twenty-three'),
                    Annotation.new(1, 3, 'twenty-three'),
                    Annotation.new(2, 4, 'thirty-four'),
                    Annotation.new(2, 3, 'three')],
                   @so.containings(region))
    end
  end

  # Round trip: write @so's annotations to TMP's path, read them back into a
  # fresh StandOff with the same body, and expect an equal annotation list.
  def test_write_annotation
    # Block form closes (and therefore flushes) the handle before the file is
    # re-read; the original left the write handle open.
    File.open(TMP.path, 'w') { |out| @so.write_annotation_file(out) }
    so = StandOff.new(@so.body)
    TMP.close
    File.open(TMP.path, 'r') { |input| so.read_annotation_file(input) }
    assert_equal(@so.annotations,
                 so.annotations)
  end

  # render with a filter block emits <tag>…</tag> only for the selected annotations.
  def test_render
    assert_equal("<one>1</one><two>2</two><three>3</three><four>4</four>",
                 @so.render { |a| a.tag =~ /^(one|two|three|four)$/ })
  end

  # render with an empty (zero-width) annotation: three procs supply the
  # open, close and empty-element markup; with no arguments the default
  # markup is used.
  def test_render_empties
    so = StandOff.new('1234.')
    so.add_annotations([
      Annotation.new(0, 1, 'thousands'),
      Annotation.new(1, 1, 'thousands-separator'),
      Annotation.new(1, 4, 'ones'),
    ])
    assert_equal("<THOUSANDS>1</THOUSANDS><ONES>234</ONES>.",
                 so.render(
                   Proc.new { |a| "<#{a.tag_name.upcase}>" },
                   Proc.new { |a| "</#{a.tag_name.upcase}>" },
                   Proc.new { |a| "" }
                 ))
    assert_equal("<thousands>1</thousands><ones><thousands-separator/>234</ones>.",
                 so.render)
  end
end
Add Comment
Please, Sign In to add comment