Guest User

Untitled

a guest
May 26th, 2018
226
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.28 KB | None | 0 0
  1. #! /usr/bin/env ruby
  2. # -*- coding: utf-8; mode: ruby -*-
  3.  
  4. require 'test/unit'
  5. require 'standoff'
  6. require 'pp'
  7. require 'stringio'
  8. require 'tempfile'
  9.  
  10. class StandOff
  11. def containings(x)
  12. s = []
  13. each_containing_annotation(x) do|a|
  14. s << a
  15. end
  16. s
  17. end
  18. def containeds(x)
  19. s = []
  20. each_contained_annotation(x) do|a|
  21. s << a
  22. end
  23. s
  24. end
  25. end
  26. class TC_StandOff < Test::Unit::TestCase
  27. TMP = Tempfile.new('tmp.standoff')
  28. LETTER_ANNON = {
  29. 'one' =>'1',
  30. 'two' =>'2',
  31. 'three'=>'3',
  32. 'four' =>'4' }
  33. def setup
  34. @so = StandOff.new('1234')
  35. @so.read_annotation_file(StringIO.new <<'EOD')
  36. 0 3 hundread-and-twenty-three
  37. 0 2 twelve
  38. 0 1 one
  39. 1 3 twenty-three
  40. 1 2 two
  41. 2 4 thirty-four
  42. 2 3 three
  43. 3 4 Four
  44. 3 4 four
  45. EOD
  46. s =<<'EOD'
  47. 0 22 ptb_article id="mydoc"
  48. 0 22 NP
  49. 4 8 NN
  50. 4 8 synonym
  51. 14 17 JJ
  52. 14 17 synonym
  53. 18 22 NN
  54. 18 22 synonym
  55. EOD
  56. @so2 = StandOff.new('the girl with red eyes', StringIO.new(s))
  57. end
  58.  
  59. def test_join
  60. s1 = <<'EOD'
  61. 0 22 ptb_article id="mydoc"
  62. 0 22 NP
  63. 4 8 NN
  64. 14 17 JJ
  65. 18 22 NN
  66. EOD
  67. s2 = <<'EOD'
  68. 4 8 synonym
  69. 14 17 synonym
  70. 18 22 synonym
  71. EOD
  72. so = StandOff.new()
  73. so.read_annotation_file(StringIO.new(s1))
  74. so.add_annotations(StringIO.new(s2))
  75. assert_equal(<<'EOD', so.annotations.map{|x| x.to_s+"\n"}.join)
  76. 0 22 ptb_article id="mydoc"
  77. 0 22 NP
  78. 4 8 NN
  79. 4 8 synonym
  80. 14 17 JJ
  81. 14 17 synonym
  82. 18 22 NN
  83. 18 22 synonym
  84. EOD
  85. end
  86. def test_substr
  87. assert_equal('123', @so.substr(0, 3))
  88. assert_equal( '23', @so.substr(1, 3))
  89. assert_equal('123', @so.substr(Annotation.new(0, 3)))
  90. end
  91. def test_matching_annotations
  92. assert_equal([Annotation.new(0, 1, 'a')],
  93. StandOff.new('', [Annotation.new(0, 1, 'A'),
  94. Annotation.new(0, 1, 'a'),
  95. Annotation.new(0, 1, 'b')]).
  96. matching_disjoint_annotations(/^[a-z]$/))
  97.  
  98. assert_equal([Annotation.new(0, 0, '0'),
  99. Annotation.new(1, 2, 'b'),
  100. Annotation.new(2, 3, 'c')],
  101. StandOff.new('', [Annotation.new(0, 3, '-'),
  102. Annotation.new(0, 0, '0'),
  103. Annotation.new(1, 2, 'b'),
  104. Annotation.new(2, 3, 'c__'),
  105. Annotation.new(2, 3, 'c'),
  106. Annotation.new(3, 4, 'd')]).
  107. matching_disjoint_annotations(/^.$/, Annotation.new(0, 3, '-')))
  108.  
  109. regexp = Regexp.new('^('+LETTER_ANNON.keys.join('|')+')$')
  110. assert_equal(Set.new(LETTER_ANNON.to_a),
  111. Set.new(@so.matching_disjoint_annotations(regexp).map{|x| [x.tag, @so.substr(x)]}))
  112. s = Set.new
  113. @so.each_matching_disjoint_annotation(regexp) do |a,_|
  114. s << [a.tag, @so.substr(a)]
  115. end
  116. assert_equal(Set.new(LETTER_ANNON.to_a), s)
  117. end
  118.  
  119. def test_annotations_select
  120. assert_equal([Annotation.new(0,1,''), Annotation.new(0,0,'')],
  121. StandOff.new('', [Annotation.new(0, 2, ''),
  122. Annotation.new(0, 1, ''),
  123. Annotation.new(0, 0, '')]).annotations do |a|
  124. a.end - a.start <= 1
  125. end)
  126. end
  127.  
  128. def test_enclosing_annotation
  129. assert_equal(Annotation.new(0,1,'b'),
  130. StandOff.new('', [Annotation.new(0, 1, 'a'),
  131. Annotation.new(0, 1, 'b'),
  132. Annotation.new(0, 1, 'c')]).enclosing_annotation(Annotation.new(0, 1, 'c')))
  133. assert_equal(nil,
  134. StandOff.new('', [Annotation.new(0, 1, 'a'),
  135. Annotation.new(0, 1, 'b'),
  136. Annotation.new(0, 1, 'c')]).enclosing_annotation(Annotation.new(0, 100, 'd')))
  137. assert_equal(Annotation.new(0,2,'twelve'),
  138. @so.enclosing_annotation([0,1]))
  139. assert_equal(Annotation.new(2,4,'thirty-four'),
  140. @so.enclosing_annotation([2,3]))
  141. assert_equal(Annotation.new(1,3,'twenty-three'),
  142. @so.enclosing_annotation([2,3],2))
  143. assert_equal(nil,
  144. @so.enclosing_annotation([2,4]))
  145. assert_equal(Annotation.new(3,4,'Four'),
  146. @so.enclosing_annotation([3,4,'four']))
  147. end
  148. def test_near
  149. so = StandOff.new('123',
  150. StringIO.new(<<'EOD'
  151. 1 3 twenty-three
  152. 1 2 two
  153. 1 2 Two
  154. 1 2 TWO
  155. 2 3 three
  156. EOD
  157. ))
  158. assert_equal([Annotation.new(1,2,'TWO'),
  159. Annotation.new(1,2,'Two'),
  160. Annotation.new(1,2,'two')],
  161. so.near_annotations(Annotation.new(1,2,'two'), 0))
  162. end
  163. def test_subset
  164. assert_equal([ Annotation.new(1,3,'twenty-three'),
  165. Annotation.new(1,2,'two'),
  166. Annotation.new(2,3,'three') ],
  167. @so.subset([1,3,'twenty-three']).annotations)
  168. assert_equal(@so2.annotations, @so2.subset(@so2.annotations[0]).annotations)
  169. end
  170. def test_bsearch
  171. assert_equal((1...3), [1,2,2,3,4].bsearch_range{|x| x <=> 2})
  172. assert_equal((0...0), [1,2,2,3,4].bsearch_range{|x| x <=> 0})
  173. assert_equal((4...4), [1,2,2,3,4].bsearch_range{|x| x <=> 3.5})
  174. assert_equal((5...5), [1,2,2,3,4].bsearch_range{|x| x <=> 5})
  175. assert_equal((4...5), [1,2,2,3,4].bsearch_range{|x| x <=> 4})
  176. assert_equal((0...0), [].bsearch_range{|x| x <=> 2})
  177. assert_equal((2...2), [[1,2],[2,1],[5,6],[5,1]].bsearch_range{|x| x[0] <=> 4})
  178. assert_equal(Annotation.new(2,4,'thirty-four'),
  179. @so.annotations[@so.annotations.bsearch_lower_boundary{|x| [x.start, -x.end] <=> [2,-@so.body.length-1]}])
  180. assert_equal(Annotation.new(3,4,'Four'),
  181. @so.annotations[@so.annotations.bsearch_lower_boundary{|x| [x.start, -x.end] <=> [2,0]}])
  182. assert_equal(0,
  183. @so.annotations.bsearch_lower_boundary{|x| [x.start, -x.end] <=> [0,-@so.body.length-1]})
  184. end
  185. def test_add_annotation
  186. so = StandOff.new
  187. @so.annotations.reverse.each do |a|
  188. so.add_annotation(a, false)
  189. end
  190. assert_equal(@so.annotations,
  191. so.annotations)
  192. end
  193. def test_following_preceding
  194. assert_equal([Annotation.new(2,3,'three'),Annotation.new(3,4,'four')],
  195. @so.following_annotations([0,2],2))
  196. assert_equal([Annotation.new(2,3,'three')],
  197. @so.following_annotations([0,2],1))
  198. assert_equal([Annotation.new(3,4,'four')],
  199. @so.following_annotations([0,3],1))
  200. assert_equal([],
  201. @so.following_annotations([0,4],1))
  202. assert_equal([@so.following_annotations([0,2],1)[0],
  203. @so.following_annotations(@so.following_annotations([0,2],1)[0],1)[0]],
  204. @so.following_annotations([0,2],2))
  205. assert_equal([Annotation.new(0,1,'one'),Annotation.new(1,2,'two'),Annotation.new(2,3,'three')],
  206. @so.preceding_annotations([3,4],3))
  207. assert_equal([Annotation.new(0,1,'one'),Annotation.new(1,2,'two'),Annotation.new(2,3,'three')],
  208. @so.preceding_annotations(@so.annotations.last,3))
  209. assert_equal([], @so.preceding_annotations([0,3]))
  210. assert_equal([], @so.following_annotations([2,4]))
  211. assert_equal([Annotation.new(2,3,'three'),Annotation.new(3,4,'four')],
  212. @so.preceding_annotations([4,4],2))
  213. end
  214. def test_following_preceding2
  215. assert_equal([Annotation.new(4,8,'synonym')],
  216. @so2.preceding_annotations([14,17], 3))
  217. assert_equal([Annotation.new(4,8,'synonym'),Annotation.new(14,17,'synonym')],
  218. @so2.preceding_annotations([18,22], 3))
  219. assert_equal([], @so2.preceding_annotations([4,8], 3))
  220. assert_equal([Annotation.new(14,17,'synonym'), Annotation.new(18,22,'synonym')],
  221. @so2.following_annotations([4,8], 3))
  222. end
  223. def test_following_preceding3
  224. # TODO: support the case where an empty annotation is preceding
  225. # so = StandOff.new(@so.body)
  226. # so.add_annotations(@so.annotations)
  227. # so.add_annotation(Annotation.new(0,0,'BEGIN'))
  228. # so.add_annotation(Annotation.new(4,4,'END'))
  229. # assert_equal([Annotation.new(0,0,'BEGIN')],
  230. # so.preceding_annotations([0,1],2))
  231. end
  232. def test_following_preceding4
  233. so = StandOff.new('the girl with red eyes , girl and chicks')
  234. so.read_annotation_file(StringIO.new <<'EOD')
  235. 0 40 ptb_article id="mydoc"
  236. 0 22 S
  237. 0 22 NP-SBJ-1
  238. 4 8 NN
  239. 4 8 NN2
  240. 14 17 JJ
  241. 14 17 JJ2
  242. 18 22 NN
  243. 18 22 NN2
  244. 25 29 NN
  245. 34 40 NN
  246. EOD
  247. assert_equal([Annotation.new(14,17,'JJ2'),
  248. Annotation.new(18,22,'NN2'),
  249. Annotation.new(25,29,'NN')], so.following_annotations([4,8], 3))
  250. end
  251. def test_contain2
  252. assert_equal([Annotation.new(4,8,'NN'),
  253. Annotation.new(4,8,'synonym')],
  254. @so2.containeds([0,16]))
  255. assert_equal([Annotation.new(14,17,'JJ'),
  256. Annotation.new(14,17,'synonym')],
  257. @so2.containeds([14,18]))
  258. assert_equal([Annotation.new(0,22,'ptb_article id="mydoc"'),
  259. Annotation.new(0,22,'NP')],
  260. @so2.containings([14,18]))
  261. # assert_equal([Annotation.new(14,17,'synonym')],
  262. # @so2.containeds([14,17,'synonym']))
  263. # assert_equal([Annotation.new(0,22,'ptb_article id="mydoc"'),
  264. # Annotation.new(0,22,'NP')],
  265. # @so2containings([14,17,'JJ']))
  266. end
  267. def test_contained_annotations
  268. s = []
  269. @so.each_contained_annotation([1,3]) do |a|
  270. s << a
  271. end
  272. assert_equal([Annotation.new(1,3,'twenty-three'),
  273. Annotation.new(1,2,'two'),
  274. Annotation.new(2,3,'three')],
  275. s)
  276. end
  277. def test_containing_annotations
  278. ans1 = [Annotation.new(0,3,'hundread-and-twenty-three'),
  279. Annotation.new(0,2,'twelve'),
  280. Annotation.new(0,1,'one')]
  281. assert_equal(ans1,
  282. @so.containings([0,1]))
  283. s = []
  284. @so.each_containing_annotation([0,1]) do |a|
  285. s << a
  286. end
  287. assert_equal(ans1, s)
  288. [[2,3], Annotation.new(2,3)].each do |region|
  289. assert_equal([Annotation.new(0,3,'hundread-and-twenty-three'),
  290. Annotation.new(1,3,'twenty-three'),
  291. Annotation.new(2,4,'thirty-four'),
  292. Annotation.new(2,3,'three')],
  293. @so.containings(region))
  294. end
  295. end
  296. def test_write_annotation
  297. @so.write_annotation_file(File.open(TMP.path,'w'))
  298. so = StandOff.new(@so.body)
  299. TMP.close
  300. so.read_annotation_file(File.open(TMP.path,'r'))
  301. assert_equal(@so.annotations,
  302. so.annotations)
  303. end
  304. def test_render
  305. assert_equal("<one>1</one><two>2</two><three>3</three><four>4</four>",
  306. @so.render{|a| a.tag =~ /^(one|two|three|four)$/})
  307. end
  308. def test_render_empties
  309. so = StandOff.new('1234.')
  310. so.add_annotations([
  311. Annotation.new(0, 1, 'thousands'),
  312. Annotation.new(1, 1, 'thousands-separator'),
  313. Annotation.new(1, 4, 'ones'),
  314. ])
  315. assert_equal("<THOUSANDS>1</THOUSANDS><ONES>234</ONES>.",
  316. so.render(
  317. Proc.new{|a| "<#{a.tag_name.upcase}>"},
  318. Proc.new{|a| "</#{a.tag_name.upcase}>"},
  319. Proc.new{|a| ""}
  320. )
  321. )
  322. assert_equal("<thousands>1</thousands><ones><thousands-separator/>234</ones>.",
  323. so.render)
  324. end
  325. end
Add Comment
Please, Sign In to add comment