Advertisement
Guest User

Untitled

a guest
Nov 29th, 2015
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.36 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import sys, re
  4.  
  5. if len (sys.argv) < 3:
  6. sys.exit ("Usage: %s auto gold" % sys.argv[0])
  7.  
  8. def collect_info (fn):
  9. sep = re.compile (r'(?:\t|\s)*')
  10. chunk, depnd = [], []
  11. l, pos, head = 0, [], []
  12. for line in open (fn):
  13. if line[0] == '#':
  14. pass
  15. elif line[0] == '*':
  16. if line[-3] != '-':
  17. head.append (int (line[:-1].split (' ')[2][:-1]))
  18. pos.append (l)
  19. elif line[0] == 'E':
  20. pos.append (l)
  21. chunk_pos = []
  22. for i in range (1, len (pos)):
  23. chunk_pos.append (tuple ([pos[i-1], pos[i]]))
  24. chunk.append (set (chunk_pos))
  25. depnd.append (set ())
  26. for i in range (0, len (chunk_pos) - 1):
  27. depnd[-1].add (tuple ([chunk_pos[i], chunk_pos[head[i]]]))
  28. l, pos, head = 0, [], []
  29.  
  30. else:
  31. l += len (sep.split (line[:-1], 1)[0])
  32. return chunk, depnd
  33.  
  34. chunk_a, depnd_a = collect_info (sys.argv[1])
  35. chunk_g, depnd_g = collect_info (sys.argv[2])
  36.  
  37. if len (chunk_a) != len (chunk_g):
  38. sys.stderr.write ("# sentences mismatched\n")
  39.  
  40. n = len (chunk_a)
  41.  
  42. ok = sum (len (chunk_a[i] & chunk_g[i]) for i in range (n))
  43. sok = sum (1 if len (chunk_a[i] & chunk_g[i]) == len (chunk_g[i]) else 0 for i in range (n))
  44. all_a = sum (len (chunk) for chunk in chunk_a)
  45. all_g = sum (len (chunk) for chunk in chunk_g)
  46.  
  47. pre = ok * 1.0 / all_a
  48. rec = ok * 1.0 / all_g
  49.  
  50. sys.stderr.write ("chunk:\n")
  51. sys.stderr.write (" precision: %.4f (%d/%d)\n" % (pre, ok, all_a))
  52. sys.stderr.write (" recall: %.4f (%d/%d)\n" % (rec, ok, all_g))
  53. sys.stderr.write (" f1: %.4f\n" % (2 * pre * rec / (pre + rec)))
  54. sys.stderr.write (" sent acc.: %.4f (%d/%d)\n" % (sok * 1.0 / n, sok, n))
  55.  
  56. ok = sum (len (depnd_a[i] & depnd_g[i]) for i in range (n))
  57. sok = sum (1 if len (depnd_a[i] & depnd_g[i]) == len (depnd_g[i]) else 0 for i in range (n))
  58. all_a = sum (len (depnd) for depnd in depnd_a)
  59. all_g = sum (len (depnd) for depnd in depnd_g)
  60.  
  61. pre = ok * 1.0 / all_a
  62. rec = ok * 1.0 / all_g
  63.  
  64. sys.stderr.write ("depnd:\n")
  65. sys.stderr.write (" precision: %.4f (%d/%d)\n" % (pre, ok, all_a))
  66. sys.stderr.write (" recall: %.4f (%d/%d)\n" % (rec, ok, all_g))
  67. sys.stderr.write (" f1: %.4f\n" % (2 * pre * rec / (pre + rec)))
  68. sys.stderr.write (" sent acc.: %.4f (%d/%d)\n" % (sok * 1.0 / n, sok, n))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement