Guest User

Untitled

a guest
Nov 24th, 2017
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.26 KB | None | 0 0
  1. """Tags a file with NEXT_SCI in extra feature column. Reads and writes vert files.
  2. """
  3. import json
  4.  
  5. #from pip._vendor.pyparsing import line
  6.  
  7.  
  8. VFILE = 'test_next.vert'
  9.  
  10. def split_line(line):
  11. """Split a line into five parts, word, tag, lempos, ti, sci"""
  12. # TODO: Speak to Diana about the spaces in the vert file - do they mean
  13. # anything?
  14. line = line.strip().split()
  15. if len(line) == 1:
  16. word = line[0]
  17. pos, lempos, tag = None, None, None
  18. elif len(line) == 3:
  19. word, pos, lempos = line
  20. tag = None
  21. elif len(line) == 4:
  22. word, pos, lempos, tag = line
  23. return [word, pos, lempos, tag]
  24.  
  25. def tag_next_sci(lines):
  26. """Loops through lines of original document to add to new file (tagged)
  27. """
  28. taggedlines = []
  29. for line in lines:
  30. taggedlines.append(tagline_next_sci(line, taggedlines))
  31. return taggedlines
  32.  
  33.  
  34. def tagline_next_sci(line, taggedlines):
  35. """Assigns an indicator tag to a line
  36. """
  37. #<> are structural and do not need to be considered for feature tags so can be committed directly
  38. if line.startswith('<'):
  39. return line
  40. #look back at previous line to see if SCI, if so tag current line
  41. previous_line = taggedlines[-1]
  42. previous_line = split_line(previous_line)
  43. line = split_line(line)
  44. #look at last column. if SCI, print line, go to next line and add tag in final column ("tttNXT_SCIn")
  45. if previous_line[-1] == "SCI":
  46. if len(line) == 3:
  47. print(line + "tttSCI_MODn")
  48. return(line + "tttSCI_MODn")
  49. if len(line) == 4:
  50. print(line + "ttSCI_MODn")
  51. return(line + "ttSCI_MODn")
  52. return line
  53.  
  54. def read_vfile(fname):
  55. """Reads a vert file
  56. """
  57. with open(fname, 'r') as vfile:
  58. lines = vfile.readlines()
  59. return lines
  60.  
  61. def write_vfile(fname, taggedlines):
  62. """Writes a vert file
  63. """
  64. # write to file
  65. with open(fname, 'w') as outfile:
  66. outfile.writelines(taggedlines)
  67.  
  68. def tag_vert_sci_next(fname, fname_out):
  69. """Creates a new file with tags
  70. """
  71. # read vertical file
  72. lines = read_vfile(fname)
  73. # tag file
  74. taggedlines = tag_next_sci(lines)
  75. # call write file
  76. write_vfile(fname_out, taggedlines)
  77.  
  78. def main(fname, fname_out):
  79. #call sci_next tagging
  80. tag_vert_sci_next('test_next.vert', fname_out)
  81.  
  82. if __name__ == "__main__":
  83. main('test_next.vert', 'zenodo_tagged_SCI_MOD.vert')
  84.  
  85. Traceback (most recent call last):
  86. File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 123, in <module>
  87. main('test_next.vert', 'zenodo_tagged_SCI_MOD.vert')
  88. File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 120, in main
  89. tag_vert_sci_next('test_next.vert', fname_out)
  90. File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 78, in tag_vert_sci_next
  91. taggedlines = tag_next_sci(lines)
  92. File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 31, in tag_next_sci
  93. taggedlines.append(tagline_next_sci(line, taggedlines))
  94. File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 43, in tagline_next_sci
  95. previous_line = split_line(previous_line)
  96. File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 14, in split_line
  97. line = line.strip().split()
  98. AttributeError: 'list' object has no attribute 'strip'
Add Comment
Please, Sign In to add comment