Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """Tags a file with NEXT_SCI in extra feature column. Reads and writes vert files.
- """
- import json
- #from pip._vendor.pyparsing import line
- VFILE = 'test_next.vert'
- def split_line(line):
- """Split a line into five parts, word, tag, lempos, ti, sci"""
- # TODO: Speak to Diana about the spaces in the vert file - do they mean
- # anything?
- line = line.strip().split()
- if len(line) == 1:
- word = line[0]
- pos, lempos, tag = None, None, None
- elif len(line) == 3:
- word, pos, lempos = line
- tag = None
- elif len(line) == 4:
- word, pos, lempos, tag = line
- return [word, pos, lempos, tag]
- def tag_next_sci(lines):
- """Loops through lines of original document to add to new file (tagged)
- """
- taggedlines = []
- for line in lines:
- taggedlines.append(tagline_next_sci(line, taggedlines))
- return taggedlines
- def tagline_next_sci(line, taggedlines):
- """Assigns an indicator tag to a line
- """
- #<> are structural and do not need to be considered for feature tags so can be committed directly
- if line.startswith('<'):
- return line
- #look back at previous line to see if SCI, if so tag current line
- previous_line = taggedlines[-1]
- previous_line = split_line(previous_line)
- line = split_line(line)
- #look at last column. if SCI, print line, go to next line and add tag in final column ("tttNXT_SCIn")
- if previous_line[-1] == "SCI":
- if len(line) == 3:
- print(line + "tttSCI_MODn")
- return(line + "tttSCI_MODn")
- if len(line) == 4:
- print(line + "ttSCI_MODn")
- return(line + "ttSCI_MODn")
- return line
- def read_vfile(fname):
- """Reads a vert file
- """
- with open(fname, 'r') as vfile:
- lines = vfile.readlines()
- return lines
- def write_vfile(fname, taggedlines):
- """Writes a vert file
- """
- # write to file
- with open(fname, 'w') as outfile:
- outfile.writelines(taggedlines)
- def tag_vert_sci_next(fname, fname_out):
- """Creates a new file with tags
- """
- # read vertical file
- lines = read_vfile(fname)
- # tag file
- taggedlines = tag_next_sci(lines)
- # call write file
- write_vfile(fname_out, taggedlines)
- def main(fname, fname_out):
- #call sci_next tagging
- tag_vert_sci_next('test_next.vert', fname_out)
- if __name__ == "__main__":
- main('test_next.vert', 'zenodo_tagged_SCI_MOD.vert')
- Traceback (most recent call last):
- File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 123, in <module>
- main('test_next.vert', 'zenodo_tagged_SCI_MOD.vert')
- File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 120, in main
- tag_vert_sci_next('test_next.vert', fname_out)
- File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 78, in tag_vert_sci_next
- taggedlines = tag_next_sci(lines)
- File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 31, in tag_next_sci
- taggedlines.append(tagline_next_sci(line, taggedlines))
- File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 43, in tagline_next_sci
- previous_line = split_line(previous_line)
- File "/home/sandra/git/trophic/tagging/tagging_NEXT.py", line 14, in split_line
- line = line.strip().split()
- AttributeError: 'list' object has no attribute 'strip'
Add Comment
Please, Sign In to add comment