Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json

# Read the whole dataset into memory as a list of raw text lines
# (each line still carries its trailing newline, if it had one).
with open('ner_dataset.csv', 'r') as f:
    lines = list(f)

# Discard the first two lines of the file.
# NOTE(review): presumably the first line is the column header; dropping a
# second line as well would discard the first data row -- confirm against
# the actual file before relying on the first sentence being complete.
for _ in range(2):
    lines.pop(0)

# `sentences` collects finished [words, poss, tags] triples; the three
# parallel lists below accumulate the sentence currently being read.
sentences = []
words, poss, tags = [], [], []
def csv(x):
    """Split one CSV-formatted line into a list of field strings.

    Fields are separated by commas. A double quote toggles "quoted" mode;
    while quoted, commas are kept as part of the field. Inside a quoted
    section two consecutive double quotes stand for one literal double
    quote (so the field ``\"\"\"\"`` decodes to a single ``"`` character).

    Note: the function name matches the standard-library ``csv`` module;
    it is kept because callers in this script use it under this name.

    Args:
        x: A single line of text without its line terminator.

    Returns:
        The list of decoded fields. An empty input string yields an empty
        list; any non-empty input yields one field per comma-separated
        position, including an empty trailing field (``"a,b,"`` gives
        ``['a', 'b', '']``).
    """
    fields = []
    current = []
    in_quotes = False
    i = 0
    n = len(x)
    while i < n:
        ch = x[i]
        if ch == '"':
            if in_quotes and i + 1 < n and x[i + 1] == '"':
                # Doubled quote inside a quoted section: emit one literal
                # quote and skip the second character of the pair.
                current.append('"')
                i += 1
            else:
                in_quotes = not in_quotes
        elif ch == ',' and not in_quotes:
            fields.append(''.join(current))
            current = []
        else:
            # Ordinary character, or a comma protected by quotes.
            current.append(ch)
        i += 1
    # Always flush the last field for non-empty input so that an empty
    # trailing column is not silently dropped.
    if x:
        fields.append(''.join(current))
    return fields
# Group the flat per-token rows into sentences. Each entry of `sentences`
# is a [words, poss, tags] triple of parallel lists. A row starting with
# "Sentence: " closes the sentence accumulated so far and opens a new one;
# whatever rows preceded the first such marker form the first entry.
for x in lines:
    # Strip only the line terminator. Slicing off the last character
    # unconditionally would corrupt a final line that has no newline.
    x = x.rstrip('\n')
    if not x:
        # Blank lines carry no fields and would break the 4-way unpack.
        continue
    if x.startswith("Sentence: "):
        sentences.append([words, poss, tags])
        words = []
        poss = []
        tags = []
        # Split on the first ": " only, so a later field that happens to
        # contain ": " does not truncate the row.
        x = x.split(": ", 1)[1]
    # The first column (sentence number, or empty) is not needed.
    _, word, pos, tag = csv(x)
    words.append(word)
    poss.append(pos)
    tags.append(tag)

# Flush the sentence that was still being accumulated at end of input.
sentences.append([words, poss, tags])

# Persist the grouped data as JSON.
with open('data.json', 'w') as f:
    json.dump(sentences, f)

# Read the file back, replacing the in-memory structure with its
# JSON round-tripped form.
with open('data.json', 'r') as f:
    sentences = json.load(f)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement