Advertisement
Guest User

Untitled

a guest
Feb 21st, 2019
46
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.02 KB | None | 0 0
  1. import json
  2.  
  3. with open('ner_dataset.csv', 'r') as f:
  4. lines = f.readlines()
  5.  
  6. lines.pop(0)
  7. lines.pop(0)
  8.  
  9. sentences = []
  10.  
  11. words = []
  12. poss = []
  13. tags = []
  14.  
  15.  
  16. def csv(x):
  17. y = []
  18. buff = ''
  19. escaped = False
  20. for c in x:
  21. if c == '"':
  22. if escaped:
  23. escaped = False
  24. else:
  25. escaped = True
  26. elif c == ',':
  27. if escaped:
  28. buff += c
  29. else:
  30. y.append(buff)
  31. buff = ''
  32. else:
  33. buff += c
  34. if buff:
  35. y.append(buff)
  36. return y
  37.  
  38.  
  39. for x in lines:
  40. x = x[:-1]
  41. if x.startswith("Sentence: "):
  42. sentences.append([words, poss, tags])
  43. words = []
  44. poss = []
  45. tags = []
  46. x = x.split(": ")[1]
  47. _, word, pos, tag = csv(x)
  48. words.append(word)
  49. poss.append(pos)
  50. tags.append(tag)
  51.  
  52. sentences.append([words, poss, tags])
  53.  
  54. with open('data.json', 'w') as f:
  55. json.dump(sentences, f)
  56.  
  57. with open('data.json', 'r') as f:
  58. sentences = json.load(f)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement