Advertisement
Guest User

Untitled

a guest
Jun 25th, 2019
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.99 KB | None | 0 0
  1. Traceback (most recent call last):
  2. File "CSV_dico.py", line 62, in <module>
  3. writer.writerow(k)
  4. File "C:\Users\Emmanuelle\AppData\Local\Programs\Python\Python37-32\lib\csv.py", line 155, in writerow
  5. return self.writer.writerow(self._dict_to_list(rowdict))
  6. File "C:\Users\Emmanuelle\AppData\Local\Programs\Python\Python37-32\lib\csv.py", line 148, in _dict_to_list
  7. wrong_fields = rowdict.keys() - self.fieldnames
  8. AttributeError: 'str' object has no attribute 'keys'
  9.  
  10. Traceback (most recent call last):
  11. File "CSV_dico.py", line 75, in <module>
  12. if p not in stopwords:
  13. TypeError: argument of type 'WordListCorpusReader' is not iterable
  14.  
  15. # -*- coding: UTF-8 -*-
  16. import codecs
  17. import re
  18. import os
  19. import sys, argparse
  20. import subprocess
  21. import pprint
  22. import csv
  23. from itertools import islice
  24. import pickle
  25. import nltk
  26. from nltk import tokenize
  27. from nltk.tokenize import sent_tokenize, word_tokenize
  28. from nltk.corpus import stopwords
  29.  
  # Optional import of the TreeTagger wrapper: report whether it is available.
  try:
      import treetaggerwrapper
      from treetaggerwrapper import TreeTagger, make_tags
      print("import TreeTagger OK")
  except ImportError:
      # Only a failed import is expected here; any other exception
      # (KeyboardInterrupt, SystemExit, ...) must not be swallowed.
      print("Import TreeTagger pas Ok")
  36.  
  37. from itertools import islice
  38. from collections import defaultdict
  39.  
  # Load the sentiment lexicon from the "dict_pickle" file.
  # NOTE(security): pickle.load can execute arbitrary code while
  # deserialising; only load a "dict_pickle" that comes from a trusted source.
  with open("dict_pickle", "rb") as pickle_in:
      # The context manager closes the file handle once the lexicon is loaded.
      dico_lexique = pickle.load(pickle_in)
  43.  
  44.  
  # Extract the verbatim column: map the first ';'-separated field of each
  # line to the second one.
  d = {}

  with open(sys.argv[1], 'r', encoding='utf-8') as csv_file:
      csv_file.readline()  # discard the first line of the input file
      for line in csv_file:
          token = line.split(';')
          try:
              d[token[0]] = token[1]
          except IndexError:
              # The line contains no ';' separator: print it and carry on
              # instead of aborting the whole extraction.
              print(line)
  print(d)
  57.  
  # Create the output file: a header line followed by one "key;verbatim"
  # line per entry of d.
  # Entries are joined with a real newline; joining on the letter "n" would
  # glue every record onto a single line.
  CSV = "\n".join(k + ';' + v for k, v in d.items())
  # Explicit UTF-8 so accented characters are written the same way the input
  # file is read, whatever the platform default encoding is.
  with open("sortie.csv", "w", encoding="utf-8") as output:
      output.write("clé;verbatim;clé;tag;clé;lemme;scorePos;scoreNeu;scoreNeg\n")
      output.write(CSV)
  63.  
  64.  
  # Run the French TreeTagger over every verbatim and keep the tagger
  # output under the same key as the verbatim it came from.
  tagger = treetaggerwrapper.TreeTagger(TAGLANG='fr')
  d_tag = {key: tagger.tag_text(val) for key, val in d.items()}
  #print(d_tag)
  71.  
  72.  
  # Write the tags of every entry of d_tag to the output file: one row
  # holding the key, followed by one row holding that key's tags.
  # NOTE(review): mode "w" truncates "sortie.csv", so anything written to
  # that file earlier in the script is lost -- confirm whether append ("a")
  # was intended.
  with open("sortie.csv", "w", encoding="utf-8", newline="") as output:
      # csv.DictWriter expects a list of field names and dict rows; the data
      # here is a key (str) plus a sequence of tag strings, so a plain
      # csv.writer is the matching tool. newline="" is what the csv module
      # requires to avoid blank lines between rows on Windows.
      writer = csv.writer(output, delimiter=";")
      for k, v in d_tag.items():
          # Wrap the key in a list: a bare string would be written one
          # character per column.
          writer.writerow([k])
          writer.writerow(v)
  79.  
  80.  
  81.  
  82.  
  # Lemma dictionary: for each key, the list of third TAB-separated fields
  # found in that key's tagger output.
  # NOTE(review): assumes each tagger line is "word<TAB>pos<TAB>lemma" --
  # confirm against the treetaggerwrapper documentation.
  d_lemma = defaultdict(list)

  for k, v in d_tag.items():
      for p in v:
          # Split on the TAB character; splitting on the letter 't' would
          # cut the words themselves apart.
          parts = p.split('\t')
          try:
              d_lemma[k].append(parts[2])
          except IndexError:
              # Fewer than three fields on this line: show it and move on.
              print(parts)
  print(d_lemma)
  94.  
  # Filtered words: for each key, the lower-cased lemmas that are not
  # French stop words.
  stopWords = set(stopwords.words('french'))
  d_filtered = defaultdict(list)
  for k, v in d_lemma.items():
      for p in v:
          # Lower-case before the lookup so capitalised lemmas are compared
          # in the same form as they are stored.
          word = p.lower()
          # Test against the stopWords set built above; the imported
          # `stopwords` corpus reader does not support the `in` operator.
          if word not in stopWords:
              # Append, so every kept lemma is stored instead of each one
              # overwriting the previous value for the key.
              d_filtered[k].append(word)
  print(d_filtered)
  106.  
  107. id;Verbatim;xml;score;
  108. 15;je suis monter à la boulangerie;4
  109. 44;tu es méchant;5
  110. 45;je te hais et la foret enchanté est belle de milles faux et les jeunes filles sont assises au bor de la mer;7
  111. 47;je ne comprends pas la vie et je suis perdue, aide moi stp maman je suis perdue tu es ma seule amie et je t'aime;8
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement