Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Traceback (most recent call last):
- File "CSV_dico.py", line 62, in <module>
- writer.writerow(k)
- File "C:\Users\Emmanuelle\AppData\Local\Programs\Python\Python37-32\lib\csv.py", line 155, in writerow
- return self.writer.writerow(self._dict_to_list(rowdict))
- File "C:\Users\Emmanuelle\AppData\Local\Programs\Python\Python37-32\lib\csv.py", line 148, in _dict_to_list
- wrong_fields = rowdict.keys() - self.fieldnames
- AttributeError: 'str' object has no attribute 'keys'
- Traceback (most recent call last):
- File "CSV_dico.py", line 75, in <module>
- if p not in stopwords:
- TypeError: argument of type 'WordListCorpusReader' is not iterable
- # -*- coding: UTF-8 -*-
- import codecs
- import re
- import os
- import sys, argparse
- import subprocess
- import pprint
- import csv
- from itertools import islice
- import pickle
- import nltk
- from nltk import tokenize
- from nltk.tokenize import sent_tokenize, word_tokenize
- from nltk.corpus import stopwords
- try:
- import treetaggerwrapper
- from treetaggerwrapper import TreeTagger, make_tags
- print("import TreeTagger OK")
- except:
- print("Import TreeTagger pas Ok")
- from itertools import islice
- from collections import defaultdict
# Load the sentiment lexicon that was previously pickled into "dict_pickle".
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only run this against a lexicon file you produced yourself.
# The context manager closes the handle as soon as the lexicon is loaded
# (the original left the file open for the rest of the run).
with open("dict_pickle", "rb") as pickle_in:
    dico_lexique = pickle.load(pickle_in)
# Extract the verbatim column: map the first field of every data row to its
# second field. The input is the ';'-separated file named by the first
# command-line argument.
if len(sys.argv) < 2:
    # Fail with a usage message instead of an opaque IndexError on sys.argv[1].
    sys.exit("usage: {} <input.csv>".format(sys.argv[0]))

d = {}
with open(sys.argv[1], 'r', encoding='utf-8') as csv_file:
    csv_file.readline()  # discard the first line (the column headers)
    for line in csv_file:
        # Strip the line terminator first so that a row with exactly two
        # fields does not keep a trailing newline inside its verbatim.
        token = line.rstrip('\n').split(';')
        try:
            d[token[0]] = token[1]
        except IndexError:
            # Fewer than two fields on this line: report it and carry on.
            print(line)
print(d)
# Create the output file: one header line, then one "key;verbatim" line per
# entry of d.
# "\n" is the line separator; the previous literals had lost their backslash
# ("n"), which glued all records together with a stray letter 'n'.
CSV = "\n".join([k + ';' + v for k, v in d.items()])
# Write explicitly as UTF-8 (the encoding used to read the input) so the
# accented header does not depend on the platform's default encoding.
with open("sortie.csv", "w", encoding="utf-8") as output:
    output.write("clé;verbatim;clé;tag;clé;lemme;scorePos;scoreNeu;scoreNeg\n")
    output.write(CSV)
# Tag every verbatim with TreeTagger (configured with TAGLANG='fr') and keep
# the tagger output under the same key as the verbatim it came from.
tagger = treetaggerwrapper.TreeTagger(TAGLANG='fr')
d_tag = {cle: tagger.tag_text(texte) for cle, texte in d.items()}
# Write the tags to sortie.csv: one ';'-separated row per key, holding the
# key followed by every tagger output line for that verbatim.
# NOTE(review): mode "w" truncates sortie.csv, so anything written to it
# earlier in the run is replaced -- confirm that this is intended.
# newline="" lets the csv module control the line endings itself.
with open("sortie.csv", "w", encoding="utf-8", newline="") as output:
    # csv.DictWriter expects a list of field names and dict rows; passing it
    # "w" and then plain strings raised
    # "AttributeError: 'str' object has no attribute 'keys'".
    # The data is a key plus a list of strings, so a plain writer fits.
    writer = csv.writer(output, delimiter=';')
    for k, v in d_tag.items():
        writer.writerow([k, *v])
# Lemma dictionary: for every key, collect the third field of each tagger
# output line.
# NOTE(review): this relies on the tagger emitting tab-separated
# "word<TAB>tag<TAB>lemma" lines -- confirm against the treetaggerwrapper docs.
d_lemma = defaultdict(list)
for k, v in d_tag.items():
    for p in v:
        # Split on the tab character; splitting on the letter 't' (as the
        # previous literal did) cut words apart instead of separating fields.
        parts = p.split('\t')
        try:
            d_lemma[k].append(parts[2])
        except IndexError:
            # Line with fewer than three fields: show it and keep going.
            print(parts)
print(d_lemma)
# Filtered words: for every key, keep the lower-cased lemmas that are not
# French stop words.
# Build the set once so each membership test is a constant-time lookup.
stopWords = set(stopwords.words('french'))
d_filtered = defaultdict(list)
for k, v in d_lemma.items():
    for p in v:
        # Lower-case before the lookup so capitalised lemmas are also matched
        # (assumes the NLTK stop-word list is lower-case -- TODO confirm).
        lemma = p.lower()
        # Test against the stopWords set; testing against the `stopwords`
        # corpus reader raised "TypeError: argument of type
        # 'WordListCorpusReader' is not iterable".
        if lemma not in stopWords:
            # Append rather than assign, so every kept lemma is preserved
            # instead of only the last one for each key.
            d_filtered[k].append(lemma)
print(d_filtered)
- id;Verbatim;xml;score;
- 15;je suis monter à la boulangerie;4
- 44;tu es méchant;5
- 45;je te hais et la foret enchanté est belle de milles faux et les jeunes filles sont assises au bor de la mer;7
- 47;je ne comprends pas la vie et je suis perdue, aide moi stp maman je suis perdue tu es ma seule amie et je t'aime;8
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement