# Untitled

import operator
from collections import Counter

import nltk

# Reading text
# The context manager guarantees the file handle is closed once it is read.
# NOTE(review): no encoding is passed, so the platform default is used;
# confirm it matches the file (the stopwords below contain accented letters).
with open('testimony.txt', 'r') as textfile:
    all_text = textfile.read()

# Chunks of the text are delimited by two consecutive blank lines.
# NOTE(review): index 1 is the *second* chunk of the split. It is only the
# first page if the file starts with a header or a separator -- confirm.
page_1 = all_text.split('\n\n\n')[1]  # Selecting the first page

# Stopwords List
# NLTK's French stopword list, extended with extra tokens to ignore. Matching
# is case-sensitive, which is why capitalised variants are listed explicitly.
stopwords = nltk.corpus.stopwords.words('french')
stopwords.extend(['', '-', ':', 'Il', 'Nous', 'a', 'donc', 'comme', 'cette',
                  'ils', 'les', 'plus', "j'ai",
                  'En', '+', "c'est", 'après', 'Le', '|', 'vers',
                  "qu'il", 'tous', 'tout', 'dont', 'peu', "C'était"])
# A set makes each membership test O(1) instead of scanning the whole list.
stopword_set = set(stopwords)

# Words Frequencies on All Text
# NOTE(review): splitting on a single space keeps newlines attached to tokens
# and yields empty strings for repeated spaces (hence '' in the stopwords);
# consider all_text.split() if whitespace-agnostic tokenisation is wanted.
word_list = all_text.split(' ')

# Count every non-stopword token in a single pass over the text.
word_frequencies = Counter(w for w in word_list if w not in stopword_set)

# Keep the 15 most frequent tokens, highest count first; tokens with equal
# counts stay in order of first appearance in the text.
most_frequent_words = dict(word_frequencies.most_common(15))
print(most_frequent_words.keys())