Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import texte
# Characters treated as punctuation: counted by compter_ponctuation and
# replaced with spaces before word splitting in the other helpers.
ponctuation = ",.:;'-"
def compter_ponctuation(txt):
    """Return how many items of ``txt`` are found in ``ponctuation``.

    ``txt`` is walked item by item (character by character for a string);
    every item contained in the module-level ``ponctuation`` constant adds
    one to the total.
    """
    return sum(1 for caractere in txt if caractere in ponctuation)
def compter_mots(txt):
    """Return the number of words in ``txt``.

    Every character listed in the module-level ``ponctuation`` constant is
    first replaced by a space, then the text is split on runs of whitespace.

    Splitting with ``str.split()`` (no argument) rather than ``split(" ")``
    means newlines and tabs separate words too, and no empty strings are
    produced, so multi-line text is counted correctly.
    """
    for signe in ponctuation:
        txt = txt.replace(signe, " ")
    # split() without a separator never yields empty strings, so the length
    # of the result is directly the word count.
    return len(txt.split())
def compter_frequences(txt):
    """Return a dict mapping each word of ``txt`` to its occurrence count.

    Every character listed in the module-level ``ponctuation`` constant is
    first replaced by a space, then the text is split on runs of whitespace.
    Keys appear in order of first occurrence.

    Splitting with ``str.split()`` (no argument) rather than ``split(" ")``
    means words separated by newlines or tabs are no longer fused into a
    single key, and no empty strings need to be filtered out.
    """
    for signe in ponctuation:
        txt = txt.replace(signe, " ")
    mots_comptes = {}
    for mot in txt.split():
        mots_comptes[mot] = mots_comptes.get(mot, 0) + 1
    return mots_comptes
- #print(texte.article)
- #print("\nponctuation:")
- #print(compter_ponctuation(texte.article))
- #print("mots:")
- #print(compter_mots(texte.article))
- #print("mots frequents:")
- #decompte = compter_frequences(texte.article)
- #for i in sorted(decompte,key=decompte.get,reverse=True ):
- # print(i,decompte[i])
# Now analyse the survey: merge every entry of donnees.donnees into one
# lowercase string and plot the frequency distribution of its tokens.
import donnees
import nltk

txt = " ".join(donnees.donnees).lower()

# Hand-rolled alternative, kept for reference:
#   decompte = compter_frequences(txt)
#   for i in sorted(decompte, key=decompte.get, reverse=True):
#       print(i, decompte[i])

token = txt.split()
freq = nltk.FreqDist(token)
# First argument 20 presumably limits the plot to the 20 most frequent
# tokens -- confirm against the NLTK FreqDist.plot documentation.
freq.plot(20, cumulative=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement