Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Mon Jul 23 16:52:17 2018
- @author: Mouaad
- """
- # -*- coding: utf-8 -*-
- """
- Created on Sat Jul 21 01:50:01 2018
- @author: Mouaad
- """
- from nltk.corpus import stopwords
- from nltk.tokenize import word_tokenize, sent_tokenize
- from nltk.stem.snowball import SnowballStemmer
- # If NLTK reports an error about missing data, uncomment the two lines below
- # to download the required resources (tokenizer models and the stopwords corpus).
- #import nltk
- #nltk.download()
- from os import chdir
- chdir("C:\\Users\\Mouaad\\Desktop\\python\\projet_pythonSRD\\Corpus")  # NOTE(review): hard-coded, machine-specific directory; runs at import time and chdir raises OSError if the path is missing. Presumably this is the corpus folder that relative names given to contenue() resolve against -- confirm.
def contenue(nomfichier, encoding=None):
    """Return the whole text content of the file *nomfichier*.

    Args:
        nomfichier: Path of the file to read. A relative path is resolved
            against the current working directory.
        encoding: Optional text encoding forwarded to ``open``. The default
            ``None`` keeps the platform's default encoding.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the handle even if read() raises, so the
    # file descriptor is no longer leaked.
    with open(nomfichier, 'r', encoding=encoding) as f:
        return f.read()
def sum(text):
    """Build, print and return an extractive summary of a French *text*.

    Each token of the text is lower-cased; tokens found in NLTK's French
    stop-word list are skipped and the remaining ones are stemmed and
    counted.  Every sentence is then scored by adding the count of each
    stem that occurs as a substring of the lower-cased sentence.  Sentences
    whose score is strictly greater than 1.3 times the truncated average
    score are kept, in their original order.

    Args:
        text: The text to summarise.

    Returns:
        The kept sentences, each preceded by one space, concatenated into a
        single string.  An empty string is returned when no sentence could
        be scored (for example when *text* is empty).

    Note:
        This function shadows the built-in ``sum`` at module level, so the
        body accumulates with an explicit loop instead of calling ``sum``.
    """
    stemmer = SnowballStemmer("french")
    stop_words = set(stopwords.words("french"))

    # Frequency of every stemmed, non-stop-word token.
    freq_table = {}
    for word in word_tokenize(text):
        word = word.lower()
        if word in stop_words:
            continue
        stem = stemmer.stem(word)
        freq_table[stem] = freq_table.get(stem, 0) + 1

    # Score of each sentence; sentences matching no stem get no entry.
    sentences = sent_tokenize(text)
    sentence_value = {}
    for sentence in sentences:
        lowered = sentence.lower()  # lower-case once, not once per stem
        for stem, freq in freq_table.items():
            if stem in lowered:
                sentence_value[sentence] = sentence_value.get(sentence, 0) + freq

    summary = ''
    # Without this guard the average below divides by zero whenever no
    # sentence was scored (empty text, or text made only of stop words).
    if sentence_value:
        total = 0
        for value in sentence_value.values():
            total += value

        # Average value of a sentence from the original text.
        average = int(total / len(sentence_value))
        threshold = 1.3 * average

        summary = ''.join(
            " " + sentence
            for sentence in sentences
            if sentence in sentence_value and sentence_value[sentence] > threshold
        )

    print(summary)
    return summary
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement