renix1

summary

Jul 5th, 2018
149
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.72 KB | None | 0 0
  1. # coding:utf-8
  2. import re
  3.  
  4. class Parser(object):
  5.     def __init__(self, text):
  6.         self.text = text
  7.         self.sentences = self.split_sentences()
  8.  
  9.     def split_sentences(self):
  10.         """
  11.            Split the text into sentences
  12.        """
  13.         abbreviations = r"(Sr|Sra|Dr|Dra|Ex|Vs|Versus)\."
  14.         sentences = re.sub(abbreviations, '', self.text)
  15.         sentences = re.sub(r'!|\?|\.|\n', '<stop>', sentences)
  16.         sentences = sentences.strip()
  17.         sentences = sentences.split('<stop>')
  18.         return list(filter(lambda s: s != '', sentences))
  19.  
  20.     def remove_punct(self, sentence):
  21.         """
  22.            Remove punctuations from text
  23.        """
  24.         return re.sub(r"\?|\.{1,}|!|,|;|-|:|\(|\)|'|\"|\*", '', sentence)
  25.  
  26.     def is_sentence(self, sentence):
  27.         """
  28.            Check if is a sentence
  29.        """
  30.         return True if ' ' in sentence else False
  31.  
  32.     def intersec_score(self, sentenceX, sentenceY):
  33.         """
  34.            Get score of intersection
  35.        """
  36.         if self.is_sentence(sentenceX) and self.is_sentence(sentenceY):
  37.             result = 0
  38.             sentenceX, sentenceY = self.remove_punct(sentenceX).split(), self.remove_punct(sentenceY).split()
  39.             wordsInX = {(w, sentenceX.count(w)) for w in sentenceX}
  40.             wordsInY = {(w, sentenceY.count(w)) for w in sentenceY}
  41.             for k,v in wordsInX:
  42.                 result += v
  43.             for k,v in wordsInY:
  44.                 result += v
  45.             return result // 2
  46.         else:
  47.             return False
  48.  
  49.     def calc_score(self):
  50.         """
  51.            I suppose that method calc text score and return a summary?!
  52.        """
  53.         high_result = 0
  54.         winner = {'score': 0, 'sentence': ''}
  55.         sentences = []
  56.         for i in range(0, len(self.sentences)):
  57.             for j in range(0, len(self.sentences)):
  58.                 if self.sentences[i] != self.sentences[j]:
  59.                     score = self.intersec_score(self.sentences[i], self.sentences[j])
  60.                     if score > high_result:
  61.                         high_result = score
  62.                         sentences.append({'score': score, 'sentence': '{}. {}.'.format(self.sentences[i], self.sentences[j])})
  63.         return self.get_average(sentences)
  64.  
  65.     def get_average(self, data):
  66.         """
  67.            Idnk how i did it
  68.        """
  69.         length = len(data)
  70.         total = 0
  71.         minimum = 1000000
  72.         result = None
  73.         total = sum([d['score'] for d in data])
  74.         avg = round(total/length)
  75.         for d in data:
  76.             m = abs(d['score'] - avg)
  77.             if m < minimum:
  78.                 minimum = m
  79.                 result = d['sentence']
  80.         return result
Add Comment
Please, Sign In to add comment