Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #python2
- #-*- coding: utf8 -*-
- import os
class parser:
    """Tally name strings found in local HTML files and write a text report.

    A name is the text between ``balise_name`` and ``balise_end`` inside a
    chunk of the document delimited by ``balise_begin``.

    Typical use: ``parse_folder_for_name()`` followed by ``output_result()``.
    """

    def __init__(self):
        # Maps each extracted name string to the number of times it was seen
        # MINUS ONE: the first sighting stores 0, every later one adds 1.
        # output_result() relies on this to drop names that were seen once.
        # NOTE(review): the report labels these values "Occurences" although
        # they are one below the real count -- confirm this is intended.
        self.dico_name = dict()
        # Text markers used to slice the raw HTML.
        self.balise_begin = '<div'
        self.balise_name = 'class="from_name">'
        self.balise_end = '\n </div>'

    def update_dico_name(self, path):
        """Read the file at ``path`` and update ``dico_name`` with its names.

        Raises whatever ``open``/``read`` raise when the file cannot be read.
        """
        # 'with' guarantees the handle is released even if read() fails.
        with open(path, 'r') as html_file:
            content = html_file.read()

        for chunk in content.split(self.balise_begin):
            if self.balise_name not in chunk:
                continue
            # Keep what follows the name marker, up to the closing marker.
            name = chunk.split(self.balise_name)[1].split(self.balise_end)[0]
            if name in self.dico_name:
                self.dico_name[name] += 1
            else:
                self.dico_name[name] = 0

    def parse_folder_for_name(self):
        """Process every regular file of the current directory whose name
        contains ``'.html'``."""
        for entry in os.listdir('.'):
            # Directories (or other non-files) that merely contain '.html' in
            # their name cannot be opened for reading; skip them.
            if '.html' in entry and os.path.isfile(entry):
                self.update_dico_name(entry)

    def output_result(self):
        """Write the tallies to ``output_result.txt`` in the current directory.

        Names whose stored value is 0 are left out. Names containing
        ``'via @gif'`` go to the first section, keyed by the text that precedes
        that marker; names containing ``'via @bold'`` are ignored; the rest go
        to the second section.
        """
        dico_simple = dict()
        dico_gif = dict()
        for name, count in self.dico_name.items():
            if count == 0:
                continue
            if 'via @gif' in name:
                dico_gif[name.split('via @gif')[0]] = count
            elif 'via @bold' not in name:
                dico_simple[name] = count

        with open('output_result.txt', 'w') as report:
            report.write('GIF postés :\n')
            for name, count in dico_gif.items():
                # Drop the first and the last character of the key.
                report.write(name[1:-1] + ' : ' + str(count) + ' Occurences\n')
            report.write('\nMessages normaux :\n')
            for name, count in dico_simple.items():
                # Drop the first character of the key.
                report.write(name[1:] + ' : ' + str(count) + ' Occurences\n')
if __name__ == "__main__":
    # Script entry point: tally the names found in the HTML files of the
    # current directory, then write the report file.
    name_counter = parser()
    name_counter.parse_folder_for_name()
    name_counter.output_result()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement