Advertisement
ganryu

logger

Jul 6th, 2018
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.58 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Fri Apr 10 00:21:26 2015
  4.  
  5. @author: Santiago
  6. """
  7.  
  8. import lxml.html
  9. from datetime import datetime, timedelta
  10. from collections import deque
  11. from time import sleep
  12.  
  13. CHAT_NAME = 'narutochat'
  14. FILENAME = 'narutochat.txt'
  15.  
  16. class Comment:
  17.     def __init__(self, comment):
  18.         self.comment = comment
  19.         self.links = comment.xpath('.//a[@rel="nofollow"]')
  20.         self.links = [ "\n" + link.attrib['href'] for link in self.links ]
  21.         self.images = comment.xpath('.//a[@target="_blank"]')
  22.         self.images = [ "\n" + image.attrib['href'] for image in self.images ]
  23.         b = comment.xpath('.//a[@class="b"]/text()')
  24.         self.nick = b.pop(0)
  25.    
  26.     def getText(self):
  27.         text = ""
  28.        
  29.         texts = self.comment.xpath(".//div[contains(@class, 'mcomentario')]//text()")        
  30.         ult = len(texts) - 1 - len(self.links)
  31.        
  32.         text = "".join(texts[1:ult])
  33.        
  34.         text += "\n".join(self.links)
  35.         text += "\n".join(self.images)
  36.            
  37.         return text
  38.    
  39.     def getNick(self):
  40.         return self.nick
  41.        
  42.     def getId(self):
  43.         return self.comment.attrib['i']
  44.    
  45.     def getTime(self):
  46.         delta = self.comment.xpath('.//a[@href=""]/text()')[0]
  47.        
  48.         tag = delta[-1]
  49.         delta = delta[:-1]
  50.        
  51.         if tag == "a":
  52.             time = datetime.today()
  53.             return time.strftime("%H:%M:%S")
  54.         elif tag == "m":
  55.             time = datetime.today() - timedelta(minutes = int(delta))
  56.             return time.strftime("%H:%M:00")
  57.         elif tag == "h":
  58.             time = datetime.today() - timedelta(hours = int(delta))
  59.             return time.strftime("%H:00:00")
  60.         else:
  61.             delta = delta + tag
  62.             datedelta = delta.split("/")
  63.             month = datedelta[1]
  64.             day = datedelta[0]
  65.             date = datetime.today() - timedelta(month = month, day = day)
  66.             return date.strftime("%y-%m-%d")
  67.            
  68.     def __repr__(self):
  69.         nick = str(self.getNick())
  70.         text = str(self.getText())
  71.         time = str(self.getTime())
  72.         return time + " " + nick + text
  73.  
  74.  
  75. def logstart(filename):
  76.    
  77.     with open(filename, "a", encoding='utf8') as f:
  78.         print(file = f)
  79.         print("###########################################################", file=f)    
  80.         date = datetime.today().strftime("%y-%m-%d %H:%M:%S")
  81.         print("Log: " + date, file=f)
  82.         print("###########################################################", file=f)
  83.         print("", file=f)    
  84.    
  85. def save_log(url, filename, last_id="1"):
  86.     intentos = 0
  87.     etree = ""
  88.     while intentos < 5 and not etree:
  89.         try:
  90.             etree = lxml.html.parse(url)
  91.             intentos = 0
  92.         except OSError:
  93.             print('Error al abrir ', url)
  94.             intentos += 1
  95.  
  96.     stack = deque()
  97.    
  98.     for comment in etree.xpath('.//div[@class="mensaje mb4"]'):    
  99.         comentario = Comment(comment)
  100.         if (comentario.getId() > last_id):
  101.             stack.append(comentario)
  102.            
  103.     file = open(filename, "a", encoding='utf8')        
  104.    
  105.     while stack:
  106.         comment = stack.pop()
  107.         last_id = comment.getId()
  108.         print(comment, file=file)
  109.    
  110.     file.close()
  111.    
  112.     return last_id
  113.    
  114. logstart(FILENAME)
  115.  
  116. url = 'http://' + CHAT_NAME + ".wocial.com"
  117. print('Cargando ' + url)
  118. print('Ctrl-C para cerrar')
  119.  
  120. last_id = save_log(url, FILENAME)
  121.  
  122. while True:
  123.     last_id = save_log(url, FILENAME, last_id)
  124.     sleep(5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement