Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Fri Apr 10 00:21:26 2015
- @author: Santiago
- """
- import lxml.html
- from datetime import datetime, timedelta
- from collections import deque
- from time import sleep
- CHAT_NAME = 'narutochat'
- FILENAME = 'narutochat.txt'
class Comment:
    """Wrap one chat-message element and expose the fields written to the log.

    The wrapped object is whatever element the caller passes in; this class
    only relies on its ``xpath()`` method and its ``attrib`` mapping.
    """

    def __init__(self, comment):
        """Collect link targets, image targets and the nick from *comment*.

        Raises:
            IndexError: if the element has no ``<a class="b">`` text node.
        """
        self.comment = comment
        # Every href is stored with a leading newline so that it starts on
        # its own line once appended to the message text.
        self.links = [
            "\n" + link.attrib['href']
            for link in comment.xpath('.//a[@rel="nofollow"]')
        ]
        self.images = [
            "\n" + image.attrib['href']
            for image in comment.xpath('.//a[@target="_blank"]')
        ]
        # The first <a class="b"> text node is used as the author's nick.
        self.nick = comment.xpath('.//a[@class="b"]/text()').pop(0)

    def getText(self):
        """Return the message body followed by its link and image URLs."""
        texts = self.comment.xpath(
            ".//div[contains(@class, 'mcomentario')]//text()")
        # Skip the first text node and drop the trailing ones: one per link
        # plus one more (presumably the nick, the link captions and the time
        # stamp -- confirm against the page markup).
        ult = len(texts) - 1 - len(self.links)
        text = "".join(texts[1:ult])
        # NOTE: entries already begin with "\n", so joining them with "\n"
        # leaves an empty line between consecutive URLs; kept unchanged so
        # the log format stays the same.
        text += "\n".join(self.links)
        text += "\n".join(self.images)
        return text

    def getNick(self):
        """Return the nick extracted in ``__init__``."""
        return self.nick

    def getId(self):
        """Return the element's ``i`` attribute (the comment id) as found."""
        return self.comment.attrib['i']

    def getTime(self):
        """Translate the comment's relative time stamp into a log time string.

        The stamp is the first text node of ``<a href="">``. Its last
        character selects the format:

        * ``a`` -- treated as "now"; returns the current ``HH:MM:SS``.
        * ``m`` -- minutes ago; returns ``HH:MM:00``.
        * ``h`` -- hours ago; returns ``HH:00:00``.
        * anything else -- parsed as ``day/month`` and returned as
          ``YY-MM-DD``. If it cannot be parsed as a date the raw stamp is
          returned so that logging never fails on an unknown format.

        Raises:
            IndexError: if the element has no such text node or it is empty.
            ValueError: if an ``m``/``h`` stamp has a non-numeric amount.
        """
        stamp = self.comment.xpath('.//a[@href=""]/text()')[0]
        tag = stamp[-1]
        amount = stamp[:-1]
        today = datetime.today()
        if tag == "a":
            return today.strftime("%H:%M:%S")
        if tag == "m":
            return (today - timedelta(minutes=int(amount))).strftime("%H:%M:00")
        if tag == "h":
            return (today - timedelta(hours=int(amount))).strftime("%H:00:00")

        # Absolute "day/month" stamp. timedelta() accepts neither `month` nor
        # `day`, so the date is built directly from the two fields instead.
        try:
            day_text, month_text = stamp.split("/")[:2]
            day, month = int(day_text), int(month_text)
            year = today.year
            # The stamp carries no year; a day/month later than today is
            # assumed to belong to the previous year -- TODO confirm.
            if (month, day) > (today.month, today.day):
                year -= 1
            posted = datetime(year, month, day)
        except ValueError:
            return stamp
        return posted.strftime("%y-%m-%d")

    def __repr__(self):
        """Return the log line: ``<time> <nick><text>``."""
        nick = str(self.getNick())
        text = str(self.getText())
        time = str(self.getTime())
        return time + " " + nick + text
def logstart(filename):
    """Append a session banner to *filename*.

    The banner is an empty line, a row of ``#``, ``Log: <yy-mm-dd HH:MM:SS>``
    with the current local time, another row of ``#`` and a final empty
    line. The file is opened in append mode as UTF-8.
    """
    rule = "###########################################################"
    with open(filename, "a", encoding='utf8') as log:
        log.write("\n")
        log.write(rule + "\n")
        started = datetime.today().strftime("%y-%m-%d %H:%M:%S")
        log.write("Log: " + started + "\n")
        log.write(rule + "\n")
        log.write("" + "\n")
def _id_key(comment_id):
    """Return a key that orders digit-only ids by numeric value.

    Ids are strings, and plain string comparison puts "10" before "9".
    Comparing by length first and text second matches numeric order for
    ids without leading zeros, and still equals plain string order for ids
    that all have the same width.
    """
    text = str(comment_id)
    return (len(text), text)


def save_log(url, filename, last_id="1"):
    """Fetch the chat page and append comments newer than *last_id* to the log.

    Args:
        url: address of the chat page, passed to ``lxml.html.parse``.
        filename: log file, opened in append mode as UTF-8.
        last_id: id of the newest comment already logged.

    Returns:
        The id of the last comment written, or *last_id* unchanged when
        nothing new was found or the page could not be fetched.
    """
    # Up to five attempts; each failure is reported on stdout.
    tree = None
    for _ in range(5):
        try:
            tree = lxml.html.parse(url)
        except OSError:
            print('Error al abrir ', url)
        else:
            break
    if tree is None:
        # Every attempt failed: keep the current position so the caller can
        # simply try again later instead of crashing on a missing tree.
        return last_id

    threshold = _id_key(last_id)
    pending = deque()
    for element in tree.xpath('.//div[@class="mensaje mb4"]'):
        comentario = Comment(element)
        if _id_key(comentario.getId()) > threshold:
            pending.append(comentario)

    # Comments are written in reverse page order (presumably the page lists
    # the newest comment first -- confirm), so the last one written, whose
    # id is returned, is the first new comment on the page.
    with open(filename, "a", encoding='utf8') as log:
        while pending:
            comment = pending.pop()
            last_id = comment.getId()
            print(comment, file=log)
    return last_id
# Script entry point: write the session banner, then poll the chat page
# forever, appending new comments to the log every five seconds.
# Guarded so that importing this module does not start the endless loop.
if __name__ == "__main__":
    logstart(FILENAME)
    url = 'http://' + CHAT_NAME + ".wocial.com"
    print('Cargando ' + url)
    print('Ctrl-C para cerrar')
    try:
        last_id = save_log(url, FILENAME)
        while True:
            last_id = save_log(url, FILENAME, last_id)
            sleep(5)
    except KeyboardInterrupt:
        # Ctrl-C is the advertised way to stop; exit without a traceback.
        pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement