Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import codecs
import datetime
import errno
import HTMLParser
import re
import shutil
import time
from time import sleep

import requests
from pyquery import PyQuery
class LoggerBot(object):
    """Poll the forum shoutbox and append newly seen lines to a local log.

    Constructing an instance logs in and immediately enters ``mainLoop``,
    which never returns; stop the bot with a keyboard interrupt.

    The code targets Python 2 (``HTMLParser`` module, byte-string session
    token); the syntax used here is kept valid on Python 3 as well.
    """

    URL_BASE = 'http://forum.rpg-center.pl/index.php'
    LIST_XML = '?action=shoutbox;sa=get;xml;row=0;restart'

    # NOTE(security): these credentials are committed in the source. They are
    # kept as defaults so existing ``LoggerBot()`` callers keep working, but
    # they should be supplied from outside the repository instead.
    DEFAULT_USERNAME = 'Spurdobot (RIP ;_;)'
    DEFAULT_PASSWORD = 'burdobordo'

    # Working files (relative to the current directory) and the published copy.
    LOG_FILE = 'logfile.txt'
    RECENT_LINES_FILE = 'recentlines.txt'
    DATE_FILE = 'date.txt'
    PUBLISHED_LOG_FILE = 'E:/Program Files/EasyPHP-Webserver-14.1b2/www/logfile.txt'

    REQUEST_TIMEOUT = 30    # seconds before an HTTP request is abandoned
    POLL_SECONDS = 10       # pause between two shoutbox polls
    HEARTBEAT_SECONDS = 30  # counter value that triggers a plain index.php GET

    YOUTUBE_RE = (
        r'(https?://)?(www\.)?'
        r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
    )

    # Markers that wrap the rows inside the shoutbox XML response.
    _LOG_HEAD = '<![CDATA['
    _LOG_TAIL = '<tr id="shoutbox_msgs"></tr>]]></msgs>'

    # (pattern, replacement) pairs applied in order by __logCleanup.
    _CLEANUP_RULES = (
        # Put every shoutbox row on its own line.
        (r'</div></td></tr>', '</div></td></tr>\n'),
        # Markup in front of the nickname, including the optional (and
        # optionally coloured) profile link.
        (r'<tr id="shoutbox_row[0-9]{1,3}">'
         r'<td nowrap="nowrap" style="text-align:right" class="sbtext" valign="top">'
         r'(<a href="http://forum\.rpg-center\.pl/index\.php\?action=profile;u=[0-9]*"'
         r' target="_blank"( style="color:#.{6}")?>)?', ''),
        # Markup between the nickname and the timestamp.
        (r'(</a>)? <span style="color:#b7b7b7">', ' '),
        # Markup between the timestamp and the message, with any combination
        # of bold / italic / underline styling.
        (r'</span>:</td><td style="text-align:left" class="sbtext" valign="top">'
         r'<span style="(font-weight:bold;)?(font-style:italic;)?'
         r'(text-decoration:underline;)?">', ': '),
        # Row terminator.
        (r'</div></td></tr>', ''),
        # Wrapper produced by the "/me" command.
        (r'<span class="me">', ''),
        (r'</span>', ''),
        # Emotes: keep only the alt text of the image.
        (r'<img src="http://forum\.rpg-center\.pl/.{1,32}" alt="', ''),
        (r'" title=".{1,16}" class="smiley" />', ''),
        # Links: keep only the href. The link text is matched lazily so that
        # text between two links on the same row is not swallowed.
        (r'<a href="', ''),
        (r'" target="_blank">.*?</a>', ''),
    )

    def __init__(self, username=DEFAULT_USERNAME, password=DEFAULT_PASSWORD):
        """Set up the HTTP session and start the polling loop (never returns).

        username, password -- forum credentials used by ``mainLoop``.
        """
        self.session = requests.Session()
        self.token = None
        self.h = HTMLParser.HTMLParser()
        self.lineCount = 0
        self.ts = 0
        self.st = "0"
        self.stShortened = "0"
        self.youtubeRe = self.YOUTUBE_RE
        self.youtubeResult = None
        self.username = username
        self.password = password
        self.mainLoop()

    def __logCleanup(self, rawLog):
        """Reduce a raw shoutbox XML response to a list of plain-text lines.

        rawLog -- text of the shoutbox response.

        Returns one string per shoutbox row with the HTML markup removed, or
        an empty list when the response contains no CDATA section (for
        example an error page served instead of the shoutbox).
        """
        # Drop the tags after the last row...
        body = rawLog.rsplit(self._LOG_TAIL, 1)[0]
        # ...and everything up to and including the CDATA opener.
        _, marker, body = body.partition(self._LOG_HEAD)
        if not marker:
            return []
        for pattern, replacement in self._CLEANUP_RULES:
            body = re.sub(pattern, replacement, body)
        # splitlines() returns a list and drops the newline characters.
        return body.splitlines()

    def _readRecentLines(self):
        """Return the set of lines stored by the previous update (empty if none)."""
        try:
            with codecs.open(self.RECENT_LINES_FILE, 'r', 'utf-8') as recentlines:
                # Strip only the line terminator: stripping all whitespace
                # would make lines that end in a space never compare equal
                # to their freshly cleaned counterpart.
                return set(line.rstrip('\r\n') for line in recentlines)
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            return set()  # first run: nothing has been logged yet

    def _writeDateMarker(self, logHandle):
        """Append a date banner to logHandle when the stored date differs."""
        try:
            with open(self.DATE_FILE, "r") as currDate:
                lastDate = currDate.readline()
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            lastDate = None  # first run: no date recorded yet
        if lastDate != self.stShortened:
            logHandle.write("***Current Date: %s***\n" % self.stShortened)
            with open(self.DATE_FILE, "w") as currDate:
                currDate.write(self.stShortened)

    def logUpdate(self, processedLog):
        """Append lines not seen in the previous update to the log file.

        processedLog -- list of cleaned lines as returned by __logCleanup.

        Sets ``self.lineCount`` to the number of lines appended, announces
        YouTube links found in new lines, rewrites the recent-lines file,
        appends a date banner when the date changed and copies the log to
        ``PUBLISHED_LOG_FILE``. An empty ``processedLog`` leaves every file
        untouched so the next update still de-duplicates against the last
        known lines.
        """
        self.lineCount = 0
        if not processedLog:
            return
        seen = self._readRecentLines()
        with codecs.open(self.LOG_FILE, 'a', 'utf-8') as f:
            for line in processedLog:
                if line in seen:
                    continue
                self.lineCount += 1
                f.write(self.h.unescape("%s\n" % line))
                self.youtubeResult = re.search(self.youtubeRe, line)
                if self.youtubeResult:
                    self.youtube(self.youtubeResult.group(0))
            # Remember every line of this response, new or not, for the
            # comparison done by the next update.
            with codecs.open(self.RECENT_LINES_FILE, 'w', 'utf-8') as recentlines:
                recentlines.write("".join("%s\n" % line for line in processedLog))
            self._writeDateMarker(f)
        shutil.copyfile(self.LOG_FILE, self.PUBLISHED_LOG_FILE)

    def acquireTime(self):
        """Store the current local time as ``ts``, ``st`` and ``stShortened``."""
        self.ts = time.time()
        now = datetime.datetime.fromtimestamp(self.ts)
        self.st = now.strftime("%d-%m-%Y %H:%M:%S")
        self.stShortened = now.strftime("%d-%m-%Y")

    def login(self, username, password):
        """Log in to the forum and return the session token.

        Raises RuntimeError when the response does not contain a token
        (for example after a rejected login).
        """
        formdata = {'user': username,
                    'passwrd': password,
                    'cookielength': '-1'}
        response = self.session.post(self.URL_BASE + '?action=login2',
                                     data=formdata,
                                     timeout=self.REQUEST_TIMEOUT)
        # [^']+ stops at the closing quote instead of running to the last
        # quote on the line.
        m = re.search(r"sSessionId: '(?P<sessionToken>[^']+)'", response.text)
        if m is None:
            raise RuntimeError("Login failed: no session token in the response.")
        return m.group('sessionToken').encode('ascii')

    def youtube(self, url):
        """Fetch a YouTube page and post its title to the shoutbox.

        url -- video URL, with or without a scheme.

        Network failures are reported on stdout and otherwise ignored so
        that a failed announcement does not abort the log update.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/37.0.2062.94 Chrome/37.0.2062.94 Safari/537.36'
        }
        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url
        try:
            result = requests.get(url, headers=headers,
                                  timeout=self.REQUEST_TIMEOUT)
            document = PyQuery(result.text)
            content_meta = document('meta[itemprop="name"]').attr('content')
            if content_meta:
                payload = {'italic': '1',
                           'msg': 'Video posted: ' + content_meta}
                self.session.post(
                    self.URL_BASE + '?action=shoutbox;sa=send;sesc='
                    + self.token + ';xml;row=99',
                    data=payload,
                    timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print("Could not announce video %r: %r" % (url, err))

    def mainLoop(self):
        """Log in, then poll the shoutbox forever, logging each change."""
        self.token = self.login(self.username, self.password)
        pollUrl = self.URL_BASE + self.LIST_XML
        self.messages = self.session.get(pollUrl, timeout=self.REQUEST_TIMEOUT)
        self.logFile = self.messages.text
        self.previousResponse = self.messages.text
        self.firstLoop = 1
        self.timer = 0
        while True:
            if self.firstLoop == 1 or self.previousResponse != self.messages.text:
                self.logFile = self.__logCleanup(self.messages.text)
                self.acquireTime()
                self.logUpdate(self.logFile)
                if self.lineCount > 0:
                    noun = "line" if self.lineCount == 1 else "lines"
                    print("Added %d %s on %s." % (self.lineCount, noun, self.st))
            self.firstLoop = 0
            # One-second naps rather than one long sleep, so a keyboard
            # interrupt is noticed quickly.
            # NOTE(review): the indentation of the original was lost; this
            # assumes the counter advances once per second of sleep, which
            # gives a heartbeat roughly every HEARTBEAT_SECONDS. Confirm.
            for _ in range(self.POLL_SECONDS):
                sleep(1)
                self.timer += 1
            self.previousResponse = self.messages.text
            try:
                if self.timer >= self.HEARTBEAT_SECONDS:
                    self.timer = 0
                    # Plain page load; presumably keeps the session alive.
                    self.session.get(self.URL_BASE, timeout=self.REQUEST_TIMEOUT)
                self.messages = self.session.get(pollUrl,
                                                 timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException as err:
                # Keep the last good response; the next cycle polls again.
                print("Request to the forum failed (%r); retrying on the next poll." % (err,))
if __name__ == "__main__":
    # Constructing the bot logs in and enters its endless polling loop, so
    # only do it when the file is run as a script, not when it is imported.
    m = LoggerBot()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement