# Untitled

import requests,re,HTMLParser,shutil,time,datetime,codecs
from time import sleep
from pyquery import PyQuery

class LoggerBot:
    """Poll the rpg-center.pl forum shoutbox and append new shouts to a log file.

    Constructing an instance logs in and immediately enters ``mainLoop()``,
    which never returns.  State is kept in three files in the working
    directory: the growing log, the lines seen in the previous response (used
    to detect which shouts are new) and the last date written to the log.
    """

    URL_BASE = 'http://forum.rpg-center.pl/index.php'
    LIST_XML = '?action=shoutbox;sa=get;xml;row=0;restart'

    # Local state files (relative to the working directory) and the location
    # the log is copied to after every update.
    LOG_FILE = 'logfile.txt'
    RECENT_FILE = 'recentlines.txt'
    DATE_FILE = 'date.txt'
    PUBLISH_PATH = "E:/Program Files/EasyPHP-Webserver-14.1b2/www/logfile.txt"

    # Ordered (pattern, replacement) pairs that turn the shoutbox HTML rows
    # into plain "Nick [time]: message" lines.  Order matters: the newline
    # step must run first so that the "." based patterns stay within one row.
    _CLEANUP_STEPS = (
        # One shout per line.
        (r'</div></td></tr>', '</div></td></tr>\n'),
        # Row/cell opening tags plus the optional profile link in front of
        # the nickname (including specially coloured nicknames).
        (r'<tr id="shoutbox_row[0-9]{1,3}"><td nowrap="nowrap" '
         r'style="text-align:right" class="sbtext" valign="top">'
         r'(<a href="http://forum\.rpg-center\.pl/index\.php\?action=profile;'
         r'u=[0-9]*" target="_blank"( style="color:#.{6}")?>)?', ''),
        # Markup between the nickname and the timestamp.
        (r'(</a>)? <span style="color:#b7b7b7">', ' '),
        # Markup between the timestamp and the message, covering every
        # combination of bold/italic/underline formatting.
        (r'</span>:</td><td style="text-align:left" class="sbtext" '
         r'valign="top"><span style="(font-weight:bold;)?'
         r'(font-style:italic;)?(text-decoration:underline;)?">', ': '),
        # Row closing tags (the newline added by the first step stays).
        (r'</div></td></tr>', ''),
        # Markup produced by the "/me" command.
        (r'<span class="me">', ''),
        (r'</span>', ''),
        # Emotes: keep only the alt text.
        (r'<img src="http://forum\.rpg-center\.pl/.{1,32}" alt="', ''),
        (r'" title=".{1,16}" class="smiley" />', ''),
        # Links: keep only the href.  The match is non-greedy so that text
        # between two links on the same line is not swallowed.
        (r'<a href="', ''),
        (r'" target="_blank">.*?</a>', ''),
    )

    def __init__(self):
        """Set up the HTTP session and counters, then start ``mainLoop()``.

        Note that the constructor blocks forever because ``mainLoop()`` does
        not return.
        """
        self.session = requests.Session()
        self.token = None
        self.h = HTMLParser.HTMLParser()
        self.lineCount = 0
        self.ts = 0
        self.st = "0"
        self.stShortened = "0"
        self.youtubeRe = (
            r'(https?://)?(www\.)?'
            r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
            r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
        )
        self.mainLoop()

    def __logCleanup(self, rawLog):
        """Convert the raw shoutbox XML response into a list of plain lines.

        Returns one string per shout (HTML entities are still escaped).  An
        empty list is returned when the response does not contain the
        expected CDATA section, e.g. an error or maintenance page.
        """
        # Drop the trailer after the last shout...
        rawLog = rawLog.rsplit('<tr id="shoutbox_msgs"></tr>]]></msgs>', 1)[0]
        # ...and everything up to and including the CDATA opener.
        _, marker, rawLog = rawLog.partition('<![CDATA[')
        if not marker:
            return []
        for pattern, replacement in self._CLEANUP_STEPS:
            rawLog = re.sub(pattern, replacement, rawLog)
        return rawLog.splitlines()

    def _readIfExists(self, path):
        """Return the UTF-8 decoded content of *path*, or '' if it is missing."""
        import errno
        try:
            with codecs.open(path, 'r', 'utf-8') as stateFile:
                return stateFile.read()
        except IOError as exc:
            # Only a missing file is treated as "no state yet".
            if exc.errno != errno.ENOENT:
                raise
            return ''

    def logUpdate(self, processedLog):
        """Append shouts not seen in the previous response to the log file.

        *processedLog* is the list of lines produced by the cleanup step.
        Side effects: sets ``self.lineCount`` to the number of new lines,
        appends them (HTML-unescaped) to the log, appends a date marker when
        the stored date differs from ``self.stShortened``, rewrites the
        recent-lines file with *processedLog*, copies the log to
        ``PUBLISH_PATH`` and finally announces any YouTube links found in
        the new lines.
        """
        processedLog = list(processedLog)
        # Strip only line terminators: shouts may legitimately end in spaces,
        # and stripping those would make them look new on every update.
        knownLines = set(self._readIfExists(self.RECENT_FILE).splitlines())
        newLines = [line for line in processedLog if line not in knownLines]
        self.lineCount = len(newLines)

        storedDate = self._readIfExists(self.DATE_FILE).splitlines()
        storedDate = storedDate[0] if storedDate else ''

        with codecs.open(self.LOG_FILE, 'a', 'utf-8') as f:
            for line in newLines:
                f.write(self.h.unescape("%s\n" % line))
            # The date marker is written after any new lines of this update.
            if storedDate != self.stShortened:
                f.write("***Current Date: %s***\n" % self.stShortened)
                with open(self.DATE_FILE, "w") as currDate:
                    currDate.write(self.stShortened)

        with codecs.open(self.RECENT_FILE, 'w', 'utf-8') as recentlines:
            for line in processedLog:
                recentlines.write("%s\n" % line)

        shutil.copyfile(self.LOG_FILE, self.PUBLISH_PATH)

        # Network lookups happen last so that a failure here cannot leave the
        # state files half-written.
        for line in newLines:
            self.youtubeResult = re.search(self.youtubeRe, line)
            if self.youtubeResult:
                self.youtube(self.youtubeResult.group(0))

    def acquireTime(self):
        """Store the current local time as a timestamp and two formatted strings."""
        self.ts = time.time()
        now = datetime.datetime.fromtimestamp(self.ts)
        self.st = now.strftime("%d-%m-%Y %H:%M:%S")
        self.stShortened = now.strftime("%d-%m-%Y")

    def login(self, username, password):
        """Log in to the forum and return the session token found in the response.

        Raises RuntimeError when the response contains no session token,
        which is what happens when the login did not succeed.
        """
        formdata = {'user': username,
                    'passwrd': password,
                    'cookielength': '-1'}
        response = self.session.post(self.URL_BASE + "?action=login2", data=formdata)
        # [^']+ stops at the closing quote even if more quoted values follow
        # on the same line.
        m = re.search(r"sSessionId: '(?P<sessionToken>[^']+)'", response.text)
        if m is None:
            raise RuntimeError("Login failed: no session token in the response.")
        return m.group('sessionToken').encode('ascii')

    def youtube(self, url):
        """Fetch *url* and post the video title to the shoutbox, if one is found."""
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/37.0.2062.94 Chrome/37.0.2062.94 Safari/537.36'
        }
        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url
        result = requests.get(url, headers=headers)
        document = PyQuery(result.text)
        content_meta = document('meta[itemprop="name"]').attr('content')
        if content_meta:
            payload = {'italic': '1',
                       'msg': 'Video posted: ' + content_meta}
            self.session.post(
                self.URL_BASE + "?action=shoutbox;sa=send;sesc=" + self.token + ";xml;row=99",
                data=payload)

    def mainLoop(self):
        """Log in, then poll the shoutbox every 10 seconds forever.

        Whenever the response text changes (and on the first pass) the
        response is cleaned up and handed to ``logUpdate()``.  Every 30th
        poll the forum index is requested as a keep-alive.
        NOTE(review): network calls have no timeout or retry, so a request
        failure propagates and stops the bot.
        """
        # SECURITY: credentials are hard-coded in source; they should be
        # loaded from configuration or the environment instead.
        self.token = self.login('Spurdobot (RIP ;_;)','burdobordo')
        listUrl = self.URL_BASE + self.LIST_XML
        self.messages = self.session.get(listUrl)
        self.logFile = self.messages.text
        self.previousResponse = self.messages.text
        self.firstLoop = 1
        self.timer = 0
        while True:
            if self.previousResponse != self.messages.text or self.firstLoop == 1:
                self.logFile = self.__logCleanup(self.messages.text)
                # An empty result means the response held no shouts (or was
                # not a shoutbox response); keep the existing state untouched.
                if self.logFile:
                    self.acquireTime()
                    self.logUpdate(self.logFile)
                    if self.lineCount >= 1:
                        noun = "line" if self.lineCount == 1 else "lines"
                        print("Added %d %s on %s." % (self.lineCount, noun, self.st))
                self.firstLoop = 0
            # Sleep in one-second steps so keyboard interrupts are noticed quickly.
            for _ in range(10):
                sleep(1)
            self.timer += 1
            if self.timer == 30:
                self.timer = 0
                # Keep-alive request; the response itself is not needed.
                self.session.get(self.URL_BASE)
            self.previousResponse = self.messages.text
            self.messages = self.session.get(listUrl)

# Only start the bot when executed as a script: constructing LoggerBot logs in
# and enters its polling loop, which does not return.
if __name__ == "__main__":
    m = LoggerBot()