Advertisement
Cadrin

Untitled

Feb 7th, 2015
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.79 KB | None | 0 0
  1. import requests,re,HTMLParser,shutil,time,datetime,codecs
  2. from time import sleep
  3. from pyquery import PyQuery
  4.  
  5. class LoggerBot:
  6.     URL_BASE = 'http://forum.rpg-center.pl/index.php'
  7.     LIST_XML = '?action=shoutbox;sa=get;xml;row=0;restart'
  8.  
  9.     def __init__(self):
  10.         self.session = requests.Session()
  11.         self.token = None
  12.         self.h = HTMLParser.HTMLParser()
  13.         self.lineCount = 0
  14.         self.ts = 0
  15.         self.st = "0"
  16.         self.stShortened = "0"
  17.         self.youtubeRe = (
  18.             r'(https?://)?(www\.)?'
  19.             '(youtube|youtu|youtube-nocookie)\.(com|be)/'
  20.             '(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
  21.         )
  22.         self.mainLoop()
  23.  
  24.     def __logCleanup(self,rawLog):
  25.         rawLog = rawLog.rsplit('<tr id="shoutbox_msgs"></tr>]]></msgs>',1)[0]
  26.         #remove tags at the end...
  27.         rawLog = rawLog.split('<![CDATA[',1)[1]
  28.         #...and at the front
  29.         rawLog = re.sub('</div></td></tr>','</div></td></tr>\n',rawLog)
  30.         #add newlines
  31.         rawLog = re.sub(r'\<tr id=\"shoutbox_row[0-9]{1,3}\"\>\<td nowrap=\"nowrap\" style\=\"text-align:right\" class\=\"sbtext\" valign\=\"top\"\>(\<a href\=\"http\://forum.rpg-center.pl/index\.php\?action\=profile\;u\=[0-9]*\" target\=\"\_blank\"( style\=\"color:#.{6}\")?\>)?','',rawLog)
  32.         #remove tags part 1 (largest, stuff in front of the name) (should capture special-colored nicknames)
  33.         rawLog = re.sub('(</a>)? <span style="color:#b7b7b7">',' ',rawLog)
  34.         #remove tags part 2 (between username and time)
  35.         rawLog = re.sub('</span>:</td><td style="text-align:left" class="sbtext" valign="top"><span style="(font-weight:bold;)?(font-style:italic;)?(text-decoration:underline;)?">',': ',rawLog)
  36.         #remove tags part 3 (time, colon, message) (should also capture all kinds of text formatting)
  37.         rawLog = re.sub('</div></td></tr>','',rawLog)
  38.         #remove tags part 4 (stuff at the end)
  39.         rawLog = re.sub('<span class="me">','',rawLog)
  40.         #capture tags related to the "/me" command pt. 1
  41.         rawLog = re.sub('</span>','',rawLog)
  42.         #pt. 2
  43.         rawLog = re.sub('<img src="http://forum.rpg-center.pl/.{1,32}" alt="','',rawLog)
  44.         #strip emotes pt. 1
  45.         rawLog = re.sub('" title=".{1,16}" class="smiley" />','',rawLog)      
  46.         #pt. 2
  47.         rawLog = re.sub('<a href="','',rawLog)
  48.         #strip links pt. 1
  49.         rawLog = re.sub('" target="_blank">.*</a>','',rawLog)
  50.         #pt.2
  51.        
  52.         rawLog = rawLog.splitlines()
  53.         return rawLog
  54.         #turn into a series of lines (still a string, not a list)
  55.         #any value higher than 0 = preserve newline characters
  56.         #we don't do that, though
  57.  
  58.     def logUpdate(self,processedLog):
  59.         self.lineCount = 0
  60.         with codecs.open('recentlines.txt','r','utf-8') as recentlines:
  61.             linesList = []
  62.             for line in recentlines:
  63.                 linesList.append(line.rstrip())
  64.         with codecs.open('logfile.txt','a','utf-8') as f:
  65.             with codecs.open('recentlines.txt','w','utf-8') as recentlines:
  66.                 for line in processedLog:
  67.                     if line not in linesList:
  68.                         self.youtubeResult = None
  69.                         self.lineCount += 1
  70.                         f.write(self.h.unescape("%s\n" % line))
  71.                         self.youtubeResult = re.search(self.youtubeRe, line)
  72.                         if self.youtubeResult:
  73.                             self.youtube(self.youtubeResult.group(0))
  74.                     recentlines.write("%s\n" % line)
  75.                 with open("date.txt","r+") as currDate:
  76.                     if currDate.readline() != self.stShortened:
  77.                         f.write("***Current Date: %s***\n" % self.stShortened)
  78.                         currDate.seek(0,0)
  79.                         currDate.truncate()
  80.                         currDate.write(self.stShortened)
  81.         shutil.copyfile("logfile.txt","E:/Program Files/EasyPHP-Webserver-14.1b2/www/logfile.txt")
  82.  
  83.     def acquireTime(self):
  84.         self.ts = time.time()
  85.         self.st = datetime.datetime.fromtimestamp(self.ts).strftime("%d-%m-%Y %H:%M:%S")
  86.         self.stShortened = datetime.datetime.fromtimestamp(self.ts).strftime("%d-%m-%Y")
  87.  
  88.     def login(self, username, password):
  89.         formdata = {'user': username,
  90.                     'passwrd': password,
  91.                     'cookielength': '-1'}
  92.         response = self.session.post("http://forum.rpg-center.pl/index.php?action=login2", data=formdata)
  93.         m = re.search(r"sSessionId: '(?P<sessionToken>.+)'", response.text)
  94.         token = m.group('sessionToken').encode('ascii')
  95.         return token
  96.  
  97.     def youtube(self,url):
  98.         headers = {
  99.             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/37.0.2062.94 Chrome/37.0.2062.94 Safari/537.36'
  100.                         }
  101.         if not re.search('(http://)|(https://)', url):
  102.             url = 'http://' + url
  103.         result = requests.get(url, headers=headers)
  104.         document = PyQuery(result.text)
  105.         content_meta = document('meta[itemprop="name"]').attr('content')
  106.         if content_meta:
  107.             ytMsg = 'Video posted: ' + content_meta
  108.             payload = { 'italic': '1',
  109.                         'msg': ytMsg}
  110.             self.session.post("http://forum.rpg-center.pl/index.php?action=shoutbox;sa=send;sesc=" + self.token + ";xml;row=99",data=payload)
  111.        
  112.     def mainLoop(self):
  113.         self.token = self.login('Spurdobot (RIP ;_;)','burdobordo')
  114.         self.messages = self.session.get(self.__class__.URL_BASE + self.__class__.LIST_XML)
  115.         self.logFile = self.messages.text
  116.         self.previousResponse = self.messages.text
  117.         self.firstLoop = 1
  118.         self.timer = 0
  119.         while True:
  120.             if self.previousResponse != self.messages.text or self.firstLoop == 1:
  121.                 self.logFile = self.__logCleanup(self.messages.text)
  122.                 self.acquireTime()
  123.                 self.logUpdate(self.logFile)
  124.                 if self.lineCount > 1:
  125.                     print "Added %d lines on %s." % (self.lineCount, self.st)
  126.                 elif self.lineCount == 1:
  127.                     print "Added %d line on %s." % (self.lineCount, self.st)
  128.                 self.firstLoop = 0
  129.             for i in range(10):
  130.                 sleep(1) #update once every 10 seconds, "for" loop made in order to
  131.                          #catch keyboard interrupts faster
  132.             self.timer += 1
  133.             if self.timer == 30:
  134.                 self.timer = 0
  135.                 heartbeat = self.session.get("http://forum.rpg-center.pl/index.php")
  136.             self.previousResponse = self.messages.text
  137.             self.messages = self.session.get(self.__class__.URL_BASE + self.__class__.LIST_XML)
  138.  
  139. m = LoggerBot()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement