daily pastebin goal
23%
SHARE
TWEET

bot v.0.2.1

Cadrin Apr 10th, 2016 (edited) 14 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #coding=utf-8
  2.  
  3. import requests, codecs, re
  4. from models import Message
  5. from bs4 import BeautifulSoup, SoupStrainer
  6.  
  7. print("Modules loaded.\n")  # Startup notice: runs once at import time, after the imports above have succeeded.
  8.  
  9. class NetworkManager():
  10.     URL_BASE = 'http://forum.rpg-center.pl/index.php'
  11.     LIST_XML = '?action=shoutbox;sa=get;xml;row=0'
  12.  
  13.     def __init__(self):
  14.         self.session = requests.session()
  15.  
  16.     def main(self):
  17.         response = self.session.get(self.__class__.URL_BASE + self.__class__.LIST_XML)
  18.         messages = Message.getMessageList(response)
  19.         return messages
  20.  
  21. class Bot(object):
  22.     def __enter__(self):
  23.         return NetworkManager()
  24.  
  25.     def __exit__(self, exc_type, exc_value, traceback):
  26.         Message.storeLastMsg()
  27.         print('Program terminated.')
  28.         return isinstance(exc_value, KeyboardInterrupt)
  29.  
  30.  
  31. #-----------------------------------------------------
  32. #---------------------models.py-----------------------
  33. #-----------------------------------------------------
  34.  
  35. from bs4 import BeautifulSoup, SoupStrainer
  36. import re, sys, pickle
  37. import json
  38.  
  39. class Message:
  40.     lastMsg = ''
  41.    
  42.     def __init__(self,author,time,message,links,printable):
  43.         self.author = author
  44.         self.time = time
  45.         self.message = message
  46.         self.links = links
  47.         self.printable = printable
  48.  
  49.     def __str__(self):
  50.         return self.printable.encode(sys.stdout.encoding,'ignore')
  51.  
  52.     @staticmethod
  53.     def getMessageList(response):
  54.         '''
  55.        Accepts a Requests response object. Verifies the validity of the
  56.        contents of its 'text' attribute and handles some errors when the
  57.        attribute is invalid.
  58.        Returns a list of processed new messages for the bot to use.
  59.        '''
  60.         response = response.text
  61.         if not response: return []
  62.         try:
  63.             assert('<?xml version="1.0" encoding="UTF-8"?>' in response)
  64.         except AssertionError:
  65.             print("Invalid response. The server might be experiencing problems.\nRetrying in {} seconds".format(300))
  66.             return []
  67.         response = response.replace('<![CDATA[','',1)
  68.         onlyTheMessages = SoupStrainer('tr',id=re.compile("^shoutbox_row([0-9]+)"))
  69.         raw = BeautifulSoup(response, 'lxml',parse_only=onlyTheMessages).contents
  70.         new = Message.getNewMsgs(raw,Message.getLastMsg())
  71.         return map(Message.process, new)
  72.  
  73.     @staticmethod
  74.     def getNewMsgs(other, recent):
  75.         '''
  76.        Assumes other is a list of messages of length at least 1.
  77.        Takes messages from other and compares them one by one against the most
  78.        recent registered message. Stops if the two are the same. Skips the check
  79.        altogether if the list is reasonably short (defined here as <= 10).
  80.        Always updates the most recent message. Returns a list of new messages.
  81.        (We don't expect there to be more than 1 new message at a time.)
  82.        (The <= 10 rule should allow for registering repeating messages that
  83.        aren't ultra excessive flood.)
  84.        '''
  85.  
  86.         if len(other) <= 10:
  87.             Message.setLastMsg(str(other[-1].contents))
  88.             return other
  89.        
  90.         for i in xrange(len(other) -1, -1, -1):
  91.             if recent == str(other[i].contents):
  92.                 break
  93.         Message.setLastMsg(str(other[-1].contents))
  94.         return other[i:]
  95.    
  96.     @staticmethod
  97.     def process(message):
  98.         '''
  99.        Takes a single message in HTML format, enclosed in a <tr> tag with the
  100.        id="shoutbox_rowX", where X is some number. Strips all the <a> and <img>
  101.        tags (THIS MUTATES THE TREE, but that's a desired effect here),
  102.        finds the date, name of the author and body of the message. Returns
  103.        the entire message (for now) in writable format, along with some other
  104.        useful information. Assumes all <img> tags have an 'alt' attribute
  105.        (img.has_attr('alt') = True).
  106.  
  107.        Operates under the assumption that each message is divided into two
  108.        table cells (<td>), where the second one contains the body and
  109.        formatting of the message, and the first one stores the name and the
  110.        date (unless it's a "/me" kind of message).
  111.        '''
  112.         segment1 = message.td
  113.         segment2 = message.td.next_sibling
  114.         time = segment1.span.string
  115.         links = []
  116.        
  117.         if len(list(segment1.strings)) == 4:
  118.             name = segment1.a.string
  119.             body = segment2.span
  120.         else:
  121.             name = segment2.span.next_element.rstrip()
  122.             body = segment2.span.span
  123.  
  124.         for link in body.find_all('a'):
  125.             links.append(link['href'])
  126.             link.replace_with(link['href'])
  127.  
  128.         for image in body.find_all('img'):
  129.             image.replace_with(image['alt'])
  130.  
  131.         body = body.get_text() #identical with segment2.get_text()
  132.  
  133.         return Message(name,time,body,links,segment1.get_text().lstrip() + ' ' + body)
  134.  
  135.     @classmethod
  136.     def getLastMsg(cls):
  137.         if cls.lastMsg:
  138.             return cls.lastMsg
  139.         try:
  140.             with open('lastmsg','r') as f:
  141.                 return pickle.load(f)
  142.         except IOError:
  143.             print("Warning: Last message file not found.")
  144.             return ''
  145.  
  146.     @classmethod
  147.     def setLastMsg(cls,msg):
  148.         cls.lastMsg = msg
  149.    
  150.     @classmethod
  151.     def storeLastMsg(cls):
  152.         with open('lastmsg','w') as f:
  153.             pickle.dump(cls.lastMsg,f)
  154.  
  155.  
  156. class Database:
  157.     def __init__(self,dbPath):
  158.         self.dbPath = dbPath
  159.         self.tables = {}
  160.         for table in ['userstats']:
  161.             self.tables[table] = self.load(table)
  162.  
  163.     def getPath(self):
  164.         return self.dbPath
  165.        
  166.     def getTable(self,name):
  167.         return self.tables[name]
  168.        
  169.     def getTableList(self):
  170.         return self.tables.keys()
  171.  
  172.     def updateUserRecord(self,message):
  173.         userStats = self.getTable('userstats')
  174.         record = userStats.setdefault(message.author, {
  175.             'msgCount' : 0,
  176.             'totalLen' : 0,
  177.             'linksPosted' : 0,
  178.             'activityPattern' : [0 for i in range(24)]
  179.             })
  180.         record['msgCount'] += 1
  181.         record['totalLen'] += len(message.message)
  182.         record['linksPosted'] += len(message.links)
  183.         hour = int(message.time[1:3]) % 24
  184.         record['activityPattern'][hour] += 1
  185.  
  186.     def save(self,table):
  187.         with open(self.getPath() + table, 'w') as datafile:
  188.             json.dump(self.getTable(table), datafile)
  189.  
  190.     def load(self,table):
  191.         try:
  192.             with open(self.getPath() + table) as datafile:
  193.                 return json.load(datafile)
  194.         except IOError:
  195.             print("Table '{}' not found, creating a new one.".format(table))
  196.             return {}
  197.  
  198. class DatabaseConnection(object):
  199.     '''
  200.    Usage:
  201.    
  202.        with DatabaseConnection() as <variable>:
  203.            <variable>.doStuff(vars)
  204.        
  205.    And then you just call the Database class methods through <variable>
  206.    as normal.
  207.    
  208.    This connection makes sure that all changes to the database are saved to
  209.    a file when exiting the 'with' segment. Saving the changes at the end is
  210.    probably what you want, and using this class is more convenient and
  211.    readable than using "try" and "finally" statements.
  212.    '''
  213.     def __enter__(self, dbPath = ''):
  214.         self.db = Database(dbPath)
  215.         return self.db
  216.     def __exit__(self, exc_type, exc_value, traceback):
  217.         for table in self.db.getTableList():
  218.             self.db.save(table)
  219.         print("All statistics tables saved successfully.")
  220.         return isinstance(exc_value, KeyboardInterrupt)
  221.  
  222.  
  223. #-----------------------------------------------------
  224. #----------------------main.py------------------------
  225. #-----------------------------------------------------
  226.  
  227. import network
  228. from models import MsgDatabase, DatabaseConnection
  229. from time import sleep
  230.      
  231. with network.Bot() as Malbolge, DatabaseConnection() as database:
  232.     while True:
  233.         for message in Malbolge.main():
  234.             database.updateUserRecord(message)
  235.             print message
  236.         for i in range(30):
  237.             sleep(0.1)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top