Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import os
- import sys
- import json
- import logging
- import hashlib
- import requests
- import time
- import getpass
- import csv
- from HTMLParser import HTMLParser
- import codecs
- import re
def remove_windows_characters(s):
    """Return *s* with known mis-decoded punctuation replaced by ASCII.

    Every key in the table below is a two-character escape sequence (they
    look like UTF-8 byte pairs that were decoded one byte at a time); the
    inline comment on each entry names the glyph it stands for.  All
    occurrences are substituted in a single regex pass; text that contains
    none of the sequences is returned unchanged.
    """
    replacements = {
        '\xc2\x82': ',',      # High code comma
        '\xc2\x84': ',,',     # High code double comma
        '\xc2\x85': '...',    # Triple dot
        '\xc2\x88': '^',      # High caret
        '\xc2\x91': '\x27',   # Forward single quote
        '\xc2\x92': '\x27',   # Reverse single quote
        '\xc2\x93': '\x22',   # Forward double quote
        '\xc2\x94': '\x22',   # Reverse double quote
        '\xc2\x95': ' ',
        '\xc2\x96': '-',      # High hyphen
        '\xc2\x97': '--',     # Double hyphen
        '\xc2\x99': ' ',
        '\xc2\xa0': ' ',
        '\xc2\xa6': '|',      # Split vertical bar
        '\xc2\xab': '<<',     # Double less than
        '\xc2\xbb': '>>',     # Double greater than
        '\xc2\xbc': '1/4',    # One quarter
        '\xc2\xbd': '1/2',    # One half
        '\xc2\xbe': '3/4',    # Three quarters
        '\xca\xbf': '\x27',   # c-single quote
        '\xcc\xa8': '',       # Modifier - under curve
        '\xcc\xb1': '',       # Modifier - under line
    }

    # One alternation over every key; the callback maps the matched
    # sequence back to its ASCII stand-in.
    alternation = '(' + '|'.join(replacements.keys()) + ')'
    return re.sub(
        alternation,
        lambda found: replacements[found.group(0)],
        s)
def restore_windows_1252_characters(s):
    """Replace C1 control characters in *s* with Windows-1252 characters.

    Each character in the range U+0080..U+009F is reinterpreted as the
    single byte with the same value and decoded as Windows-1252 (so
    U+0093 becomes a left double quotation mark, for example).  Code
    points that Windows-1252 leaves undefined are removed.  All other
    characters are returned untouched.

    :param s: the unicode text to repair.
    :returns: the repaired text.
    """
    def to_windows_1252(match):
        try:
            # bytearray([n]) is a single raw byte on both Python 2 and
            # Python 3, whereas bytes([n]) on Python 2 is the text "[n]".
            return bytearray([ord(match.group(0))]).decode('windows-1252')
        except UnicodeDecodeError:
            # No character is assigned to this byte: drop it.
            return ''

    # The whole C1 block is covered (the range previously stopped at
    # U+0099, missing U+009A..U+009F).  A u'' literal is used so the \u
    # escapes are real characters on Python 2 as well.
    return re.sub(u'[\u0080-\u009f]', to_windows_1252, s)
class AutoAdmitParser(HTMLParser):
    """HTML parser that gathers form values into the ``payload`` dict.

    * every ``<input>`` that has a ``name`` and is not of type ``submit``
      is stored as ``payload[name] = value`` (``None`` when the tag has no
      ``value`` attribute);
    * all text found inside any ``<textarea>`` is concatenated under the
      ``"message"`` key.
    """

    def __init__(self, *args, **kwargs):
        # The base initialiser is called explicitly on the class, exactly
        # as the original code does.
        HTMLParser.__init__(self, *args, **kwargs)
        self._in_textarea = False
        self.payload = {}

    def handle_starttag(self, tag, attrs):
        """Record ``<input>`` values and note when a textarea opens."""
        if tag == "textarea":
            self._in_textarea = True
            return
        if tag != "input":
            return

        fields = dict(attrs)
        is_submit = fields.get("type") == "submit"
        if "name" in fields and not is_submit:
            self.payload[fields["name"]] = fields.get("value")

    def handle_endtag(self, tag):
        """Note when the current textarea closes."""
        if tag == "textarea":
            self._in_textarea = False

    def handle_data(self, data):
        """Append textarea text to ``payload["message"]``."""
        if not self._in_textarea:
            return
        collected = self.payload.get("message")
        # A missing or None entry is replaced; anything else is extended.
        self.payload["message"] = data if collected is None \
            else collected + data
class Blanker(object):
    """Back up and then blank a list of posts on xoxohth.com.

    For every post in ``posts`` (dicts with ``message_id`` and
    ``thread_id`` keys) the edit form is fetched, the original field
    values are appended to ``CSV_FILE``, and the form is re-submitted with
    an empty subject, e-mail and message.  Message ids already present in
    ``CSV_FILE`` are skipped, so an interrupted run can be resumed.

    NOTE: the CSV handling (binary file modes, ``unicode`` check, explicit
    UTF-8 encoding) is written for Python 2.

    :param logger: ``logging.Logger``-like object used for progress output.
    :param posts: iterable of post dicts (see above).
    :param username: (keyword) account name used by :meth:`login`.
    :param password: (keyword) account password used by :meth:`login`.
    """

    USER_AGENT = "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_2_1 " \
                 "like Mac OS X; nb-no) AppleWebKit/533.17.9 " \
                 "(KHTML, like Gecko) Version/5.0.2 Mobile/8C148a " \
                 "Safari/6533.18.5"
    LOG_FILE = "blanker.log"
    # Lower bound, in seconds, of the pause returned by get_wait().
    MIN_WAIT_DURATION = 2.5
    # Number of consecutive failed logins after which the run gives up.
    MAX_LOGIN_ATTEMPTS = 5
    HTTP_REFERER_TEMPLATE = "http://xoxohth.com/thread.php?" \
                            "thread_id={}&forum_id=2"
    HTTP_GET_TEMPLATE = "http://xoxohth.com/post.php?" \
                        "message_id={}&" \
                        "thread_id={}&forum_id=2"
    HTTP_POST_TEMPLATE = "http://xoxohth.com/post.php"
    CSV_FILE = "old_posts.csv"
    CSV_FIELD_NAMES = ["thread_id", "message_id", "poster_email",
                       "message_subject", "message"]

    def __init__(self, logger, posts, **kwargs):
        self._username = kwargs.get("username", None)
        self._password = kwargs.get("password", None)
        self.posts = posts
        self.logger = logger
        self.session = requests.Session()
        # A set gives O(1) "already processed?" checks in run().
        self._processed_messages = set()

    def prepare_csv(self):
        """Load already-processed message ids, or create the CSV file.

        If ``CSV_FILE`` exists, the ``message_id`` of each row is recorded
        as processed; otherwise the file is created with only a header.
        """
        if os.path.exists(self.CSV_FILE):
            with open(self.CSV_FILE, 'rb') as csvf:
                for row in csv.DictReader(csvf):
                    self._processed_messages.add(int(row["message_id"]))
            # Replaces a Python 2 debug ``print`` of the id list.
            self.logger.debug("Already processed message ids: %s",
                              sorted(self._processed_messages))
        else:
            with open(self.CSV_FILE, 'wb') as csvf:
                writer = csv.DictWriter(csvf, fieldnames=self.CSV_FIELD_NAMES)
                writer.writeheader()

    def get_wait(self):
        """Return a pause length in seconds.

        The value is ``MIN_WAIT_DURATION`` plus a random fraction in
        ``[0, 1)`` derived from a SHA-256 digest of ``os.urandom`` output.
        """
        digest = hashlib.sha256(os.urandom(2**15)).hexdigest()
        jitter = (int(digest, 16) % 10**6) / 1e6
        return self.MIN_WAIT_DURATION + jitter

    def login(self, **kwargs):
        """Post the stored credentials to the login page.

        :param referer: (keyword) value for the ``Referer`` header;
            defaults to the forum main page.
        :returns: ``False`` if the response contains the site's
            invalid-login message, ``True`` otherwise.
        """
        auth_url = "http://xoxohth.com/login.php"
        form_data = {
            "ref_page": "main.php?forum_id=2",
            "username": self._username,
            "password": self._password,
            "Submit": "Sign In"
        }
        self.session.headers.update({
            "User-Agent": self.USER_AGENT,
            "Referer": kwargs.get(
                "referer",
                "http://xoxohth.com/main.php?forum_id=2&pft=1")
        })
        self.logger.info("Attempting to log in as user '{}'...".format(
            self._username))
        r = self.session.post(auth_url, data=form_data)
        if "Invalid login. Register if you haven't!" in r.text:
            self.logger.error("Unable to sign in.")
            return False
        self.logger.info("Successfully logged in as user '{}'.".format(
            self._username))
        return True

    def _relogin(self):
        """Try to log in up to ``MAX_LOGIN_ATTEMPTS`` times.

        Pauses for ``get_wait()`` seconds between attempts.

        :returns: ``True`` as soon as a login succeeds, ``False`` (after
            logging a critical message) when every attempt failed.
        """
        for attempt in range(1, self.MAX_LOGIN_ATTEMPTS + 1):
            if self.login():
                return True
            if attempt < self.MAX_LOGIN_ATTEMPTS:
                time.sleep(self.get_wait())
        self.logger.critical("Could not log in. Stopping.")
        return False

    def get_original(self, thread_id, message_id):
        """Fetch the edit form of a post and return its field values.

        If the request is redirected to the login page, a re-login is
        attempted and the fetch is retried.

        :returns: the ``AutoAdmitParser.payload`` dict on success;
            ``None`` if the site answers ``"Nope!"`` (the post is not
            editable by this user); ``False`` if re-login failed.
        """
        while True:
            # Gets the original post for storage purposes
            self.logger.info(
                "Getting the original message data for "
                "message_id={} in thread_id={}...".format(
                    message_id, thread_id))
            self.session.headers.update({
                "User-Agent": self.USER_AGENT,
                "Referer": self.HTTP_REFERER_TEMPLATE.format(thread_id)
            })
            r = self.session.get(
                self.HTTP_GET_TEMPLATE.format(message_id, thread_id))

            if "login.php" in r.url:
                self.logger.warning("Logged out")
                time.sleep(self.get_wait())
                if not self._relogin():
                    return False
                continue  # logged in again: retry the fetch

            if r.text == "Nope!":
                self.logger.error(
                    "You aren't the author of message_id={} in "
                    "thread_id={}!".format(message_id, thread_id))
                # Retrying cannot change the answer; without this return
                # the loop would re-request the same URL forever.
                return None

            parser = AutoAdmitParser()
            parser.feed(r.text)
            return parser.payload

    def blank(self, payload, thread_id, message_id):
        """Re-submit a post's edit form with empty subject, e-mail and body.

        Hidden form values are taken from ``payload`` (as returned by
        :meth:`get_original`).  If the request is redirected to the login
        page, a re-login is attempted and the submission is retried.

        :returns: ``True`` once the form was submitted without being
            redirected to the login page, ``False`` if re-login failed.
        """
        while True:
            data = {
                "action": payload.get("action", "post"),
                "forum_id": payload.get("forum_id", 2),
                "thread": payload.get("thread", None),
                "threadclass": payload.get("threadclass", None),
                "subj": "",
                "gmp": payload.get("gmp", None),
                "sc": payload.get("sc", None),
                "thread_id": payload.get("thread_id", thread_id),
                "message_id": payload.get("message_id", message_id),
                "parent_thread_id": payload.get("parent_thread_id", None),
                "txtAuthor": payload.get("txtAuthor", None),
                "poster_email": "",
                "message_subject": "",
                "message": "",
                "taHTML_Code": payload.get("taHTML_Code", None),
                "cbEmbeddedImages": payload.get("cbEmbeddedImages", None)
            }
            self.session.headers.update({
                "Referer": self.HTTP_GET_TEMPLATE.format(message_id, thread_id)
            })
            r = self.session.post(self.HTTP_POST_TEMPLATE, data=data)

            if "login.php" in r.url:
                self.logger.warning("Logged out")
                if not self._relogin():
                    return False
                continue  # logged in again: retry the submission

            self.logger.info(
                "Message_id={} in thread_id={} was successfully "
                "blanked.".format(message_id, thread_id))
            return True

    def _save_original(self, payload):
        """Append the CSV-relevant fields of ``payload`` to ``CSV_FILE``."""
        row = {}
        for key, value in payload.items():
            if key not in self.CSV_FIELD_NAMES:
                continue
            if isinstance(value, unicode):
                value = remove_windows_characters(value).encode(
                    'utf8', 'ignore')
            row[key] = value
        with open(self.CSV_FILE, 'ab') as csvf:
            writer = csv.DictWriter(csvf, self.CSV_FIELD_NAMES)
            writer.writerow(row)
        self.logger.info("New CSV row successfully written")

    def run(self):
        """Back up and blank every post that is not yet in the CSV file.

        The original content is written to the CSV *before* the post is
        blanked.  The run ends early when logging in fails repeatedly,
        matching the "Stopping." message logged in that case.
        """
        self.prepare_csv()
        for post in self.posts:
            message_id = post["message_id"]
            thread_id = post["thread_id"]

            if int(message_id) in self._processed_messages:
                self.logger.warning(
                    "The message with message_id={} and "
                    "thread_id={} has already been blanked.".format(
                        message_id, thread_id))
                continue

            payload = self.get_original(thread_id, message_id)
            if payload is False:
                # Re-login failed; further requests would fail the same way.
                return
            if not payload:
                # Nothing to back up (e.g. not the author): pause, move on.
                time.sleep(self.get_wait())
                continue

            self.logger.info(
                "Successfully retrieved the original message.")
            self._save_original(payload)
            # Remember the id so a duplicate entry in ``posts`` is skipped.
            self._processed_messages.add(int(message_id))

            self.logger.info(
                "Attempting to blank message_id={} in "
                "thread_id={}...".format(message_id, thread_id))
            time.sleep(self.get_wait())
            if not self.blank(payload, thread_id, message_id):
                return
            time.sleep(self.get_wait())
def usage():
    """Write the command-line help to stderr and exit with status 1."""
    sys.stderr.write(
        "Post Blanker - Blanks all of your posts on xoxohth.com "
        "(just enter your password when prompted).\n\n"
        "Usage:\n"
        "./blanker.py [path to the json file I sent] [login name]\n")
    sys.stderr.flush()
    # sys.exit is used instead of the bare ``exit`` builtin, which is
    # injected by the ``site`` module and intended for interactive use.
    sys.exit(1)
def main():
    """Command-line entry point.

    Expects exactly two arguments: the path to a JSON file describing the
    posts, and the login name.  The password is read interactively.  Log
    output goes both to ``Blanker.LOG_FILE`` and to the console.
    """
    # Validate the arguments first so that a bad invocation does not
    # create the log file.  usage() terminates the process itself.
    if len(sys.argv) != 3:
        usage()

    log_formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logger = logging.getLogger('blanker')
    logger.setLevel(logging.DEBUG)

    # Same format and level for the file and the console handler.
    for handler in (logging.FileHandler(Blanker.LOG_FILE),
                    logging.StreamHandler()):
        handler.setFormatter(log_formatter)
        handler.setLevel(logging.DEBUG)
        logger.addHandler(handler)

    with open(sys.argv[1], 'r') as f:
        posts = json.load(f)

    blanker = Blanker(
        logger,
        posts,
        username=sys.argv[2],
        password=getpass.getpass(
            "Enter the password for user '{}': ".format(
                sys.argv[2]))
    )
    blanker.run()
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement