spam climate confessions with /pol posts

#!/usr/bin/python
import json
import pickle
import requests
import time
import re
import datetime
import requests
import sys


class Scrape:
    """Download a 4chan board's catalog and a batch of its threads.

    All data comes from the read-only JSON API at a.4cdn.org. Every request
    goes through do_http, which sleeps one second first so requests are
    throttled.
    """

    # Thread numbers that get_thread_nums leaves out, keyed by board name.
    STICKY_BY_BOARD = {'pol': 51971506}
    # get_threads walks the thread list from this index down to index 1.
    MAX_THREAD_INDEX = 50
    # Seconds to wait on a request before giving up instead of hanging.
    HTTP_TIMEOUT = 30

    def __init__(self, board='pol'):
        """Remember the board name and set up empty result holders."""
        print("Starting class to scrape: " + board)
        self.verbose = True
        self.board = board
        self.timestamps = []
        self.timestamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime())
        self.timestamps.append(self.timestamp)
        self.catalog = 0  # Current catalog as parsed JSON (0 until fetched)
        self.live_threads = []  # Parsed thread JSON objects
        self.live_nums = []  # Current thread numbers
        self.archived_threads = []  # Put dead threads here

    def do_http(self, url):
        """Sleep one second, then GET url and return the requests response."""
        time.sleep(1)
        return requests.get(url, timeout=self.HTTP_TIMEOUT)

    def get_catalog(self, board=0):
        """Fetch and parse the catalog of a board.

        Args:
            board: board name; the default 0 means "use self.board".

        Returns:
            The parsed catalog, which is also stored in self.catalog.
        """
        if board == 0:
            board = self.board
        url = 'https://a.4cdn.org/' + board + '/catalog.json'
        if self.verbose:
            print('Pulling /' + board + '/ catalog: ' + url)
        response = self.do_http(url)
        # Parse once and hand back the same object that is stored.
        self.catalog = json.loads(response.text)
        return self.catalog

    def get_thread_nums(self, catalog):
        """Return the thread numbers listed in a parsed catalog.

        Args:
            catalog: iterable of page dicts, each holding a 'threads' list
                whose items carry a 'no' key.

        Returns:
            List of thread numbers in catalog order, minus the sticky
            configured for self.board (if any). The list is also stored in
            self.nums and self.live_nums.
        """
        if self.verbose:
            print('Gathering thread numbers')
        # None never equals a thread number, so boards without a configured
        # sticky keep every thread instead of hitting an unbound variable.
        sticky = self.STICKY_BY_BOARD.get(self.board)
        nums = []
        for page in catalog:
            for thread in page['threads']:
                num = thread['no']
                if num != sticky:
                    nums.append(num)
        self.nums = nums
        self.live_nums = nums
        if self.verbose:
            print('Found ' + str(len(nums)) + ' threads')
        return nums

    def get_threads(self):
        """Fetch the catalog, then download a batch of its threads.

        Threads are requested from index MAX_THREAD_INDEX of the thread-number
        list down to index 1 (index 0 is not requested, as before). The start
        index is clamped to the list length so a short catalog no longer
        raises IndexError.

        Returns:
            List of parsed thread objects, also stored in self.live_threads.
        """
        self.get_catalog()
        nums = self.get_thread_nums(self.catalog)
        if self.verbose:
            print('Getting threads')
        threads = []
        start = min(self.MAX_THREAD_INDEX, len(nums) - 1)
        for i in range(start, 0, -1):
            # Build the URL from self.board rather than a fixed board name.
            url = 'https://a.4cdn.org/' + self.board + '/thread/' + str(nums[i]) + '.json'
            thread = json.loads(self.do_http(url).text)
            print('Got ' + url + ' with ' + str(len(thread['posts'])) + ' posts')
            threads.append(thread)
        if self.verbose:
            print('Finished parsing catalog\'s threads')
        self.live_threads = threads
        return threads

def cleanhtml(raw_html):
    """Return raw_html with every '<...>' span removed.

    The match is non-greedy, so each tag is removed on its own and the text
    between tags is kept.
    """
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', raw_html)

def cleanhtml2(raw_html):
    """Return raw_html with all newline characters removed."""
    newline_pattern = re.compile('\n')
    return newline_pattern.sub('', raw_html)


def sendContent(content):
  """POST one piece of text to the hard-coded "confessions" endpoint.

  NOTE(review): together with the "spam" branch of the script at the bottom
  of this file, this function automates bulk submission of scraped
  third-party posts to someone else's web form while sending browser-like
  headers. That is abusive use of the service. This review documents the
  code as-is and deliberately makes no functional changes to it.

  Args:
      content: text placed in the "sentence" field of the request body.

  Returns:
      None. The HTTP status code and response text are printed.
  """
  # Local date/time string with a fixed "-500" suffix appended.
  currentDate = datetime.datetime.today().strftime ('%B %e %Y %H:%M:%S -500')
  date = str(currentDate)
  # Request body assembled by plain string concatenation.
  fieldContent = '{"slug":"paper", "date_submitted": "' + date + '", "sentence": "' + content + '", "state":""}'
  #print(str(fieldContent))
  print("test")
  # Headers set Origin/Referer to the nbcnews.com page and a desktop Chrome User-Agent.
  headerContent = {"Content-Type":"application/json", "Origin":"https://www.nbcnews.com", "Referer":"https://www.nbcnews.com/news/specials/climate-confessions-share-solutions-climate-change-n1054791", "Sec-Fetch-Mode":"cors", "User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"}
  r = requests.post("https://election-confessions.nbcnews.com/", data=fieldContent, headers=headerContent)
  print("sent request, status " + str(r.status_code))
  print("response content " + str(r.text) + "\n")


# Command dispatch on the first command-line argument: "scrape" or "spam".
# NOTE(review): sys.argv normally contains the script name as its first
# element, so this length check is likely never true; when no argument is
# given, the sys.argv[1] lookups below would raise IndexError instead.
if(len(sys.argv) == 0):
    print("No provided arguments. Here are your options... \n'python3 scrape.py scrape'\tto scrape 4chan\n'python3 scrape.py spam'\tto spam NBC with 4chan content")
elif(sys.argv[1] == "scrape"):
    # Download threads from /pol/ and append the tag-stripped text of each
    # post to output.txt, one post per line.
    print("scraping 4chan. The output will be in 'output.txt")
    a = Scrape('pol')
    threadz = a.get_threads()
    print("parsing threads")
    f = open("output.txt", "a")
    for threadd in threadz:
        # NOTE(review): the bare except silently swallows every error, and
        # because it wraps the whole inner loop, the first failing post (for
        # example one without a 'com' key) skips the rest of that thread.
        try:
            for post in threadd['posts']:
                f.write(str(cleanhtml(post['com']) + "\n"))
        except:
            pass
    f.close()
    print("finished. Run this again with the 'spam' argument to spam NBC")

elif(sys.argv[1] == "spam"):
    # NOTE(review): this branch submits every line of output.txt to a
    # third-party form via sendContent. It is documented as-is and was
    # deliberately not improved; it should not be run.
    print("spamming")
    with open("output.txt", "r") as fp:
        for line in fp:
            sendContent(cleanhtml2(str(line)))
else:
    print("Error - inavlid arguments. Here are your options... \n'python3 scrape.py scrape'\tto scrape 4chan\n'python3 scrape.py spam'\tto spam NBC with 4chan content")