Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
#! /usr/bin/env python2.7
"""This application will go through all the comments on a specific subreddit and parse all
the comments and save them to a local text file"""
import threading
import praw
import time
import cleaner
import glob
# Refer to the praw documentation for more info on client_id and client_secret (API keys).
# NOTE(review): credentials are hard-coded placeholders -- load them from praw.ini,
# environment variables, or a config file instead of committing them.
r = praw.Reddit(username = "xxxxxxxxx", password = "xxxxxxxx", client_id = "xxxxxxxx", client_secret = "xxxxxxxx", user_agent = "Subreddit Comment parser")
print ("Logging in...")
# Number of completed passes over the subreddit list (incremented by the driver loop below).
counter = 0
# Ids of comments already written to disk, so repeated passes skip duplicates.
cache = []
#this will get all the comments for the last 500 posts for that specific subreddit.
def run_bot(subredditName):
    """Parse every comment of subredditName's hot submissions into a text file.

    Walks the comment tree of up to 500 "hot" submissions breadth-first and
    appends each not-yet-seen comment body (lower-cased) to
    "<subredditName>.txt". Seen comment ids are recorded in the module-level
    ``cache`` so repeated passes never write the same comment twice.

    :param subredditName: bare subreddit name, without the "/r/" prefix.
    """
    print("Parsing comments from " + subredditName)
    subreddit = r.subreddit(subredditName)
    fileName = subredditName + ".txt"
    print("Grabbing comments from " + subredditName)
    submissions = subreddit.hot(limit=500)
    for submission in submissions:
        # Resolve all "load more comments" stubs so the full tree is available.
        submission.comments.replace_more(limit=0)
        comment_queue = submission.comments[:]
        # Open the output file once per submission instead of once per comment.
        with open(fileName, "a") as myfile:
            while comment_queue:
                comment = comment_queue.pop(0)
                # BUGFIX: enqueue the replies too -- the original only visited
                # top-level comments, contradicting the "all comments" intent.
                comment_queue.extend(comment.replies)
                comment_text = comment.body.lower()
                if comment.id not in cache:
                    print(comment_text)
                    try:
                        myfile.write(" " + comment_text + "\n")
                    except UnicodeEncodeError:
                        # Python 2 can fail to encode non-ASCII bodies; skip them.
                        pass
                    cache.append(comment.id)
#this is a 10 minute break to give reddit servers a break. THe action above will be executed 1000 times, but duplicates will be ignored using the cache variable.
# Subreddits to crawl; bare names only, no "/r/" prefix.
subredditsToParse = ["trumpgret", "sandersforpresident", "funny", "wholesomememes", "news", "todayilearned", "interestingasfuck", "wtf", "gifs",\
"highqualitygifs", "jokes", "the_donald", "keepournetfree", "nintendoswitch", "atbge", "mildlyinfuriating", \
"rage", "blackpeoplegifs", "pcmasterrace", "evilbuildings", "upliftingnews", "fellowkids", "whitepeopletwitter", \
"atheism", "beholdthemasterrace", "enoughtrumpspam", "political_revolution", "worldnews", \
"hillaryforprison", "liberal", "politics", "esist", "fuckthealtright", "sjwhate", "imgoingtohellforthis"]
# Make 10 full passes over the subreddit list; duplicates are skipped via the
# cache inside run_bot, and the sleep gives reddit's servers a short break.
while counter < 10:
    for i in subredditsToParse:
        run_bot(i)
    print("Taking a little break")
    time.sleep(30)
    counter += 1
#this line will gather all the txt files in the directory, might need some tweaking to work in your environment.
txt_file_list = glob.glob("*.txt")
#this will clean all the txt files in the current directory.
#By cleaning I mean removing all the spaces and unsupported symbols.
for i in txt_file_list:
    cleaner.cleanemptylines(i)
Add Comment
Please sign in to add a comment.