#!/usr/bin/env python2
import sys
from PyQt4 import QtGui, QtCore
from ComicScrape import Ui_PieAndCake
import requests
from time import gmtime, strftime
import os
import platform
import getpass

# Name of the host operating system as reported by platform.system().
# Kept under its own name so that the imported `os` module is not overwritten.
OS_NAME = platform.system()

def _hs_write_bytes(path, data):
    """Write the byte string *data* to *path*, replacing any existing file."""
    # Binary mode: the payloads are images, flash files and pre-encoded HTML.
    with open(path, 'wb') as handle:
        handle.write(data)


def _hs_cut(text, start_marker, end_marker, skip=0):
    """Return *text* without the span from *start_marker* up to *end_marker*.

    The cut begins at the first occurrence of *start_marker* and ends *skip*
    characters into the first occurrence of *end_marker*.  When either marker
    is missing, or they appear in the wrong order, *text* is returned
    unchanged instead of being sliced with a -1 index.
    """
    start = text.find(start_marker)
    end = text.find(end_marker)
    if start == -1 or end == -1 or end < start:
        return text
    return text[:start] + text[end + skip:]


def _hs_latest_page_id(front_page):
    """Return the id of the first story link in the LATEST PAGES section.

    The id is returned as an int; None is returned when no
    ``?s=6&p=<digits>`` link can be found.
    """
    import re

    begin = front_page.find("<!-----------------------LATEST PAGES----------------------------->")
    if begin == -1:
        begin = 0
    stop = front_page.find("<!-----------------------END LATEST PAGES------------------------->", begin)
    if stop == -1:
        stop = len(front_page)
    match = re.search(r'\?s=6&p=(\d+)', front_page[begin:stop])
    if match is None:
        return None
    return int(match.group(1))


def Homestuck():
    """Download the Homestuck archive into /home/<user>/Documents/Homestuck/.

    The number of pages is derived from the newest story link on the site's
    front page.  For every page, starting at story id 001901 / image 00001,
    the single-gif, multi-gif and flash URLs are probed, whichever exists is
    saved, and a rewritten copy of the page's HTML is stored next to it.
    Images and pages that already exist on disk are skipped.

    Side effects: sets the module globals ``content`` (pages still to do) and
    ``h_content`` (total pages), and calls ``myapp.homeBar()`` after each
    page.  Returns None.
    """
    global content, h_content
    # Local import guarantees that `os` is the standard module inside this
    # function, independent of any module-level rebinding of that name.
    import os

    print('Initiating download of the complete Homestuck archive.')
    username = getpass.getuser()
    # HTML pages, page images and the shared spacer images are all written here
    rootdata = "/home/" + username + "/Documents/Homestuck/"
    # created alongside; nothing below writes into it
    rootimg = "/home/" + username + "/Documents/Homestuck/images/"
    print("Program started @ %s" % strftime("%Y-%m-%d %H:%M:%S", gmtime()))

    # id of the first page of the comic
    first_page_id = 1901

    # root page and image urls
    page = "http://www.mspaintadventures.com/?s=6&p="
    imgroot = "http://www.mspaintadventures.com/storyfiles/hs2/"
    favipath = rootdata + "favicon.ico"

    # create the folders for the data if they don't exist
    if not os.path.exists(rootdata):
        os.makedirs(rootdata)
    if not os.path.exists(rootimg):
        os.makedirs(rootimg)

    # download the alignment images used on every page (only the missing ones)
    fnames = ["v2_blankstrip.gif",
              "v2_blanksquare.gif",
              "spacer.gif",
              "v2_blanksquare2.gif",
              "v2_blanksquare3.gif",
              "favicon.ico"]
    for i, name in enumerate(fnames):
        if os.path.exists(rootdata + name):
            continue
        print("Fetching spacers... (%s/5)" % i)
        f = requests.get("http://www.mspaintadventures.com/images/" + name)
        _hs_write_bytes(rootdata + name, f.content)

    # identify how many pages there are from the newest link on the front page
    print("Identifying amount of content to download...")
    r = requests.get("http://www.mspaintadventures.com/")
    latest_id = _hs_latest_page_id(r.text)
    if latest_id is None:
        print("Could not determine the latest page id; nothing downloaded.")
        return

    # NOTE(review): this count covers ids 1901 .. latest-1, i.e. it excludes
    # the newest page itself -- confirm whether that is intended.
    currentcomicval = latest_id - first_page_id
    print("Downloading " + str(currentcomicval) + " pages of comics.")

    content = currentcomicval
    h_content = content

    imgnum = 0
    pagenum = first_page_id

    # main loop: one iteration per page
    while content > 0:
        imgnum += 1
        imgval = "%05d" % imgnum      # image number, zero padded to 5 digits
        endval = "%06d" % pagenum     # page id, zero padded to 6 digits

        urlgif = imgroot + imgval + ".gif"
        urlmultigif = imgroot + imgval + "_1.gif"
        flaurl = imgroot + imgval + "/" + imgval + ".swf"

        urlgifpath = rootdata + imgval + ".gif"
        urlmultigifpath = rootdata + imgval + "_1.gif"
        flashpath = rootdata + imgval + ".swf"

        # only set when a .swf was saved in this iteration; drives the flash
        # repair of the HTML further down
        is_flash_page = False

        # Probe and download only if none of the possible files exists yet.
        if not (os.path.exists(urlgifpath)
                or os.path.exists(urlmultigifpath)
                or os.path.exists(flashpath)):
            # A page has a single .gif, several numbered .gifs, or a .swf;
            # a 404 means that variant does not exist.
            gif_resp = requests.get(urlgif)
            multi_resp = requests.get(urlmultigif)
            swf_resp = requests.get(flaurl)
            has_gif = gif_resp.status_code != 404
            has_multigif = multi_resp.status_code != 404
            has_flash = swf_resp.status_code != 404

            if not has_multigif and not has_flash:
                # regular, single .gif
                if not has_gif:
                    # none of the three variants exists: stop the download
                    print("Something went wrong while downloading the .gif.")
                    print(urlgif)
                    break
                _hs_write_bytes(urlgifpath, gif_resp.content)

            elif not has_gif and not has_flash:
                # more than one gif on a page: _1, _2, ... until a 404
                multigifid = 1
                part_url = urlmultigif
                part_resp = multi_resp
                while part_resp.status_code != 404:
                    print(part_url)
                    imgpath = rootdata + imgval + "_" + str(multigifid) + ".gif"
                    _hs_write_bytes(imgpath, part_resp.content)
                    multigifid += 1
                    part_url = imgroot + imgval + "_" + str(multigifid) + ".gif"
                    part_resp = requests.get(part_url)

            elif not has_gif and not has_multigif:
                # Flash content
                is_flash_page = True
                print(flaurl)
                _hs_write_bytes(flashpath, swf_resp.content)

            else:
                print("Something went horribly wrong!")
        else:
            print("Image number " + imgval + " skipped.")

        # Now we download the html
        root = rootdata + endval + ".html"
        if not os.path.exists(root):
            response = requests.get(page + endval)
            html = response.text

            # point image references at the local copies
            html = html.replace(imgroot, rootdata)

            # drop everything after the comic content, and the nav section
            html = _hs_cut(
                html,
                "<!------------------------end comic content----------------------------------->",
                "</html>")
            html = _hs_cut(
                html,
                "<!------------------------begin nav----------------------------------->",
                "<!------------------------end nav----------------------------------->")
            # remove every remaining occurrence of this page's own id
            html = html.replace(endval, "")

            # the link to the next comic must point at the next local file
            nextval = "%06d" % (pagenum + 1)
            htmlpath = rootdata + nextval + ".html"
            html = html.replace("?s=6&p=" + nextval, htmlpath)
            # spacer images and the favicon were saved directly in rootdata
            html = html.replace("images/", rootdata)
            html = html.replace("favicon.ico", favipath)

            # flash URL repair code: replace the embed with a plain link
            if is_flash_page:
                print("Repairing flash code...")
                # skip=1 drops the leading space of the end marker, so the
                # remaining text starts at "</object>"
                html = _hs_cut(
                    html,
                    '<script language="javascript">AC_FL_RunContent = 0;</script>',
                    " </object>\n        </noscript>",
                    skip=1)
                swffilelink = rootdata + imgval + ".swf"
                swflink = ('<a href="' + swffilelink + '" target="_self" '
                           'name="Flash Content Link">Click here for flash</a>')
                html = html.replace("</object>", swflink)

            # Some panels contain non-ASCII symbols; they are dropped so the
            # page can be written as plain bytes.
            _hs_write_bytes(root, html.encode('ascii', 'ignore'))
        else:
            print("html page " + endval + " skipped.")

        pagenum += 1
        content -= 1
        myapp.homeBar()

    print("Finsihed downloading @: %s" % strftime("%Y-%m-%d %H:%M:%S", gmtime()))

def _qc_write_bytes(path, data):
    """Write the byte string *data* to *path*, replacing any existing file."""
    # Binary mode: the payloads are images, css and pre-encoded HTML.
    with open(path, 'wb') as handle:
        handle.write(data)


def QC():
    """Download the Questionable Content archive.

    Pages, the stylesheet and the logo go to
    /home/<user>/Documents/Questionable Content/ and the strips to its
    ``comics/`` sub-folder.  The number of the newest comic is read from the
    ``<img id="strip" ...>`` tag on the site's front page; comics 1 up to that
    number are then fetched as ``<n>.png`` together with a patched local copy
    of the viewer page.  Files that already exist are skipped.

    Side effects: sets the module globals ``qontent`` (comics still to do) and
    ``qc_content`` (total comics), and calls ``myapp.qc_bar()`` after each
    comic.  Returns None.
    """
    global qontent, qc_content
    # Local imports guarantee that `os` is the standard module inside this
    # function, independent of any module-level rebinding of that name.
    import os
    import re

    print('Initiating download of the complete Qestionable Content archive.')

    username = getpass.getuser()
    # HTML
    rootdir = "/home/" + username + "/Documents/Questionable Content/"
    # CSS Local
    localcss = rootdir + 'newstyles.css'
    # local logo
    locallogo = rootdir + "logo.png"
    # images
    rootdata = "/home/" + username + "/Documents/Questionable Content/comics/"

    print("Program started @ %s" % strftime("%Y-%m-%d %H:%M:%S", gmtime()))

    # image, stylesheet and logo urls
    imgroot = "http://www.questionablecontent.net/comics/"
    css = "http://questionablecontent.net/newstyles.css"
    logourl = "http://questionablecontent.net/testing/logo.png"

    if not os.path.exists(rootdir):
        os.makedirs(rootdir)
    if not os.path.exists(rootdata):
        os.makedirs(rootdata)
    if not os.path.exists(localcss):
        _qc_write_bytes(localcss, requests.get(css).content)
    if not os.path.exists(locallogo):
        _qc_write_bytes(locallogo, requests.get(logourl).content)

    # get current comic id from the strip image on the front page
    print("Identifying amount of content to download...")
    r = requests.get("http://www.questionablecontent.net")
    match = re.search(
        r'<img id="strip" src="http://www\.questionablecontent\.net/comics/(\d+)\.png',
        r.text)
    if match is None:
        print("Could not determine the latest comic number; nothing downloaded.")
        return

    qontent = int(match.group(1))
    qc_content = qontent
    imgval = 1
    urlroot = 'http://questionablecontent.net/view.php?comic='
    print(qontent)

    # The viewer page is requested WITHOUT a comic number and patched per
    # comic below, so the response is the same for every iteration and is
    # fetched once, on first use.
    # NOTE(review): presumably the number-less page is used as a template on
    # purpose (the './comics/' and 'view.php?comic=1' replacements only fit
    # that page) -- confirm before switching to the per-comic url.
    template = None

    # main download loop
    while qontent > 0:
        url = urlroot + str(imgval) + '.html'
        print(url)
        localpage = rootdir + str(imgval) + '.html'
        localimage = rootdata + str(imgval) + '.png'
        imgurl = imgroot + str(imgval) + '.png'

        # get the webpage
        if not os.path.exists(localpage):
            if template is None:
                template = requests.get(urlroot).text
            # fix file path associations and write content to local file
            nextcomic = imgval + 1
            html = template.replace('./comics/', './comics/' + str(imgval) + '.png')
            html = html.replace('../testing/logo.png', './logo.png')
            html = html.replace('view.php?comic=1', rootdir + str(nextcomic) + '.html')
            # strip the warning text, but only when both markers are present;
            # slicing with a -1 index would corrupt the page
            start = html.find('<b>Warning</b>')
            end = html.find('<b>74</b><br />')
            if start != -1 and end != -1 and start <= end:
                html = html[:start] + html[end + 5:]
            # encode explicitly so non-ASCII characters cannot abort the write
            _qc_write_bytes(localpage, html.encode('ascii', 'ignore'))

        # get the image
        print(imgurl)
        if not os.path.exists(localimage):
            _qc_write_bytes(localimage, requests.get(imgurl).content)

        imgval += 1
        qontent -= 1
        myapp.qc_bar()

class MyApp(QtGui.QMainWindow):
    """Main window: two check boxes choose the comics, one button starts."""

    def __init__(self):
        QtGui.QMainWindow.__init__(self)
        self.ui = Ui_PieAndCake()
        self.ui.setupUi(self)
        self.ui.the_button.clicked.connect(self.display_results)

    def display_results(self):
        """Run the download for every comic whose check box is ticked.

        Each selected download runs exactly once; with both boxes ticked
        Homestuck runs first, then QC.
        """
        want_homestuck = self.ui.pie_check.isChecked()
        want_qc = self.ui.cake_check.isChecked()
        if want_homestuck:
            Homestuck()
        if want_qc:
            QC()

    def homeBar(self):
        """Show the Homestuck progress on ``hBar`` as a percentage done.

        Reads the module globals ``h_content`` (total pages) and ``content``
        (pages still to download).
        """
        if not h_content:
            # nothing to download; avoids dividing by zero
            return
        remaining = (content * 100) // h_content
        # Set the absolute percentage; adding it to the current value would
        # count earlier progress again on every call.
        # NOTE(review): assumes the bar keeps a 0-100 range -- confirm in the
        # .ui definition.
        self.ui.hBar.setValue(100 - remaining)

    def qc_bar(self):
        """Show the QC progress on ``qcBar`` as a percentage done.

        Reads the module globals ``qc_content`` (total comics) and
        ``qontent`` (comics still to download).
        """
        total = int(qc_content)
        if not total:
            # nothing to download; avoids dividing by zero
            return
        remaining = (int(qontent) * 100) // total
        # absolute percentage, see homeBar
        self.ui.qcBar.setValue(100 - remaining)

    def bugBar(self):
        """Placeholder: only prints a marker line."""
        print("bug comic")

if __name__ == '__main__':
    # Script entry point: create the Qt application and show the main window.
    app = QtGui.QApplication(sys.argv)
    # `myapp` has to remain a module-level name: Homestuck() and QC() report
    # their progress through it.
    myapp = MyApp()
    myapp.show()

    # Run the Qt event loop and use its return value as the exit status.
    exit_status = app.exec_()
    sys.exit(exit_status)