# ffffind.py

"""

    ffffind.py
    by aaron@andcuriouser.com

    Automatically downloads all images from ffffound saved by a specific user.
    Will first try to download the image from the original source (to get the highest quality possible).
    If that fails, it'll download the cached version from ffffound.

    Prerequisites:
        Beautiful Soup (http://www.crummy.com/software/BeautifulSoup/)

    Usage:
        python ffffind.py username

"""


import os, sys, urllib, imghdr
from BeautifulSoup import BeautifulSoup
from urlparse import urlparse
from posixpath import basename, dirname

def main(user):
    offset = 0
    page = 1
    while True:
        print "Capturing page "+str(page)+" ..."
        print
        f = urllib.urlopen("http://ffffound.com/home/"+user+"/found/?offset="+str(offset))
        s = f.read()
        f.close()
        if "<div class=\"description\">" in s:
            images = []
            offset += 25
            count = 0
            soup = BeautifulSoup(s)
            for i in soup.findAll("div", { "class" : "description" }):
                images.append({"url": urlparse("http://" + str(i).split("<br />")[0].replace("<div class=\"description\">", ""))})
            for i in soup.findAll("img"):
                if str(i).find("_m.") != -1:
                    images[count]["backup"] = str(i).split("src=\"")[1].split("\"")[0]
                    count += 1
            for i in images:
                if os.path.exists(user+"/"+basename(i["url"].path)):
                    print basename(i["url"].path) + " exists, stopping."
                    print
                    sys.exit()
                else:
                    print "Downloading " + basename(i["url"].path),
                    try:
                        urllib.urlretrieve(i["url"].geturl(), user+"/"+basename(i["url"].path))
                        print "... done."
                        if not imghdr.what(user+"/"+basename(i["url"].path)) in ["gif", "jpeg", "png", None]:
                            print "... unfortunately, it seems to be a bad image.\nDownloading backup",
                            try:
                                urllib.urlretrieve(i["backup"], user+"/"+basename(i["url"].path))
                                print "... which seems to have worked."
                            except:
                                print "... which also failed."
                        if os.path.getsize(user+"/"+basename(i["url"].path)) < 5000:
                            raise
                    except:
                        print "... failed. Downloading backup",
                        try:
                            urllib.urlretrieve(i["backup"], user+"/"+basename(i["url"].path))
                            print "... which seems to have worked."
                        except:
                            print "... which also failed."
                print
            page += 1
        else:
            print "Reached the end of the list, stopping."
            break

if __name__ == '__main__':
    print
    print("ffffound image downloader")
    print
    if len(sys.argv) < 2:
        print "Usage:\n\t python ffffind.py username"
        print
    else:
        try:
            if not os.path.exists("./"+sys.argv[1]):
                os.mkdir(sys.argv[1])
        except:
            print "Error creating directory."
            sys.exit()
        user = sys.argv[1]
        print "Downloading all pictures from user '"+user+"'"
        print
        main(user)