# mkk-upload.py

#!/usr/bin/env python

import re
import os
import urllib2
import simplejson as json


# Configuration.
# Credentials sent verbatim after "LOW " in the "authorization" header of
# every upload request.
# SECURITY: credentials do not belong in source control. Set the IA_S3_KEYS
# environment variable to override the fallback below, and treat the value
# committed here as compromised (rotate it).
S3_KEYS = os.environ.get('IA_S3_KEYS') or "U7TwGpIjERYE9cx4:47gEsUpL4y4tR4iE"


#______________________________________________________________________________

def de_camel(string):
    """Turn a CamelCase file or directory name into space-separated words.

    A trailing ``.avi`` extension and any leading/trailing ``.``, ``/`` and
    space characters are removed, underscores become spaces, and a space is
    inserted at lower-to-upper case boundaries, on both sides of a hyphen
    that touches a word character, and between a letter and a following
    digit.  Interior ``/`` separators are kept.

    Args:
        string: A file name such as ``'MyMovie2.avi'`` or a relative
            directory path such as ``'./HomeVideos/BirthdayParty'``.

    Returns:
        The humanized string, e.g. ``'My Movie 2'``.
    """
    string = string.strip()
    # str.strip(chars) treats its argument as a *set of characters*, so
    # stripping './ avi' also ate leading/trailing 'a', 'v' and 'i' letters
    # of the name itself ('via.avi' -> ''). Remove the extension explicitly.
    if string.endswith('.avi'):
        string = string[:-len('.avi')]
    string = string.strip('./ ')
    string = string.replace('_', ' ')
    string = re.sub(r"([a-z])([A-Z])", r"\g<1> \g<2>", string)
    string = re.sub(r"(-)(\w)", r"\g<1> \g<2>", string)
    string = re.sub(r"(\w)(-)", r"\g<1> \g<2>", string)
    return re.sub(r"([a-zA-Z])(\d)", r"\g<1> \g<2>", string)

#______________________________________________________________________________

def upload(file, identifier, title, subjects):
    """Upload one local file to archive.org with a single HTTP PUT.

    The request goes to ``http://s3.us.archive.org/<identifier>/<file name>``
    and carries the item metadata as ``x-archive-*`` headers.

    Args:
        file: Path of the local file to upload.
        identifier: Item identifier used in the request URL.
        title: Value sent in the ``x-archive-meta-title`` header.
        subjects: Iterable of strings, each sent in its own numbered
            ``x-archive-meta-subject0<i>`` header.

    Returns:
        A human-readable status message: SUCCESS for HTTP 200, ERROR when
        the server answers with an HTTP error status or with any other
        status code.

    Raises:
        IOError: If ``file`` cannot be read.
        urllib2.URLError: On failures that are not HTTP error responses
            (only ``urllib2.HTTPError`` is handled here).
    """
    import urllib  # Python 2 stdlib; only needed for quote() below.

    file_name = os.path.basename(file)
    print('uploading "%s" to http://archive.org/details/%s' % (file_name, identifier))

    # Binary mode keeps the video bytes intact on every platform, and the
    # context manager closes the handle once the data has been read.
    with open(file, 'rb') as source:
        upload_data = source.read()

    # Quote the file name so spaces and other reserved characters cannot
    # produce a malformed request line.
    request = urllib2.Request(
        'http://s3.us.archive.org/%s/%s' % (identifier, urllib.quote(file_name)),
        data=upload_data)
    # A Request that carries data defaults to POST; force PUT instead.
    request.get_method = lambda: 'PUT'
    request.add_header('Content-Type', 'video/avi')
    request.add_header('x-amz-auto-make-bucket', '1')
    request.add_header('authorization', 'LOW %s' % S3_KEYS)

    # <METADATA>
    request.add_header('x-archive-meta-mediatype', 'movies')
    request.add_header('x-archive-meta-collection', 'michael-kan-kan-archives')
    request.add_header('x-archive-queue-derive', '0')
    request.add_header('x-archive-meta-title', title)
    # NOTE(review): header names are kept exactly as before
    # ('x-archive-meta-subject00', 'x-archive-meta-subject01', ...; index 10
    # becomes 'subject010'). archive.org's S3 documentation describes
    # multi-valued fields as 'x-archive-meta<NN>-<field>' -- confirm which
    # form the server actually honors before changing this.
    for i, subject in enumerate(subjects):
        request.add_header('x-archive-meta-subject0%s' % i, subject)
    # </METADATA>

    opener = urllib2.build_opener(urllib2.HTTPHandler)
    try:
        response = opener.open(request)
    except urllib2.HTTPError as e:
        return ('\n\nERROR: HTTP status code: %s\n\n' % e.code)
    try:
        status = response.getcode()
    finally:
        response.close()

    if status == 200:
        return ('\n\nSUCCESS :: http://archive.org/details/%s\n\n' % identifier)
    # Report any other non-error status instead of silently returning None.
    return ('\n\nERROR: unexpected HTTP status code: %s\n\n' % status)

#______________________________________________________________________________

def main():
    """Upload every ``.avi`` file below the current directory to archive.org.

    For each file the title is derived from the file name via ``de_camel``,
    the item identifier is ``mkk-<title lower-cased, spaces as hyphens>``,
    and the subjects are the ``/``-separated parts of the humanized
    directory path.  The item is uploaded only when
    ``http://archive.org/metadata/<identifier>`` returns an empty JSON
    value; otherwise it is reported as skipped.
    """
    videos = []
    for dirname, _dirnames, filenames in os.walk('.'):
        for name in filenames:
            if name.endswith('.avi'):
                videos.append((dirname, name))

    for dirname, name in videos:
        path = os.path.join(dirname, name)
        title = de_camel(name)
        identifier = "%s-%s" % ('mkk', title.lower().replace(' ', '-'))
        # 'a - b' in the title becomes 'a---b' above; collapse it again.
        identifier = identifier.replace('---', '-')
        # A file sitting directly in '.' yields an empty path part; do not
        # send that as a subject.
        subjects = [part for part in de_camel(dirname).split('/') if part]

        response = urllib2.urlopen('http://archive.org/metadata/%s' % identifier)
        try:
            metadata = json.loads(response.read())
        finally:
            response.close()

        if metadata:
            print('SKIPPING (already exists) :: "%s"' % identifier)
            continue

        # upload() returns the status message; it was previously discarded,
        # so neither success nor HTTP errors were ever shown.
        result = upload(path, identifier, title, subjects)
        if result:
            print(result)

# Run the uploader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()