Not a member of Pastebin yet? Sign up — it unlocks many cool features!
#!/usr/bin/env python
"""
gdocs2s3.py
Automatically downloads all of your Google Docs and backs them up to Amazon S3
"""
"""
Copyright (c) 2010 Scott Rubin apreche@frontrowcrew.com
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
"""
""" Begin User Config """
# Google account credentials used for the Google Docs ClientLogin call below.
GOOGLE_USERNAME = ""
GOOGLE_PASSWORD = ""
DOCUMENT_FORMAT = 'html'  # change to 'doc', if you desire
# Amazon S3 credentials and the bucket the backups are written to.
AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''
AWS_BUCKET_NAME = ''
# Other
# Local scratch directory for downloads; filenames are appended directly,
# so this must end with a path separator.
TEMP_STORAGE_DIR = '/tmp/'
""" End User Config """
- # Prepare S3 Connection
- import boto
- from boto.s3.connection import S3Connection
- from boto.s3 import Key
- conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
- try:
- bucket = conn.create_bucket(AWS_BUCKET_NAME)
- except boto.exception.S3CreateError:
- print "Someone else has that bucket name. Try a different one."
- exit()
- print "Connected to S3 bucket '%s'" % AWS_BUCKET_NAME
- # Export From Google Docs
- import gdata.docs.service
- GOOGLE_SOURCE="Apreche-GDocs2S3-v1"
- gd_client = gdata.docs.service.DocsService(source=GOOGLE_SOURCE)
- gd_client.ClientLogin(GOOGLE_USERNAME, GOOGLE_PASSWORD)
- feed = gd_client.GetDocumentListFeed()
- print "Connected to Google as '%s'" % GOOGLE_USERNAME
- for entry in feed.entry:
- if entry.GetDocumentType() == 'document':
- title = entry.title.text.encode('UTF-8').replace(' ','_')
- filename = "%s.%s" % (title, DOCUMENT_FORMAT)
- temp_filename = "%s%s" % (TEMP_STORAGE_DIR, filename)
- print "Downloading '%s' to '%s'" % (filename, TEMP_STORAGE_DIR)
- export_entries = {'doc': entry, 'html': entry.resourceId.text }
- gd_client.Export(export_entries[DOCUMENT_FORMAT], temp_filename)
- k = Key(bucket)
- k.key = filename
- k.set_contents_from_filename(temp_filename)
- print "'%s' Backed up to S3 Bucket '%s'" % (filename, AWS_BUCKET_NAME)
- print "Final Contents of Bucket '%s'" % AWS_BUCKET_NAME
- for key in bucket:
- print " %s" % key.name
Add Comment
Please sign in to add a comment.