Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''
- This library is a collection of Python 3.7.1 functions to download and archive images
- from tumblr.com, before it is too late. You can grab all the images from a specific
- tumblr, or archive your own Likes. Pic files are saved with a filename formatted like
- mySFWtumblr-1234567-kitty-cat-cute.jpg, where 1234567 is the post ID and kitty-cat-cute
- are tags. It may or may not work with Python versions earlier than Python 3.7.1.
- 0. Get the latest version of Python and have some hazy understanding of how it works.
- https://www.python.org/
- 1. Get the pytumblr library and install somewhere in your python path https://github.com/tumblr/pytumblr.
- You want the pytumblr directory there not the pytumblr-master.
- 2. Get OAuth credentials/keys here: https://www.tumblr.com/docs/en/api/v2
- The four keys are strings of 50 chars, consumer_key, consumer_secret, oauth_token, oauth_token_secret
- in that order. Each tumblr you have has its own set of keys. You do this while logged in on a browser
- to the particular tumblr in question. This is a requirement to call the tumblr API.
- Edit this file, or preferably, create a separate file to contain your actual keys below.
- 3. Save this file as alboget.py to a directory in your Python path.
- 4. Start python console. Import os and alboget. Navigate to the directory where you want your incoming pics
- to be saved, e.g. >>>os.chdir('C:\\TUMBLR'). Otherwise the files will be saved to the CWD.
- 5. For your tumblr at mySFWtumblr.tumblr.com, you can save your Liked pics like this
- >>>alboget.updateLikes('mySFWtumblr', 0)
- or
- >>>alboget.updateLikes('mySFWtumblr', 3560)
- where 3560 was the number of Likes you had when you last did an update.
- 6. To get the post pics from your own or any other tumblr, use
- >>>alboget.picScrape('mySFWtumblr', 0, 'mySFWtumblr')
- or
- >>>alboget.picScrape('female-presenting-nipples', 0, 'mySFWtumblr')
- where 0 will get all available pics (careful) and another number will get pics back to
- the post count specified. You have to use YOUR OWN tumblr name in the third field, which tells
- the function which keys to use.
- This backwards seeming behaviour was put in to make it easier to
- do incremental backups of both Likes and Posts, and the directories created for the saved data have
- names like female-presenting-nipples-44560, where 44560 was the most recent post count when called.
- 7. Tumblr Limitations. In practice, tumblr has changed the way the API works so that it will only
- effectively fetch the most recent ~1000 posts from one's Likes. This is not documented. The
- console at https://api.tumblr.com/console also is not 100% faithful to reality. It does seem that the
- posts function still works backward into the arbitrary past. So when calling the updateLikes function,
- you should specify the count field as a number about 1000 less than your current Likes count. Also, the
- API allows only 1000 calls per hour, and 5000 calls per day, however each call fetches 20 posts, so you can
- get up to 20,000 pics/hour. The pic file download from tumblr's servers is slow, and I suspect
- that they throttle this speed once they detect that a single IP is grabbing many pics per minute. If you
- have multiple tumblrs and multiple key sets, that will help to get around this limit.
- 8. Happy downloading.
- '''
- import os
- import re
- import urllib
- from urllib.parse import urlparse
- import requests
- import pytumblr
# Edit the strings below to put in your actual keys, and tumblr names. These are dummies.
# You can create keys for as many tumblrs as you own.
# Each entry is passed positionally to pytumblr.TumblrRestClient by initclient(), so the
# order matters: consumer_key, consumer_secret, oauth_token, oauth_token_secret.
keys = {
    'mySFWtumblr': [
        'XMKyiiypJP0Kz5EkUOKVn0dmqGBBSezxSFEgJCBghRUqizGtu3',
        'lmoBq8zLwe99YqGHn0rJYilQHiwKcPkXJQwLJxQzWiRR5zj8xV',
        'NiPZMaurMnhAEinHEiXS7ncguRkgCb36Asu3C9IJpgX8LrZ0Dv',
        'mu0vgW6qQEfMqS42kAthbex9Eq54sDSM6ME6YxTLMfLJPJ9brg',
    ],
    'myPORNtumblr': [
        'izljviYTtjTZJc9ftvB0W1y66fVQC2O7IHuce8GPH5U4fCb4FX',
        'YEGXrrigdSpQbwE1GrgwBWodlWJ3fifYScBVkpUSSCTwxLW6yR',
        'kWpNZ5v60CtYbOHIE6aNUogw2UgcIJ1jnpaHn2GoQZQg1YInnK',
        'x675q63ESyypmXJgjMIyWokvStEHriVMruQ9dIWElFcD5UDRmt',
    ],
}

# Directory under which updateLikes() and picScrape() create their download folders.
# You can hard code your preferred dir here if you like.
tumblrrootdir = os.getcwd()

# NOTE: at module scope this statement binds nothing; `client` only comes into
# existence once initclient() has been called.
global client

# Lower-case aliases for the Python constants (presumably so JSON-style text pasted
# from API responses evaluates in the console -- confirm before relying on it).
false = False
true = True
null = None
def cleanFN(filename):
    """Return *filename* stripped of everything but ASCII letters, digits, '-' and '_'.

    Note that dots are removed too, so callers append the file extension
    after cleaning the stem.
    """
    unwanted = re.compile("[^a-zA-Z0-9-_]+")
    cleaned = unwanted.sub("", filename)
    return cleaned
def initclient(tname):
    """Create the module-wide tumblr API client from the keys stored for *tname*.

    Looks up ``keys[tname]`` (a KeyError propagates if the name is unknown) and
    passes the stored strings positionally to ``pytumblr.TumblrRestClient``.
    The result is stored in the global ``client`` used by the other functions.
    """
    global client
    credentials = keys[tname]
    client = pytumblr.TumblrRestClient(*credentials)
def namesandURLsFromPosts(postlist):
    """Turn a list of tumblr post dicts into ``[url, filename]`` pairs.

    Only posts whose ``'type'`` is ``'photo'`` contribute. The filename is
    ``<blog_name>-<id>[-<tags>][-NN]<ext>`` where:

    * the tags are joined with '-', spaces become '_', and the result is cut
      to 50 characters;
    * ``-NN`` (two digits, starting at 01) is added only when the post does
      not have exactly one photo;
    * the stem is passed through cleanFN() and the extension is taken from
      the path part of the photo's original-size URL (empty if it has none).

    Returns a list of two-element lists, in post order then photo order.
    """
    pairs = []
    for post in postlist:
        # Looked up before the type check, so every post must carry both keys.
        prefix = post['blog_name'] + '-' + str(post['id'])
        if post['type'] != 'photo':
            continue

        tagpart = '-'.join(post['tags']).replace(' ', '_')[0:50]
        if tagpart:
            tagpart = '-' + tagpart

        photos = post['photos']
        numbered = len(photos) != 1
        for position, photo in enumerate(photos, start=1):
            url = photo['original_size']['url']
            extension = os.path.splitext(urlparse(url).path)[1]
            stem = prefix + tagpart
            if numbered:
                stem = stem + '-' + str(position).zfill(2)
            pairs.append([url, cleanFN(stem) + extension])
    return pairs
def getPosts(tumblrname, mylimit, myoffset):
    """Fetch one page of posts from ``<tumblrname>.tumblr.com`` as ``[url, filename]`` pairs.

    *mylimit* and *myoffset* are forwarded to the API call as ``limit`` and
    ``offset``. If the response has no ``'posts'`` entry, a message and the raw
    response are printed and an empty list is returned. Requires initclient()
    to have been called first.
    """
    global client
    response = client.posts(tumblrname + '.tumblr.com', limit=mylimit, offset=myoffset)
    if 'posts' not in response:
        print('tumblr call fail at getPosts(' + tumblrname + 'myoffset =' + str(myoffset))
        print(response)
        return []
    return namesandURLsFromPosts(response['posts'])
def getSelfLiked(mylimit, myoffset):
    """Fetch one page of the authenticated user's likes as ``[url, filename]`` pairs.

    *mylimit* and *myoffset* are forwarded to the API call as ``limit`` and
    ``offset``. A response without ``'liked_posts'`` is reported (message plus
    raw response) and yields an empty list. A page that produced no pairs is
    also reported, but still returned. Requires initclient() to have been
    called first.
    """
    global client
    response = client.likes(limit=mylimit, offset=myoffset)
    if 'liked_posts' not in response:
        print('tumblr call fail at getSelfLiked at myoffset = ' + str(myoffset))
        print(response)
        return []

    found = namesandURLsFromPosts(response['liked_posts'])
    if found == []:
        print('empty list returned from getSelfLiked at (mylimit, myoffset) ' + str(mylimit) + ' '+ str(myoffset))
    return found
def getNUSincePrevious(tumblrname, previousCount):  # if previousCount = 0 get them all
    """Collect ``[url, filename]`` pairs for posts made since the blog had *previousCount* posts.

    Reads the blog's current post count, prints it, and pages through the
    newest ``current - previousCount`` posts with getPosts(), at most 20 per
    call. If *previousCount* is at or above the current count, nothing is
    fetched and an empty list is returned.

    Requires initclient() to have been called first.
    """
    global client
    currentPostCount = client.blog_info(tumblrname + '.tumblr.com')['blog']['posts']
    print('For ' + tumblrname + ' current post count = ' + str(currentPostCount))
    numtoget = currentPostCount - previousCount
    nulist = []
    for offs in range(0, numtoget, 20):
        # Request only what is still missing, so the final page does not reach
        # past previousCount and pull in posts that were already archived.
        pagesize = min(20, numtoget - offs)
        nulist.extend(getPosts(tumblrname, pagesize, offs))
    return nulist
def getLikedNUSincePrevious(previousCount):  # if previousCount = 0 get them all
    """Collect ``[url, filename]`` pairs for likes added since the user had *previousCount* likes.

    Reads the authenticated user's current like count, prints it, and pages
    through the newest ``current - previousCount`` likes with getSelfLiked(),
    at most 20 per call. If *previousCount* is at or above the current count,
    nothing is fetched and an empty list is returned.

    Requires initclient() to have been called first.
    """
    global client
    cinfo = client.info()
    currentLikedCount = cinfo['user']['likes']
    print('For ' + cinfo['user']['name'] + ' current liked count = ' + str(currentLikedCount))
    numtoget = currentLikedCount - previousCount
    nulist = []
    for offs in range(0, numtoget, 20):
        # Request only what is still missing, so the final page does not reach
        # past previousCount and pull in likes that were already archived.
        pagesize = min(20, numtoget - offs)
        nulist.extend(getSelfLiked(pagesize, offs))
    return nulist
def fetchPicsToCWD(NUlist):
    """Download each ``[url, filename]`` pair in *NUlist* into the current working directory.

    A pair whose filename already exists is skipped without any network
    request, so re-running into the same directory does not download or
    overwrite files that are already there.

    Returns the list of pairs that failed (timeout, any other error raised by
    the request, or a non-OK HTTP status) so the caller can retry them. The
    number of failures is printed at the end of the pass.
    """
    badcalls = []
    for a in NUlist:
        url, filename = a[0], a[1]
        if os.path.exists(filename):
            # Already archived; nothing to fetch.
            continue
        try:
            r = requests.get(url, timeout=5.0)
        except requests.exceptions.Timeout:
            badcalls.append(a)
            print('requests.exceptions.Timeout at getting '+ url)
        except Exception as e:
            # Also queued for retry, so the failure is counted and the
            # second pass gets a chance at it.
            badcalls.append(a)
            print('Caught an exception as some sub call of fetchPicsToCWD ')
            print(e)
        else:
            if r.status_code != requests.codes.ok:
                badcalls.append(a)
                print('Bad call made with r.status_code = ' + str(r.status_code) + '\n while getting '+ url)
            else:
                with open(filename, 'wb') as f:
                    f.write(r.content)
    print(str(len(badcalls)) + ' bad calls made in this pass of pic gets')
    return badcalls
def updateLikes(mytumblrname, previous):
    """Download the photos from *mytumblrname*'s likes added since the count was *previous*.

    *mytumblrname* must be a key of ``keys``; otherwise a message is printed
    and nothing else happens. The pictures are saved under ``tumblrrootdir``
    in a directory named ``<mytumblrname>-LIKES-<current like count>``, which
    is created if missing. Downloads that fail on the first pass are tried
    once more.

    The working directory is changed while downloading and is always set back
    to ``tumblrrootdir`` afterwards, even if an error or Ctrl-C interrupts the
    run.
    """
    global client
    if mytumblrname not in keys:
        print('No keys in tumblrkeys.py available for ', mytumblrname)
        return

    initclient(mytumblrname)
    cinfo = client.info()
    currentLikedCount = cinfo['user']['likes']
    newLikesDir = mytumblrname + '-LIKES-' + str(currentLikedCount)

    os.chdir(tumblrrootdir)
    try:
        try:
            os.mkdir(newLikesDir)
        except FileExistsError:
            print('directory already exists, and it is fine')
        os.chdir(newLikesDir)
        print('starting to get ' + mytumblrname + ' photolinks likes ' + str(currentLikedCount) + ' down to ' + str(previous))
        nu = getLikedNUSincePrevious(previous)
        print('starting to download '+ str(len(nu)) + ' photo files to ' + newLikesDir)
        bc = fetchPicsToCWD(nu)
        if len(bc) > 0:
            print('Starting second pass on bad calls')
            fetchPicsToCWD(bc)
        print('updateLikes completed.')
    finally:
        # Do not leave the console session stranded inside the download folder.
        os.chdir(tumblrrootdir)
def picScrape(tumblrname, previous, myKeyName):  # myKeyName is your blog credentials to use
    """Download the photos posted on *tumblrname* since its post count was *previous*.

    *myKeyName* selects which entry of ``keys`` is used for the API calls; if
    it is not present, a message is printed and nothing else happens. The
    pictures are saved under ``tumblrrootdir`` in a directory named
    ``<tumblrname>-<current post count>``, which is created if missing.
    Downloads that fail on the first pass are tried once more.

    The working directory is changed while downloading and is always set back
    to ``tumblrrootdir`` afterwards, even if an error or Ctrl-C interrupts the
    run.
    """
    global client
    if myKeyName not in keys:
        print('No keys in tumblrkeys.py available for ', myKeyName)
        return

    initclient(myKeyName)
    currentPostCount = client.blog_info(tumblrname + '.tumblr.com')['blog']['posts']
    newPostsDir = tumblrname + '-' + str(currentPostCount)

    os.chdir(tumblrrootdir)
    try:
        try:
            os.mkdir(newPostsDir)
        except FileExistsError:
            print('directory already exists, and it is fine')
        os.chdir(newPostsDir)
        print('starting to get ' + tumblrname + ' photolinks ' + str(currentPostCount) + ' down to ' + str(previous))
        nu = getNUSincePrevious(tumblrname, previous)
        print('starting to download '+ str(len(nu)) + ' photo files to ' + newPostsDir)
        bc = fetchPicsToCWD(nu)
        if len(bc) > 0:
            print('Starting second pass on bad calls')
            fetchPicsToCWD(bc)
        print('picScrape completed.')
    finally:
        # Do not leave the console session stranded inside the download folder.
        os.chdir(tumblrrootdir)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement