Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape the followers of a Vimeo channel and the channels they belong to.

The script prompts for a channel name and a number of follower pages, then
writes one ``user<id>, <channel_name>`` row per (follower, channel) pair to
``<channel>.csv`` in the current working directory.  Progress and error
messages go to stderr.

Written for Python 2; the small shims below let the same file run unchanged
under Python 3 as well.

NOTE(review): the original paste had lost its indentation.  The nesting used
here (users inside pages, rows inside users) is the reading under which every
scraped page contributes rows -- confirm against the author's copy if one
is available.
"""

from contextlib import closing
import io
import re
import sys

try:  # Python 2
    from urllib import urlopen
except ImportError:  # Python 3
    from urllib.request import urlopen

try:  # Python 2
    _prompt = raw_input
except NameError:  # Python 3
    _prompt = input

# Compiled once at import time instead of once per page / per user.
_USER_ID_RE = re.compile(r'a href="/user(.+?)"')
# NOTE(review): pulling names out of JSON with a regex leaves escape sequences
# untouched and stops at an escaped quote.  Kept as-is so the CSV contents do
# not change; parsing with the json module would be the sturdier approach.
_CHANNEL_NAME_RE = re.compile(r',"name":"(.+?)"')


def _fetch_text(url):
    """Return the body served at *url* as text, always closing the connection.

    Bytes that are not valid UTF-8 are replaced rather than raising, so a
    single odd character cannot abort the scrape.
    """
    with closing(urlopen(url)) as response:
        return response.read().decode('utf-8', 'replace')


def _extract_user_ids(html):
    """Return every user id linked as ``a href="/user<id>"`` in *html*."""
    return _USER_ID_RE.findall(html)


def _extract_channel_names(json_text):
    """Return the channel names found in *json_text*, made CSV-safe.

    Spaces become underscores and commas are dropped so that a name can never
    be mistaken for the column delimiter.
    """
    return [
        name.replace(' ', '_').replace(',', '')
        for name in _CHANNEL_NAME_RE.findall(json_text)
    ]


def _format_row(user_id, channel_name):
    """Return one ``user<id>, <channel_name>`` CSV line, newline included."""
    # Unicode literals keep the row a text string on Python 2 as well, which
    # is what a file opened through io.open() requires.
    return u'user' + user_id + u', ' + channel_name + u'\n'


def main():
    """Prompt for a channel and page count, then write ``<channel>.csv``.

    Raises:
        ValueError: if the page count entered is not an integer.
        IOError: if the CSV cannot be opened or a follower page cannot be
            fetched.  Failures for an individual user are reported on stderr
            and skipped instead.
    """
    channel = _prompt('Enter vimeo channel: ')
    page_count = int(_prompt('Enter number of follower pages to scrape:'))

    # The with-block guarantees the CSV is flushed and closed even when a
    # request fails half-way through the scrape.
    with io.open(channel + '.csv', 'w', encoding='utf-8') as adj_list:
        for page in range(1, page_count + 1):
            sys.stderr.write('Scraping page ' + str(page) + '...\n')
            followers_html = _fetch_text(
                'http://vimeo.com/channels/' + channel
                + '/followers/page:' + str(page) + '/sort:datefollow'
            )

            for user_id in _extract_user_ids(followers_html):
                # standard request -
                # http://vimeo.com/api/v2/username/request.output
                try:
                    channels_json = _fetch_text(
                        'http://vimeo.com/api/v2/user' + user_id
                        + '/channels.json'
                    )
                except IOError as err:
                    # One unreachable profile should not discard the rows
                    # already collected for everyone else.
                    sys.stderr.write('Error: ' + str(err) + '\n')
                    continue

                # Write userId and channel to csv using a comma as delimiter.
                for channel_name in _extract_channel_names(channels_json):
                    try:
                        adj_list.write(_format_row(user_id, channel_name))
                    except Exception as err:
                        # Best effort per row, but Ctrl-C and SystemExit are
                        # no longer swallowed.
                        sys.stderr.write('Error: ' + str(err) + '\n')

    sys.stderr.write(
        'Done writing. ' + channel + '.csv saved to script folder.\n'
    )


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement