Advertisement
gingeredhorse

Untitled

Mar 24th, 2014
319
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.56 KB | None | 0 0
  1. from urllib import urlopen
  2. import re
  3. import sys
  4.  
  5. channel = raw_input('Enter vimeo channel: ')
  6. pageNum = raw_input('Enter number of follower pages to scrape:')
  7. pageNum = int(pageNum)
  8. adjList = open(channel+'.csv', 'w') #create csv
  9.  
  10.  
  11. for i in range(1, pageNum + 1):
  12.     print >> sys.stderr, 'Scraping page '+str(i)+'...'
  13.      
  14.     url = urlopen('http://vimeo.com/channels/'+channel+'/followers/page:'+str(i)+'/sort:datefollow')
  15.     htmltext = url.read()
  16.     pattern = re.compile('a href="/user(.+?)"')
  17.     userIds = re.findall(pattern, htmltext)
  18.  
  19.  
  20.  
  21.     for i in range(len(userIds)):
  22.         #standard request - http://vimeo.com/api/v2/username/request.output
  23.         apiRequest = urlopen('http://vimeo.com/api/v2/user'+userIds[i]+'/channels.json')
  24.         userJsonData = apiRequest.read()
  25.          
  26.         pattern = re.compile(',"name":"(.+?)"')
  27.         channelNames = re.findall(pattern, userJsonData)
  28.         for j in range(len(channelNames)):  #remove commas and whitespace
  29.             channelNames[j] = channelNames[j].replace(' ', '_')
  30.             channelNames[j] = channelNames[j].replace(',', '')
  31.  
  32.  
  33.  
  34.         #Write userId and channel to csv using a comma as delimiter
  35.         for j in range(len(channelNames)):
  36.             try:
  37.                 adjList.write('user'+userIds[i]+ ', '+ channelNames[j]+'\n')
  38.             except BaseException, e:
  39.                 print >> sys.stderr, 'Error:', str(e)
  40.  
  41.      
  42.      
  43. adjList.close()
  44. print >> sys.stderr, 'Done writing. '+channel+'.csv saved to script folder.'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement