SHARE
TWEET

Untitled

a guest Sep 17th, 2017 8 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from grab import Grab
  2. import sys
  3. import requests
  4. import os
  5.  
  6. import urllib3
  7. urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
  8.  
  9. from time import sleep
  10. import random
  11.  
  12. path = '/tmp/site/'
  13.  
  14.  
  15.  
  16. login = 'some_login'
  17. password = 'some_passwd'
  18.  
  19. g = Grab(log_file='out.html')
  20. g.go('http://some_site/authentication/')
  21. g.doc.set_input('username', login)
  22. g.doc.set_input('password', password)
  23. g.doc.submit()
  24. g.go('http://some_site/link/')
  25. sel = g.doc.select('//a[contains(@class, "course-content__topic-item-link")]')
  26. count = 0
  27.  
  28. for elem in sel:
  29.     directory = path + str(count) + '/'
  30.     if not os.path.exists(directory):
  31.         os.makedirs(directory)
  32.     url = g.make_url_absolute(elem.attr('href'))
  33.     print(url)
  34.     g.go(url)
  35.     try:
  36.         video = g.make_url_absolute(g.doc.select('//video[contains(@class, "video-js")]//source').attr('src'))
  37.         filename = video.split('/')[-1].split('#')[0].split('?')[0]
  38.         afilename = directory + filename
  39.         try:
  40.             r = requests.get(video, stream=True, verify=False)
  41.             with open(afilename, "wb") as file:
  42.                 for chunk in r.iter_content(chunk_size=255):
  43.  
  44.                     # writing one chunk at a time to pdf file
  45.                     if chunk:
  46.                         file.write(chunk)
  47.             file.close()
  48.         except:
  49.             pass
  50.         else:
  51.             pass
  52.  
  53.     except IndexError:
  54.         print('no video')
  55.     else:
  56.         print(video)
  57.         print(filename)
  58.  
  59.  
  60.  
  61.     slides = g.doc.select('//div[contains(@class, "online-content__preview-slider")]//img')
  62.     for slide in slides:
  63.         img = g.make_url_absolute(slide.attr('src')).rstrip()
  64.         print(img)
  65.         filename = img.split('/')[-1].split('#')[0].split('?')[0]
  66.         afilename = directory + filename
  67.         print(afilename)
  68.         data = requests.get(img, verify=False).content
  69.         with open(afilename, 'wb') as f:
  70.             f.write(data)
  71.  
  72.     #input("Press Enter to continue...")
  73.  
  74.     print(count)
  75.     count += 1
  76.     print('count rised:', count)
RAW Paste Data
Top