Advertisement
Guest User

Untitled

a guest
Sep 17th, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.07 KB | None | 0 0
  1. from grab import Grab
  2. import sys
  3. import requests
  4. import os
  5.  
  6. import urllib3
  7. urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
  8.  
  9. from time import sleep
  10. import random
  11.  
  12. path = '/tmp/site/'
  13.  
  14.  
  15.  
  16. login = 'some_login'
  17. password = 'some_passwd'
  18.  
  19. g = Grab(log_file='out.html')
  20. g.go('http://some_site/authentication/')
  21. g.doc.set_input('username', login)
  22. g.doc.set_input('password', password)
  23. g.doc.submit()
  24. g.go('http://some_site/link/')
  25. sel = g.doc.select('//a[contains(@class, "course-content__topic-item-link")]')
  26. count = 0
  27.  
  28. for elem in sel:
  29.     directory = path + str(count) + '/'
  30.     if not os.path.exists(directory):
  31.         os.makedirs(directory)
  32.     url = g.make_url_absolute(elem.attr('href'))
  33.     print(url)
  34.     g.go(url)
  35.     try:
  36.         video = g.make_url_absolute(g.doc.select('//video[contains(@class, "video-js")]//source').attr('src'))
  37.         filename = video.split('/')[-1].split('#')[0].split('?')[0]
  38.         afilename = directory + filename
  39.         try:
  40.             r = requests.get(video, stream=True, verify=False)
  41.             with open(afilename, "wb") as file:
  42.                 for chunk in r.iter_content(chunk_size=255):
  43.  
  44.                     # writing one chunk at a time to pdf file
  45.                     if chunk:
  46.                         file.write(chunk)
  47.             file.close()
  48.         except:
  49.             pass
  50.         else:
  51.             pass
  52.  
  53.     except IndexError:
  54.         print('no video')
  55.     else:
  56.         print(video)
  57.         print(filename)
  58.  
  59.  
  60.  
  61.     slides = g.doc.select('//div[contains(@class, "online-content__preview-slider")]//img')
  62.     for slide in slides:
  63.         img = g.make_url_absolute(slide.attr('src')).rstrip()
  64.         print(img)
  65.         filename = img.split('/')[-1].split('#')[0].split('?')[0]
  66.         afilename = directory + filename
  67.         print(afilename)
  68.         data = requests.get(img, verify=False).content
  69.         with open(afilename, 'wb') as f:
  70.             f.write(data)
  71.  
  72.     #input("Press Enter to continue...")
  73.  
  74.     print(count)
  75.     count += 1
  76.     print('count rised:', count)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement