SHARE
TWEET

Wykładoinator

a guest Jun 25th, 2019 65 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import os
import sys
import urllib
import urllib.request

import requests
from bs4 import BeautifulSoup
from PIL import Image  # pip install Pillow

  7. curr_lecture = 1
  8.  
  9. while curr_lecture <= 13:
  10.  
  11.     curr_str = str(curr_lecture)
  12.  
  13.     if len(curr_str) < 2:
  14.         curr_str = '0' + curr_str
  15.  
  16.     headers = {'User-Agent': 'Mozilla/5.0'}
  17.     s = requests.Session()
  18.     res = s.get('http://eszkola.jrj.pl/viewer?r=pt'+curr_str,headers=headers) #tutaj zamieĹ„ pt np. pt01 albo pt11
  19.  
  20.     html = res.text
  21.  
  22.     parsed_html = BeautifulSoup(html)
  23.  
  24.     n = int(parsed_html.body.findAll('td',attrs={'align':'right'})[-1].text[-8:])
  25.    
  26.     path = curr_str
  27.    
  28.     os.mkdir(path)
  29.    
  30.     images = []
  31.  
  32.     for i in range(n):
  33.         next = 'http://eszkola.jrj.pl/' + parsed_html.body.find('td',attrs={'width':'600'}).findAll('a')[2]['href']
  34.         img = 'http://eszkola.jrj.pl/' + parsed_html.body.find('img', attrs={'width':'1000'})['src']
  35.         urllib.request.urlretrieve(img,curr_str+'/'+str(i)+'.jpg') # './'+
  36.         im = Image.open(curr_str+'/'+str(i)+'.jpg')
  37.         images.append(im)
  38.         print('\r',(i+1),'/',n)
  39.         res =  s.get(next,headers=headers)
  40.         html = res.text
  41.         parsed_html = BeautifulSoup(html)
  42.        
  43.     pdf_filename = curr_str + '/' + curr_str + '.pdf'
  44.    
  45.     images[0].save(pdf_filename, "PDF", resolution=100.0, save_all = True, append_images = images[1:])
  46.    
  47.     curr_lecture += 1
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top