Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib
- import requests
- from bs4 import BeautifulSoup #here
- import os, sys
- from PIL import Image # pip install Pillow
# Download every slide image of lectures 01..13 from the eszkola.jrj.pl viewer
# and bundle each lecture's slides into one PDF inside a per-lecture folder
# (e.g. "01/0.jpg", "01/1.jpg", ..., "01/01.pdf").

# `import urllib` alone does not guarantee that the `urllib.request` submodule
# is loaded; import it explicitly so the urlretrieve call below cannot fail
# with AttributeError.
import urllib.request

BASE_URL = 'http://eszkola.jrj.pl/'
FIRST_LECTURE = 1
LAST_LECTURE = 13
headers = {'User-Agent': 'Mozilla/5.0'}

for curr_lecture in range(FIRST_LECTURE, LAST_LECTURE + 1):
    # Lecture ids in the viewer URL are zero-padded to two digits.
    curr_str = str(curr_lecture).zfill(2)

    # A fresh session per lecture (as before), now closed when the lecture is done.
    with requests.Session() as s:
        # Change the 'pt' prefix here if needed (the URL ends in e.g. pt01 or pt11).
        res = s.get(BASE_URL + 'viewer?r=pt' + curr_str, headers=headers)
        # Name the parser explicitly: without it bs4 picks whichever parser
        # happens to be installed and emits a warning.
        parsed_html = BeautifulSoup(res.text, 'html.parser')

        # The slide count is read from the last 8 characters of the last
        # right-aligned table cell (presumably an "x / n" style counter --
        # TODO confirm against the live page).
        right_cells = parsed_html.body.findAll('td', attrs={'align': 'right'})
        n = int(right_cells[-1].text[-8:])

        path = curr_str
        # exist_ok so that re-running the script does not crash on folders
        # left over from a previous run.
        os.makedirs(path, exist_ok=True)

        images = []
        try:
            for i in range(n):
                img = BASE_URL + parsed_html.body.find(
                    'img', attrs={'width': '1000'})['src']
                img_path = os.path.join(path, str(i) + '.jpg')
                urllib.request.urlretrieve(img, img_path)
                images.append(Image.open(img_path))
                print('\r', (i + 1), '/', n)

                # Only follow the "next" link while there is another slide to
                # fetch; after the last slide the request would be wasted and
                # the link may not exist.
                if i + 1 < n:
                    nav_cell = parsed_html.body.find('td', attrs={'width': '600'})
                    next_url = BASE_URL + nav_cell.findAll('a')[2]['href']
                    res = s.get(next_url, headers=headers)
                    parsed_html = BeautifulSoup(res.text, 'html.parser')

            # Nothing to bundle when the lecture reports zero slides.
            if images:
                pdf_filename = os.path.join(path, curr_str + '.pdf')
                images[0].save(pdf_filename, "PDF", resolution=100.0,
                               save_all=True, append_images=images[1:])
        finally:
            # Image.open keeps the underlying files open; release them once
            # the PDF has been written (or if anything above failed).
            for im in images:
                im.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement