Advertisement
Guest User

selmuella

a guest
Feb 27th, 2017
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.21 KB | None | 0 0
  1. import requests, bs4
  2. from pprint import pprint
  3. import os
  4. from multiprocessing.dummy import Pool
  5.  
  6. def hentlinks():
  7.     url = 'http://studerende.au.dk/studier/fagportaler/jur/eksamen/eksamensopgaver-og-rettevejledninger/1-aar-paa-bacheloruddannelsen/'
  8.     res = requests.get(url)
  9.     soup = bs4.BeautifulSoup(res.content, 'lxml')
  10.  
  11.     poi = soup.find('div', {'id': 'c1485-default'})
  12.     uls = poi.find_all('ul')
  13.  
  14.  
  15.     linklist = []
  16.     for u in uls:
  17.         for i in u.find_all('li'):
  18.             link = i.find('a')['href']
  19.             linklist.append(link)
  20.  
  21.     return linklist
  22.  
  23.  
  24. def downloader(url):
  25.     baseurl = 'http://studerende.au.dk/'
  26.     pdfurl = baseurl+url
  27.     filname = os.path.split(url)[1]
  28.     sti = '/home/mathis/pdf_selma'
  29.     pprint(pdfurl)
  30.     pprint(filname)
  31.     if not os.path.exists(sti):
  32.         os.mkdir(sti)
  33.     os.path.isfile()
  34.     res = requests.get(pdfurl)
  35.  
  36.     with open(os.path.join(sti, filname), 'wb') as f:
  37.         for chunk in res.iter_content(100000):
  38.             f.write(chunk)
  39.  
  40.         f.close()
  41.  
  42.     print('hentet og skrevet')
  43.  
  44.  
  45. def main():
  46.     links = hentlinks()
  47.     pprint(links)
  48.     TP = Pool(20)
  49.     TP.map(downloader, links)
  50.     TP.close()
  51.     TP.join()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement