Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup as Soup
- import re
- from pathlib import Path
- import os
# One HTTP session shared by the functions in this script, so headers and
# cookies carry over between requests.
session = requests.Session()
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36 Edg/80.0.361.69',
})
# Certificate verification is switched off for the whole session, so the
# urllib3 warnings that would otherwise be emitted are silenced as well.
session.verify = False
requests.packages.urllib3.disable_warnings()
def get_session_cookie(session=session):
    """Request the e-magazine index page through *session*.

    The response is discarded.
    NOTE(review): presumably the purpose is to let the server set its
    session cookie on *session* -- confirm against the site's behaviour.

    Args:
        session: the requests session to use; defaults to the module-level
            session.
    """
    index_page = 'https://services.tubitak.gov.tr/edergi/user/index.jsp'
    session.get(index_page, verify=False)
def downloader(url, filename):
    """Stream the resource at *url* into the local file *filename*.

    Missing parent directories of *filename* are created first. The body is
    fetched with the module-level session and written in 8 KiB chunks, so
    the whole file is never held in memory.

    Args:
        url: address of the resource to fetch.
        filename: destination path (string or path-like).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    target = Path(filename)
    target.parent.mkdir(parents=True, exist_ok=True)
    with session.get(url, stream=True) as response:
        response.raise_for_status()
        with target.open('wb') as out:
            for block in response.iter_content(chunk_size=8192):
                # Skip empty chunks.
                if not block:
                    continue
                out.write(block)
def download_pdf(page_link):
    """Download the PDF of the magazine issue shown at *page_link*.

    The page is fetched with the module-level session. The magazine name is
    read from the ``<font class="DergiAdi">`` element, and the issue
    description from the text of the node following that element's parent.
    The description must look like ``<word> <4-digit number> ... <number>``
    (regex groups ``ay``, ``yil``, ``sayi`` -- presumably month, year and
    issue number). The PDF address is built from the ``href`` of the first
    ``<a>`` on the page, and the file is saved as
    ``tubitak/<name>/<name>-<yil>-<ay>-<sayi>.pdf``.

    Args:
        page_link: URL of the issue page.

    Raises:
        ValueError: if the page has no magazine title element or the issue
            description does not match the expected pattern.
    """
    res = session.get(page_link)
    soup = Soup(res.text, 'html.parser')

    title_tag = soup.find('font', {'class': 'DergiAdi'})
    if title_tag is None:
        raise ValueError(f'no magazine title found on {page_link}')

    info_text = title_tag.parent.next_sibling.get_text().strip()
    # Raw string: "\w" and "\d" are regex escapes, not Python string escapes.
    info = re.match(r'(?P<ay>\w+) +(?P<yil>\d{4}).*? (?P<sayi>\d+)', info_text)
    if info is None:
        raise ValueError(
            f'unexpected issue description {info_text!r} on {page_link}'
        )
    ay, yil, sayi = info.groups()

    magazine_name = title_tag.get_text()
    pdf_name = f'tubitak/{magazine_name}/{magazine_name}-{yil}-{ay}-{sayi}.pdf'
    # The first anchor on the page is taken to be the PDF link.
    pdf_link = f'https://services.tubitak.gov.tr/edergi/user/{soup.find("a").get("href")}'
    downloader(pdf_link, pdf_name)
    # Message (Turkish): "<file> downloaded".
    print(f'{pdf_name} indirildi')
def index(year, magazine, session=session):
    """POST the year/magazine selection form and return the parsed reply.

    Args:
        year: value sent as the ``yil`` form field.
        magazine: value sent as the ``dergiKodu`` form field.
        session: the requests session to use; defaults to the module-level
            session.

    Returns:
        A BeautifulSoup document built from the response text.
    """
    form = {'submitButton': '', 'yil': year, 'dergiKodu': magazine}
    response = session.post(
        'https://services.tubitak.gov.tr/edergi/user/yilList1.jsp',
        data=form,
    )
    return Soup(response.text, 'html.parser')
def get_year(year, magazine, session=session):
    """Collect the absolute URLs of all anchors on a year's listing page.

    Args:
        year: year to list.
        magazine: magazine code to list.
        session: the requests session to use; defaults to the module-level
            session.

    Returns:
        A list with one URL per ``<a>`` element of the listing page, each
        formed by appending the anchor's ``href`` to the site's user path.
    """
    base = 'https://services.tubitak.gov.tr/edergi/user/'
    page = index(year, magazine, session)
    urls = []
    for anchor in page.find_all('a'):
        urls.append(f'{base}{anchor.get("href")}')
    return urls
def get_years(magazine, session=session):
    """Return the year values offered for *magazine*.

    The listing page is requested for the fixed year 2020 only to obtain
    the ``<select name="yil">`` drop-down; the ``value`` attribute of each
    of its options is returned.

    Args:
        magazine: magazine code to query.
        session: the requests session to use; defaults to the module-level
            session.

    Returns:
        A list of the option values, in page order.
    """
    page = index(2020, magazine, session)
    selector = page.find('select', {'name': 'yil'})
    values = []
    for opt in selector.find_all('option'):
        values.append(opt.get('value'))
    return values
def magazine_downloader(magazine):
    """Download every issue of *magazine* that has not been fetched yet.

    Links that were downloaded successfully are logged, one per line, in
    ``tubitak/downloaded<magazine>.txt``. Links already present in that
    file are skipped, so an interrupted run can be resumed.

    Args:
        magazine: magazine code, used both in the requests and in the name
            of the log file.
    """
    years = get_years(magazine)
    downloaded_path = f'tubitak/downloaded{magazine}.txt'

    # A set gives O(1) membership tests; blank lines (e.g. from an empty
    # log file) are ignored.
    downloaded = set()
    if os.path.exists(downloaded_path):
        with open(downloaded_path) as f:
            downloaded = {
                line.strip() for line in f.read().splitlines() if line.strip()
            }

    for year in years:
        for link in get_year(year, magazine):
            if link in downloaded:
                continue
            download_pdf(link)
            # Remember the link in memory too, so a link that shows up more
            # than once in the listings is not fetched again in this run.
            downloaded.add(link)
            # Append right after each download so progress survives a crash.
            with open(downloaded_path, 'a') as f:
                f.write(f'{link}\n')
def get_magazine_codes():
    """Print a table of the magazine codes and names offered by the site.

    The listing page is requested (year 2020, magazine code 4) only to
    obtain the ``<select name="dergiKodu">`` drop-down. Each option's
    ``value`` and text is printed as one row under a ``kod`` / ``dergi adı``
    header (Turkish for "code" / "magazine name").
    """
    soup = index(2020, 4, session)
    magazine_select = soup.find('select', {'name': 'dergiKodu'})
    # The first <option> is skipped (presumably a placeholder entry).
    options = [
        (option.get('value'), option.get_text())
        for option in magazine_select.find_all('option')
    ][1:]
    print(f'{"kod":^5} - {"dergi adı":<30}')
    for code, name in options:
        # No trailing ")" here: the original format string printed a stray
        # parenthesis after every row.
        print(f'{code:^5} - {name:<30}')
if __name__ == '__main__':
    # Show the available codes first so the user knows what can be entered.
    get_magazine_codes()
    # Prompt (Turkish): "enter the code of the magazine you want to download".
    magazine_downloader(input('indirmek istediğiniz derginin kodunu giriniz: '))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement