Advertisement
Guest User

Untitled

a guest
Nov 17th, 2017
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.54 KB | None | 0 0
  1. import requests
  2. import os
  3. from bs4 import BeautifulSoup
  4.  
  5.  
  6. def create_math_directory():
  7.     home_directory = os.path.join(os.path.expanduser("~"), "Desktop")
  8.     if not os.path.exists(home_directory + '/Math391_Notes'):  # if file does not exist then make it
  9.         os.makedirs(home_directory + '/Math391_Notes')
  10.  
  11.     os.chdir(home_directory + '/Math391_Notes/')
  12.     # return home_directory + '/Math391_Notes'
  13.  
  14.  
# NOTE: other CSS class names apparently present on the page (TODO confirm):
# sites-attachments-icon, sites-attachments-icon-accessible
  16.  
  17.  
  18. def parse_pdf():
  19.     url = 'https://sites.google.com/site/summeryunyang/teaching'
  20.     r = requests.get(url)
  21.     soup = BeautifulSoup(r.content, "html.parser")
  22.     payload = soup.find_all("div", {"class": "sites-attachments-name"})  # cointains all the sites to sites but its mix with html
  23.     google_url = []
  24.     pdf_title = []
  25.     for website in payload:
  26.         if website.a:
  27.             google_url.append(website.a.get('href'))
  28.         pdf_title.append(website.a.text)
  29.  
  30.     url_and_title = list(zip(google_url, pdf_title))
  31.     return url_and_title
  32.  
  33.  
  34. def download_pdf(parsed_pdf):
  35.     pass
  36.     '''
  37.    for pdf_file, pdf_title in parsed_pdf:
  38.  
  39.        response = requests.get(pdf_file)
  40.        with open(pdf_title, 'wb',) as f:
  41.             # for chunk in response.iter_content(chunk_size=15000):
  42.            f.write(response.content)
  43.        print('downloading {}'.format(pdf_title))
  44.    print('should be done')
  45.    '''
  46.  
  47.  
  48. def main():
  49.     math_folder = create_math_directory()
  50.     pdf = parse_pdf()
  51.  
  52.  
  53. if __name__ == "__main__":
  54.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement