Advertisement
dgobrien

DAFONT SCRAPER

Oct 12th, 2022 (edited)
1,677
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.57 KB | Source Code | 0 0
  1. import requests
  2. import urllib.request
  3. import wget
  4. import os
  5. import re
  6. from bs4 import BeautifulSoup
  7. main_dir="s:/fonts/dafonts/"
  8. if not os.path.exists(main_dir):
  9.     os.makedirs(main_dir)
  10.  
  11. letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ#'
  12. attrs = { 'href': re.compile(r'\.mid$') }
  13. # Iterate over the string
  14. page = 1
  15. for element in letters:
  16.     if not os.path.exists(main_dir + element):
  17.         os.makedirs(main_dir + element)    
  18.     vgm_url = 'https://www.dafont.com/alpha.php?lettre=' + element.lower() + '&page=' + str(page) + "&fpp=200"
  19.     headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
  20.     result = requests.get(vgm_url , headers=headers)    
  21.     pagetext = result.text
  22.     soup = BeautifulSoup(pagetext, 'html.parser')
  23.     lastpage = soup.find('a', title='Keyboard shortcut: Right arrow').find_previous().text
  24.  
  25.     for x in range(1, int(lastpage)):    
  26.         print(element , "Page: ", x)
  27.         vgm_url = 'https://www.dafont.com/alpha.php?lettre=' + element.lower() + '&page=' + str(x) + "&fpp=200"
  28.         headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
  29.         result = requests.get(vgm_url , headers=headers)    
  30.         pagetext = result.text
  31.         soup = BeautifulSoup(pagetext, 'html.parser')            
  32.         mydivs = soup.find_all("div", {"class": "preview"})
  33.         for div in mydivs:
  34.  
  35.             downurl = "https://dl.dafont.com/dl/?f="
  36.             poster = div["style"].replace("background-image:url(/","").replace(")","")
  37.             downpost = poster.replace(".png",'')
  38.             down = downpost.rsplit('/', 1)[-1]
  39.             down = down[:-1]            
  40.             preurl = "https://www.dafont.com/" + poster            
  41.            
  42.             filename = wget.download(preurl , out = main_dir + element + "/")            
  43.             headers = {'User-Agent': 'Mozilla/5.0'}
  44.             r = requests.get(downurl + down, headers=headers)
  45.             try:
  46.                 filenamer = r.headers['Content-Disposition'].replace('attachment; filename=','')
  47.                 with open(main_dir + element + "/" + filenamer, 'wb') as fh:
  48.                     fh.write(r.content)            
  49.                 print(" Downloaded: " + down )
  50.             except:
  51.                 print(r.headers)
  52.                 with open(main_dir + element + "/" + down + ".zip", 'wb') as fh:
  53.                     fh.write(r.content)
  54.                 print(" Downloaded: " + down )
  55.  
  56.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement