Data hosted with ♥ by Pastebin.com - Download Raw - See Original
import codecs
import os
import tarfile
from sys import exit
from urllib.parse import urljoin

import requests
from lxml.html import parse
  7.  
  8.  
  9. def process_downloads():
  10.         url = "http://www.net-security.org/insecure-archive.php"
  11.         html_tree = parse(url)
  12.         pdf_links = html_tree.xpath("//h2[@class='style12']/a/@href")
  13.  
  14.         print("\nNumber of Mags to be downloaded : " + str(len(pdf_links)))
  15.  
  16.         dir_save_name = "netsec-mags"
  17.         dir_save_path = os.path.join(os.getcwd(), dir_save_name)
  18.         print("\nMags Will be saved at : " + dir_save_path)
  19.  
  20.         if not os.path.exists(dir_save_path):
  21.                 os.mkdir(dir_save_path)
  22.  
  23.         os.chdir(dir_save_path)
  24.  
  25.         for link in iter(pdf_links):
  26.                 filename = link.rpartition('/')[2]    
  27.                 if os.path.exists(os.path.join(dir_save_path, filename)):
  28.                         print(filename + " already exists and therefore skipping.")    
  29.                 else:
  30.                         print("\nCurrently downloading " + filename)
  31.                         req = requests.get(link)
  32.                         with codecs.open(filename, "wb") as f:
  33.                             f.write(req.content)
  34.         print("\nAll mags have been downloaded.")
  35.  
  36.         print("\nNow adding all the files to an Archive")
  37.         archive_loc = create_targz(dir_save_path, dir_save_name + ".tar.gz")
  38.         if archive_loc is not None:
  39.             print("\nArchive created successfully. Location :- " + archive_loc)
  40.         else:
  41.             print("Unable to create .tar.gz file. Report to Developer.")
  42.  
  43.  
  44. def create_targz(dirpath, archivename):
  45.         """
  46.        Create a .tar.gz Archive of a directory
  47.  
  48.        :param dirpath: Directory location to save the the book archive.
  49.        :param archivename: Name of the archive.
  50.        """
  51.         try:
  52.             with tarfile.open(archivename, "w:gz") as tar:
  53.                 tar.add(dirpath, arcname=os.path.basename(dirpath))
  54.             return os.path.join(dirpath, archivename)
  55.         except tarfile.TarError:
  56.             return None
  57.  
  58. if __name__ == "__main__":
  59.     exit(process_downloads())