SHARE
TWEET

list or get python source files using requests_html

DeaD_EyE Jun 6th, 2019 61 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. """
  2. This script lists or downloads Python source files.
  3. It was requested here: https://python-forum.io/Thread-downloading-source-for-every-version
  4.  
  5. Dependencies: requests, requests_html
  6. Task: Remove external dependencies
  7. """
  8.  
  9.  
  10. import re
  11. import sys
  12. try:
  13.     import requests
  14.     import requests_html
  15. except ImportError:
  16.     print('Please install requests and requests_html.', file=sys.stdout)
  17.     sys.exit(255)
  18.  
  19.  
  20. def sorter(url):
  21.     """
  22.    This sorter is used to sort the
  23.    python by version string.
  24.    
  25.    Early version have only two digits.
  26.    """
  27.     v1 = re.search(r'(\d+)\.(\d+)\.(\d+)', url)
  28.     v2 = re.search(r'(\d+)\.(\d+)', url)
  29.     if v1:
  30.         return tuple(map(int, v1.groups()))
  31.     elif v2:
  32.         return tuple(map(int, [*v2.groups(), 0]))
  33.     else:
  34.         return (0,0,0)
  35.  
  36.  
  37. def get_python_sources(filetype='tgz'):
  38.     """
  39.    Grab all links from python.org/downloads/source/
  40.    Return the source file archives sorted as a list.
  41.    """
  42.     url = 'https://www.python.org/downloads/source/'
  43.     with requests_html.HTMLSession() as session:
  44.         req = session.get(url)
  45.     links = [link for link in req.html.links if link.endswith(filetype)]
  46.     return sorted(links, key=sorter)
  47.  
  48.  
  49. def print_python_sources():
  50.     """
  51.    Print all python source links to stdout.
  52.    """
  53.     for link in get_python_sources():
  54.         print(link)
  55.  
  56.  
  57. def download_python_sources():
  58.     """
  59.    Download all python source files to current location.
  60.    """
  61.     for link in get_python_sources():
  62.         file = link.split('/')[-1]
  63.         # find a better way to get the file name from url
  64.         # maybe urllib.parse?
  65.         # yarl is an alternative with more abstraction
  66.         print('Downloading', file)
  67.         req = requests.get(link, stream=True)
  68.         with open(file, 'wb') as fd:
  69.             for chunk in req.iter_content(chunk_size=64*1024**1):
  70.                 # what is the ideal chunk_size?
  71.                 # how to detect the best chunk_size?
  72.                 fd.write(chunk)
  73.  
  74.  
  75. def help():
  76.     print('python3', sys.argv[0], '<list|download>')
  77.     sys.exit(1)
  78.  
  79.  
  80. if __name__ == '__main__':
  81.     if len(sys.argv) != 2:
  82.         help()
  83.     cmd = sys.argv[1].lower()
  84.     if cmd not in ('download', 'list'):
  85.         help()        
  86.     try:
  87.         if cmd == 'list':
  88.             print_python_sources()
  89.         elif cmd == 'download':
  90.             download_python_sources()
  91.     except Exception as e:
  92.         # catching all exceptions here, the're bubbleing up
  93.         # extend this to handle different cases
  94.         print('Error:', e)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top