Advertisement
MrHitch

Untitled

May 22nd, 2020
1,070
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.37 KB | None | 0 0
  1. from urllib.parse import urlparse
  2.  
  3. import os
  4. import wget
  5. import gzip
  6. import shutil
  7. import zipfile
  8.  
  9.  
  10. GRAPH_URLS = [
  11.     'http://snap.stanford.edu/data/loc-brightkite_edges.txt.gz',
  12.     'https://snap.stanford.edu/data/amazon0302.txt.gz',
  13.     'https://snap.stanford.edu/data/roadNet-PA.txt.gz',
  14.     'https://snap.stanford.edu/data/cit-HepPh.txt.gz',
  15.     'https://snap.stanford.edu/data/amazon0505.txt.gz',
  16.     'https://snap.stanford.edu/data/roadNet-CA.txt.gz',
  17. ]
  18.  
  19. FULLGRAPH_POWS = [
  20.     i
  21.     for p in range(0, 4)
  22.     for i in range(10 ** p, 10 ** (p + 1), 10 ** p)
  23. ]
  24.  
  25.  
  26. def donwload_graph(url):
  27.     archive_path = './input/' + os.path.split(urlparse(url).path)[1]
  28.     file_path = os.path.splitext(archive_path)[0]
  29.  
  30.     if os.path.exists(file_path) is False:
  31.         wget.download(url, './input')
  32.  
  33.         with gzip.open(archive_path, 'rb') as f_in:
  34.             with open(file_path, 'wb') as f_out:
  35.                 shutil.copyfileobj(f_in, f_out)
  36.  
  37.         content = None
  38.         with open(file_path, 'r') as f_in:
  39.             content = f_in.readlines()
  40.  
  41.         with open(file_path, 'w') as f_out:
  42.             for line in content:
  43.                 if not line.startswith('#'):
  44.                     f_out.write(str.replace(line, ' ', ' '))
  45.  
  46.         os.remove(archive_path)
  47.  
  48.  
  49. if __name__ == '__main__':
  50.     for url in GRAPH_URLS:
  51.         donwload_graph(url)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement