Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from urllib.parse import urlparse
- import os
- import wget
- import gzip
- import shutil
- import zipfile
# Gzip-compressed graph files to fetch; the __main__ block passes each URL
# to donwload_graph. All entries use HTTPS so every download goes over the
# same (encrypted) scheme.
GRAPH_URLS = [
    'https://snap.stanford.edu/data/loc-brightkite_edges.txt.gz',
    'https://snap.stanford.edu/data/amazon0302.txt.gz',
    'https://snap.stanford.edu/data/roadNet-PA.txt.gz',
    'https://snap.stanford.edu/data/cit-HepPh.txt.gz',
    'https://snap.stanford.edu/data/amazon0505.txt.gz',
    'https://snap.stanford.edu/data/roadNet-CA.txt.gz',
]
# 1..9, 10..90, 100..900, 1000..9000: every single-digit multiple of each
# power of ten from 10**0 through 10**3, in ascending order.
FULLGRAPH_POWS = [
    digit * 10 ** exponent
    for exponent in range(4)
    for digit in range(1, 10)
]
def donwload_graph(url):
    """Download a gzipped text file into ./input and store it uncompressed.

    The output name is the last component of the URL path with its final
    extension removed (``.../foo.txt.gz`` -> ``./input/foo.txt``). Lines
    starting with ``#`` are dropped while decompressing. If the output file
    already exists, nothing is downloaded.

    Args:
        url: Address of the gzip archive to fetch.

    Returns:
        None.
    """
    archive_path = './input/' + os.path.split(urlparse(url).path)[1]
    file_path = os.path.splitext(archive_path)[0]
    if os.path.exists(file_path):
        return

    # The download target must exist before wget can save into it.
    os.makedirs('./input', exist_ok=True)

    # Use the path wget reports rather than the guessed one: wget picks a
    # different name when a stale archive with the same name is present.
    archive_path = wget.download(url, './input')

    # Write to a side file and rename at the end, so an interrupted run never
    # leaves a truncated file that later runs would treat as complete.
    partial_path = file_path + '.part'
    try:
        # Stream line by line instead of loading the whole file into memory.
        with gzip.open(archive_path, 'rt') as f_in, \
                open(partial_path, 'w') as f_out:
            for line in f_in:
                if not line.startswith('#'):
                    # NOTE(review): both arguments are a single space in this
                    # copy of the script, making the call a no-op; the
                    # original may have converted tabs -- confirm.
                    f_out.write(line.replace(' ', ' '))
        os.replace(partial_path, file_path)
    finally:
        # Clean up the archive and any unfinished output, even on failure.
        for leftover in (partial_path, archive_path):
            if os.path.exists(leftover):
                os.remove(leftover)
if __name__ == '__main__':
    # Script entry point: fetch every configured graph, in list order.
    for graph_url in GRAPH_URLS:
        donwload_graph(graph_url)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement