Guest User

Untitled

a guest
Jan 18th, 2018
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.54 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
import gzip
import os
import threading
from functools import partial
from multiprocessing.pool import ThreadPool
from urllib.parse import urlparse

import boto
import click
from boto.exception import S3ResponseError
from boto.s3.connection import S3Connection
from boto.s3.key import Key
  14.  
  15. tile_count = 0
  16. upload_count = 0
  17. max_retries = 3
  18.  
  19. def upload_walk(bucket, key_prefix, headers, root_dir, walkee, progress=True):
  20. root, dirs, files = walkee
  21.  
  22. file_dir = root.replace(root_dir, "")
  23. for f in files:
  24. base, ext = os.path.splitext(f)
  25. if ext != ".gph":
  26. continue
  27. key_path = key_prefix + file_dir + "/" + f
  28. upload_tile(bucket, headers, os.path.join(root, f), key_path, progress=progress)
  29.  
  30.  
  31. def upload_tile(bucket, headers, file_path, key_path, progress=True, retries=0):
  32. print("Uploading " + file_path + " to " + key_path)
  33. try:
  34. k = Key(bucket)
  35. k.key = key_path
  36.  
  37. for key, value in headers.items():
  38. k.set_metadata(key, value)
  39.  
  40. with open(file_path, "rb") as f:
  41. k.set_contents_from_string(gzip.compress(f.read()))
  42.  
  43. global upload_count
  44. upload_count += 1
  45. if progress and upload_count % 10 == 0:
  46. print("%i/%i" % (upload_count, tile_count))
  47. except Exception as e:
  48. print(e)
  49. if retries < max_retries:
  50. upload_tile(bucket, headers, file_path, key_path, progress=progress, retries=retries + 1)
  51. else:
  52. raise Exception("Too Many upload failures")
  53.  
  54.  
  55. @click.command()
  56. @click.argument('tile_dir', type=click.Path(exists=True), required=True)
  57. @click.argument('s3_url', required=True)
  58. @click.option('--threads', default=10,
  59. help="Number of simultaneous uploads")
  60. def upload(tile_dir, s3_url, threads):
  61. base_url = urlparse(s3_url)
  62. conn = S3Connection(calling_format=boto.s3.connection.OrdinaryCallingFormat())
  63. bucket = conn.get_bucket(base_url.netloc)
  64. key_prefix = base_url.path.lstrip("/")
  65.  
  66. headers = {
  67. "Content-Encoding":"gzip",
  68. "Content-Type": "application/octet-stream"
  69. }
  70.  
  71. print("uploading tiles from %s to s3://%s/%s" % (tile_dir, bucket.name, key_prefix))
  72. global tile_count
  73. tile_count = sum([len([f for f in files if f.endswith("gph")]) for root, dirs, files in os.walk(tile_dir)])
  74. root = os.path.abspath(tile_dir)
  75. if not root.endswith("/"):
  76. root = root + "/"
  77.  
  78. pool = ThreadPool(threads)
  79. func = partial(upload_walk, bucket, key_prefix, headers, root)
  80. pool.map(func, os.walk(tile_dir))
  81.  
  82.  
  83. if __name__ == '__main__':
  84. upload()
Add Comment
Please, Sign In to add comment