Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from __future__ import unicode_literals

import argparse
import csv
import errno
import io
import logging
import multiprocessing
import os
import shutil
import sys
import time
import traceback

from PIL import Image
import requests
import six
import swiftclient

CONTAINER = "image_Storage"
def config_logger():
    """Create and return the 'download' logger with a DEBUG stream handler.

    Idempotent: calling it again returns the same logger without stacking a
    second handler (the original added one per call, which would duplicate
    every log line in multi-call scenarios).
    """
    logger = logging.getLogger('download')
    logger.setLevel(logging.DEBUG)
    if not logger.handlers:  # guard: only attach a handler on first call
        handler = logging.StreamHandler()
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(logging.Formatter(
            '%(process)d @ %(asctime)s (%(relativeCreated)d) '
            '%(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(handler)
    return logger
def parse_args():
    """Define and parse the command-line options for the downloader."""
    parser = argparse.ArgumentParser(
        description='Download Google open image dataset.')
    # (flag, options) table keeps the option definitions compact and scannable.
    option_table = [
        ('--timeout',
         dict(type=float, default=2.0, help='image download timeout')),
        ('--queue-size',
         dict(type=int, default=1000, help='maximum image url queue size')),
        ('--consumers',
         dict(type=int, default=1, help='number of download workers')),
        ('--min-dim',
         dict(type=int, default=256,
              help='smallest dimension for the aspect ratio preserving scale'
                   '(-1 for no scale)')),
        ('--sub-dirs',
         dict(type=int, default=1000,
              help='number of directories to split downloads over')),
        ('--force',
         dict(default=False, action='store_true',
              help='force download and overwrite local files')),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    parser.add_argument('input', help='open image input csv')
    return parser.parse_args()
def unicode_dict_reader(f, **kwargs):
    """Yield each row of the CSV file *f* as a plain dict.

    Historical name: under Python 2 this rebuilt each row to force unicode
    values; on Python 3 csv.DictReader already yields text, so this is now a
    thin wrapper kept for interface compatibility. dict(row) replaces the
    original six.iteritems comprehension (identical result, no six needed).
    """
    for row in csv.DictReader(f, **kwargs):
        yield dict(row)
def read_image(response, min_dim):
    """Buffer a streamed HTTP response body into an in-memory file.

    Despite the name, no image decoding or scaling happens here: *min_dim*
    is accepted for interface compatibility but is currently unused, and the
    raw bytes are returned as a seekable io.BytesIO positioned at offset 0.
    (Original docstring claimed a "scaled Image object" — corrected.)

    :param response: a requests response obtained with stream=True
                     (only its ``.raw`` file-like attribute is read)
    :param min_dim: unused; kept so callers need not change
    :return: io.BytesIO containing the full response body, rewound
    """
    content = io.BytesIO()  # io.BytesIO == six.BytesIO on Python 3
    shutil.copyfileobj(response.raw, content)
    content.seek(0)
    return content
def consumer(args, queue):
    """Drain (code, url) pairs from the queue: download each image and upload
    its raw bytes to the Swift object store.

    Exits once the queue is empty after a short grace pause between items.
    NOTE(review): the OS_* credentials referenced below are not defined in
    this file — presumably injected elsewhere (config/env); verify before use.
    """
    swift = swiftclient.client.Connection(
        auth_version=OS_IDENTITY_API_VERSION,
        user=OS_USERNAME,
        key=OS_PASSWORD,
        authurl=OS_AUTH_URL,
        tenant_name=OS_TENANT_NAME,
        os_options={'region_name': OS_REGION_NAME},
    )
    # Wait for the producer to publish the first URLs.
    while queue.empty():
        time.sleep(0.5)
    while not queue.empty():
        code, url = queue.get(block=True, timeout=None)
        try:
            response = requests.get(url, stream=True, timeout=args.timeout)
            image = read_image(response, args.min_dim)
            # CONTAINER == 'image_Storage'; use the module constant instead of
            # repeating the literal.
            swift.put_object(CONTAINER, 'imageTest/' + code + '.jpg', image)
        except Exception:
            # Best-effort: log the failure and continue with the next URL.
            log.warning('error {}'.format(traceback.format_exc()))
        # BUG FIX: was `if queue.empty:` — the unbound method is always
        # truthy, so the pause fired on every iteration. Calling it restores
        # the intent: give the producer a moment to refill before the loop
        # condition decides we are done.
        if queue.empty():
            time.sleep(0.1)
    log.debug('consumer exiting')
def producer(args, queue):
    """Feed (ImageID, OriginalURL) pairs from the input CSV into the queue."""
    with open(args.input) as csv_file:
        for record in unicode_dict_reader(csv_file):
            # put() blocks when the queue is full, throttling the producer
            # to the consumers' pace.
            queue.put([record['ImageID'], record['OriginalURL']],
                      block=True, timeout=None)
    queue.close()
- log = config_logger()
if __name__ == '__main__':
    args = parse_args()
    log.debug(args)
    # BUG FIX: the --queue-size option was parsed but ignored (queue size was
    # hard-coded to 10000 with the real line commented out). A bounded queue
    # keeps the producer from running arbitrarily far ahead of the workers.
    queue = multiprocessing.Queue(args.queue_size)
    # One producer that reads the CSV, plus N consumer/download workers.
    processes = [
        multiprocessing.Process(target=producer, args=(args, queue))
    ]
    for _ in range(args.consumers):
        processes.append(
            multiprocessing.Process(target=consumer, args=(args, queue)))
    print('There are', len(processes), 'processes')
    for p in processes:
        p.start()
    for p in processes:
        p.join()
Add Comment
Please, Sign In to add comment