# Requires: base64, datetime, gzip, json, os, time, torch, plus the project
# helpers referenced below (CustomDataset, filtered_collate_fn,
# get_random_string_with_timestamp, S3_utils, boto3_s3_client, s3_bucket_name).
dataset = CustomDataset(items)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False,
                                          num_workers=NUM_PREPROCESS_WORKERS, drop_last=False,
                                          collate_fn=filtered_collate_fn)
if len(data_loader) == 0:
    print('Nothing to segment in {}'.format(items))
    return
# upper bound on the item count; the last batch may be smaller since drop_last=False
curr_items = BATCH_SIZE * len(data_loader)
print('---> processing dataloader with {} batches of BATCH SIZE {} each at {}'.format(
    len(data_loader), BATCH_SIZE,
    datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')))
st = time.time()
cids, np_masks, meta = [], [], []
# note: each of *_batch is a tuple
for batch_idx, (id_batch, img_batch, tag_batch) in enumerate(data_loader):
    try:
        mask_batch = self.model.forward_pass(img_batch)
        cids = [*cids, *list(id_batch)]
        np_masks = [*np_masks, *list(mask_batch)]
        meta = [*meta, *list(tag_batch)]
    except Exception as e:
        # skip a batch that fails inference instead of aborting the whole run
        print('--> skipping batch {} due to error: {}'.format(batch_idx, e))
print('--> finished batch processing dataloader with {} batches of BATCH SIZE {} each at {}, '
      'Overall time taken : {:.4f} seconds'.format(
          len(data_loader), BATCH_SIZE,
          datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
          time.time() - st))
checkpoint = time.time()
filename_pattern = 'chunk-{}.json'.format(get_random_string_with_timestamp())
items = []
for cid, feature, tag in zip(cids, np_masks, meta):
    json_obj = {
        "cid": str(cid),
        # compress a high-dimensional vector; feature must be bytes-like (e.g. a contiguous numpy array)
        "feature": base64.b64encode(gzip.compress(feature)).decode(),
        "tag": str(tag)
    }
    items.append(json.dumps(json_obj))
file_data = '\n'.join(items)
filename = os.path.join(s3_bucket_name, filename_pattern)
S3_utils.write_data_to_bucket(boto3_s3_client, filename, file_data)
print('===> saving inference data to {}, Count {}, overall time taken {:.4f}'.format(
    filename, len(cids), time.time() - checkpoint))
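# Read-side sketch (not part of the original paste): each line of the saved chunk
# stores the feature as base64(gzip(raw bytes)), so a consumer could recover it
# roughly as below. decode_chunk_line is a hypothetical helper; the dtype (and any
# reshape) are assumptions that must match whatever self.model.forward_pass() emits.
import base64
import gzip
import json

import numpy as np

def decode_chunk_line(line, dtype=np.float32):
    record = json.loads(line)
    raw = gzip.decompress(base64.b64decode(record["feature"]))
    feature = np.frombuffer(raw, dtype=dtype)  # flat vector; reshape if needed
    return record["cid"], feature, record["tag"]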