SHARE
TWEET

Untitled

a guest May 22nd, 2019 91 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import os
  2. import sys
  3. from config import *
  4. from tqdm import tqdm
  5. from rle import rle_decode, rle_encode
  6. import numpy as np
  7. import pandas as pd
  8. import h5py
  9. import cv2
  10. from matplotlib import pyplot as plt
  11. from shared import *
  12. from skimage.transform import resize
  13. from skimage.io import imread, imshow, imread_collection, concatenate_images
  14. import utils
  15.  
  16.  
  17.  
  18. MAX_DIM = 512
  19.  
  20. RESIZE_MASK = True
  21.  
  22. config = KaggleBowlConfig()
  23.  
  24. print('Loading metadata...')
  25. TRAIN_PATH = '../train/'
  26. df = pd.read_csv('../train.csv')
  27. df['Id'] = df['ImageId'].apply(lambda x: x[:-4])
  28. df = df.set_index('Id')
  29. if TRAIN_DATA_OVERRIDE and len(TRAIN_DATA_OVERRIDE) > 0:
  30.     for i in TRAIN_DATA_OVERRIDE:
  31.         df.loc[df['ImageId'] == i['ImageId'], 'Height'] = i['Height']
  32.         df.loc[df['ImageId'] == i['ImageId'], 'Width'] = i['Width']
  33.  
  34.  
  35.  
  36. print('Resizing image and masks...')
  37.  
  38. # images_df = pd.read_csv('../train_images.csv')
  39.  
  40.  
  41.  
  42. train_ids = list(os.listdir(TRAIN_PATH))
  43. train_path_sm = '../train_512/'
  44.  
  45.  
  46. if not os.path.exists(train_path_sm):
  47.     os.makedirs(train_path_sm)
  48.  
  49. for i, _id in enumerate(tqdm(train_ids)):
  50.     sub_dir = train_path_sm + _id + '/'
  51.     if not os.path.exists(sub_dir):
  52.         os.makedirs(sub_dir)
  53.  
  54.     source_image_path = TRAIN_PATH  + _id + '/images/' + _id + '.jpg'
  55.  
  56.     source_image = load_img(source_image_path)
  57.  
  58.     source_height = source_image.shape[0]
  59.     source_width = source_image.shape[1]
  60.  
  61.  
  62.     # plot_image(source_image)
  63.    
  64.     # target_image, window, scale, padding, crop = utils.resize_image(
  65.  #        source_image,
  66.  #        min_dim=config.IMAGE_MIN_DIM,
  67.  #        max_dim=config.IMAGE_MAX_DIM,
  68.  #        padding=config.IMAGE_PADDING)
  69.     target_image, window, scale, padding, crop = utils.resize_image(
  70.         source_image,
  71.         min_dim=MAX_DIM,
  72.         max_dim=MAX_DIM,
  73.         padding=config.IMAGE_PADDING)
  74.  
  75.     # target_image = cv2.resize(source_image, (512, 512), cv2.INTER_NEAREST)
  76.     # plot_image(target_image)
  77.  
  78.     image_path = train_path_sm + _id + '/images/'
  79.     if not os.path.exists(image_path):
  80.         os.makedirs(image_path)
  81.     target_image_path = train_path_sm + _id + '/images/' + _id + '.jpg'
  82.     cv2.imwrite(target_image_path, cv2.cvtColor(target_image, cv2.COLOR_BGR2RGB))
  83.  
  84.     if RESIZE_MASK:
  85.         mask_path = train_path_sm + _id + '/masks/'
  86.        
  87.         if not os.path.exists(mask_path):
  88.             os.makedirs(mask_path)
  89.        
  90.         pixels_arr = list(df.loc[df.ImageId == _id + '.jpg'][['EncodedPixels']]['EncodedPixels'])
  91.         classes_arr = [int(x.split('_')[0]) for x in list(df.loc[df.ImageId == _id + '.jpg'][['ClassId']]['ClassId'])]
  92.  
  93.         masks = []
  94.         for i, pixels in enumerate(pixels_arr):
  95.             source_mask = rle_decode(pixels, (source_height, source_width), np.uint8)
  96.             source_mask = binary_fill_holes(source_mask).astype(np.uint8)
  97.             if np.sum(source_mask) >= 1:
  98.                 masks.append(np.squeeze(source_mask))
  99.             else:
  100.                 print(_id, i)
  101.                 print(pixels)
  102.                 plot_image(source_mask)
  103.            
  104.         masks = np.stack(masks, axis=-1)
  105.         masks = masks.astype(np.uint8)
  106.        
  107.         target_mask = utils.resize_mask(masks, scale, padding, crop)
  108.  
  109.         # print(target_image.shape, target_mask.shape)
  110.        
  111.         path = train_path_sm + _id
  112.  
  113.         fname = path + '/masks/' + _id + '.h5'
  114.         with h5py.File(fname, "w") as hf:
  115.             hf.create_dataset("arr", data=target_mask)
  116.  
  117.         classes = np.array(classes_arr)
  118.         fname = path + '/masks/' + _id + '_label.h5'
  119.         with h5py.File(fname, "w") as hf:
  120.             hf.create_dataset("arr", data=classes)
  121.  
  122.  
  123.         # resize back to w/o margin
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top