Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import sys

from config import *
from tqdm import tqdm
from rle import rle_decode, rle_encode
import numpy as np
import pandas as pd
import h5py
import cv2
from matplotlib import pyplot as plt
from shared import *
from skimage.transform import resize
from skimage.io import imread, imshow, imread_collection, concatenate_images
import utils

# Target square side length for the resized images and masks.
MAX_DIM = 512
# When True, also decode, resize and persist the RLE masks alongside the images.
RESIZE_MASK = True

config = KaggleBowlConfig()

print('Loading metadata...')
TRAIN_PATH = '../train/'
df = pd.read_csv('../train.csv')
# 'Id' is the ImageId with its 4-character extension stripped (e.g. '.jpg').
df['Id'] = df['ImageId'].apply(lambda x: x[:-4])
df = df.set_index('Id')

# Apply manual Height/Width corrections for images whose CSV metadata is wrong.
# TRAIN_DATA_OVERRIDE presumably comes from `config` — a list of dicts with
# 'ImageId', 'Height', 'Width' keys (verify against config module).
if TRAIN_DATA_OVERRIDE and len(TRAIN_DATA_OVERRIDE) > 0:
    for override in TRAIN_DATA_OVERRIDE:
        df.loc[df['ImageId'] == override['ImageId'], 'Height'] = override['Height']
        df.loc[df['ImageId'] == override['ImageId'], 'Width'] = override['Width']

print('Resizing image and masks...')
train_ids = list(os.listdir(TRAIN_PATH))
train_path_sm = '../train_512/'
if not os.path.exists(train_path_sm):
    os.makedirs(train_path_sm)

for img_idx, _id in enumerate(tqdm(train_ids)):
    sub_dir = train_path_sm + _id + '/'
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)

    source_image_path = TRAIN_PATH + _id + '/images/' + _id + '.jpg'
    source_image = load_img(source_image_path)
    source_height = source_image.shape[0]
    source_width = source_image.shape[1]

    # Aspect-preserving resize to MAX_DIM x MAX_DIM with padding; the returned
    # window/scale/padding/crop describe the transform so the masks below can
    # be resized with the exact same geometry.
    target_image, window, scale, padding, crop = utils.resize_image(
        source_image,
        min_dim=MAX_DIM,
        max_dim=MAX_DIM,
        padding=config.IMAGE_PADDING)

    image_path = train_path_sm + _id + '/images/'
    if not os.path.exists(image_path):
        os.makedirs(image_path)
    target_image_path = train_path_sm + _id + '/images/' + _id + '.jpg'
    # load_img presumably yields RGB while cv2.imwrite expects BGR, hence the
    # channel swap before writing — TODO confirm load_img's channel order.
    cv2.imwrite(target_image_path, cv2.cvtColor(target_image, cv2.COLOR_BGR2RGB))

    if RESIZE_MASK:
        mask_path = train_path_sm + _id + '/masks/'
        if not os.path.exists(mask_path):
            os.makedirs(mask_path)

        # Hoist the per-image row selection: the original filtered the full
        # DataFrame twice (once per column).
        image_rows = df.loc[df.ImageId == _id + '.jpg']
        pixels_arr = list(image_rows['EncodedPixels'])
        # ClassId is encoded as '<class>_<...>'; keep only the integer class.
        classes_arr = [int(x.split('_')[0]) for x in list(image_rows['ClassId'])]

        masks = []
        # NOTE: distinct loop variable — the original reused `i` here and
        # shadowed the outer enumerate index.
        for mask_idx, pixels in enumerate(pixels_arr):
            source_mask = rle_decode(pixels, (source_height, source_width), np.uint8)
            # binary_fill_holes (presumably re-exported by `shared` —
            # verify) closes interior holes left by the RLE annotation.
            source_mask = binary_fill_holes(source_mask).astype(np.uint8)
            if np.sum(source_mask) >= 1:
                masks.append(np.squeeze(source_mask))
            else:
                # Mask decoded to all-zero: report it for manual inspection.
                print(_id, mask_idx)
                print(pixels)
                plot_image(source_mask)

        if not masks:
            # Every mask for this image decoded empty; np.stack on an empty
            # list would raise an opaque ValueError — skip with a warning.
            print('No valid masks for', _id, '- skipping mask export')
            continue

        masks = np.stack(masks, axis=-1)
        masks = masks.astype(np.uint8)
        # Resize the (H, W, num_instances) mask stack with the same transform
        # applied to the image above.
        target_mask = utils.resize_mask(masks, scale, padding, crop)

        path = train_path_sm + _id
        fname = path + '/masks/' + _id + '.h5'
        with h5py.File(fname, "w") as hf:
            hf.create_dataset("arr", data=target_mask)

        classes = np.array(classes_arr)
        fname = path + '/masks/' + _id + '_label.h5'
        with h5py.File(fname, "w") as hf:
            hf.create_dataset("arr", data=classes)

        # resize back to w/o margin
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement