Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Arrange files for binary classification. The files are shuffled before being copied.
- """
- import os
- import numpy as np
- from shutil import copyfile
- from os import listdir
- import argparse
def get_args():
    """Parse the command-line options of the dataset-splitting script.

    All options are optional and use single-dash long names:

    * ``-src_dir``: directory containing folders of images
      (default ``'./'``).
    * ``-dst_dir``: destination directory (default ``'./cats_and_dogs/'``).
    * ``-target_name``: name of the target class folder inside ``src_dir``
      (default ``'cats'``).
    * ``-target_num``: number of the target class, kept as a string
      (default ``'0'``).
    * ``-val_frac``: fraction of validation data, parsed as ``float``
      (default ``0.2``).

    Returns:
        argparse.Namespace: the values parsed from ``sys.argv``.
    """
    parser = argparse.ArgumentParser('python')
    parser.add_argument('-src_dir',
                        default='./',
                        required=False,
                        help='directory containing folders of images')
    parser.add_argument('-dst_dir',
                        default='./cats_and_dogs/',
                        required=False,
                        help='destination directory')
    # The original help text here was a copy of the -src_dir description.
    parser.add_argument('-target_name',
                        default='cats',
                        required=False,
                        help='name of the target class folder inside src_dir')
    parser.add_argument('-target_num',
                        default='0',
                        required=False,
                        help='number of the target class')
    parser.add_argument('-val_frac',
                        type=float,
                        default=0.2,
                        required=False,
                        help='the fraction of validation data')
    return parser.parse_args()
# Save the split datasets to folders.
def save_to_folder(fileCollection, sourceFolder, targetFolder):
    """Copy every file named in ``fileCollection`` between two folders.

    Args:
        fileCollection: iterable of file names; each name is looked up in
            ``sourceFolder`` and written under the same name in
            ``targetFolder``.
        sourceFolder: directory the files are read from.
        targetFolder: directory the copies are written to. It is not
            created here, so it must already exist.
    """
    # Iterate the names directly so any iterable works (list, tuple,
    # generator), and let os.path.join pick the separator.
    for file_name in fileCollection:
        copyfile(os.path.join(sourceFolder, file_name),
                 os.path.join(targetFolder, file_name))
def split_bc(src_dir, dst_dir, val_frac, target_name='cats', target_num='0'):
    """Shuffle the files of one class and copy them into train/val folders.

    The files found in ``<src_dir>/<target_name>`` are randomly permuted
    and split: the last ``int(n * val_frac)`` files of the shuffled list go
    to ``<dst_dir>/val/<target_num>-<target_name>``, the rest go to
    ``<dst_dir>/train/<target_num>-<target_name>``. Both destination
    folders are created if missing. The size of each part is printed.

    Args:
        src_dir: directory containing the class folder.
        dst_dir: destination root directory.
        val_frac: fraction of the files used as validation data, in [0, 1].
        target_name: name of the class folder inside ``src_dir``.
        target_num: class number used as prefix of the output folder name.

    Raises:
        ValueError: if ``val_frac`` is outside [0, 1].
    """
    if not 0.0 <= val_frac <= 1.0:
        raise ValueError(
            'val_frac must be between 0 and 1, got {}'.format(val_frac))

    # os.path.join keeps the path correct whether or not src_dir ends
    # with a separator ('./data' + 'cats' used to become './datacats').
    src_target = os.path.join(src_dir, target_name)

    # a list of files in target class; sub-directories are skipped because
    # copyfile cannot copy them
    list_target = [f for f in listdir(src_target)
                   if os.path.isfile(os.path.join(src_target, f))]
    num_target = len(list_target)

    # shuffle the collection to create a new one
    permu_target = np.random.permutation(num_target)
    shuffled_target = [list_target[i] for i in permu_target]

    # calculate the length of each new folder of target
    len_target_test = int(num_target * val_frac)
    print('length of test data of {}:{}'.format(target_name, len_target_test))
    len_target_train = num_target - len_target_test
    print('length of train data of {}:{}'.format(target_name, len_target_train))

    # create 2 new lists of images of target
    target_train_set = shuffled_target[:len_target_train]
    target_test_set = shuffled_target[len_target_train:]

    # create the train/val sub-directories of the target class in dst_dir
    class_dir = '{}-{}'.format(target_num, target_name)
    target_train_dir = os.path.join(dst_dir, 'train', class_dir)
    target_test_dir = os.path.join(dst_dir, 'val', class_dir)
    os.makedirs(target_train_dir, exist_ok=True)
    os.makedirs(target_test_dir, exist_ok=True)

    # copy files to corresponding new folder
    save_to_folder(target_train_set, src_target, target_train_dir)
    save_to_folder(target_test_set, src_target, target_test_dir)
# Script entry point: parse the command line, then split the target class.
if __name__ == "__main__":
    cli = get_args()
    split_bc(
        cli.src_dir,
        cli.dst_dir,
        cli.val_frac,
        cli.target_name,
        cli.target_num,
    )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement