Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import pandas as pd
- import glob
- import sys
- import subprocess
- from collections import Counter
def find_descriptor(path=None):
    """Locate the descriptor CSV file in a directory.

    Args:
        path: Directory to search. When omitted, the current working
            directory is used; it is looked up at call time, not when the
            function is defined.

    Returns:
        str: The first ``*.csv`` match in sorted order. With the default
        ``path`` this is the bare file name (valid relative to the working
        directory). With an explicit ``path`` it is that directory joined
        with the file name. Either form can be handed to
        ``pandas.read_csv``.

    Raises:
        IndexError: If the directory contains no ``.csv`` file.

    Example:
        descriptor = find_descriptor()
        df = pd.read_csv(descriptor)
    """
    search_dir = os.getcwd() if path is None else os.fspath(path)
    # Escape the directory so glob metacharacters in it are taken literally.
    pattern = os.path.join(glob.escape(search_dir), '*.csv')
    # glob order is filesystem dependent; sort so the choice is reproducible.
    matches = sorted(glob.glob(pattern))
    if not matches:
        # IndexError is what indexing the empty match list used to raise.
        raise IndexError("no .csv descriptor found in " + repr(search_dir))
    if path is None:
        return os.path.basename(matches[0])
    return matches[0]
def create_ordering_dict(label_one, label_two, descriptor):
    """Build a lookup from one descriptor column to another.

    Args:
        label_one: Column supplying the dictionary keys. A string selects
            the column by name; an integer selects it by position.
        label_two: Column supplying the dictionary values, selected the
            same way as ``label_one``.
        descriptor: Path (or anything ``pandas.read_csv`` accepts) of the
            descriptor CSV file.

    Returns:
        dict: ``{label_one value: label_two value}`` for every row of the
        CSV. When a key occurs in several rows, the last row wins.

    Example:
        ordering_dict = create_ordering_dict(
            'cropped image file path', 'abnormality type', descriptor_path)
    """
    df = pd.read_csv(descriptor)

    def _column(label):
        # ``iloc`` only accepts positions, so column names must go through
        # label-based selection; integers keep their positional meaning.
        if isinstance(label, int):
            return df.iloc[:, label]
        return df[label]

    # Both columns come from the same frame, so they always have equal length.
    return dict(zip(_column(label_one), _column(label_two)))
def segregate_data(original_datapath, ordered_datapath, ordering_dict):
    """Copy DICOM files into one sub-directory per label.

    Every file matching ``*/*/*/000000.dcm`` below ``original_datapath``
    whose relative path contains a key of ``ordering_dict`` is copied to
    ``ordered_datapath/<label>/<key><index>.dcm``. ``<label>`` is the value
    stored for that key and ``<index>`` is the file's position in the
    sorted list of matched files.

    Args:
        original_datapath: Root directory of the source dataset.
        ordered_datapath: Root directory of the sorted copy; created,
            together with one sub-directory per label, if missing.
        ordering_dict: Mapping of identifier (matched as a substring of the
            relative file path) to label.

    Returns:
        None. The source files are left in place, and the process working
        directory is not changed.

    Raises:
        OSError: If a directory cannot be created or a file cannot be copied.
    """
    # Function-scope import: the module's import block does not provide shutil.
    import shutil

    # Resolve both roots up front so relative arguments keep referring to
    # the caller's working directory for the whole run.
    source_root = os.path.abspath(original_datapath)
    target_root = os.path.abspath(ordered_datapath)

    # Create the main directory and one directory per distinct label.
    os.makedirs(target_root, exist_ok=True)
    for label in set(ordering_dict.values()):
        os.makedirs(os.path.join(target_root, str(label)), exist_ok=True)

    # Collect candidate files; sort them so the numeric suffix is stable.
    pattern = os.path.join(
        glob.escape(source_root), '*', '*', '*', '000000.dcm')
    source_files = sorted(glob.glob(pattern))
    relative_files = [os.path.relpath(p, source_root) for p in source_files]

    # Copy each matching file into the directory of its label.
    for identifier, label in ordering_dict.items():
        key = str(identifier)
        for index, relative in enumerate(relative_files):
            if key not in relative:
                continue
            destination = os.path.join(
                target_root, str(label), key + str(index) + '.dcm')
            # A key containing a path separator needs its parent directories.
            os.makedirs(os.path.dirname(destination), exist_ok=True)
            shutil.copy(source_files[index], destination)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement