Advertisement
michal_k

segregation_tools

Aug 23rd, 2018
208
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.58 KB | None | 0 0
  1. import os
  2. import pandas as pd
  3. import glob
  4. import sys
  5. import subprocess
  6. from collections import Counter
  7.  
  8. def find_descriptor(path = os.getcwd()):
  9.     """
  10.    path - string
  11.    
  12.    Tool to find descriptor csv file in path.
  13.    Returns a string of descriptor name or descriptor path to place in pandas.read_csv(filename).
  14.    Example:
  15.    csv_path, csv_name = find_descriptor()
  16.    """
  17.    
  18.     descriptor_name = (glob.glob('*.csv'))
  19.     descriptor_path = os.getcwd() +'/'+ descriptor_name[0]
  20.    
  21.     return descriptor_name[0]
  22.  
  23. def create_ordering_dict(label_one, label_two, descriptor):
  24.     """
  25.    label_one - string
  26.    label_two - string
  27.    descriptor - string or global variable assigned to string
  28.  
  29.    Tool to create a dictionary for dataset ordering purposes.
  30.    Returns dictionary of consist scheme 'label_one_list[i]:label_two_list[i].
  31.    Example:
  32.    ordering_dict = create_ordering_dict('cropped image file path', 'abnormality type', descriptor_path)
  33.    """
  34.  
  35.     df = pd.read_csv(descriptor)
  36.     label_one_list = list(df.iloc[:,label_one])
  37.     label_two_list = list(df.iloc[:,label_two])
  38.     ordering_dict = {}
  39.    
  40.     if len(label_one_list) != len(label_two_list):
  41.         print("Cannot perform operation. Inconsist number of labels")
  42.        
  43.     else:
  44.         ordering_dict = {}
  45.         labels_length = len(label_one_list)
  46.         for i in range(labels_length):
  47.             ordering_dict[label_one_list[i]] = label_two_list[i]
  48.         return ordering_dict
  49.  
  50. def segregate_data(original_datapath, ordered_datapath, ordering_dict):
  51.  
  52.     #Creating of main directory
  53.     ordered_datapath_cmd = "mkdir " + ordered_datapath
  54.     subprocess.run(ordered_datapath_cmd.split(), stdout=subprocess.PIPE)
  55.     os.chdir(ordered_datapath)
  56.  
  57.     #Creating 'label' directories
  58.     dict_values_counter = Counter(ordering_dict.values())
  59.     labels = list(dict_values_counter.keys())
  60.     for i in labels:
  61.         command = "mkdir " + str(i)
  62.         subprocess.run(command.split(),stdout=subprocess.PIPE)
  63.     os.chdir(original_datapath)
  64.  
  65.     #Moving files to new directories
  66.     patiendid_list = list(ordering_dict.keys())
  67.     filepath_list = glob.glob('*/*/*/000000.dcm')
  68.     file_id = list(range(len(filepath_list)))
  69.     for i in patientid_list:
  70.         for j in range(len(filepath_list)):
  71.             if i in filepath_list[j]:
  72.                 name = str(filepath_list[j])
  73.                 command = 'cp ' + name + ' ' + str(ordered_datapath) + '/' + ordering_dict[i] + '/' + i + str(file_id[j]) + '.dcm'
  74.                 subprocess.run(command.split(), stdout=subprocess.PIPE)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement