"""segregation_tools: helpers for locating a descriptor csv and sorting .dcm files into per-label directories."""

import glob
import os
import shutil
import subprocess
import sys
from collections import Counter

import pandas as pd

def find_descriptor(path=None):
    """
    path - string or None

    Tool to find descriptor csv file in path.
    When path is None (the default) the current working directory is searched
    at call time and the bare file name is returned. When path is given, that
    directory is searched and the file path (path joined with the file name)
    is returned. Either result can be placed in pandas.read_csv(filename).
    If several csv files are present, the alphabetically first one is used.
    Raises FileNotFoundError when no csv file is found.
    Example:
    csv_name = find_descriptor()
    csv_path = find_descriptor('/data/descriptors')
    """

    if path is None:
        # Relative pattern: matches come back as bare file names.
        pattern = '*.csv'
    else:
        # Escape the directory so glob metacharacters in it are taken literally.
        pattern = os.path.join(glob.escape(os.fspath(path)), '*.csv')

    # glob order is arbitrary; sort so the chosen file is deterministic.
    matches = sorted(glob.glob(pattern))
    if not matches:
        searched = os.getcwd() if path is None else path
        raise FileNotFoundError(
            "No descriptor csv file found in {!r}".format(searched))

    return matches[0]

def _descriptor_column(df, label):
    """Return one column of df, by name when label is a string, else by integer position."""
    if isinstance(label, str):
        return df[label]
    return df.iloc[:, label]


def create_ordering_dict(label_one, label_two, descriptor):
    """
    label_one - string (column name) or integer (column position)
    label_two - string (column name) or integer (column position)
    descriptor - string or global variable assigned to string
                 (anything accepted by pandas.read_csv)

    Tool to create a dictionary for dataset ordering purposes.
    Returns dictionary of consistent scheme 'label_one_list[i]:label_two_list[i]'.
    If a label_one value occurs in several rows, the last row wins.
    Raises KeyError for an unknown column name and IndexError for an
    out-of-range column position.
    Example:
    ordering_dict = create_ordering_dict('cropped image file path', 'abnormality type', descriptor_path)
    """

    df = pd.read_csv(descriptor)
    keys = _descriptor_column(df, label_one)
    values = _descriptor_column(df, label_two)

    # Both columns come from the same DataFrame, so they always have the same
    # length; no separate length check is needed.
    return dict(zip(keys, values))

def segregate_data(original_datapath, ordered_datapath, ordering_dict):
    """
    original_datapath - string, directory searched for '*/*/*/000000.dcm' files
    ordered_datapath - string, directory in which the label directories are created
    ordering_dict - dictionary mapping an id string to a label

    Tool to copy .dcm files into one directory per label.
    Creates ordered_datapath and one sub-directory per distinct label in
    ordering_dict. Every file found under original_datapath whose relative path
    contains an id from ordering_dict is copied to
    '<ordered_datapath>/<label>/<id><file number>.dcm', where the file number
    is the file's position in the sorted list of found files.
    The working directory of the process is left unchanged.
    Returns None.
    Example:
    segregate_data('/data/raw', '/data/ordered', ordering_dict)
    """

    # Resolve both roots up front so relative arguments are interpreted
    # against the caller's working directory, independently of each other.
    source_root = os.path.abspath(original_datapath)
    target_root = os.path.abspath(ordered_datapath)

    # Creating main directory and 'label' directories
    os.makedirs(target_root, exist_ok=True)
    for label in set(ordering_dict.values()):
        os.makedirs(os.path.join(target_root, str(label)), exist_ok=True)

    # Collect candidate files; sorted so the file numbers are deterministic.
    pattern = os.path.join(glob.escape(source_root), '*', '*', '*', '000000.dcm')
    found_files = sorted(glob.glob(pattern))
    # Ids are matched against the path relative to the data root, so an id
    # cannot accidentally match part of the root directory itself.
    relative_files = [os.path.relpath(found, source_root) for found in found_files]

    # Copying files to new directories
    for patient_id, label in ordering_dict.items():
        patient_id = str(patient_id)
        label_dir = os.path.join(target_root, str(label))
        for file_id, (source, relative) in enumerate(zip(found_files, relative_files)):
            if patient_id in relative:
                destination = os.path.join(label_dir, patient_id + str(file_id) + '.dcm')
                shutil.copy(source, destination)