"""Collect the CDS sequence for every peptide header in a set of aligned files.

For each file in the aligned-peptide folder, every header line (a line
starting with ``>``) is matched to a species by looking for one of the keys of
the ``cds_identifiers.pkl`` dictionary inside the header text.  The value
stored under that key is the file name of a pickled dictionary
(header -> CDS sequence) in the CDS pickle folder.  When the header is a key
of that dictionary, the header and its sequence are appended to
``<result folder>/<aligned file name without extension>.acds``.
"""
import os
import pickle

# All default folders are resolved against the working directory at import time.
home_path = os.getcwd()
# Where the CDS pickle files are (this script uses the sequential pickles).
path_to_cds_pickle_files = f"{home_path}/CDS_pickles_sequential"
# Where the result (.acds) files are written.
path_to_result = f"{home_path}/Test_folder/Test_cds_results"
# Where the aligned peptide files are kept.
path_to_algined_files = f"{home_path}/Test_folder/Test_peptide_input"
# Where the accessory files (including cds_identifiers.pkl) are kept.
path_to_script_accessories = f"{home_path}/Script_accesories"


def _find_species_pickle(current_peptide_header, cds_shorthand_dict):
    """Return the pickle name of the first species key found in the header.

    Args:
        current_peptide_header: A header line from an aligned peptide file.
        cds_shorthand_dict: Mapping of species identifier -> CDS pickle name.

    Returns:
        The value stored under the first key (in dictionary order) that occurs
        as a substring of the header, or ``None`` when no key occurs in it.
    """
    for key, value in cds_shorthand_dict.items():
        if key in current_peptide_header:
            return value
    return None


def main(aligned_dir=None, cds_dir=None, result_dir=None, accessories_dir=None):
    """Write the matching CDS entries for every aligned peptide file.

    Args:
        aligned_dir: Folder with the aligned peptide files.  Defaults to
            ``path_to_algined_files``.
        cds_dir: Folder with the per-species CDS pickles.  Defaults to
            ``path_to_cds_pickle_files``.
        result_dir: Folder receiving the ``.acds`` files.  Defaults to
            ``path_to_result``.
        accessories_dir: Folder containing ``cds_identifiers.pkl``.  Defaults
            to ``path_to_script_accessories``.

    Raises:
        OSError: If a required folder or file cannot be read or written.
    """
    if aligned_dir is None:
        aligned_dir = path_to_algined_files
    if accessories_dir is None:
        accessories_dir = path_to_script_accessories

    # SECURITY: pickle.load executes arbitrary code from the file; only use
    # pickle files that come from a trusted source.
    identifiers_path = os.path.join(accessories_dir, "cds_identifiers.pkl")
    with open(identifiers_path, "rb") as cds_shorthand:
        cds_shorthand_dict = pickle.load(cds_shorthand)

    for file in os.listdir(aligned_dir):
        aligned_path = os.path.join(aligned_dir, file)
        # listdir also returns sub-directories, which cannot be opened as text.
        if not os.path.isfile(aligned_path):
            continue

        with open(aligned_path, "rt") as current_peptide_file:
            for current_peptide_header in current_peptide_file:
                # Only lines starting with '>' are headers.
                if not current_peptide_header.startswith(">"):
                    continue
                this_file = _find_species_pickle(
                    current_peptide_header, cds_shorthand_dict
                )
                # No known species identifier in this header: there is no
                # pickle to look in, so skip it rather than crash on open(None).
                if this_file is None:
                    continue
                key_vs_key(
                    current_peptide_header,
                    this_file,
                    file,
                    cds_dir=cds_dir,
                    result_dir=result_dir,
                )


def key_vs_key(current_peptide_header, this_file, file, *, cds_dir=None,
               result_dir=None):
    """Append the CDS entry for one peptide header to its ``.acds`` file.

    Args:
        current_peptide_header: Header line exactly as read from the aligned
            file; it is compared to the pickle's keys without modification.
        this_file: File name of the species' CDS pickle inside ``cds_dir``.
        file: Name of the aligned peptide file; its extension is replaced by
            ``.acds`` to name the output file.
        cds_dir: Folder with the CDS pickles.  Defaults to
            ``path_to_cds_pickle_files``.
        result_dir: Folder receiving the output.  Defaults to
            ``path_to_result``.

    Returns:
        None.  Nothing is written when the header is not a key of the pickle.

    Raises:
        OSError: If the pickle cannot be read or the output cannot be written.
    """
    if cds_dir is None:
        cds_dir = path_to_cds_pickle_files
    if result_dir is None:
        result_dir = path_to_result

    stem = os.path.splitext(file)[0]
    acds_path = f"{result_dir}/{stem}.acds"

    # SECURITY: pickle.load executes arbitrary code from the file; only use
    # pickle files that come from a trusted source.
    # NOTE(review): the pickle is re-read on every call; caching it per species
    # would be faster at the cost of memory.
    with open(os.path.join(cds_dir, this_file), "rb") as current_cds_pickle:
        current_pickle = pickle.load(current_cds_pickle)

    # Keys are unique, so a membership test finds the same single entry that a
    # scan over every item would.
    if current_peptide_header not in current_pickle:
        return

    with open(acds_path, "a") as current_acds_file:
        current_acds_file.write(current_peptide_header)
        current_acds_file.write(current_pickle[current_peptide_header])


if __name__ == "__main__":
    main()