Regional_Push

the one that is working for cds headers

Aug 4th, 2021
812
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import os
import pickle
import sys
  4.  
  5. path_to_cds_pickle_files = '/home/rijandhakal/Desktop/map-multifile-cds-to-protein-alignment/CDS_pickles' # path to folder with cds pickle files, for worksation:/home/opensourceferns/Desktop/map-multifile-cds-to-protein-alignment/CDS_pickles  for the laptop:/home/rijandhakal/Desktop/map-multifile-cds-to-protein-alignment/CDS_pickles
  6. path_to_result = '/home/rijandhakal/Desktop/map-multifile-cds-to-protein-alignment/Test_folder/Test_results' # path to folder where I want to eventually dump my results. For the laptop: /home/rijandhakal/Desktop/map-multifile-cds-to-protein-alignment/Test_folder/Test_results, for the workstation: /home/opensourceferns/Desktop/map-multifile-cds-to-protein-alignment/Test_folder/Test_results
  7. path_to_algined_files = '/home/rijandhakal/Desktop/map-multifile-cds-to-protein-alignment/Test_folder/Aligned_peptide' # this one is the lest folder and will have to be changed later. For the workstation: /home/opensourceferns/Desktop/map-multifile-cds-to-protein-alignment/Test_folder/Aligned_peptide, for the laptop:'/home/rijandhakal/Desktop/map-multifile-cds-to-protein-alignment/Test_folder/Aligned_peptide'
  8. path_to_script_accessories ='/home/rijandhakal/Desktop/map-multifile-cds-to-protein-alignment/Script_accesories' # path to the accessory files. For the laptop: '/home/rijandhakal/Desktop/map-multifile-cds-to-protein-alignment/Script_accesories'. For the workstation: /home/opensourceferns/Desktop/map-multifile-cds-to-protein-alignment/Script_accesories
  9.  
  10. def main():
  11.  
  12.     hop_to_species_shorthand = os.chdir(path_to_script_accessories) # this is the folder that has the dict in pickle format
  13.  
  14.     with open('cds_identifiers.pkl','rb') as cds_shorthand: # this opens the pickle file
  15.         cds_shorthand_dict = pickle.load(cds_shorthand) # open the dict
  16.  
  17.     hop_to_aligned_files = os.chdir(path_to_algined_files) # hop to the peptide files
  18.  
  19.     list_of_aligned_files = os.listdir(path_to_algined_files) # make a list of the files in the peptide folder
  20.  
  21.     for file in list_of_aligned_files: # go through the files
  22.         with open(file,'rt') as current_peptide_file: # using with to not have to bother closing the files manaully
  23.             current_peptide_headers = current_peptide_file.readlines() # turning it into an iterable object
  24.  
  25.     def test_species(current_peptide_header): # this is a prototype, this function is actually called inside current_peptide_header
  26.         for key,value in cds_shorthand_dict.items():
  27.             if key in current_peptide_header:
  28.                 return value
  29.  
  30.     for current_peptide_header in current_peptide_headers: # opeing the peptide header line
  31.         if current_peptide_header[0] == '>': # '>' makes sure that the header is actually a header
  32.             this_file = test_species(current_peptide_header)
  33.             map_sequence = key_vs_key(current_peptide_header,this_file,file)
  34.            
  35.  
  36. def key_vs_key(current_peptide_header,this_file,file):
  37.  
  38.     file = os.path.splitext(file)[0]
  39.     file = f'{path_to_result}/{file}.{"acds"}'
  40.  
  41.     hop_to_cds_pickles = os.chdir(path_to_cds_pickle_files)
  42.     with open(this_file,'rb') as current_cds_pickle:
  43.         current_pickle = pickle.load(current_cds_pickle)
  44.    
  45.     for Universal_header,cds_sequence in current_pickle.items():
  46.         if current_peptide_header == Universal_header:
  47.             with open(file,'a') as current_acds_file:
  48.                 current_acds_file.write(current_peptide_header)
  49.                 current_acds_file.write(cds_sequence)
  50.  
  51. main()
  52.  
  53.  
  54.  
RAW Paste Data