Advertisement
Guest User

Antismash output filter EPS

a guest
Jan 29th, 2020
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.04 KB | None | 0 0
  1. """
  2. Select only BGCs with 4 or more genes that are related to (exo)polysaccharides
  3. """
  4.  
  5. import os
  6. from builtins import any
  7.  
  8.  
  9. def parse_genbank_antismash(input_dir,output_dir):
  10.     classification_list = ["Alpha-amylase","Bac_transf",
  11.                         "Capsule_synth","CBM_48","DegT_DnrJ_EryC1","dTDP_sugar_isom",
  12.                         "Epimerase_2","Glycos_transf",
  13.                         "Glyco_tran","Glyco_transf",
  14.                         "MannoseP_isomer","PIG-L","Poly_export","Polysacc_synt",
  15.                         "RmlD_sub_bind","Wzy_C"] # Sec_met classifications related to (exo)polysacharides
  16.     begin_lijst = os.listdir(f"{input_dir}")
  17.     bestand_lijst = []
  18.     for file in begin_lijst:
  19.         if ".gbk" in file and "fixed" not in file: # Only look at the GBK files and not the
  20.             bestand_lijst.append(file)
  21.     tuple_list = []
  22.     for i in range(len(bestand_lijst)):
  23.         with open(f"{input_dir}/{bestand_lijst[i]}") as f:
  24.             file = f.readlines()
  25.         sec_met_list = []
  26.         for line in file:
  27.             if "/sec_met_domain" in line:
  28.                 for k in classification_list:
  29.                     if k in line:
  30.                         line = line.strip().split(" ")[0][17:] # Print relevant sacharide related gene clusters
  31.                         sec_met_list.append(line)
  32.         if len(sec_met_list) > 3: # Select only with 4 or more gene clusters
  33.             tuple_list.append((bestand_lijst[i],sec_met_list,len(sec_met_list))) # Create tuple from data entry
  34.     a = sorted(tuple_list,key=lambda x: x[-1])[::-1] # Order list from low to high based on sacharide related count
  35.     return a
  36.  
  37.    
  38. if __name__ == "__main__":
  39.     folder_path = "./antismash_analyser_input"
  40.     with open("./antismash_analyser_output/output.txt","w+") as f:
  41.         for folder in os.listdir(folder_path):
  42.             print(folder)
  43.             f.write("\n"+str(folder)+"\n")
  44.             for i in parse_genbank_antismash(f"{folder_path}/{folder}", ""):
  45.                 print(i)
  46.                 f.write(str(i)+"\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement