Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Select only BGCs with 4 or more genes that are related to (exo)polysaccharides
- """
- import os
- from builtins import any
def parse_genbank_antismash(input_dir, output_dir):
    """Rank GenBank files by their (exo)polysaccharide-related sec_met domains.

    Every entry of ``input_dir`` whose name contains ``.gbk`` and does not
    contain ``fixed`` is scanned line by line. A line counts as a hit when it
    contains ``/sec_met_domain`` and at least one of the name fragments in
    ``classification_list``. Each hit line is recorded exactly once, even when
    several overlapping fragments (e.g. ``Glyco_tran`` and ``Glyco_transf``)
    match it.

    Args:
        input_dir: Directory containing the GenBank (``.gbk``) files.
        output_dir: Unused; kept so existing callers keep working.

    Returns:
        A list of ``(file_name, domain_names, domain_count)`` tuples for the
        files with four or more hits, ordered from the highest count to the
        lowest.
    """
    # sec_met domain name fragments related to (exo)polysaccharides.
    classification_list = [
        "Alpha-amylase", "Bac_transf",
        "Capsule_synth", "CBM_48", "DegT_DnrJ_EryC1", "dTDP_sugar_isom",
        "Epimerase_2", "Glycos_transf",
        "Glyco_tran", "Glyco_transf",
        "MannoseP_isomer", "PIG-L", "Poly_export", "Polysacc_synt",
        "RmlD_sub_bind", "Wzy_C",
    ]
    qualifier = "/sec_met_domain"
    # Number of characters in the leading `/sec_met_domain="` text (17) that
    # is sliced off the first token to leave only the domain name.
    prefix_length = len(qualifier + '="')

    # Only look at the GBK files; names containing "fixed" are skipped
    # (presumably post-processed copies of the same records -- TODO confirm).
    bestand_lijst = [
        name for name in os.listdir(input_dir)
        if ".gbk" in name and "fixed" not in name
    ]

    tuple_list = []
    for name in bestand_lijst:
        sec_met_list = []
        with open(f"{input_dir}/{name}") as handle:
            for line in handle:
                if qualifier not in line:
                    continue
                # any() stops at the first matching fragment, so a line is
                # appended once. Re-testing the already-trimmed name against
                # the remaining fragments used to add empty duplicates.
                if any(fragment in line for fragment in classification_list):
                    domain = line.strip().split(" ")[0][prefix_length:]
                    sec_met_list.append(domain)
        if len(sec_met_list) > 3:  # Keep only files with 4 or more hits
            tuple_list.append((name, sec_met_list, len(sec_met_list)))

    # Highest saccharide-related count first. Sorting ascending and then
    # reversing keeps the original ordering of files with equal counts.
    ranked = sorted(tuple_list, key=lambda entry: entry[-1])
    ranked.reverse()
    return ranked
if __name__ == "__main__":
    # Each sub-directory of the input folder is analysed separately and its
    # results are both printed and appended to a single report file.
    folder_path = "./antismash_analyser_input"
    output_folder = "./antismash_analyser_output"
    # open() does not create missing parent directories, so make sure the
    # report folder exists before writing into it.
    os.makedirs(output_folder, exist_ok=True)
    with open(f"{output_folder}/output.txt", "w") as f:
        for folder in os.listdir(folder_path):
            # Stray regular files would make os.listdir() inside the parser
            # raise NotADirectoryError; only real directories are analysed.
            if not os.path.isdir(f"{folder_path}/{folder}"):
                continue
            print(folder)
            f.write("\n" + str(folder) + "\n")
            for i in parse_genbank_antismash(f"{folder_path}/{folder}", ""):
                print(i)
                f.write(str(i) + "\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement