Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import re

# Root directory whose immediate sub-directories are the study folders.
path = "/Users/rmadupuri/GitHub/datahub/public"

# Name of the per-study meta file that gets patched.
_META_FILENAME = "meta_RNA_Seq_v2_expression_median.txt"

# Line written in place of any existing "profile_description" line.
_PROFILE_DESCRIPTION_LINE = (
    "profile_description: mRNA gene expression (RNA Seq V2 RSEM)\n"
)


def _find_tcga_studies(root):
    """Return the names of the TCGA study directories directly under *root*.

    A directory qualifies when its name ends in ``_tcga`` (provisional
    studies) or contains ``_tcga_pub`` (published studies).  Each name is
    returned once, in ``os.listdir`` order, even if it matches both rules.
    """
    studies = []
    for name in os.listdir(root):
        if not os.path.isdir(os.path.join(root, name)):
            continue
        if re.search(r'_tcga$', name) or "_tcga_pub" in name:
            studies.append(name)
    return studies


def _rewrite_profile_description(meta_path):
    """Replace every ``profile_description`` line of the file at *meta_path*.

    All other lines are written back unchanged.  The whole file is read
    before it is reopened for writing, so the original is never truncated
    while it is still being read, and both handles are closed even if an
    error occurs.
    """
    with open(meta_path, 'r') as src:
        lines = [
            _PROFILE_DESCRIPTION_LINE
            if line.startswith("profile_description") else line
            for line in src
        ]
    # Mode "w" truncates in place; no separate os.remove() step is needed.
    with open(meta_path, 'w') as dst:
        dst.writelines(lines)


def main(root=path):
    """Patch the RNA Seq V2 meta file of every TCGA study under *root*.

    Studies that have no ``meta_RNA_Seq_v2_expression_median.txt`` file are
    skipped.  Raises ``OSError`` if *root* cannot be listed or a meta file
    cannot be read or written.
    """
    for study in _find_tcga_studies(root):
        meta_path = os.path.join(root, study, _META_FILENAME)
        if os.path.isfile(meta_path):
            _rewrite_profile_description(meta_path)


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment