Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- This script creates Salmon flenDist.txt files from fld.gz files,
- which are parseable by MultiQC.
- """
- import struct
- import gzip
- import argparse
- import os
# Command-line interface: one required positional argument, ``dataset``.
# Parsing happens at module level; the result is kept in ``args``.
parser = argparse.ArgumentParser()
parser.add_argument(
    'dataset',
    help='dataset to create flenDist.txt Salmon files for',
)
args = parser.parse_args()
def get_fld(path):
    '''
    Returns a tuple containing the fragment length distribution.

    The gzip-compressed file at ``path`` is read in full and decoded as a
    flat sequence of native C ints (struct format 'i'). The number of bins
    is derived from the payload size instead of being fixed at 1001
    (0 to 1000), so files holding a different number of bins decode too.
    An empty payload yields an empty tuple.

    Raises struct.error if the payload is not a whole number of ints.
    '''
    with gzip.open(path, 'rb') as fld_file:
        payload = fld_file.read()
    n_bins = len(payload) // struct.calcsize('i')
    # A trailing partial value makes the size mismatch the format, so
    # unpack still raises struct.error for corrupt input.
    return struct.unpack('{}i'.format(n_bins), payload)
def Main():
    '''
    Writes a flenDist.txt file for each sample folder of the dataset.

    Every sub-directory of ``<dataset>/expression/salmon/`` (``dataset``
    comes from the module-level parsed arguments ``args``) is inspected.
    When it contains ``aux_info/fld.gz``, the values returned by
    ``get_fld`` are written to ``libParams/flenDist.txt`` on one line,
    each value followed by a single space. Folders without ``fld.gz`` are
    reported and skipped.
    '''
    salmon_dir = str(args.dataset) + '/expression/salmon/'
    subfolders = [f.path for f in os.scandir(salmon_dir) if f.is_dir()]
    for sub in subfolders:
        fld_path = sub + '/aux_info/fld.gz'
        if not os.path.exists(fld_path):
            print('{}/aux_info/fld.gz does not exists\n'.format(sub))
            continue
        print('Writing flenDist.txt to {}...\n'.format(sub))
        fld_tuple = get_fld(fld_path)
        out_dir = sub + '/libParams'
        # open() does not create missing parent directories; make sure the
        # target folder exists so the write cannot fail on a fresh output.
        os.makedirs(out_dir, exist_ok=True)
        with open(out_dir + '/flenDist.txt', 'w') as out_file:
            out_file.write(''.join(str(val) + ' ' for val in fld_tuple))


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    Main()
RAW Paste Data