Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- set -e
- set -x
- set -o pipefail
- TMP00=/scratch/umcg-mterpstra/
- ml picard/2.2.2-foss-2016a-Java-1.8.0_74
- (for fastq1 in $(ls /scratch/umcg-mterpstra/raw/160915_M01785_0355_000000000-AW3J7_ANNA/*_R1_001.fastq.gz); do
- echo "## INFO ##"$(date)" ## file '"$fastq1"' start"
- fastq2=$(dirname $fastq1)/$(basename $fastq1 _R1_001.fastq.gz)"_R3_001.fastq.gz"
- rbcfastq=$(dirname $fastq1)/$(basename $fastq1 _R1_001.fastq.gz)"_R2_001.fastq.gz"
- mkdir -p $TMP00/projects/downsample/paddedfq/
- tmpFastq1=$TMP00"/projects/downsample/paddedfq/"$(basename $fastq1)
- tmpFastq2=$TMP00"/projects/downsample/paddedfq/"$(basename $fastq2)
- #padding + random barcode in flowcell name
- ml DigitalBarcodeReadgroups/0.1.5-foss-2016a-Perl-5.20.2-bare
- gzip -dc $fastq1 | \
- perl -wpe 'if($.%4==2 && length($_) < 10){chomp $_;$_ .= "N" x (10 - length($_))."\n"}elsif($.%4==0 && length($_) < 10){chomp $_;$_ .= "#" x (10 - length($_))."\n"}' | \
- gzip | perl $EBROOTDIGITALBARCODEREADGROUPS/src/NugeneMergeFastqFiles.pl $rbcfastq ./ -
- mv -v -- "-.fq.gz" $tmpFastq1
- gzip -cd $fastq2 | \
- perl -wpe 'if($.%4==2 && length($_) < 10){chomp $_;$_ .= "N" x (10 - length($_))."\n"}elsif($.%4==0 && length($_) < 10){chomp $_;$_ .= "#" x (10 - length($_))."\n"}' | \
- gzip | perl $EBROOTDIGITALBARCODEREADGROUPS/src/NugeneMergeFastqFiles.pl $rbcfastq ./ -
- mv -v -- "-.fq.gz" $tmpFastq2
- #fastq to sam
- mkdir -p $TMP00/projects/downsample/ubam/
- tmpBam=$TMP00/projects/downsample/ubam/$(basename $fastq1 _R1_001.fastq.gz).bam
- java -jar ${EBROOTPICARD}/picard.jar FastqToSam F1=$tmpFastq1 F2=$tmpFastq2 O=$tmpBam SAMPLE_NAME="fastq" SORT_ORDER=queryname;
- #downsample 4 times and different amount of reads
- ( for run in "1" "2" "3" "4"; do
- for n in "1500000" "1000000" "200000" "400000" "100000"; do
- (
- mkdir -p $TMP00/projects/downsample/r${run}n${n}/
- tmpDownsampledBam=$TMP00/projects/downsample/r${run}n${n}/$(basename $fastq1 _R1_001.fastq.gz).bam
- fqLines=$(gzip -dc $fastq1| wc -l)
- #downsamplesam
- java -jar ${EBROOTPICARD}/picard.jar DownsampleSam I=$tmpBam O=$tmpDownsampledBam PROBABILITY=$(perl -we 'my $fqlines=shift @ARGV; my $fqReads=$fqlines/4;my $targetReads=shift @ARGV;print $targetReads/$fqReads;' $fqLines $n)
- #samtofastq
- downsampledFastq1gz=$TMP00/projects/downsample/r${run}n${n}/$(basename $fastq1)
- downsampledFastq2gz=$TMP00/projects/downsample/r${run}n${n}/$(basename $fastq2)
- java -jar ${EBROOTPICARD}/picard.jar SamToFastq I=$tmpDownsampledBam FASTQ=$downsampledFastq1gz SECOND_END_FASTQ=$downsampledFastq2gz
- gzip -dc $downsampledFastq1gz | perl -wne 'if($. % 4 == 1){print $_; my $seq = $_; $seq =~ s/.*_([ATCGN]{6}).*/$1/; print $seq."+\n"."B" x 6 . "\n"}' | gzip > $TMP00/projects/downsample/r${run}n${n}/$(basename $rbcfastq)
- )&
- done
- wait
- done)
- echo "## INFO ##"$(date)" ## file '"$fastq1"' done"
- done)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement