Advertisement
Guest User

Untitled

a guest
Jan 18th, 2017
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.82 KB | None | 0 0
  1. set -e
  2. set -x
  3. set -o pipefail
  4. TMP00=/scratch/umcg-mterpstra/
  5. ml picard/2.2.2-foss-2016a-Java-1.8.0_74
  6.  
  7. (for fastq1 in $(ls /scratch/umcg-mterpstra/raw/160915_M01785_0355_000000000-AW3J7_ANNA/*_R1_001.fastq.gz); do
  8. echo "## INFO ##"$(date)" ## file '"$fastq1"' start"
  9.  
  10. fastq2=$(dirname $fastq1)/$(basename $fastq1 _R1_001.fastq.gz)"_R3_001.fastq.gz"
  11. rbcfastq=$(dirname $fastq1)/$(basename $fastq1 _R1_001.fastq.gz)"_R2_001.fastq.gz"
  12. mkdir -p $TMP00/projects/downsample/paddedfq/
  13. tmpFastq1=$TMP00"/projects/downsample/paddedfq/"$(basename $fastq1)
  14. tmpFastq2=$TMP00"/projects/downsample/paddedfq/"$(basename $fastq2)
  15. #padding + random barcode in flowcell name
  16. ml DigitalBarcodeReadgroups/0.1.5-foss-2016a-Perl-5.20.2-bare
  17. gzip -dc $fastq1 | \
  18. perl -wpe 'if($.%4==2 && length($_) < 10){chomp $_;$_ .= "N" x (10 - length($_))."\n"}elsif($.%4==0 && length($_) < 10){chomp $_;$_ .= "#" x (10 - length($_))."\n"}' | \
  19. gzip | perl $EBROOTDIGITALBARCODEREADGROUPS/src/NugeneMergeFastqFiles.pl $rbcfastq ./ -
  20. mv -v -- "-.fq.gz" $tmpFastq1
  21. gzip -cd $fastq2 | \
  22. perl -wpe 'if($.%4==2 && length($_) < 10){chomp $_;$_ .= "N" x (10 - length($_))."\n"}elsif($.%4==0 && length($_) < 10){chomp $_;$_ .= "#" x (10 - length($_))."\n"}' | \
  23. gzip | perl $EBROOTDIGITALBARCODEREADGROUPS/src/NugeneMergeFastqFiles.pl $rbcfastq ./ -
  24. mv -v -- "-.fq.gz" $tmpFastq2
  25.  
  26. #fastq to sam
  27. mkdir -p $TMP00/projects/downsample/ubam/
  28. tmpBam=$TMP00/projects/downsample/ubam/$(basename $fastq1 _R1_001.fastq.gz).bam
  29. java -jar ${EBROOTPICARD}/picard.jar FastqToSam F1=$tmpFastq1 F2=$tmpFastq2 O=$tmpBam SAMPLE_NAME="fastq" SORT_ORDER=queryname;
  30. #downsample 4 times and different amount of reads
  31. ( for run in "1" "2" "3" "4"; do
  32. for n in "1500000" "1000000" "200000" "400000" "100000"; do
  33. (
  34. mkdir -p $TMP00/projects/downsample/r${run}n${n}/
  35. tmpDownsampledBam=$TMP00/projects/downsample/r${run}n${n}/$(basename $fastq1 _R1_001.fastq.gz).bam
  36. fqLines=$(gzip -dc $fastq1| wc -l)
  37.  
  38. #downsamplesam
  39. java -jar ${EBROOTPICARD}/picard.jar DownsampleSam I=$tmpBam O=$tmpDownsampledBam PROBABILITY=$(perl -we 'my $fqlines=shift @ARGV; my $fqReads=$fqlines/4;my $targetReads=shift @ARGV;print $targetReads/$fqReads;' $fqLines $n)
  40. #samtofastq
  41. downsampledFastq1gz=$TMP00/projects/downsample/r${run}n${n}/$(basename $fastq1)
  42. downsampledFastq2gz=$TMP00/projects/downsample/r${run}n${n}/$(basename $fastq2)
  43.  
  44. java -jar ${EBROOTPICARD}/picard.jar SamToFastq I=$tmpDownsampledBam FASTQ=$downsampledFastq1gz SECOND_END_FASTQ=$downsampledFastq2gz
  45. gzip -dc $downsampledFastq1gz | perl -wne 'if($. % 4 == 1){print $_; my $seq = $_; $seq =~ s/.*_([ATCGN]{6}).*/$1/; print $seq."+\n"."B" x 6 . "\n"}' | gzip > $TMP00/projects/downsample/r${run}n${n}/$(basename $rbcfastq)
  46. )&
  47.  
  48. done
  49. wait
  50. done)
  51. echo "## INFO ##"$(date)" ## file '"$fastq1"' done"
  52. done)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement