Advertisement
Guest User

Untitled

a guest
Nov 10th, 2015
129
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.56 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. # This program filters out human and 16s sequences from fastq files.
  4. # To execute, you'll need to first change the path to where the fastq files are.
  5. # This makes things way easier
  6.  
  7. # To submit this, in mercer, you need to do sh human-16s-read-filter.sh sample-prefix
  8. # sample prefix is everything to the left of .r(1|2).fastq
  9. # this is currently modified to handle paired-end reads
  10.  
  11. path="/scratch/at120/virome-pipeline/virome-analysis"
  12. fastq=$1
  13.  
  14. cd $path
  15.  
  16. bowtie2=\
  17. $(echo \
  18. "module load bowtie2 && \
  19. bowtie2 \
  20. -p 12 \
  21. --very-sensitive-local \
  22. --un-conc $path/$fastq.unconc.fastq \
  23. -x /scratch/at120/shared/db/human-16s-for-filtering/hg19-silva_16s.fa \
  24. -1 $path/$fastq.r1.fastq \
  25. -2 $path/$fastq.r2.fastq \
  26. -S $path/$fastq.sam \
  27. && rm $path/$fastq.sam"\
  28. | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.bowtie2 -l walltime=72:00:00,nodes=1:ppn=12,mem=12gb)
  29. echo $bowtie2 > $fastq.ids.txt
  30. echo "Submitted Mapping"
  31.  
  32. convertBT2=\
  33. $(echo \
  34. "module load khmer && \
  35. fastq-to-fasta.py $path/$fastq.unconc.1.fastq > $path/$fastq.unconc.fasta && \
  36. fastq-to-fasta.py $path/$fastq.unconc.2.fastq >> $path/$fastq.unconc.fasta"\
  37. | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.convert -W depend=afterok:$bowtie2 -l walltime=72:00:00,nodes=1:ppn=1,mem=4gb)
  38. echo $convertBT2 >> $fastq.ids.txt
  39. echo "Submitted Converting to Fasta"
  40.  
  41. megablastFilter=\
  42. $(echo \
  43. "module load blast+ && \
  44. blastn \
  45. -query $path/$fastq.unconc.fasta \
  46. -out $path/$fastq.unconc.megablast.tsv \
  47. -outfmt 6 \
  48. -evalue 0.00001 \
  49. -max_target_seqs 1 \
  50. -culling_limit 2 \
  51. -num_threads 12 \
  52. -db /scratch/at120/shared/db/human-16s-for-filtering/hg19-silva_16s.fa"\
  53. | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.megablastFilter -W depend=afterok:$convertBT2 -l walltime=72:00:00,nodes=1:ppn=12,mem=12gb)
  54. echo $megablastFilter >> $fastq.ids.txt
  55. echo "Submitted Megablast Filter"
  56.  
  57. unmappedMegablast=\
  58. $(echo \
  59. "module load biopython && \
  60. python /scratch/at120/virome-pipeline/get-unmapped.py $path/$fastq.unconc.megablast.tsv $path/$fastq.unconc.fasta $path/$fastq.unconc.megablast.fasta"\
  61. | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.blastnFilter -W depend=afterok:$megablastFilter -l walltime=72:00:00,nodes=1:ppn=1,mem=12gb)
  62. echo $unmappedMegablast >> $fastq.ids.txt
  63. echo "Submitted Unmapped Megablast"
  64.  
  65. blastnFilter=\
  66. $(echo \
  67. "module load blast+ && \
  68. blastn \
  69. -task blastn \
  70. -query $path/$fastq.unconc.megablast.fasta \
  71. -out $path/$fastq.unconc.megablast.blastn.tsv \
  72. -outfmt 6 \
  73. -evalue 0.00001 \
  74. -max_target_seqs 1 \
  75. -culling_limit 2 \
  76. -num_threads 12 \
  77. -db /scratch/at120/shared/db/human-16s-for-filtering/hg19-silva_16s.fa"\
  78. | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.blastnFilter -W depend=afterok:$unmappedMegablast -l walltime=72:00:00,nodes=1:ppn=12,mem=12gb)
  79. echo $blastnFilter >> $fastq.ids.txt
  80. echo "Submitted blastn Filter"
  81.  
  82. unmappedBlastn=\
  83. $(echo \
  84. "module load biopython && \
  85. python $path/get-unmapped.py $path/$fastq.unconc.megablast.blastn.tsv $path/$fastq.unconc.megablast.fasta $path/$fastq.unconc.megablast.blastn.fasta"\
  86. | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.unmappedBlastn -W depend=afterok:$blastnFilter -l walltime=72:00:00,nodes=1:ppn=1,mem=12gb)
  87. echo $unmappedBlastn >> $fastq.ids.txt
  88. echo "Submitted Unmapped Blastn"
  89.  
  90. extractPairedReads=\
  91. $(echo \
  92. "module load khmer && \
  93. extract-paired-reads.py $path/$fastq.unconc.megablast.blastn.fasta"\
  94. | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.extractPairedReads -W depend=afterok:$unmappedBlastn -l walltime=72:00:00,nodes=1:ppn=1,mem=12gb)
  95. echo $extractPairedReads >> $fastq.ids.txt
  96. echo "Submitted Extract Paired Reads"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement