Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/bash
- # This program filters out human and 16s sequences from fastq files.
- # To execute, you'll need to first change the path to where the fastq files are.
- # This makes things way easier
- # To submit this, in mercer, you need to do sh human-16s-read-filter.sh sample-prefix
- # sample prefix is everything to the left of .r(1|2).fastq
- # this is currently modified to handle paired-end reads
- path="/scratch/at120/virome-pipeline/virome-analysis"
- fastq=$1
- cd $path
- bowtie2=\
- $(echo \
- "module load bowtie2 && \
- bowtie2 \
- -p 12 \
- --very-sensitive-local \
- --un-conc $path/$fastq.unconc.fastq \
- -x /scratch/at120/shared/db/human-16s-for-filtering/hg19-silva_16s.fa \
- -1 $path/$fastq.r1.fastq \
- -2 $path/$fastq.r2.fastq \
- -S $path/$fastq.sam \
- && rm $path/$fastq.sam"\
- | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.bowtie2 -l walltime=72:00:00,nodes=1:ppn=12,mem=12gb)
- echo $bowtie2 > $fastq.ids.txt
- echo "Submitted Mapping"
- convertBT2=\
- $(echo \
- "module load khmer && \
- fastq-to-fasta.py $path/$fastq.unconc.1.fastq > $path/$fastq.unconc.fasta && \
- fastq-to-fasta.py $path/$fastq.unconc.2.fastq >> $path/$fastq.unconc.fasta"\
- | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.convert -W depend=afterok:$bowtie2 -l walltime=72:00:00,nodes=1:ppn=1,mem=4gb)
- echo $convertBT2 >> $fastq.ids.txt
- echo "Submitted Converting to Fasta"
- megablastFilter=\
- $(echo \
- "module load blast+ && \
- blastn \
- -query $path/$fastq.unconc.fasta \
- -out $path/$fastq.unconc.megablast.tsv \
- -outfmt 6 \
- -evalue 0.00001 \
- -max_target_seqs 1 \
- -culling_limit 2 \
- -num_threads 12 \
- -db /scratch/at120/shared/db/human-16s-for-filtering/hg19-silva_16s.fa"\
- | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.megablastFilter -W depend=afterok:$convertBT2 -l walltime=72:00:00,nodes=1:ppn=12,mem=12gb)
- echo $megablastFilter >> $fastq.ids.txt
- echo "Submitted Megablast Filter"
- unmappedMegablast=\
- $(echo \
- "module load biopython && \
- python /scratch/at120/virome-pipeline/get-unmapped.py $path/$fastq.unconc.megablast.tsv $path/$fastq.unconc.fasta $path/$fastq.unconc.megablast.fasta"\
- | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.blastnFilter -W depend=afterok:$megablastFilter -l walltime=72:00:00,nodes=1:ppn=1,mem=12gb)
- echo $unmappedMegablast >> $fastq.ids.txt
- echo "Submitted Unmapped Megablast"
- blastnFilter=\
- $(echo \
- "module load blast+ && \
- blastn \
- -task blastn \
- -query $path/$fastq.unconc.megablast.fasta \
- -out $path/$fastq.unconc.megablast.blastn.tsv \
- -outfmt 6 \
- -evalue 0.00001 \
- -max_target_seqs 1 \
- -culling_limit 2 \
- -num_threads 12 \
- -db /scratch/at120/shared/db/human-16s-for-filtering/hg19-silva_16s.fa"\
- | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.blastnFilter -W depend=afterok:$unmappedMegablast -l walltime=72:00:00,nodes=1:ppn=12,mem=12gb)
- echo $blastnFilter >> $fastq.ids.txt
- echo "Submitted blastn Filter"
- unmappedBlastn=\
- $(echo \
- "module load biopython && \
- python $path/get-unmapped.py $path/$fastq.unconc.megablast.blastn.tsv $path/$fastq.unconc.megablast.fasta $path/$fastq.unconc.megablast.blastn.fasta"\
- | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.unmappedBlastn -W depend=afterok:$blastnFilter -l walltime=72:00:00,nodes=1:ppn=1,mem=12gb)
- echo $unmappedBlastn >> $fastq.ids.txt
- echo "Submitted Unmapped Blastn"
- extractPairedReads=\
- $(echo \
- "module load khmer && \
- extract-paired-reads.py $path/$fastq.unconc.megablast.blastn.fasta"\
- | qsub -m ae -M twaddlac@gmail.com -j oe -N $fastq.extractPairedReads -W depend=afterok:$unmappedBlastn -l walltime=72:00:00,nodes=1:ppn=1,mem=12gb)
- echo $extractPairedReads >> $fastq.ids.txt
- echo "Submitted Extract Paired Reads"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement