Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/bash
- ### DEFAULTS (FEEL FREE TO EDIT THESE) ##################
- MIN_CONTIG_SIZE_FOR_ASSEMBLY=1000
- MIN_CONTIG_SIZE_FOR_CONTIGS_DB=2500
- MIN_CONTIG_SIZE_FOR_PROFILE_DB=2500
- NUM_THREADS_FOR_ASSEMBLY=70
- NUM_THREADS_FOR_BLASTN=80
- NUM_THREADS_FOR_MAPPING=10
- NUM_THREADS_FOR_HMMSCAN=20
- # if you want to skip SNV analysis to make things faster then set the following variable to --skip-SNV-profiling
- SNV_ANALYSIS_CONFIGURATION=
- #########################################################
- # setting the pipeline to stop in case a command returns a non-zero status
- set -e
- job=$1
- email=$2
- if [ ! -f "samples.txt" ]
- then
- echo "
- You must have a samples.txt in this directory. The format of the
- samples.txt is identical to the input file of iu-gen-configs. For
- details, see iu-gen-congigs -h
- "
- exit 1
- fi
- if [ ! -f "references.txt" ]
- then
- echo "
- You must have a references.txt in this directory. "
- exit 1
- fi
- C() {
- echo -e "\033[0;30m\033[46m$1\033[0m"
- }
- INFO() {
- echo
- C "#"
- C "#"
- C "# $1"
- C "#"
- C "#"
- echo
- }
- WAIT () {
- python /workspace/meren/wait_for_cluster.py $1
- }
- CHECK_FILE () {
- if [ -s $1 ]
- then
- :
- else
- echo "Error: Expected to find $1, but it is not there!"
- exit 1
- fi
- }
- LOG () {
- msg="[`date`] $1"
- echo $msg >> $job-log.txt
- echo $msg | mail -s "Update on $job" $email
- }
- DO_INIT() {
- rm -rf 00_LOGS
- mkdir 00_LOGS
- LOG "Hi!"
- LOG `anvi-interactive -v`
- }
- DO_GEN_CONTIG () {
- ref_genome=$1
- ref_genome_path=$2
- DIR="03_CONTIGS"
- INFO $DIR && mkdir -p $DIR
- clusterize -n 4 -log 00_LOGS/gen-contigs.log anvi-gen-contigs-database -f $ref_genome_path -o $DIR/${ref_genome}.db
- WAIT anvi-gen-contigs-database
- echo "anvi-gen-contigs-database for $ref_genome in $job on `hostname` is done." | mail -s "Update on $job!" $email
- }
- DO_BOWTIE2_MAPPING () {
- ref_genome=$1
- ref_genome_path=$2
- DIR="04_MAPPING/$ref_genome"
- INFO $DIR
- # I'm not deleting the folder since I might map reads from new metagenomes to the same reference and so I wouldn't want to delete the folder
- # it is the user's responsibility to check if they are mapping metagenoms genomes that were already mapped
- mkdir -p $DIR
- clusterize -n 4 -log 00_LOGS/$ref_genome\_bowtie-build.log bowtie2-build $ref_genome_path $DIR/$ref_genome
- WAIT bowtie2-build
- while read sample R1 R2; do
- if [ "$sample" == "sample" ]; then continue; fi
- clusterize -n $NUM_THREADS_FOR_MAPPING -log 00_LOGS/$ref_genome\_$sample-bowtie2.log bowtie2 --threads $NUM_THREADS_FOR_MAPPING -x $DIR/$ref_genome -1 $R1 -2 $R2 -S $DIR/$sample.sam --no-unal
- done < samples.txt
- WAIT bowtie2
- while read sample R1 R2; do
- if [ "$sample" == "sample" ]; then continue; fi
- clusterize -n 4 -log 00_LOGS/$ref_genome\_$sample-samtools-view.log samtools view -F 4 -bS $DIR/$sample.sam -o $DIR/$sample-RAW.bam
- done < samples.txt
- WAIT samtools
- while read sample R1 R2; do
- if [ "$sample" == "sample" ]; then continue; fi
- clusterize -n 4 -log 00_LOGS/$ref_genome\_$sample-anvi-init-bam.log anvi-init-bam $DIR/$sample-RAW.bam -o $DIR/$sample\.bam
- done < samples.txt
- WAIT anvi-init-bam
- while read sample R1 R2; do
- if [ "$sample" == "sample" ]; then continue; fi
- CHECK_FILE $DIR/$sample.bam
- done < samples.txt
- while read sample R1 R2; do
- if [ "$sample" == "sample" ]; then continue; fi
- if [ -s $DIR/$sample.bam ]
- then
- clusterize -log 00_LOGS/$ref_genome\_$sample-rm-SAMs.log rm $DIR/$sample.sam $DIR/$sample-RAW.bam
- fi
- done < samples.txt
- echo "Bowtie mapping for $job on `hostname` is done." | mail -s "Update on $job!" $email
- }
- DO_ANVI_PROFILE () {
- ref_genome=$1
- DIR="05_ANVIO_PROFILE/$ref_genome"
- MAPPING_DIR="04_MAPPING/$ref_genome"
- INFO $DIR && mkdir -p $DIR
- while read sample R1 R2; do
- if [ "$sample" == "sample" ]; then continue; fi
- clusterize -n 4 -log 00_LOGS/$ref_genome\_$sample-anvi-profile.log anvi-profile $SNV_ANALYSIS_CONFIGURATION -i $MAPPING_DIR/$sample.bam -c 03_CONTIGS/${ref_genome}.db -o $DIR/$sample -M $MIN_CONTIG_SIZE_FOR_PROFILE_DB
- done < samples.txt
- WAIT anvi-profile
- echo "Anvi'o profiling of samples for $ref_genome in $job on `hostname` is done." | mail -s "Update on $job!" $email
- }
- DO_ANVI_MERGE () {
- ref_genome=$1
- mkdir -p 06-$job-MERGED
- DIR="06-$job-MERGED/$ref_genome"
- # this is the only place where I leave the delete folder
- INFO $DIR && rm -rf $DIR
- clusterize -n 4 -log 00_LOGS/$ref_genome\_anvi-merge.log anvi-merge 05_ANVIO_PROFILE/${ref_genome}/*/RUNINFO.cp -o $DIR -c 03_CONTIGS/${ref_genome}.db
- WAIT anvi-merge
- #CHECK_FILE $DIR/RUNINFO.mcp #FIXME: I don't know why this keeps on failing to find this file!!
- echo "Anvi'o merge for $ref_genome in $job on `hostname` is done." | mail -s "Update on $job!" $email
- }
- #DO_INIT
- while read reference path; do
- if [ "$reference" == "reference" ]; then continue; fi
- # Generate contigs db
- DO_GEN_CONTIG $reference $path
- # Map reads with bowtie
- DO_BOWTIE2_MAPPING $reference $path
- # Do anvi profile
- DO_ANVI_PROFILE $reference
- # Do anvi merge
- DO_ANVI_MERGE $reference
- done < references.txt
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement