Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/sh
- #$ -N runsve
- #$ -S /bin/sh
- #$ -l h_vmem=4G
- #$ -l m_mem_free=4G
- #$ -wd /mnt/isilon/conlin_lab/data/wgs/broad/batch1_sept_2018/
- #$ -pe smp 16
- . /etc/profile
- module load perl/current
- module load sve/current
- module load sam-bcf-tools/current
- # Sample name and indexed reference, assumed to be in WD (change to fit needs):
- SAMPLE="sample_name_no_suffix"
- REF="reference_name.fa"
- # Make output dirs for VCFs in WD (structured as future input to FusorSV):
- mkdir -p vcfFiles
- mkdir -p vcfFiles/$SAMPLE
- # Structure of CRAM sample dirs on Respublica is currently: $SAMPLE/v1/$SAMPLE.cram; $SAMPLE/v1/$SAMPLE.cram.crai
- # Could not get some callers to work properly with CRAM:
- samtools view -@16 -b $SAMPLE/v1/$SAMPLE.cram -T $REF > $SAMPLE/v1/$SAMPLE.bam # Already sorted.
- samtools index $SAMPLE/v1/$SAMPLE.bam
- # Three SV callers (GenomeSTRiP not integrated into SVE):
- sve call -r $REF -g hg38 $SAMPLE.bam -M4 -t16 -a cnvnator -o vcfFiles/"$SAMPLE"
- sve call -r $REF -g hg38 $SAMPLE.bam -M4 -t12 -a lumpy -o vcfFiles/"$SAMPLE"
- sve call -r $REF -g hg38 $SAMPLE.bam -M4 -t4 -a delly -o vcfFiles/"$SAMPLE"
- # Note: still optimizing delly - takes longest in this case.
- # cn.MOPS had issues and my current work-around is to force 1-22,X,Y,M mappings.
- # Error message related to creating vector with no reads mapped to it (hence 0 element in vector which creates the error).
- # May also have an issue with the naming convention of the particular ref seqs?
- # i.e., need to remove reference lines in BAM header containing references with 0 mapped reads and also removing HLA for now.
- # Current strategy: remove references not conforming to 1-22,X,Y,M (MT?) from header and non-header lines.
- # 1. "Fix" header:
- samtools view -H $SAMPLE/v1/$SAMPLE.bam > $SAMPLE/v1/$SAMPLE.nohla.header.sam
- samtools view -H $SAMPLE/v1/$SAMPLE.bam | grep -w "@HD" > $SAMPLE/v1/$SAMPLE.nohla.header.sam
- samtools view -H $SAMPLE/v1/$SAMPLE.bam | grep -w "@PG" $SAMPLE/v1/$SAMPLE.nohla.header >> $SAMPLE/v1/$SAMPLE.nohla.header.sam
- for i in {1..22} X Y M; do samtools view -H $SAMPLE/v1/$SAMPLE.bam | grep -w "SN:chr$i"; done >> $SAMPLE/v1/$SAMPLE.nohla.header.sam
- samtools view -H $SAMPLE/v1/$SAMPLE.bam | grep -w "@RG" >> $SAMPLE/v1/$SAMPLE.nohla.header.sam
- # 2. Obtain 1-22,X,Y,M; convert to BAM; run cn.MOPS:
- samtools view -@16 $SAMPLE/v1/$SAMPLE.bam >> $SAMPLE/v1/$SAMPLE.nohla.header.sam
- samtools view -@16 -b $SAMPLE/v1/$SAMPLE.nohla.header.sam > $SAMPLE/v1/$SAMPLE.nohla.header.bam
- samtools index $SAMPLE/v1/$SAMPLE.nohla.header.bam
- rm $SAMPLE/v1/$SAMPLE.nohla.header.sam
- sve call -r $REF -g hg38 $SAMPLE/v1/$SAMPLE.nohla.header.bam -M4 -t8 -a cnmops -o vcfFiles/"$SAMPLE"
Add Comment
Please, Sign In to add comment