Advertisement
Guest User

Untitled

a guest
Jul 16th, 2019
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.42 KB | None | 0 0
  1. #!/bin/sh
  2. prefix=$(echo "$1" | cut -f 1 -d '-')
  3. cores=$(nproc --all)
  4. if [ "$1" = "" ]
  5. then
  6. echo "Llamada incorrecta, debe indicar el nombre del archivo Input en la llamada al script."
  7. echo "Por ejemplo: ./script.sh 20190618-0-Input_R1_.fastq.gz"
  8. echo "Se debe poner los archivos de entrada en la ruta del usuario actual: ${HOME}"
  9. else
  10. if [ "$2" = "" ]
  11. then
  12. echo ""
  13. else
  14. cores=$2
  15. fi
  16. cd ${HOME}
  17. if [ $(dpkg-query -W -f='${Status}' samtools 2>/dev/null | grep -c "ok installed") -eq 0 ];
  18. then
  19. sudo apt install samtools;
  20. fi
  21. if [ $(dpkg-query -W -f='${Status}' curl 2>/dev/null | grep -c "ok installed") -eq 0 ];
  22. then
  23. sudo apt install curl;
  24. fi
  25. if [ $(dpkg-query -W -f='${Status}' bcftools 2>/dev/null | grep -c "ok installed") -eq 0 ];
  26. then
  27. sudo apt install bcftools;
  28. fi
  29.  
  30. if test -f "$prefix-0-Input_R1_.fastq.gz"; then
  31. if test -f "bin/trimmomatic.jar"; then
  32. echo ""
  33. else
  34. echo " === Error en el preprocesado. El archivo bin/trimmomatic.jar no existe. Descargando... ==="
  35. curl -L https://github.com/timflutre/trimmomatic/archive/master.zip -o master.zip
  36. unzip master.zip
  37. cd trimmomatic-master
  38. make
  39. make install
  40. cp adapters/TruSeq3-SE.fa ${HOME}/
  41. cp adapters/TruSeq3-PE.fa ${HOME}/
  42. echo "=== Instalacion de Trimmomatic finalizada. ==="
  43. cd ${HOME}
  44. fi
  45. echo "=== Realizando preprocesado ==="
  46. echo ""
  47. echo "$cores"
  48. java -jar bin/trimmomatic.jar PE -basein $prefix-0-Input_R1_.fastq.gz -baseout $prefix-1-TRIM.fastq.gz ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3
  49. if test -f "hg19.fa"; then
  50. echo ""
  51. else
  52. echo ""
  53. echo "No se ha encontrado el archivo hg19.fa, descargando..."
  54. echo ""
  55. wget http://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.gz
  56. fi
  57. echo ""
  58. echo "=== Realizando alineamiento ==="
  59. echo ""
  60. if test -f "index/hg19_index.1.bt2"; then
  61. echo ""
  62. else
  63. echo ""
  64. echo "=== Archivos de referencia no encontrados. Este paso puede tardar unas horas ==="
  65. echo ""
  66.  
  67. mkdir index
  68. bowtie2-build hg19.fa.gz index/hg19_index
  69. fi
  70. bowtie2 -x index/hg19_index -p $cores -1 $prefix-1-TRIM_1P.fastq.gz -2 $prefix-1-TRIM_2P.fastq.gz -S $prefix-2-BOWTIE.sam
  71. echo ""
  72. echo "=== Ordenando ==="
  73. echo ""
  74. samtools sort -@ $cores $prefix-2-BOWTIE.sam > $prefix-3-SAMTOOLSsort.bam
  75. echo ""
  76. echo "=== Indexando ==="
  77. echo ""
  78. samtools index -@ $cores $prefix-3-SAMTOOLSsort.bam
  79. echo "Indexado terminado."
  80. echo ""
  81. echo "=== Opcion BCFTOOLS: ==="
  82. echo ""
  83.  
  84. bcftools mpileup -threads $cores -f hg19.fa $prefix-3-SAMTOOLSsort.bam | bcftools call -mv -Oz -o $prefix-4a-BCF.vcf.gz
  85.  
  86. bcftools filter -threads $cores -i 'QUAL>=20 & DP>=10' $prefix-4a-BCF.vcf.gz > $prefix-4a-BCF_filt.vcf.gz
  87.  
  88. echo ""
  89. echo "=== Opcion VARSCAN ==="
  90. echo ""
  91.  
  92. if test -f "VarScan.v2.4.3.jar"; then
  93. echo ""
  94. else
  95. echo ""
  96. echo "=== No se ha encontrado el archivo VarScan.v2.4.3.jar, descargando... ==="
  97. echo ""
  98. wget https://github.com/dkoboldt/varscan/raw/master/VarScan.v2.4.3.jar
  99. fi
  100.  
  101. echo "SNP + INDEL Variants"
  102.  
  103. samtools mpileup -f hg19.fa $prefix-3-SAMTOOLSsort.bam | java -jar VarScan.v2.4.3.jar mpileup2cns -v --min-coverage 100 --min-reads2 4 --min-avg-qual 20 --min-var-freq 0.30 --output-vcf --variants > $prefix-4b-VSCANcns_filt.vcf
  104.  
  105. echo "SNP Variants"
  106. samtools mpileup -f hg19.fa $prefix-3-SAMTOOLSsort.bam | java -jar VarScan.v2.4.3.jar mpileup2snp -v --min-coverage 100 --min-reads2 4 --min-avg-qual 20 --min-var-freq 0.30 --output-vcf --variants > $prefix-4b-VSCANcns_filt.vcf
  107. echo "INDEL Variants"
  108. samtools mpileup -f hg19.fa $prefix-3-SAMTOOLSsort.bam | java -jar VarScan.v2.4.3.jar mpileup2indel -v --min-coverage 100 --min-reads2 4 --min-avg-qual 20 --min-var-freq 0.30 --output-vcf --variants > $prefix-4b-VSCANcns_filt.vcf
  109.  
  110. echo ""
  111. echo "=== Replacement of read groups in BAM file ==="
  112. echo ""
  113. if test -f "gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar"; then
  114. echo ""
  115. else
  116. echo ""
  117. echo "=== No se ha encontrado el archivo gatk-package-4.1.2.0-local.jar, descargando... ==="
  118. echo ""
  119. wget https://github.com/broadinstitute/gatk/releases/download/4.1.2.0/gatk-4.1.2.0.zip
  120. unzip gatk-4.1.2.0.zip
  121. export PATH=$PATH:${HOME}/gatk-4.1.2.0/
  122. echo ""
  123. echo "Instalacion de GATK finalizada..."
  124. echo ""
  125. fi
  126. java -jar gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar BaseRecalibrator -R hg19.fa -I $prefix-4c-GATKgroup.bam -known-sites All_20180423.vcf.gz -O $prefix-4c-GATKrecal.table
  127. java -jar gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar CreateSequenceDictionary -R hg19.fa.gz -O hg19.dict
  128. java -jar gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar AddOrReplaceReadGroups -I $prefix-3-SAMTOOLSsort.bam -O $prefix-4c-GATKgroup.bam -ID 1 -LB dna -PL illumina -PU UNKNOWN -SM $prefix-3-SAMTOOLSsort
  129.  
  130. java -jar gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar HaplotypeCaller -R hg19.fa -I $prefix-4c-GATKrecal.bam --genotyping-mode DISCOVERY -O $prefix-4c-GATKcns.vcf
  131.  
  132. echo ""
  133. echo "=== Iniciando filtrado ==="
  134. echo ""
  135.  
  136. java -jar gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar SelectVariants -R hg19.fa -V $prefix-4c-GATKcns.vcf -O $prefix-4c-GATKsnp.vcf -select-type SNP
  137.  
  138. java -jar gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar VariantFiltration --filter-name "NO PASS" -filter "QD<2.0 || FS>60.0 || MQ<40.0|| MQRankSum<-12.5 ||ReadPosRankSum<-8.0" -R hg19.fa -V $prefix-4c-GATKsnp.vcf -O $prefix-4c-GATKsnp_filt.vcf
  139.  
  140. java -jar gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar SelectVariants -R hg19.fa -V $prefix-4c-GATKcns.vcf -O $prefix-4c-GATKindel.vcf -select-type INDEL
  141.  
  142. java -jar gatk-4.1.2.0/gatk-package-4.1.2.0-local.jar VariantFiltration --filter-name "NO PASS" -filter "QD<2.0 || FS>200.0 || ReadPosRankSum<-20.0" -R hg19.fa -V $prefix-4c-GATKindel.vcf -O $prefix-4c-GATKindel_filt.vcf
  143.  
  144. echo ""
  145. echo "=== Combinando ==="
  146. echo ""
  147.  
  148.  
  149. gatk MergeVcfs -I $prefix-4c-GATKsnp_filt.vcf -I $prefix-4c-GATKindel_filt.vcf -R hg19.fa -O $prefix-4c-GATKmerge.vcf
  150.  
  151. gatk SelectVariants -R hg19.fa -V $prefix-4c-GATKmerge.vcf -O $prefix-4c-GATKmerge_pass.vcf -select 'vc.isNotFiltered()'
  152.  
  153. echo ""
  154. echo "=== Uniendo VCFs ==="
  155. echo ""
  156.  
  157. if test -f "GenomeAnalysisTK.jar"; then
  158.  
  159.  
  160. java -jar GenomeAnalysisTK.jar -T CombineVariants -R hg19.fa -minN 2 -V: $prefix-4a-BCF_filt.vcf -V: $prefix-4b-VSCANcns_filt.vcf -V: $prefix-4c-GATKmerge_pass.vcf -o $prefix-5_GATKunion.vcf
  161.  
  162. echo "=== VCFs unidos, saliendo... ==="
  163.  
  164. else
  165. echo ""
  166. echo "=== No se ha encontrado el archivo GenomeAnalysisTK.jar. Por favor descarguelo y ponga el jar en la carpeta ${HOME} ==="
  167. echo ""
  168.  
  169. fi
  170.  
  171.  
  172.  
  173. else
  174. echo "=== El archivo de entrada indicado no existe. Debe estar en la misma ruta del script ==="
  175. fi
  176. fi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement