SUBANGKAR

countKmers

Feb 21st, 2020
162
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # countKmers: for every accession named in links.txt*, convert its SRA archive
  # to FASTQ, count k-mers with jellyfish, and write a sorted k-mer count table.
  # (modified from NIKS script)
  #
  # Required variables, expected from the sourced config file:
  #   dir           directory holding links.txt* and receiving all output
  #   hawkDir       directory containing countTotalKmer.awk
  #   jellyfishDir  directory containing the jellyfish binary
  # Optional variable:
  #   sraDir        directory holding <accession>.sra files
  #
  # Per accession <acc>, inside ${dir}/<acc>/:
  #   <acc>.kmers.hist.csv, <acc>_cutoff.csv, <acc>_kmers_sorted.txt
  # Appended to inside ${dir}:
  #   total_kmer_counts.txt, sorted_files.txt
  #
  # A failing accession is reported on stderr and skipped; the remaining
  # accessions are still processed and the script then exits non-zero.

  CORES=30      # number of threads handed to jellyfish and sort
  KMERSIZE=31   # RD:61
  CUTOFF=1      # k-mers with a count <= CUTOFF are left out of the dump

  # dir=/home/atif/1000_genomes/Ecoli     #directory for read files
  # hawkDir=/home/atif/hawk-0.8.3-beta-linux          #directory where hawk is installed
  # jellyfishDir=/home/atif/jellyfish-Hawk/bin        #directory where jellyfish is installed
  # sortDir=/home/atif/coreutils/deps/bin     #directory where parallel sort is installed

  configFile="/mnt/c/Users/Subangkar/Desktop/Thesis/Association-Mapping/HAWK/ecoli_analysis/dirList.sh"

  # Print a warning on stderr.
  warn() { printf 'countKmers: %s\n' "$*" >&2; }

  # Print an error on stderr and abort the script.
  die() { warn "$@"; exit 1; }

  #include config file
  [[ -r "$configFile" ]] || die "config file not readable: ${configFile}"
  # shellcheck source=/dev/null
  source "$configFile" || die "failed to source ${configFile}"
  #variables are ->  hawkDir , eigenstratDir , dataDir ,jellyfishDir ,sortDir
  # NOTE(review): the list above does not mention 'dir', yet this script reads
  # it -- confirm that dirList.sh really defines it.

  # Refuse to continue with an empty path: an empty ${dir} would make 'cd' jump
  # to $HOME and every later rm/mv would then act on the wrong directory.
  for required in dir hawkDir jellyfishDir; do
    [[ -n "${!required:-}" ]] || die "'${required}' is not set by ${configFile}"
  done

  # Location of the .sra archives; the config file may override it.
  sraDir=${sraDir:-/home/subangkar/ncbi/public/sra}

  cd -- "$dir" || die "cannot cd to ${dir}"

  # Gather every whitespace-separated token of links.txt* as one accession.
  # awk does the splitting so the tokens are never glob-expanded by the shell.
  listFiles=( links.txt* )
  [[ -e "${listFiles[0]}" ]] || die "no links.txt* file found in ${dir}"
  accessions=()
  while IFS= read -r acc; do
    accessions+=("$acc")
  done < <(awk '{ for (i = 1; i <= NF; i++) print $i }' "${listFiles[@]}")

  #######################################
  # Count, filter and sort the k-mers of the FASTQ files in the current
  # directory, then register the result in ${dir}.
  # Globals:   CORES, KMERSIZE, CUTOFF, dir, hawkDir, jellyfishDir (read)
  # Arguments: $1 - accession, used as prefix of every output file
  # Returns:   0 on success, 1 as soon as one step fails
  #######################################
  build_sorted_kmers() {
    local acc=$1
    local kmerDir="${acc}_kmers"
    local db="${acc}_kmers_jellyfish"
    local hist="${acc}.kmers.hist.csv"
    local kmers="${acc}_kmers.txt"
    local sorted="${acc}_kmers_sorted.txt"
    local status=0
    local -a fastqs parts

    fastqs=( *.fastq )     #change if gzipped
    if [[ ! -e "${fastqs[0]}" ]]; then
      warn "${acc}: no .fastq file to count"
      return 1
    fi

    mkdir -p -- "$kmerDir" || return 1
    # -C counts canonical k-mers (a k-mer and its reverse complement together).
    "${jellyfishDir}/jellyfish" count -C -o "${kmerDir}/tmp" -m "$KMERSIZE" \
      -t "$CORES" -s 512M "${fastqs[@]}" || status=1

    # jellyfish may spill into several tmp_N files; a single one is renamed,
    # several are merged into one database.
    parts=( "${kmerDir}"/tmp* )
    if (( status != 0 )) || [[ ! -e "${parts[0]}" ]]; then
      warn "${acc}: jellyfish count failed"
      status=1
    elif (( ${#parts[@]} == 1 )); then
      mv -- "${parts[0]}" "$db" || status=1
    else
      "${jellyfishDir}/jellyfish" merge -o "$db" "${parts[@]}" || status=1
    fi
    rm -rf -- "$kmerDir"

    if (( status != 0 )) || [[ ! -e "$db" ]]; then
      warn "${acc}: no jellyfish database was produced"
      return 1
    fi

    "${jellyfishDir}/jellyfish" histo -f -o "$hist" -t "$CORES" "$db" \
      || { warn "${acc}: jellyfish histo failed"; return 1; }

    # Swap the two histogram columns and make the file tab-separated.
    awk '{print $2"\t"$1}' "$hist" > "${acc}_tmp" \
      || { warn "${acc}: cannot rewrite ${hist}"; return 1; }
    mv -- "${acc}_tmp" "$hist" || return 1

    awk -f "${hawkDir}/countTotalKmer.awk" "$hist" >> "${dir}/total_kmer_counts.txt" \
      || { warn "${acc}: countTotalKmer.awk failed"; return 1; }

    printf '%s\n' "$CUTOFF" > "${acc}_cutoff.csv" || return 1

    # -L is the lowest count that is still dumped, hence CUTOFF + 1.
    "${jellyfishDir}/jellyfish" dump -c -L "$((CUTOFF + 1))" "$db" > "$kmers" \
      || { warn "${acc}: jellyfish dump failed"; return 1; }
    sort --parallel="$CORES" -n -k 1 "$kmers" > "$sorted" \
      || { warn "${acc}: sort failed"; return 1; }

    # Intermediates are removed only after the sorted table exists, so a failed
    # run keeps the database around for inspection.
    rm -f -- "$db" "$kmers"

    printf '%s\n' "${dir}/${acc}/${sorted}" >> "${dir}/sorted_files.txt"
  }

  #######################################
  # Run the whole pipeline for one accession inside its own directory.
  # Meant to be called in a subshell: it changes the working directory.
  # Globals:   sraDir (read)
  # Arguments: $1 - accession
  # Returns:   0 on success, 1 on failure
  #######################################
  process_sample() {
    local acc=$1
    local status=0

    mkdir -p -- "$acc" || return 1
    cd -- "$acc" || return 1

    if fastq-dump "${sraDir}/${acc}.sra"; then
      build_sorted_kmers "$acc" || status=1
    else
      warn "${acc}: fastq-dump failed"
      status=1
    fi

    # The FASTQ files are only needed for counting; drop them in every case.
    rm -f -- *.fastq

    return "$status"
  }

  failed=0
  for acc in "${accessions[@]}"; do
    # The subshell keeps the 'cd' local, so the next accession always starts
    # in ${dir} no matter how this one ended.
    if ! ( process_sample "$acc" ); then
      warn "skipping ${acc}: processing failed"
      failed=$((failed + 1))
    fi
  done

  if (( failed > 0 )); then
    die "${failed} of ${#accessions[@]} accession(s) failed"
  fi
RAW Paste Data