Guest User

Untitled

a guest
Jun 21st, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.29 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. # For running step0 of the imputation procedure
  4. # Splits files and creates random subset for step 1 of imputation
  5.  
  6. # adapted from original script written by Jacki Buros
  7.  
  8.  
  9. # arguments are
  10. # 1) plink bed source file
  11. # 2) prefix for output files
  12.  
  13. # check number of arguments
  14. E_BADARGS=65
  15.  
  16. if [ ! -n "$1" ]
  17. then
  18. echo "Usage: `basename $0` <file basename> <output prefix> "
  19. exit $E_BADARGS
  20. fi
  21.  
  22. orig_dir=$(pwd)
  23. prefix="$2"
  24.  
  25. # ---- settings ------
  26. # Plink bed source file (used in step0 script)
  27. SRCDIR="$(pwd)"
  28. PLINK="$1"
  29.  
  30. # store output here
  31. OUTDIR="${orig_dir}"
  32.  
  33. # num individuals per group in step 1 impute
  34. SUBSETSIZE=300
  35.  
  36. # num individuals (total) per subset in step 2 impute
  37. STEP2SIZE=200
  38.  
  39. # paths
  40. PLINKBIN="/usr/local/plink/plink --nonfounders --noweb" #CHANGE
  41. GAWKBIN="/usr/bin/gawk" #CHANGE
  42. TARBIN="/bin/tar" #CHANGE
  43.  
  44. # SUBSET and COMPLETE prefixes
  45. COMPLETE="_${prefix}_complete" # name of plink files containing complete bed files where parents are set to 0 0
  46. SUBSET="_${prefix}_subset" # name of plink files containing subsets of the above files
  47. GROUP="_${prefix}_group" # prefix for per-group id lists
  48.  
  49. USERNAME=$(whoami)
  50.  
  51. # prepare dirs for output & scratch
  52. mkdir -p ${OUTDIR}
  53. scratch="/scratch/${USERNAME}/${prefix}_impute" #CHANGE???
  54. mkdir -p ${scratch}
  55.  
  56. # if passing a pedigree file need to convert it to a binary file
  57. if [ -f "${PLINK}.ped" ]
  58. then
  59. echo "Creating PLINK binary files"
  60. $PLINKBIN --file ${PLINK} --map3 --allow-no-sex --make-bed --out ${PLINK}
  61. fi
  62.  
  63. echo "$(date) | Copying source files in $SRCDIR to ${scratch}"
  64. cd ${SRCDIR}
  65. cp -a --dereference ${PLINK}.* ${scratch}
  66.  
  67. echo "$(date) | Preparing base PLINK bed file (named ${COMPLETE})"
  68. gawk '{print $1,$2,"0","0"}' ${PLINK}.fam > _update_parents # pulls out family ids and individual ids from fam file and zero's the parents.
  69. $PLINKBIN --bfile $PLINK --update-parents _update_parents --set-hh-missing --allow-no-sex --make-bed --out $COMPLETE #> /dev/null # plink command to update the parental info with zeros.
  70.  
  71.  
  72. FAMFILE="temp.${PLINK}.fam"
  73. cp ${PLINK}.fam ${FAMFILE}
  74. rm ${PLINK}.*
  75.  
  76. echo "$(date) | Preparing subset PLINK bed file (named ${SUBSET}) to be used in Step 1"
  77.  
  78. # Randomly selects iids from fam file for use in model estimation
  79. for i in `cut -d' ' -f 1-2 $FAMFILE| sed s/\ /,/g`; do echo "$RANDOM $i"; done | sort | cut -d' ' -f 2| sed s/,/\ /g | head -n $SUBSETSIZE > subset.iids
  80. $PLINKBIN --bfile ${COMPLETE} --keep subset.iids --make-bed --out ${SUBSET}
  81.  
  82. rm *.ped
  83. rm *.map
  84. rm *.log
  85. rm *.hh
  86.  
  87. echo "$(date) | Preparing list of ids per subset (named ${GROUP}*) to be used in step 2"
  88. gawk '{print $1,$2}' ${COMPLETE}.fam > _idlist
  89. split -d -l $STEP2SIZE _idlist $GROUP
  90.  
  91. echo "$(date) | Preparation complete; copy files to $OUTDIR"
  92. tar cfz ${prefix}_step0.tar.gz ${COMPLETE}.* ${SUBSET}.* ${GROUP}*
  93. rm -f ${GROUP}*
  94. mv ${prefix}_step0.tar.gz ${OUTDIR}
  95.  
  96. echo "$(date) | copy remaining files to $OUTDIR/rsync & clean up"
  97. mkdir -p ${OUTDIR}/rsync
  98. rsync -avz ${scratch}/ ${OUTDIR}/rsync/
  99. # Let's clean up
  100. if [[ $? -eq 0 ]] ; then
  101. cd ${orig_dir}
  102. rm -rf ${scratch}
  103. else
  104. echo "Unable to sync remaining files."
  105. echo "Please ssh to $(hostname) and"
  106. echo "look at the content of ${scratch}"
  107. fi
  108.  
  109. echo "=========================================================="
  110. echo "Finished on : $(date)"
  111. echo "=========================================================="
Add Comment
Please, Sign In to add comment