Advertisement
ProzacR

Find compounds similar to a KEGG ID

Nov 11th, 2015
447
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.37 KB | None | 0 0
  1. #!/bin/bash
  2. #
  3. # compare KEGGID with entire ZINC database
  4. # ZINC database is extracted to ~/ZINC and decompressed
  5. # with gunzip *.sdf.gz
  6. # VR
  7.  
  8.  
  9. #some checks
  10.  
  11. command -v wget >/dev/null 2>&1 || { echo >&2 "I require wget (gnu.org/software/wget) but it's not installed.  Aborting."; exit 1; }
  12. command -v babel >/dev/null 2>&1 || { echo >&2 "I require openbabel (http://openbabel.org) but it's not installed.  Aborting."; exit 1; }
  13.  
  14. if [ ! -d ~/ZINC ]; then
  15.   echo "Directory ~/ZINC with local ZINC database do not exist. Make it first."
  16.   exit 1
  17. fi
  18.      
  19. if [ $# -ne 1 ]; then
  20.        echo "Usage ./search_ZINC.sh KEGGID"
  21.        echo "KEGGID is C number from KEGG compound database:"
  22.        echo "http://www.genome.jp/kegg/compound/"
  23.        exit 1
  24. fi
  25.            
  26. KEGGID=$1
  27. echo "Downloading $KEGGID mol file from KEGG..."
  28. wget http://www.genome.jp/dbget-bin/www_bget?-f+m+compound+${KEGGID} -O ${KEGGID}.mol
  29. if [ ! -s ${KEGGID}.mol ]; then
  30.      "File ${KEGGID}.mol is empty. Likely wrong KEGGID"
  31.      exit 1
  32. fi
  33.                    
  34. for file in ~/ZINC/*.sdf
  35. do
  36.   echo "Comparing with ZINC package: $file"
  37.   #Slow part:
  38.   babel ${KEGGID}.mol $file -ofpt -xfFP2 > ${KEGGID}_${file}.log
  39.   #print cases when Tanimoto score is 1
  40.   grep 1$ ${KEGGID}_${file}.log
  41.   #print cases when Tanimoto score is larger than 0.9 and lower than 1
  42.   grep 0\.9.$ ${KEGGID}_${file}.log
  43. done
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement