Advertisement
Guest User

acgt.sh

a guest
Jul 5th, 2024
37
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 2.68 KB | Jokes | 0 0
  1. #!/bin/bash
  2.  
  # Print an error message to stderr and terminate the current (sub)shell.
  # Arguments:
  #   $1 - exit status to terminate with (defaults to 1 when empty)
  #   $2 - message text, printed verbatim after a red "ERROR:" tag
  # Outputs:
  #   One line on stderr.
  function die {
    # The message is passed through %s so that backslashes in it (for
    # example from quoted invalid input) are printed literally instead of
    # being expanded as escape sequences.
    printf '\e[31mERROR:\e[0m %s\n' "$2" >&2
    exit "${1:-1}"
  }
  7.  
  # Print the command-line synopsis to stdout.
  # Callers redirect to stderr themselves when the synopsis is an error.
  function usage {
    local prog
    # Quote $0 (and guard it with --) so a script path containing spaces,
    # glob characters or a leading dash reaches basename as one operand.
    prog=$(basename -- "$0")
    echo "USAGE: $prog [-r] FILE"
    echo "       $prog [-r] - <<<SEQUENCE"
    echo "       $prog -R STRING"
  }
  13.  
  # Print a magenta "DEBUG:" line to stderr, but only when DEBUG_MODE is "1".
  # Arguments:
  #   $@ - message words; joined with spaces and printed verbatim
  function debug {
    if [[ "${DEBUG_MODE:-}" == "1" ]]; then
      # %s keeps backslashes in the message literal; only the colour codes
      # in the format string are interpreted.
      printf '\e[35mDEBUG: %s\e[m\n' "$*" >&2
    fi
  }
  19.  
  # Decode a DNA sequence read from stdin into raw bytes on stdout.
  #
  # All whitespace is removed first, then the input is consumed in groups
  # of four bases. Each base is a base-4 digit (A=1 C=2 G=3 T=0, either
  # case) and the group is little-endian: the first base is the least
  # significant digit. Example: GTGA == 3 + 0*4 + 3*16 + 1*64 == 115 == 's'.
  # A final group shorter than four bases is padded with trailing t's
  # (zero digits).
  #
  # Errors are reported through die: status 2 for a character that is not
  # a base, status 3 for an out-of-range value. The loop runs on the right
  # side of a pipe, so die ends that subshell and its status becomes this
  # function's return status.
  function acgt-conv {
    local pad=tttt
    local group base digit value pos hex

    # read -N returns non-zero when EOF arrives before 4 characters, while
    # still storing what it did read; the extra test keeps that partial
    # final group instead of silently dropping it.
    tr -d '[:space:]' | while read -r -N 4 group || [[ -n "$group" ]]; do
      # Append as many t's as are needed to reach four bases.
      group="${group}${pad:${#group}}"

      value=0
      for (( pos = 0; pos < ${#group}; pos++ )); do
        base=${group:pos:1}
        case "$base" in
          A|a) digit=1 ;;
          C|c) digit=2 ;;
          G|g) digit=3 ;;
          T|t) digit=0 ;;
          *)   die 2 "Format Error: Invalid DNA '$base' in byte '$group'" ;;
        esac
        # Each base occupies two bits; position 0 is the low pair.
        value=$(( value + (digit << (pos * 2)) ))
      done

      if (( value > 255 )); then
        die 3 "Math Error: Byte '$group' produced out-of-range value '$value'"
      fi

      # Emit the byte via a \xHH escape; hex holds only hex digits, so it
      # is safe to place in the format string.
      printf -v hex '%02x' "$value"
      printf "\\x${hex}"
    done
  }
  53.  
  # Encode text from stdin as DNA (string to DNA).
  #
  # Every input character is converted to its byte value and written as
  # four bases followed by one space. The value is emitted as base-4
  # digits, least significant first, using A=1 C=2 G=3 T=0
  # (for example 's' == 115 -> "GTGA ").
  #
  # A character that occupies more than one byte is rejected through die
  # with status 2.
  function acgt-reverse {
    # Index into this string with a base-4 digit to get its letter.
    local digits=TACG
    local ch hex value dna i

    # -N 1 with an empty IFS reads exactly one character and keeps spaces,
    # tabs and newlines, which plain "read -n 1" would turn into an empty
    # string.
    while IFS= read -r -N 1 ch; do
      # Character -> hex dump of its bytes. %s keeps '%' and '\' in the
      # data from being interpreted as printf directives.
      hex=$(printf '%s' "$ch" | od -An -t x1 | tr -d ' \n')
      [[ -n "$hex" ]] || continue
      value=$(( 0x$hex ))

      if (( value > 255 )); then
        die 2 "Format Error: Character '$ch' produced multiple bytes. Only ASCII characters are supported."
      fi

      dna=
      for (( i = 0; i < 4; i++ )); do
        dna+=${digits:value % 4:1}
        value=$(( value >> 2 ))
      done

      printf '%s ' "$dna"
    done
  }
  81.  
  # ---- Command-line handling and dispatch ------------------------------
  # Only the first argument is inspected as an option:
  #   -h | --help     print the synopsis and exit 0
  #   -T | --test     set DEBUG_MODE=1 (no conversion is run in this mode)
  #   -R | --reverse  encode the remaining arguments as DNA
  #   -r | --raw      decode and write the bytes unfiltered (cat)
  #   anything else   decode and filter the result through strings
  if [[ -z "$1" ]]; then
    usage >&2
    exit 1
  fi

  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    -T|--test)
      DEBUG_MODE=1
      shift
      ;;
    -R|--reverse)
      REVERSE_MODE=1
      shift
      ;;
    -r|--raw)
      OUTPUT=cat
      shift
      ;;
    *)
      OUTPUT=strings
      ;;
  esac

  # "-" selects standard input. A missing operand (e.g. "-r" alone) also
  # reads standard input, which is what the previously unquoted
  # "cat $DNA_FILE" did when the variable was empty.
  if [[ -z "$1" || "$1" == "-" ]]; then
    DNA_FILE=/dev/stdin
  else
    DNA_FILE="$1"
  fi

  if [[ "$REVERSE_MODE" == "1" ]]; then
    # %s keeps '%' and '\' in the user's text from being interpreted as
    # printf directives.
    printf '%s' "$*" | acgt-reverse
    status=$?
    echo
    # Report an encoding failure through the exit status.
    exit "$status"
  elif [[ "$DEBUG_MODE" != "1" ]]; then
    # Without pipefail the status of a failed conversion would be hidden
    # behind the output filter's status.
    set -o pipefail
    acgt-conv <"$DNA_FILE" | "$OUTPUT"
  fi
  113.  
Tags: DNA
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement