Advertisement
pierostrada

urlencode.awk

Feb 5th, 2023
803
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Awk 3.60 KB | Source Code | 0 0
  1. :
  2. # http://www.shelldorado.com/scripts/commands.html
  3. ##########################################################################
  4. # Title      :  urlencode - encode URL data
  5. # Author     :  Heiner Steven (heiner.steven@odn.de)
  6. # Date       :  2000-03-15
  7. # Requires   :  awk
  8. # Categories :  File Conversion, WWW, CGI
  9. # SCCS-Id.   :  @(#) urlencode  1.4 06/10/29
  10. ##########################################################################
  11. # Description
  12. #   Encode data according to
  13. #       RFC 1738: "Uniform Resource Locators (URL)" and
  14. #       RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
  15. #
  16. #   This encoding is used e.g. for the MIME type
  17. #   "application/x-www-form-urlencoded"
  18. #
  19. # Notes
  20. #    o  The default behaviour is not to encode the line endings. This
  21. #   may not be what was intended, because the result will be
  22. #   multiple lines of output (which cannot be used in a URL or an
  23. #   HTTP "POST" request). If the desired output should be one
  24. #   line, use the "-l" option.
  25. #
  26. #    o  The "-l" option assumes that the end-of-line is denoted by
  27. #   the character LF (ASCII 10). This is not true for Windows,
  28. #   where the end of a line is denoted by the two characters
  29. #   CR LF (ASCII 13 10), or for classic Mac OS, which uses CR alone.
  30. #   We use this for symmetry; data processed in the following way:
  31. #       cat | urlencode -l | urldecode -l
  32. #   should (and will) result in the original data
  33. #
  34. #    o  Large lines (or binary files) will break many AWK
  35. #       implementations. If you get the message
  36. #       awk: record `...' too long
  37. #        record number xxx
  38. #   consider using GNU AWK (gawk).
  39. #
  40. #    o  urlencode will always terminate its output with an EOL
  41. #       character
  42. #
  43. # Thanks to Stefan Brozinski for pointing out a bug related to non-standard
  44. # locales.
  45. #
  46. # See also
  47. #   urldecode
  48. ##########################################################################
  49.  
  # Identification strings used by the usage text and by diagnostics.
  VER=1.4
  PN=`basename "$0"`

  # The awk implementation can be chosen through the environment variable
  # AWK; plain "awk" is used only when that variable is not set at all.
  AWK=${AWK-awk}
  54.  
  # Usage - print the help text on standard error and terminate the script
  # with exit status 1.  Reads the variables PN (program name) and VER
  # (version).  Never returns to the caller.
  Usage () {
      {
          echo "$PN - encode URL data, $VER"
          echo "usage: $PN [-l] [file ...]"
          echo "   -l:  encode line endings (result will be one line of output)"
          echo
          echo "The default is to encode each input line on its own."
      } >&2
      exit 1
  }
  63.  
  # Msg - write every argument as a separate line to standard error, each
  # one prefixed with the program name stored in PN ("name: text").
  # The variable MsgLine holds the argument currently being printed.
  Msg () {
      while [ $# -gt 0 ]
      do
          MsgLine=$1
          echo >&2 "$PN: $MsgLine"
          shift
      done
  }
  69.  
  # Fatal - report all arguments through Msg (one diagnostic line each) and
  # then terminate the script with exit status 1.
  Fatal () {
      Msg "$@"
      exit 1
  }
  71.  
  # Command line parsing.
  #
  # The getopts builtin is used instead of the external getopt(1) command:
  #   - An invalid option is detected reliably.  With "set -- `getopt ...`"
  #     the exit status seen by "||" is the one of "set", and some getopt
  #     implementations still print "--" after an error, so a bad option
  #     could slip through unnoticed.
  #   - Arguments are never re-split, so file names containing blanks or
  #     wildcard characters reach awk unchanged.
  # The leading ":" in the option string keeps getopts silent; every
  # problem is answered with the usage text instead.
  #
  # Result: EncodeEOL is "yes" if -l was given and "no" otherwise, and the
  # positional parameters hold only the file names.
  EncodeEOL=no
  while getopts :hl Option
  do
      case "$Option" in
          l)  EncodeEOL=yes;;
          *)  Usage;;             # -h, or an option that is not known
      esac
  done
  # Drop the options that were processed (including a terminating "--").
  shift `expr "$OPTIND" - 1`
  87.  
  # Run awk in the plain C locale so that the input is processed one byte
  # at a time: the table ord[] built below only knows the byte values
  # 1..255.  LC_ALL overrides LANG and the individual LC_* variables, so
  # setting LANG alone has no effect when the caller has LC_ALL or LC_CTYPE
  # set (for example to a UTF-8 locale).
  LANG=C;   export LANG
  LC_ALL=C; export LC_ALL

  # Encode all input lines.  Reads the files named on the command line, or
  # standard input if there are none.  $EncodeEOL ("yes" or "no") is spliced
  # into the awk program text as a string constant.
  $AWK '
      BEGIN {
          # Stay compatible with minimal awk implementations: a character
          # is converted to its numeric value through the table ord[], and
          # the two hexadecimal digits are looked up in hextab[] instead of
          # being produced with printf("%02X").

          EOL = "%0A"         # "end of line" string (encoded)
          split("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
          hextab[0] = 0
          for (i = 1; i <= 255; ++i) ord[sprintf("%c", i) ""] = i + 0
          if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
      }
      {
          encoded = ""
          len = length($0)
          for (i = 1; i <= len; ++i) {
              c = substr($0, i, 1)
              if (c ~ /[a-zA-Z0-9.-]/) {
                  encoded = encoded c         # safe character, copied as is
              } else if (c == " ") {
                  encoded = encoded "%20"     # blank is written as "%20", not "+"
              } else {
                  # unsafe character, encode it as a two-digit hex number
                  hi = int(ord[c] / 16)
                  lo = ord[c] % 16
                  encoded = encoded "%" hextab[hi] hextab[lo]
              }
          }
          if (EncodeEOL) {
              # One line of output in total: the line ending is replaced
              # by its encoded form and no newline character is written.
              printf("%s", encoded EOL)
          } else {
              print encoded
          }
      }
  ' "$@"
  128.  
Tags: urlencode
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement