Advertisement
T3RRYT3RR0R

ASCII string filter v3.1

Dec 5th, 2021 (edited)
1,225
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Batch 10.11 KB | None | 0 0
  1. @Echo off
  2. Rem Store a ready-to-run CHCP command that re-applies the current code page before exit. Taking token 4 presumes English-style CHCP output - confirm on localised systems.
  3.  For /f "tokens=4 delims=: " %%G in ('CHCP')Do Set "Restore_Codepage=CHCP %%G > nul"
  4.  Set "Return[Len]=" & Set "Return[String]=" & Set "{input}=" & Set "Modified="
  5. Rem The variables above are cleared in the calling environment; everything after this point runs in a localised scope with delayed expansion off.
  6.  Setlocal DISABLEDelayedExpansion
  7.  
  8. REM the label marker ":#" is used within this script to delimit help output.
  9. :#
  10. :# ========================= ASCII string filter v3.1 by T3RRY ======================
  11. Rem - This script iterates over an input string character by character and tests
  12. Rem   each character against a whitelist of printable ASCII characters, with
  13. Rem   successful matches used to build a new string containing only printable
  14. Rem   ASCII characters.
  15. Rem - Execution time increases as string length increases. Each character in the
  16. Rem   string is tested against a whitelist containing 96 printable ASCII characters.
  17. :#
  18. :# Usage: Filepath <"String"> [ /P | /R | /T ] | [ -? | /? | -help ]
  19. :#
  20. :# Rem to use from another batch file:
  21. :# For /f delims^= %%G in ('FilePath "string"')Do Echo(%%G
  22. :#
  23. :# Accepts input String via doublequoted argument - reads %* and trims trailing " \P" " \T" or " \R"
  24. :# switches if present at EOL
  25. :# - No escaping of characters in the argument is required
  26. :# - If unbalanced doublequotes exist in the string all doublequotes will be Removed.
  27. :#
  28. :# Use Switch /P to preserve original spaces
  29. :#  - Default behaviour is to Remove all double spaces from the string.
  30. :#    Errorlevels:
  31. :#    0 : String contained only printable ASCII characters; Return[String]
  32. :#        contains the original input string.
  33. :#   -1 : String contained NonASCII or nonprintable ASCII characters;
  34. :#         Return[String] contains only printable ASCII characters
  35. :#         from the input string.
  36. :#
  37. :# Use Switch /R to reject input containing NonASCII characters
  38. :#  - Errorlevel 0 : string contains only printable ASCII Characters
  39. :#  - Errorlevel 1 or GTR: string contains one or more characters that are
  40. :#     not ASCII printable characters. The errorlevel corresponds to
  41. :#     the 1 indexed position of first non ASCII character encountered.
  42. :#     Note: the presence of TAB literals in the string will result
  43. :#     in an incorrect position being reported.
  44. :#
  45. :# Use Switch /T to truncate strings on first occurance of a non-Ascii character
  46. :#  - Errorlevel returned is String length
  47. :#  - String returned in Return[String] variable
  48. :#
  49. ::::::::::::::::::::::::::::::::::
  50. Rem Version changes 20/Jan/2021 :
  51. Rem - Added switch: /T
  52. Rem   Truncates string on occurrence of first non ASCII character
  53. ::::::::::::::::::::::::::::::::::
  54. Rem Version changes 11/Dec/2021 :
  55. Rem - Added TAB to ASCII printable characters. Handled via substitution. See help for more info.
  56. Rem - Script now differentiates between original paired spaces and paired spaces
  57. Rem   resulting from removal of non ASCII characters.
  58. ::::::::::::::::::::::::::::::::::
  59. Rem Version changes 09/Dec/2021 :
  60. Rem - Changed input method to handle cases where quoted args contain
  61. Rem   standard delims within quotes IE: "string "substring=text""
  62. Rem - Implemented negative errorlevel return: -1 to flag if
  63. Rem   the input string has been modified. 0 indicates unmodified, -1 modified.
  64. ::::::::::::::::::::::::::::::::::
  65. Rem Version changes 08/Dec/2021 :
  66. Rem - Added Help Switches -? /? and -help
  67. Rem - Added switch: /R
  68. Rem   - Reject strings containing non ASCII characters. Default: Strip NonASCII
  69. Rem     characters from the string.
  70. Rem     Note: this switch does not define Return[Len] or Return[String]
  71. ::::::::::::::::::::::::::::::::::
  72. Rem Version changes 07/Dec/2021 :
  73. Rem - Rewritten for faster performance - NOTE:
  74. Rem   - Added Switch: /P
  75. Rem    - Preserve all whitespace. Default: multiple spaces truncated to single.
  76. Rem - Renamed variable for returning String : Return[String]
  77. Rem - Added variable Return[Len] to return 0 indexed string length.
  78. Rem - Corrected handling of completely non ASCII strings to return empty / 0 Len
  79. Rem ** Utilize alternate data stream to store variable containing printable ASCII
  80. Rem    characters so the variable only needs to be generated on first execution.
  81. Rem     ** Requires this batch file to be run from an NTFS drive.
  82. :# =================================================================================
  83. Rem LF is set to a single linefeed using the caret and the two empty lines that follow it; TAB is captured from forfiles, which is asked to echo 0x09.
  84. Set LF=^
  85.  
  86.  
  87. %= Empty lines above required =%
  88. For /F eol^=^%LF%%LF%^ delims^= %%A in ('forfiles /p "%~dp0." /m "%~nx0" /c "cmd /c echo(0x09"') do Set "TAB=%%A"
  89. Rem First run only: when the ASCII.dat stream cannot be read, append the characters for exit codes 34 to 126 (via the =ExitCodeAscii dynamic variable) to the seed string and store a Set command for the result in that stream.
  90.  Set "ASCII= !"
  91.  2> nul (
  92.   more < "%~f0:ASCII.dat" > nul || (
  93.    Setlocal EnableDelayedExpansion
  94.    For /l %%i in (34 1 126) Do (
  95.     Cmd /c Exit %%i
  96.     Set "ASCII=!ASCII!!=ExitCodeAscii!"
  97.    )
  98.    >"%~f0:ASCII.dat" (Echo(Set ^^"ASCII=!ASCII!")
  99.    ENDLOCAL
  100.  ))
  101. Rem Reload the whitelist by executing the stored Set command. If ASCII is still undefined the stream was unusable: remove the streams, report the problem and exit with errorlevel 1.
  102.  Set "ASCII="
  103.  For /f "delims=" %%G in ('More ^< "%~f0:ASCII.dat"')Do %%G
  104.  If not Defined ASCII (
  105.   2> nul (
  106.    Powershell.exe -nologo -noprofile -command "Remove-item -path '%~nx0' -Stream '*'"
  107.   )
  108.   1>&2 Echo(An error has occured. Ensure "%~nx0" is located on an NTFS drive.
  109.   Pause
  110.   ENDLOCAL
  111.   Exit /b 1
  112.  )
  113.  
  114.  Rem Maximum stringlength to support. Modify here to propagate to RemoveChar loop and Return[Len]
  115. REM maximum 1015 chars due to input reading method.
  116.  Set "SupportLength=1015"
  117.  Set "{input}="
  118. Rem With echo on, the REM line carrying the raw arguments is written into the Params.dat stream. Two Set /p reads are made and the value from the last one is kept, then 7 leading and 2 trailing characters are trimmed (presumably the echoed prompt and rem prefix, and the closing dot with padding - confirm). If the redirected block fails, an error is printed and the script ends via Goto:Eof.
  119. ::====================================================================================================
  120. Rem :: input capture method is a modified version of Dave Benhams method:
  121. Rem :: https://www.dostips.com/forum/viewtopic.php?t=4288#p23980
  122. SETLOCAL EnableDelayedExpansion
  123.  1>"%~f0:Params.dat" <"%~f0:Params.dat" (
  124.   SETLOCAL DisableExtensions
  125.   Set prompt=#
  126.   Echo on
  127.   For %%a in (%%a) do rem . %*.
  128.   Echo off
  129.   ENDLOCAL
  130.   Set /p "{input}="
  131.   Set /p "{input}="
  132.   Set "{input}=!{input}:~7,-2!"
  133.  @Rem duplicate {input} for the purpose of counting doublequotes.
  134.   Set "count=!{input}!"
  135.  ) || (
  136.   1>&2 Echo(%~nx0 requires an NTFS drive system to function as intended.
  137.   CMD /C Exit -1073741510
  138.  ) || Goto:Eof
  139.  
  140. ::====================================================================================================
  141.  
  142. Rem the below line can be used to Remove the alternate data stream this file creates.
  143. Rem Powershell -c "Remove-item -path '%~nx0' -Stream '*'"
  144. Rem Switch to code page 65001 for processing; when no argument was captured, fall back to a built-in demo string and display it.
  145.  CHCP 65001 > nul
  146.  If not defined {input} (
  147.   Echo(Demo:
  148.  Rem escaped for definition in DelayedExpansion environment
  149.   Set "{input}=this is [    ] a demo) * ^! &^=| ^! <. ~ ^^ & %% ▒ ╔ § ♣ This"
  150.   Set {input}
  151.  )
  152.  
  153. REM handle help switches
  154. Rem On a match, print the text following the help marker on each marked line of this file, replacing the word Filepath with the full path of this script. The code page is restored before leaving, as on every other exit after the CHCP call, and the errorlevel is 0.
  155.  Set {input} | %SystemRoot%\System32\Findstr.exe /Xli "{input}=\/? {input}=-? {input}=-help" > nul && (
  156.   Setlocal EnableDelayedExpansion
  157.   For /f "tokens=2* delims=#" %%G in ('%SystemRoot%\System32\Findstr.exe /blic:":# " "%~f0"')Do (
  158.    Set "Usage=%%G"
  159.    Echo(!Usage:Filepath=%~f0!
  160.   )
  161.   %Restore_Codepage%
  162.   ENDLOCAL & ENDLOCAL & Exit /b 0
  163.  )
  164. Rem Div is a Set /A template: once # is replaced by the quote count modulo 2, it divides 1 by that value shifted left 31 bits. An even count therefore divides by zero and fails, while an odd count succeeds and runs the conditional block.
  165. REM substitute doublequotes in {input} clone 'count'; count substring in string;
  166. REM assess if count is even; If false; Remove doublequotes from string.
  167. Rem The percent substitution in the null assignment below turns every {DQ} marker in count into an extra Set /A increment, which tallies the doublequotes.
  168.  Set Div="is=#", "1/(is<<31)"
  169.  Set "{DQ}=0"
  170.  Set ^"count=!count:"={DQ}!"
  171.  2> nul Set "null=%count:{DQ}=" & Set /A {DQ}+=1& set "null=%"
  172.  Set /A !Div:#={DQ} %% 2! 2> nul && (%= Doublequote count is Odd. =%
  173.   Set ^"{input}=!{input}:"=!"
  174.  )
  175.  
  176. REM handle nonhelp switches /P, /R and /T [ mutually exclusive; only enacted if switch terminates commandline input. ] All three flags are cleared first so values inherited from the calling environment cannot select a mode.
  177.  Set "ASCIISwitch[T]="
  178.  Set "ASCIISwitch[R]="
  179.  Set "ASCIISwitch[P]="
  180.  If defined {input} (
  181.   Set {input} | %SystemRoot%\System32\findstr.exe /Eli "\/P \/R \/T" > nul && (
  182.    If /I "!{input}:~-3!"==" /P" (
  183.     Set "{input}=!{input}:~0,-3!"
  184.     Set "ASCIISwitch[P]=true"
  185.    ) Else If /I "!{input}:~-3!"==" /R" (
  186.     Set "{input}=!{input}:~0,-3!"
  187.     Set "ASCIISwitch[R]=true"
  188.    ) Else If /I "!{input}:~-3!"==" /T" (
  189.     Set "{input}=!{input}:~0,-3!"
  190.     Set "ASCIISwitch[T]=true"
  191.  )))
  192.  
  193. Rem Remove outer doublequotes from input argument if not already removed due to unbalanced quoting.
  194.  If .^%{input}:~0,1%^%{input}:~-1%. == ."". Set "{input}=!{input}:~1,-1!"
  195.  
  196. Rem Substitute each TAB with the placeholder {TAB}, which is made of whitelisted characters; skipped when /R or /T is in effect.
  197.  If not defined ASCIISwitch[R] If not defined ASCIISwitch[T] For /f "delims=" %%G in ("!TAB!")Do Set "{input}=!{input}:%%G={TAB}!"
  198.  
  199. Rem Substitute original paired spaces with the placeholder {2xSp} prior to character removal; skipped when /R or /T is in effect.
  200.  If not defined ASCIISwitch[R] If not defined ASCIISwitch[T] Set "{input}=!{input}:  ={2xSp}!"
  201.  
  202. Rem RemoveChar loop - iterate over input character by character; compare each against every whitelist character.
  203. Rem Whitelisted characters are appended to New. On the first other character: with /T, New is echoed, returned in
  204. Rem  Return[string] and the script exits with the length of New, which equals the 0 indexed position reached;
  205. Rem  with /R the script exits with the 1 indexed position of that character; otherwise Modified is flagged and
  206. Rem  the character is dropped. The code page is restored before either early exit, also when New is still empty.
  207.  Set "end=" & Set "New="
  208.  For /l %%i in (0 1 %SupportLength%)Do If not "!{input}:~%%i,1!"=="" (
  209.   Set "Char=!{input}:~%%i,1!"
  210.   Set "ISAscii="
  211.   For /l %%c in (0 1 94)Do If not "!ASCII:~%%c,1!" == "" (
  212.    Set "C_Char=!ASCII:~%%c,1!"
  213.    if "!Char!"=="!C_Char!" (
  214.     Set "New=!New!!Char!"
  215.     Set "ISAscii=true"
  216.   ))
  217.   If Not Defined ISAscii (
  218.    If Defined ASCIISwitch[T] (
  219.     For /f "delims=" %%G in ("!New!")Do (
  220.      Echo(!New!
  221.      Endlocal & Endlocal & Set "Return[string]=%%G"
  222.     )
  223.     %Restore_Codepage%
  224.     Exit /b %%i
  225.    )
  226.    Set "Modified=true"
  227.    If Defined ASCIISwitch[R] (
  228.     Endlocal & Endlocal &  %Restore_Codepage%
  229.     For /f "delims=" %%G in ('Set /A %%i+1')Do Exit /b %%G
  230.  )))
  231.  
  232. Rem strip new Paired spaces from string if switch /P not used.
  233.  
  234.  Set "{Input}=!New!"
  235.  If not Defined ASCIISwitch[P] (
  236.   For /l %%i in (0 1 9)Do if defined {Input} Set "{Input}=!{Input}:  = !"
  237.  )
  238.  
  239. Rem reinsert original paired spaces and Tab:
  240.  If defined {input} (
  241.   Set "{input}=!{input}:{2xSp}=  !"
  242.   Set "{input}=!{input}:{TAB}=%TAB%!"
  243.  )
  244. Rem Echo the filtered string and measure its length into Return[Len]; an empty result returns length 0 and an empty Return[String].
  245.  If defined {input} (
  246.   Echo(
  247.   <nul Set /p "=!{input}!"
  248.   For /l %%i in (0 1 %SupportLength%)Do If not defined Return[Len] If "!{input}:~%%i,1!"=="" Set "Return[Len]=%%i"
  249.  ) Else (
  250.   ENDLOCAL & ENDLOCAL & Set "Return[Len]=0" & Set "Modified=%Modified%"
  251.   Set "Return[String]="
  252.  )
  253. Rem Modified is carried across both Endlocal calls together with the return variables; without that the test below always sees it undefined and the script exits 0 even when characters were removed.
  254.  If defined {input} For /f "Delims=" %%G in ("!{Input}!")Do (
  255.   ENDLOCAL & ENDLOCAL & Set "Return[Len]=%Return[Len]%" & Set "Return[string]=%%G" & Set "Modified=%Modified%"
  256.  )
  257. Rem Restore the caller's code page, then exit 0 for an unmodified string or -1 for a modified one.
  258.  %Restore_Codepage%
  259.  If not defined modified Exit /B 0
  260.  Exit /b -1
  261.  
  262.  
  263.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement