Advertisement
Guest User

Untitled

a guest
Sep 14th, 2022
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
TCL 4.83 KB | Source Code | 0 0
  1. #!/usr/bin/env tclsh
  2.  
  3. # char2ent.tcl - Opens an HTML or XML file with special characters
  4. #       (diacritics) written in plain text, replaces these special
  5. #       characters with appropriate HTML or XML entities and writes the
  6. #       output to a new file.
  7. #
  8. # Author: Luciano Espirito Santo
  9. #
  10. # History
  11. #
  12. #   Version 1.0 2004-11-18  Luciano Espirito Santo
  13. #       First version. Alpha stage.
  14. #
  15. #       KNOWN ISSUES:
  16. #       - No user-proof measures, no error or exception handling, no nothing!
  17. #         No guarantees! Use it at your own risk!
  18. #       - Tested on Windows 98 and Linux only.
  19. #
  20. #       TODO:
  21. #       - Extend it so it can handle all possible characters (Unicode).
  22. #       - Add the ability to do the INVERSE operation (that would include the
  23. #         ability to tell non-escaped entities from escaped entities and NOT
  24. #         replace the escaped entities).
  25. #       - Make it handle STDIN.
  26. #
  27. #       LICENSE: BSD
  28. #
  29. # How to use it:
  30. #
  31. # char2ent.tcl  --help
  32.  
  33. # ----------------------------------------------------------------
  34. # Do not change anything below this point unless you know what you're doing.
  35.  
  36.  
  37. # Print help text and exit (status 0) only when '--help' is the sole argument.
  38. if  {$argc == 1 && [lindex $argv 0] == "--help"}  {
  39.     puts  ""
  40.     puts  "char2ent, by Luciano Espirito Santo - 2004"
  41.     puts  ""
  42.     puts  {Usage: char2ent -[option]  "input file"  "output file"}
  43.     puts  ""
  44.     puts  "Possible options:"
  45.     puts  "-h: convert special characters to HTML entities"
  46.     puts  "-x: convert special characters to XML entities"
  47.     puts  ""
  48.     puts  {"input file" MUST exist}
  49.     puts  {"output file" is created automatically if it does not exist}
  50.     puts  {"input file" and "output file" MUST NOT be the same file}
  51.     puts  ""
  52.     puts  {Example: char2ent -x "sample.xml" "converted.xml"}
  53.     puts  ""
  54.     exit
  55. }
  56.  
  57. # Reject any option other than '-h' or '-x': report on stderr, exit non-zero.
  58. if  {[lindex $argv 0] != "-h" && [lindex $argv 0] != "-x"}      {
  59.     puts  stderr  "Error! Try 'char2ent --help' to see how to use this program.\n"
  60.     exit  1
  61. }
  62.  
  63. # Require exactly 3 arguments (option, input, output); echo what was received.
  64. if  {$argc != 3}    {
  65.     puts  stderr  "Error! You must use exactly 3 arguments.\n"
  66.     puts  stderr  "$argv\n"
  67.     puts  stderr  "Error! Try 'char2ent --help' to see how to use this program.\n"
  68.     exit  1
  69. }
  70.  
  71. # Complain (on stderr) and exit with a failure status if input file does not exist
  72. if  {! [file exists [lindex $argv 1]]}   {
  73.     puts  stderr  "Error! File \"[lindex $argv 1]\" not found!\n"
  74.     exit  1
  75. }
  76.  
  77. # Complain (on stderr) and exit with a failure status if input file is not readable
  78. if  {! [file readable [lindex $argv 1]]}    {
  79.     puts  stderr  "Error! Permission denied to read [ lindex $argv 1 ]!\n"
  80.     exit  1
  81. }
  82.  
  83. # Refuse identical paths (compared as normalized strings): opening the output would truncate the input
  84. if  {[string equal [file normalize [lindex $argv 1]] [file normalize [lindex $argv 2]]]}  {
  85.     puts  stderr  "Error! \"input file\" and \"output file\" must not be the same.\n"
  86.     exit  1
  87. }
  88.  
  89. # Try to open input file for reading.
  90. # Complain (on stderr) and exit with a failure status in case of errors.
  91. if  {[catch {set IF [open [lindex $argv 1] r]} IFerror]}  {
  92.     puts  stderr  "Error! $IFerror\n"
  93.     exit  1
  94. }
  95.  
  96. # Try to open output file for writing.
  97. # Complain (on stderr), close input file and exit with a failure status in case of errors.
  98. if  {[catch {set OF [open [lindex $argv 2] w]} OFerror]}    {
  99.     close $IF
  100.     puts  stderr  "Error! $OFerror\n"
  101.     exit  1
  102. }
  103.    
  104.  
  105. # ================================================
  106. # Two files open. No errors this far. Let's replace.
  107. # CHARS: the special characters handled; the HTML and XML lists give, position for position, their replacements.
  108. set  CHARS  {
  109.     ª  º  À  Á  Â  Ã  Ä  Å  Æ  Ç
  110.     È  É  Ê  Ë  Ì  Í  Î  Ï  Ð  Ñ
  111.     Ò  Ó  Ô  Õ  Ö  Ø  Ù  Ú  Û  Ü
  112.     Ý  Þ  ß  à  á  â  ã  ä  å  æ
  113.     ç  è  é  ê  ë  ì  í  î  ï  ð
  114.     ñ  ò  ó  ô  õ  ö  ø  ù  ú  û
  115.     ü  ý  þ  ÿ  Œ  œ  Ÿ
  116. }
  117. # HTML named entities, in the same order as CHARS (entity names are case-sensitive).
  118. set  HTML  {
  119.     &ordf;  &ordm;  &Agrave;  &Aacute;  &Acirc;  &Atilde;  &Auml;
  120.     &Aring;  &AElig;  &Ccedil;  &Egrave;  &Eacute;  &Ecirc;  &Euml;
  121.     &Igrave;  &Iacute;  &Icirc;  &Iuml;  &ETH;  &Ntilde;  &Ograve;
  122.     &Oacute;  &Ocirc;  &Otilde;  &Ouml;  &Oslash;  &Ugrave;  &Uacute;
  123.     &Ucirc;  &Uuml;  &Yacute;  &THORN;  &szlig;  &agrave;  &aacute;
  124.     &acirc;  &atilde;  &auml;  &aring;  &aelig;  &ccedil;  &egrave;
  125.     &eacute;  &ecirc;  &euml;  &igrave;  &iacute;  &icirc;  &iuml;  &eth;
  126.     &ntilde;  &ograve;  &oacute;  &ocirc;  &otilde;  &ouml;  &oslash;
  127.     &ugrave;  &uacute;  &ucirc;  &uuml;  &yacute;  &thorn;  &yuml;
  128.     &OElig;  &oelig;  &Yuml;
  129. }
  130. # XML numeric character references (decimal code points), in the same order as CHARS.
  131. set  XML  {
  132.     &#170;  &#186;  &#192;  &#193;  &#194;  &#195;  &#196;  &#197;  &#198;
  133.     &#199;  &#200;  &#201;  &#202;  &#203;  &#204;  &#205;  &#206;  &#207;
  134.     &#208;  &#209;  &#210;  &#211;  &#212;  &#213;  &#214;  &#216;  &#217;
  135.     &#218;  &#219;  &#220;  &#221;  &#222;  &#223;  &#224;  &#225;  &#226;
  136.     &#227;  &#228;  &#229;  &#230;  &#231;  &#232;  &#233;  &#234;  &#235;
  137.     &#236;  &#237;  &#238;  &#239;  &#240;  &#241;  &#242;  &#243;  &#244;
  138.     &#245;  &#246;  &#248;  &#249;  &#250;  &#251;  &#252;  &#253;  &#254;
  139.     &#255;  &#338;  &#339;  &#376;
  140. }
  141.  
  142.  
  143. # Read all input and select the entity table for the (already checked) option.
  144. set CONTENT [read $IF]
  145. switch -- [lindex $argv 0]  {
  146.     "-h"    {set TABLE $HTML}
  147.     "-x"    {set TABLE $XML}
  148. }
  149.  
  150. # Replace each special character with the entity at the same list position.
  151. set POS -1
  152. foreach CH $CHARS   {
  153.     set CONTENT [string map "$CH [lindex $TABLE [incr POS]]" $CONTENT]
  154. }
  155.  
  156. # Write the converted text as-is (no extra trailing newline), then clean up.
  157. puts -nonewline $OF $CONTENT
  158. close $IF
  159. close $OF
  160. exit
  161.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement