This week only. Pastebin PRO Accounts Christmas Special! Don't miss out!Want more features on Pastebin? Sign Up, it's FREE!
Guest

texml

By: a guest on Mar 31st, 2012  |  syntax: Perl  |  size: 2.30 KB  |  views: 34  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #!/usr/bin/perl
  2. use warnings;
  3. use strict;
  4.  
  5. # texml: a script to turn TeX-like \control{text} syntax into html
  6. #   input:  a data file with formatting controls like \b{bold text}
  7. #   output: html-formatted data like <strong>text</strong>
  8.  
  9. # supports nested elements:
  10. #   \i{italic \b{and bold}} goes to
  11. #   <em>italic <strong>and bold</strong></em>
  12.  
  13. # deprecated HTML4 tags are macro'd to the appropriate HTML5 tags
  14.  
  15. # supports class, style, etc:
  16. #   \span class="important"{something important!} goes to
  17. #   <span class="important">something important!</span>
  18.  
  19. # supports a literal mode, useful for internal styles and scripts
  20. # begin with {LIT} on a line by itself
  21. # end with {UNLIT} on a line by itself
  22.  
  23. # important special characters here:
  24. # $` (with a backtick) text before the string that matched
  25. # $' (with a tick) text after the string that matched
  26. # "abc123def" =~ m/123/ sets $` to "abc" and $' to "def"
  27.  
  # sub macro: translate one opening control token into the text of an HTML tag
  #   parameter: a token like '\b class="foo"{' (backslash, element name,
  #              optional attributes, opening brace)
  #   returns:   the text that goes between '<' and '>', for example
  #              'strong class="foo"'; the empty string if the token does not
  #              have the expected shape
  sub macro {
      my ($token) = @_;

      # Check the match explicitly: without this a failed match would silently
      # reuse whatever $1 an earlier, unrelated match left behind.
      return '' unless defined $token && $token =~ m/^\\(.*)\{$/;

      my ($elem, @params) = split ' ', $1;
      $elem = '' unless defined $elem;    # nothing but whitespace, e.g. '\ {'

      # Deprecated HTML4 presentational names and their HTML5 replacements.
      my %html5_for = (
          b  => 'strong',
          i  => 'em',
          # A CSS declaration has to live in style=, not class=, to take effect.
          u  => 'span style="text-decoration: underline"',
          tt => 'code',
      );
      $elem = $html5_for{$elem} if exists $html5_for{$elem};

      # unhandled: <s>, <strike> and <center> are passed through unchanged
      return join ' ', $elem, @params;
  }
  49.  
  # Names of the elements that are currently open, innermost last.
  my @stack = ();
  # "interpolate": translate \control{...} syntax; "literal": copy lines as-is.
  my $mode = "interpolate";

  LINE: while (my $line = <>) {
      # {LIT} / {UNLIT} on a line by themselves switch modes and are not echoed.
      if ($line =~ m/^\{LIT\}$/) {
          $mode = "literal";
          next LINE;
      }
      if ($line =~ m/^\{UNLIT\}$/) {
          $mode = "interpolate";
          next LINE;
      }

      if ($mode eq "literal") {
          print $line;
          next LINE;
      }

      # Repeatedly find the leftmost opening token (\name ...{) or closing
      # brace, emit everything before it, then continue with what follows.
      while ($line =~ m/(\\[^\{]+\{|\})/) {
          my ($pre, $token, $post) = ($`, $1, $');

          # The text before the token is always output, even when the token
          # turns out to be a stray closing brace.
          print $pre;

          if ($token eq "}") {
              if (@stack) {
                  print "</" . pop(@stack) . ">";
              }
              else {
                  # The stray brace itself is dropped from the output.
                  warn __FILE__.": closing } without opening {\n";
              }
          }
          else {
              my $elem = macro($token);
              print "<$elem>";

              # Only the element name (first word) is needed to close the tag.
              my ($name) = split /\s+/, $elem;
              push @stack, defined $name ? $name : '';
          }

          $line = $post;
      }

      print $line;    # print remaining string
  }

  # if the stack contains elements, brackets have not been resolved
  if (@stack) {
      warn __FILE__.": unbalanced tags\n";
      warn "\t<$_>\n" for @stack;
  }
clone this paste RAW Paste Data