Advertisement
Guest User

LaTeXTidy.pl

a guest
Aug 13th, 2013
471
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 4.85 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. # LaTeXTidy (c) 2004 by Eric Hsu <drerichsu@gmail.com>.
  4.  
  5. # Little Perl script to neaten up the format of LaTeX files.
  6. # This will be simple and naive. This takes STDIN .tex files and prints to STDOUT.
  7. # Check your file! Backup! No guarantees!
  8.  
  9. # License
  10. # -------
  11. # This is released as Niceware, which is like the Perl Artistic License, except you have to be nice to me when you criticize the code.
  12.  
  13. # General Idea
  14. # ------------
  15.  
  16. # Eat all single newlines. Add newlines after all "\\"
  17. # Newlines before each \begin and \end. After each \end{}
  18. # Each environment \begin adds a level of tab.
  19. # Newlines before each \item.
  20. # \n before each \[  and after each \]
  21.  
  22. my $in="";
  23.  
  24. while (<STDIN>) {
  25.     $in .= $_;
  26. }
  27.  
  28. my @keywords = qw(
  29.     appendix
  30.     author
  31.     bibliography
  32.     bigskip
  33.     chapter
  34.     date
  35.     def
  36.     document
  37.     evensidemargin
  38.     font
  39.     headheight
  40.     headsep
  41.     include
  42.     index
  43.     make
  44.     new
  45.     noindent
  46.     oddsidemargin
  47.     page
  48.     paragraph
  49.     part
  50.     ragged
  51.     renew
  52.     section
  53.     subsection
  54.     subsubsection
  55.     subsubsubsection
  56.     table
  57.     textheight
  58.     textwidth
  59.     title
  60.     topmargin
  61.     use
  62.     vfil
  63. );
  64.  
  65. # Let's ignore all comments in the following way. We first find all \%(.*?)\n.
  66. # Then we put a second \n at the end, and two leading \n
  67. # to ensure that they all land in
  68. # separate pieces. Then each piece that has a leading % is immediately passed.
  69.  
  70. $in =~ s/(?<!\\)\%(.*?)\n/\n\n\%$1\n\n/g;
  71.  
  72. my @pieces = split(/\n\s*\n/, $in);
  73. my $string="";
  74. my $keyword;
  75.  
  76. foreach (@pieces){
  77.  
  78. # Every comment is left as is.  But ignore % that are immediately preceded by \
  79.     if (/^\s*(?<!\\)\%/) {
  80.         $string .= $_ . "\n" ;
  81.         next;
  82.     }
  83.  
  84. #Eat all single newlines.
  85.  
  86.     s/\s+/ /g;
  87.    
  88. #Put @keywords on their own line.
  89.  
  90.     foreach $keyword (@keywords) {
  91.         s/(\\$keyword)/\n$1/g;
  92.     }
  93.  
  94. #Newlines before each \begin and \end. After each \end{}
  95. #We want to ignore begin and end document, since those shouldn't
  96. #induce additional indenting
  97.  
  98.     s/([^\\]\%)/\n$1/g;
  99.  
  100.  
  101.     s/(\\begin\{)((?!document).*?)(\})/\n$1$2$3\n/g;
  102.     s/(\\end\{)((?!document).*?)(\})/\n$1$2$3\n/g;
  103.     s/(\\begin\{array\})\n(\{)(.*?)(\})/\n$1$2$3$4\n/g;
  104.    
  105. #Newlines before each \item.
  106.  
  107.     s/(\\item)(.*?)(\\item)/$1$2\n$3/g;
  108.     s/(\\item)/\n$1/g;
  109.    
  110. #\n before each \[  and after each \]
  111. #Add newlines after all "\\" and "\\[...]"
  112.  
  113.     s/[^\\](\\\[)/\n$1/g;
  114.     s/(\\\])/$1\n/g;
  115.  
  116.     s/(\\\\)\s/$1\n/g;
  117.     s/(\\\\\[)(.*?)(\])\s/$1$2$3\n/g;
  118.  
  119.  
  120. # nuke accidentally added double newlines.
  121.  
  122.     s/\n\s*\n/\n/g;
  123.    
  124. # collect the cleaned string.
  125.  
  126.     s/^\n//;
  127.     chomp;
  128.     $string .= $_ . "\n\n";
  129.  
  130. }
  131.  
  132.  
  133. # First let's collapse all multiple \n's into double \n.
  134.  
  135. $string =~ s/\n\s+\n/\n\n/g;
  136.  
  137. # We will soon mark the \end and \begin keywords, but we want to ignore ones
  138. # found as comments. Hence we'll (awful kludge) wedge in a \{\n\n\n\} to commented
  139. # \begin and \end to avoid their processing.
  140. # We'll fix them right after the pieces are split.
  141.  
  142. $string =~ s/(\%[^\n]*)(\\)(end)/$1$2\{\n\n\n\}$3/g;
  143. $string =~ s/(\%[^\n]*)(\\)(begin)/$1$2\{\n\n\n\}$3/g;
  144.  
  145.  
  146. # Now let's put [triple \n] at the start of each \begin and the start of each
  147. # \end. Then we'll split on them, since they are unique.
  148. # Each of those pieces must be at the same indent level. Again, we need to
  149. # ignore beginning and end of document.
  150.  
  151. #$string =~s/(\\end)/\[\n\n\n$\]$1/g;
  152. #$string =~s/(\\begin)/\[\n\n\n$\]$1/g;
  153. #
  154. #@pieces = split(/\[\n\n\n$\]/, $string);
  155.  
  156. $string =~s/(\\end(?!\{document\}))/\[\n\n\n\]$1/g;
  157. $string =~s/(\\begin(?!\{document\}))/\[\n\n\n\]$1/g;
  158.  
  159. @pieces = split(/\[\n\n\n\]/, $string);
  160.  
  161. my $indent =1;
  162. my @lines;
  163. my ($piece, $i);
  164.  
  165. # $string is now free for reuse.
  166. $string="";
  167.  
  168. foreach $piece (@pieces) {
  169.     #first, is this a begin block, or after an end block?
  170.    
  171.     $piece =~ s/\{\n\n\n\}//g; #get rid of awful kludge.
  172.     $piece =~ /^\\(.*?)\{/;
  173.     if (lc($1) eq "begin") {
  174.         $indent++;
  175.     } else {
  176.         $indent--;
  177.     }
  178.     # each piece is split on \n. these pieces must begin with $indent tabs.
  179.     # We need to avoid combining comment lines with others.
  180.    
  181.     @lines = split(/\n/, $piece);
  182.    
  183.     foreach (@lines) {
  184.         s/^\s+//; #no leading whitespace
  185.         if (/^\\begin/i) {
  186.             for($i=1;$i<=$indent-1;$i++){
  187.                 $string .= "\t";
  188.             }
  189.         } else {
  190.             for($i=1;$i<=$indent;$i++){
  191.                 $string .= "\t";
  192.             }
  193.         }
  194.         $string .= $_ . "\n";
  195.     }
  196.    
  197. }
  198.  
  199.     print $string;
  200.  
  201. #(0.1) First version works. It indents LaTeX more or less correctly.
  202. #(0.2) Added a big list of LaTeX words to check. Squashed bug losing double lines. Handles comments. Handles sections, more or less. (5/3/02)
  203. #(0.21) Pushed keywords out to its own array. Added some more keywords. (5/4/02)
  204. #(0.22) Trying to ignore comments.
  205. #(0.23) Trying to port it to BBEdit.
  206. #(0.3) Now for TextMate.
  207. #(0.31) Neating the code so strict mode doesn't complain as much.
  208.  
  209. # To Do.
  210. # Not catching \usecommand!
  211. #  Take all such \sections
  212. #   and give the header a line of its own?
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement