Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- # vim:set fdm=marker:
- use strict;
- use warnings;
- use feature 'say';
- use feature 'switch';
- {
- package TexToken;
- use strict;
- use warnings;
- # sub new(file,line,col,type,content) {{{
- sub new {
- my($class,$file,$line,$col,$type,$str) = @_;
- my $self = {
- file => $file,
- line => $line,
- column => $col,
- type => $type,
- content => $str
- };
- bless $self, $class;
- return $self;
- }
- # }}}
- # sub first() {{{
- sub first {
- my $self = shift;
- return lc(substr($self->{content},0,1));
- }
- # }}}
- }
- {
- package TexStream;
- use FileHandle;
- use strict;
- use warnings;
- my @disabledWarnings = ();
- my @enabledErrors = ();
- my @enabledWarnings = ();
- my @enabledMessages = ();
- my $logType = 3;
- my @files = ();
- sub nextFile {
- pop @files;
- }
- # sub parseOpts {{{
- sub parseOpts() {
- my $last = undef;
- foreach my $arg(@ARGV) {
- given($arg) {
- when(/^-n/) {
- @disabledWarnings = (@disabledWarnings, substr($_,2));
- }
- when(/^-e/) {
- @enabledErrors = (@enabledErrors, substr($_,2));
- }
- when(/^-w/) {
- @enabledWarnings = (@enabledWarnings, substr($_,2));
- }
- when(/^-w/) {
- @enabledMessages = (@enabledMessages, substr($_,2));
- }
- when(/^-v/) {
- $logType = substr($_,2);
- }
- }
- given($last) {
- when("-x") {
- push @files, $arg;
- }
- when("-o") {
- open LOGFILE, "> $arg";
- select LOGFILE;
- }
- }
- $last = $arg;
- }
- }
- # }}}
- # @SilenceSpace {{{
- my @SilenceSpace = qw(
- \rm \em \bf \it \sl \sf \sc \tt \selectfont
- \rmfamily \sffamily \ttfamily \mdseries \bfseries
- \slshape \scshape \relax
- \vskip \pagebreak \nopagebreak
- \textrm \textem \textbf \textit \textsl \textsf \textsc \texttt
- \clearpage \ddots \dotfill \flushbottom \fussy \indent \linebreak
- \onecolumn \pagebreak \pushtabs \poptabs \scriptsize \sloppy
- \twocolumn \vdots
- \today \kill \newline \thicklines \thinlines
- \columnsep \space \item \tiny \footnotesize \small \normalsize
- \normal \large \Large \LARGE \huge \Huge \printindex
- \newpage \listoffigures \listoftables \tableofcontents
- \maketitle \makeindex
- \hline \hrule \vrule
- \centering
- \bigskip \medskip \smallskip
- \noindent \expandafter
- \makeatletter \makeatother
- \columnseprule
- \textwidth \textheight \hsize \vsize
- \if \fi
- \csname \endcsname
- \z@ \p@ \@warning \typeout
- \dots \ldots \input \endinput \nextline \leavevmode \cdots
- \vfill \vfil \hfill \hfil \topmargin \oddsidemargin
- \frenchspacing \nonfrenchspacing
- \begingroup \endgroup \par
- \vrefwarning \upshape \headheight \headsep \hoffset \voffset
- \cdot \qquad
- \left \right
- \rm \em \bf \it \sl \sf \sc \tt \selectfont
- \rmfamily \sffamily \ttfamily \mdseries \bfseries
- \slshape \scshape \relax
- \vskip \pagebreak \nopagebreak
- \textrm \textem \textbf \textit \textsl \textsf \textsc \texttt
- \clearpage \ddots \dotfill \flushbottom \fussy \indent \linebreak
- \onecolumn \pagebreak \pushtabs \poptabs \scriptsize \sloppy
- \twocolumn \vdots
- \today \kill \newline \thicklines \thinlines
- \columnsep \space \item \tiny \footnotesize \small \normalsize
- \normal \large \Large \LARGE \huge \Huge \printindex
- \newpage \listoffigures \listoftables \tableofcontents
- \maketitle \makeindex
- \hline \hrule \vrule
- \centering
- \bigskip \medskip \smallskip
- \noindent \expandafter
- \makeatletter \makeatother
- \columnseprule
- \textwidth \textheight \hsize \vsize
- \if \fi
- \csname \endcsname
- \z@ \p@ \@warning \typeout
- \dots \ldots \input \endinput \nextline \leavevmode \cdots
- \vfill \vfil \hfill \hfil \topmargin \oddsidemargin
- \frenchspacing \nonfrenchspacing
- \begingroup \endgroup \par
- \vrefwarning \upshape \headheight \headsep \hoffset \voffset
- \cdot \qquad
- \left \right
- );
- # }}}
- # @ProtectSpace {{{
- my @ProtectSpace;
- foreach( qw( 1st. 2nd. 3rd. 4th. Mr. Mrs. Miss. Ms. Dr. Prof. St. ) ) {
- push @ProtectSpace, lc $_;
- }
- # }}}
- # sub new(filename) {{{
- sub new {
- my($class,$filename) = @_;
- my $self = {
- isAtLetter => 1,
- isMathMode => 0,
- filename => $filename,
- filehandle => FileHandle->new(),
- tokens => [],
- tokenIdx => 0,
- currentLine => "",
- processingLine => "",
- envStack => []
- };
- $self->{filehandle}->open("< " . $filename) or die("Cannot open $filename: $!");
- bless $self, $class;
- return $self;
- }
- # }}}
- # sub getLtxCommand(type) {{{
- sub getLtxCommand {
- my($self,$type) = @_;
- my $letter = $self->{isAtLetter} ? "|@" : "";
- $self->{processingLine} =~ /^(\\([[:alpha:]]$letter)+)/;
- my $str = $1 // substr($self->{processingLine},0,1);
- return $self->extractChars($type, length $str);
- }
- # }}}
- # sub getLtxToken(type) {{{
- sub getLtxToken {
- my($self,$type) = @_;
- my $tok = $self->getLtxCommand($type);
- given($tok->{content}) {
- $self->{isAtLetter} = 1 when("\\makeatletter");
- $self->{isAtLetter} = 0 when("\\makeatother");
- $self->{isMathMode} = 1 when("\\[");
- $self->{isMathMode} = 0 when("\\]");
- $self->{isMathMode} = !$self->{isMathMode} when('$');
- }
- say $tok->{content};
- return $tok;
- }
- # }}}
- # sub extractChars(type,len) {{{
- sub extractChars {
- my($self,$type,$len) = @_;
- my $str = substr($self->{processingLine}, 0, $len);
- my $res = TexToken->new($self, $., length($self->{currentLine})-length($self->{processingLine}), $type, $str);
- $self->{processingLine} = substr($self->{processingLine}, length $str);
- return $res;
- }
- # }}}
- # sub parseLine() {{{
- sub parseLine {
- my $self = shift;
- TOKEN: while(length $self->{processingLine}) {
- study $self->{processingLine};
- my $c = substr($self->{processingLine},0,1);
- given($c) {
- when("%") {
- # skip comments
- $self->{processingLine} = "";
- last TOKEN;
- }
- when('\\') {
- # backslash, open bracket, open brace, closing bracket, closing brace
- my $t = $self->getLtxToken($c);
- #say $t->{content}, $t->{type} if ($t->{type} eq "\\" or $t->{content} =~ /begin/);
- push $self->{tokens}, $t;
- }
- when($_ ~~ ["[","]","{","}"]) {
- push $self->{tokens}, $self->extractChars($_, 1);
- }
- when(/[[:alpha:]]/) {
- # word (character a-z)
- $self->{processingLine} =~ /^([[:alpha:]]+)/;
- push $self->{tokens}, $self->extractChars('w', length $1);
- }
- when(/[[:digit:]]/) {
- # number
- $self->{processingLine} =~ /^([[:digit:]]+)/;
- push $self->{tokens}, $self->extractChars('d', length $1);
- }
- when(".") {
- # dot(s)
- $self->{processingLine} =~ /^(\.+)/;
- push $self->{tokens}, $self->extractChars('.', length $1);
- }
- when(/\h/) {
- # whitespace(s)
- $self->{processingLine} =~ /^(\h+)/;
- push $self->{tokens}, $self->extractChars('s', length $1);
- }
- when(/\v/) {
- # newline(s)
- $self->{processingLine} =~ /^(\v+)/;
- push $self->{tokens}, $self->extractChars('n', length $1);
- }
- when("-") {
- # dash(es)
- $self->{processingLine} =~ /^(\-+)/;
- push $self->{tokens}, $self->extractChars('-', length $1);
- }
- when("`") {
- # quote begin
- $self->{processingLine} =~ /^(`+)/;
- push $self->{tokens}, $self->extractChars('qb', length $1);
- }
- when("'") {
- # quote end
- $self->{processingLine} =~ /^('+)/;
- push $self->{tokens}, $self->extractChars('qe', length $1);
- }
- default {
- # all others
- push $self->{tokens}, $self->extractChars('?', length $c);
- }
- }
- }
- }
- # }}}
- # sub relTokenAt(idx) {{{
- sub relTokenAt {
- my($self,$idx) = @_;
- $idx += $self->{tokenIdx};
- return undef if 0>$idx || $idx>=$self->{tokens};
- return $self->{tokens}[$idx];
- }
- # }}}
- # sub nextToken() {{{
- sub nextToken {
- my $self = shift;
- ++$self->{tokenIdx};
- return $self->relTokenAt(-1);
- }
- # }}}
- # sub getTokensUntil(str) {{{
- sub getTokensUntil {
- my($self,$str) = @_;
- my $res = "";
- while(my $tok = $self->nextToken()) {
- last if($tok->{content} eq $str);
- $res .= $tok->{content};
- }
- return $res;
- }
- # }}}
- # sub hint(num,msg) {{{
- sub hint {
- my($self,$num,$msg) = @_;
- if($num ~~ @disabledWarnings) {
- return;
- }
- my $col = length($self->{currentLine}) - length($self->{processingLine}) + 1;
- given($logType) {
- when(0) {
- say "$self->{filename}:$.:$col:$num:$msg";
- }
- when(3) {
- say "Warning $num in $self->{filename} line $. $msg";
- say $self->{currentLine};
- say " " x ($col - 1) . "^";
- }
- }
- }
- # }}}
- # sub process() {{{
- sub process {
- my $self = shift;
- $self->{tokenIdx} = 0;
- while($self->{currentLine} = $self->{filehandle}->getline()) {
- $self->{processingLine} = $self->{currentLine};
- $self->parseLine();
- $self->{processingLine} = $self->{currentLine};
- my $punctWord = undef;
- while(my $tok = $self->nextToken()) {
- $self->{processingLine} = substr($self->{processingLine}, length($tok->{content}));
- study $tok->{content};
- #say $tok->{content}, " ", $tok->{type} if $tok->{content} =~ /begin/;
- given($tok->{type}) {
- when('w') {
- $punctWord = $tok->{content};
- # number followed by x
- if('x' eq lc($tok->{content}) && $self->relTokenAt(-1)->{type} eq 'd') {
- $self->hint(1, "Consider replacing 'x' after a number with '\\times'");
- }
- }
- when('d') {
- $punctWord = $tok->{content};
- }
- when('\\') {
- #say $tok->{content} if $tok->{content} ne "\\ldots";
- given($tok->{content}) {
- when([ '\\include', '\\input' ]) {
- say "SKIP: ", $self->getTokensUntil("{");
- my $fn = $self->getTokensUntil("}");
- $fn =~ s/\{(.+)\}/$1/;
- $fn =~ s/\\string"(.*)\\string"/$1/;
- if(not -f $fn) {
- $self->hint(7, "Include file does not exist: $fn");
- }
- else {
- my $stream = TexStream->new($fn);
- $stream->process();
- }
- }
- when( '\\begin' ) {
- say "SKIP: ", $self->getTokensUntil("{");
- my $e = $self->getTokensUntil("}");
- say "ENV: $e";
- push $self->{envStack}, $e;
- }
- when( '\\end' ) {
- say "SKIP: ", $self->getTokensUntil("{");
- my $top = pop $self->{envStack};
- my $cur = $self->getTokensUntil("}");
- $self->hint(6, "Mismatching environment - Expected $top, found $cur!") if $cur ne $top;
- }
- $self->hint(9, "Consider replacing '$_' with '``'") when ('\\textquotedblleft');
- $self->hint(10, "Consider replacing '$_' with '`'") when ('\\textquoteleft');
- $self->hint(11, "Consider replacing '$_' with `''`") when ('\\textquotedblright');
- $self->hint(12, "Consider replacing '$_' with `'`") when ('\\textquoteright');
- $self->hint(13, "Consider replacing '$_' with '``' or `''`") when ('\\textquotedbl');
- }
- }
- when('.') {
- $punctWord .= $tok->{content};
- given(length $tok->{content}) {
- when(1) { }
- when(2) {
- $self->hint(2, "Did you mean . or \\ldots{}?");
- }
- when(3) {
- $self->hint(3, "Replace ... with \\ldots{}");
- }
- default {
- $self->hint(4, "Replace ... with \\ldots{} and remove trailing dots");
- }
- }
- }
- when('s') {
- if($self->relTokenAt(-1)->{type} eq '\\') {
- $self->hint(5, "Command terminated by space") unless $self->relTokenAt(-1)->{content} ~~ @SilenceSpace;
- }
- $self->hint(6, "Multiple spaces") if length($tok->{content})>1;
- if($punctWord && lc($punctWord) ~~ @ProtectSpace) {
- my $t = chop $punctWord;
- $self->hint(14, "Insert protected space here after '$punctWord'") if '.' eq $t;
- }
- }
- }
- if($tok->{type} ne "-") {
- $punctWord = undef;
- }
- if(defined $punctWord) {
- my $prev = $self->relTokenAt(-1);
- if($punctWord =~ /^[A-Z]/ && $tok->{type} eq "w" && $tok->{content} =~ /^[A-Z]/ && $prev->{type} eq "-") {
- # word - word
- if(length $prev->{content} < 2) {
- # issue a special value when both are uppercase
- my $bothupper = $punctWord eq uc $punctWord && $tok->{content} eq uc $tok->{content};
- $self->hint($bothupper ? 16 : 15, "Consider inserting 3 dashes before '$tok->{content}'");
- }
- }
- elsif($punctWord =~ /^[0-9]/ && $tok->{type} eq "d" && $tok->{content} =~ /^[0-9]/ && $prev->{type} eq "-") {
- # number - number
- if(length $prev->{content} != 2) {
- $self->hint(17, "Consider inserting 2 dashes before '$tok->{content}'");
- }
- }
- elsif($punctWord =~ /^[0-9]/ && $tok->{type} eq "w" && $prev->{type} eq "-") {
- # number - word
- if(length $prev->{content} != 1) {
- $self->hint(18, "Consider inserting 1 dash before '$tok->{content}'");
- }
- }
- elsif($punctWord =~ /^[[:alpha:]]/ && $tok->{type} eq "d" && $prev->{type} eq "-") {
- # word - number
- if(length $prev->{content} != 1) {
- $self->hint(19, "Consider inserting 1 dash before '$tok->{content}'");
- }
- }
- }
- }
- };
- }
- # }}}
- }
- &TexStream::parseOpts();
- while(my $fn = TexStream::nextFile()) {
- my $p = TexStream->new($fn);
- $p->process();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement