TurtyWurty

PatternRule.java

Jul 30th, 2021
755
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /**
  2.  * Standard implementation of <code>IPredicateRule</code>.
  3.  * Is is capable of detecting a pattern which begins with a given start
  4.  * sequence and ends with a given end sequence. If the end sequence is
  5.  * not specified, it can be either end of line, end or file, or both. Additionally,
  6.  * the pattern can be constrained to begin in a certain column. The rule can also
  7.  * be used to check whether the text to scan covers half of the pattern, i.e. contains
  8.  * the end sequence required by the rule.
  9.  */
  10. public class PatternRule implements IPredicateRule {
  11.  
  12.     /**
  13.      * Comparator that orders <code>char[]</code> in decreasing array lengths.
  14.      *
  15.      * @since 3.1
  16.      */
  17.     private static class DecreasingCharArrayLengthComparator implements Comparator<char[]> {
  18.         @Override
  19.         public int compare(char[] o1, char[] o2) {
  20.             return o2.length - o1.length;
  21.         }
  22.     }
  23.  
  24.     /** Internal setting for the un-initialized column constraint */
  25.     protected static final int UNDEFINED= -1;
  26.  
  27.     /** The token to be returned on success */
  28.     protected IToken fToken;
  29.     /** The pattern's start sequence */
  30.     protected char[] fStartSequence;
  31.     /** The pattern's end sequence */
  32.     protected char[] fEndSequence;
  33.     /** The pattern's column constrain */
  34.     protected int fColumn= UNDEFINED;
  35.     /** The pattern's escape character */
  36.     protected char fEscapeCharacter;
  37.     /**
  38.      * Indicates whether the escape character continues a line
  39.      * @since 3.0
  40.      */
  41.     protected boolean fEscapeContinuesLine;
  42.     /** Indicates whether end of line terminates the pattern */
  43.     protected boolean fBreaksOnEOL;
  44.     /** Indicates whether end of file terminates the pattern */
  45.     protected boolean fBreaksOnEOF;
  46.  
  47.     /**
  48.      * Line delimiter comparator which orders according to decreasing delimiter length.
  49.      * @since 3.1
  50.      */
  51.     private Comparator<char[]> fLineDelimiterComparator= new DecreasingCharArrayLengthComparator();
  52.     /**
  53.      * Cached line delimiters.
  54.      * @since 3.1
  55.      */
  56.     private char[][] fLineDelimiters;
  57.     /**
  58.      * Cached sorted {@linkplain #fLineDelimiters}.
  59.      * @since 3.1
  60.      */
  61.     private char[][] fSortedLineDelimiters;
  62.  
  63.     /**
  64.      * Creates a rule for the given starting and ending sequence.
  65.      * When these sequences are detected the rule will return the specified token.
  66.      * Alternatively, the sequence can also be ended by the end of the line.
  67.      * Any character which follows the given escapeCharacter will be ignored.
  68.      *
  69.      * @param startSequence the pattern's start sequence
  70.      * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
  71.      * @param token the token which will be returned on success
  72.      * @param escapeCharacter any character following this one will be ignored
  73.      * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
  74.      */
  75.     public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL) {
  76.         Assert.isTrue(startSequence != null && !startSequence.isEmpty());
  77.         Assert.isTrue(endSequence != null || breaksOnEOL);
  78.         Assert.isNotNull(token);
  79.  
  80.         fStartSequence= startSequence.toCharArray();
  81.         fEndSequence= (endSequence == null ? new char[0] : endSequence.toCharArray());
  82.         fToken= token;
  83.         fEscapeCharacter= escapeCharacter;
  84.         fBreaksOnEOL= breaksOnEOL;
  85.     }
  86.  
  87.     /**
  88.      * Creates a rule for the given starting and ending sequence.
  89.      * When these sequences are detected the rule will return the specified token.
  90.      * Alternatively, the sequence can also be ended by the end of the line or the end of the file.
  91.      * Any character which follows the given escapeCharacter will be ignored.
  92.      *
  93.      * @param startSequence the pattern's start sequence
  94.      * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
  95.      * @param token the token which will be returned on success
  96.      * @param escapeCharacter any character following this one will be ignored
  97.      * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
  98.      * @param breaksOnEOF indicates whether the end of the file also terminates the pattern
  99.      * @since 2.1
  100.      */
  101.     public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF) {
  102.         this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL);
  103.         fBreaksOnEOF= breaksOnEOF;
  104.     }
  105.  
  106.     /**
  107.      * Creates a rule for the given starting and ending sequence.
  108.      * When these sequences are detected the rule will return the specified token.
  109.      * Alternatively, the sequence can also be ended by the end of the line or the end of the file.
  110.      * Any character which follows the given escapeCharacter will be ignored. An end of line
  111.      * immediately after the given <code>lineContinuationCharacter</code> will not cause the
  112.      * pattern to terminate even if <code>breakOnEOL</code> is set to true.
  113.      *
  114.      * @param startSequence the pattern's start sequence
  115.      * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
  116.      * @param token the token which will be returned on success
  117.      * @param escapeCharacter any character following this one will be ignored
  118.      * @param breaksOnEOL indicates whether the end of the line also terminates the pattern
  119.      * @param breaksOnEOF indicates whether the end of the file also terminates the pattern
  120.      * @param escapeContinuesLine indicates whether the specified escape character is used for line
  121.      *        continuation, so that an end of line immediately after the escape character does not
  122.      *        terminate the pattern, even if <code>breakOnEOL</code> is set
  123.      * @since 3.0
  124.      */
  125.     public PatternRule(String startSequence, String endSequence, IToken token, char escapeCharacter, boolean breaksOnEOL, boolean breaksOnEOF, boolean escapeContinuesLine) {
  126.         this(startSequence, endSequence, token, escapeCharacter, breaksOnEOL, breaksOnEOF);
  127.         fEscapeContinuesLine= escapeContinuesLine;
  128.     }
  129.  
  130.     /**
  131.      * Sets a column constraint for this rule. If set, the rule's token
  132.      * will only be returned if the pattern is detected starting at the
  133.      * specified column. If the column is smaller then 0, the column
  134.      * constraint is considered removed.
  135.      *
  136.      * @param column the column in which the pattern starts
  137.      */
  138.     public void setColumnConstraint(int column) {
  139.         if (column < 0)
  140.             column= UNDEFINED;
  141.         fColumn= column;
  142.     }
  143.  
  144.  
  145.     /**
  146.      * Evaluates this rules without considering any column constraints.
  147.      *
  148.      * @param scanner the character scanner to be used
  149.      * @return the token resulting from this evaluation
  150.      */
  151.     protected IToken doEvaluate(ICharacterScanner scanner) {
  152.         return doEvaluate(scanner, false);
  153.     }
  154.  
  155.     /**
  156.      * Evaluates this rules without considering any column constraints. Resumes
  157.      * detection, i.e. look sonly for the end sequence required by this rule if the
  158.      * <code>resume</code> flag is set.
  159.      *
  160.      * @param scanner the character scanner to be used
  161.      * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
  162.      * @return the token resulting from this evaluation
  163.      * @since 2.0
  164.      */
  165.     protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
  166.  
  167.         if (resume) {
  168.  
  169.             if (endSequenceDetected(scanner))
  170.                 return fToken;
  171.  
  172.         } else {
  173.  
  174.             int c= scanner.read();
  175.             if (c == fStartSequence[0]) {
  176.                 if (sequenceDetected(scanner, fStartSequence, false)) {
  177.                     if (endSequenceDetected(scanner))
  178.                         return fToken;
  179.                 }
  180.             }
  181.         }
  182.  
  183.         scanner.unread();
  184.         return Token.UNDEFINED;
  185.     }
  186.  
  187.     @Override
  188.     public IToken evaluate(ICharacterScanner scanner) {
  189.         return evaluate(scanner, false);
  190.     }
  191.  
  192.     /**
  193.      * Returns whether the end sequence was detected. As the pattern can be considered
  194.      * ended by a line delimiter, the result of this method is <code>true</code> if the
  195.      * rule breaks on the end of the line, or if the EOF character is read.
  196.      *
  197.      * @param scanner the character scanner to be used
  198.      * @return <code>true</code> if the end sequence has been detected
  199.      */
  200.     protected boolean endSequenceDetected(ICharacterScanner scanner) {
  201.  
  202.         char[][] originalDelimiters= scanner.getLegalLineDelimiters();
  203.         int count= originalDelimiters.length;
  204.         if (fLineDelimiters == null || fLineDelimiters.length != count) {
  205.             fSortedLineDelimiters= new char[count][];
  206.         } else {
  207.             while (count > 0 && Arrays.equals(fLineDelimiters[count - 1], originalDelimiters[count - 1]))
  208.                 count--;
  209.         }
  210.         if (count != 0) {
  211.             fLineDelimiters= originalDelimiters;
  212.             System.arraycopy(fLineDelimiters, 0, fSortedLineDelimiters, 0, fLineDelimiters.length);
  213.             Arrays.sort(fSortedLineDelimiters, fLineDelimiterComparator);
  214.         }
  215.  
  216.         int readCount= 1;
  217.         int c;
  218.         while ((c= scanner.read()) != ICharacterScanner.EOF) {
  219.             if (c == fEscapeCharacter) {
  220.                 // Skip escaped character(s)
  221.                 if (fEscapeContinuesLine) {
  222.                     c= scanner.read();
  223.                     for (char[] fSortedLineDelimiter : fSortedLineDelimiters) {
  224.                         if (c == fSortedLineDelimiter[0] && sequenceDetected(scanner, fSortedLineDelimiter, fBreaksOnEOF))
  225.                             break;
  226.                     }
  227.                 } else
  228.                     scanner.read();
  229.  
  230.             } else if (fEndSequence.length > 0 && c == fEndSequence[0]) {
  231.                 // Check if the specified end sequence has been found.
  232.                 if (sequenceDetected(scanner, fEndSequence, fBreaksOnEOF))
  233.                     return true;
  234.             } else if (fBreaksOnEOL) {
  235.                 // Check for end of line since it can be used to terminate the pattern.
  236.                 for (char[] fSortedLineDelimiter : fSortedLineDelimiters) {
  237.                     if (c == fSortedLineDelimiter[0] && sequenceDetected(scanner, fSortedLineDelimiter, fBreaksOnEOF))
  238.                         return true;
  239.                 }
  240.             }
  241.             readCount++;
  242.         }
  243.  
  244.         if (fBreaksOnEOF)
  245.             return true;
  246.  
  247.         for (; readCount > 0; readCount--)
  248.             scanner.unread();
  249.  
  250.         return false;
  251.     }
  252.  
  253.     /**
  254.      * Returns whether the next characters to be read by the character scanner
  255.      * are an exact match with the given sequence. No escape characters are allowed
  256.      * within the sequence. If specified the sequence is considered to be found
  257.      * when reading the EOF character.
  258.      *
  259.      * @param scanner the character scanner to be used
  260.      * @param sequence the sequence to be detected
  261.      * @param eofAllowed indicated whether EOF terminates the pattern
  262.      * @return <code>true</code> if the given sequence has been detected
  263.      */
  264.     protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) {
  265.         for (int i= 1; i < sequence.length; i++) {
  266.             int c= scanner.read();
  267.             if (c == ICharacterScanner.EOF && eofAllowed) {
  268.                 return true;
  269.             } else if (c != sequence[i]) {
  270.                 // Non-matching character detected, rewind the scanner back to the start.
  271.                 // Do not unread the first character.
  272.                 scanner.unread();
  273.                 for (int j= i-1; j > 0; j--)
  274.                     scanner.unread();
  275.                 return false;
  276.             }
  277.         }
  278.  
  279.         return true;
  280.     }
  281.  
  282.     @Override
  283.     public IToken evaluate(ICharacterScanner scanner, boolean resume) {
  284.         if (fColumn == UNDEFINED)
  285.             return doEvaluate(scanner, resume);
  286.  
  287.         int c= scanner.read();
  288.         scanner.unread();
  289.         if (c == fStartSequence[0])
  290.             return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
  291.         return Token.UNDEFINED;
  292.     }
  293.  
  294.     @Override
  295.     public IToken getSuccessToken() {
  296.         return fToken;
  297.     }
  298. }
  299.  
RAW Paste Data