Advertisement
Guest User

MyTokenizerImpl.java

a guest
May 29th, 2013
102
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 21.93 KB | None
  1. /**
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *     http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17.  
  18. /*
  19.  
  20. WARNING: if you change StandardTokenizerImpl.jflex and need to regenerate
  21.       the tokenizer, only use Java 1.4 !!!
  22.       This grammar currently uses constructs (eg :digit:, :letter:) whose
  23.       meaning can vary according to the JRE used to run jflex.  See
  24.       https://issues.apache.org/jira/browse/LUCENE-1126 for details.
  25.       For current backwards compatibility it is needed to support
  26.       only Java 1.4 - this will change in Lucene 3.1.
  27.  
  28. */
  29.  
  30. import org.apache.lucene.analysis.Token;
  31. import org.apache.lucene.analysis.tokenattributes.TermAttribute;
  32.  
  33.  
  34. /**
  35.  * This class is a scanner generated by
  36.  * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
  37.  * on 12/16/11 10:24 AM from the specification file
  38.  * <tt>C:/src/opensource/lucene/3.0.2/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
  39.  */
  40. class MyTokenizerImpl {
  41.  
  42.   /** This character denotes the end of file */
  43.   public static final int YYEOF = -1;
  44.  
  45.   /** initial size of the lookahead buffer */
  46.   private static final int ZZ_BUFFERSIZE = 16384;
  47.  
  48.   /** lexical states */
  49.   public static final int YYINITIAL = 0;
  50.  
  51.   /**
  52.    * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
  53.    * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
  54.    *                  at the beginning of a line
  55.    * l is of the form l = 2*k, k a non negative integer
  56.    */
  57.   private static final int ZZ_LEXSTATE[] = {
  58.      0, 0
  59.   };
  60.  
  61.   /**
  62.    * Translates characters to character classes
  63.    */
  64.   private static final String ZZ_CMAP_PACKED =
  65.     "\11\0\1\0\1\14\1\0\1\0\1\13\22\0\1\0\5\0\1\4"+
  66.     "\1\2\4\0\1\10\1\6\1\3\1\10\12\1\6\0\1\5\32\11"+
  67.     "\4\0\1\7\1\0\32\11\57\0\1\11\12\0\1\11\4\0\1\11"+
  68.     "\5\0\27\11\1\0\37\11\1\0\u0128\11\2\0\22\11\34\0\136\11"+
  69.     "\2\0\11\11\2\0\7\11\16\0\2\11\16\0\5\11\11\0\1\11"+
  70.     "\213\0\1\11\13\0\1\11\1\0\3\11\1\0\1\11\1\0\24\11"+
  71.     "\1\0\54\11\1\0\10\11\2\0\32\11\14\0\202\11\12\0\71\11"+
  72.     "\2\0\2\11\2\0\2\11\3\0\46\11\2\0\2\11\67\0\46\11"+
  73.     "\2\0\1\11\7\0\47\11\110\0\33\11\5\0\3\11\56\0\32\11"+
  74.     "\5\0\13\11\25\0\12\1\7\0\143\11\1\0\1\11\17\0\2\11"+
  75.     "\11\0\12\1\3\11\23\0\1\11\1\0\33\11\123\0\46\11\u015f\0"+
  76.     "\65\11\3\0\1\11\22\0\1\11\7\0\12\11\4\0\12\1\25\0"+
  77.     "\10\11\2\0\2\11\2\0\26\11\1\0\7\11\1\0\1\11\3\0"+
  78.     "\4\11\42\0\2\11\1\0\3\11\4\0\12\1\2\11\23\0\6\11"+
  79.     "\4\0\2\11\2\0\26\11\1\0\7\11\1\0\2\11\1\0\2\11"+
  80.     "\1\0\2\11\37\0\4\11\1\0\1\11\7\0\12\1\2\0\3\11"+
  81.     "\20\0\7\11\1\0\1\11\1\0\3\11\1\0\26\11\1\0\7\11"+
  82.     "\1\0\2\11\1\0\5\11\3\0\1\11\22\0\1\11\17\0\1\11"+
  83.     "\5\0\12\1\25\0\10\11\2\0\2\11\2\0\26\11\1\0\7\11"+
  84.     "\1\0\2\11\2\0\4\11\3\0\1\11\36\0\2\11\1\0\3\11"+
  85.     "\4\0\12\1\25\0\6\11\3\0\3\11\1\0\4\11\3\0\2\11"+
  86.     "\1\0\1\11\1\0\2\11\3\0\2\11\3\0\3\11\3\0\10\11"+
  87.     "\1\0\3\11\55\0\11\1\25\0\10\11\1\0\3\11\1\0\27\11"+
  88.     "\1\0\12\11\1\0\5\11\46\0\2\11\4\0\12\1\25\0\10\11"+
  89.     "\1\0\3\11\1\0\27\11\1\0\12\11\1\0\5\11\44\0\1\11"+
  90.     "\1\0\2\11\4\0\12\1\25\0\10\11\1\0\3\11\1\0\27\11"+
  91.     "\1\0\20\11\46\0\2\11\4\0\12\1\25\0\22\11\3\0\30\11"+
  92.     "\1\0\11\11\1\0\1\11\2\0\7\11\71\0\1\1\60\11\1\1"+
  93.     "\2\11\14\1\7\11\11\1\12\1\47\0\2\11\1\0\1\11\2\0"+
  94.     "\2\11\1\0\1\11\2\0\1\11\6\0\4\11\1\0\7\11\1\0"+
  95.     "\3\11\1\0\1\11\1\0\1\11\2\0\2\11\1\0\4\11\1\0"+
  96.     "\2\11\11\0\1\11\2\0\5\11\1\0\1\11\11\0\12\1\2\0"+
  97.     "\2\11\42\0\1\11\37\0\12\1\26\0\10\11\1\0\42\11\35\0"+
  98.     "\4\11\164\0\42\11\1\0\5\11\1\0\2\11\25\0\12\1\6\0"+
  99.     "\6\11\112\0\46\11\12\0\47\11\11\0\132\11\5\0\104\11\5\0"+
  100.     "\122\11\6\0\7\11\1\0\77\11\1\0\1\11\1\0\4\11\2\0"+
  101.     "\7\11\1\0\1\11\1\0\4\11\2\0\47\11\1\0\1\11\1\0"+
  102.     "\4\11\2\0\37\11\1\0\1\11\1\0\4\11\2\0\7\11\1\0"+
  103.     "\1\11\1\0\4\11\2\0\7\11\1\0\7\11\1\0\27\11\1\0"+
  104.     "\37\11\1\0\1\11\1\0\4\11\2\0\7\11\1\0\47\11\1\0"+
  105.     "\23\11\16\0\11\1\56\0\125\11\14\0\u026c\11\2\0\10\11\12\0"+
  106.     "\32\11\5\0\113\11\225\0\64\11\54\0\12\1\46\0\12\1\6\0"+
  107.     "\130\11\10\0\51\11\u0557\0\234\11\4\0\132\11\6\0\26\11\2\0"+
  108.     "\6\11\2\0\46\11\2\0\6\11\2\0\10\11\1\0\1\11\1\0"+
  109.     "\1\11\1\0\1\11\1\0\37\11\2\0\65\11\1\0\7\11\1\0"+
  110.     "\1\11\3\0\3\11\1\0\7\11\3\0\4\11\2\0\6\11\4\0"+
  111.     "\15\11\5\0\3\11\1\0\7\11\202\0\1\11\202\0\1\11\4\0"+
  112.     "\1\11\2\0\12\11\1\0\1\11\3\0\5\11\6\0\1\11\1\0"+
  113.     "\1\11\1\0\1\11\1\0\4\11\1\0\3\11\1\0\7\11\u0ecb\0"+
  114.     "\2\11\52\0\5\11\12\0\1\12\124\12\10\12\2\12\2\12\132\12"+
  115.     "\1\12\3\12\6\12\50\12\3\12\1\0\136\11\21\0\30\11\70\0"+
  116.     "\20\12\u0100\0\200\12\200\0\u19b6\12\12\12\100\0\u51a6\12\132\12\u048d\11"+
  117.     "\u0773\0\u2ba4\11\u215c\0\u012e\12\322\12\7\11\14\0\5\11\5\0\1\11"+
  118.     "\1\0\12\11\1\0\15\11\1\0\5\11\1\0\1\11\1\0\2\11"+
  119.     "\1\0\2\11\1\0\154\11\41\0\u016b\11\22\0\100\11\2\0\66\11"+
  120.     "\50\0\14\11\164\0\3\11\1\0\1\11\1\0\207\11\23\0\12\1"+
  121.     "\7\0\32\11\6\0\32\11\12\0\1\12\72\12\37\11\3\0\6\11"+
  122.     "\2\0\6\11\2\0\6\11\2\0\3\11\43\0";
  123.  
  124.   /**
  125.    * Translates characters to character classes
  126.    */
  127.   private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
  128.  
  129.   /**
  130.    * Translates DFA states to action switch labels.
  131.    */
  132.   private static final int [] ZZ_ACTION = zzUnpackAction();
  133.  
  134.   private static final String ZZ_ACTION_PACKED_0 =
  135.     "\1\0\1\1\2\2\1\3\1\1\10\0\1\2\1\4"+
  136.     "\1\0\2\5\1\6\1\4\2\7\1\10\1\0\1\11"+
  137.     "\1\12";
  138.  
  139.   private static int [] zzUnpackAction() {
  140.     int [] result = new int[27];
  141.     int offset = 0;
  142.     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
  143.     return result;
  144.   }
  145.  
  146.   private static int zzUnpackAction(String packed, int offset, int [] result) {
  147.     int i = 0;       /* index in packed string  */
  148.     int j = offset;  /* index in unpacked array */
  149.     int l = packed.length();
  150.     while (i < l) {
  151.       int count = packed.charAt(i++);
  152.       int value = packed.charAt(i++);
  153.       do result[j++] = value; while (--count > 0);
  154.     }
  155.     return j;
  156.   }
  157.  
  158.  
  159.   /**
  160.    * Translates a state to a row index in the transition table
  161.    */
  162.   private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
  163.  
  164.   private static final String ZZ_ROWMAP_PACKED_0 =
  165.     "\0\0\0\15\0\32\0\47\0\15\0\64\0\101\0\116"+
  166.     "\0\133\0\150\0\165\0\202\0\217\0\234\0\251\0\266"+
  167.     "\0\303\0\320\0\335\0\352\0\367\0\217\0\u0104\0\101"+
  168.     "\0\u0111\0\202\0\u011e";
  169.  
  170.   private static int [] zzUnpackRowMap() {
  171.     int [] result = new int[27];
  172.     int offset = 0;
  173.     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
  174.     return result;
  175.   }
  176.  
  177.   private static int zzUnpackRowMap(String packed, int offset, int [] result) {
  178.     int i = 0;  /* index in packed string  */
  179.     int j = offset;  /* index in unpacked array */
  180.     int l = packed.length();
  181.     while (i < l) {
  182.       int high = packed.charAt(i++) << 16;
  183.       result[j++] = high | packed.charAt(i++);
  184.     }
  185.     return j;
  186.   }
  187.  
  188.   /**
  189.    * The transition table of the DFA
  190.    */
  191.   private static final int [] ZZ_TRANS = zzUnpackTrans();
  192.  
  193.   private static final String ZZ_TRANS_PACKED_0 =
  194.     "\1\2\1\3\7\2\1\4\1\5\1\6\1\2\16\0"+
  195.     "\1\3\1\0\1\7\1\0\1\10\2\11\1\12\1\3"+
  196.     "\4\0\1\3\1\13\1\14\1\15\1\16\2\11\1\12"+
  197.     "\1\17\17\0\1\2\1\0\1\20\7\0\1\20\4\0"+
  198.     "\1\21\7\0\1\21\4\0\1\22\7\0\1\22\4\0"+
  199.     "\1\23\7\0\1\23\14\0\1\24\4\0\1\20\7\0"+
  200.     "\1\25\14\0\1\26\4\0\1\21\7\0\1\27\4\0"+
  201.     "\1\3\1\13\1\7\1\15\1\16\2\11\1\12\1\17"+
  202.     "\4\0\1\20\1\0\1\30\1\0\1\10\2\11\1\12"+
  203.     "\1\20\4\0\1\21\1\0\1\31\2\0\1\31\2\0"+
  204.     "\1\21\4\0\1\22\1\0\1\11\1\0\1\10\2\11"+
  205.     "\1\12\1\22\4\0\1\23\1\0\1\12\2\0\3\12"+
  206.     "\1\23\5\0\1\13\6\0\1\24\4\0\1\20\1\0"+
  207.     "\1\32\1\0\1\10\2\11\1\12\1\20\4\0\1\21"+
  208.     "\1\0\1\31\2\0\1\31\2\0\1\27\4\0\1\33"+
  209.     "\7\0\1\33\4\0\1\33\1\0\1\31\2\0\1\31"+
  210.     "\2\0\1\33\3\0";
  211.  
  212.   private static int [] zzUnpackTrans() {
  213.     int [] result = new int[299];
  214.     int offset = 0;
  215.     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
  216.     return result;
  217.   }
  218.  
  219.   private static int zzUnpackTrans(String packed, int offset, int [] result) {
  220.     int i = 0;       /* index in packed string  */
  221.     int j = offset;  /* index in unpacked array */
  222.     int l = packed.length();
  223.     while (i < l) {
  224.       int count = packed.charAt(i++);
  225.       int value = packed.charAt(i++);
  226.       value--;
  227.       do result[j++] = value; while (--count > 0);
  228.     }
  229.     return j;
  230.   }
  231.  
  232.  
  233.   /* error codes */
  234.   private static final int ZZ_UNKNOWN_ERROR = 0;
  235.   private static final int ZZ_NO_MATCH = 1;
  236.   private static final int ZZ_PUSHBACK_2BIG = 2;
  237.  
  238.   /* error messages for the codes above */
  239.   private static final String ZZ_ERROR_MSG[] = {
  240.     "Unkown internal scanner error",
  241.     "Error: could not match input",
  242.     "Error: pushback value was too large"
  243.   };
  244.  
  245.   /**
  246.    * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
  247.    */
  248.   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
  249.  
  250.   private static final String ZZ_ATTRIBUTE_PACKED_0 =
  251.     "\1\0\1\11\2\1\1\11\1\1\10\0\2\1\1\0"+
  252.     "\7\1\1\0\2\1";
  253.  
  254.   private static int [] zzUnpackAttribute() {
  255.     int [] result = new int[27];
  256.     int offset = 0;
  257.     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
  258.     return result;
  259.   }
  260.  
  261.   private static int zzUnpackAttribute(String packed, int offset, int [] result) {
  262.     int i = 0;       /* index in packed string  */
  263.     int j = offset;  /* index in unpacked array */
  264.     int l = packed.length();
  265.     while (i < l) {
  266.       int count = packed.charAt(i++);
  267.       int value = packed.charAt(i++);
  268.       do result[j++] = value; while (--count > 0);
  269.     }
  270.     return j;
  271.   }
  272.  
  273.   /** the input device */
  274.   private java.io.Reader zzReader;
  275.  
  276.   /** the current state of the DFA */
  277.   private int zzState;
  278.  
  279.   /** the current lexical state */
  280.   private int zzLexicalState = YYINITIAL;
  281.  
  282.   /** this buffer contains the current text to be matched and is
  283.       the source of the yytext() string */
  284.   private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
  285.  
  286.   /** the textposition at the last accepting state */
  287.   private int zzMarkedPos;
  288.  
  289.   /** the current text position in the buffer */
  290.   private int zzCurrentPos;
  291.  
  292.   /** startRead marks the beginning of the yytext() string in the buffer */
  293.   private int zzStartRead;
  294.  
  295.   /** endRead marks the last character in the buffer, that has been read
  296.       from input */
  297.   private int zzEndRead;
  298.  
  299.   /** number of newlines encountered up to the start of the matched text */
  300.   private int yyline;
  301.  
  302.   /** the number of characters up to the start of the matched text */
  303.   private int yychar;
  304.  
  305.   /**
  306.    * the number of characters from the last newline up to the start of the
  307.    * matched text
  308.    */
  309.   private int yycolumn;
  310.  
  311.   /**
  312.    * zzAtBOL == true <=> the scanner is currently at the beginning of a line
  313.    */
  314.   private boolean zzAtBOL = true;
  315.  
  316.   /** zzAtEOF == true <=> the scanner is at the EOF */
  317.   private boolean zzAtEOF;
  318.  
  319.   /** denotes if the user-EOF-code has already been executed */
  320.   private boolean zzEOFDone;
  321.  
  322.   /* user code: */
  323.  
  324. public static final int ALPHANUM          = MyTokenizer.ALPHANUM;
  325. public static final int APOSTROPHE        = MyTokenizer.APOSTROPHE;
  326. public static final int ACRONYM           = MyTokenizer.ACRONYM;
  327. public static final int COMPANY           = MyTokenizer.COMPANY;
  328. public static final int EMAIL             = MyTokenizer.EMAIL;
  329. public static final int HOST              = MyTokenizer.HOST;
  330. public static final int NUM               = MyTokenizer.NUM;
  331. public static final int CJ                = MyTokenizer.CJ;
  332. /**
  333.  * @deprecated this solves a bug where HOSTs that end with '.' are identified
  334.  *             as ACRONYMs.
  335.  */
  336. @Deprecated
  337. public static final int ACRONYM_DEP       = MyTokenizer.ACRONYM_DEP;
  338.  
  339. public static final String [] TOKEN_TYPES = MyTokenizer.TOKEN_TYPES;
  340.  
  341. public final int yychar()
  342. {
  343.     return yychar;
  344. }
  345.  
  346. /**
  347.  * Fills Lucene token with the current token text.
  348.  */
  349. final void getText(Token t) {
  350.   t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
  351. }
  352.  
  353. /**
  354.  * Fills TermAttribute with the current token text.
  355.  */
  356. final void getText(TermAttribute t) {
  357.   t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
  358. }
  359.  
  360.  
  361.  
  /**
   * Creates a new scanner that reads characters from the given reader.
   * A java.io.InputStream overload of this constructor also exists.
   *
   * @param   in  the java.io.Reader to read input from.
   */
  MyTokenizerImpl(java.io.Reader in) {
    zzReader = in;
  }
  371.  
  /**
   * Creates a new scanner that reads from the given byte stream.
   * A java.io.Reader overload of this constructor also exists.
   *
   * The stream is wrapped in a java.io.InputStreamReader built without an
   * explicit charset, so bytes are decoded with the platform default
   * charset. Use the Reader constructor when a specific encoding is needed.
   *
   * @param   in  the java.io.InputStream to read input from.
   */
  MyTokenizerImpl(java.io.InputStream in) {
    this((java.io.Reader) new java.io.InputStreamReader(in));
  }
  381.  
  382.   /**
  383.    * Unpacks the compressed character translation table.
  384.    *
  385.    * @param packed   the packed character translation table
  386.    * @return         the unpacked character translation table
  387.    */
  388.   private static char [] zzUnpackCMap(String packed) {
  389.     char [] map = new char[0x10000];
  390.     int i = 0;  /* index in packed string  */
  391.     int j = 0;  /* index in unpacked array */
  392.     while (i < 1154) {
  393.       int  count = packed.charAt(i++);
  394.       char value = packed.charAt(i++);
  395.       do map[j++] = value; while (--count > 0);
  396.     }
  397.     return map;
  398.   }
  399.  
  400.  
  401.   /**
  402.    * Refills the input buffer.
  403.    *
  404.    * @return      <code>false</code>, iff there was new input.
  405.    *
  406.    * @exception   java.io.IOException  if any I/O-Error occurs
  407.    */
  408.   private boolean zzRefill() throws java.io.IOException {
  409.  
  410.     /* first: make room (if you can) */
  411.     if (zzStartRead > 0) {
  412.       System.arraycopy(zzBuffer, zzStartRead,
  413.                        zzBuffer, 0,
  414.                        zzEndRead-zzStartRead);
  415.  
  416.       /* translate stored positions */
  417.       zzEndRead-= zzStartRead;
  418.       zzCurrentPos-= zzStartRead;
  419.       zzMarkedPos-= zzStartRead;
  420.       zzStartRead = 0;
  421.     }
  422.  
  423.     /* is the buffer big enough? */
  424.     if (zzCurrentPos >= zzBuffer.length) {
  425.       /* if not: blow it up */
  426.       char newBuffer[] = new char[zzCurrentPos*2];
  427.       System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
  428.       zzBuffer = newBuffer;
  429.     }
  430.  
  431.     /* finally: fill the buffer with new input */
  432.     int numRead = zzReader.read(zzBuffer, zzEndRead,
  433.                                             zzBuffer.length-zzEndRead);
  434.  
  435.     if (numRead > 0) {
  436.       zzEndRead+= numRead;
  437.       return false;
  438.     }
  439.     // unlikely but not impossible: read 0 characters, but not at end of stream    
  440.     if (numRead == 0) {
  441.       int c = zzReader.read();
  442.       if (c == -1) {
  443.         return true;
  444.       } else {
  445.         zzBuffer[zzEndRead++] = (char) c;
  446.         return false;
  447.       }    
  448.     }
  449.  
  450.     // numRead < 0
  451.     return true;
  452.   }
  453.  
  454.    
  455.   /**
  456.    * Closes the input stream.
  457.    */
  458.   public final void yyclose() throws java.io.IOException {
  459.     zzAtEOF = true;            /* indicate end of file */
  460.     zzEndRead = zzStartRead;  /* invalidate buffer    */
  461.  
  462.     if (zzReader != null)
  463.       zzReader.close();
  464.   }
  465.  
  466.  
  467.   /**
  468.    * Resets the scanner to read from a new input stream.
  469.    * Does not close the old reader.
  470.    *
  471.    * All internal variables are reset, the old input stream
  472.    * <b>cannot</b> be reused (internal buffer is discarded and lost).
  473.    * Lexical state is set to <tt>ZZ_INITIAL</tt>.
  474.    *
  475.    * @param reader   the new input stream
  476.    */
  477.   public final void yyreset(java.io.Reader reader) {
  478.     zzReader = reader;
  479.     zzAtBOL  = true;
  480.     zzAtEOF  = false;
  481.     zzEOFDone = false;
  482.     zzEndRead = zzStartRead = 0;
  483.     zzCurrentPos = zzMarkedPos = 0;
  484.     yyline = yychar = yycolumn = 0;
  485.     zzLexicalState = YYINITIAL;
  486.   }
  487.  
  488.  
  489.   /**
  490.    * Returns the current lexical state.
  491.    */
  492.   public final int yystate() {
  493.     return zzLexicalState;
  494.   }
  495.  
  496.  
  497.   /**
  498.    * Enters a new lexical state
  499.    *
  500.    * @param newState the new lexical state
  501.    */
  502.   public final void yybegin(int newState) {
  503.     zzLexicalState = newState;
  504.   }
  505.  
  506.  
  507.   /**
  508.    * Returns the text matched by the current regular expression.
  509.    */
  510.   public final String yytext() {
  511.     return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
  512.   }
  513.  
  514.  
  515.   /**
  516.    * Returns the character at position <tt>pos</tt> from the
  517.    * matched text.
  518.    *
  519.    * It is equivalent to yytext().charAt(pos), but faster
  520.    *
  521.    * @param pos the position of the character to fetch.
  522.    *            A value from 0 to yylength()-1.
  523.    *
  524.    * @return the character at position pos
  525.    */
  526.   public final char yycharat(int pos) {
  527.     return zzBuffer[zzStartRead+pos];
  528.   }
  529.  
  530.  
  531.   /**
  532.    * Returns the length of the matched text region.
  533.    */
  534.   public final int yylength() {
  535.     return zzMarkedPos-zzStartRead;
  536.   }
  537.  
  538.  
  539.   /**
  540.    * Reports an error that occured while scanning.
  541.    *
  542.    * In a wellformed scanner (no or only correct usage of
  543.    * yypushback(int) and a match-all fallback rule) this method
  544.    * will only be called with things that "Can't Possibly Happen".
  545.    * If this method is called, something is seriously wrong
  546.    * (e.g. a JFlex bug producing a faulty scanner etc.).
  547.    *
  548.    * Usual syntax/scanner level error handling should be done
  549.    * in error fallback rules.
  550.    *
  551.    * @param   errorCode  the code of the errormessage to display
  552.    */
  553.   private void zzScanError(int errorCode) {
  554.     String message;
  555.     try {
  556.       message = ZZ_ERROR_MSG[errorCode];
  557.     }
  558.     catch (ArrayIndexOutOfBoundsException e) {
  559.       message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
  560.     }
  561.  
  562.     throw new Error(message);
  563.   }
  564.  
  565.  
  566.   /**
  567.    * Pushes the specified amount of characters back into the input stream.
  568.    *
  569.    * They will be read again by then next call of the scanning method
  570.    *
  571.    * @param number  the number of characters to be read again.
  572.    *                This number must not be greater than yylength()!
  573.    */
  574.   public void yypushback(int number)  {
  575.     if ( number > yylength() )
  576.       zzScanError(ZZ_PUSHBACK_2BIG);
  577.  
  578.     zzMarkedPos -= number;
  579.   }
  580.  
  581.  
  582.   /**
  583.    * Resumes scanning until the next regular expression is matched,
  584.    * the end of input is encountered or an I/O-Error occurs.
  585.    *
  586.    * @return      the next token
  587.    * @exception   java.io.IOException  if any I/O-Error occurs
  588.    */
  589.   public int getNextToken() throws java.io.IOException {
  590.     int zzInput;
  591.     int zzAction;
  592.  
  593.     // cached fields:
  594.     int zzCurrentPosL;
  595.     int zzMarkedPosL;
  596.     int zzEndReadL = zzEndRead;
  597.     char [] zzBufferL = zzBuffer;
  598.     char [] zzCMapL = ZZ_CMAP;
  599.  
  600.     int [] zzTransL = ZZ_TRANS;
  601.     int [] zzRowMapL = ZZ_ROWMAP;
  602.     int [] zzAttrL = ZZ_ATTRIBUTE;
  603.  
  604.     while (true) {
  605.       zzMarkedPosL = zzMarkedPos;
  606.  
  607.       yychar+= zzMarkedPosL-zzStartRead;
  608.  
  609.       zzAction = -1;
  610.  
  611.       zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
  612.  
  613.       zzState = ZZ_LEXSTATE[zzLexicalState];
  614.  
  615.  
  616.       zzForAction: {
  617.         while (true) {
  618.    
  619.           if (zzCurrentPosL < zzEndReadL)
  620.             zzInput = zzBufferL[zzCurrentPosL++];
  621.           else if (zzAtEOF) {
  622.             zzInput = YYEOF;
  623.             break zzForAction;
  624.           }
  625.           else {
  626.             // store back cached positions
  627.             zzCurrentPos  = zzCurrentPosL;
  628.             zzMarkedPos   = zzMarkedPosL;
  629.             boolean eof = zzRefill();
  630.             // get translated positions and possibly new buffer
  631.             zzCurrentPosL  = zzCurrentPos;
  632.             zzMarkedPosL   = zzMarkedPos;
  633.             zzBufferL      = zzBuffer;
  634.             zzEndReadL     = zzEndRead;
  635.             if (eof) {
  636.               zzInput = YYEOF;
  637.               break zzForAction;
  638.             }
  639.             else {
  640.               zzInput = zzBufferL[zzCurrentPosL++];
  641.             }
  642.           }
  643.           int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
  644.           if (zzNext == -1) break zzForAction;
  645.           zzState = zzNext;
  646.  
  647.           int zzAttributes = zzAttrL[zzState];
  648.           if ( (zzAttributes & 1) == 1 ) {
  649.             zzAction = zzState;
  650.             zzMarkedPosL = zzCurrentPosL;
  651.             if ( (zzAttributes & 8) == 8 ) break zzForAction;
  652.           }
  653.  
  654.         }
  655.       }
  656.  
  657.       // store back cached position
  658.       zzMarkedPos = zzMarkedPosL;
  659.  
  660.       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
  661.         case 4:
  662.           { return HOST;
  663.           }
  664.         case 11: break;
  665.         case 9:
  666.           { return ACRONYM;
  667.           }
  668.         case 12: break;
  669.         case 8:
  670.           { return ACRONYM_DEP;
  671.           }
  672.         case 13: break;
  673.         case 1:
  674.           { /* ignore */
  675.           }
  676.         case 14: break;
  677.         case 5:
  678.           { return NUM;
  679.           }
  680.         case 15: break;
  681.         case 3:
  682.           { return CJ;
  683.           }
  684.         case 16: break;
  685.         case 2:
  686.           { return ALPHANUM;
  687.           }
  688.         case 17: break;
  689.         case 7:
  690.           { return COMPANY;
  691.           }
  692.         case 18: break;
  693.         case 6:
  694.           { return APOSTROPHE;
  695.           }
  696.         case 19: break;
  697.         case 10:
  698.           { return EMAIL;
  699.           }
  700.         case 20: break;
  701.         default:
  702.           if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
  703.             zzAtEOF = true;
  704.             return YYEOF;
  705.           }
  706.           else {
  707.             zzScanError(ZZ_NO_MATCH);
  708.           }
  709.       }
  710.     }
  711.   }
  712.  
  713.  
  714. }
Advertisement
RAW Paste Data Copied
Advertisement