Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- *******************************************************************************
- * Copyright (C) 1996-2000, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
- * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SearchIterator.java,v $
- * $Date: 2002/04/03 19:13:56 $
- * $Revision: 1.6 $
- *
- *****************************************************************************************
- */
- package com.ibm.icu.text;
- import java.text.BreakIterator;
- import java.text.CharacterIterator;
- /**
- * <code>SearchIterator</code> is an abstract base class that provides methods
- * to search for a pattern within a text string. Instances of
- * <code>SearchIterator</code> maintain a current position and scan over
- * the target text, returning the indices the pattern is matched
- * and the length of each match.
- * <p>
- * <code>SearchIterator</code> is an abstract base class that defines a
- * protocol for text searching. Subclasses provide concrete implementations of
- * various search algorithms. For example, {@link StringSearch}
- * implements language-sensitive pattern matching based on the comparison rules
- * defined in a {@link java.text.RuleBasedCollator RuleBasedCollator} object.
- * <p>
- * Internally, <code>SearchIterator</code> scans text using a
- * {@link CharacterIterator}, and is thus able to scan text held
- * by any object implementing that protocol. A <code>StringCharacterIterator</code>
- * is used to scan <code>String</code> objects passed to <code>setText</code>.
- * <p>
- * <code>SearchIterator</code> provides an API that is similar to that of
- * other text iteration classes such as <code>BreakIterator</code>. Using this
- * class, it is easy to scan through text looking for all occurances of a
- * given pattern. The following example uses a <code>StringSearch</code> object to
- * find all instances of "fox" in the target string. Any other subclass of
- * <code>SearchIterator</code> can be used in an identical manner.
- * <pre><code>
- * String target = "The quick brown fox jumped over the lazy fox";
- * String pattern = "fox";
- *
- * SearchIterator iter = new StringSearch(pattern, target);
- *
- * for (int pos = iter.first(); pos != SearchIterator.DONE; pos = iter.next()) {
- * System.out.println("Found match at " + pos +
- * ", length is " + iter.getMatchLength());
- * }
- * </code></pre>
- *
- * @see StringSearch
- */
- public abstract class SearchIterator {
- /**
- * DONE is returned by previous() and next() after all valid
- * matches have been returned, and by first() and last() if
- * there are no matches at all.
- */
- public static final int DONE = -1;
- /**
- * Private value indicating that the iterator is pointing
- * before the beginning of the target text.
- */
- private static final int BEFORE = -2;
- /**
- * Return the first index at which the target text matches the search
- * pattern. The iterator is adjusted so that its current index
- * (as returned by {@link #getIndex}) is the match posisition if one was found
- * and <code>DONE</code> if one was not.
- *
- * @return The character index of the first match, or <code>DONE</code> if there
- * are no matches.
- */
- final public int first() {
- setIndex(BEFORE);
- return next();
- }
- /**
- * Return the first index greater than <tt>pos</tt> at which the target
- * text matches the search pattern. The iterator is adjusted so that its current index
- * (as returned by {@link #getIndex}) is the match posisition if one was found
- * and <code>DONE</code> if one was not.
- *
- * @return The character index of the first match following <code>pos</code>,
- * or <tt>DONE</tt> if there are no matches.
- */
- final public int following(int pos) {
- setIndex(pos);
- return next();
- }
- /**
- * Return the last index in the target text at which it matches
- * the search pattern and adjusts the iteration to point to that position.
- *
- * @return The index of the first match, or <tt>DONE</tt> if there
- * are no matches.
- */
- final public int last() {
- setIndex(DONE);
- return previous();
- }
- /**
- * Return the first index less than <code>pos</code> at which the target
- * text matches the search pattern. The iterator is adjusted so that its current index
- * (as returned by {@link #getIndex}) is the match posisition if one was found
- * and <tt>DONE</tt> if one was not.
- *
- * @return The character index of the first match preceding <code>pos</code>,
- * or <code>DONE</code> if there are no matches.
- */
- final public int preceding(int pos) {
- setIndex(pos);
- return previous();
- }
- /**
- * Return the index of the next point at which the text matches the
- * search pattern, starting from the current position.
- * @return The index of the next match after the current position,
- * or <code>DONE</code> if there are no more matches.
- *
- * @see #first
- */
- public int next() {
- if (index == BEFORE){
- // Starting at the beginning of the text
- index = target.getBeginIndex();
- } else if (length > 0) {
- // Finding the next match after a previous one
- index += overlap ? 1 : length;
- }
- index -= 1;
- do {
- length = 0;
- index = handleNext(index + 1);
- } while (index != DONE && !isBreakUnit(index, index+length));
- return index;
- }
- /**
- * Return the index of the previous point at which the text matches
- * the search pattern, starting at the current position
- *
- * @return The index of the previous match before the current position,
- * or <code>DONE</code> if there are no more matches.
- */
- public int previous() {
- if (index == DONE) {
- index = target.getEndIndex();
- } else if (length > 0) {
- // Finding the previous match before a following one
- index = overlap ? index + length - 1 : index;
- }
- index += 1;
- do {
- length = 0;
- index = handlePrev(index - 1);
- } while (index != DONE && !isBreakUnit(index, index+length));
- if (index == DONE) {
- index = BEFORE;
- }
- return getIndex();
- }
- /**
- * Return the current index in the text being searched.
- * If the iteration has gone past the end of the text
- * (or past the beginning for a backwards search),
- * {@link #DONE} is returned.
- */
- public int getIndex() {
- return index == BEFORE ? DONE : index;
- }
- /**
- * Determines whether overlapping matches are returned. If this
- * property is <code>true</code>, matches that begin within the
- * boundry of the previous match are considered valid and will
- * be returned. For example, when searching for "abab" in the
- * target text "ababab", both offsets 0 and 2 will be returned
- * as valid matches if this property is <code>true</code>.
- * <p>
- * The default setting of this property is <tt>true</tt>
- */
- public void setOverlapping(boolean allowOverlap) {
- overlap = allowOverlap;
- }
- /**
- * Determines whether overlapping matches are returned.
- *
- * @see #setOverlapping
- */
- public boolean isOverlapping() {
- return overlap;
- }
- /**
- * Returns the length of text in the target which matches the search
- * pattern. This call returns a valid result only after a successful
- * call to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
- * Just after construction, or after a searching method returns
- * <tt>DONE</tt>, this method will return 0.
- *
- * @return The length of the match in the target text, or 0 if there
- * is no match currently.
- */
- public int getMatchLength() {
- return length;
- }
- /**
- * Set the BreakIterator that will be used to restrict the points
- * at which matches are detected.
- *
- * @param iterator A {@link java.text.BreakIterator BreakIterator}
- * that will be used to restrict the points
- * at which matches are detected. If a match is found, but the match's start
- * or end index is not a boundary as determined by
- * the <tt>BreakIterator</tt>, the match will be rejected and
- * another will be searched for.
- *
- * If this parameter is <tt>null</tt>, no break
- * detection is attempted.
- *
- * @see #getBreakIterator
- */
- public void setBreakIterator(BreakIterator iterator) {
- breaker = iterator;
- if (breaker != null) {
- breaker.setText(target);
- }
- }
- /**
- * Returns the BreakIterator that is used to restrict the points
- * at which matches are detected. This will be the same object
- * that was passed to the constructor or to <code>setBreakIterator</code>.
- * Note that <tt>null</tt> is a legal value; it means that break
- * detection should not be attempted.
- *
- * @see #setBreakIterator
- */
- public BreakIterator getBreakIterator() {
- return breaker;
- }
- /**
- * Set the target text which should be searched and resets the
- * iterator's position to point before the start of the target text.
- * This method is useful if you want to re-use an iterator to
- * search for the same pattern within a different body of text.
- *
- * @see #getTarget
- */
- public void setTarget(CharacterIterator iterator) {
- target = iterator;
- if (breaker != null) {
- breaker.setText(target);
- }
- setIndex(BEFORE);
- }
- /**
- * Return the target text which is being searched
- *
- * @see #setTarget
- */
- public CharacterIterator getTarget() {
- return target;
- }
- /**
- * Returns the text that was matched by the most recent call to
- * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
- * If the iterator is not pointing at a valid match (e.g. just after
- * construction or after <tt>DONE</tt> has been returned, returns
- * an empty string.
- */
- public String getMatchedText() {
- StringBuffer buffer = new StringBuffer();
- if (length > 0) {
- int i = 0;
- for (char c = target.setIndex(index); i < length; c = target.next(), i++)
- {
- buffer.append(c);
- }
- }
- return buffer.toString();
- }
- //-------------------------------------------------------------------
- // Protected interface for subclasses
- //-------------------------------------------------------------------
- /**
- * Constructor for use by subclasses.
- * <p>
- * @param target The target text to be searched. This is for internal
- * use by this class. Subclasses need to maintain their
- * own reference to or iterator over the target text
- * for use by their {@link #handleNext handleNext} and
- * {@link #handlePrev handlePrev} methods.
- *
- * @param breaker A {@link BreakIterator} that is used to restrict the points
- * at which matches are detected. If <tt>handleNext</tt> or
- * <tt>handlePrev</tt> finds a match, but the match's start
- * or end index is not a boundary as determined by
- * the <tt>BreakIterator</tt>, the match is rejected and
- * <tt>handleNext</tt> or <tt>handlePrev</tt> is called again.
- * If this parameter is <tt>null</tt>, no break
- * detection is attempted.
- *
- */
- protected SearchIterator(CharacterIterator target, BreakIterator breaker)
- {
- this.target = target;
- if (breaker != null) {
- this.breaker = (BreakIterator)breaker.clone();
- this.breaker.setText(target);
- }
- index = target.getBeginIndex();
- length = 0;
- }
- /**
- * Abstract method which subclasses override to provide the mechanism
- * for finding the next match in the target text. This allows different
- * subclasses to provide different search algorithms.
- * <p>
- * If a match is found, the implementation should return the index at
- * which the match starts and should call {@link #setMatchLength setMatchLength}
- * with the number of characters in the target
- * text that make up the match. If no match is found, the method
- * should return DONE and should not call <tt>setMatchLength</tt>.
- * <p>
- * @param startAt The index in the target text at which the search
- * should start.
- *
- * @see #setMatchLength
- */
- protected abstract int handleNext(int startAt);
- /**
- * Abstract method which subclasses override to provide the mechanism
- * for finding the previous match in the target text. This allows different
- * subclasses to provide different search algorithms.
- * <p>
- * If a match is found, the implementation should return the index at
- * which the match starts and should call {@link #setMatchLength setMatchLength}
- * with the number of characters in the target
- * text that make up the match. If no match is found, the method
- * should return DONE and should not call <tt>setMatchLength</tt>.
- * <p>
- * @param startAt The index in the target text at which the search
- * should start.
- *
- * @see #setMatchLength
- */
- protected abstract int handlePrev(int startAt);
- /**
- * Sets the length of the currently matched string in the target text.
- * Subclasses' <code>handleNext</code> and <code>handlePrev</code>
- * methods should call this when they find a match in the target text.
- */
- protected void setMatchLength(int length) {
- this.length = length;
- }
- //-------------------------------------------------------------------
- // Privates
- //
- /**
- * Internal method used by preceding and following. Sets the index
- * to point to the given position, and clears any state that's
- * affected.
- */
- private void setIndex(int pos) {
- index = pos;
- length = 0;
- }
- /**
- * Determine whether the target text bounded by <code>start</code> and
- * <code>end</code> is one or more whole units of text as determined by
- * the current <code>BreakIterator</code>.
- */
- private boolean isBreakUnit(int start, int end)
- {
- if (breaker == null) {
- return true;
- }
- boolean startBound = breaker.isBoundary(start);
- boolean endBound = (end == target.getEndIndex()) || breaker.isBoundary(end);
- return startBound && endBound;
- }
- //-------------------------------------------------------------------------
- // Private data...
- //-------------------------------------------------------------------------
- private int index; // Current position in the target text
- private int length; // Length of matched text, or 0
- private boolean overlap = true; // Return overlapping matches?
- private CharacterIterator target; // Target text to be searched
- private BreakIterator breaker; // Break iterator to constrain matches
- };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement