Advertisement
Guest User

Untitled

a guest
Jul 21st, 2017
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.63 KB | None | 0 0
  1. /*
  2. *******************************************************************************
  3. * Copyright (C) 1996-2000, International Business Machines Corporation and *
  4. * others. All Rights Reserved. *
  5. *******************************************************************************
  6. *
  7. * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SearchIterator.java,v $
  8. * $Date: 2002/04/03 19:13:56 $
  9. * $Revision: 1.6 $
  10. *
  11. *****************************************************************************************
  12. */
  13.  
  14. package com.ibm.icu.text;
  15.  
  16. import java.text.BreakIterator;
  17. import java.text.CharacterIterator;
  18.  
  19. /**
  20. * <code>SearchIterator</code> is an abstract base class that provides methods
  21. * to search for a pattern within a text string. Instances of
  22. * <code>SearchIterator</code> maintain a current position and scan over
  23. * the target text, returning the indices the pattern is matched
  24. * and the length of each match.
  25. * <p>
  26. * <code>SearchIterator</code> is an abstract base class that defines a
  27. * protocol for text searching. Subclasses provide concrete implementations of
  28. * various search algorithms. For example, {@link StringSearch}
  29. * implements language-sensitive pattern matching based on the comparison rules
  30. * defined in a {@link java.text.RuleBasedCollator RuleBasedCollator} object.
  31. * <p>
  32. * Internally, <code>SearchIterator</code> scans text using a
  33. * {@link CharacterIterator}, and is thus able to scan text held
  34. * by any object implementing that protocol. A <code>StringCharacterIterator</code>
  35. * is used to scan <code>String</code> objects passed to <code>setText</code>.
  36. * <p>
  37. * <code>SearchIterator</code> provides an API that is similar to that of
  38. * other text iteration classes such as <code>BreakIterator</code>. Using this
  39. * class, it is easy to scan through text looking for all occurances of a
  40. * given pattern. The following example uses a <code>StringSearch</code> object to
  41. * find all instances of "fox" in the target string. Any other subclass of
  42. * <code>SearchIterator</code> can be used in an identical manner.
  43. * <pre><code>
  44. * String target = "The quick brown fox jumped over the lazy fox";
  45. * String pattern = "fox";
  46. *
  47. * SearchIterator iter = new StringSearch(pattern, target);
  48. *
  49. * for (int pos = iter.first(); pos != SearchIterator.DONE; pos = iter.next()) {
  50. * System.out.println("Found match at " + pos +
  51. * ", length is " + iter.getMatchLength());
  52. * }
  53. * </code></pre>
  54. *
  55. * @see StringSearch
  56. */
  57. public abstract class SearchIterator {
  58. /**
  59. * DONE is returned by previous() and next() after all valid
  60. * matches have been returned, and by first() and last() if
  61. * there are no matches at all.
  62. */
  63. public static final int DONE = -1;
  64.  
  65. /**
  66. * Private value indicating that the iterator is pointing
  67. * before the beginning of the target text.
  68. */
  69. private static final int BEFORE = -2;
  70.  
  71. /**
  72. * Return the first index at which the target text matches the search
  73. * pattern. The iterator is adjusted so that its current index
  74. * (as returned by {@link #getIndex}) is the match posisition if one was found
  75. * and <code>DONE</code> if one was not.
  76. *
  77. * @return The character index of the first match, or <code>DONE</code> if there
  78. * are no matches.
  79. */
  80. final public int first() {
  81. setIndex(BEFORE);
  82. return next();
  83. }
  84.  
  85. /**
  86. * Return the first index greater than <tt>pos</tt> at which the target
  87. * text matches the search pattern. The iterator is adjusted so that its current index
  88. * (as returned by {@link #getIndex}) is the match posisition if one was found
  89. * and <code>DONE</code> if one was not.
  90. *
  91. * @return The character index of the first match following <code>pos</code>,
  92. * or <tt>DONE</tt> if there are no matches.
  93. */
  94. final public int following(int pos) {
  95. setIndex(pos);
  96. return next();
  97. }
  98.  
  99. /**
  100. * Return the last index in the target text at which it matches
  101. * the search pattern and adjusts the iteration to point to that position.
  102. *
  103. * @return The index of the first match, or <tt>DONE</tt> if there
  104. * are no matches.
  105. */
  106. final public int last() {
  107. setIndex(DONE);
  108. return previous();
  109. }
  110.  
  111. /**
  112. * Return the first index less than <code>pos</code> at which the target
  113. * text matches the search pattern. The iterator is adjusted so that its current index
  114. * (as returned by {@link #getIndex}) is the match posisition if one was found
  115. * and <tt>DONE</tt> if one was not.
  116. *
  117. * @return The character index of the first match preceding <code>pos</code>,
  118. * or <code>DONE</code> if there are no matches.
  119. */
  120. final public int preceding(int pos) {
  121. setIndex(pos);
  122. return previous();
  123. }
  124.  
  125. /**
  126. * Return the index of the next point at which the text matches the
  127. * search pattern, starting from the current position.
  128. * @return The index of the next match after the current position,
  129. * or <code>DONE</code> if there are no more matches.
  130. *
  131. * @see #first
  132. */
  133. public int next() {
  134. if (index == BEFORE){
  135. // Starting at the beginning of the text
  136. index = target.getBeginIndex();
  137. } else if (length > 0) {
  138. // Finding the next match after a previous one
  139. index += overlap ? 1 : length;
  140. }
  141. index -= 1;
  142.  
  143. do {
  144. length = 0;
  145. index = handleNext(index + 1);
  146. } while (index != DONE && !isBreakUnit(index, index+length));
  147.  
  148. return index;
  149. }
  150.  
  151. /**
  152. * Return the index of the previous point at which the text matches
  153. * the search pattern, starting at the current position
  154. *
  155. * @return The index of the previous match before the current position,
  156. * or <code>DONE</code> if there are no more matches.
  157. */
  158. public int previous() {
  159. if (index == DONE) {
  160. index = target.getEndIndex();
  161. } else if (length > 0) {
  162. // Finding the previous match before a following one
  163. index = overlap ? index + length - 1 : index;
  164. }
  165. index += 1;
  166.  
  167. do {
  168. length = 0;
  169. index = handlePrev(index - 1);
  170. } while (index != DONE && !isBreakUnit(index, index+length));
  171.  
  172. if (index == DONE) {
  173. index = BEFORE;
  174. }
  175. return getIndex();
  176. }
  177.  
  178.  
  179.  
  180. /**
  181. * Return the current index in the text being searched.
  182. * If the iteration has gone past the end of the text
  183. * (or past the beginning for a backwards search),
  184. * {@link #DONE} is returned.
  185. */
  186. public int getIndex() {
  187. return index == BEFORE ? DONE : index;
  188. }
  189.  
  190. /**
  191. * Determines whether overlapping matches are returned. If this
  192. * property is <code>true</code>, matches that begin within the
  193. * boundry of the previous match are considered valid and will
  194. * be returned. For example, when searching for "abab" in the
  195. * target text "ababab", both offsets 0 and 2 will be returned
  196. * as valid matches if this property is <code>true</code>.
  197. * <p>
  198. * The default setting of this property is <tt>true</tt>
  199. */
  200. public void setOverlapping(boolean allowOverlap) {
  201. overlap = allowOverlap;
  202. }
  203.  
  204. /**
  205. * Determines whether overlapping matches are returned.
  206. *
  207. * @see #setOverlapping
  208. */
  209. public boolean isOverlapping() {
  210. return overlap;
  211. }
  212.  
  213. /**
  214. * Returns the length of text in the target which matches the search
  215. * pattern. This call returns a valid result only after a successful
  216. * call to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
  217. * Just after construction, or after a searching method returns
  218. * <tt>DONE</tt>, this method will return 0.
  219. *
  220. * @return The length of the match in the target text, or 0 if there
  221. * is no match currently.
  222. */
  223. public int getMatchLength() {
  224. return length;
  225. }
  226.  
  227. /**
  228. * Set the BreakIterator that will be used to restrict the points
  229. * at which matches are detected.
  230. *
  231. * @param iterator A {@link java.text.BreakIterator BreakIterator}
  232. * that will be used to restrict the points
  233. * at which matches are detected. If a match is found, but the match's start
  234. * or end index is not a boundary as determined by
  235. * the <tt>BreakIterator</tt>, the match will be rejected and
  236. * another will be searched for.
  237. *
  238. * If this parameter is <tt>null</tt>, no break
  239. * detection is attempted.
  240. *
  241. * @see #getBreakIterator
  242. */
  243. public void setBreakIterator(BreakIterator iterator) {
  244. breaker = iterator;
  245. if (breaker != null) {
  246. breaker.setText(target);
  247. }
  248. }
  249.  
  250. /**
  251. * Returns the BreakIterator that is used to restrict the points
  252. * at which matches are detected. This will be the same object
  253. * that was passed to the constructor or to <code>setBreakIterator</code>.
  254. * Note that <tt>null</tt> is a legal value; it means that break
  255. * detection should not be attempted.
  256. *
  257. * @see #setBreakIterator
  258. */
  259. public BreakIterator getBreakIterator() {
  260. return breaker;
  261. }
  262.  
  263. /**
  264. * Set the target text which should be searched and resets the
  265. * iterator's position to point before the start of the target text.
  266. * This method is useful if you want to re-use an iterator to
  267. * search for the same pattern within a different body of text.
  268. *
  269. * @see #getTarget
  270. */
  271. public void setTarget(CharacterIterator iterator) {
  272. target = iterator;
  273. if (breaker != null) {
  274. breaker.setText(target);
  275. }
  276. setIndex(BEFORE);
  277. }
  278.  
  279. /**
  280. * Return the target text which is being searched
  281. *
  282. * @see #setTarget
  283. */
  284. public CharacterIterator getTarget() {
  285. return target;
  286. }
  287.  
  288. /**
  289. * Returns the text that was matched by the most recent call to
  290. * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
  291. * If the iterator is not pointing at a valid match (e.g. just after
  292. * construction or after <tt>DONE</tt> has been returned, returns
  293. * an empty string.
  294. */
  295. public String getMatchedText() {
  296. StringBuffer buffer = new StringBuffer();
  297.  
  298. if (length > 0) {
  299. int i = 0;
  300. for (char c = target.setIndex(index); i < length; c = target.next(), i++)
  301. {
  302. buffer.append(c);
  303. }
  304. }
  305. return buffer.toString();
  306. }
  307.  
  308. //-------------------------------------------------------------------
  309. // Protected interface for subclasses
  310. //-------------------------------------------------------------------
  311.  
  312. /**
  313. * Constructor for use by subclasses.
  314. * <p>
  315. * @param target The target text to be searched. This is for internal
  316. * use by this class. Subclasses need to maintain their
  317. * own reference to or iterator over the target text
  318. * for use by their {@link #handleNext handleNext} and
  319. * {@link #handlePrev handlePrev} methods.
  320. *
  321. * @param breaker A {@link BreakIterator} that is used to restrict the points
  322. * at which matches are detected. If <tt>handleNext</tt> or
  323. * <tt>handlePrev</tt> finds a match, but the match's start
  324. * or end index is not a boundary as determined by
  325. * the <tt>BreakIterator</tt>, the match is rejected and
  326. * <tt>handleNext</tt> or <tt>handlePrev</tt> is called again.
  327. * If this parameter is <tt>null</tt>, no break
  328. * detection is attempted.
  329. *
  330. */
  331. protected SearchIterator(CharacterIterator target, BreakIterator breaker)
  332. {
  333. this.target = target;
  334.  
  335. if (breaker != null) {
  336. this.breaker = (BreakIterator)breaker.clone();
  337. this.breaker.setText(target);
  338. }
  339.  
  340. index = target.getBeginIndex();
  341. length = 0;
  342. }
  343.  
  344. /**
  345. * Abstract method which subclasses override to provide the mechanism
  346. * for finding the next match in the target text. This allows different
  347. * subclasses to provide different search algorithms.
  348. * <p>
  349. * If a match is found, the implementation should return the index at
  350. * which the match starts and should call {@link #setMatchLength setMatchLength}
  351. * with the number of characters in the target
  352. * text that make up the match. If no match is found, the method
  353. * should return DONE and should not call <tt>setMatchLength</tt>.
  354. * <p>
  355. * @param startAt The index in the target text at which the search
  356. * should start.
  357. *
  358. * @see #setMatchLength
  359. */
  360. protected abstract int handleNext(int startAt);
  361.  
  362. /**
  363. * Abstract method which subclasses override to provide the mechanism
  364. * for finding the previous match in the target text. This allows different
  365. * subclasses to provide different search algorithms.
  366. * <p>
  367. * If a match is found, the implementation should return the index at
  368. * which the match starts and should call {@link #setMatchLength setMatchLength}
  369. * with the number of characters in the target
  370. * text that make up the match. If no match is found, the method
  371. * should return DONE and should not call <tt>setMatchLength</tt>.
  372. * <p>
  373. * @param startAt The index in the target text at which the search
  374. * should start.
  375. *
  376. * @see #setMatchLength
  377. */
  378. protected abstract int handlePrev(int startAt);
  379.  
  380. /**
  381. * Sets the length of the currently matched string in the target text.
  382. * Subclasses' <code>handleNext</code> and <code>handlePrev</code>
  383. * methods should call this when they find a match in the target text.
  384. */
  385. protected void setMatchLength(int length) {
  386. this.length = length;
  387. }
  388.  
  389. //-------------------------------------------------------------------
  390. // Privates
  391. //
  392.  
  393. /**
  394. * Internal method used by preceding and following. Sets the index
  395. * to point to the given position, and clears any state that's
  396. * affected.
  397. */
  398. private void setIndex(int pos) {
  399. index = pos;
  400. length = 0;
  401. }
  402.  
  403. /**
  404. * Determine whether the target text bounded by <code>start</code> and
  405. * <code>end</code> is one or more whole units of text as determined by
  406. * the current <code>BreakIterator</code>.
  407. */
  408. private boolean isBreakUnit(int start, int end)
  409. {
  410. if (breaker == null) {
  411. return true;
  412. }
  413. boolean startBound = breaker.isBoundary(start);
  414. boolean endBound = (end == target.getEndIndex()) || breaker.isBoundary(end);
  415.  
  416. return startBound && endBound;
  417. }
  418.  
  419. //-------------------------------------------------------------------------
  420. // Private data...
  421. //-------------------------------------------------------------------------
  422. private int index; // Current position in the target text
  423. private int length; // Length of matched text, or 0
  424. private boolean overlap = true; // Return overlapping matches?
  425. private CharacterIterator target; // Target text to be searched
  426. private BreakIterator breaker; // Break iterator to constrain matches
  427. };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement