Guest User

Untitled

a guest
May 27th, 2018
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.69 KB | None | 0 0
  1. package de.fhw.bi.txtsearch;
  2.  
  3. import java.util.Vector;
  4.  
  5. import java.util.Iterator;
  6.  
  7. public class PatternMatcher {
  8.  
  9. public static final int MATCH = 0;
  10.  
  11. public static final int NOMATCH = 1;
  12.  
  13. public static final int UNBEARB = 2;
  14.  
  15. private String text;
  16.  
  17. private String pattern;
  18.  
  19. private Vector listeners;
  20.  
  21. public PatternMatcher() {
  22.  
  23. listeners = new Vector();
  24.  
  25. }
  26.  
  27. public String getText() {
  28.  
  29. return text;
  30.  
  31. }
  32.  
  33. public String getPattern() {
  34.  
  35. return pattern;
  36.  
  37. }
  38.  
  39. public void setText(String text) {
  40.  
  41. this.text = text;
  42.  
  43. }
  44.  
  45. public void setPattern(String pattern) {
  46.  
  47. this.pattern = pattern;
  48.  
  49. }
  50.  
  51. private void setUnbearbeitet(int a, int k, int[] matches) {
  52.  
  53. for (int j = 0; j < k; j++) {
  54.  
  55. matches[a + j] = UNBEARB;
  56.  
  57. }
  58.  
  59. }
  60.  
  61. public void naiveSearch() {
  62.  
  63. // initialize matches arrays:
  64.  
  65. int[] textMatches = new int[text.length()];
  66.  
  67. for (int i = 0; i < textMatches.length; i++) {
  68.  
  69. textMatches[i] = UNBEARB;
  70.  
  71. }
  72.  
  73. int[] patternMatches = new int[pattern.length()];
  74.  
  75. for (int i = 0; i < patternMatches.length; i++) {
  76.  
  77. patternMatches[i] = UNBEARB;
  78.  
  79. }
  80.  
  81. int tIndex = 0, pIndex;
  82.  
  83. int tLength = text.length();
  84.  
  85. int pLength = pattern.length();
  86.  
  87. Vector found = new Vector();
  88.  
  89. while (tIndex <= tLength - pLength) {
  90.  
  91. pIndex = 0;
  92.  
  93. // fireNewPosition();
  94.  
  95. while (pIndex < pLength
  96.  
  97. && pattern.charAt(pIndex) == text.charAt(tIndex + pIndex)) {
  98.  
  99. textMatches[pIndex + tIndex] = MATCH;
  100.  
  101. patternMatches[pIndex] = MATCH;
  102.  
  103. fireNewPosition(textMatches, patternMatches, tIndex);
  104.  
  105. pIndex++;
  106.  
  107. }
  108.  
  109. if (pIndex == pLength) { // full match
  110.  
  111. found.add(new Integer(tIndex));
  112.  
  113. setUnbearbeitet(0, pIndex, patternMatches);
  114.  
  115. // for (int j = 0; j < pIndex; j++) {
  116.  
  117. // patternMatches[j] = UNBEARB;
  118.  
  119. // }
  120.  
  121. } else {
  122.  
  123. textMatches[tIndex + pIndex] = NOMATCH;
  124.  
  125. patternMatches[pIndex] = NOMATCH;
  126.  
  127. fireNewPosition(textMatches, patternMatches, tIndex);
  128.  
  129. // matches must be corrected:
  130.  
  131. setUnbearbeitet(tIndex, pIndex, textMatches);
  132.  
  133. setUnbearbeitet(0, pIndex, patternMatches);
  134.  
  135. /**
  136. * for (int j = 0; j <= pIndex; j++) {
  137. *
  138. * textMatches[tIndex + j] = UNBEARB;
  139. *
  140. * patternMatches[j] = UNBEARB;
  141. *
  142. * }
  143. **/
  144.  
  145. }
  146.  
  147. setUnbearbeitet(0, text.length() - 1, textMatches);
  148.  
  149. for (int i = 0; i < found.size(); i++) {
  150.  
  151. int start = ((Integer) found.get(i)).intValue();
  152.  
  153. for (int j = 0; j < pLength; j++) {
  154.  
  155. textMatches[start + j] = MATCH;
  156.  
  157. }
  158.  
  159. }
  160.  
  161. setUnbearbeitet(0, pattern.length() - 1, patternMatches);
  162.  
  163. // fireNewPosition();
  164.  
  165. tIndex++;
  166.  
  167. }
  168.  
  169. tIndex--;
  170.  
  171. fireNewPosition(textMatches, patternMatches, tIndex);
  172.  
  173. }
  174.  
  175. public void knuthMorrisPratt(String text, String pattern) {
  176.  
  177. int[] textMatches = new int[text.length()];
  178.  
  179. for (int i = 0; i < textMatches.length; i++) {
  180.  
  181. textMatches[i] = UNBEARB;
  182.  
  183. }
  184.  
  185. int[] patternMatches = new int[pattern.length()];
  186.  
  187. for (int i = 0; i < patternMatches.length; i++) {
  188.  
  189. patternMatches[i] = UNBEARB;
  190.  
  191. }
  192.  
  193. // Hilfsarray um den Text farbig anzuzeigen
  194.  
  195. int[] tMatches = new int[text.length()];
  196.  
  197. setUnbearbeitet(0, tMatches.length, tMatches);
  198.  
  199. // Hilfsarray um das Muster farbig anzuzeigen
  200.  
  201. int[] pMatches = new int[pattern.length()];
  202.  
  203. setUnbearbeitet(0, pMatches.length, pMatches);
  204.  
  205. /**
  206. *
  207. * mit fireNewPosition(tMatches, pMatches, i);
  208. *
  209. * wird die Anzeige erneuert, dabei werden die Buchstaben im Text nach
  210. * den
  211. *
  212. * Vorgaben von tMatches angezeigt und die Buchstaben im Muster nach den
  213. * Vorgaben
  214. *
  215. * von pMatches
  216. *
  217. * Das Muster steht im Text an Position i.
  218. */
  219.  
  220. int[] l = calcMargins(pattern);
  221.  
  222. int tIndex = 0, pIndex = 0;
  223.  
  224. int pLen = pattern.length(), tLen = text.length();
  225.  
  226. Vector found = new Vector();
  227.  
  228. // to do: restliche Implementierung
  229.  
  230. // dabei auf die Anzeige achten
  231.  
  232. while (tIndex - pIndex < 0) {
  233.  
  234. while ((pIndex <= pLen - 1) && (text == pattern)) {
  235.  
  236. pIndex++;
  237.  
  238. tIndex++;
  239.  
  240. }
  241.  
  242. if (pIndex == pLen - 1)
  243. textMatches[pIndex + tIndex] = MATCH;
  244.  
  245. patternMatches[pIndex] = MATCH;
  246.  
  247. fireNewPosition(textMatches, patternMatches, tIndex);
  248.  
  249. setUnbearbeitet(tIndex, pIndex, textMatches);
  250.  
  251. setUnbearbeitet(0, pIndex, patternMatches);
  252.  
  253. found.add(tIndex - pIndex, found);
  254.  
  255. if (pIndex == 0)
  256.  
  257. tIndex++;
  258.  
  259. else
  260. pIndex = l[pIndex];
  261.  
  262. ;
  263.  
  264. }
  265.  
  266. }
  267.  
  268. private int[] calcMargins(String p) {
  269.  
  270. // to do: berechne die L\ufffdngen der gr\ufffd\ufffdten R\ufffdnder
  271.  
  272. int[] rand = new int[p.length() + 1];
  273.  
  274. rand[0] = -1;
  275.  
  276. String t = getText();
  277.  
  278. int tIndex = 0, pIndex = -1;
  279.  
  280. while (tIndex < p.length()) {
  281.  
  282. if ((pIndex >= 0) && (p == t))
  283.  
  284. rand[tIndex + 1] = pIndex + 1;
  285.  
  286. else {
  287. while (pIndex >= 0 && p != t)
  288.  
  289. pIndex = rand[pIndex];
  290.  
  291. if (pIndex < 0)
  292.  
  293. rand[tIndex + 1] = 0;
  294.  
  295. else
  296. rand[tIndex + 1] = pIndex + 1;
  297. }
  298.  
  299. tIndex++;
  300.  
  301. pIndex++;
  302.  
  303. }
  304.  
  305. return rand;
  306.  
  307. }
  308.  
  309. public static void main(String[] args) {
  310.  
  311. PatternMatcher pm = new PatternMatcher();
  312.  
  313. int[] a = pm.calcMargins("abxaby");
  314.  
  315. for (int i = 0; i < a.length; i++) {
  316.  
  317. System.out.println(i + " " + a[i] + ";");
  318.  
  319. }
  320.  
  321. }
  322.  
  323. protected void fireNewPosition(int[] textM, int[] patternM, int textPos) {
  324.  
  325. for (Iterator it = listeners.iterator(); it.hasNext();) {
  326.  
  327. ((PatternMatcherListener) it.next()).newPosition(
  328.  
  329. textM,
  330.  
  331. patternM,
  332.  
  333. textPos);
  334.  
  335. }
  336.  
  337. }
  338.  
  339. public void addPatternMatcherListener(PatternMatcherListener l) {
  340.  
  341. listeners.add(l);
  342.  
  343. }
  344.  
  345. private int[] calcOccurrence() {
  346.  
  347. char a;
  348.  
  349. int[] d = new int[256];
  350.  
  351. for (int j = 0; j < 256; j++)
  352.  
  353. d[j] = -1;
  354.  
  355. for (int k = 0; k < pattern.length(); k++) {
  356.  
  357. a = pattern.charAt(k);
  358.  
  359. d[a] = k;
  360.  
  361. }
  362.  
  363. return d;
  364.  
  365. }
  366.  
  367. void goodEndPreprocess1(int[] f, int[] s) {
  368.  
  369. int m = pattern.length(), i = m, j = m + 1;
  370.  
  371. f[i] = j;
  372.  
  373. for (int k = 0; k <= m; k++)
  374.  
  375. s[k] = 0;
  376.  
  377. while (i > 0) {
  378.  
  379. while (j <= m && pattern.charAt(i - 1) != pattern.charAt(j - 1)) {
  380.  
  381. if (s[j] == 0)
  382.  
  383. s[j] = j - 1;
  384.  
  385. j = f[j];
  386.  
  387. }
  388.  
  389. i--;
  390.  
  391. j--;
  392.  
  393. f[i] = j;
  394.  
  395. }
  396.  
  397. }
  398.  
  399. void goodEndPreprocess2(int[] f, int[] s) {
  400.  
  401. int m = pattern.length();
  402.  
  403. int i, j;
  404.  
  405. j = f[0];
  406.  
  407. for (i = 0; i <= m; i++) {
  408.  
  409. if (s[i] == 0)
  410.  
  411. s[i] = j;
  412.  
  413. if (i == j)
  414.  
  415. j = f[j];
  416.  
  417. }
  418.  
  419. }
  420.  
  421. private int[] bmPreprocess(int[] f, int[] s) {
  422.  
  423. int[] d = calcOccurrence();
  424.  
  425. goodEndPreprocess1(f, s);
  426.  
  427. goodEndPreprocess2(f, s);
  428.  
  429. return d;
  430.  
  431. }
  432.  
  433. void boyerMoore() {
  434.  
  435. // zur Darstellung
  436.  
  437. int[] tMatches = new int[text.length()];
  438.  
  439. for (int i = 0; i < tMatches.length; i++) {
  440.  
  441. tMatches[i] = UNBEARB;
  442.  
  443. }
  444.  
  445. int[] pMatches = new int[pattern.length()];
  446.  
  447. for (int i = 0; i < pMatches.length; i++) {
  448.  
  449. pMatches[i] = UNBEARB;
  450.  
  451. }
  452.  
  453. int[] last = null;
  454.  
  455. int[] s = new int[pattern.length() + 1];
  456.  
  457. int[] f = new int[pattern.length() + 1];
  458.  
  459. last = bmPreprocess(f, s);
  460.  
  461. int n = text.length(), m = pattern.length(), i = 0, j;
  462.  
  463. Vector found = new Vector();
  464.  
  465. while (i <= n - m) {
  466.  
  467. j = m - 1;
  468.  
  469. while (j >= 0 && pattern.charAt(j) == text.charAt(i + j)) {
  470.  
  471. tMatches[i + j] = MATCH;
  472.  
  473. pMatches[j] = MATCH;
  474.  
  475. fireNewPosition(tMatches, pMatches, i);
  476.  
  477. j--;
  478.  
  479. }
  480.  
  481. if (j < 0) {
  482.  
  483. found.add(new Integer(i));
  484.  
  485. System.out.println("Position: " + i);
  486.  
  487. for (int k = 0; k < m; k++) {
  488.  
  489. pMatches[k] = UNBEARB;
  490.  
  491. }
  492.  
  493. // report(i);
  494.  
  495. i += s[0];
  496.  
  497. } else {
  498.  
  499. /**
  500. * tMatches[i + j] = NOMATCH;
  501. *
  502. * pMatches[j] = NOMATCH;
  503. *
  504. * fireNewPosition(tMatches, pMatches, i);
  505. **/
  506.  
  507. // matches must be corrected:
  508.  
  509. for (int k = 0; k < m; k++) {
  510.  
  511. tMatches[i + k] = UNBEARB;
  512.  
  513. pMatches[k] = UNBEARB;
  514.  
  515. }
  516.  
  517. i += Math.max(s[j + 1], j - last[text.charAt(i + j)]);
  518.  
  519. }
  520.  
  521. }
  522.  
  523. }
  524.  
  525. int matchLength(String p, int i, int j) {
  526.  
  527. int k = 0, m = p.length();
  528.  
  529. boolean match = true;
  530.  
  531. while (match && i < m && j < m) {
  532.  
  533. if (match = (p.charAt(i) == p.charAt(j))) {
  534.  
  535. k++;
  536. i++;
  537. j++;
  538.  
  539. }
  540.  
  541. }
  542.  
  543. return k;
  544.  
  545. }
  546.  
  547. int[] calcZ(String s) {
  548.  
  549. int m = s.length(), r = 0, l = 0;
  550.  
  551. int[] z = new int[m];
  552.  
  553. z[0] = 0;
  554.  
  555. for (int k = 1; k < m; k++) {
  556.  
  557. if (k > r) {
  558.  
  559. z[k] = matchLength(s, k, 0);
  560.  
  561. if (z[k] > 0) {
  562.  
  563. r = k + z[k] - 1;
  564.  
  565. l = k;
  566.  
  567. }
  568.  
  569. } else {
  570.  
  571. int ks = k - l, b = r - k + 1;
  572.  
  573. if (z[ks] < b)
  574.  
  575. z[k] = z[ks];
  576.  
  577. else {
  578.  
  579. int q = r + 1 + matchLength(s, r + 1, b);
  580.  
  581. z[k] = q - k;
  582.  
  583. r = q - 1;
  584.  
  585. l = k;
  586.  
  587. }
  588.  
  589. }
  590.  
  591. }
  592.  
  593. return z;
  594.  
  595. }
  596.  
  597. void zAlg() {
  598.  
  599. int[] tMatches = new int[text.length()];
  600.  
  601. for (int i = 0; i < tMatches.length; i++) {
  602.  
  603. tMatches[i] = UNBEARB;
  604.  
  605. }
  606.  
  607. int[] pMatches = new int[pattern.length()];
  608.  
  609. for (int i = 0; i < pMatches.length; i++) {
  610.  
  611. pMatches[i] = UNBEARB;
  612.  
  613. }
  614.  
  615. StringBuffer b = new StringBuffer(pattern);
  616.  
  617. b = b.append('$');
  618.  
  619. b = b.append(text);
  620.  
  621. int[] z = calcZ(b.toString());
  622.  
  623. int m = pattern.length();
  624.  
  625. for (int i = 0; i <= text.length(); i++) {
  626.  
  627. if (z[i + m] == m) {
  628.  
  629. for (int j = i; j < i + m; j++) {
  630.  
  631. tMatches[j - 1] = MATCH;
  632.  
  633. pMatches[j - i] = MATCH;
  634.  
  635. fireNewPosition(tMatches, pMatches, i - 1);
  636.  
  637. }
  638.  
  639. }
  640.  
  641. for (int k = 0; k < m; k++)
  642. pMatches[k] = UNBEARB;
  643.  
  644. }
  645.  
  646. }
  647.  
  648. static private int[] initShift(String pattern) {
  649.  
  650. int m = pattern.length();
  651.  
  652. int[] shift = new int[m + 1];
  653.  
  654. int[] suffix = new int[m + 1];
  655.  
  656. int i, j, h = 0;
  657.  
  658. suffix[m - 1] = m;
  659.  
  660. j = m - 1;
  661.  
  662. for (i = m - 2; i >= 0; --i) {
  663.  
  664. if (i > j && suffix[i + m - 1 - h] < i - j) {
  665.  
  666. suffix[i] = suffix[i + m - 1 - h];
  667.  
  668. } else {
  669.  
  670. if (i < j) {
  671.  
  672. j = i;
  673.  
  674. }
  675.  
  676. h = i;
  677.  
  678. while (j >= 0
  679.  
  680. && pattern.charAt(j) == pattern.charAt(j + m - 1 - h)) {
  681.  
  682. --j;
  683.  
  684. }
  685.  
  686. suffix[i] = h - j;
  687.  
  688. }
  689.  
  690. }
  691.  
  692. for (i = 0; i < m; ++i) {
  693.  
  694. shift[i] = m;
  695.  
  696. }
  697.  
  698. j = 0;
  699.  
  700. for (i = m - 1; i >= -1; --i) {
  701.  
  702. if (i == -1 || suffix[i] == i + 1) {
  703.  
  704. while (j < m - 1 - i) {
  705.  
  706. if (shift[j] == m) {
  707.  
  708. shift[j] = m - 1 - i;
  709.  
  710. }
  711.  
  712. ++j;
  713.  
  714. }
  715.  
  716. }
  717.  
  718. }
  719.  
  720. for (i = 0; i < m - 1; ++i) {
  721.  
  722. shift[m - 1 - suffix[i]] = m - i - 1;
  723.  
  724. }
  725.  
  726. for (i = 0; i < pattern.length(); ++i) {
  727.  
  728. System.out.println("shift[" + i + "]=" + shift[i]);
  729.  
  730. }
  731.  
  732. return shift;
  733.  
  734. }
  735.  
  736. static protected int[] initLast(String pattern) {
  737.  
  738. int[] last = new int[256];
  739.  
  740. for (int i = 0; i < 256; ++i) {
  741.  
  742. last[i] = -1;
  743.  
  744. }
  745.  
  746. for (int i = 0; i < pattern.length(); ++i) {
  747.  
  748. last[pattern.charAt(i)] = i;
  749.  
  750. }
  751.  
  752. return last;
  753.  
  754. }
  755.  
  756. public void bmSearch() {
  757.  
  758. final int last[] = initLast(pattern);
  759.  
  760. final int shift[] = initShift(pattern);
  761.  
  762. int[] textMatches = new int[text.length()];
  763.  
  764. for (int i = 0; i < textMatches.length; i++) {
  765.  
  766. textMatches[i] = UNBEARB;
  767.  
  768. }
  769.  
  770. int[] patternMatches = new int[pattern.length()];
  771.  
  772. for (int i = 0; i < patternMatches.length; i++) {
  773.  
  774. patternMatches[i] = UNBEARB;
  775.  
  776. }
  777.  
  778. int tIndex = 0, pIndex;
  779.  
  780. int tLength = text.length();
  781.  
  782. int pLength = pattern.length();
  783.  
  784. Vector found = new Vector();
  785.  
  786. while (tIndex <= tLength - pLength) {
  787.  
  788. pIndex = pLength - 1;
  789.  
  790. // fireNewPosition();
  791.  
  792. while (pIndex >= 0
  793.  
  794. && pattern.charAt(pIndex) == text.charAt(tIndex + pIndex)) {
  795.  
  796. textMatches[pIndex + tIndex] = MATCH;
  797.  
  798. patternMatches[pIndex] = MATCH;
  799.  
  800. fireNewPosition(textMatches, patternMatches, tIndex);
  801.  
  802. pIndex--;
  803.  
  804. }
  805.  
  806. if (pIndex < 0) { // full match
  807.  
  808. found.add(new Integer(tIndex));
  809.  
  810. tIndex += shift[0];
  811.  
  812. } else {
  813.  
  814. textMatches[pIndex + tIndex] = NOMATCH;
  815.  
  816. patternMatches[pIndex] = NOMATCH;
  817.  
  818. fireNewPosition(textMatches, patternMatches, tIndex);
  819.  
  820. for (int j = 0; j <= pIndex; j++) {
  821.  
  822. textMatches[tIndex + j] = UNBEARB;
  823.  
  824. patternMatches[j] = UNBEARB;
  825.  
  826. }
  827.  
  828. tIndex += Math.max(shift[pIndex],
  829. pIndex - last[text.charAt(tIndex + pIndex)]);
  830.  
  831. }
  832.  
  833. for (int i = 0; i < tLength; i++) {
  834.  
  835. textMatches[i] = UNBEARB;
  836.  
  837. }
  838.  
  839. for (int i = 0; i < found.size(); i++) {
  840.  
  841. int start = ((Integer) found.get(i)).intValue();
  842.  
  843. for (int j = 0; j < pLength; j++) {
  844.  
  845. textMatches[start + j] = MATCH;
  846.  
  847. }
  848.  
  849. }
  850.  
  851. for (int i = 0; i < pLength; i++) {
  852.  
  853. patternMatches[i] = UNBEARB;
  854.  
  855. }
  856.  
  857. fireNewPosition(textMatches, patternMatches, tIndex);
  858.  
  859. }
  860.  
  861. }
  862.  
  863. }
Add Comment
Please, Sign In to add comment