Guest User

Untitled

a guest
May 27th, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.27 KB | None | 0 0
  1. package de.fhw.bi.txtsearch;
  2.  
  3. import java.util.Vector;
  4.  
  5. import java.util.Iterator;
  6.  
  7. public class PatternMatcher {
  8.  
  9. public static final int MATCH = 0;
  10.  
  11. public static final int NOMATCH = 1;
  12.  
  13. public static final int UNBEARB = 2;
  14.  
  15. private String text;
  16.  
  17. private String pattern;
  18.  
  19. private Vector listeners;
  20.  
  21. public PatternMatcher() {
  22.  
  23. listeners = new Vector();
  24.  
  25. }
  26.  
  27. public String getText() {
  28.  
  29. return text;
  30.  
  31. }
  32.  
  33. public String getPattern() {
  34.  
  35. return pattern;
  36.  
  37. }
  38.  
  39. public void setText(String text) {
  40.  
  41. this.text = text;
  42.  
  43. }
  44.  
  45. public void setPattern(String pattern) {
  46.  
  47. this.pattern = pattern;
  48.  
  49. }
  50.  
  51. private void setUnbearbeitet(int a, int k, int[] matches) {
  52.  
  53. for (int j = 0; j < k; j++) {
  54.  
  55. matches[a+j] = UNBEARB;
  56.  
  57. }
  58.  
  59. }
  60.  
  61. public void naiveSearch() {
  62.  
  63. // initialize matches arrays:
  64.  
  65. int[] textMatches = new int[text.length()];
  66.  
  67. for (int i = 0; i < textMatches.length; i++) {
  68.  
  69. textMatches[i] = UNBEARB;
  70.  
  71. }
  72.  
  73. int[] patternMatches = new int[pattern.length()];
  74.  
  75. for (int i = 0; i < patternMatches.length; i++) {
  76.  
  77. patternMatches[i] = UNBEARB;
  78.  
  79. }
  80.  
  81. int tIndex = 0, pIndex;
  82.  
  83. int tLength = text.length();
  84.  
  85. int pLength = pattern.length();
  86.  
  87. Vector found = new Vector();
  88.  
  89. while (tIndex <= tLength - pLength) {
  90.  
  91. pIndex = 0;
  92.  
  93. //fireNewPosition();
  94.  
  95. while (pIndex < pLength
  96.  
  97. && pattern.charAt(pIndex) == text.charAt(tIndex + pIndex)) {
  98.  
  99. textMatches[pIndex + tIndex] = MATCH;
  100.  
  101. patternMatches[pIndex] = MATCH;
  102.  
  103. fireNewPosition(textMatches, patternMatches, tIndex);
  104.  
  105. pIndex++;
  106.  
  107. }
  108.  
  109. if (pIndex == pLength) { // full match
  110.  
  111. found.add(new Integer(tIndex));
  112.  
  113. setUnbearbeitet(0,pIndex, patternMatches);
  114.  
  115. //for (int j = 0; j < pIndex; j++) {
  116.  
  117. // patternMatches[j] = UNBEARB;
  118.  
  119. //}
  120.  
  121. } else {
  122.  
  123. textMatches[tIndex + pIndex] = NOMATCH;
  124.  
  125. patternMatches[pIndex] = NOMATCH;
  126.  
  127. fireNewPosition(textMatches, patternMatches, tIndex);
  128.  
  129. // matches must be corrected:
  130.  
  131. setUnbearbeitet(tIndex,pIndex,textMatches);
  132.  
  133. setUnbearbeitet(0,pIndex,patternMatches);
  134.  
  135. /**for (int j = 0; j <= pIndex; j++) {
  136.  
  137. textMatches[tIndex + j] = UNBEARB;
  138.  
  139. patternMatches[j] = UNBEARB;
  140.  
  141. }**/
  142.  
  143. }
  144.  
  145. setUnbearbeitet(0, text.length()-1, textMatches);
  146.  
  147. for (int i = 0; i < found.size(); i++) {
  148.  
  149. int start = ((Integer) found.get(i)).intValue();
  150.  
  151. for (int j = 0; j < pLength; j++) {
  152.  
  153. textMatches[start + j] = MATCH;
  154.  
  155. }
  156.  
  157. }
  158.  
  159. setUnbearbeitet(0,pattern.length()-1,patternMatches);
  160.  
  161. //fireNewPosition();
  162.  
  163. tIndex++;
  164.  
  165. }
  166.  
  167. tIndex--;
  168.  
  169. fireNewPosition(textMatches, patternMatches, tIndex);
  170.  
  171. }
  172.  
  173. public void knuthMorrisPratt(String text, String pattern) {
  174.  
  175. // Hilfsarray um den Text farbig anzuzeigen
  176.  
  177. int[] tMatches = new int[text.length()];
  178.  
  179. setUnbearbeitet(0,tMatches.length,tMatches);
  180.  
  181. // Hilfsarray um das Muster farbig anzuzeigen
  182.  
  183. int[] pMatches = new int[pattern.length()];
  184.  
  185. setUnbearbeitet(0,pMatches.length,pMatches);
  186.  
  187. /**
  188.  
  189. * mit fireNewPosition(tMatches, pMatches, i);
  190.  
  191. * wird die Anzeige erneuert, dabei werden die Buchstaben im Text nach den
  192.  
  193. * Vorgaben von tMatches angezeigt und die Buchstaben im Muster nach den Vorgaben
  194.  
  195. * von pMatches
  196.  
  197. * Das Muster steht im Text an Position i.
  198.  
  199. */
  200.  
  201. int[] l = calcMargins(pattern);
  202.  
  203. int tIndex = 0, pIndex = 0;
  204.  
  205. int pLen = pattern.length(), tLen = text.length();
  206.  
  207. Vector found = new Vector();
  208.  
  209. // to do: restliche Implementierung
  210.  
  211. // dabei auf die Anzeige achten
  212.  
  213. while (tIndex-pIndex < 0){
  214.  
  215. while ((pIndex <= pLen-1) && (text == pattern)){
  216.  
  217. pIndex++;
  218.  
  219. tIndex++;
  220.  
  221. }
  222.  
  223. if (pIndex == pLen-1)
  224.  
  225. found.add(tIndex-pIndex, found);
  226.  
  227. if (pIndex==0)
  228.  
  229. tIndex++;
  230.  
  231. else pIndex = l[pIndex];
  232.  
  233. ;
  234.  
  235. }
  236.  
  237. }
  238.  
  239. private int[] calcMargins(String p) {
  240.  
  241. // to do: berechne die L\ufffdngen der gr\ufffd\ufffdten R\ufffdnder
  242.  
  243. int[] rand = new int[p.length()+1];
  244.  
  245. rand[0]=-1;
  246.  
  247. String t = getText();
  248.  
  249. int tIndex = 0, pIndex = -1;
  250.  
  251. while ( tIndex < p.length()){
  252.  
  253. if ((pIndex >= 0 )&& (p != t))
  254.  
  255. rand[tIndex+1] = pIndex+1;
  256.  
  257. else {while (pIndex >= 0 && p != t)
  258.  
  259. pIndex = rand[pIndex];
  260.  
  261. if (pIndex<0)
  262.  
  263. rand[tIndex+1]=0;
  264.  
  265. else rand[tIndex+1] = pIndex+1;}
  266.  
  267. tIndex++;
  268.  
  269. pIndex++;
  270.  
  271. }
  272.  
  273. return rand;
  274.  
  275. }
  276.  
  277. public static void main(String[] args) {
  278.  
  279. PatternMatcher pm = new PatternMatcher();
  280.  
  281. int[] a = pm.calcMargins("abxaby");
  282.  
  283. for (int i = 0; i < a.length; i++) {
  284.  
  285. System.out.println(i + " " + a[i] + ";");
  286.  
  287. }
  288.  
  289. }
  290.  
  291. protected void fireNewPosition(int[] textM, int[] patternM, int textPos) {
  292.  
  293. for (Iterator it = listeners.iterator(); it.hasNext();) {
  294.  
  295. ((PatternMatcherListener) it.next()).newPosition(
  296.  
  297. textM,
  298.  
  299. patternM,
  300.  
  301. textPos);
  302.  
  303. }
  304.  
  305. }
  306.  
  307. public void addPatternMatcherListener(PatternMatcherListener l) {
  308.  
  309. listeners.add(l);
  310.  
  311. }
  312.  
  313. private int[] calcOccurrence() {
  314.  
  315. char a;
  316.  
  317. int[] d = new int[256];
  318.  
  319. for (int j = 0; j < 256; j++)
  320.  
  321. d[j] = -1;
  322.  
  323. for (int k = 0; k < pattern.length(); k++) {
  324.  
  325. a = pattern.charAt(k);
  326.  
  327. d[a] = k;
  328.  
  329. }
  330.  
  331. return d;
  332.  
  333. }
  334.  
  335. void goodEndPreprocess1(int[] f, int[] s) {
  336.  
  337. int m = pattern.length(), i = m, j = m + 1;
  338.  
  339. f[i] = j;
  340.  
  341. for (int k = 0; k <= m; k++)
  342.  
  343. s[k] = 0;
  344.  
  345. while (i > 0) {
  346.  
  347. while (j <= m && pattern.charAt(i - 1) != pattern.charAt(j - 1)) {
  348.  
  349. if (s[j] == 0)
  350.  
  351. s[j] = j - 1;
  352.  
  353. j = f[j];
  354.  
  355. }
  356.  
  357. i--;
  358.  
  359. j--;
  360.  
  361. f[i] = j;
  362.  
  363. }
  364.  
  365. }
  366.  
  367. void goodEndPreprocess2(int[] f, int[] s) {
  368.  
  369. int m = pattern.length();
  370.  
  371. int i, j;
  372.  
  373. j = f[0];
  374.  
  375. for (i = 0; i <= m; i++) {
  376.  
  377. if (s[i] == 0)
  378.  
  379. s[i] = j;
  380.  
  381. if (i == j)
  382.  
  383. j = f[j];
  384.  
  385. }
  386.  
  387. }
  388.  
  389. private int[] bmPreprocess(int[] f, int[] s) {
  390.  
  391. int[] d = calcOccurrence();
  392.  
  393. goodEndPreprocess1(f, s);
  394.  
  395. goodEndPreprocess2(f, s);
  396.  
  397. return d;
  398.  
  399. }
  400.  
  401. void boyerMoore() {
  402.  
  403. //zur Darstellung
  404.  
  405. int[] tMatches = new int[text.length()];
  406.  
  407. for (int i = 0; i < tMatches.length; i++) {
  408.  
  409. tMatches[i] = UNBEARB;
  410.  
  411. }
  412.  
  413. int[] pMatches = new int[pattern.length()];
  414.  
  415. for (int i = 0; i < pMatches.length; i++) {
  416.  
  417. pMatches[i] = UNBEARB;
  418.  
  419. }
  420.  
  421. int[] last = null;
  422.  
  423. int[] s = new int[pattern.length() + 1];
  424.  
  425. int[] f = new int[pattern.length() + 1];
  426.  
  427. last = bmPreprocess(f, s);
  428.  
  429. int n = text.length(), m = pattern.length(), i = 0, j;
  430.  
  431. Vector found = new Vector();
  432.  
  433. while (i <= n - m) {
  434.  
  435. j = m - 1;
  436.  
  437. while (j >= 0 && pattern.charAt(j) == text.charAt(i + j)) {
  438.  
  439. tMatches[i + j] = MATCH;
  440.  
  441. pMatches[j] = MATCH;
  442.  
  443. fireNewPosition(tMatches, pMatches, i);
  444.  
  445. j--;
  446.  
  447. }
  448.  
  449. if (j < 0) {
  450.  
  451. found.add(new Integer(i));
  452.  
  453. System.out.println("Position: " + i);
  454.  
  455. for (int k = 0; k < m; k++) {
  456.  
  457. pMatches[k] = UNBEARB;
  458.  
  459. }
  460.  
  461. //report(i);
  462.  
  463. i += s[0];
  464.  
  465. } else {
  466.  
  467. /**tMatches[i + j] = NOMATCH;
  468.  
  469. pMatches[j] = NOMATCH;
  470.  
  471. fireNewPosition(tMatches, pMatches, i);**/
  472.  
  473. // matches must be corrected:
  474.  
  475. for (int k = 0; k < m; k++) {
  476.  
  477. tMatches[i + k] = UNBEARB;
  478.  
  479. pMatches[k] = UNBEARB;
  480.  
  481. }
  482.  
  483. i += Math.max(s[j + 1], j - last[text.charAt(i + j)]);
  484.  
  485. }
  486.  
  487. }
  488.  
  489. }
  490.  
  491. int matchLength(String p, int i, int j) {
  492.  
  493. int k = 0, m = p.length();
  494.  
  495. boolean match = true;
  496.  
  497. while (match && i < m && j < m) {
  498.  
  499. if (match = (p.charAt(i) == p.charAt(j))) {
  500.  
  501. k++; i++; j++;
  502.  
  503. }
  504.  
  505. }
  506.  
  507. return k;
  508.  
  509. }
  510.  
  511. int[] calcZ(String s) {
  512.  
  513. int m = s.length(), r = 0, l = 0;
  514.  
  515. int[] z = new int[m];
  516.  
  517. z[0] = 0;
  518.  
  519. for (int k = 1; k < m; k++) {
  520.  
  521. if (k > r) {
  522.  
  523. z[k] = matchLength(s, k, 0);
  524.  
  525. if (z[k] > 0) {
  526.  
  527. r = k + z[k] - 1;
  528.  
  529. l = k;
  530.  
  531. }
  532.  
  533. } else {
  534.  
  535. int ks = k - l, b = r - k + 1;
  536.  
  537. if (z[ks] < b)
  538.  
  539. z[k] = z[ks];
  540.  
  541. else {
  542.  
  543. int q = r + 1 + matchLength(s, r + 1, b);
  544.  
  545. z[k] = q - k;
  546.  
  547. r = q - 1;
  548.  
  549. l = k;
  550.  
  551. }
  552.  
  553. }
  554.  
  555. }
  556.  
  557. return z;
  558.  
  559. }
  560.  
  561. void zAlg() {
  562.  
  563. int[] tMatches = new int[text.length()];
  564.  
  565. for (int i = 0; i < tMatches.length; i++) {
  566.  
  567. tMatches[i] = UNBEARB;
  568.  
  569. }
  570.  
  571. int[] pMatches = new int[pattern.length()];
  572.  
  573. for (int i = 0; i < pMatches.length; i++) {
  574.  
  575. pMatches[i] = UNBEARB;
  576.  
  577. }
  578.  
  579. StringBuffer b = new StringBuffer(pattern);
  580.  
  581. b = b.append('$');
  582.  
  583. b = b.append(text);
  584.  
  585. int[] z = calcZ(b.toString());
  586.  
  587. int m = pattern.length();
  588.  
  589. for (int i = 0; i <= text.length(); i++) {
  590.  
  591. if (z[i + m] == m) {
  592.  
  593. for (int j = i; j < i+m; j++) {
  594.  
  595. tMatches[j-1] = MATCH;
  596.  
  597. pMatches[j-i] = MATCH;
  598.  
  599. fireNewPosition(tMatches, pMatches, i-1);
  600.  
  601. }
  602.  
  603. }
  604.  
  605. for (int k = 0; k < m; k++) pMatches[k] = UNBEARB;
  606.  
  607. }
  608.  
  609. }
  610.  
  611. static private int[] initShift(String pattern) {
  612.  
  613. int m = pattern.length();
  614.  
  615. int[] shift = new int[m + 1];
  616.  
  617. int[] suffix = new int[m + 1];
  618.  
  619. int i, j, h = 0;
  620.  
  621. suffix[m - 1] = m;
  622.  
  623. j = m - 1;
  624.  
  625. for (i = m - 2; i >= 0; --i) {
  626.  
  627. if (i > j && suffix[i + m - 1 - h] < i - j) {
  628.  
  629. suffix[i] = suffix[i + m - 1 - h];
  630.  
  631. } else {
  632.  
  633. if (i < j) {
  634.  
  635. j = i;
  636.  
  637. }
  638.  
  639. h = i;
  640.  
  641. while (j >= 0
  642.  
  643. && pattern.charAt(j) == pattern.charAt(j + m - 1 - h)) {
  644.  
  645. --j;
  646.  
  647. }
  648.  
  649. suffix[i] = h - j;
  650.  
  651. }
  652.  
  653. }
  654.  
  655. for (i = 0; i < m; ++i) {
  656.  
  657. shift[i] = m;
  658.  
  659. }
  660.  
  661. j = 0;
  662.  
  663. for (i = m - 1; i >= -1; --i) {
  664.  
  665. if (i == -1 || suffix[i] == i + 1) {
  666.  
  667. while (j < m - 1 - i) {
  668.  
  669. if (shift[j] == m) {
  670.  
  671. shift[j] = m - 1 - i;
  672.  
  673. }
  674.  
  675. ++j;
  676.  
  677. }
  678.  
  679. }
  680.  
  681. }
  682.  
  683. for (i = 0; i < m - 1; ++i) {
  684.  
  685. shift[m - 1 - suffix[i]] = m - i - 1;
  686.  
  687. }
  688.  
  689. for (i = 0; i < pattern.length(); ++i) {
  690.  
  691. System.out.println("shift[" + i + "]=" + shift[i]);
  692.  
  693. }
  694.  
  695. return shift;
  696.  
  697. }
  698.  
  699. static protected int[] initLast(String pattern) {
  700.  
  701. int[] last = new int[256];
  702.  
  703. for (int i = 0; i < 256; ++i) {
  704.  
  705. last[i] = -1;
  706.  
  707. }
  708.  
  709. for (int i = 0; i < pattern.length(); ++i) {
  710.  
  711. last[pattern.charAt(i)] = i;
  712.  
  713. }
  714.  
  715. return last;
  716.  
  717. }
  718.  
  719. public void bmSearch() {
  720.  
  721. final int last[] = initLast(pattern);
  722.  
  723. final int shift[] = initShift(pattern);
  724.  
  725. int[] textMatches = new int[text.length()];
  726.  
  727. for (int i = 0; i < textMatches.length; i++) {
  728.  
  729. textMatches[i] = UNBEARB;
  730.  
  731. }
  732.  
  733. int[] patternMatches = new int[pattern.length()];
  734.  
  735. for (int i = 0; i < patternMatches.length; i++) {
  736.  
  737. patternMatches[i] = UNBEARB;
  738.  
  739. }
  740.  
  741. int tIndex = 0, pIndex;
  742.  
  743. int tLength = text.length();
  744.  
  745. int pLength = pattern.length();
  746.  
  747. Vector found = new Vector();
  748.  
  749. while (tIndex <= tLength - pLength) {
  750.  
  751. pIndex = pLength - 1;
  752.  
  753. //fireNewPosition();
  754.  
  755. while (pIndex >= 0
  756.  
  757. && pattern.charAt(pIndex) == text.charAt(tIndex + pIndex)) {
  758.  
  759. textMatches[pIndex + tIndex] = MATCH;
  760.  
  761. patternMatches[pIndex] = MATCH;
  762.  
  763. fireNewPosition(textMatches, patternMatches, tIndex);
  764.  
  765. pIndex--;
  766.  
  767. }
  768.  
  769. if (pIndex < 0) { // full match
  770.  
  771. found.add(new Integer(tIndex));
  772.  
  773. tIndex +=shift[0];
  774.  
  775. } else {
  776.  
  777. textMatches[pIndex + tIndex] = NOMATCH;
  778.  
  779. patternMatches[pIndex] = NOMATCH;
  780.  
  781. fireNewPosition(textMatches, patternMatches, tIndex);
  782.  
  783. for (int j = 0; j <= pIndex; j++) {
  784.  
  785. textMatches[tIndex + j] = UNBEARB;
  786.  
  787. patternMatches[j] = UNBEARB;
  788.  
  789. }
  790.  
  791. tIndex += Math.max(shift[pIndex], pIndex - last[text.charAt(tIndex + pIndex)]);
  792.  
  793. }
  794.  
  795. for (int i = 0; i < tLength; i++) {
  796.  
  797. textMatches[i] = UNBEARB;
  798.  
  799. }
  800.  
  801. for (int i = 0; i < found.size(); i++) {
  802.  
  803. int start = ((Integer) found.get(i)).intValue();
  804.  
  805. for (int j = 0; j < pLength; j++) {
  806.  
  807. textMatches[start + j] = MATCH;
  808.  
  809. }
  810.  
  811. }
  812.  
  813. for (int i = 0; i < pLength; i++) {
  814.  
  815. patternMatches[i] = UNBEARB;
  816.  
  817. }
  818.  
  819. fireNewPosition(textMatches, patternMatches, tIndex);
  820.  
  821. }
  822.  
  823. }
  824.  
  825.  
  826.  
  827. }
Add Comment
Please, Sign In to add comment