Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package de.fhw.bi.txtsearch;
- import java.util.Vector;
- import java.util.Iterator;
- public class PatternMatcher {
- public static final int MATCH = 0;
- public static final int NOMATCH = 1;
- public static final int UNBEARB = 2;
- private String text;
- private String pattern;
- private Vector listeners;
- public PatternMatcher() {
- listeners = new Vector();
- }
- public String getText() {
- return text;
- }
- public String getPattern() {
- return pattern;
- }
- public void setText(String text) {
- this.text = text;
- }
- public void setPattern(String pattern) {
- this.pattern = pattern;
- }
- private void setUnbearbeitet(int a, int k, int[] matches) {
- for (int j = 0; j < k; j++) {
- matches[a+j] = UNBEARB;
- }
- }
- public void naiveSearch() {
- // initialize matches arrays:
- int[] textMatches = new int[text.length()];
- for (int i = 0; i < textMatches.length; i++) {
- textMatches[i] = UNBEARB;
- }
- int[] patternMatches = new int[pattern.length()];
- for (int i = 0; i < patternMatches.length; i++) {
- patternMatches[i] = UNBEARB;
- }
- int tIndex = 0, pIndex;
- int tLength = text.length();
- int pLength = pattern.length();
- Vector found = new Vector();
- while (tIndex <= tLength - pLength) {
- pIndex = 0;
- //fireNewPosition();
- while (pIndex < pLength
- && pattern.charAt(pIndex) == text.charAt(tIndex + pIndex)) {
- textMatches[pIndex + tIndex] = MATCH;
- patternMatches[pIndex] = MATCH;
- fireNewPosition(textMatches, patternMatches, tIndex);
- pIndex++;
- }
- if (pIndex == pLength) { // full match
- found.add(new Integer(tIndex));
- setUnbearbeitet(0,pIndex, patternMatches);
- //for (int j = 0; j < pIndex; j++) {
- // patternMatches[j] = UNBEARB;
- //}
- } else {
- textMatches[tIndex + pIndex] = NOMATCH;
- patternMatches[pIndex] = NOMATCH;
- fireNewPosition(textMatches, patternMatches, tIndex);
- // matches must be corrected:
- setUnbearbeitet(tIndex,pIndex,textMatches);
- setUnbearbeitet(0,pIndex,patternMatches);
- /**for (int j = 0; j <= pIndex; j++) {
- textMatches[tIndex + j] = UNBEARB;
- patternMatches[j] = UNBEARB;
- }**/
- }
- setUnbearbeitet(0, text.length()-1, textMatches);
- for (int i = 0; i < found.size(); i++) {
- int start = ((Integer) found.get(i)).intValue();
- for (int j = 0; j < pLength; j++) {
- textMatches[start + j] = MATCH;
- }
- }
- setUnbearbeitet(0,pattern.length()-1,patternMatches);
- //fireNewPosition();
- tIndex++;
- }
- tIndex--;
- fireNewPosition(textMatches, patternMatches, tIndex);
- }
- public void knuthMorrisPratt(String text, String pattern) {
- // Hilfsarray um den Text farbig anzuzeigen
- int[] tMatches = new int[text.length()];
- setUnbearbeitet(0,tMatches.length,tMatches);
- // Hilfsarray um das Muster farbig anzuzeigen
- int[] pMatches = new int[pattern.length()];
- setUnbearbeitet(0,pMatches.length,pMatches);
- /**
- * mit fireNewPosition(tMatches, pMatches, i);
- * wird die Anzeige erneuert, dabei werden die Buchstaben im Text nach den
- * Vorgaben von tMatches angezeigt und die Buchstaben im Muster nach den Vorgaben
- * von pMatches
- * Das Muster steht im Text an Position i.
- */
- int[] l = calcMargins(pattern);
- int tIndex = 0, pIndex = 0;
- int pLen = pattern.length(), tLen = text.length();
- Vector found = new Vector();
- // to do: restliche Implementierung
- // dabei auf die Anzeige achten
- while (tIndex-pIndex < 0){
- while ((pIndex <= pLen-1) && (text == pattern)){
- pIndex++;
- tIndex++;
- }
- if (pIndex == pLen-1)
- found.add(tIndex-pIndex, found);
- if (pIndex==0)
- tIndex++;
- else pIndex = l[pIndex];
- ;
- }
- }
- private int[] calcMargins(String p) {
- // to do: berechne die L\ufffdngen der gr\ufffd\ufffdten R\ufffdnder
- int[] rand = new int[p.length()+1];
- rand[0]=-1;
- String t = getText();
- int tIndex = 0, pIndex = -1;
- while ( tIndex < p.length()){
- if ((pIndex >= 0 )&& (p != t))
- rand[tIndex+1] = pIndex+1;
- else {while (pIndex >= 0 && p != t)
- pIndex = rand[pIndex];
- if (pIndex<0)
- rand[tIndex+1]=0;
- else rand[tIndex+1] = pIndex+1;}
- tIndex++;
- pIndex++;
- }
- return rand;
- }
- public static void main(String[] args) {
- PatternMatcher pm = new PatternMatcher();
- int[] a = pm.calcMargins("abxaby");
- for (int i = 0; i < a.length; i++) {
- System.out.println(i + " " + a[i] + ";");
- }
- }
- protected void fireNewPosition(int[] textM, int[] patternM, int textPos) {
- for (Iterator it = listeners.iterator(); it.hasNext();) {
- ((PatternMatcherListener) it.next()).newPosition(
- textM,
- patternM,
- textPos);
- }
- }
- public void addPatternMatcherListener(PatternMatcherListener l) {
- listeners.add(l);
- }
- private int[] calcOccurrence() {
- char a;
- int[] d = new int[256];
- for (int j = 0; j < 256; j++)
- d[j] = -1;
- for (int k = 0; k < pattern.length(); k++) {
- a = pattern.charAt(k);
- d[a] = k;
- }
- return d;
- }
- void goodEndPreprocess1(int[] f, int[] s) {
- int m = pattern.length(), i = m, j = m + 1;
- f[i] = j;
- for (int k = 0; k <= m; k++)
- s[k] = 0;
- while (i > 0) {
- while (j <= m && pattern.charAt(i - 1) != pattern.charAt(j - 1)) {
- if (s[j] == 0)
- s[j] = j - 1;
- j = f[j];
- }
- i--;
- j--;
- f[i] = j;
- }
- }
- void goodEndPreprocess2(int[] f, int[] s) {
- int m = pattern.length();
- int i, j;
- j = f[0];
- for (i = 0; i <= m; i++) {
- if (s[i] == 0)
- s[i] = j;
- if (i == j)
- j = f[j];
- }
- }
- private int[] bmPreprocess(int[] f, int[] s) {
- int[] d = calcOccurrence();
- goodEndPreprocess1(f, s);
- goodEndPreprocess2(f, s);
- return d;
- }
- void boyerMoore() {
- //zur Darstellung
- int[] tMatches = new int[text.length()];
- for (int i = 0; i < tMatches.length; i++) {
- tMatches[i] = UNBEARB;
- }
- int[] pMatches = new int[pattern.length()];
- for (int i = 0; i < pMatches.length; i++) {
- pMatches[i] = UNBEARB;
- }
- int[] last = null;
- int[] s = new int[pattern.length() + 1];
- int[] f = new int[pattern.length() + 1];
- last = bmPreprocess(f, s);
- int n = text.length(), m = pattern.length(), i = 0, j;
- Vector found = new Vector();
- while (i <= n - m) {
- j = m - 1;
- while (j >= 0 && pattern.charAt(j) == text.charAt(i + j)) {
- tMatches[i + j] = MATCH;
- pMatches[j] = MATCH;
- fireNewPosition(tMatches, pMatches, i);
- j--;
- }
- if (j < 0) {
- found.add(new Integer(i));
- System.out.println("Position: " + i);
- for (int k = 0; k < m; k++) {
- pMatches[k] = UNBEARB;
- }
- //report(i);
- i += s[0];
- } else {
- /**tMatches[i + j] = NOMATCH;
- pMatches[j] = NOMATCH;
- fireNewPosition(tMatches, pMatches, i);**/
- // matches must be corrected:
- for (int k = 0; k < m; k++) {
- tMatches[i + k] = UNBEARB;
- pMatches[k] = UNBEARB;
- }
- i += Math.max(s[j + 1], j - last[text.charAt(i + j)]);
- }
- }
- }
- int matchLength(String p, int i, int j) {
- int k = 0, m = p.length();
- boolean match = true;
- while (match && i < m && j < m) {
- if (match = (p.charAt(i) == p.charAt(j))) {
- k++; i++; j++;
- }
- }
- return k;
- }
- int[] calcZ(String s) {
- int m = s.length(), r = 0, l = 0;
- int[] z = new int[m];
- z[0] = 0;
- for (int k = 1; k < m; k++) {
- if (k > r) {
- z[k] = matchLength(s, k, 0);
- if (z[k] > 0) {
- r = k + z[k] - 1;
- l = k;
- }
- } else {
- int ks = k - l, b = r - k + 1;
- if (z[ks] < b)
- z[k] = z[ks];
- else {
- int q = r + 1 + matchLength(s, r + 1, b);
- z[k] = q - k;
- r = q - 1;
- l = k;
- }
- }
- }
- return z;
- }
- void zAlg() {
- int[] tMatches = new int[text.length()];
- for (int i = 0; i < tMatches.length; i++) {
- tMatches[i] = UNBEARB;
- }
- int[] pMatches = new int[pattern.length()];
- for (int i = 0; i < pMatches.length; i++) {
- pMatches[i] = UNBEARB;
- }
- StringBuffer b = new StringBuffer(pattern);
- b = b.append('$');
- b = b.append(text);
- int[] z = calcZ(b.toString());
- int m = pattern.length();
- for (int i = 0; i <= text.length(); i++) {
- if (z[i + m] == m) {
- for (int j = i; j < i+m; j++) {
- tMatches[j-1] = MATCH;
- pMatches[j-i] = MATCH;
- fireNewPosition(tMatches, pMatches, i-1);
- }
- }
- for (int k = 0; k < m; k++) pMatches[k] = UNBEARB;
- }
- }
- static private int[] initShift(String pattern) {
- int m = pattern.length();
- int[] shift = new int[m + 1];
- int[] suffix = new int[m + 1];
- int i, j, h = 0;
- suffix[m - 1] = m;
- j = m - 1;
- for (i = m - 2; i >= 0; --i) {
- if (i > j && suffix[i + m - 1 - h] < i - j) {
- suffix[i] = suffix[i + m - 1 - h];
- } else {
- if (i < j) {
- j = i;
- }
- h = i;
- while (j >= 0
- && pattern.charAt(j) == pattern.charAt(j + m - 1 - h)) {
- --j;
- }
- suffix[i] = h - j;
- }
- }
- for (i = 0; i < m; ++i) {
- shift[i] = m;
- }
- j = 0;
- for (i = m - 1; i >= -1; --i) {
- if (i == -1 || suffix[i] == i + 1) {
- while (j < m - 1 - i) {
- if (shift[j] == m) {
- shift[j] = m - 1 - i;
- }
- ++j;
- }
- }
- }
- for (i = 0; i < m - 1; ++i) {
- shift[m - 1 - suffix[i]] = m - i - 1;
- }
- for (i = 0; i < pattern.length(); ++i) {
- System.out.println("shift[" + i + "]=" + shift[i]);
- }
- return shift;
- }
- static protected int[] initLast(String pattern) {
- int[] last = new int[256];
- for (int i = 0; i < 256; ++i) {
- last[i] = -1;
- }
- for (int i = 0; i < pattern.length(); ++i) {
- last[pattern.charAt(i)] = i;
- }
- return last;
- }
- public void bmSearch() {
- final int last[] = initLast(pattern);
- final int shift[] = initShift(pattern);
- int[] textMatches = new int[text.length()];
- for (int i = 0; i < textMatches.length; i++) {
- textMatches[i] = UNBEARB;
- }
- int[] patternMatches = new int[pattern.length()];
- for (int i = 0; i < patternMatches.length; i++) {
- patternMatches[i] = UNBEARB;
- }
- int tIndex = 0, pIndex;
- int tLength = text.length();
- int pLength = pattern.length();
- Vector found = new Vector();
- while (tIndex <= tLength - pLength) {
- pIndex = pLength - 1;
- //fireNewPosition();
- while (pIndex >= 0
- && pattern.charAt(pIndex) == text.charAt(tIndex + pIndex)) {
- textMatches[pIndex + tIndex] = MATCH;
- patternMatches[pIndex] = MATCH;
- fireNewPosition(textMatches, patternMatches, tIndex);
- pIndex--;
- }
- if (pIndex < 0) { // full match
- found.add(new Integer(tIndex));
- tIndex +=shift[0];
- } else {
- textMatches[pIndex + tIndex] = NOMATCH;
- patternMatches[pIndex] = NOMATCH;
- fireNewPosition(textMatches, patternMatches, tIndex);
- for (int j = 0; j <= pIndex; j++) {
- textMatches[tIndex + j] = UNBEARB;
- patternMatches[j] = UNBEARB;
- }
- tIndex += Math.max(shift[pIndex], pIndex - last[text.charAt(tIndex + pIndex)]);
- }
- for (int i = 0; i < tLength; i++) {
- textMatches[i] = UNBEARB;
- }
- for (int i = 0; i < found.size(); i++) {
- int start = ((Integer) found.get(i)).intValue();
- for (int j = 0; j < pLength; j++) {
- textMatches[start + j] = MATCH;
- }
- }
- for (int i = 0; i < pLength; i++) {
- patternMatches[i] = UNBEARB;
- }
- fireNewPosition(textMatches, patternMatches, tIndex);
- }
- }
- }
Add Comment
Please, Sign In to add comment