Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Compute the Levenshtein (edit) distance between two NUL-terminated
 * strings: the minimum number of single-character insertions, deletions
 * and substitutions needed to turn word_1 into word_2.
 *
 * Only one row of the dynamic-programming table is kept, allocated on the
 * heap, so memory use is proportional to strlen(word_2) and long inputs
 * cannot exhaust the stack.
 *
 * word_1, word_2: non-NULL, NUL-terminated strings (compared byte by byte).
 *
 * Returns the edit distance, or -1 if the working row cannot be allocated.
 */
int leven_shtein(const char *word_1, const char *word_2)
{
    size_t length_1 = strlen(word_1);
    size_t length_2 = strlen(word_2);

    int *row = malloc((length_2 + 1) * sizeof *row);
    if (row == NULL) {
        return -1;
    }

    /* Row 0: the empty prefix of word_1 becomes the first j characters of
     * word_2 with exactly j insertions. */
    for (size_t j = 0; j <= length_2; j++) {
        row[j] = (int)j;
    }

    for (size_t i = 1; i <= length_1; i++) {
        char c_1 = word_1[i - 1];

        /* diag carries the previous row's value one column to the left of
         * the cell being computed; it must be saved before row[] is
         * overwritten with the current row. */
        int diag = row[0];

        /* Column 0: the first i characters of word_1 become the empty
         * string with exactly i deletions. */
        row[0] = (int)i;

        for (size_t j = 1; j <= length_2; j++) {
            int above = row[j];     /* previous row, same column   */
            int left = row[j - 1];  /* current row, previous column */
            int best;

            if (c_1 == word_2[j - 1]) {
                /* Matching characters cost nothing: inherit the diagonal. */
                best = diag;
            } else {
                /* Cheapest of substitution (diag), deletion (above) and
                 * insertion (left), plus one for the edit itself. */
                best = diag;
                if (above < best) {
                    best = above;
                }
                if (left < best) {
                    best = left;
                }
                best += 1;
            }

            row[j] = best;
            diag = above;
        }
    }

    /* The last cell of the final row is the distance between the full words. */
    int distance = row[length_2];
    free(row);
    return distance;
}
/* Capacities of the word tables and the index of the OCR word to look up. */
enum {
    MAX_WORD_LEN = 50,      /* bytes per stored word, terminator included */
    MAX_OCR_WORDS = 5000,
    MAX_DIC_WORDS = 25000,
    OCR_WORD_INDEX = 7      /* zero-based index of the OCR token to check */
};

/* Return non-zero if c is one of the characters that ends a token. */
static int is_separator(int c)
{
    return c == ' ' || c == ',' || c == '.' || c == '\n';
}

/*
 * Read the file at `path` and split it into tokens separated by a space,
 * comma, full stop or newline. Each separator ends a token, so two adjacent
 * separators produce an empty token. Tokens longer than MAX_WORD_LEN - 1
 * bytes are truncated, and reading stops once `max_words` tokens are stored.
 *
 * Returns the number of tokens stored in `words`, or -1 (after reporting the
 * reason with perror) if the file cannot be opened.
 */
static int read_words(const char *path, char words[][MAX_WORD_LEN], int max_words)
{
    FILE *inp = fopen(path, "r");
    if (inp == NULL) {
        perror(path);
        return -1;
    }

    int count = 0;
    int len = 0;
    int c;  /* int, not char, so EOF stays distinguishable from data bytes */

    while (count < max_words && (c = fgetc(inp)) != EOF) {
        if (is_separator(c)) {
            words[count][len] = '\0';
            count++;
            len = 0;
        } else if (len < MAX_WORD_LEN - 1) {
            words[count][len++] = (char)c;
        }
        /* else: the token is already full, drop the extra character */
    }

    /* A final token that ends at end-of-file instead of at a separator. */
    if (count < max_words && len > 0) {
        words[count][len] = '\0';
        count++;
    }

    fclose(inp);
    return count;
}

/*
 * Load the tokens of "ocr_output.txt" and the dictionary "wrt.dic", then
 * look up OCR token number OCR_WORD_INDEX in the dictionary.
 *
 * If the dictionary contains the token, a "found" message is printed.
 * Otherwise, if any dictionary entries are at edit distance 1, a "Not found"
 * message and the list of those entries are printed. If neither is the case
 * nothing is printed.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if a file cannot be opened, the
 * requested token does not exist, or a distance cannot be computed.
 */
int main(void)
{
    /* Static storage: together these tables exceed 1.5 MB, which is too
     * much to place on the stack safely. */
    static char ocr_output[MAX_OCR_WORDS][MAX_WORD_LEN];
    static char wrt_output[MAX_DIC_WORDS][MAX_WORD_LEN];
    static int distance[MAX_DIC_WORDS];

    int ocr_count = read_words("ocr_output.txt", ocr_output, MAX_OCR_WORDS);
    if (ocr_count < 0) {
        return EXIT_FAILURE;
    }

    int dic_count = read_words("wrt.dic", wrt_output, MAX_DIC_WORDS);
    if (dic_count < 0) {
        return EXIT_FAILURE;
    }

    int k = OCR_WORD_INDEX;  /* number of the word */
    if (k >= ocr_count) {
        fprintf(stderr, "ocr_output.txt holds only %d word(s); word %d does not exist\n",
                ocr_count, k);
        return EXIT_FAILURE;
    }

    /* Scan every dictionary entry that was actually read. An exact match
     * anywhere wins, even if a near match appears earlier in the file. */
    int found = 0;
    int similar = 0;
    for (int i = 0; i < dic_count; i++) {
        distance[i] = leven_shtein(ocr_output[k], wrt_output[i]);
        if (distance[i] < 0) {
            fprintf(stderr, "could not compute edit distance\n");
            return EXIT_FAILURE;
        }
        if (distance[i] == 0) {
            found = 1;
            break;
        }
        if (distance[i] == 1) {
            similar++;
        }
    }

    if (found) {
        printf("'%s' found in the dictionary\n", ocr_output[k]);
    } else if (similar > 0) {
        printf("'%s'Not found in the dictionary\n", ocr_output[k]);
        printf(" List of the similar words: \n");
        /* No exact match means the scan above filled distance[] for every
         * entry, so the stored values can be reused here. */
        for (int j = 0; j < dic_count; j++) {
            if (distance[j] == 1) {
                printf("%s\n", wrt_output[j]);
            }
        }
    }

    return EXIT_SUCCESS;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement