Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <malloc.h>
- #include <string.h>
/* Number of distinct words stored in the misspelling table; count() bumps it
 * each time it creates a new entry, and main() uses it to size the sort array. */
int total = 0;
/* One node of a singly linked chain of words; main() keeps one chain per
 * hash bucket for the words read from the dictionary file. */
struct index
{
    char word[51];      /* NUL-terminated word text (at most 50 characters) */
    struct index *ne;   /* next node in the same chain, NULL at the tail */
};
/* Record for one word that was not found in the dictionary: the word itself,
 * how many times it was seen, and the position recorded for each sighting. */
struct conter
{
    char word[51];        /* NUL-terminated word text */
    int n;                /* number of valid entries in pos[] */
    int pos[2000];        /* position of each occurrence, in order of arrival */
    struct conter *ne;    /* next record in the same hash bucket, or NULL */
};
/*
 * Polynomial string hash (multiplier 37) over the bytes of str.
 * The accumulator wraps as unsigned arithmetic; the result is reduced
 * into the range [0, 450000).
 */
unsigned int hash(char *str)
{
    unsigned int acc = 0;
    char *cur = str;

    while (*cur != '\0') {
        acc = acc * 37 + *cur;
        cur++;
    }
    return acc % 450000;
}
- struct index *newindex(char* str)
- {
- int i;
- struct index *p=(struct index*)malloc(sizeof(struct index));
- p->ne=NULL;
- for(i=0;str[i]!='\0';i++)
- {
- p->word[i]=str[i];
- }
- p->word[i]='\0';
- return p;
- }
- void append(struct index *head,struct index *newElement)
- {
- struct index *p;
- for(p=head;p->ne!=NULL;p=p->ne);
- p->ne=newElement;
- }
- struct conter *newconter(char *str,int pos)
- {
- struct conter *p=(struct conter *)malloc(sizeof(struct conter));
- p->ne=NULL;
- p->n=1;
- p->pos[0]=pos;
- strcpy(p->word,str);
- return p;
- }
- void count(struct conter *miss[],char *str,int pos)
- {
- unsigned int h=hash(str);
- if(miss[h]==NULL)
- {
- miss[h]=newconter(str,pos);
- total++;
- return;
- }
- struct conter *p;
- for(p=miss[h];p!=NULL;p=p->ne)
- {
- if(!strcmp(str,p->word))
- {
- p->pos[p->n++]=pos;
- return;
- }
- }
- p=newconter(str,pos);
- total++;
- p->ne=miss[h];
- miss[h]=p;
- }
- int emp(const void *a, const void *b)
- {
- struct conter** t1=(struct conter * *)a;
- struct conter** t2=(struct conter * *)b;
- int n=(*t1)->n-(*t2)->n;
- if(n)
- return -n;
- return strcmp((*t1)->word,(*t2)->word);
- }
- void print(struct conter * con, FILE * file)
- {
- int i;
- fprintf(file,"%s %d %d",con->word,con->n,con->pos[0]);
- for (i = 1; i < con->n; i++)
- {
- fprintf(file," %d", con->pos[i]);
- }
- fprintf(file, "\n");
- }
- int main()
- {
- FILE *in,*out;
- in=fopen("dictionary.txt","r");
- char temp[51];
- char t;
- struct index *words[450000];
- struct conter *miss[25000];
- int i,o,pos;
- unsigned int h;
- for(i=0;i<25000;i++)
- {
- words[i]=NULL;
- miss[i]=NULL;
- }
- for(;i<450000;i++)
- {
- words[i]=NULL;
- }
- while(!feof(in))
- {
- i=0;
- do
- {
- t=fgetc(in);
- if(t==EOF)
- break;
- }while(!((t>='A'&&t<='Z')||(t>='a'&&t<='z')));
- do
- {
- if(t>='A'&&t<='Z')
- t+='a'-'A';
- temp[i]=t;
- i++;
- t=fgetc(in);
- }while(((t>='A'&&t<='Z')||(t>='a'&&t<='z'))&&i<=50);
- temp[i]='\0';
- h=hash(temp);
- if(words[h]==NULL)
- words[h]=newindex(temp);
- else
- append(words[h],newindex(temp));
- }
- fclose(in);
- in=fopen("article.txt","r");
- pos=-1;
- while(!feof(in))
- {
- i=0;
- do
- {
- t=fgetc(in);
- pos++;
- if(t==EOF)
- break;
- }while(!((t>='A'&&t<='Z')||(t>='a'&&t<='z')));
- o=pos;
- do
- {
- if(t>='A'&&t<='Z')
- t+='a'-'A';
- temp[i]=t;
- i++;
- t=fgetc(in);
- pos++;
- }while(((t>='A'&&t<='Z')||(t>='a'&&t<='z'))&&i<=30);
- temp[i]='\0';
- h=hash(temp);
- if(words[h]==NULL)
- {
- count(miss,temp,o);
- continue;
- }
- struct index *p;
- for(p=words[h];p!=NULL;p=p->ne)
- {
- if(!strcmp(temp,p->word))
- break;
- }
- if(p==NULL)
- {
- count(miss,temp,o);
- }
- }
- fclose(in);
- out=fopen("misspelling.txt","w");
- i=0;
- o=0;
- struct conter *a;
- struct conter **missa=(struct conter * *)malloc(sizeof(struct conter*)*total);
- for(i=0;i<25000;i++)
- {
- a=miss[i];
- while(a!=NULL)
- {
- missa[o++]=a;
- a=a->ne;
- }
- }
- qsort(missa,total,sizeof(struct conter*),emp);
- for(i=0;i<total;i++)
- {
- print(missa[i],out);
- }
- fclose(out);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement