Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <cstdio>
- #include <cstdlib>
- #include <cstring>
- #include <algorithm>
- #include <cmath>
- #include <iostream>
- #include <cassert>
- #include <vector>
- using namespace std;
- FILE *f1;
- int ma=0;
- int num[4][4500000]={0};
- char preposition[20][10]={" of"," to"," in"," for"," with"," on"," at"," by"," from"," up"," about"," than"," after"," before"," down"," between"," under"," since"," without"," near"};
- unsigned int fre[4][4500000][28]={0};
- struct WORD
- {
- char str[4][31][150];
- };
- vector<struct WORD> word;
- unsigned int special(char buffer[],int sign)
- {
- int len = strlen(buffer);
- unsigned int value=0;
- for(int i=0;i<len-1;i++)
- {
- value^=buffer[i];
- value=(value<<(7));
- value%=4499969;
- }
- value%=4499969;
- return value;
- }
- void input(int sign)
- {
- char frequency[100]="\0";
- char buffer[256]="\0";
- int count=0;
- while(fgets(buffer,256,f1)!=NULL)
- {
- int len=strlen(buffer);
- for(int i=len-2;;i--)
- {
- if(isspace(buffer[i]))
- {
- strcpy(frequency,buffer+(i+1));
- buffer[i]='\0';
- unsigned int spe=special(buffer,sign);
- strcpy(word[spe].str[sign][num[sign][spe]],buffer);
- fre[sign][spe][num[sign][spe]]=atoll(frequency);
- num[sign][spe]++;
- //ma= ma<num[sign][spe]? num[sign][spe]:ma;
- break;
- }
- }
- }
- }
- int check_p(char tmp[20])
- {
- for(int i=0;i<20;i++)
- {
- if(strcmp(tmp,preposition[i]+1)==0) return 1;
- }
- return 0;
- }
- char* insertion(char query[],int index,int num)
- {
- char tmp[150]="\0";
- int len= strlen(query);
- int count=0;
- if(index==0)
- {
- strcat(query,preposition[num]);
- return query;
- }
- else
- {
- for(int i=len-1;i>=0;i--)
- {
- if(isspace(query[i]))
- {
- count++;
- }
- if(count==index)
- {
- strcpy(tmp,query+i);
- query[i]='\0';
- strcat(query,preposition[num]);
- strcat(query,tmp);
- return query;
- }
- else if(i==0)
- {
- tmp[0]=' ',tmp[1]='\0';
- strcat(tmp,query);
- query[0]='\0';
- strcat(query,preposition[num]+1);
- strcat(query,tmp);
- return query;
- }
- }
- }
- return query;
- }
- char* deletion(char query[],int index)
- {
- char tmp[150]="\0";
- int len = strlen(query);
- int count=0;
- if(index==0)
- {
- for(int i=len-1;i>=0;i--)
- {
- if(isspace(query[i]))
- {
- query[i]='\0';
- return query;
- }
- }
- }
- else
- {
- for(int i=len-1;i>=0;i--)
- {
- if(isspace(query[i]))
- count++;
- if(count==index)
- {
- for(int j=i-1;j>=0;j--)
- if(isspace(query[j]))
- {
- strcpy(tmp,query+i);
- query[j]='\0';
- strcat(query,tmp);
- return query;
- }
- else if(j==0)
- {
- strcpy(tmp,query+i+1);
- query[j]='\0';
- strcat(query,tmp);
- return query;
- }
- }
- }
- }
- return query;
- }
- char* substitute(char query[],int index,int num)
- {
- deletion(query,index);
- insertion(query,index,num);
- return query;
- }
- int n=0;
- int n_ed1=0;
- int n_ed2=0;
- int p_ed1[4000]={0};
- int p_of_ed2[40000]={0};
- int p_of_ed1[4000]={0};
- struct Answer
- {
- char ans[150];
- unsigned int ans_fre;
- Answer(){
- for (int i = 0; i < 150; i++)
- ans[i] = '\0';
- ans_fre = 0;
- }
- };
- typedef struct Answer ANSWER;
- int check(char ask[],int position,ANSWER answer[])
- {
- if(position-2>3) return 0;
- int spe=special(ask,position-2);
- for(int i=0;i<num[position-2][spe];i++)
- {
- if(strcmp(ask,word[spe].str[position-2][i])==0)
- {
- answer[n].ans[0]='\0';
- strcpy(answer[n].ans,ask);
- answer[n].ans_fre=fre[position-2][spe][i];
- n++;
- return 1;
- }
- }
- return 0;
- }
- int compare(const void *a,const void *b)
- {
- ANSWER *c=(ANSWER *)a;
- ANSWER *d=(ANSWER *)b;
- if(c->ans_fre!=d->ans_fre)
- return c->ans_fre>d->ans_fre ? -1:1;
- return strcmp(c->ans,d->ans);
- }
- int main(void)
- {
- word.reserve(4500000);
- f1=fopen("/tmp2/dsa2016_project/2gm.small.txt","r");
- input(0);
- fclose(f1);
- f1=fopen("/tmp2/dsa2016_project/3gm.small.txt","r");
- input(1);
- fclose(f1);
- f1=fopen("/tmp2/dsa2016_project/4gm.small.txt","r");
- input(2);
- fclose(f1);
- f1=fopen("/tmp2/dsa2016_project/5gm.small.txt","r");
- input(3);
- fclose(f1);
- char query[150]="\0";
- while(fgets(query,150,stdin)!=NULL)
- {
- ANSWER answer[200];
- n=0;
- n_ed2=0;
- n_ed1=0;
- char ed1[4000][120] = {0};
- char ed2[40000][130] = {0};
- int position=0;
- int count=0;
- int pre[10]={0};
- int len=strlen(query);
- query[len-1]='\0';
- char recover[150]={0};
- strcpy(recover,query);
- int space=len-1;
- for(int i=len-2;i>-1;i--)
- {
- if(query[i]==' '||i==0)
- {
- char temp[200]="\0";
- int w=0;
- for(int j=i;j<space;j++)
- {
- if(query[j]==' ') continue;
- temp[w]=query[j];
- w++;
- }
- space=i;
- if(check_p(temp))
- {
- pre[position]=1;
- count++;
- }
- position++;
- }
- }
- if(count==0)
- {
- check(recover,position,answer);
- for(int i=0;i<=position;i++)
- for(int j=0;j<20;j++)
- {
- insertion(query,i,j);
- strcpy(ed1[n_ed1],query);
- p_of_ed1[n_ed1]=position+1;
- n_ed1++;
- strcpy(query,recover);
- }
- for(int k=0;k<n_ed1;k++)
- {
- char recover[200]={0};
- strcpy(recover,ed1[k]);
- for(int i=0;i<p_of_ed1[k];i++)
- for(int j=0;j<20;j++)
- {
- insertion(ed1[k],i,j);
- strcpy(ed2[n_ed2],ed1[k]);
- p_of_ed2[n_ed2]=p_of_ed1[k]+1;
- n_ed2++;
- strcpy(ed1[k],recover);
- }
- }
- }
- else
- {
- int number_of_p=0;
- for(int i=0;i<position;i++)
- {
- if(pre[i])
- {
- int q=i;
- while(1)
- {
- if(pre[q++]) number_of_p++;
- else break;
- }
- for(int m=0;m<number_of_p;m++)
- {
- deletion(query,m+i);
- p_ed1[n_ed1]=i+number_of_p-1;
- strcpy(ed1[n_ed1],query);
- p_of_ed1[n_ed1]=position-1;
- n_ed1++;
- strcpy(query,recover);
- for(int j=0;j<20;j++)
- {
- insertion(query,m+i,j);
- strcpy(ed1[n_ed1],query);
- p_ed1[n_ed1]=i+number_of_p+1;
- p_of_ed1[n_ed1]=position+1;
- n_ed1++;
- strcpy(query,recover);
- insertion(query,m+i+1,j);
- strcpy(ed1[n_ed1],query);
- p_ed1[n_ed1]=i+number_of_p+1;
- p_of_ed1[n_ed1]=position+1;
- n_ed1++;
- strcpy(query,recover);
- substitute(query,m+i,j);
- strcpy(ed1[n_ed1],query);
- p_ed1[n_ed1]=i+number_of_p;
- p_of_ed1[n_ed1]=position;
- n_ed1++;
- strcpy(query,recover);
- }
- }
- break;
- }
- }
- for(int k=0;k<n_ed1;k++)
- {
- int position=0;
- int pre[4000][11]={0};
- int len=strlen(ed1[k]);
- int space=len;
- for(int i=len-1;i>-1;i--)
- {
- if(ed1[k][i]==' '||i==0)
- {
- char temp[150]="\0";
- int w=0;
- for(int j=i;j<space;j++)
- {
- if(ed1[k][j]==' ') continue;
- temp[w]=ed1[k][j];
- w++;
- }
- space=i;
- if(check_p(temp))
- {
- pre[k][position]=1;
- }
- position++;
- }
- }
- for(int i=p_ed1[k];i<p_of_ed1[k];i++)
- {
- char recover[200]={0};
- strcpy(recover,ed1[k]);
- if(pre[k][i]==1)
- {
- int j=i;
- number_of_p=0;
- while(1)
- {
- if(pre[k][j++]) number_of_p++;
- else break;
- }
- for(int m=0;m<number_of_p;m++)
- {
- deletion(ed1[k],m+i);
- strcpy(ed2[n_ed2],ed1[k]);
- p_of_ed2[n_ed2]=position-1;
- n_ed2++;
- strcpy(ed1[k],recover);
- for(int j=0;j<20;j++)
- {
- insertion(ed1[k],m+i,j);
- strcpy(ed2[n_ed2],ed1[k]);
- p_of_ed2[n_ed2]=position+1;
- n_ed2++;
- strcpy(ed1[k],recover);
- insertion(ed1[k],m+i+1,j);
- strcpy(ed2[n_ed2],ed1[k]);
- p_of_ed2[n_ed2]=position+1;
- n_ed2++;
- strcpy(ed1[k],recover);
- substitute(ed1[k],m+i,j);
- strcpy(ed2[n_ed2],ed1[k]);
- p_of_ed2[n_ed2]=position;
- n_ed2++;
- strcpy(ed1[k],recover);
- }
- }
- break;
- }
- }
- }
- }
- for(int i=0;i<n_ed1;i++)
- check(ed1[i],p_of_ed1[i],answer);
- for(int i=0;i<n_ed2;i++)
- check(ed2[i],p_of_ed2[i],answer);
- int out=n;
- qsort(answer,n,sizeof(ANSWER),compare);
- for(int i=0;i<n;i++)
- {
- if(i==0);
- else if(strcmp(answer[i].ans,answer[i-1].ans)==0) out--;
- }
- if(out>10) out=10;
- printf("query: %s\n",query);
- printf("output: %d\n",out);
- int flag=0;
- for(int i=0;i<n;i++)
- {
- if(flag==9) break;
- if(i==0) printf("%s %u\n",answer[i].ans,answer[i].ans_fre);
- else if(strcmp(answer[i].ans,answer[i-1].ans)!=0)
- {
- printf("%s\t%u\n",answer[i].ans,answer[i].ans_fre);
- flag++;
- }
- }
- continue;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement