Advertisement
Guest User

Untitled

a guest
Jun 28th, 2016
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.07 KB | None | 0 0
  1. #include <cstdio>
  2. #include <cstdlib>
  3. #include <cstring>
  4. #include <algorithm>
  5. #include <cmath>
  6. #include <iostream>
  7. #include <cassert>
  8. #include <vector>
  9. using namespace std;
  10. FILE *f1;
  11. int ma=0;
  12. int num[4][4500000]={0};
  13. char preposition[20][10]={" of"," to"," in"," for"," with"," on"," at"," by"," from"," up"," about"," than"," after"," before"," down"," between"," under"," since"," without"," near"};
  14. unsigned int fre[4][4500000][28]={0};
  15. struct WORD
  16. {
  17. char str[4][31][150];
  18. };
  19. vector<struct WORD> word;
  20. unsigned int special(char buffer[],int sign)
  21. {
  22. int len = strlen(buffer);
  23. unsigned int value=0;
  24. for(int i=0;i<len-1;i++)
  25. {
  26. value^=buffer[i];
  27. value=(value<<(7));
  28. value%=4499969;
  29. }
  30. value%=4499969;
  31. return value;
  32. }
  33. void input(int sign)
  34. {
  35. char frequency[100]="\0";
  36. char buffer[256]="\0";
  37. int count=0;
  38. while(fgets(buffer,256,f1)!=NULL)
  39. {
  40. int len=strlen(buffer);
  41. for(int i=len-2;;i--)
  42. {
  43. if(isspace(buffer[i]))
  44. {
  45. strcpy(frequency,buffer+(i+1));
  46. buffer[i]='\0';
  47. unsigned int spe=special(buffer,sign);
  48. strcpy(word[spe].str[sign][num[sign][spe]],buffer);
  49. fre[sign][spe][num[sign][spe]]=atoll(frequency);
  50. num[sign][spe]++;
  51. //ma= ma<num[sign][spe]? num[sign][spe]:ma;
  52. break;
  53. }
  54. }
  55. }
  56. }
  57. int check_p(char tmp[20])
  58. {
  59. for(int i=0;i<20;i++)
  60. {
  61. if(strcmp(tmp,preposition[i]+1)==0) return 1;
  62. }
  63. return 0;
  64. }
  65. char* insertion(char query[],int index,int num)
  66. {
  67. char tmp[150]="\0";
  68. int len= strlen(query);
  69. int count=0;
  70. if(index==0)
  71. {
  72. strcat(query,preposition[num]);
  73. return query;
  74. }
  75. else
  76. {
  77. for(int i=len-1;i>=0;i--)
  78. {
  79. if(isspace(query[i]))
  80. {
  81. count++;
  82. }
  83. if(count==index)
  84. {
  85. strcpy(tmp,query+i);
  86. query[i]='\0';
  87. strcat(query,preposition[num]);
  88. strcat(query,tmp);
  89. return query;
  90. }
  91. else if(i==0)
  92. {
  93. tmp[0]=' ',tmp[1]='\0';
  94. strcat(tmp,query);
  95. query[0]='\0';
  96. strcat(query,preposition[num]+1);
  97. strcat(query,tmp);
  98. return query;
  99. }
  100. }
  101. }
  102. return query;
  103. }
  104. char* deletion(char query[],int index)
  105. {
  106. char tmp[150]="\0";
  107. int len = strlen(query);
  108. int count=0;
  109. if(index==0)
  110. {
  111. for(int i=len-1;i>=0;i--)
  112. {
  113. if(isspace(query[i]))
  114. {
  115. query[i]='\0';
  116. return query;
  117. }
  118. }
  119. }
  120. else
  121. {
  122. for(int i=len-1;i>=0;i--)
  123. {
  124. if(isspace(query[i]))
  125. count++;
  126. if(count==index)
  127. {
  128. for(int j=i-1;j>=0;j--)
  129. if(isspace(query[j]))
  130. {
  131. strcpy(tmp,query+i);
  132. query[j]='\0';
  133. strcat(query,tmp);
  134. return query;
  135. }
  136. else if(j==0)
  137. {
  138. strcpy(tmp,query+i+1);
  139. query[j]='\0';
  140. strcat(query,tmp);
  141. return query;
  142. }
  143. }
  144. }
  145. }
  146. return query;
  147. }
  148. char* substitute(char query[],int index,int num)
  149. {
  150. deletion(query,index);
  151. insertion(query,index,num);
  152. return query;
  153. }
  154. int n=0;
  155. int n_ed1=0;
  156. int n_ed2=0;
  157. int p_ed1[4000]={0};
  158. int p_of_ed2[40000]={0};
  159. int p_of_ed1[4000]={0};
  160. struct Answer
  161. {
  162. char ans[150];
  163. unsigned int ans_fre;
  164. Answer(){
  165. for (int i = 0; i < 150; i++)
  166. ans[i] = '\0';
  167. ans_fre = 0;
  168. }
  169. };
  170. typedef struct Answer ANSWER;
  171. int check(char ask[],int position,ANSWER answer[])
  172. {
  173. if(position-2>3) return 0;
  174. int spe=special(ask,position-2);
  175. for(int i=0;i<num[position-2][spe];i++)
  176. {
  177. if(strcmp(ask,word[spe].str[position-2][i])==0)
  178. {
  179. answer[n].ans[0]='\0';
  180. strcpy(answer[n].ans,ask);
  181. answer[n].ans_fre=fre[position-2][spe][i];
  182. n++;
  183. return 1;
  184. }
  185. }
  186. return 0;
  187. }
  188. int compare(const void *a,const void *b)
  189. {
  190. ANSWER *c=(ANSWER *)a;
  191. ANSWER *d=(ANSWER *)b;
  192. if(c->ans_fre!=d->ans_fre)
  193. return c->ans_fre>d->ans_fre ? -1:1;
  194. return strcmp(c->ans,d->ans);
  195. }
  196. int main(void)
  197. {
  198. word.reserve(4500000);
  199. f1=fopen("/tmp2/dsa2016_project/2gm.small.txt","r");
  200. input(0);
  201. fclose(f1);
  202. f1=fopen("/tmp2/dsa2016_project/3gm.small.txt","r");
  203. input(1);
  204. fclose(f1);
  205. f1=fopen("/tmp2/dsa2016_project/4gm.small.txt","r");
  206. input(2);
  207. fclose(f1);
  208. f1=fopen("/tmp2/dsa2016_project/5gm.small.txt","r");
  209. input(3);
  210. fclose(f1);
  211. char query[150]="\0";
  212. while(fgets(query,150,stdin)!=NULL)
  213. {
  214. ANSWER answer[200];
  215. n=0;
  216. n_ed2=0;
  217. n_ed1=0;
  218. char ed1[4000][120] = {0};
  219. char ed2[40000][130] = {0};
  220. int position=0;
  221. int count=0;
  222. int pre[10]={0};
  223. int len=strlen(query);
  224. query[len-1]='\0';
  225. char recover[150]={0};
  226. strcpy(recover,query);
  227. int space=len-1;
  228. for(int i=len-2;i>-1;i--)
  229. {
  230. if(query[i]==' '||i==0)
  231. {
  232. char temp[200]="\0";
  233. int w=0;
  234. for(int j=i;j<space;j++)
  235. {
  236. if(query[j]==' ') continue;
  237. temp[w]=query[j];
  238. w++;
  239. }
  240. space=i;
  241. if(check_p(temp))
  242. {
  243. pre[position]=1;
  244. count++;
  245. }
  246. position++;
  247. }
  248. }
  249. if(count==0)
  250. {
  251. check(recover,position,answer);
  252. for(int i=0;i<=position;i++)
  253. for(int j=0;j<20;j++)
  254. {
  255. insertion(query,i,j);
  256. strcpy(ed1[n_ed1],query);
  257. p_of_ed1[n_ed1]=position+1;
  258. n_ed1++;
  259. strcpy(query,recover);
  260. }
  261. for(int k=0;k<n_ed1;k++)
  262. {
  263. char recover[200]={0};
  264. strcpy(recover,ed1[k]);
  265. for(int i=0;i<p_of_ed1[k];i++)
  266. for(int j=0;j<20;j++)
  267. {
  268. insertion(ed1[k],i,j);
  269. strcpy(ed2[n_ed2],ed1[k]);
  270. p_of_ed2[n_ed2]=p_of_ed1[k]+1;
  271. n_ed2++;
  272. strcpy(ed1[k],recover);
  273. }
  274. }
  275. }
  276. else
  277. {
  278. int number_of_p=0;
  279. for(int i=0;i<position;i++)
  280. {
  281. if(pre[i])
  282. {
  283. int q=i;
  284. while(1)
  285. {
  286. if(pre[q++]) number_of_p++;
  287. else break;
  288. }
  289. for(int m=0;m<number_of_p;m++)
  290. {
  291. deletion(query,m+i);
  292. p_ed1[n_ed1]=i+number_of_p-1;
  293. strcpy(ed1[n_ed1],query);
  294. p_of_ed1[n_ed1]=position-1;
  295. n_ed1++;
  296. strcpy(query,recover);
  297. for(int j=0;j<20;j++)
  298. {
  299. insertion(query,m+i,j);
  300. strcpy(ed1[n_ed1],query);
  301. p_ed1[n_ed1]=i+number_of_p+1;
  302. p_of_ed1[n_ed1]=position+1;
  303. n_ed1++;
  304. strcpy(query,recover);
  305. insertion(query,m+i+1,j);
  306. strcpy(ed1[n_ed1],query);
  307. p_ed1[n_ed1]=i+number_of_p+1;
  308. p_of_ed1[n_ed1]=position+1;
  309. n_ed1++;
  310. strcpy(query,recover);
  311. substitute(query,m+i,j);
  312. strcpy(ed1[n_ed1],query);
  313. p_ed1[n_ed1]=i+number_of_p;
  314. p_of_ed1[n_ed1]=position;
  315. n_ed1++;
  316. strcpy(query,recover);
  317. }
  318. }
  319. break;
  320. }
  321. }
  322. for(int k=0;k<n_ed1;k++)
  323. {
  324. int position=0;
  325. int pre[4000][11]={0};
  326. int len=strlen(ed1[k]);
  327. int space=len;
  328. for(int i=len-1;i>-1;i--)
  329. {
  330. if(ed1[k][i]==' '||i==0)
  331. {
  332. char temp[150]="\0";
  333. int w=0;
  334. for(int j=i;j<space;j++)
  335. {
  336. if(ed1[k][j]==' ') continue;
  337. temp[w]=ed1[k][j];
  338. w++;
  339. }
  340. space=i;
  341. if(check_p(temp))
  342. {
  343. pre[k][position]=1;
  344. }
  345. position++;
  346. }
  347. }
  348. for(int i=p_ed1[k];i<p_of_ed1[k];i++)
  349. {
  350. char recover[200]={0};
  351. strcpy(recover,ed1[k]);
  352. if(pre[k][i]==1)
  353. {
  354. int j=i;
  355. number_of_p=0;
  356. while(1)
  357. {
  358. if(pre[k][j++]) number_of_p++;
  359. else break;
  360. }
  361. for(int m=0;m<number_of_p;m++)
  362. {
  363. deletion(ed1[k],m+i);
  364. strcpy(ed2[n_ed2],ed1[k]);
  365. p_of_ed2[n_ed2]=position-1;
  366. n_ed2++;
  367. strcpy(ed1[k],recover);
  368. for(int j=0;j<20;j++)
  369. {
  370. insertion(ed1[k],m+i,j);
  371. strcpy(ed2[n_ed2],ed1[k]);
  372. p_of_ed2[n_ed2]=position+1;
  373. n_ed2++;
  374. strcpy(ed1[k],recover);
  375. insertion(ed1[k],m+i+1,j);
  376. strcpy(ed2[n_ed2],ed1[k]);
  377. p_of_ed2[n_ed2]=position+1;
  378. n_ed2++;
  379. strcpy(ed1[k],recover);
  380. substitute(ed1[k],m+i,j);
  381. strcpy(ed2[n_ed2],ed1[k]);
  382. p_of_ed2[n_ed2]=position;
  383. n_ed2++;
  384. strcpy(ed1[k],recover);
  385. }
  386. }
  387. break;
  388. }
  389. }
  390. }
  391. }
  392. for(int i=0;i<n_ed1;i++)
  393. check(ed1[i],p_of_ed1[i],answer);
  394. for(int i=0;i<n_ed2;i++)
  395. check(ed2[i],p_of_ed2[i],answer);
  396. int out=n;
  397. qsort(answer,n,sizeof(ANSWER),compare);
  398. for(int i=0;i<n;i++)
  399. {
  400. if(i==0);
  401. else if(strcmp(answer[i].ans,answer[i-1].ans)==0) out--;
  402. }
  403. if(out>10) out=10;
  404. printf("query: %s\n",query);
  405. printf("output: %d\n",out);
  406. int flag=0;
  407. for(int i=0;i<n;i++)
  408. {
  409. if(flag==9) break;
  410. if(i==0) printf("%s %u\n",answer[i].ans,answer[i].ans_fre);
  411. else if(strcmp(answer[i].ans,answer[i-1].ans)!=0)
  412. {
  413. printf("%s\t%u\n",answer[i].ans,answer[i].ans_fre);
  414. flag++;
  415. }
  416. }
  417. continue;
  418. }
  419. return 0;
  420. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement