Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2017
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.12 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <assert.h>
  4. #include <string.h>
  5. #define MAX_LETTERS 64
  6. #define LINE 1000
  7. #define MAX_OUTPUT 30
  // Returns the number of lines in pagerankList.txt, which the rest of the
  // program uses as the number of URLs.
  //
  // Every line counts, including empty ones and a final line that has no
  // trailing newline. The file is scanned one character at a time, so a line
  // of any length is counted exactly once.
  //
  // Prints an error to stderr and returns 0 when the file cannot be opened.
  int countUrls(void) {
      FILE *pr = fopen("pagerankList.txt", "r");
      if (pr == NULL) {
          fprintf(stderr, "Error: pagerankList.txt not found\n");
          return 0;
      }

      int nUrls = 0;
      int previous = '\n';   // an empty file therefore contributes no line
      int ch;
      while ((ch = getc(pr)) != EOF) {
          if (ch == '\n') {
              nUrls++;
          }
          previous = ch;
      }
      // The last line still counts when the file does not end in a newline.
      if (previous != '\n') {
          nUrls++;
      }

      fclose(pr);
      return nUrls;
  }
  24. //finds pages in containing "term" and stores urls into "urlList"
  25. int findPages(char term[MAX_LETTERS], char *urlList[MAX_LETTERS]){
  26. FILE *ii;
  27. char line[LINE];
  28. int found = 0;
  29. int index;
  30. int listIndex = 0;
  31. int iiIndex;
  32. ii = fopen("invertedIndex.txt", "r");
  33. if (ii == NULL) {
  34. fprintf(stderr, "Error: invertedIndex.txt not found\n");
  35. return 0;
  36. }
  37. //reading lines from invertedIndex
  38. while(found == 0 && fgets(line, LINE, ii)) {
  39. char *separate = strtok (line, " \n");
  40. char *words[MAX_LETTERS];
  41. index = 0;
  42. while (separate != NULL){
  43. words[index] = separate;
  44. separate = strtok (NULL, " \n");
  45. index++;
  46. }
  47. if (index > MAX_OUTPUT) {
  48. index = MAX_OUTPUT;
  49. }
  50. if (strcmp(words[0], term) == 0) {
  51. found = 1;
  52. iiIndex = 1;
  53. //copy these "url"s into urlList for this search term
  54. while(iiIndex < index) {
  55. strcpy(urlList[listIndex], words[iiIndex]);
  56. listIndex++;
  57. iiIndex++;
  58. }
  59. }
  60. }
  61. fclose(ii);
  62. if (found == 0) {
  63. fprintf(stderr, "Error: search term not found\n");
  64. return 0;
  65. }
  66. return index - 1;
  67. }
  68. void sortByPR(int nUrls, char *urlsToSort[MAX_LETTERS]) {
  69. FILE *pr;
  70. pr = fopen("pagerankList.txt", "r");
  71. char line[LINE];
  72.  
  73. if (pr == NULL) {
  74. fprintf(stderr, "Error: pagerankList.txt not found\n");
  75. return;
  76. }
  77. //reading each line from pageranklist.txt
  78. int urlListIndex;
  79. int found;
  80. char **match = malloc(nUrls * sizeof(char*));
  81. int i;
  82. for(i=0; i<nUrls; i++ ) {
  83. match[i] = (char*)malloc(MAX_LETTERS+1);
  84. }
  85. int prValIndex = 0;
  86. double prVal[nUrls];
  87. while(fgets(line, LINE, pr)) {
  88. char *separate = strtok (line, " ,");
  89. char *tokened[MAX_LETTERS];
  90. i = 0;
  91. while (i < 3){
  92. tokened[i] = separate;
  93. separate = strtok (NULL, " ,");
  94. i++;
  95. }
  96. //start matching urlList to pagerank order
  97. found = 0;
  98. urlListIndex = 0;
  99. while(urlListIndex < nUrls && found == 0 && urlsToSort[urlListIndex] != NULL) {
  100. if (strcmp(tokened[0], urlsToSort[urlListIndex]) == 0) {
  101. found = 1;
  102. match[prValIndex] = strdup(tokened[0]);
  103. }
  104. urlListIndex++;
  105. }
  106. //end matching
  107. if(found == 1) {
  108. prVal[prValIndex] = atof(tokened[2]);
  109. prValIndex++;
  110. }
  111. }
  112. //standard selection sort to sort PRValue and corresponding url name
  113. double lrgest = prVal[0], tmp;
  114. char *tempURL[MAX_LETTERS];
  115. char *largestURL[MAX_LETTERS];
  116. int index = 0;
  117. int m, l;
  118. for (m = 0; m < nUrls; m++){
  119. lrgest = prVal[m];
  120. index = m;
  121. for (l = m; l < nUrls; l++){
  122. if (prVal[l] > lrgest){
  123. lrgest = prVal[l];
  124. largestURL[0] = match[l];
  125. index = l;
  126. }
  127. }
  128. //perform a swap
  129. tmp = prVal[m];
  130. tempURL[0] = match[m];
  131.  
  132. prVal[m] = lrgest;
  133. match[m] = largestURL[0];
  134.  
  135. prVal[index] = tmp;
  136. match[index] = tempURL[0];
  137. }
  138. //copy into global array
  139. for (m = 0; m < nUrls; m++) {
  140. urlsToSort[m] = match[m];
  141. }
  142. fclose(pr);
  143. }
  144.  
  // Sorts the first "total" entries of "freq" into descending order with a
  // selection sort, applying every swap to "urlsToSort" as well so that each
  // URL stays paired with its frequency.
  //
  // For each position the first largest remaining frequency is selected, so
  // entries with equal frequencies are not guaranteed to keep their relative
  // order.
  //
  // The parameters are written as plain pointers; this is the same type the
  // compiler gives to array-style parameter declarations, so existing callers
  // are unaffected.
  void sortByFrequency(int total, char **urlsToSort, int *freq) {
      for (int m = 0; m < total; m++) {
          // Locate the first largest frequency in freq[m .. total-1].
          int best = m;
          for (int l = m + 1; l < total; l++) {
              if (freq[l] > freq[best]) {
                  best = l;
              }
          }
          if (best == m) {
              continue;   // already in place, nothing to swap
          }

          int tmpFreq = freq[m];
          freq[m] = freq[best];
          freq[best] = tmpFreq;

          char *tmpUrl = urlsToSort[m];
          urlsToSort[m] = urlsToSort[best];
          urlsToSort[best] = tmpUrl;
      }
  }
  174.  
  175. int main(int argc, char *argv[]) {
  176. int i = 0;
  177. int nUrls = countUrls();
  178. char searchTerm[MAX_LETTERS];
  179. char **urlsarray = malloc(nUrls * sizeof(char*));
  180. for(i=0; i<nUrls; i++ ) {
  181. urlsarray[i] = (char*)malloc(MAX_LETTERS+1);
  182. }
  183. int pagesIndex;
  184. if (argc < 2) {
  185. fprintf(stderr, "Error: Enter search terms\n");
  186. return 1;
  187. }
  188.  
  189. if (nUrls == 0) {
  190. return 0;
  191. }
  192. if (argc == 2){
  193. strcpy(searchTerm, argv[1]);
  194. int nTotalUrls = findPages(searchTerm, urlsarray);
  195. pagesIndex = 0;
  196. char **toSort = malloc(nTotalUrls * sizeof(char*));
  197. for(i=0; i<nTotalUrls; i++ ) {
  198. toSort[i] = (char*)malloc(MAX_LETTERS+1);
  199. }
  200. while (pagesIndex < nTotalUrls) {
  201. strcpy(toSort[pagesIndex], urlsarray[pagesIndex]);
  202. pagesIndex++;
  203. }
  204. sortByPR(nTotalUrls, toSort);
  205.  
  206. i = 0;
  207. while (i < nTotalUrls) {
  208. fprintf(stdout, "%s\n", toSort[i]);
  209. i++;
  210. }
  211. return 0;
  212. }
  213. //for more than 1 search term
  214.  
  215. if (argc > 2) {
  216. int arguments = 1;
  217. int found;
  218. int searchIndex;
  219. char **toSortByFreq = malloc(nUrls * sizeof(char*));
  220. for(i=0; i<nUrls; i++ ) {
  221. toSortByFreq[i] = (char*)malloc(MAX_LETTERS+1);
  222. }
  223. int freq[MAX_OUTPUT] = {0};
  224. int globalIndex = 0;
  225. //for every search term
  226. while (arguments < argc) {
  227. strcpy(searchTerm, argv[arguments]);
  228. int nTotalUrls = findPages(searchTerm, urlsarray);
  229. pagesIndex = 0;
  230. char **toSort = malloc(nUrls * sizeof(char*));
  231. for(i=0; i<nUrls; i++ ) {
  232. toSort[i] = (char*)malloc(MAX_LETTERS+1);
  233. }
  234. while (pagesIndex < nTotalUrls) {
  235. strcpy(toSort[pagesIndex], urlsarray[pagesIndex]);
  236. pagesIndex++;
  237. }
  238. sortByPR(nTotalUrls, toSort);
  239. i = 0;
  240. while (i < nTotalUrls) {
  241. //if totalUrlList empty, put first url in
  242. if (strcmp(toSortByFreq[0], "") == 0) {
  243. toSortByFreq[0] = strdup(toSort[i]);
  244. globalIndex = 1;
  245. freq[0] = 1;
  246. }
  247. //else for non empty list
  248. else {
  249. searchIndex = 0;
  250. found = 0;
  251. while(found == 0 && searchIndex < globalIndex) {
  252. if(strcmp(toSort[i], toSortByFreq[searchIndex]) == 0) {
  253. found = 1;
  254. freq[searchIndex]++;
  255. }
  256. searchIndex++;
  257. }
  258. if (found == 0) {
  259. toSortByFreq[searchIndex] = strdup(toSort[i]);
  260. freq[searchIndex] = 1;
  261. globalIndex++;
  262. }
  263. }
  264. i++;
  265. }
  266. arguments++;
  267. }
  268. //sort
  269. sortByFrequency(globalIndex, toSortByFreq, freq);
  270. i = 0;
  271. while (i < globalIndex) {
  272. fprintf(stdout, "%s\n", toSortByFreq[i]);
  273. i++;
  274. }
  275. }
  276. return 0;
  277. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement