Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <assert.h>
- #include <string.h>
- #define MAX_LETTERS 64
- #define LINE 1000
- #define MAX_OUTPUT 30
// Returns the number of urls listed in pagerankList.txt (one url per line).
//
// Lines are counted byte by byte, so a line of any length counts exactly
// once; a final line without a trailing newline is counted as well.
// Prints an error to stderr and returns 0 if the file cannot be opened.
int countUrls(void) {
    FILE *pr = fopen("pagerankList.txt", "r");
    if (pr == NULL) {
        fprintf(stderr, "Error: pagerankList.txt not found\n");
        return 0;
    }

    int nUrls = 0;
    int previous = '\n'; // pretend we start right after a newline: empty file -> 0
    int ch;
    while ((ch = fgetc(pr)) != EOF) {
        if (ch == '\n') {
            nUrls++;
        }
        previous = ch;
    }
    // the last line has no terminating newline but still holds an entry
    if (previous != '\n') {
        nUrls++;
    }

    fclose(pr);
    return nUrls;
}
// Finds the line of invertedIndex.txt whose first word equals "term" and
// copies the urls that follow it into "urlList".
//
// term    - word to look up (compared exactly, case-sensitive)
// urlList - caller-provided slots; each slot must hold MAX_LETTERS + 1 bytes
//           and there must be a slot for every url copied
//
// At most MAX_OUTPUT - 1 urls are copied; longer urls are truncated to
// MAX_LETTERS characters. Blank lines are skipped.
// Returns the number of urls copied. Returns 0 (after printing an error to
// stderr) if the file cannot be opened or the term is not present.
int findPages(char term[MAX_LETTERS], char *urlList[MAX_LETTERS]){
    FILE *ii = fopen("invertedIndex.txt", "r");
    if (ii == NULL) {
        fprintf(stderr, "Error: invertedIndex.txt not found\n");
        return 0;
    }

    char line[LINE];
    int nCopied = -1; // stays -1 until the term's line has been seen

    while (nCopied < 0 && fgets(line, sizeof line, ii) != NULL) {
        // discard the tail of an over-long line so that it is not parsed
        // as if it were a separate index entry
        if (strchr(line, '\n') == NULL) {
            int ch;
            while ((ch = fgetc(ii)) != EOF && ch != '\n') {
            }
        }

        char *token = strtok(line, " \n");
        if (token == NULL || strcmp(token, term) != 0) {
            continue;
        }

        // every remaining token on this line is a url for the term
        nCopied = 0;
        while (nCopied < MAX_OUTPUT - 1 && (token = strtok(NULL, " \n")) != NULL) {
            snprintf(urlList[nCopied], MAX_LETTERS + 1, "%s", token);
            nCopied++;
        }
    }
    fclose(ii);

    if (nCopied < 0) {
        fprintf(stderr, "Error: search term not found\n");
        return 0;
    }
    return nCopied;
}
// Reorders the first "nUrls" entries of "urlsToSort" by descending pagerank.
//
// The rank of each url is looked up in pagerankList.txt, whose lines have the
// form "url, outdegree, pagerank". Urls that do not appear in the file are
// kept and placed after every ranked url. Urls with equal rank keep their
// incoming relative order.
//
// Only the pointers in "urlsToSort" are permuted: no string is copied,
// allocated or freed, so the caller keeps ownership of every entry.
// Prints an error to stderr and leaves the array untouched if the file
// cannot be opened.
void sortByPR(int nUrls, char *urlsToSort[MAX_LETTERS]) {
    FILE *pr = fopen("pagerankList.txt", "r");
    if (pr == NULL) {
        fprintf(stderr, "Error: pagerankList.txt not found\n");
        return;
    }
    if (nUrls <= 0) {
        fclose(pr);
        return;
    }

    double *prVal = malloc((size_t)nUrls * sizeof *prVal);
    if (prVal == NULL) {
        fprintf(stderr, "Error: out of memory\n");
        fclose(pr);
        return;
    }
    // -1 marks "not listed"; it sorts below any real (non-negative) pagerank
    for (int i = 0; i < nUrls; i++) {
        prVal[i] = -1.0;
    }

    // look up the pagerank of every requested url
    char line[LINE];
    while (fgets(line, sizeof line, pr) != NULL) {
        char *url = strtok(line, " ,\n");
        strtok(NULL, " ,\n"); // out-degree column, not needed here
        char *rank = strtok(NULL, " ,\n");
        if (url == NULL || rank == NULL) {
            continue; // blank or malformed line
        }
        for (int i = 0; i < nUrls; i++) {
            if (urlsToSort[i] != NULL && strcmp(url, urlsToSort[i]) == 0) {
                prVal[i] = strtod(rank, NULL);
            }
        }
    }
    fclose(pr);

    // stable insertion sort, largest pagerank first, moving each url
    // together with its rank
    for (int i = 1; i < nUrls; i++) {
        double rank = prVal[i];
        char *url = urlsToSort[i];
        int j = i - 1;
        while (j >= 0 && prVal[j] < rank) {
            prVal[j + 1] = prVal[j];
            urlsToSort[j + 1] = urlsToSort[j];
            j--;
        }
        prVal[j + 1] = rank;
        urlsToSort[j + 1] = url;
    }

    free(prVal);
}
// Sorts the first "total" urls by descending frequency.
//
// total      - number of valid entries in both arrays
// urlsToSort - url strings; only the pointers are moved
// freq       - freq[i] is the frequency of urlsToSort[i]; kept in step
//
// The sort is a stable insertion sort: urls with the same frequency keep
// the relative order in which the caller supplied them.
void sortByFrequency(int total, char *urlsToSort[MAX_LETTERS], int freq[MAX_OUTPUT]) {
    for (int i = 1; i < total; i++) {
        int count = freq[i];
        char *url = urlsToSort[i];

        // shift every strictly smaller entry one place to the right
        int j = i - 1;
        while (j >= 0 && freq[j] < count) {
            freq[j + 1] = freq[j];
            urlsToSort[j + 1] = urlsToSort[j];
            j--;
        }
        freq[j + 1] = count;
        urlsToSort[j + 1] = url;
    }
}
- int main(int argc, char *argv[]) {
- int i = 0;
- int nUrls = countUrls();
- char searchTerm[MAX_LETTERS];
- char **urlsarray = malloc(nUrls * sizeof(char*));
- for(i=0; i<nUrls; i++ ) {
- urlsarray[i] = (char*)malloc(MAX_LETTERS+1);
- }
- int pagesIndex;
- if (argc < 2) {
- fprintf(stderr, "Error: Enter search terms\n");
- return 1;
- }
- if (nUrls == 0) {
- return 0;
- }
- if (argc == 2){
- strcpy(searchTerm, argv[1]);
- int nTotalUrls = findPages(searchTerm, urlsarray);
- pagesIndex = 0;
- char **toSort = malloc(nTotalUrls * sizeof(char*));
- for(i=0; i<nTotalUrls; i++ ) {
- toSort[i] = (char*)malloc(MAX_LETTERS+1);
- }
- while (pagesIndex < nTotalUrls) {
- strcpy(toSort[pagesIndex], urlsarray[pagesIndex]);
- pagesIndex++;
- }
- sortByPR(nTotalUrls, toSort);
- i = 0;
- while (i < nTotalUrls) {
- fprintf(stdout, "%s\n", toSort[i]);
- i++;
- }
- return 0;
- }
- //for more than 1 search term
- if (argc > 2) {
- int arguments = 1;
- int found;
- int searchIndex;
- char **toSortByFreq = malloc(nUrls * sizeof(char*));
- for(i=0; i<nUrls; i++ ) {
- toSortByFreq[i] = (char*)malloc(MAX_LETTERS+1);
- }
- int freq[MAX_OUTPUT] = {0};
- int globalIndex = 0;
- //for every search term
- while (arguments < argc) {
- strcpy(searchTerm, argv[arguments]);
- int nTotalUrls = findPages(searchTerm, urlsarray);
- pagesIndex = 0;
- char **toSort = malloc(nUrls * sizeof(char*));
- for(i=0; i<nUrls; i++ ) {
- toSort[i] = (char*)malloc(MAX_LETTERS+1);
- }
- while (pagesIndex < nTotalUrls) {
- strcpy(toSort[pagesIndex], urlsarray[pagesIndex]);
- pagesIndex++;
- }
- sortByPR(nTotalUrls, toSort);
- i = 0;
- while (i < nTotalUrls) {
- //if totalUrlList empty, put first url in
- if (strcmp(toSortByFreq[0], "") == 0) {
- toSortByFreq[0] = strdup(toSort[i]);
- globalIndex = 1;
- freq[0] = 1;
- }
- //else for non empty list
- else {
- searchIndex = 0;
- found = 0;
- while(found == 0 && searchIndex < globalIndex) {
- if(strcmp(toSort[i], toSortByFreq[searchIndex]) == 0) {
- found = 1;
- freq[searchIndex]++;
- }
- searchIndex++;
- }
- if (found == 0) {
- toSortByFreq[searchIndex] = strdup(toSort[i]);
- freq[searchIndex] = 1;
- globalIndex++;
- }
- }
- i++;
- }
- arguments++;
- }
- //sort
- sortByFrequency(globalIndex, toSortByFreq, freq);
- i = 0;
- while (i < globalIndex) {
- fprintf(stdout, "%s\n", toSortByFreq[i]);
- i++;
- }
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement