Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- This program takes a weighted list of strings and a partially completed term to search for, sorts them, then shows the top 5 results that can complete the input sorted by weight. The functions labelled "DO NOT MODIFY" were provided as part of skeleton code.
- */
- #include <stdio.h>
- #include <string.h>
- #include <stdlib.h>
- // define a max query length
- #define MAX_QUERY_LENGTH 255
- /**
- DO NOT MODIFY
- Function to load data file consisting
- of words and their frequencies
- @param queries array of strings of max length 255
- @param weights array of unsigned longs
- @param fp pointer to a data file
- */
- void load(char queries[][MAX_QUERY_LENGTH], unsigned long weights[],
- FILE *fp) {
- char query[MAX_QUERY_LENGTH]; // buffer for queries
- unsigned long weight; // used to read in weights
- int i = 0; // index
- // keep scanning until we fail to scan a query
- // weight
- while (fscanf(fp, "%lu", &weight) == 1) {
- // scan the space
- fgetc(fp);
- // scan the query
- fscanf(fp, "%[^\n]\n", query);
- // put in the appropriate query, weight pair
- strcpy(queries[i], query);
- weights[i] = weight;
- i++;
- }
- }
- /**
- DO NOT MODIFY
- Function to write queries and weights into a data file
- @param queries array of strings of max length 255
- @param weights array of unsigned longs
- @param fp pointer to a data file
- @param length number of elements in each array
- */
- void write(char queries[][MAX_QUERY_LENGTH], unsigned long weights[],
- FILE *fp, int length) {
- for (int i = 0; i < length; i++) {
- fprintf(fp, "%s\t%lu\n", queries[i], weights[i]);
- }
- }
- /**
- Function to sort a list of strings and longs alphabetically
- @param list array of strings of max length 255
- @param weights array of unsigned longs
- @param num_queries the number of entries in list
- */
- void bubbleSort (char list[][MAX_QUERY_LENGTH], unsigned long weights[], int num_queries) {
- char current[MAX_QUERY_LENGTH]; // holds the current largest query
- unsigned long current_weight; // holds current largest weight
- int sorted; // set to 0 if anything has to be swapped in an iteration
- int i; // index
- do {
- strcpy(current, list[0]);
- current_weight = weights[0];
- sorted = 1; // assume sorted until found otherwise
- for (i = 1; i < num_queries; i++) { // swap current value forwards
- if (strcmp(current, list[i]) > 0) {
- strcpy(list[i-1], list[i]);
- weights[i-1] = weights[i];
- strcpy(list[i], current);
- weights[i] = current_weight;
- sorted = 0; // if this happens the list wasn't sorted this iteration
- }
- else { // take larger value, don't swap
- strcpy(current, list[i]);
- current_weight = weights[i];
- }
- }
- } while (!sorted);
- }
- /**
- Function to sort a list of strings and longs by value of the long
- @param list array of strings of max length 255
- @param weights array of unsigned longs
- @param num_queries the number of entries in list
- */
- void bubbleSortWeights (char list[][MAX_QUERY_LENGTH], unsigned long weights[], int num_queries) {
- char current[MAX_QUERY_LENGTH]; // holds the current largest query
- unsigned long current_weight; // holds current largest weight
- int sorted; // set to 0 if anything has to be swapped in an iteration
- int i; // index
- do {
- strcpy(current, list[0]);
- current_weight = weights[0];
- sorted = 1; // assume sorted until found otherwise
- for (i = 1; i < num_queries; i++) {
- if (current_weight < weights[i]) { // swap current value forwards
- strcpy(list[i-1], list[i]);
- weights[i-1] = weights[i];
- strcpy(list[i], current);
- weights[i] = current_weight;
- sorted = 0; // if this happens the list wasn't sorted this iteration
- }
- else { // take the larger value, don't swap
- strcpy(current, list[i]);
- current_weight = weights[i];
- }
- }
- } while (!sorted);
- }
- /**
- Function to search an array of strings for a certain starting string
- @param list array of strings to search
- @param num_queries ending point of search
- @param query the string to search for
- @param start the starting point of search
- @return the first index whose string begins with given string
- */
- int binarySearch (char list[][MAX_QUERY_LENGTH], int num_queries, char query[], int start) {
- int search_index = (num_queries - start) / 2 + start;
- int is_longer; // keeps track of whether the query is longer than the list item
- int segment_of = 0; // keeps track of whether the list item is a segment of the user query
- int length;
- if (strlen(query) - 1 <= strlen(list[search_index])) {
- is_longer = 1;
- length = strlen(query) - 1; // for some reason strlen(query) is 1 too big
- }
- else {
- is_longer = 0;
- length = strlen(list[search_index]); // makes sure to not go OoB
- }
- if (strncmp(list[search_index], query, length) == 0){
- if (is_longer == 1) { // make sure the result isn't shorter than the input
- return search_index;
- }
- else {
- segment_of = 1;
- }
- }
- if (num_queries - start <= 1) {
- return -1; // if it's not there return -1
- }
- else if (segment_of == 1 || strncmp(list[search_index], query, length) < 0) {
- return binarySearch(list, num_queries, query, search_index);
- }
- else if (strncmp(list[search_index], query, length) > 0) {
- return binarySearch(list, search_index, query, start);
- }
- return -1; // if it somehow reaches the end of the function return -1
- }
- /**
- Function that searches a sorted array of strings linearly around a given
- starting point for all strings that begin with a certain starting string
- @param list array of strings to search
- @param weights array of associated unsigned longs
- @param results array containing strings that begin with starting string
- @param result_weights array associated unsigned longs for results
- @param start starting point to search from (obtained from binary search)
- @param num_queries number of elements of list
- @param query the string to search for
- @return the number of elements in results
- */
- int giveResults (char list[][MAX_QUERY_LENGTH], unsigned long weights[], char results[][MAX_QUERY_LENGTH], unsigned long result_weights[], int start, int num_queries, char query[MAX_QUERY_LENGTH]) {
- int i = start; // index
- int result_total = 0; // total number of results added
- strcpy(results[result_total], list[i]);
- result_weights[result_total] = weights[i];
- result_total++;
- int stop_looking = 0; // sentinel for following loop
- while (!stop_looking) {
- i--;
- if (i < 0) {
- stop_looking = 1; // don't check out of bounds
- }
- // checks if a binary search of the index gives the index
- else if (binarySearch(list, i, query, i) == i) {
- strcpy(results[result_total], list[i]);
- result_weights[result_total] = weights[i];
- result_total++;
- }
- else {
- stop_looking = 1;
- }
- }
- i = start; // reset i to the start index
- stop_looking = 0;
- while (!stop_looking) {
- i++;
- if (i > num_queries) {
- stop_looking = 1; // again, don't check out of bounds
- }
- else if (binarySearch(list, i, query, i) == i) {
- strcpy(results[result_total], list[i]);
- result_weights[result_total] = weights[i];
- result_total++;
- }
- else {
- stop_looking = 1;
- }
- }
- return result_total; // return number of results
- }
- /**
- Main function
- */
- int main() {
- // YOU MAY ADD CODE INBETWEEN BUT
- // DO NOT MODIFY THE FOLLOWING LINES OF CODE
- // name of the data file
- char *data_file = "data/wiktionary.txt";
- char *sorted_file = "data/sorted_wiktionary.txt";
- // open a file pointer
- FILE *fp = fopen(data_file, "r");
- // gonna make number of results a variable in case I wanna
- // add the ability to input that number later
- int results_to_show = 5;
- // scan number of queries in the vocabulary file
- int num_queries;
- fscanf(fp, "%d", &num_queries);
- // declare the parallel arrays
- char queries[num_queries][MAX_QUERY_LENGTH];
- unsigned long weights[num_queries];
- // read the data into the arrays
- load(queries, weights, fp);
- // here's where I sort the data
- bubbleSort(queries, weights, num_queries);
- // make a string to hold user input
- char user_query[MAX_QUERY_LENGTH];
- // start the main input loop here, read input, end loop if EoF found
- while (fgets(user_query, MAX_QUERY_LENGTH, stdin) != NULL) {
- // we make arrays of strings and unsigned longs for the matches
- char results[num_queries][MAX_QUERY_LENGTH];
- unsigned long result_weights[num_queries];
- // then we do a binary search for the point to start looking for all matches
- int start_index = binarySearch(queries, num_queries, user_query, 0);
- if (start_index != -1) { // skip if nothing's found
- // time for this hellhole of a function
- int result_total = giveResults(queries, weights, results, result_weights, start_index, num_queries, user_query);
- // then sort the list of results by weight
- bubbleSortWeights(results, result_weights, result_total);
- // display the top 5 results, or all if there are less than 5
- if (result_total < results_to_show){
- results_to_show = result_total;
- }
- for (int i = 0; i < results_to_show; i++) {
- printf("%s: %lu\n", results[i], result_weights[i]);
- }
- }
- }
- // always remember to close file pointers!
- fclose(fp);
- // write the sorted arrays to a file
- fp = fopen(sorted_file, "w");
- write(queries, weights, fp, num_queries);
- fclose(fp);
- // END OF DO NOT MODIFY
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement