Advertisement
Guest User

Untitled

a guest
Sep 15th, 2017
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.60 KB | None | 0 0
  1. /**
  2. This program takes a weighted list of strings and a partially completed term to search for, sorts them, then shows the top 5 results that can complete the input sorted by weight. The functions labelled "DO NOT MODIFY" were provided as part of skeleton code.
  3. */
  4.  
  5. #include <stdio.h>
  6. #include <string.h>
  7. #include <stdlib.h>
  8.  
  9. // define a max query length
  10. #define MAX_QUERY_LENGTH 255
  11.  
  12. /**
  13. DO NOT MODIFY
  14. Function to load data file consisting
  15. of words and their frequencies
  16.  
  17. @param queries array of strings of max length 255
  18. @param weights array of unsigned longs
  19. @param fp pointer to a data file
  20. */
  21. void load(char queries[][MAX_QUERY_LENGTH], unsigned long weights[],
  22. FILE *fp) {
  23.  
  24. char query[MAX_QUERY_LENGTH]; // buffer for queries
  25. unsigned long weight; // used to read in weights
  26. int i = 0; // index
  27.  
  28. // keep scanning until we fail to scan a query
  29. // weight
  30. while (fscanf(fp, "%lu", &weight) == 1) {
  31.  
  32. // scan the space
  33. fgetc(fp);
  34. // scan the query
  35. fscanf(fp, "%[^\n]\n", query);
  36.  
  37. // put in the appropriate query, weight pair
  38. strcpy(queries[i], query);
  39. weights[i] = weight;
  40. i++;
  41. }
  42. }
  43.  
  44.  
  45. /**
  46. DO NOT MODIFY
  47. Function to write queries and weights into a data file
  48.  
  49. @param queries array of strings of max length 255
  50. @param weights array of unsigned longs
  51. @param fp pointer to a data file
  52. @param length number of elements in each array
  53. */
  54. void write(char queries[][MAX_QUERY_LENGTH], unsigned long weights[],
  55. FILE *fp, int length) {
  56.  
  57. for (int i = 0; i < length; i++) {
  58. fprintf(fp, "%s\t%lu\n", queries[i], weights[i]);
  59. }
  60. }
  61.  
  62. /**
  63. Function to sort a list of strings and longs alphabetically
  64.  
  65. @param list array of strings of max length 255
  66. @param weights array of unsigned longs
  67. @param num_queries the number of entries in list
  68. */
  69. void bubbleSort (char list[][MAX_QUERY_LENGTH], unsigned long weights[], int num_queries) {
  70. char current[MAX_QUERY_LENGTH]; // holds the current largest query
  71. unsigned long current_weight; // holds current largest weight
  72. int sorted; // set to 0 if anything has to be swapped in an iteration
  73. int i; // index
  74. do {
  75. strcpy(current, list[0]);
  76. current_weight = weights[0];
  77. sorted = 1; // assume sorted until found otherwise
  78. for (i = 1; i < num_queries; i++) { // swap current value forwards
  79. if (strcmp(current, list[i]) > 0) {
  80. strcpy(list[i-1], list[i]);
  81. weights[i-1] = weights[i];
  82. strcpy(list[i], current);
  83. weights[i] = current_weight;
  84. sorted = 0; // if this happens the list wasn't sorted this iteration
  85. }
  86. else { // take larger value, don't swap
  87. strcpy(current, list[i]);
  88. current_weight = weights[i];
  89. }
  90. }
  91. } while (!sorted);
  92. }
  93. /**
  94. Function to sort a list of strings and longs by value of the long
  95.  
  96. @param list array of strings of max length 255
  97. @param weights array of unsigned longs
  98. @param num_queries the number of entries in list
  99. */
  100. void bubbleSortWeights (char list[][MAX_QUERY_LENGTH], unsigned long weights[], int num_queries) {
  101. char current[MAX_QUERY_LENGTH]; // holds the current largest query
  102. unsigned long current_weight; // holds current largest weight
  103. int sorted; // set to 0 if anything has to be swapped in an iteration
  104. int i; // index
  105. do {
  106. strcpy(current, list[0]);
  107. current_weight = weights[0];
  108. sorted = 1; // assume sorted until found otherwise
  109. for (i = 1; i < num_queries; i++) {
  110. if (current_weight < weights[i]) { // swap current value forwards
  111. strcpy(list[i-1], list[i]);
  112. weights[i-1] = weights[i];
  113. strcpy(list[i], current);
  114. weights[i] = current_weight;
  115. sorted = 0; // if this happens the list wasn't sorted this iteration
  116. }
  117. else { // take the larger value, don't swap
  118. strcpy(current, list[i]);
  119. current_weight = weights[i];
  120. }
  121. }
  122. } while (!sorted);
  123. }
  124. /**
  125. Function to search an array of strings for a certain starting string
  126.  
  127. @param list array of strings to search
  128. @param num_queries ending point of search
  129. @param query the string to search for
  130. @param start the starting point of search
  131. @return the first index whose string begins with given string
  132. */
  133. int binarySearch (char list[][MAX_QUERY_LENGTH], int num_queries, char query[], int start) {
  134. int search_index = (num_queries - start) / 2 + start;
  135. int is_longer; // keeps track of whether the query is longer than the list item
  136. int segment_of = 0; // keeps track of whether the list item is a segment of the user query
  137. int length;
  138. if (strlen(query) - 1 <= strlen(list[search_index])) {
  139. is_longer = 1;
  140. length = strlen(query) - 1; // for some reason strlen(query) is 1 too big
  141. }
  142. else {
  143. is_longer = 0;
  144. length = strlen(list[search_index]); // makes sure to not go OoB
  145. }
  146.  
  147. if (strncmp(list[search_index], query, length) == 0){
  148. if (is_longer == 1) { // make sure the result isn't shorter than the input
  149. return search_index;
  150. }
  151. else {
  152. segment_of = 1;
  153. }
  154. }
  155. if (num_queries - start <= 1) {
  156. return -1; // if it's not there return -1
  157. }
  158. else if (segment_of == 1 || strncmp(list[search_index], query, length) < 0) {
  159. return binarySearch(list, num_queries, query, search_index);
  160. }
  161. else if (strncmp(list[search_index], query, length) > 0) {
  162. return binarySearch(list, search_index, query, start);
  163. }
  164. return -1; // if it somehow reaches the end of the function return -1
  165. }
  166. /**
  167. Function that searches a sorted array of strings linearly around a given
  168. starting point for all strings that begin with a certain starting string
  169.  
  170. @param list array of strings to search
  171. @param weights array of associated unsigned longs
  172. @param results array containing strings that begin with starting string
  173. @param result_weights array associated unsigned longs for results
  174. @param start starting point to search from (obtained from binary search)
  175. @param num_queries number of elements of list
  176. @param query the string to search for
  177. @return the number of elements in results
  178. */
  179. int giveResults (char list[][MAX_QUERY_LENGTH], unsigned long weights[], char results[][MAX_QUERY_LENGTH], unsigned long result_weights[], int start, int num_queries, char query[MAX_QUERY_LENGTH]) {
  180. int i = start; // index
  181. int result_total = 0; // total number of results added
  182. strcpy(results[result_total], list[i]);
  183. result_weights[result_total] = weights[i];
  184. result_total++;
  185. int stop_looking = 0; // sentinel for following loop
  186. while (!stop_looking) {
  187. i--;
  188. if (i < 0) {
  189. stop_looking = 1; // don't check out of bounds
  190. }
  191. // checks if a binary search of the index gives the index
  192. else if (binarySearch(list, i, query, i) == i) {
  193. strcpy(results[result_total], list[i]);
  194. result_weights[result_total] = weights[i];
  195. result_total++;
  196. }
  197. else {
  198. stop_looking = 1;
  199. }
  200. }
  201. i = start; // reset i to the start index
  202. stop_looking = 0;
  203. while (!stop_looking) {
  204. i++;
  205. if (i > num_queries) {
  206. stop_looking = 1; // again, don't check out of bounds
  207. }
  208. else if (binarySearch(list, i, query, i) == i) {
  209. strcpy(results[result_total], list[i]);
  210. result_weights[result_total] = weights[i];
  211. result_total++;
  212. }
  213. else {
  214. stop_looking = 1;
  215. }
  216. }
  217. return result_total; // return number of results
  218. }
  219.  
  220.  
  221. /**
  222. Main function
  223. */
  224. int main() {
  225.  
  226. // YOU MAY ADD CODE INBETWEEN BUT
  227. // DO NOT MODIFY THE FOLLOWING LINES OF CODE
  228.  
  229. // name of the data file
  230. char *data_file = "data/wiktionary.txt";
  231. char *sorted_file = "data/sorted_wiktionary.txt";
  232. // open a file pointer
  233. FILE *fp = fopen(data_file, "r");
  234.  
  235. // gonna make number of results a variable in case I wanna
  236. // add the ability to input that number later
  237. int results_to_show = 5;
  238.  
  239. // scan number of queries in the vocabulary file
  240. int num_queries;
  241. fscanf(fp, "%d", &num_queries);
  242.  
  243. // declare the parallel arrays
  244. char queries[num_queries][MAX_QUERY_LENGTH];
  245. unsigned long weights[num_queries];
  246.  
  247. // read the data into the arrays
  248. load(queries, weights, fp);
  249.  
  250. // here's where I sort the data
  251. bubbleSort(queries, weights, num_queries);
  252.  
  253. // make a string to hold user input
  254. char user_query[MAX_QUERY_LENGTH];
  255.  
  256. // start the main input loop here, read input, end loop if EoF found
  257. while (fgets(user_query, MAX_QUERY_LENGTH, stdin) != NULL) {
  258.  
  259. // we make arrays of strings and unsigned longs for the matches
  260. char results[num_queries][MAX_QUERY_LENGTH];
  261. unsigned long result_weights[num_queries];
  262.  
  263. // then we do a binary search for the point to start looking for all matches
  264. int start_index = binarySearch(queries, num_queries, user_query, 0);
  265. if (start_index != -1) { // skip if nothing's found
  266.  
  267. // time for this hellhole of a function
  268. int result_total = giveResults(queries, weights, results, result_weights, start_index, num_queries, user_query);
  269.  
  270. // then sort the list of results by weight
  271. bubbleSortWeights(results, result_weights, result_total);
  272.  
  273. // display the top 5 results, or all if there are less than 5
  274. if (result_total < results_to_show){
  275. results_to_show = result_total;
  276. }
  277. for (int i = 0; i < results_to_show; i++) {
  278. printf("%s: %lu\n", results[i], result_weights[i]);
  279. }
  280. }
  281. }
  282. // always remember to close file pointers!
  283. fclose(fp);
  284.  
  285. // write the sorted arrays to a file
  286. fp = fopen(sorted_file, "w");
  287. write(queries, weights, fp, num_queries);
  288. fclose(fp);
  289. // END OF DO NOT MODIFY
  290.  
  291. return 0;
  292. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement