Guest User

Untitled

a guest
Feb 21st, 2018
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.19 KB | None | 0 0
  1. /*
  2. CITS1210 Project 2
  3. Name: <KESSEY, RHYS> <GOUVIGNON, DANIEL>
  4. Student number: <20925234> <20893173>
  5. Date: <date of submission>
  6. */
  7.  
  8. /* Comment out the next line when you have filled in your details above */
  9. /*
  10. #error Did you fill in your name and student number?
  11. */
  12. #include "index.h"
  13. /****************************************************************/
  14.  
  15. /*
  16. This template file provides function "stubs" for the five functions
  17. that you must complete. While you are free to add your own additional
  18. functions, you must meet the specifications of these five required
  19. functions precisely, including the name and type of each function
  20. specified in the template file. This means that under no circumstances
  21. can you change how one of these function behaves.
  22.  
  23. Note however that you do not need to write all your code in these five
  24. functions, nor just in one source file. Indeed, you will be assessed
  25. on how you structure your program, including the decomposition of
  26. tasks into different functions and the modularisation of your solution
  27. across multiple source files.
  28. */
  29.  
  30. /****************************************************************/
  31.  
  /*
   * trimLine(char line[]) cuts the string at its first newline ('\n') or
   * carriage-return ('\r') character, which removes the line ending that
   * fgets() leaves behind ("\n", "\r\n" or "\r").
   *
   * line: NUL-terminated buffer, modified in place; a NULL pointer is ignored.
   */

  void trimLine(char line[])
  {
      if (line == NULL) {
          return;
      }
      for (char *p = line; *p != '\0'; p++) {
          if (*p == '\n' || *p == '\r') {
              //nothing after the first line-ending character is part of the line
              *p = '\0';
              return;
          }
      }
  }
  47.  
  /*
   * read_terms_file(char *filename, char *termList[]) reads the file called
   * filename line by line and stores a heap-allocated copy of every line,
   * with its line ending removed, in termList.
   *
   * termList[0] receives the first line of the file (the "Terms:" header),
   * the search terms follow from termList[1], and the entry after the last
   * line is set to NULL.
   *
   * Returns the number of lines read excluding the first one, or 0 when the
   * file is empty. The program exits if the file cannot be opened or memory
   * runs out.
   *
   * NOTE(review): the capacity of termList is not passed in, so the caller
   * must supply room for every line of the file plus the NULL terminator.
   */
  int read_terms_file(char *filename, char *termList[])
  {
      char line[BUFSIZ];
      FILE *fp = fopen(filename, "r");
      //check to see if file opened correctly
      if (fp == NULL) {
          fprintf(stderr, "ERROR: Cannot open file.\n");
          exit(EXIT_FAILURE);
      }
      int count = 0;
      //copy the term file, line by line, into the termlist
      while (fgets(line, sizeof(line), fp) != NULL) {
          trimLine(line);
          //+1 leaves room for the terminating '\0'
          size_t size = strlen(line) + 1;
          char *copy = malloc(size);
          if (copy == NULL) {
              fprintf(stderr, "ERROR: Cannot allocate memory.\n");
              exit(EXIT_FAILURE);
          }
          memcpy(copy, line, size);
          termList[count] = copy;
          count++;
      }
      termList[count] = NULL;
      //close the file
      fclose(fp);
      //return number of terms, excluding the first line (Terms: );
      //an empty file has no terms rather than -1 of them
      return (count > 0) ? (count - 1) : 0;
  }
  82.  
  /*
   * read_sub_directory(dirname, subdirname, textFileArray, subDirArray, n, sn)
   * scans the directory whose path is subdirname and appends the full path
   * ("subdirname/entry") of
   *   - every regular file to textFileArray, starting at index *n
   *   - every directory    to subDirArray,   starting at index *sn
   * Entries whose name starts with '.' are skipped, which also covers
   * "." and "..".
   *
   * On return *n and *sn hold the new element counts and both arrays are
   * NULL-terminated at those positions. Every stored path is heap-allocated.
   * dirname is not used: subdirname already carries the complete path.
   *
   * The program exits if the directory cannot be opened, an entry cannot be
   * stat()ed, a path does not fit in MAXPATHLEN, or memory runs out.
   *
   * NOTE(review): the capacity of the two arrays is not passed in, so the
   * caller must supply room for every entry plus the NULL terminator.
   */

  void read_sub_directory(char *dirname, char *subdirname, char *textFileArray[], char *subDirArray[], int *n, int *sn)
  {
      (void)dirname;

      int count = *n;
      int SDcount = *sn;

      DIR *dirp = opendir(subdirname);
      //check to see if directory opened correctly
      if (dirp == NULL) {
          fprintf(stderr, "Null directory.\n");
          exit(EXIT_FAILURE);
      }

      struct dirent *dp;
      while ( (dp = readdir(dirp)) != NULL) {
          //hidden entries are never indexed
          if (dp->d_name[0] == '.') {
              continue;
          }

          //build the full path, refusing to overflow the buffer
          char fullname[MAXPATHLEN];
          int len = snprintf(fullname, sizeof(fullname), "%s/%s", subdirname, dp->d_name);
          if (len < 0 || (size_t)len >= sizeof(fullname)) {
              fprintf(stderr, "ERROR path too long: %s/%s\n", subdirname, dp->d_name);
              exit(EXIT_FAILURE);
          }

          struct stat stat_buffer;
          if (stat(fullname, &stat_buffer) != 0) {
              fprintf(stderr, "ERROR stat did not return 0.\n");
              exit(EXIT_FAILURE);
          }

          //pick the list this entry belongs to; anything else is ignored
          char **slot;
          if (S_ISREG(stat_buffer.st_mode)) {
              slot = &textFileArray[count++];
          } else if (S_ISDIR(stat_buffer.st_mode)) {
              slot = &subDirArray[SDcount++];
          } else {
              continue;
          }

          //+1 leaves room for the terminating '\0'
          size_t size = (size_t)len + 1;
          *slot = malloc(size);
          if (*slot == NULL) {
              fprintf(stderr, "ERROR Allocating memory.\n");
              exit(EXIT_FAILURE);
          }
          memcpy(*slot, fullname, size);
      }
      closedir(dirp);

      //keep both lists terminated so callers may walk them up to NULL
      textFileArray[count] = NULL;
      subDirArray[SDcount] = NULL;
      *n = count;
      *sn = SDcount;
  }
  144.  
  /*
   * read_public_directory(dirname, textFileArray, subDirectoryArray, searchTerms)
   * collects the files found in the directory dirname and in every directory
   * nested beneath it.
   *
   * The directory itself is scanned first: the full path of each regular
   * file is stored in textFileArray and the full path of each directory in
   * subDirectoryArray (entries whose name starts with '.' are skipped).
   * read_sub_directory() is then called for every collected directory,
   * including the ones it adds while running, so the whole tree is covered.
   *
   * On return both arrays are NULL-terminated and every stored path is
   * heap-allocated. searchTerms is not used by this function.
   *
   * The program exits if the directory cannot be opened, an entry cannot be
   * stat()ed, a path does not fit in MAXPATHLEN, or memory runs out.
   *
   * NOTE(review): the capacity of the two arrays is not passed in, so the
   * caller must supply room for every entry plus the NULL terminator.
   */

  void read_public_directory(char *dirname, char *textFileArray[], char *subDirectoryArray[], char *searchTerms[])
  {
      (void)searchTerms;

      int count = 0;
      int SDcount = 0;

      DIR *dirp = opendir(dirname);
      //check to see if directory opened correctly
      if (dirp == NULL) {
          fprintf(stderr, "Null directory.\n");
          exit(EXIT_FAILURE);
      }

      struct dirent *dp;
      while ( (dp = readdir(dirp)) != NULL) {
          //hidden entries are never indexed
          if (dp->d_name[0] == '.') {
              continue;
          }

          //build the full path, refusing to overflow the buffer
          char fullname[MAXPATHLEN];
          int len = snprintf(fullname, sizeof(fullname), "%s/%s", dirname, dp->d_name);
          if (len < 0 || (size_t)len >= sizeof(fullname)) {
              fprintf(stderr, "ERROR path too long: %s/%s\n", dirname, dp->d_name);
              exit(EXIT_FAILURE);
          }

          struct stat stat_buffer;
          if (stat(fullname, &stat_buffer) != 0) {
              fprintf(stderr, "ERROR stat did not return 0.\n");
              exit(EXIT_FAILURE);
          }

          //pick the list this entry belongs to; anything else is ignored
          char **slot;
          const char *failure;
          if (S_ISREG(stat_buffer.st_mode)) {
              slot = &textFileArray[count++];
              failure = "ERROR Allocating memory.\n";
          } else if (S_ISDIR(stat_buffer.st_mode)) {
              slot = &subDirectoryArray[SDcount++];
              failure = "ERROR allocating memory to subdirs.\n";
          } else {
              continue;
          }

          //+1 leaves room for the terminating '\0'
          size_t size = (size_t)len + 1;
          *slot = malloc(size);
          if (*slot == NULL) {
              fputs(failure, stderr);
              exit(EXIT_FAILURE);
          }
          memcpy(*slot, fullname, size);
      }
      //the handle is no longer needed once the top level has been read
      closedir(dirp);

      //terminate both lists before anything walks them
      textFileArray[count] = NULL;
      subDirectoryArray[SDcount] = NULL;

      //finds all text files within each sub directory;
      //SDcount grows while nested directories are discovered, so the loop
      //is driven by the counter rather than by a sentinel in the array
      //TODO: only descend when the recursive (-r) option has been given
      for (int i = 0; i < SDcount; i++) {
          read_sub_directory(dirname, subDirectoryArray[i], textFileArray, subDirectoryArray, &count, &SDcount);
      }

      textFileArray[count] = NULL;
      subDirectoryArray[SDcount] = NULL;
  }
  218.  
  219. /*
  220. * set_terms(char *termList[], INDEX *index, int numTerms) sets the term field
  221. * of each of the TERMINFO elements to the corresponding termList entry
  222. */
  223.  
  224. void set_terms(char *termList[], INDEX *index)
  225. {
  226. //giving the space for the TERMINFO
  227. index->terms = malloc(index[0].nTerms * sizeof(TERMINFO));
  228. if (index->terms == NULL) {
  229. printf("ERROR allocating TERMINFO memory.\n");
  230. exit(EXIT_FAILURE);
  231. }
  232. for (int i = 0; i < index[0].nTerms; i++) {
  233. //setting the names of each term in the terms field of index
  234. //by copying it over directly from termList
  235. //[i+1] so as not to include the "terms: " entry
  236. index[0].terms[i].term = termList[i+1];
  237. //setting the totalMatches of each term to ZERO
  238. //this is the total number of occurances of each term
  239. index[0].terms[i].totalMatches = 0;
  240. //setting the nFiles of each term to ZERO
  241. //this is the number of files each term was found in
  242. index[0].terms[i].nFiles = 0;
  243. }
  244. }
  245.  
  246. /*
  247. * search_textFile(char *filename, char *searchTerms[], int nTerms, INDEX *thisIndex)
  248. * will look through filename, matching the searchTerms, using nTerms to help within a loop
  249. * and storing the details of these matches in thisIndex
  250. * incriments the total amount of times each term has been found
  251. * incriments the amount of times the term was found in each file
  252. * incriments the amount of files the term has been found in
  253. * incriments the amount of lines the term has been found within
  254. */
  255.  
  256. void search_textFile(char *filename, char *searchTerms[], INDEX *thisIndex, int fileNum)
  257. {
  258. printf("fileNum = %d.\n", fileNum);
  259. //to hold the line that fgets creates
  260. char temp[BUFSIZ];
  261. //for checking if a term has been found in file or not
  262. bool isTermInFile[thisIndex[0].nTerms];
  263. //opening file, check if it opened correctly
  264. FILE *fp = fopen(filename, "r");
  265. if (fp == NULL) {
  266. printf("ERROR: Cannot open file: %s\n", filename);
  267. exit(EXIT_FAILURE);
  268. }
  269. //set all values in isTermInFile to false
  270. for (int clear = 0; clear < thisIndex[0].nTerms; clear++) {
  271. isTermInFile[clear] = false;
  272. }
  273. //looking through the file, checking it against the search terms, and incrementing the correct counter
  274. while (fgets(temp, sizeof(temp), fp) != NULL) {
  275. //a switch for checking if a term is found at least once on a line
  276. bool isTermOnLine[thisIndex[0].nTerms];
  277. //set all values in isTermOnLine to false
  278. for (int clear = 0; clear < thisIndex[0].nTerms; clear++) {
  279. isTermOnLine[clear] = false;
  280. }
  281. int i = 0;
  282. while (i < thisIndex[0].nTerms) {
  283. //Matches the search terms with the words in the file incrimenting the relevant counter in the Index fields
  284. char *p = temp;
  285. while ( (p = strstr(p, searchTerms[i+1])) != NULL) {
  286. int handyFno = thisIndex[0].terms[i].nFiles;
  287. //is this the first occurance of the term in this file?
  288. if (!isTermInFile[i]) {
  289. isTermInFile[i] = true;
  290. thisIndex[0].terms[i].nFiles++;
  291. //allocate memory and clearing the fields for a new FILEMATCH
  292. thisIndex[0].terms[i].files = realloc(thisIndex[0].terms[i].files, (thisIndex[0].terms[i].nFiles)*sizeof(FILEMATCH) );
  293. if (thisIndex[0].terms[i].files == NULL) {
  294. printf("ERROR allocating FILEMATCH memory.\n");
  295. exit(EXIT_FAILURE);
  296. }
  297. thisIndex[0].terms[i].files[handyFno].nLineMatches = 0;
  298. thisIndex[0].terms[i].files[handyFno].totalFileMatches = 0;
  299. }
  300. //is this is the first occurance of the term in this line?
  301. if (!isTermOnLine[i]) {
  302. //if it is, set the switch so it isnt anymore
  303. isTermOnLine[i] = true;
  304. //update the "number of lines this term was found on, in this file" field
  305. thisIndex[0].terms[i].files[handyFno].nLineMatches++;
  306. }
  307. if (isTermInFile[i]) {
  308. //updating the "total number of times this term was found in this file" field
  309. thisIndex[0].terms[i].files[handyFno].totalFileMatches++;
  310. }
  311. //updating the "total number of times this term is found" field
  312. thisIndex[0].terms[i].totalMatches++;
  313. p++;
  314. }
  315. i++;
  316. }
  317. }
  318. fclose(fp);
  319. }
  320. /*
  321. buildIndex(termsfile, paths, nDirs, opts) returns an index containing
  322. information about matchings between the terms in termsfile and the
  323. files and sub-directories found in the nDirs directories specified
  324. by paths, using the options in opts to determine which files are
  325. processed.
  326. */
  327.  
  328. INDEX *buildIndex(char *termsfile, char *dirs[], int n, OPTIONS opts)
  329. {
  330. //creates memory for the index we wish to fill in
  331. INDEX *index = malloc(sizeof(INDEX));
  332. if (index == NULL) {
  333. printf("ERROR allocating INDEX memory.\n");
  334. exit(EXIT_FAILURE);
  335. }
  336. //to hold each of the search terms
  337. char *termList[BUFSIZ];
  338. //to hold all of the text files which we shall search
  339. char *textFileArray[BUFSIZ];
  340. //to hold all of the directories contained within dirs[i] (subdirectories)
  341. char *subDirectoryArray[BUFSIZ];
  342. //fills the nTerms field of index and fills termList with the search terms
  343. //note, that here, termList[0] = "Terms: "...
  344. index[0].nTerms = read_terms_file(termsfile, termList);
  345. //creates memory to hold nTerm lots of TERMINFOs
  346. //initialises all primitive fields to ZERO
  347. //initialises all ->term fields to their corresponding match in the terms file
  348. //note, that here, termList[0] = "Terms: "...
  349. //but index[0].terms[0].term = "word1"...
  350. set_terms(termList, index);
  351. //reads in names of all text files found in directories and sub directories
  352. //textFileArray now holds all text files in a given directory to process
  353. read_public_directory(dirs[0], textFileArray, subDirectoryArray, termList);
  354. //search through each text file, comparing to the searchTerms
  355. //and setting the correct information to the correct fields of INDEX
  356. int fileNum = 0;
  357. while (textFileArray[fileNum] != NULL) {
  358. search_textFile(textFileArray[fileNum], termList, index, fileNum);
  359. fileNum++;
  360. }
  361.  
  362.  
  363. return index;
  364. }
  365.  
  366. /*
  367. sortIndex(index, opts) sorts and returns index, using the options in
  368. opts to determine the ordering.
  369. */
  370. /*
  371. INDEX *sortIndex(INDEX *index, OPTIONS opts)
  372. {
  373. return NULL;
  374. }
  375. */
  376.  
  377. /*
  378. printIndex(index, opts) prints index, using the options in opts to
  379. determine what is printed.
  380. */
  381. /*
  382. void printIndex(INDEX *index, OPTIONS opts)
  383. {
  384. }
  385. */
  386.  
  387. /*
  388. generateTextCloud(index) prints html code for a text-cloud that
  389. corresponds to the frequency information contained in index.
  390. */
  391. /*
  392. void generateTextCloud(INDEX *index)
  393. {
  394. }
  395. */
  396.  
  397. /*
  398. freeIndex(index) frees all the memory associated with index.
  399. */
  400.  
  401. void freeIndex(INDEX *index)
  402. {
  403. /*for (int i = 0; i < index[0].nTerms; i++) {
  404. for (int j = 0; j < index[0].terms[i].nFiles; j++) {
  405. free(index[0].terms[i].files);
  406. }
  407. free(index[0].terms);
  408. }
  409. printf("FREE COMPLETE.\n");*/
  410. }
  411.  
  412.  
  413. /*
  414. * the main function, it will control the flow of the program
  415. */
  416.  
  417. int main(int argc, char *argv[])
  418. {
  419. //check for correct usage
  420. if (argc != 2) {
  421. printf("Usage: <progname> <file.txt>\n");
  422. exit(EXIT_FAILURE);
  423. }
  424. //setting up test variables, for trialing the functions in progress
  425. OPTIONS opt;
  426. char *termsfile = argv[1];
  427. int nDirs = 2;
  428. char *dirs[nDirs - 1];
  429. dirs[0] = "Dir1";
  430. dirs[1] = "Dir2";
  431. //calling the functions
  432. INDEX *index = buildIndex(termsfile, dirs, nDirs, opt);
  433. freeIndex(index);
  434. return 1;
  435. }
Add Comment
Please, Sign In to add comment