Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- CITS1210 Project 2
- Name: <KESSEY, RHYS> <GOUVIGNON, DANIEL>
- Student number: <20925234> <20893173>
- Date: <date of submission>
- */
- /* Comment out the next line when you have filled in your details above */
- /*
- #error Did you fill in your name and student number?
- */
- #include "index.h"
- /****************************************************************/
- /*
- This template file provides function "stubs" for the five functions
- that you must complete. While you are free to add your own additional
- functions, you must meet the specifications of these five required
- functions precisely, including the name and type of each function
- specified in the template file. This means that under no circumstances
- can you change how any of these functions behaves.
- Note however that you do not need to write all your code in these five
- functions, nor just in one source file. Indeed, you will be assessed
- on how you structure your program, including the decomposition of
- tasks into different functions and the modularisation of your solution
- across multiple source files.
- */
- /****************************************************************/
/*
 * trimLine(char line[]) cuts line off at its first newline ('\n') or
 * carriage-return ('\r') character, which removes the line ending that
 * fgets() leaves behind (whether it is "\n", "\r" or "\r\n").
 *
 * line: writable, NUL-terminated string; modified in place.
 *       A NULL pointer is ignored.
 */
void trimLine(char line[])
{
    if (line == NULL) {
        return;
    }
    //strcspn() returns the length of the leading run that contains neither
    //terminator, i.e. the index of the first '\r'/'\n' or of the final '\0'
    line[strcspn(line, "\r\n")] = '\0';
}
/*
 * read_terms_file(char *filename, char *termList[]) reads the file called
 * filename one line at a time and stores a heap-allocated copy of every
 * line (with its line ending removed) in termList, in file order.
 * termList is terminated with a NULL entry.
 *
 * The first line of the file is treated as a header ("Terms: "): it is
 * stored in termList[0] but is not counted in the return value.
 *
 * Returns the number of search terms, i.e. the number of lines read minus
 * the header line (0 for an empty file).
 * Prints a message and exits if the file cannot be opened or memory
 * cannot be allocated.
 *
 * NOTE(review): the capacity of termList is not passed in, so this
 * function cannot detect overflow; the caller must supply room for every
 * line of the file plus the NULL terminator.
 */
int read_terms_file(char *filename, char *termList[])
{
    char line[BUFSIZ];
    FILE *fp = fopen(filename, "r");
    //check to see if file opened correctly
    if (fp == NULL) {
        printf("ERROR: Cannot open file.\n");
        exit(EXIT_FAILURE);
    }
    int count = 0;
    //copy the term file, line by line, into the termlist
    while (fgets(line, sizeof(line), fp) != NULL) {
        trimLine(line);
        //+1 so that the terminating '\0' is copied as well
        size_t size = strlen(line) + 1;
        termList[count] = malloc(size);
        if (termList[count] == NULL) {
            printf("ERROR: Cannot allocate memory.\n");
            exit(EXIT_FAILURE);
        }
        memcpy(termList[count], line, size);
        count++;
    }
    termList[count] = NULL;
    //close the file
    fclose(fp);
    //return number of terms, excluding the first term (Terms: )
    //an empty file has no header to exclude, so report zero terms
    return (count > 0) ? (count - 1) : 0;
}
/*
 * read_sub_directory(dirname, subdirname, textFileArray, subDirArray, n, sn)
 * scans the directory whose path is subdirname and appends to the
 * caller's lists:
 *   - textFileArray: the path of every non-hidden regular file found,
 *     starting at index *n
 *   - subDirArray: the path of every non-hidden directory found,
 *     starting at index *sn
 * Each stored path is a heap-allocated string of the form
 * "subdirname/entry". On return *n and *sn hold the new list lengths and
 * subDirArray is terminated with a NULL entry, so a caller that walks it
 * up to NULL sees the newly found directories too.
 *
 * dirname is not used: subdirname already carries the full path.
 * Entries whose name starts with '.' (including "." and "..") are ignored.
 * Prints a message and exits if the directory cannot be opened, an entry
 * cannot be stat()ed, a path does not fit in MAXPATHLEN bytes, or memory
 * cannot be allocated.
 *
 * NOTE(review): the capacities of the two arrays are not passed in, so
 * the caller must guarantee they are large enough.
 */
void read_sub_directory(char *dirname, char *subdirname, char *textFileArray[], char *subDirArray[], int *n, int *sn)
{
    (void) dirname;
    DIR *dirp = opendir(subdirname);
    //check to see if directory opened correctly
    if (dirp == NULL) {
        printf("Null directory.\n");
        exit(EXIT_FAILURE);
    }
    int count = *n;
    int SDcount = *sn;
    struct dirent *dp;
    while ( (dp = readdir(dirp)) != NULL) {
        char fullname[MAXPATHLEN];
        //snprintf never writes past the buffer; its return value tells us
        //whether the path had to be truncated
        int len = snprintf(fullname, sizeof(fullname), "%s/%s", subdirname, dp->d_name);
        if (len < 0 || (size_t) len >= sizeof(fullname)) {
            printf("ERROR path name too long.\n");
            exit(EXIT_FAILURE);
        }
        struct stat stat_buffer;
        if (stat(fullname, &stat_buffer) != 0) {
            printf("ERROR stat did not return 0.\n");
            exit(EXIT_FAILURE);
        }
        //hidden entries are never recorded
        if (dp->d_name[0] == '.') {
            continue;
        }
        //pick the list this entry belongs to; anything that is neither a
        //regular file nor a directory is skipped
        char **slot;
        if (S_ISREG(stat_buffer.st_mode)) {
            slot = &textFileArray[count++];
        } else if (S_ISDIR(stat_buffer.st_mode)) {
            slot = &subDirArray[SDcount++];
        } else {
            continue;
        }
        //+1 so that the terminating '\0' is copied as well
        *slot = malloc((size_t) len + 1);
        if (*slot == NULL) {
            printf("ERROR Allocating memory.\n");
            exit(EXIT_FAILURE);
        }
        memcpy(*slot, fullname, (size_t) len + 1);
    }
    closedir(dirp);
    subDirArray[SDcount] = NULL;
    *n = count;
    *sn = SDcount;
}
/*
 * read_public_directory(dirname, textFileArray, subDirectoryArray, searchTerms)
 * collects the path of every non-hidden regular file found in dirname
 * and in all of the non-hidden directories below it.
 *
 * textFileArray is filled from index 0 with heap-allocated path strings
 * ("dirname/entry", "dirname/sub/entry", ...) and terminated with NULL.
 * subDirectoryArray is used as the work list of directories still to be
 * visited: it is filled from index 0 with heap-allocated directory paths
 * and terminated with NULL. Directories found while visiting a
 * sub-directory are appended to the same list by read_sub_directory(),
 * so every level below dirname is eventually scanned.
 *
 * searchTerms is not used by this function.
 * Entries whose name starts with '.' (including "." and "..") are ignored.
 * Prints a message and exits if a directory cannot be opened, an entry
 * cannot be stat()ed, a path does not fit in MAXPATHLEN bytes, or memory
 * cannot be allocated.
 *
 * NOTE(review): the capacities of the two arrays are not passed in, so
 * the caller must guarantee they are large enough.
 */
void read_public_directory(char *dirname, char *textFileArray[], char *subDirectoryArray[], char *searchTerms[])
{
    (void) searchTerms;
    DIR *dirp = opendir(dirname);
    //check to see if directory opened correctly
    if (dirp == NULL) {
        printf("Null directory.\n");
        exit(EXIT_FAILURE);
    }
    int count = 0;
    int SDcount = 0;
    struct dirent *dp;
    while ( (dp = readdir(dirp)) != NULL) {
        char fullname[MAXPATHLEN];
        //snprintf never writes past the buffer; its return value tells us
        //whether the path had to be truncated
        int len = snprintf(fullname, sizeof(fullname), "%s/%s", dirname, dp->d_name);
        if (len < 0 || (size_t) len >= sizeof(fullname)) {
            printf("ERROR path name too long.\n");
            exit(EXIT_FAILURE);
        }
        struct stat stat_buffer;
        if (stat(fullname, &stat_buffer) != 0) {
            printf("ERROR stat did not return 0.\n");
            exit(EXIT_FAILURE);
        }
        //hidden entries are never recorded
        if (dp->d_name[0] == '.') {
            continue;
        }
        if (S_ISREG(stat_buffer.st_mode)) {
            //+1 so that the terminating '\0' is copied as well
            textFileArray[count] = malloc((size_t) len + 1);
            if (textFileArray[count] == NULL) {
                printf("ERROR Allocating memory.\n");
                exit(EXIT_FAILURE);
            }
            memcpy(textFileArray[count], fullname, (size_t) len + 1);
            count++;
        } else if (S_ISDIR(stat_buffer.st_mode)) {
            subDirectoryArray[SDcount] = malloc((size_t) len + 1);
            if (subDirectoryArray[SDcount] == NULL) {
                printf("ERROR allocating memory to subdirs.\n");
                exit(EXIT_FAILURE);
            }
            memcpy(subDirectoryArray[SDcount], fullname, (size_t) len + 1);
            SDcount++;
        }
    }
    closedir(dirp);
    //visit every queued sub-directory; read_sub_directory() raises SDcount
    //whenever it finds deeper directories, so the bound is re-read on each
    //pass and the loop only ever touches slots that have been filled in
    //TODO: only descend when the recursive (-r) option is given
    for (int i = 0; i < SDcount; i++) {
        read_sub_directory(dirname, subDirectoryArray[i], textFileArray, subDirectoryArray, &count, &SDcount);
    }
    textFileArray[count] = NULL;
    subDirectoryArray[SDcount] = NULL;
}
- /*
- * set_terms(char *termList[], INDEX *index, int numTerms) sets the term field
- * of each of the TERMINFO elements to the corresponding termList entry
- */
- void set_terms(char *termList[], INDEX *index)
- {
- //giving the space for the TERMINFO
- index->terms = malloc(index[0].nTerms * sizeof(TERMINFO));
- if (index->terms == NULL) {
- printf("ERROR allocating TERMINFO memory.\n");
- exit(EXIT_FAILURE);
- }
- for (int i = 0; i < index[0].nTerms; i++) {
- //setting the names of each term in the terms field of index
- //by copying it over directly from termList
- //[i+1] so as not to include the "terms: " entry
- index[0].terms[i].term = termList[i+1];
- //setting the totalMatches of each term to ZERO
- //this is the total number of occurances of each term
- index[0].terms[i].totalMatches = 0;
- //setting the nFiles of each term to ZERO
- //this is the number of files each term was found in
- index[0].terms[i].nFiles = 0;
- }
- }
- /*
- * search_textFile(char *filename, char *searchTerms[], int nTerms, INDEX *thisIndex)
- * will look through filename, matching the searchTerms, using nTerms to help within a loop
- * and storing the details of these matches in thisIndex
- * incriments the total amount of times each term has been found
- * incriments the amount of times the term was found in each file
- * incriments the amount of files the term has been found in
- * incriments the amount of lines the term has been found within
- */
- void search_textFile(char *filename, char *searchTerms[], INDEX *thisIndex, int fileNum)
- {
- printf("fileNum = %d.\n", fileNum);
- //to hold the line that fgets creates
- char temp[BUFSIZ];
- //for checking if a term has been found in file or not
- bool isTermInFile[thisIndex[0].nTerms];
- //opening file, check if it opened correctly
- FILE *fp = fopen(filename, "r");
- if (fp == NULL) {
- printf("ERROR: Cannot open file: %s\n", filename);
- exit(EXIT_FAILURE);
- }
- //set all values in isTermInFile to false
- for (int clear = 0; clear < thisIndex[0].nTerms; clear++) {
- isTermInFile[clear] = false;
- }
- //looking through the file, checking it against the search terms, and incrementing the correct counter
- while (fgets(temp, sizeof(temp), fp) != NULL) {
- //a switch for checking if a term is found at least once on a line
- bool isTermOnLine[thisIndex[0].nTerms];
- //set all values in isTermOnLine to false
- for (int clear = 0; clear < thisIndex[0].nTerms; clear++) {
- isTermOnLine[clear] = false;
- }
- int i = 0;
- while (i < thisIndex[0].nTerms) {
- //Matches the search terms with the words in the file incrimenting the relevant counter in the Index fields
- char *p = temp;
- while ( (p = strstr(p, searchTerms[i+1])) != NULL) {
- int handyFno = thisIndex[0].terms[i].nFiles;
- //is this the first occurance of the term in this file?
- if (!isTermInFile[i]) {
- isTermInFile[i] = true;
- thisIndex[0].terms[i].nFiles++;
- //allocate memory and clearing the fields for a new FILEMATCH
- thisIndex[0].terms[i].files = realloc(thisIndex[0].terms[i].files, (thisIndex[0].terms[i].nFiles)*sizeof(FILEMATCH) );
- if (thisIndex[0].terms[i].files == NULL) {
- printf("ERROR allocating FILEMATCH memory.\n");
- exit(EXIT_FAILURE);
- }
- thisIndex[0].terms[i].files[handyFno].nLineMatches = 0;
- thisIndex[0].terms[i].files[handyFno].totalFileMatches = 0;
- }
- //is this is the first occurance of the term in this line?
- if (!isTermOnLine[i]) {
- //if it is, set the switch so it isnt anymore
- isTermOnLine[i] = true;
- //update the "number of lines this term was found on, in this file" field
- thisIndex[0].terms[i].files[handyFno].nLineMatches++;
- }
- if (isTermInFile[i]) {
- //updating the "total number of times this term was found in this file" field
- thisIndex[0].terms[i].files[handyFno].totalFileMatches++;
- }
- //updating the "total number of times this term is found" field
- thisIndex[0].terms[i].totalMatches++;
- p++;
- }
- i++;
- }
- }
- fclose(fp);
- }
- /*
- buildIndex(termsfile, paths, nDirs, opts) returns an index containing
- information about matchings between the terms in termsfile and the
- files and sub-directories found in the nDirs directories specified
- by paths, using the options in opts to determine which files are
- processed.
- */
- INDEX *buildIndex(char *termsfile, char *dirs[], int n, OPTIONS opts)
- {
- //creates memory for the index we wish to fill in
- INDEX *index = malloc(sizeof(INDEX));
- if (index == NULL) {
- printf("ERROR allocating INDEX memory.\n");
- exit(EXIT_FAILURE);
- }
- //to hold each of the search terms
- char *termList[BUFSIZ];
- //to hold all of the text files which we shall search
- char *textFileArray[BUFSIZ];
- //to hold all of the directories contained within dirs[i] (subdirectories)
- char *subDirectoryArray[BUFSIZ];
- //fills the nTerms field of index and fills termList with the search terms
- //note, that here, termList[0] = "Terms: "...
- index[0].nTerms = read_terms_file(termsfile, termList);
- //creates memory to hold nTerm lots of TERMINFOs
- //initialises all primitive fields to ZERO
- //initialises all ->term fields to their corresponding match in the terms file
- //note, that here, termList[0] = "Terms: "...
- //but index[0].terms[0].term = "word1"...
- set_terms(termList, index);
- //reads in names of all text files found in directories and sub directories
- //textFileArray now holds all text files in a given directory to process
- read_public_directory(dirs[0], textFileArray, subDirectoryArray, termList);
- //search through each text file, comparing to the searchTerms
- //and setting the correct information to the correct fields of INDEX
- int fileNum = 0;
- while (textFileArray[fileNum] != NULL) {
- search_textFile(textFileArray[fileNum], termList, index, fileNum);
- fileNum++;
- }
- return index;
- }
- /*
- sortIndex(index, opts) sorts and returns index, using the options in
- opts to determine the ordering.
- */
- /*
- INDEX *sortIndex(INDEX *index, OPTIONS opts)
- {
- return NULL;
- }
- */
- /*
- printIndex(index, opts) prints index, using the options in opts to
- determine what is printed.
- */
- /*
- void printIndex(INDEX *index, OPTIONS opts)
- {
- }
- */
- /*
- generateTextCloud(index) prints html code for a text-cloud that
- corresponds to the frequency information contained in index.
- */
- /*
- void generateTextCloud(INDEX *index)
- {
- }
- */
- /*
- freeIndex(index) frees all the memory associated with index.
- */
- void freeIndex(INDEX *index)
- {
- /*for (int i = 0; i < index[0].nTerms; i++) {
- for (int j = 0; j < index[0].terms[i].nFiles; j++) {
- free(index[0].terms[i].files);
- }
- free(index[0].terms);
- }
- printf("FREE COMPLETE.\n");*/
- }
- /*
- * the main function, it will control the flow of the program
- */
- int main(int argc, char *argv[])
- {
- //check for correct usage
- if (argc != 2) {
- printf("Usage: <progname> <file.txt>\n");
- exit(EXIT_FAILURE);
- }
- //setting up test variables, for trialing the functions in progress
- OPTIONS opt;
- char *termsfile = argv[1];
- int nDirs = 2;
- char *dirs[nDirs - 1];
- dirs[0] = "Dir1";
- dirs[1] = "Dir2";
- //calling the functions
- INDEX *index = buildIndex(termsfile, dirs, nDirs, opt);
- freeIndex(index);
- return 1;
- }
Add Comment
Please, Sign In to add comment