Advertisement
Guest User

Untitled

a guest
Apr 26th, 2017
178
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.37 KB | None | 0 0
  1. static void LoopAndInsert(HashTable tab, char *content) { char *curptr = content, *wordstart = content; // STEP 7. // This is the interesting part of Part A! // // "content" contains a C string with the full contents // of the file. You need to implement a loop that steps through the // file content a character at a time, testing to see whether a // character is an alphabetic character or not. If a character is // alphabetic, it's part of a word. If a character is not // alphabetic, it's part of the boundary between words. // You can use the string.h "isalpha()" macro to test whether // a character is alphabetic or not. ("man isalpha"). // // So, for example, here's a string with the words within // it underlined with "=", and boundary characters underlined // with "+": // // The Fox Can't CATCH the Chicken. // ===++===++===+=+++=====+===++=======+ // // As you loop through, anytime you detect the start of a // word, you should use the "wordstart" pointer to remember // where the word started. You should also use the "tolower" // macro to convert alphabetic characters to lowercase. // (e.g., *curptr = tolower(*curptr); ). Finally, as // a hint, you can overwrite boundary characters with '\0' (null // terminators) in place in the buffer to create valid C // strings out of each parsed word. // // Each time you find a word that you want to record in // the hashtable, call the AddToHashTable() helper // function with appropriate arguments, e.g., // // AddToHashTable(tab, wordstart, pos); // DocPositionOffset_t pos = 0; while (*curptr != 0) { if (isalpha(*curptr)) { wordstart = curptr; DocPositionOffset_t wpos = pos; do { *curptr = tolower(*curptr); pos++; curptr++; } while (isalpha(*curptr)); *curptr = '\0'; AddToHashTable(tab, wordstart, wpos); } curptr++; pos++; } } static void AddToHashTable(HashTable tab, char *word, DocPositionOffset_t pos) { HTKey_t hashKey; int retval; HTKeyValue kv; // Hash the string. hashKey = FNVHash64((unsigned char *) word, strlen(word)); // Have we already encountered this word within this file? // If so, it's already in the hashtable. retval = LookupHashTable(tab, hashKey, &kv); if (retval == 1) { // Yes; we just need to add a position in using AppendLinkedList(). Note // how we're casting the DocPositionOffset_t position variable to an LLPayload_t to store // it in the linked list payload without needing to malloc space for it. // Ugly, but it works! WordPositions *wp = (WordPositions *) kv.value; retval = AppendLinkedList(wp->positions, (LLPayload_t) ((intptr_t) pos)); Verify333(retval != 0); } else { // STEP 8. // No; this is the first time we've seen this word. Allocate and prepare // a new WordPositions structure, and append the new position to its list // using a similar ugly hack as right above. WordPositions *wp = malloc(sizeof(WordPositions)); char *newstr = malloc(strlen(word) + 1); strncpy(newstr, word, strlen(word) + 1); wp->positions = AllocateLinkedList(); wp->word = newstr; retval = AppendLinkedList(wp->positions, (LLPayload_t) ((intptr_t) pos)); Verify333(retval != 0); kv.key = hashKey; kv.value = wp; retval = InsertHashTable(tab, kv, NULL); Verify333(retval == 1); }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement