Advertisement
Guest User

real nigga hours

a guest
May 17th, 2018
206
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 17.81 KB | None | 0 0
  1. /*
  2.  * Copyright ©2018 Justin Hsia.  All rights reserved.  Permission is
  3.  * hereby granted to students registered for University of Washington
  4.  * CSE 333 for use solely during Spring Quarter 2018 for purposes of
  5.  * the course.  No other use, copying, distribution, or modification
  6.  * is permitted without prior written consent. Copyrights for
  7.  * third-party components of this work must be honored.  Instructors
  8.  * interested in reusing these course materials should contact the
  9.  * author.
  10.  */
  11.  
  12. #include <cstdio>    // for (FILE *).
  13. #include <cstring>   // for strlen(), etc.
  14.  
  15. #include "./filelayout.h"
  16. #include "./fileindexutil.h"  // for many useful routines!
  17. #include "./fileindexwriter.h"
  18.  
  19. // We need to peek inside the implementation of a HashTable so
  20. // that we can iterate through its buckets and their chain elements.
  21. extern "C" {
  22.   #include "libhw1/CSE333.h"
  23.   #include "libhw1/HashTable_priv.h"
  24. }
  25.  
  26. namespace hw3 {
  27.  
  28. // Helper function to write the docid->filename mapping from the
  29. // DocTable "dt" into file "f", starting at byte offset "offset".
  30. // Returns the size of the written DocTable or 0 on error.
  31. static HWSize_t WriteDocTable(FILE *f, DocTable dt, IndexFileOffset_t offset);
  32.  
  33. // Helper function to write the MemIndex "mi" into file "f", starting
  34. // at byte offset "offset."  Returns the size of the written MemIndex
  35. // or 0 on error.
  36. static HWSize_t WriteMemIndex(FILE *f, MemIndex mi, IndexFileOffset_t offset);
  37.  
  38. // Helper function to write the index file's header into file "f".
  39. // Returns the number of header bytes written on success, 0 on
  40. // failure.  Will atomically write the MAGIC_NUMBER as the very last
  41. // thing; as a result, if we crash part way through writing an index
  42. // file, it won't contain a valid MAGIC_NUMBER and the rest of HW3
  43. // will know to report an error.
  44. static HWSize_t WriteHeader(FILE *f,
  45.                             HWSize_t doct_size,
  46.                             HWSize_t memidx_size);
  47.  
  48. // A write_element_fn is used by WriteHashTable() to write a
  49. // HashTable's HTKeyValue element into the index file at offset
  50. // "offset".
  51. //
  52. // Returns the number of bytes written or 0 on error.
  53. typedef HWSize_t (*write_element_fn)(FILE *f,
  54.                                      IndexFileOffset_t offset,
  55.                                      HTKeyValue *kv);
  56.  
  57. // Helper function to write a HashTable into file "f", starting at
  58. // offset "offset".  The helper function "write_element_fn" is invoked
  59. // to writes each HTKeyValue element within the HashTable into the
  60. // file.
  61. //
  62. // Returns the total amount of data written, or 0 on failure.
  63. static HWSize_t WriteHashTable(FILE *f,
  64.                                HashTable t,
  65.                                IndexFileOffset_t offset,
  66.                                write_element_fn fn);
  67.  
  68. // Helper function used by WriteHashTable() to write out a bucket
  69. // record (i.e., a "bucket_rec" within the hw3 diagrams).  Returns the
  70. // amount of data written, or 0 on failure.
  71. //
  72. // "f" is the file to write into, "li" is the bucket chain linked
  73. // list, "br_offset" is the offset of the 'bucket_rec' field to write
  74. // into, and "b_offset" is the value of 'bucket offset' field to write
  75. // within the bucket_rec field.
  76. static HWSize_t WriteBucketRecord(FILE *f,
  77.                                   LinkedList li,
  78.                                   IndexFileOffset_t br_offset,
  79.                                   IndexFileOffset_t b_offset);
  80.  
  81. // Helper function used by WriteHashTable() to write out a bucket.
  82. // Returns the amount of data written, or 0 on failure.
  83. //
  84. // "f" is the file to write into, "li" is the bucket chain linked list
  85. // to write within the bucket, 'offset' is the offset of the bucket,
  86. // and 'write_element_fn' is a helper function used to write the
  87. // HTKeyValue into the element itself.
  88. static HWSize_t WriteBucket(FILE *f,
  89.                             LinkedList li,
  90.                             IndexFileOffset_t offset,
  91.                             write_element_fn fn);
  92.  
  93. HWSize_t WriteIndex(MemIndex mi, DocTable dt, const char *filename) {
  94.   HWSize_t filesize = 0, dtres, mires, headersize;
  95.   FILE *f;
  96.  
  97.   // Do some sanity checking on the arguments we were given.
  98.   Verify333(mi != nullptr);
  99.   Verify333(dt != nullptr);
  100.   Verify333(filename != nullptr);
  101.  
  102.   // fopen() the file for writing; use mode "wb+" to indicate binary,
  103.   // write mode, and to create/truncate the file.
  104.   f = fopen(filename, "wb+");
  105.   if (f == nullptr)
  106.     return 0;
  107.  
  108.   // write the document table using WriteDocTable().
  109.   dtres = WriteDocTable(f, dt, sizeof(IndexFileHeader));
  110.   if (dtres == 0) {
  111.     fclose(f);
  112.     unlink(filename);
  113.     return 0;
  114.   }
  115.   filesize += dtres;
  116.  
  117.   // write the memindex using WriteMemIndex().
  118.   // MISSING:
  119.   mires = WriteMemIndex(f, mi, sizeof(IndexFileHeader) + dtres);    // not sure if 3rd arg is correct
  120.   if (mires == 0) {
  121.     fclose(f);
  122.     unlink(filename);
  123.     return 0;
  124.   }
  125.   filesize += mires;
  126.  
  127.   // write the header using WriteHeader().
  128.   // MISSING:
  129.   headersize = WriteHeader(f, dtres, mires);
  130.   filesize += headersize;
  131.  
  132.   // Clean up and return the total amount written.
  133.   fclose(f);
  134.   return filesize;
  135. }
  136.  
  137. // This write_element_fn is used to write a docid->docname mapping
  138. // element, i.e., an element of the "doctable" table.
  139. static HWSize_t WriteDocidDocnameFn(FILE *f,
  140.                                     IndexFileOffset_t offset,
  141.                                     HTKeyValue *kv) {
  142.   size_t res;
  143.   uint16_t slen_ho;
  144.  
  145.   // determine the filename length
  146.   // MISSING (change this assignment to the correct thing):
  147.   slen_ho = strlen((char*) kv->value);
  148.  
  149.   // fwrite() the docid from "kv".  Remember to convert to
  150.   // disk format before writing.
  151.   doctable_element_header header = {kv->key, slen_ho};
  152.   header.toDiskFormat();
  153.  
  154.   // fseek() to the provided offset and then write the header.
  155.   if (fseek(f, offset, SEEK_SET) != 0)
  156.     return 0;
  157.   res = fwrite(&header, sizeof(header), 1, f);
  158.   if (res != 1)
  159.     return 0;
  160.  
  161.   // fwrite() the filename.  We don't write the null-terminator from
  162.   // the string, just the characters.
  163.   // MISSING:
  164.   res = fwrite(kv->value, slen_ho, 1, f);
  165.   if (res != 1) {
  166.     return 0;
  167.   }
  168.  
  169.   // calculate and return the total amount written.
  170.   // MISSING (change this return to the correct thing):
  171.   return sizeof(header) + slen_ho;
  172. }
  173.  
  174. static HWSize_t WriteDocTable(FILE *f, DocTable dt, IndexFileOffset_t offset) {
  175.   // Use WriteHashTable() to write the docid->filename hash table.
  176.   // You'll need to use DTGetDocidTable() to get the docID hash table
  177.   // from dt, and you'll need to pass in WriteDocidDocnameFn as the
  178.   // final parameter of WriteHashTable().
  179.   return WriteHashTable(f,
  180.                         DTGetDocidTable(dt),
  181.                         offset,
  182.                         &WriteDocidDocnameFn);
  183. }
  184.  
  185. // This write_element_fn is used to write a DocID + position list
  186. // element (i.e., an element of a nested docID table) into the file at
  187. // offset 'offset'.
  188. static HWSize_t WriteDocPositionListFn(FILE *f,
  189.                                        IndexFileOffset_t offset,
  190.                                        HTKeyValue *kv) {
  191.   size_t res;
  192.  
  193.   // Extract the docID from the HTKeyValue.
  194.   DocID_t docID_ho = (DocID_t)kv->key;
  195.  
  196.   // Extract the positions LinkedList from the HTKeyValue and
  197.   // determine its size.
  198.   LinkedList positions = (LinkedList)kv->value;
  199.   HWSize_t num_pos_ho = NumElementsInLinkedList(positions);
  200.  
  201.   // Write the header, in disk format.
  202.   // You'll need to fseek() to the right location in the file.
  203.   // MISSING:
  204.   docid_element_header header = {docID_ho, num_pos_ho};
  205.   header.toDiskFormat();
  206.   res = fseek(f, offset, SEEK_SET);
  207.   if (res != 0)
  208.     return 0;
  209.   res = fwrite(&header, sizeof(header), 1, f);
  210.   if (res != 1)
  211.     return 0;
  212.  
  213.   // Loop through the positions list, writing each position out.
  214.   HWSize_t i;
  215.   docid_element_position position;
  216.   LLPayload_t pos;
  217.   LLIter it = LLMakeIterator(positions, 0);
  218.   Verify333(it != nullptr);
  219.   for (i = 0; i < num_pos_ho; i++) {
  220.     // Get the next position from the list.
  221.     // MISSING:
  222.     LLIteratorGetPayload(it, (LLPayload_t*) &pos);
  223.    
  224.     // Truncate to 32 bits, then convert it to network order and write it out.
  225.     // MISSING:
  226.  
  227.     //position = {(uint32_t) *((uint64_t*) pos)};
  228.     //DocPositionOffset_t* p = (DocPositionOffset_t*) pos;
  229.     //uint32_t  q =  reinterpret_cast<uintptr_t>(pos);
  230.     //DocPositionOffset_t off = *p;
  231.     //uint32_t p = *pos;
  232.     position.toDiskFormat();
  233.     res = fwrite(&position, sizeof(docid_element_position), 1, f);
  234.     if (res != 1) {
  235.       LLIteratorFree(it);
  236.       return 0;
  237.     }
  238.  
  239.     // Iterate to the next position.
  240.     LLIteratorNext(it);
  241.   }
  242.   LLIteratorFree(it);
  243.  
  244.   // Calculate and return the total amount of data written.
  245.   // MISSING (fix this return value):
  246.   return num_pos_ho * sizeof(position) + sizeof(header);
  247. }
  248.  
  249. // This write_element_fn is used to write a WordDocSet
  250. // element into the file at position 'offset'.
  251. static HWSize_t WriteWordDocSetFn(FILE *f,
  252.                                   IndexFileOffset_t offset,
  253.                                   HTKeyValue *kv) {
  254.   size_t res;
  255.  
  256.   // Extract the WordDocSet from the HTKeyValue.
  257.   WordDocSet *wds = static_cast<WordDocSet *>(kv->value);
  258.   Verify333(wds != nullptr);
  259.  
  260.   // Prepare the wordlen field.
  261.   uint16_t wordlen_ho;
  262.   // MISSING:
  263.   wordlen_ho = strlen(wds->word);
  264.  
  265.   // Write the nested DocID->positions hashtable (i.e., the "docID
  266.   // table" element in the diagrams).  Use WriteHashTable() to do it,
  267.   // passing it the wds->docIDs table and using the
  268.   // WriteDocPositionListFn helper function as the final parameter.
  269.   HWSize_t htlen_ho = WriteHashTable(f,
  270.                                      wds->docIDs,
  271.                                      offset + sizeof(worddocset_header) + wordlen_ho,
  272.                                      &WriteDocPositionListFn);
  273.  
  274.   // Write the header, in network order, in the right
  275.   // place in the file.
  276.   worddocset_header header = {wordlen_ho, htlen_ho};
  277.   // MISSING:
  278.   header.toDiskFormat();
  279.   res = fseek(f, offset, SEEK_SET);
  280.   if (res != 0) {
  281.     return 0;
  282.   }
  283.   res = fwrite(&header, sizeof(header), 1, f);
  284.   if (res != 1) {
  285.     return 0;
  286.   }
  287.  
  288.   // Write the word itself, excluding the nullptr terminator,
  289.   // in the right place in the file.
  290.   // MISSING:
  291.   res = fwrite(wds->word, wordlen_ho, 1, f);
  292.   if (res != 1) {
  293.     return 0;
  294.   }
  295.  
  296.   // Calculate and return the total amount of data written.
  297.   // MISSING (fix this return value):
  298.   return sizeof(header) + wordlen_ho + htlen_ho;
  299. }
  300.  
  301. static HWSize_t WriteMemIndex(FILE *f, MemIndex mi, IndexFileOffset_t offset) {
  302.   // Use WriteHashTable() to write the MemIndex into the file.  You'll
  303.   // need to pass in the WriteWordDocSetFn helper function as the
  304.   // final argument.
  305.   return WriteHashTable(f,
  306.                         mi,
  307.                         offset,
  308.                         &WriteWordDocSetFn);
  309. }
  310.  
  311. static HWSize_t WriteHeader(FILE *f,
  312.                             HWSize_t doct_size,
  313.                             HWSize_t memidx_size) {
  314.   // We need to calculate the checksum over the doctable and index
  315.   // table.  (Note that the checksum does not include the index file
  316.   // header, just these two tables.)  Use fseek() to seek to the right
  317.   // location, and use a CRC32 object from fileindexutil.h to do the
  318.   // CRC checksum calculation, feeding it characters that you read
  319.   // from the index file using fread().
  320.  
  321.   IndexFileHeader header = {MAGIC_NUMBER, 0, doct_size, memidx_size};
  322.  
  323.   HWSize_t cslen = doct_size + memidx_size;
  324.   CRC32 crcobj;
  325.  
  326.   // MISSING:
  327.  
  328.   // skip the first 16 bytes where the header is stored
  329.   if (fseek(f, 16, SEEK_SET) != 0) {
  330.     return 0; //  fseek failure, return early
  331.   }
  332.  
  333.   HWSize_t bytes_read = 0;
  334.   // use new here?
  335.   char* buf = new char[cslen + 1];
  336.   buf[cslen] = '\0';
  337.   int res;
  338.   int i = 0;
  339.  
  340.   // NEED TO DO ERROR CHECKING HERE
  341.   res = fread(buf, 1, cslen, f);
  342.  
  343.   //crcobj.Initialize();
  344.   for(HWSize_t i = 0; i < cslen; i++) {
  345.     crcobj.FoldByteIntoCRC(buf[i]);
  346.   }
  347.  
  348.   uint32_t checksum = crcobj.GetFinalCRC();
  349.   //delete crcobj;
  350.   header.checksum = checksum;
  351.   delete[] buf;
  352.  
  353.  
  354.   // Write the header fields.  Be sure to convert the fields to
  355.   // network order before writing them!
  356.   header.toDiskFormat();
  357.  
  358.   if (fseek(f, 0, SEEK_SET) != 0)
  359.     return 0;
  360.   if (fwrite(&header, sizeof(header), 1, f) != 1)
  361.     return 0;
  362.  
  363.   // Use fsync to flush the header field to disk.
  364.   Verify333(fsync(fileno(f)) == 0);
  365.  
  366.   // We're done!  Return the number of header bytes written.
  367.   return sizeof(IndexFileHeader);
  368. }
  369.  
  370. static HWSize_t WriteBucketRecord(FILE *f,
  371.                                   LinkedList li,
  372.                                   IndexFileOffset_t br_offset,
  373.                                   IndexFileOffset_t b_offset) {
  374.   HWSize_t res;
  375.  
  376.   // Use NumElementsInLinkedList() to figure out how many chained
  377.   // elements are in this bucket.  Put bucket_rec in network
  378.   // byte order.
  379.  
  380.   // MISSING:
  381.   bucket_rec br;
  382.   br.chain_len = NumElementsInLinkedList(li);
  383.   br.bucket_position = b_offset;
  384.   br.toDiskFormat();
  385.  
  386.   fprintf(stderr, "bucket rec offset: %d\n", b_offset);
  387.  
  388.   // fseek() to the "bucket_rec" record for this bucket.
  389.   res = fseek(f, br_offset, SEEK_SET);
  390.   if (res != 0)
  391.     return 0;
  392.  
  393.   // Write the bucket_rec.
  394.   // MISSING:
  395.   res = fwrite(&br, sizeof(bucket_rec), 1, f);
  396.   if (res != 1)
  397.     return 0;
  398.  
  399.   // Calculate and return how many bytes we wrote.
  400.   return sizeof(bucket_rec);
  401. }
  402.  
  403. static HWSize_t WriteBucket(FILE *f,
  404.                             LinkedList li,
  405.                             IndexFileOffset_t offset,
  406.                             write_element_fn fn) {
  407.   // Use NumElementsInLinkedList() to calculate how many elements are
  408.   // in this bucket chain.
  409.   HWSize_t chainlen_ho = NumElementsInLinkedList(li);
  410.  
  411.   // "bucketlen" is our running calculation of how many bytes have
  412.   // been written out for this bucket.
  413.   HWSize_t bucketlen = sizeof(element_position_rec) * chainlen_ho;
  414.  
  415.   // Figure out the position of the next "element" in the bucket.
  416.   HWSize_t nextelpos = offset + bucketlen;
  417.  
  418.   // Loop through the chain, writing each associated "element position"
  419.   // header field for the bucket and then writing out the "element"
  420.   // itself.  Be sure to write things in network order.  Use the
  421.   // "fn" parameter to invoke the helper function that knows how to
  422.   // write out the payload of each chain element into the "element"
  423.   // fields of the bucket.
  424.  
  425.   element_position_rec element_record;
  426.   if (chainlen_ho > 0) {
  427.     LLIter it = LLMakeIterator(li, 0);
  428.     Verify333(it != nullptr);
  429.     HWSize_t j;
  430.     for (j = 0; j < chainlen_ho; j++) {
  431.       HWSize_t ellen, res;
  432.       HTKeyValue *kv;
  433.  
  434.       // MISSING:
  435.       LLIteratorGetPayload(it, (LLPayload_t*)&kv);
  436.       ellen = fn(f, nextelpos, kv);
  437.       if (ellen == 0) {
  438.         return 0;
  439.       }
  440.       element_record = {nextelpos};
  441.       element_record.toDiskFormat();
  442.       res = fseek(f, offset + j * sizeof(element_record),SEEK_SET);
  443.       if (res != 0) {
  444.         LLIteratorFree(it);
  445.         return 0;
  446.       }
  447.      /*
  448.       res = fwrite(&element_record, sizeof(element_record), 1, f);
  449.       if (res != 1)
  450.         LLIteratorFree(it);
  451.         return 0;
  452. */
  453.      
  454.  
  455.       // Advance to the next element in the chain, tallying up our
  456.       // lengths.
  457.       bucketlen += ellen;
  458.       nextelpos += ellen;
  459.       LLIteratorNext(it);
  460.     }
  461.     LLIteratorFree(it);
  462.   }
  463.  
  464.   // Return the total amount of data written.
  465.   return bucketlen;
  466. }
  467.  
  468. // This is the main workhorse of the file.  It iterates through the
  469. // buckets in the HashTable "t", writing the hash table out into
  470. // the index file.
  471. static HWSize_t WriteHashTable(FILE *f,
  472.                                HashTable t,
  473.                                IndexFileOffset_t offset,
  474.                                write_element_fn fn) {
  475.   HashTableRecord *ht = static_cast<HashTableRecord *>(t);
  476.   IndexFileOffset_t next_bucket_rec_offset = offset + sizeof(BucketListHeader);
  477.   IndexFileOffset_t next_bucket_offset;
  478.   HWSize_t i, res;
  479.  
  480.   // fwrite() out the "num_buckets" (number of buckets) field, in
  481.   // network order.
  482.   BucketListHeader header = {ht->num_buckets};
  483.   header.toDiskFormat();
  484.   res = fseek(f, offset, SEEK_SET);
  485.   if (res != 0)
  486.     return 0;
  487.   res = fwrite(&header.num_buckets, sizeof(BucketListHeader), 1, f);
  488.   if (res != 1)
  489.     return 0;
  490.  
  491.   // Figure out the offset of the first "bucket" field.  Each
  492.   // bucket_rec is sizeof(bucket_rec) bytes, and there
  493.   // are ht->num_buckets of them.
  494.   next_bucket_offset = next_bucket_rec_offset +
  495.                          (ht->num_buckets) * sizeof(bucket_rec);
  496.  
  497.   // Loop through table's buckets, writing them out to the appropriate
  498.   // "bucket_rec" and "bucket" fields within the file index.  Writing
  499.   // a bucket means writing the bucket_rec, then writing the bucket
  500.   // itself.  Use WriteBucketRecord() to write a bucket_rec, and
  501.   // WriteBucket() to write a bucket.
  502.   //
  503.   // Be sure to handle the corner case where the bucket's chain is
  504.   // empty.  For that case, you still have to write a "bucket_rec"
  505.   // record for the bucket, but you won't write a "bucket".
  506.   bucket_rec currentRecord;
  507.   HWSize_t bucketsize;
  508.   HWSize_t bucketrecsize;
  509.   for (i = 0; i < ht->num_buckets; i++) {
  510.     // MISSING:
  511.     // check if these return 0
  512.     bucketrecsize = WriteBucketRecord(f, ht->buckets[i], next_bucket_rec_offset, next_bucket_offset);
  513.     next_bucket_rec_offset += bucketrecsize;
  514.     if (NumElementsInLinkedList(ht->buckets[i]) > 0) {
  515.       bucketsize = WriteBucket(f, ht->buckets[i], next_bucket_offset, fn);
  516.       next_bucket_offset += bucketsize;
  517.     }
  518.   }
  519.  
  520.   // Calculate and return the total number of bytes written.
  521.   return (next_bucket_offset - offset);
  522. }
  523.  
  524. }  // namespace hw3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement