daily pastebin goal
22%
SHARE
TWEET

real nigga hours

a guest May 17th, 2018 150 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /*
  2.  * Copyright ©2018 Justin Hsia.  All rights reserved.  Permission is
  3.  * hereby granted to students registered for University of Washington
  4.  * CSE 333 for use solely during Spring Quarter 2018 for purposes of
  5.  * the course.  No other use, copying, distribution, or modification
  6.  * is permitted without prior written consent. Copyrights for
  7.  * third-party components of this work must be honored.  Instructors
  8.  * interested in reusing these course materials should contact the
  9.  * author.
  10.  */
  11.  
  12. #include <cstdio>    // for (FILE *).
  13. #include <cstring>   // for strlen(), etc.
  14.  
  15. #include "./filelayout.h"
  16. #include "./fileindexutil.h"  // for many useful routines!
  17. #include "./fileindexwriter.h"
  18.  
  19. // We need to peek inside the implementation of a HashTable so
  20. // that we can iterate through its buckets and their chain elements.
  21. extern "C" {
  22.   #include "libhw1/CSE333.h"
  23.   #include "libhw1/HashTable_priv.h"
  24. }
  25.  
  26. namespace hw3 {
  27.  
  28. // Helper function to write the docid->filename mapping from the
  29. // DocTable "dt" into file "f", starting at byte offset "offset".
  30. // Returns the size of the written DocTable or 0 on error.
  31. static HWSize_t WriteDocTable(FILE *f, DocTable dt, IndexFileOffset_t offset);
  32.  
  33. // Helper function to write the MemIndex "mi" into file "f", starting
  34. // at byte offset "offset."  Returns the size of the written MemIndex
  35. // or 0 on error.
  36. static HWSize_t WriteMemIndex(FILE *f, MemIndex mi, IndexFileOffset_t offset);
  37.  
  38. // Helper function to write the index file's header into file "f".
  39. // Returns the number of header bytes written on success, 0 on
  40. // failure.  Will atomically write the MAGIC_NUMBER as the very last
  41. // thing; as a result, if we crash part way through writing an index
  42. // file, it won't contain a valid MAGIC_NUMBER and the rest of HW3
  43. // will know to report an error.
  44. static HWSize_t WriteHeader(FILE *f,
  45.                             HWSize_t doct_size,
  46.                             HWSize_t memidx_size);
  47.  
  48. // A write_element_fn is used by WriteHashTable() to write a
  49. // HashTable's HTKeyValue element into the index file at offset
  50. // "offset".
  51. //
  52. // Returns the number of bytes written or 0 on error.
  53. typedef HWSize_t (*write_element_fn)(FILE *f,
  54.                                      IndexFileOffset_t offset,
  55.                                      HTKeyValue *kv);
  56.  
  57. // Helper function to write a HashTable into file "f", starting at
  58. // offset "offset".  The helper function "write_element_fn" is invoked
  59. // to writes each HTKeyValue element within the HashTable into the
  60. // file.
  61. //
  62. // Returns the total amount of data written, or 0 on failure.
  63. static HWSize_t WriteHashTable(FILE *f,
  64.                                HashTable t,
  65.                                IndexFileOffset_t offset,
  66.                                write_element_fn fn);
  67.  
  68. // Helper function used by WriteHashTable() to write out a bucket
  69. // record (i.e., a "bucket_rec" within the hw3 diagrams).  Returns the
  70. // amount of data written, or 0 on failure.
  71. //
  72. // "f" is the file to write into, "li" is the bucket chain linked
  73. // list, "br_offset" is the offset of the 'bucket_rec' field to write
  74. // into, and "b_offset" is the value of 'bucket offset' field to write
  75. // within the bucket_rec field.
  76. static HWSize_t WriteBucketRecord(FILE *f,
  77.                                   LinkedList li,
  78.                                   IndexFileOffset_t br_offset,
  79.                                   IndexFileOffset_t b_offset);
  80.  
  81. // Helper function used by WriteHashTable() to write out a bucket.
  82. // Returns the amount of data written, or 0 on failure.
  83. //
  84. // "f" is the file to write into, "li" is the bucket chain linked list
  85. // to write within the bucket, 'offset' is the offset of the bucket,
  86. // and 'write_element_fn' is a helper function used to write the
  87. // HTKeyValue into the element itself.
  88. static HWSize_t WriteBucket(FILE *f,
  89.                             LinkedList li,
  90.                             IndexFileOffset_t offset,
  91.                             write_element_fn fn);
  92.  
  93. HWSize_t WriteIndex(MemIndex mi, DocTable dt, const char *filename) {
  94.   HWSize_t filesize = 0, dtres, mires, headersize;
  95.   FILE *f;
  96.  
  97.   // Do some sanity checking on the arguments we were given.
  98.   Verify333(mi != nullptr);
  99.   Verify333(dt != nullptr);
  100.   Verify333(filename != nullptr);
  101.  
  102.   // fopen() the file for writing; use mode "wb+" to indicate binary,
  103.   // write mode, and to create/truncate the file.
  104.   f = fopen(filename, "wb+");
  105.   if (f == nullptr)
  106.     return 0;
  107.  
  108.   // write the document table using WriteDocTable().
  109.   dtres = WriteDocTable(f, dt, sizeof(IndexFileHeader));
  110.   if (dtres == 0) {
  111.     fclose(f);
  112.     unlink(filename);
  113.     return 0;
  114.   }
  115.   filesize += dtres;
  116.  
  117.   // write the memindex using WriteMemIndex().
  118.   // MISSING:
  119.   mires = WriteMemIndex(f, mi, sizeof(IndexFileHeader) + dtres);    // not sure if 3rd arg is correct
  120.   if (mires == 0) {
  121.     fclose(f);
  122.     unlink(filename);
  123.     return 0;
  124.   }
  125.   filesize += mires;
  126.  
  127.   // write the header using WriteHeader().
  128.   // MISSING:
  129.   headersize = WriteHeader(f, dtres, mires);
  130.   filesize += headersize;
  131.  
  132.   // Clean up and return the total amount written.
  133.   fclose(f);
  134.   return filesize;
  135. }
  136.  
  137. // This write_element_fn is used to write a docid->docname mapping
  138. // element, i.e., an element of the "doctable" table.
  139. static HWSize_t WriteDocidDocnameFn(FILE *f,
  140.                                     IndexFileOffset_t offset,
  141.                                     HTKeyValue *kv) {
  142.   size_t res;
  143.   uint16_t slen_ho;
  144.  
  145.   // determine the filename length
  146.   // MISSING (change this assignment to the correct thing):
  147.   slen_ho = strlen((char*) kv->value);
  148.  
  149.   // fwrite() the docid from "kv".  Remember to convert to
  150.   // disk format before writing.
  151.   doctable_element_header header = {kv->key, slen_ho};
  152.   header.toDiskFormat();
  153.  
  154.   // fseek() to the provided offset and then write the header.
  155.   if (fseek(f, offset, SEEK_SET) != 0)
  156.     return 0;
  157.   res = fwrite(&header, sizeof(header), 1, f);
  158.   if (res != 1)
  159.     return 0;
  160.  
  161.   // fwrite() the filename.  We don't write the null-terminator from
  162.   // the string, just the characters.
  163.   // MISSING:
  164.   res = fwrite(kv->value, slen_ho, 1, f);
  165.   if (res != 1) {
  166.     return 0;
  167.   }
  168.  
  169.   // calculate and return the total amount written.
  170.   // MISSING (change this return to the correct thing):
  171.   return sizeof(header) + slen_ho;
  172. }
  173.  
  174. static HWSize_t WriteDocTable(FILE *f, DocTable dt, IndexFileOffset_t offset) {
  175.   // Use WriteHashTable() to write the docid->filename hash table.
  176.   // You'll need to use DTGetDocidTable() to get the docID hash table
  177.   // from dt, and you'll need to pass in WriteDocidDocnameFn as the
  178.   // final parameter of WriteHashTable().
  179.   return WriteHashTable(f,
  180.                         DTGetDocidTable(dt),
  181.                         offset,
  182.                         &WriteDocidDocnameFn);
  183. }
  184.  
  185. // This write_element_fn is used to write a DocID + position list
  186. // element (i.e., an element of a nested docID table) into the file at
  187. // offset 'offset'.
  188. static HWSize_t WriteDocPositionListFn(FILE *f,
  189.                                        IndexFileOffset_t offset,
  190.                                        HTKeyValue *kv) {
  191.   size_t res;
  192.  
  193.   // Extract the docID from the HTKeyValue.
  194.   DocID_t docID_ho = (DocID_t)kv->key;
  195.  
  196.   // Extract the positions LinkedList from the HTKeyValue and
  197.   // determine its size.
  198.   LinkedList positions = (LinkedList)kv->value;
  199.   HWSize_t num_pos_ho = NumElementsInLinkedList(positions);
  200.  
  201.   // Write the header, in disk format.
  202.   // You'll need to fseek() to the right location in the file.
  203.   // MISSING:
  204.   docid_element_header header = {docID_ho, num_pos_ho};
  205.   header.toDiskFormat();
  206.   res = fseek(f, offset, SEEK_SET);
  207.   if (res != 0)
  208.     return 0;
  209.   res = fwrite(&header, sizeof(header), 1, f);
  210.   if (res != 1)
  211.     return 0;
  212.  
  213.   // Loop through the positions list, writing each position out.
  214.   HWSize_t i;
  215.   docid_element_position position;
  216.   LLPayload_t pos;
  217.   LLIter it = LLMakeIterator(positions, 0);
  218.   Verify333(it != nullptr);
  219.   for (i = 0; i < num_pos_ho; i++) {
  220.     // Get the next position from the list.
  221.     // MISSING:
  222.     LLIteratorGetPayload(it, (LLPayload_t*) &pos);
  223.    
  224.     // Truncate to 32 bits, then convert it to network order and write it out.
  225.     // MISSING:
  226.  
  227.     //position = {(uint32_t) *((uint64_t*) pos)};
  228.     //DocPositionOffset_t* p = (DocPositionOffset_t*) pos;
  229.     //uint32_t  q =  reinterpret_cast<uintptr_t>(pos);
  230.     //DocPositionOffset_t off = *p;
  231.     //uint32_t p = *pos;
  232.     position.toDiskFormat();
  233.     res = fwrite(&position, sizeof(docid_element_position), 1, f);
  234.     if (res != 1) {
  235.       LLIteratorFree(it);
  236.       return 0;
  237.     }
  238.  
  239.     // Iterate to the next position.
  240.     LLIteratorNext(it);
  241.   }
  242.   LLIteratorFree(it);
  243.  
  244.   // Calculate and return the total amount of data written.
  245.   // MISSING (fix this return value):
  246.   return num_pos_ho * sizeof(position) + sizeof(header);
  247. }
  248.  
  249. // This write_element_fn is used to write a WordDocSet
  250. // element into the file at position 'offset'.
  251. static HWSize_t WriteWordDocSetFn(FILE *f,
  252.                                   IndexFileOffset_t offset,
  253.                                   HTKeyValue *kv) {
  254.   size_t res;
  255.  
  256.   // Extract the WordDocSet from the HTKeyValue.
  257.   WordDocSet *wds = static_cast<WordDocSet *>(kv->value);
  258.   Verify333(wds != nullptr);
  259.  
  260.   // Prepare the wordlen field.
  261.   uint16_t wordlen_ho;
  262.   // MISSING:
  263.   wordlen_ho = strlen(wds->word);
  264.  
  265.   // Write the nested DocID->positions hashtable (i.e., the "docID
  266.   // table" element in the diagrams).  Use WriteHashTable() to do it,
  267.   // passing it the wds->docIDs table and using the
  268.   // WriteDocPositionListFn helper function as the final parameter.
  269.   HWSize_t htlen_ho = WriteHashTable(f,
  270.                                      wds->docIDs,
  271.                                      offset + sizeof(worddocset_header) + wordlen_ho,
  272.                                      &WriteDocPositionListFn);
  273.  
  274.   // Write the header, in network order, in the right
  275.   // place in the file.
  276.   worddocset_header header = {wordlen_ho, htlen_ho};
  277.   // MISSING:
  278.   header.toDiskFormat();
  279.   res = fseek(f, offset, SEEK_SET);
  280.   if (res != 0) {
  281.     return 0;
  282.   }
  283.   res = fwrite(&header, sizeof(header), 1, f);
  284.   if (res != 1) {
  285.     return 0;
  286.   }
  287.  
  288.   // Write the word itself, excluding the nullptr terminator,
  289.   // in the right place in the file.
  290.   // MISSING:
  291.   res = fwrite(wds->word, wordlen_ho, 1, f);
  292.   if (res != 1) {
  293.     return 0;
  294.   }
  295.  
  296.   // Calculate and return the total amount of data written.
  297.   // MISSING (fix this return value):
  298.   return sizeof(header) + wordlen_ho + htlen_ho;
  299. }
  300.  
  301. static HWSize_t WriteMemIndex(FILE *f, MemIndex mi, IndexFileOffset_t offset) {
  302.   // Use WriteHashTable() to write the MemIndex into the file.  You'll
  303.   // need to pass in the WriteWordDocSetFn helper function as the
  304.   // final argument.
  305.   return WriteHashTable(f,
  306.                         mi,
  307.                         offset,
  308.                         &WriteWordDocSetFn);
  309. }
  310.  
  311. static HWSize_t WriteHeader(FILE *f,
  312.                             HWSize_t doct_size,
  313.                             HWSize_t memidx_size) {
  314.   // We need to calculate the checksum over the doctable and index
  315.   // table.  (Note that the checksum does not include the index file
  316.   // header, just these two tables.)  Use fseek() to seek to the right
  317.   // location, and use a CRC32 object from fileindexutil.h to do the
  318.   // CRC checksum calculation, feeding it characters that you read
  319.   // from the index file using fread().
  320.  
  321.   IndexFileHeader header = {MAGIC_NUMBER, 0, doct_size, memidx_size};
  322.  
  323.   HWSize_t cslen = doct_size + memidx_size;
  324.   CRC32 crcobj;
  325.  
  326.   // MISSING:
  327.  
  328.   // skip the first 16 bytes where the header is stored
  329.   if (fseek(f, 16, SEEK_SET) != 0) {
  330.     return 0; //  fseek failure, return early
  331.   }
  332.  
  333.   HWSize_t bytes_read = 0;
  334.   // use new here?
  335.   char* buf = new char[cslen + 1];
  336.   buf[cslen] = '\0';
  337.   int res;
  338.   int i = 0;
  339.  
  340.   // NEED TO DO ERROR CHECKING HERE
  341.   res = fread(buf, 1, cslen, f);
  342.  
  343.   //crcobj.Initialize();
  344.   for(HWSize_t i = 0; i < cslen; i++) {
  345.     crcobj.FoldByteIntoCRC(buf[i]);
  346.   }
  347.  
  348.   uint32_t checksum = crcobj.GetFinalCRC();
  349.   //delete crcobj;
  350.   header.checksum = checksum;
  351.   delete[] buf;
  352.  
  353.  
  354.   // Write the header fields.  Be sure to convert the fields to
  355.   // network order before writing them!
  356.   header.toDiskFormat();
  357.  
  358.   if (fseek(f, 0, SEEK_SET) != 0)
  359.     return 0;
  360.   if (fwrite(&header, sizeof(header), 1, f) != 1)
  361.     return 0;
  362.  
  363.   // Use fsync to flush the header field to disk.
  364.   Verify333(fsync(fileno(f)) == 0);
  365.  
  366.   // We're done!  Return the number of header bytes written.
  367.   return sizeof(IndexFileHeader);
  368. }
  369.  
  370. static HWSize_t WriteBucketRecord(FILE *f,
  371.                                   LinkedList li,
  372.                                   IndexFileOffset_t br_offset,
  373.                                   IndexFileOffset_t b_offset) {
  374.   HWSize_t res;
  375.  
  376.   // Use NumElementsInLinkedList() to figure out how many chained
  377.   // elements are in this bucket.  Put bucket_rec in network
  378.   // byte order.
  379.  
  380.   // MISSING:
  381.   bucket_rec br;
  382.   br.chain_len = NumElementsInLinkedList(li);
  383.   br.bucket_position = b_offset;
  384.   br.toDiskFormat();
  385.  
  386.   fprintf(stderr, "bucket rec offset: %d\n", b_offset);
  387.  
  388.   // fseek() to the "bucket_rec" record for this bucket.
  389.   res = fseek(f, br_offset, SEEK_SET);
  390.   if (res != 0)
  391.     return 0;
  392.  
  393.   // Write the bucket_rec.
  394.   // MISSING:
  395.   res = fwrite(&br, sizeof(bucket_rec), 1, f);
  396.   if (res != 1)
  397.     return 0;
  398.  
  399.   // Calculate and return how many bytes we wrote.
  400.   return sizeof(bucket_rec);
  401. }
  402.  
  403. static HWSize_t WriteBucket(FILE *f,
  404.                             LinkedList li,
  405.                             IndexFileOffset_t offset,
  406.                             write_element_fn fn) {
  407.   // Use NumElementsInLinkedList() to calculate how many elements are
  408.   // in this bucket chain.
  409.   HWSize_t chainlen_ho = NumElementsInLinkedList(li);
  410.  
  411.   // "bucketlen" is our running calculation of how many bytes have
  412.   // been written out for this bucket.
  413.   HWSize_t bucketlen = sizeof(element_position_rec) * chainlen_ho;
  414.  
  415.   // Figure out the position of the next "element" in the bucket.
  416.   HWSize_t nextelpos = offset + bucketlen;
  417.  
  418.   // Loop through the chain, writing each associated "element position"
  419.   // header field for the bucket and then writing out the "element"
  420.   // itself.  Be sure to write things in network order.  Use the
  421.   // "fn" parameter to invoke the helper function that knows how to
  422.   // write out the payload of each chain element into the "element"
  423.   // fields of the bucket.
  424.  
  425.   element_position_rec element_record;
  426.   if (chainlen_ho > 0) {
  427.     LLIter it = LLMakeIterator(li, 0);
  428.     Verify333(it != nullptr);
  429.     HWSize_t j;
  430.     for (j = 0; j < chainlen_ho; j++) {
  431.       HWSize_t ellen, res;
  432.       HTKeyValue *kv;
  433.  
  434.       // MISSING:
  435.       LLIteratorGetPayload(it, (LLPayload_t*)&kv);
  436.       ellen = fn(f, nextelpos, kv);
  437.       if (ellen == 0) {
  438.         return 0;
  439.       }
  440.       element_record = {nextelpos};
  441.       element_record.toDiskFormat();
  442.       res = fseek(f, offset + j * sizeof(element_record),SEEK_SET);
  443.       if (res != 0) {
  444.         LLIteratorFree(it);
  445.         return 0;
  446.       }
  447.      /*
  448.       res = fwrite(&element_record, sizeof(element_record), 1, f);
  449.       if (res != 1)
  450.         LLIteratorFree(it);
  451.         return 0;
  452. */
  453.      
  454.  
  455.       // Advance to the next element in the chain, tallying up our
  456.       // lengths.
  457.       bucketlen += ellen;
  458.       nextelpos += ellen;
  459.       LLIteratorNext(it);
  460.     }
  461.     LLIteratorFree(it);
  462.   }
  463.  
  464.   // Return the total amount of data written.
  465.   return bucketlen;
  466. }
  467.  
  468. // This is the main workhorse of the file.  It iterates through the
  469. // buckets in the HashTable "t", writing the hash table out into
  470. // the index file.
  471. static HWSize_t WriteHashTable(FILE *f,
  472.                                HashTable t,
  473.                                IndexFileOffset_t offset,
  474.                                write_element_fn fn) {
  475.   HashTableRecord *ht = static_cast<HashTableRecord *>(t);
  476.   IndexFileOffset_t next_bucket_rec_offset = offset + sizeof(BucketListHeader);
  477.   IndexFileOffset_t next_bucket_offset;
  478.   HWSize_t i, res;
  479.  
  480.   // fwrite() out the "num_buckets" (number of buckets) field, in
  481.   // network order.
  482.   BucketListHeader header = {ht->num_buckets};
  483.   header.toDiskFormat();
  484.   res = fseek(f, offset, SEEK_SET);
  485.   if (res != 0)
  486.     return 0;
  487.   res = fwrite(&header.num_buckets, sizeof(BucketListHeader), 1, f);
  488.   if (res != 1)
  489.     return 0;
  490.  
  491.   // Figure out the offset of the first "bucket" field.  Each
  492.   // bucket_rec is sizeof(bucket_rec) bytes, and there
  493.   // are ht->num_buckets of them.
  494.   next_bucket_offset = next_bucket_rec_offset +
  495.                          (ht->num_buckets) * sizeof(bucket_rec);
  496.  
  497.   // Loop through table's buckets, writing them out to the appropriate
  498.   // "bucket_rec" and "bucket" fields within the file index.  Writing
  499.   // a bucket means writing the bucket_rec, then writing the bucket
  500.   // itself.  Use WriteBucketRecord() to write a bucket_rec, and
  501.   // WriteBucket() to write a bucket.
  502.   //
  503.   // Be sure to handle the corner case where the bucket's chain is
  504.   // empty.  For that case, you still have to write a "bucket_rec"
  505.   // record for the bucket, but you won't write a "bucket".
  506.   bucket_rec currentRecord;
  507.   HWSize_t bucketsize;
  508.   HWSize_t bucketrecsize;
  509.   for (i = 0; i < ht->num_buckets; i++) {
  510.     // MISSING:
  511.     // check if these return 0
  512.     bucketrecsize = WriteBucketRecord(f, ht->buckets[i], next_bucket_rec_offset, next_bucket_offset);
  513.     next_bucket_rec_offset += bucketrecsize;
  514.     if (NumElementsInLinkedList(ht->buckets[i]) > 0) {
  515.       bucketsize = WriteBucket(f, ht->buckets[i], next_bucket_offset, fn);
  516.       next_bucket_offset += bucketsize;
  517.     }
  518.   }
  519.  
  520.   // Calculate and return the total number of bytes written.
  521.   return (next_bucket_offset - offset);
  522. }
  523.  
  524. }  // namespace hw3
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top