Advertisement
homer512

wstring to string benchmark

Aug 29th, 2015
146
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 9.62 KB | None | 0 0
  1. /*
  2.  * Copyright 2015 Florian Philipp
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  * http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */
  16.  
  17. #include <wchar.h>
  18. /* using wcsrtombs, wcrtomb, wcslen */
  19. #include <stdlib.h>
  20. /* using malloc, realloc, free */
  21. #include <string.h>
  22. /* using memset, memcpy */
  23. #include <stdio.h>
  24.  /* using printf, perror */
  25. #include <locale.h>
  26. /* using setlocale */
  27. #include <errno.h>
  28. /* using errno */
  29. #include <time.h>
  30. /* using clock_gettime, difftime */
  31.  
  32.  
  /**
   * Releases a heap block without disturbing the caller's errno
   *
   * errno is sampled before free is called and written back afterwards, so an
   * error code stored by an earlier failing call is still there on return.
   *
   * \param buf pointer that is handed to free
   */
  static void _free_keep_errno(void* buf)
  {
    const int saved_errno = errno;
    free(buf);
    errno = saved_errno;
  }
  43.  
  /**
   * Converts a wide character string to a multi-byte character string
   *
   * Two passes over the input: a dry run of wcsrtombs with a NULL destination
   * yields the required byte count, then a buffer of exactly that size (plus
   * the terminator) is allocated and filled by a second wcsrtombs call.
   *
   * \param ws a null-terminated wide character string
   * \return the corresponding multi-byte character string or NULL on error
   */
  static char* s_wstring_to_string_meas(const wchar_t* ws)
  {
    const wchar_t* probe = ws;
    mbstate_t state;
    size_t needed;
    char* out;

    memset(&state, 0, sizeof(state));
    /* first pass: measure only, nothing is stored */
    needed = wcsrtombs(NULL, &probe, 0, &state);
    if(needed == (size_t) -1)
      return NULL;
    /* room for the terminating null byte */
    needed += 1;
    out = malloc(needed);
    if(! out)
      return NULL;
    /* second pass: the actual conversion */
    if(wcsrtombs(out, &ws, needed, &state) == (size_t) -1) {
      _free_keep_errno(out);
      return NULL;
    }
    return out;
  }
  73.  
  /**
   * Converts a wide character string to a multi-byte character string
   *
   * Operates by allocating enough memory for the case that every wchar_t,
   * the terminator included, translates into MB_CUR_MAX bytes, then shrinking
   * the string in the end
   *
   * \param ws a null-terminated wide character string
   * \return the corresponding multi-byte character string or NULL on error.
   *         If the worst-case size does not fit into a size_t, NULL is
   *         returned with errno set to ENOMEM
   */
  static char* s_wstring_to_string_max(const wchar_t* ws)
  {
    /* Note that MB_CUR_MAX is a runtime variable, so read it once */
    const size_t max_bytes = MB_CUR_MAX;
    size_t wchars, len;
    char* buf, *shrunk;
    mbstate_t mbstate;
    /* The terminator is counted like any other character: in a
     * state-dependent encoding the null byte may be preceded by a shift
     * sequence, so it can need up to MB_CUR_MAX bytes, too
     */
    wchars = wcslen(ws) + 1;
    /* refuse sizes whose product would wrap around */
    if(wchars > (size_t) -1 / max_bytes) {
      errno = ENOMEM;
      return NULL;
    }
    len = max_bytes * wchars;
    if(! (buf = malloc(len)))
      return NULL;
    memset(&mbstate, 0, sizeof(mbstate));
    if((len = wcsrtombs(buf, &ws, len, &mbstate)) == (size_t) -1) {
      _free_keep_errno(buf);
      return NULL;
    }
    /* Shrinking only saves memory. If realloc fails, the original block is
     * untouched and already holds the complete, terminated result, so it is
     * returned as is instead of throwing the work away
     */
    shrunk = realloc(buf, len + 1);
    return shrunk ? shrunk : buf;
  }
  103.  
  /**
   * Converts a wide character string to a multi-byte character string
   *
   * Helper function for different high-level implementations.
   * Uses an initial guess for the size.
   * If the guessed size is insufficient, the buffer is doubled and the
   * conversion resumes where it stopped, until the whole input is consumed.
   *
   * \param ws a null-terminated wide character string
   * \param buflen initial size estimate in bytes. 0 is treated as 1
   * \return the corresponding multi-byte character string or NULL on error
   */
  static char* _s_wstring_to_string_realloc(const wchar_t* ws, size_t buflen)
  {
    char* buffer = NULL;
    char* resized;
    size_t bufpos = 0;
    mbstate_t mbstate;
    memset(&mbstate, 0, sizeof(mbstate));
    /* A size of 0 can never grow by doubling and realloc(NULL, 0) may return
     * NULL without there being an error, so start with at least one byte
     */
    if(buflen == 0)
      buflen = 1;
    /* to avoid parsing the string twice, we allocate memory speculatively
     * with exponential growth. Then we shrink it at the end.
     * wcsrtombs sets ws to NULL once it has stored the terminator
     */
    while(ws) {
      size_t converted;
      if(! (resized = realloc(buffer, buflen)))
        goto err; /* buffer is still valid and is released below */
      buffer = resized;
      converted = wcsrtombs(buffer + bufpos, &ws, buflen - bufpos, &mbstate);
      if(converted == (size_t) -1)
        goto err;
      bufpos += converted;
      if(ws) {
        /* ran out of space; double the size unless that would wrap around */
        if(buflen > (size_t) -1 / 2) {
          errno = ENOMEM;
          goto err;
        }
        buflen *= 2;
      }
    }
    /* Shrink buffer to the actually required size. This only saves memory:
     * if realloc fails, the original block is untouched and already holds the
     * complete, terminated result, so that one is returned instead
     */
    resized = realloc(buffer, bufpos + 1);
    return resized ? resized : buffer;
   err:
    _free_keep_errno(buffer); /* buffer may be NULL */
    return NULL;
  }
  144.  
  145.  
  /**
   * Converts a wide character string to a multi-byte character string
   *
   * Starts from the optimistic assumption that every wide character needs a
   * single byte and lets the shared helper grow the buffer exponentially if
   * that turns out to be too small
   *
   * \param ws a null-terminated wide character string
   * \return the corresponding multi-byte character string or NULL on error
   */
  static char* s_wstring_to_string_min(const wchar_t* ws)
  {
    /* one byte per wide character plus one for the terminator */
    const size_t first_guess = wcslen(ws) + 1;
    return _s_wstring_to_string_realloc(ws, first_guess);
  }
  159.  
  /**
   * Converts a wide character string to a multi-byte character string
   *
   * Starts with a fixed buffer size that is independent of the input and lets
   * the shared helper grow it exponentially when needed.
   * The fixed size is 64 bytes, i.e. one cache line
   *
   * \param ws a null-terminated wide character string
   * \return the corresponding multi-byte character string or NULL on error
   */
  static char* s_wstring_to_string_guess(const wchar_t* ws)
  {
    enum { INITIAL_GUESS = 64 };
    return _s_wstring_to_string_realloc(ws, INITIAL_GUESS);
  }
  174.  
  /**
   * Converts a wide character string to a multi-byte character string
   *
   * Operates by converting into separately allocated segments, each twice as
   * large as the one before. Then concatenates them in the end
   *
   * \param ws a null-terminated wide character string
   * \return the corresponding multi-byte character string or NULL on error.
   *         errno is preserved across the cleanup of the error path
   */
  static char* s_wstring_to_string_rope(const wchar_t* ws)
  {
    /* One converted piece of the output. The tag deliberately avoids a
     * leading underscore followed by an uppercase letter; such names are
     * reserved for the implementation
     */
    struct segment
    {
      size_t len; /* bytes stored in str, terminator not counted */
      char* str;
    };
    /* Array of exponentially growing strings */
    struct segment* rope;
    size_t ropelen = 0, ropecap = 64 / sizeof(struct segment);
    /* allocation size. Initial size is a cache line */
    size_t lastreserved = 64;
    size_t i;
    size_t outlen = 1; /* starts at 1 to account for the terminator */
    char* outbuf;
    char* outpos;
    int errno_cpy;
    mbstate_t mbstate;
    memset(&mbstate, 0, sizeof(mbstate));
    if(! (rope = malloc(ropecap * sizeof(struct segment))))
      goto err_rtrn;
    /* Convert the string into increasingly large substrings.
     * wcsrtombs sets ws to NULL once it has stored the terminator
     */
    for(i = 0; ws; ++i, lastreserved *= 2) {
      if(i == ropecap) {
        struct segment* newrope;
        ropecap *= 2;
        if(! (newrope = realloc(rope, ropecap * sizeof(struct segment))))
          goto err_free;
        rope = newrope;
      }
      if(! (rope[i].str = malloc(lastreserved)))
        goto err_free;
      /* only now does entry i own memory that the error path must release */
      ropelen = i + 1;
      if((rope[i].len = wcsrtombs(rope[i].str, &ws, lastreserved, &mbstate))
         == (size_t) -1)
        goto err_free;
      outlen += rope[i].len;
    }
    /* allocate the final string. reallocing the first string saves us the
     * hassle of copying its content and freeing it.
     * It also deals with the common case that the first string was big enough
     * and just needs to be shrunk
     */
    if(! (outbuf = realloc(rope[0].str, outlen)))
      goto err_free;
    /* append the remaining strings */
    outpos = outbuf + rope[0].len;
    for(i = 1; i < ropelen; ++i) {
      memcpy(outpos, rope[i].str, rope[i].len);
      /* advance the char pointer; arithmetic on the void* that memcpy
       * returns is not valid ISO C
       */
      outpos += rope[i].len;
      free(rope[i].str);
    }
    *outpos = '\0';
    free(rope);
    return outbuf;
   err_free:
    /* free all entries that were successfully allocated.
     * free may call munmap so we preserve errno before calling it
     */
    errno_cpy = errno;
    for(i = 0; i < ropelen; ++i)
      free(rope[i].str);
    free(rope);
    errno = errno_cpy;
   err_rtrn:
    return NULL;
  }
  250.  
  /**
   * Creates n random valid wchar_ts
   *
   * Draws candidates from rand and keeps only those that wcrtomb can encode
   * in the current locale, starting from the conversion state left by the
   * previously accepted character. No terminator is written and none of the
   * stored characters is L'\0'.
   *
   * \param ws output array with room for at least n wide characters
   * \param n number of wide characters to generate
   */
  static void make_wchars(wchar_t* ws, size_t n)
  {
    mbstate_t lastvalid, cur;
    /* scratch space for wcrtomb; the encoded bytes themselves are not used */
    char buf[MB_CUR_MAX];
    size_t i;
    memset(&lastvalid, 0, sizeof(lastvalid));
    for(i = 0; i < n; ++i) {
      for(;;) {
        /* Computed as unsigned: rand() + 1 overflows int when rand returns
         * RAND_MAX and RAND_MAX equals INT_MAX. The result is at least 1
         */
        const unsigned long candidate = (unsigned long) rand() + 1UL;
        /* skip values that wchar_t cannot represent rather than letting the
         * conversion mangle them, possibly into L'\0'
         */
        if(candidate > (unsigned long) WCHAR_MAX)
          continue;
        /* every attempt starts from the state after the last accepted one */
        cur = lastvalid;
        ws[i] = (wchar_t) candidate;
        if(wcrtomb(buf, ws[i], &cur) != (size_t) -1)
          break;
      }
      lastvalid = cur;
    }
  }
  269.  
  270. int main(void)
  271. {
  272.   typedef char* (*algorithm_t)(const wchar_t*);
  273.   static const algorithm_t algorithms[] = {
  274.     s_wstring_to_string_meas,
  275.     s_wstring_to_string_max,
  276.     s_wstring_to_string_min,
  277.     s_wstring_to_string_guess,
  278.     s_wstring_to_string_rope
  279.   };
  280.   static const char* const names[] = {
  281.     "measure", "max", "min", "guess", "rope"
  282.   };
  283.   const size_t len = 4096, algo_n = sizeof(names) / sizeof(names[0]);
  284.   wchar_t* wstr;
  285.   size_t curlen;
  286.   const char* operation;
  287.   setlocale(LC_ALL, "");
  288.   if(! (wstr = malloc(len * sizeof(wchar_t)))) {
  289.     operation = "Allocating wide characters";
  290.     goto err_rtrn;
  291.   }
  292.   make_wchars(wstr, len - 1);
  293.   wstr[len - 1] = L'\0';
  294.   for(curlen = 1; curlen <= len; curlen *= 2) {
  295.     const wchar_t* cur_wstr;
  296.     size_t repetitions;
  297.     size_t algo_i;
  298.     cur_wstr = wstr + len - curlen;
  299.     repetitions = 4096 * 1024 * 8 / curlen;
  300.     for(algo_i = 0; algo_i < algo_n; ++algo_i) {
  301.       struct timespec starttime, endtime;
  302.       size_t repetition_i;
  303.       double spenttime, time_per_wchar;
  304.       if(clock_gettime(CLOCK_MONOTONIC, &starttime)) {
  305.     operation = "Getting time";
  306.     goto err_free;
  307.       }
  308.       for(repetition_i = 0; repetition_i < repetitions; ++repetition_i) {
  309.     char* mbstr;
  310.     if(! (mbstr = algorithms[algo_i](cur_wstr))) {
  311.       operation = names[algo_i];
  312.       goto err_free;
  313.     }
  314.     free(mbstr);
  315.       }
  316.       if(clock_gettime(CLOCK_MONOTONIC, &endtime)) {
  317.     operation = "Getting time";
  318.     goto err_free;
  319.       }
  320.       spenttime = difftime(endtime.tv_sec, starttime.tv_sec)
  321.     + (endtime.tv_nsec - starttime.tv_nsec) * 1e-9;
  322.       time_per_wchar = spenttime / (repetitions * curlen);
  323.       if(printf("%zu\t%s\t%g\n", curlen, names[algo_i], time_per_wchar) < 0) {
  324.     operation = "Printing results";
  325.     goto err_free;
  326.       }
  327.     }
  328.   }
  329.   free(wstr);
  330.   return 0;
  331.  err_free:
  332.   _free_keep_errno(wstr);
  333.  err_rtrn:
  334.   perror(operation);
  335.   return 1;
  336. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement