Advertisement
donuthole

Untitled

May 31st, 2025
151
0
351 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 5.21 KB | Source Code | 0 0
  1. #include <stdio.h>      /* for fopen(), fgetc() etc. */
  2. #include <stdlib.h>     /* for EXIT_SUCCESS etc. */
  3. #include <string.h>     /* for memset() */
  4. #include <limits.h>     /* for UCHAR_MAX */
  5. #include <stdint.h>     /* for SIZE_MAX */
  6. #include <ctype.h>      /* for islower() */
  7.  
  8. static const char *input_file_name = "5_com_five.txt";  /* opened for reading by main(); also labels input errors in perror() */
  9. static const char *output_file_name = "5_test.txt";  /* opened with mode "w" by main(); also labels output errors in perror() */
  10. static const char *alphabet = "abcdefghijklmnopqrstuvwxyz";  /* the letters whose repeats are counted; self_test() checks it against islower() */
  11.  
  12.  
  /* Compile-time guard: refuse to build when UCHAR_MAX + 1 would not be
   * representable as a size_t.
   *
   * The function does nothing at run time.  Its only purpose is the array
   * declaration below: when the check fails the array size is -1, which is
   * a constraint violation, so the translation unit does not compile.
   */
  static void require_uchar_max_plus_1_does_not_overflow(void)
  {
    /* Size 1 when the check passes, -1 (a compile error) when it fails. */
    char x[UCHAR_MAX >= SIZE_MAX ? -1 : 1];
    (void)x;  /* silence the unused-variable warning */
  }
  20.  
  21. static int self_test()
  22. {
  23.   int ch = 0;
  24.   for(;;)
  25.     {
  26.       if (ch > UCHAR_MAX)
  27.     {
  28.       break;
  29.     }
  30.       int expected = islower(ch) ? 1 : 0;
  31.       int got = (strchr(alphabet, (char)ch) != NULL && ch != 0) ? 1 : 0;
  32.       if (expected != got)
  33.     {
  34.       fprintf(stderr, "expected=%d, got=%d\n", expected, got);
  35.       if (isgraph(ch))
  36.         {
  37.           fprintf(stderr, "self-test failed for character %c (%d)\n", ch, ch);
  38.         }
  39.       else
  40.         {
  41.           fprintf(stderr, "self-test failed for non-printing character %d\n", ch);
  42.         }
  43.       fprintf(stderr, "If you didn't expect a self-test failure, perhaps your locale settings are unexpected; try running with LC_ALL=C\n");
  44.       return -1;
  45.     }
  46.       ++ch;
  47.     }
  48.   return 0;
  49. }
  50.  
  51.  
  52.  
  53. /* The `want` and `freq` arrays indicate respectively whether this is
  54.  * a letter we care about and how many of them we have seen in this
  55.  * word.  Both are indexed by the character converted to unsigned char.
  56.  *
  57.  * Since main() is not re-entrant, it will not matter from a
  58.  * correctness point of view whether we allocate these on the stack or
  59.  * not.  But since we may be executing the code on a platform with
  60.  * limited stack, we avoid the stack.  This is not likely to be a
  61.  * problem in practice however, because platforms where this is likely to
  62.  * be a problem (e.g. systems with CHAR_BIT>8 and a small stack) are
  63.  * also unlikely to have a working fopen() function.
  64.  */
  65. static int want[UCHAR_MAX + 1];
  66. /* Assume unibyte input, i.e. that all letters are represented
  67.    by a single char in the input. */
  68. static size_t freq[UCHAR_MAX + 1];
  69.  
  70. static int print_word(FILE *fin, const fpos_t* pos, FILE *fout)
  71. {
  72.   if (0 != fsetpos(fin, pos))
  73.     {
  74.       perror("fsetpos");
  75.       exit(EXIT_FAILURE);
  76.     }
  77.   int ch;
  78.   while ((ch=fgetc(fin)) != EOF)
  79.     {
  80.       if (fputc(ch, fout) == EOF)
  81.     {
  82.       perror(output_file_name);
  83.       return -1;
  84.     }
  85.       if (ch == '\n')
  86.     {
  87.       break;
  88.     }
  89.     }
  90.   if (ferror(fin))
  91.     {
  92.       perror(input_file_name);
  93.       return -1;
  94.     }
  95.   return 0;
  96. }
  97.  
  98.  
  99. int main (int argc, char *argv[]) {
  100.   (void) argc;
  101.   (void) argv;
  102.   (void) require_uchar_max_plus_1_does_not_overflow; /* avoid unused-function warning */
  103.  
  104.   if (self_test() < 0)
  105.     {
  106.       return EXIT_FAILURE;
  107.     }
  108.  
  109.   int finished = 0;
  110.  
  111.   FILE * input = fopen(input_file_name, "r");
  112.   FILE * output = fopen(output_file_name, "w");
  113.  
  114.   if (input == NULL)
  115.     {
  116.       perror(input_file_name);
  117.       return EXIT_FAILURE;
  118.     }
  119.   if (output == NULL)
  120.     {
  121.       perror(output_file_name);
  122.       return EXIT_FAILURE;
  123.     }
  124.  
  125.   memset(want, 0, sizeof(want));
  126.   for (const char* p = alphabet; *p; ++p)
  127.     {
  128.       want[(unsigned char)*p] = 1;
  129.     }
  130.  
  131.   while (!finished)
  132.     {
  133.       /* Zero out the frequency histogram.  Reduycing the number of
  134.        * times we execute this loop is the primary motivation for the
  135.        * existence of `alphabet` (and `self_test()`): without it we
  136.        * could simply use islower() from the standard library.  This
  137.        * is almost certainly a premature optimization, and may not
  138.        * necessarily even be faster, depending on the behaviour of CPU
  139.        * cache.  Certainly I wouldn't choose to (initially at least)
  140.        * implement things this way in a professional context.  The
  141.        * only way to tell for sure is to benchmark it, but for the
  142.        * current implementation, I/O will likely dominate anyway.
  143.        */
  144.       for (const char*p = alphabet; *p; ++p)
  145.     {
  146.       freq[(unsigned char)*p] = 0;
  147.     }
  148.       /* Remember where this word started. */
  149.       fpos_t word_start;
  150.       if (0 != fgetpos(input, &word_start))
  151.     {
  152.       perror("fgetpos");
  153.       return EXIT_FAILURE;
  154.     }
  155.  
  156.       /* Read this word/line, character by character. */
  157.       for (;;)
  158.     {
  159.       int ch = fgetc(input);
  160.       if (ch == EOF)
  161.         {
  162.           finished = 1;
  163.           break;
  164.         }
  165.  
  166.       if (ch == '\n')
  167.         {
  168.           /* If our word contained a repeated character we already
  169.          printed it, so there is nothing to do here. */
  170.           break;
  171.         }
  172.  
  173.       if (want[(unsigned char)ch])
  174.         {
  175.           /* We don't need to worry about overflow in freq[] as the
  176.          value never gets higher than 2. */
  177.           if (++freq[(unsigned char)ch] > 1)
  178.         {
  179.           if (print_word(input, &word_start, output) < 0)
  180.             {
  181.               /* We already printed the error message */
  182.               return EXIT_FAILURE;
  183.             }
  184.           /* We have printed the word, and this leaves us at
  185.              the end of the line.  So, break out of the inner
  186.              loop in order to process the next word. */
  187.           break;
  188.         }
  189.         }
  190.     }
  191.    }
  192.    if (ferror(input))
  193.      {
  194.        perror(input_file_name);
  195.        return EXIT_FAILURE;
  196.      }
  197.    return EXIT_SUCCESS;
  198. }
  199.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement