Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h> /* for fopen(), fgetc() etc. */
- #include <stdlib.h> /* for EXIT_SUCCESS etc. */
- #include <string.h> /* for memset() */
- #include <limits.h> /* for UCHAR_MAX */
- #include <stdint.h> /* for SIZE_MAX */
- #include <ctype.h> /* for islower() */
/* Path of the word list that is read, one word per line. */
static const char *input_file_name = "5_com_five.txt";

/* Path of the file that receives the selected words. */
static const char *output_file_name = "5_test.txt";

/* The letters whose repetitions within a word are counted. */
static const char *alphabet = "abcdefghijklmnopqrstuvwxyz";
/* Compile-time guard: this function exists only so that the translation
 * unit fails to compile when UCHAR_MAX + 1 cannot be represented as a
 * size_t.  It does nothing at run time.
 */
static void require_uchar_max_plus_1_does_not_overflow()
{
    /* The array length evaluates to -1, which is not a valid array
       size, unless UCHAR_MAX is strictly below SIZE_MAX. */
    char guard[UCHAR_MAX < SIZE_MAX ? 1 : -1];

    (void)guard; /* silence the unused-variable warning */
}
- static int self_test()
- {
- int ch = 0;
- for(;;)
- {
- if (ch > UCHAR_MAX)
- {
- break;
- }
- int expected = islower(ch) ? 1 : 0;
- int got = (strchr(alphabet, (char)ch) != NULL && ch != 0) ? 1 : 0;
- if (expected != got)
- {
- fprintf(stderr, "expected=%d, got=%d\n", expected, got);
- if (isgraph(ch))
- {
- fprintf(stderr, "self-test failed for character %c (%d)\n", ch, ch);
- }
- else
- {
- fprintf(stderr, "self-test failed for non-printing character %d\n", ch);
- }
- fprintf(stderr, "If you didn't expect a self-test failure, perhaps your locale settings are unexpected; try running with LC_ALL=C\n");
- return -1;
- }
- ++ch;
- }
- return 0;
- }
/* Per-character lookup tables, indexed by a character value converted
 * to unsigned char:
 *
 *   want[c]  is non-zero when c is a letter we care about;
 *   freq[c]  is how many times c has been seen in the current word.
 *
 * Since main() is not re-entrant, it does not matter from a
 * correctness point of view whether these live on the stack or not.
 * But since the code may run on a platform with a limited stack, we
 * keep them off the stack.  This is unlikely to be a problem in
 * practice, because platforms where it would be one (e.g. systems
 * with CHAR_BIT > 8 and a small stack) are also unlikely to have a
 * working fopen() function.
 *
 * The tables assume unibyte input, i.e. that every letter is
 * represented by a single char in the input.
 */
static int want[UCHAR_MAX + 1] = {0};
static size_t freq[UCHAR_MAX + 1] = {0};
- static int print_word(FILE *fin, const fpos_t* pos, FILE *fout)
- {
- if (0 != fsetpos(fin, pos))
- {
- perror("fsetpos");
- exit(EXIT_FAILURE);
- }
- int ch;
- while ((ch=fgetc(fin)) != EOF)
- {
- if (fputc(ch, fout) == EOF)
- {
- perror(output_file_name);
- return -1;
- }
- if (ch == '\n')
- {
- break;
- }
- }
- if (ferror(fin))
- {
- perror(input_file_name);
- return -1;
- }
- return 0;
- }
- int main (int argc, char *argv[]) {
- (void) argc;
- (void) argv;
- (void) require_uchar_max_plus_1_does_not_overflow; /* avoid unused-function warning */
- if (self_test() < 0)
- {
- return EXIT_FAILURE;
- }
- int finished = 0;
- FILE * input = fopen(input_file_name, "r");
- FILE * output = fopen(output_file_name, "w");
- if (input == NULL)
- {
- perror(input_file_name);
- return EXIT_FAILURE;
- }
- if (output == NULL)
- {
- perror(output_file_name);
- return EXIT_FAILURE;
- }
- memset(want, 0, sizeof(want));
- for (const char* p = alphabet; *p; ++p)
- {
- want[(unsigned char)*p] = 1;
- }
- while (!finished)
- {
- /* Zero out the frequency histogram. Reduycing the number of
- * times we execute this loop is the primary motivation for the
- * existence of `alphabet` (and `self_test()`): without it we
- * could simply use islower() from the standard library. This
- * is almost certainly a premature optimization, and may not
- * necessarily even be faster, depending on the behaviour of CPU
- * cache. Certainly I wouldn't choose to (initially at least)
- * implement things this way in a professional context. The
- * only way to tell for sure is to benchmark it, but for the
- * current implementation, I/O will likely dominate anyway.
- */
- for (const char*p = alphabet; *p; ++p)
- {
- freq[(unsigned char)*p] = 0;
- }
- /* Remember where this word started. */
- fpos_t word_start;
- if (0 != fgetpos(input, &word_start))
- {
- perror("fgetpos");
- return EXIT_FAILURE;
- }
- /* Read this word/line, character by character. */
- for (;;)
- {
- int ch = fgetc(input);
- if (ch == EOF)
- {
- finished = 1;
- break;
- }
- if (ch == '\n')
- {
- /* If our word contained a repeated character we already
- printed it, so there is nothing to do here. */
- break;
- }
- if (want[(unsigned char)ch])
- {
- /* We don't need to worry about overflow in freq[] as the
- value never gets higher than 2. */
- if (++freq[(unsigned char)ch] > 1)
- {
- if (print_word(input, &word_start, output) < 0)
- {
- /* We already printed the error message */
- return EXIT_FAILURE;
- }
- /* We have printed the word, and this leaves us at
- the end of the line. So, break out of the inner
- loop in order to process the next word. */
- break;
- }
- }
- }
- }
- if (ferror(input))
- {
- perror(input_file_name);
- return EXIT_FAILURE;
- }
- return EXIT_SUCCESS;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement