Advertisement
drankinatty

C - Read Text File with Lines of Any Length into Memory

Apr 1st, 2017
246
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 5.38 KB | None | 0 0
  1. /*
  2.  * The program below will read any text file passed as the first argument
  3.  * (or it will read from stdin if no argument is given). It will dynamically
  4.  * allocate pointers as needed in MAXL blocks (rather than one-at-a-time
  5.  * which is highly inefficient). It will allocate (and reallocate) lines
  6.  * as required to accommodate lines of any length. It uses a fixed buffer
  7.  * of MAXC chars to read continually until a complete line is read saving
  8.  * the offset to the position in the current line to append subsequent
  9.  * reads as required until a line is fully read. It keeps track of the
  10.  * ending '\n' for each line and will accommodate files without a POSIX
  11.  * '\n' at the end of file.
  12.  */
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. #include <string.h>
  16.  
  17. enum { MAXL = 32, MAXC = 1024 };
  18.  
  19. char **read_file_into_buf (char ***buf, FILE *fp, int *nlines);
  20.  
  /*
   * Program entry point: read the file named by argv[1] (or stdin when no
   * argument is given) into memory line by line, print each line to stdout,
   * and release all storage.
   *
   * Returns 0 on success, 1 if the file cannot be opened or the reader
   * returns NULL (no buffer could be produced).
   */
  int main (int argc, char **argv) {

      char **buf = NULL;  /* array of line strings filled by the reader */
      int n = 0;          /* number of lines read; initialized so the loop
                           * below is well-defined even if the reader
                           * returns before storing a count */
      int status = 0;     /* process exit status */
      FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

      if (!fp) {  /* validate file open for reading (only reachable if argc > 1) */
          fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
          return 1;
      }

      if (read_file_into_buf (&buf, fp, &n)) {        /* read file into buf  */
          for (int i = 0; i < n; i++) {               /* loop over all lines */
              if (buf[i]) {   /* passing NULL to %s is undefined behavior */
  #ifdef WLNUMBERS
                  printf ("line[%3d]: %s\n", i, buf[i]);  /* output w/ln numbers */
  #else
                  printf ("%s\n", buf[i]);                /* output line */
  #endif
              }
              free (buf[i]);      /* free line (free(NULL) is a no-op) */
          }
          free (buf);             /* free pointer array */
      }
      else {
          status = 1;             /* reader could not produce a buffer */
      }

      if (fp != stdin) fclose (fp);     /* close file if not stdin */

      return status;
  }
  48.  
  49. /** read text file from 'fp' into 'buf' update 'nlines'.
  50.  *  Being a 3-STAR Programmer, is NOT a compliment. However,
  51.  *  to pass a pointer to pointer to char as a parameter and
  52.  *  allocate within the function, it is required. You must
  53.  *  pass the address of buf from main otherwise the function
  54.  *  recieves a copy whose allocation is lost on return. A better
  55.  *  approach is to simply assign the return in 'main' and not
  56.  *  pass buf at all.
  57.  */
  58. char **read_file_into_buf (char ***buf, FILE *fp, int *nlines)
  59. {
  60.     /* current allocation, current index, and offset if less
  61.      * than a whole line is read into line on fgets call, len
  62.      * holds the line len, and eol indicates '\n' present.
  63.      */
  64.     size_t n = MAXL, idx = 0, offset = 0, len = 0, eol = 0;
  65.     char line[MAXC] = "";   /* temp buffer for MAXC chars */
  66.     void *tmp = NULL;       /* pointer for realloc */
  67.    
  68.     /* validate address, file ptr & nlines address */
  69.     if (!buf || !fp || !nlines) {
  70.         fprintf (stderr, "error; invalid parameter.\n");
  71.         return NULL;
  72.     }
  73.    
  74.     /* allocate initial MAXL pointers, calloc used to avoid valgrind
  75.      * warning about basing a conditional jump on uninitialized value.
  76.      * calloc allocates and initializes.
  77.      */
  78.     if (!(*buf = calloc (sizeof *buf, MAXL))) {
  79.         fprintf (stderr, "error: virtual memory exhausted.\n");
  80.         return NULL;
  81.     }
  82.    
  83.     while (fgets (line, MAXC, fp))  /* read every line in file */
  84.     {
  85.         /* save offset from prior read (if any), get len */
  86.         size_t end = offset;
  87.         len = strlen (line);
  88.    
  89.         if (len && line[len - 1] == '\n') {    /* test for new line */
  90.             line[--len] = 0;                   /* overwrite with nul */
  91.             offset = 0;                        /* zero offset, all read */
  92.             eol = 1;                           /* POSIX eol present */
  93.         }
  94.         else {
  95.             line[len] = 0;  /* nul-terminate */
  96.             offset += len;  /* short read, save offset to last char */
  97.             eol = 0;        /* no POSIX eol */
  98.         }
  99.        
  100.         /* allocate/reallocate for current line + nul-byte */
  101.         tmp = realloc ((*buf)[idx], sizeof ***buf * (end + len + 1));
  102.         if (!tmp) {
  103.             fprintf (stderr, "error: realloc, memory exhausted.\n");
  104.             return *buf;  /* return current buf */
  105.         }
  106.         (*buf)[idx] = tmp;  /* assign block to current index */
  107.         strcpy ((*buf)[idx] + end, line);  /* copy line to block */
  108.        
  109.         if (!eol) continue;   /* chars remain in line, go read them */
  110.        
  111.         if (++idx == n) {   /* check pointer allocation, realloc as needed */
  112.             tmp = realloc (*buf, sizeof **buf * (n + MAXL));
  113.             if (!tmp) {
  114.                 fprintf (stderr, "error: realloc buf, memory exhausted.\n");
  115.                 return *buf;
  116.             }
  117.             *buf = tmp; /* assign new block to *buf */
  118.             memset (*buf + n, 0, sizeof **buf * MAXL);  /* zero new memory */
  119.             n += MAXL;  /* update the current number of ptrs allocated */
  120.         }
  121.         *nlines = idx;  /* update the number of lines read */
  122.     }
  123.     if (!eol) {         /* protect against file with no POSIX ending '\n' */
  124.         idx++;          /* account for final line */
  125.         *nlines = idx;  /* update nlines */
  126.     }
  127.    
  128.     /* final realloc to size buf to exactly fit number of lines */
  129.     tmp = realloc (*buf, sizeof **buf * (idx));
  130.     if (!tmp)   /* if it fails, return current buf */
  131.         return *buf;
  132.    
  133.     *buf = tmp; /* assign reallocated block to buf */
  134.    
  135.     return *buf;
  136. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement