changeset 13931:2a046f95cb50

read-file: reorganize to avoid various issues * lib/read-file.c (fread_file): Read 1 more byte than is currently in a regular file, to immediately detect EOF, and thus avoid any realloc()s. As well as being slower, these may fail, thus artificially limiting the supported size. Allocate up to SIZE_MAX for streams, rather than limiting to about SIZE_MAX - SIZE_MAX/5. Don't use the 'size + BUFSIZ + 1' expression, which could overflow and cause invalid operation. As a style decision, explicitly check for overflow rather than using a temporary roll over variable (new_alloc).
author Pádraig Brady <P@draigBrady.com>
date Mon, 13 Dec 2010 08:08:23 +0000
parents dbcecbd09d6d
children 435d0aba5282 ef8b363d4b54
files ChangeLog lib/read-file.c
diffstat 2 files changed, 41 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2010-12-13  Pádraig Brady <P@draigBrady.com>
+
+	read-file: Improve handling of large files
+	* lib/read-file.c (fread_file): Minimize realloc()s
+	for regular files, and better manage sizes around SIZE_MAX.
+
 2010-12-13  Eric Blake  <eblake@redhat.com>
 
 	cloexec, fcntl: relax license
--- a/lib/read-file.c
+++ b/lib/read-file.c
@@ -39,12 +39,12 @@
    and set *LENGTH to the length of the string.  The string is
    zero-terminated, but the terminating zero byte is not counted in
    *LENGTH.  On errors, *LENGTH is undefined, errno preserves the
-   values set by system functions (if any), and NULL is returned. */
+   values set by system functions (if any), and NULL is returned.  */
 char *
 fread_file (FILE * stream, size_t * length)
 {
   char *buf = NULL;
-  size_t alloc = 0;
+  size_t alloc = BUFSIZ;
 
   /* For a regular file, allocate a buffer that has exactly the right
      size.  This avoids the need to do dynamic reallocations later.  */
@@ -59,59 +59,31 @@
           {
             off_t alloc_off = st.st_size - pos;
 
-            if (SIZE_MAX <= alloc_off)
+            /* '1' below, accounts for the trailing NUL.  */
+            if (SIZE_MAX - 1 < alloc_off)
               {
                 errno = ENOMEM;
                 return NULL;
               }
 
             alloc = alloc_off + 1;
-
-            buf = malloc (alloc);
-            if (!buf)
-              /* errno is ENOMEM.  */
-              return NULL;
           }
       }
   }
 
+  if (!(buf = malloc (alloc)))
+    return NULL; /* errno is ENOMEM.  */
+
   {
     size_t size = 0; /* number of bytes read so far */
     int save_errno;
 
     for (;;)
       {
-        size_t count;
-        size_t requested;
-
-        if (size + BUFSIZ + 1 > alloc)
-          {
-            char *new_buf;
-            size_t new_alloc = alloc + alloc / 2;
-
-            /* Check against overflow.  */
-            if (new_alloc < alloc)
-              {
-                save_errno = ENOMEM;
-                break;
-              }
-
-            alloc = new_alloc;
-            if (alloc < size + BUFSIZ + 1)
-              alloc = size + BUFSIZ + 1;
-
-            new_buf = realloc (buf, alloc);
-            if (!new_buf)
-              {
-                save_errno = errno;
-                break;
-              }
-
-            buf = new_buf;
-          }
-
-        requested = alloc - size - 1;
-        count = fread (buf + size, 1, requested, stream);
+        /* This reads 1 more than the size of a regular file
+           so that we get eof immediately.  */
+        size_t requested = alloc - size;
+        size_t count = fread (buf + size, 1, requested, stream);
         size += count;
 
         if (count != requested)
@@ -121,7 +93,7 @@
               break;
 
             /* Shrink the allocated memory if possible.  */
-            if (size + 1 < alloc)
+            if (size < alloc - 1)
               {
                 char *smaller_buf = realloc (buf, size + 1);
                 if (smaller_buf != NULL)
@@ -132,6 +104,29 @@
             *length = size;
             return buf;
           }
+
+        {
+          char *new_buf;
+
+          if (alloc == SIZE_MAX)
+            {
+              save_errno = ENOMEM;
+              break;
+            }
+
+          if (alloc < SIZE_MAX - alloc / 2)
+            alloc = alloc + alloc / 2;
+          else
+            alloc = SIZE_MAX;
+
+          if (!(new_buf = realloc (buf, alloc)))
+            {
+              save_errno = errno;
+              break;
+            }
+
+          buf = new_buf;
+        }
       }
 
     free (buf);