# HG changeset patch # User Pádraig Brady # Date 1292227703 0 # Node ID 2a046f95cb50fd2b8a5024fa5ae60f0dcf386e53 # Parent dbcecbd09d6d520721b41a9aae3eb78d2dc39984 read-file: reorganize to avoid various issues * lib/read-file.c (fread_file): Read 1 more byte than is currently in a regular file, to immediately detect EOF, and thus avoid any realloc()s. As well as being slower, these may fail, thus artificially limiting the supported size. Allocate up to SIZE_MAX for streams, rather than limiting to about SIZE_MAX - SIZE_MAX/5. Don't use the 'size + BUFSIZ + 1' expression, which could overflow and cause invalid operation. As a style decision, explicitly check for overflow rather than using a temporary roll over variable (new_alloc). diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2010-12-13 Pádraig Brady + + read-file: Improve handling of large files + * lib/read-file.c (fread_file): Minimize realloc()s + for regular files, and better manage sizes around SIZE_MAX. + 2010-12-13 Eric Blake cloexec, fcntl: relax license diff --git a/lib/read-file.c b/lib/read-file.c --- a/lib/read-file.c +++ b/lib/read-file.c @@ -39,12 +39,12 @@ and set *LENGTH to the length of the string. The string is zero-terminated, but the terminating zero byte is not counted in *LENGTH. On errors, *LENGTH is undefined, errno preserves the - values set by system functions (if any), and NULL is returned. */ + values set by system functions (if any), and NULL is returned. */ char * fread_file (FILE * stream, size_t * length) { char *buf = NULL; - size_t alloc = 0; + size_t alloc = BUFSIZ; /* For a regular file, allocate a buffer that has exactly the right size. This avoids the need to do dynamic reallocations later. */ @@ -59,59 +59,31 @@ { off_t alloc_off = st.st_size - pos; - if (SIZE_MAX <= alloc_off) + /* '1' below, accounts for the trailing NUL. */ + if (SIZE_MAX - 1 < alloc_off) { errno = ENOMEM; return NULL; } alloc = alloc_off + 1; - - buf = malloc (alloc); - if (!buf) - /* errno is ENOMEM. */ - return NULL; } } } + if (!(buf = malloc (alloc))) + return NULL; /* errno is ENOMEM. */ + { size_t size = 0; /* number of bytes read so far */ int save_errno; for (;;) { - size_t count; - size_t requested; - - if (size + BUFSIZ + 1 > alloc) - { - char *new_buf; - size_t new_alloc = alloc + alloc / 2; - - /* Check against overflow. */ - if (new_alloc < alloc) - { - save_errno = ENOMEM; - break; - } - - alloc = new_alloc; - if (alloc < size + BUFSIZ + 1) - alloc = size + BUFSIZ + 1; - - new_buf = realloc (buf, alloc); - if (!new_buf) - { - save_errno = errno; - break; - } - - buf = new_buf; - } - - requested = alloc - size - 1; - count = fread (buf + size, 1, requested, stream); + /* This reads 1 more than the size of a regular file + so that we get eof immediately. */ + size_t requested = alloc - size; + size_t count = fread (buf + size, 1, requested, stream); size += count; if (count != requested) @@ -121,7 +93,7 @@ break; /* Shrink the allocated memory if possible. */ - if (size + 1 < alloc) + if (size < alloc - 1) { char *smaller_buf = realloc (buf, size + 1); if (smaller_buf != NULL) @@ -132,6 +104,29 @@ *length = size; return buf; } + + { + char *new_buf; + + if (alloc == SIZE_MAX) + { + save_errno = ENOMEM; + break; + } + + if (alloc < SIZE_MAX - alloc / 2) + alloc = alloc + alloc / 2; + else + alloc = SIZE_MAX; + + if (!(new_buf = realloc (buf, alloc))) + { + save_errno = errno; + break; + } + + buf = new_buf; + } } free (buf);