# HG changeset patch # User Bruno Haible # Date 1318623856 -7200 # Node ID 0319356f65c3112be3162154e2fa837cb74000a9 # Parent 0098fa6557113728c2189a3ef3c0f41960e96352 ffsl: Optimize on 64-bit platforms. * lib/ffsl.h (FUNC): Omit a test from the last loop round. Do loop unrolling. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2011-10-14 Bruno Haible + + ffsl: Optimize on 64-bit platforms. + * lib/ffsl.h (FUNC): Omit a test from the last loop round. Do loop + unrolling. + 2011-10-13 Bruno Haible ffsl: Optimize on 32-bit platforms. diff --git a/lib/ffsl.h b/lib/ffsl.h --- a/lib/ffsl.h +++ b/lib/ffsl.h @@ -34,34 +34,31 @@ #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) && defined GCC_BUILTIN return GCC_BUILTIN (i); #else - if (sizeof (TYPE) == sizeof (int)) - return ffs (i); - else + unsigned TYPE j = i; + /* Split j into chunks, and look at one chunk after the other. */ + enum { chunk_bits = CHAR_BIT * sizeof (unsigned int) }; + /* The number of chunks is ceil (sizeof (TYPE) / sizeof (unsigned int)) + = (sizeof (TYPE) - 1) / sizeof (unsigned int) + 1. */ + enum { chunk_count = (sizeof (TYPE) - 1) / sizeof (unsigned int) + 1 }; + + if (chunk_count > 1) { - unsigned TYPE j = i; - /* Split j into chunks, and look at one chunk after the other. */ - /* Define chunk_bits so as to avoid a GCC warning - "right shift count >= width of type" - if sizeof (TYPE) == sizeof (int). */ - enum - { - chunk_bits = (sizeof (TYPE) != sizeof (int) - ? CHAR_BIT * sizeof (unsigned int) - : 0) - }; - int result = 0; + size_t k; /* It is tempting to write if (!j) here, but if we do this, Solaris 10/x86 "cc -O" miscompiles the code. */ if (!i) return 0; - while (1) - { - if ((unsigned int) j) - return result + ffs ((unsigned int) j); - j >>= chunk_bits; - result += chunk_bits; - } + /* Unroll the first loop round. k = 0. */ + if ((unsigned int) j) + return ffs ((unsigned int) j); + /* Generic loop. */ + for (k = 1; k < chunk_count - 1; k++) + if ((unsigned int) (j >> (k * chunk_bits)) != 0) + return k * chunk_bits + ffs ((unsigned int) (j >> (k * chunk_bits))); } + /* Last loop round. k = chunk_count - 1. */ + return (chunk_count - 1) * chunk_bits + + ffs ((unsigned int) (j >> ((chunk_count - 1) * chunk_bits))); #endif }