# HG changeset patch # User Paul Eggert # Date 1125517093 0 # Node ID 5862ee08bfc102df9cd56edd3031d3492e3826c5 # Parent a10e4460ad4c972bfbe6d2c9a751a9b30e605ca7 * lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char): (optimize_subexps, lower_subexp): Don't assume 1<<31 has defined behavior on hosts with 32-bit int, since the signed shift might overflow. Use 1u<<31 instead. * lib/regex_internal.h (bitset_set, bitset_clear, bitset_contain): Likewise. * lib/regexec.c (check_dst_limits_calc_pos_1): (check_subexp_matching_top): Likewise. * lib/regcomp.c (optimize_subexps, lower_subexp): Use CHAR_BIT rather than 8, for clarity. * lib/regexec.c (check_dst_limits_calc_pos_1): (check_subexp_matching_top): Likewise. * lib/regcomp.c (init_dfa): Make table_size unsigned, so that we don't have to worry about portability issues when shifting it left. Remove no-longer-needed test for table_size > 0. * lib/regcomp.c (parse_sub_exp): Do not shift more bits than there are in a word, as the resulting behavior is undefined. * lib/regexec.c (check_dst_limits_calc_pos_1): Likewise; in one case, a <= should have been an <, and in another case the whole test was missing. * lib/regex_internal.h (BYTE_BITS): Remove. All uses changed to the standard name CHAR_BIT. * lib/regexec.c (match_ctx_add_entry): Don't assume that ~0 == -1; this is not true on one's complement and signed-magnitude hosts. diff --git a/lib/ChangeLog b/lib/ChangeLog --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,5 +1,29 @@ 2005-08-31 Paul Eggert + * regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char): + (optimize_subexps, lower_subexp): + Don't assume 1<<31 has defined behavior on hosts with 32-bit int, + since the signed shift might overflow. Use 1u<<31 instead. + * regex_internal.h (bitset_set, bitset_clear, bitset_contain): Likewise. + * regexec.c (check_dst_limits_calc_pos_1, check_subexp_matching_top): + Likewise. + * regcomp.c (optimize_subexps, lower_subexp): + Use CHAR_BIT rather than 8, for clarity. + * regexec.c (check_dst_limits_calc_pos_1): + (check_subexp_matching_top): Likewise. + * regcomp.c (init_dfa): Make table_size unsigned, so that we don't + have to worry about portability issues when shifting it left. + Remove no-longer-needed test for table_size > 0. + * regcomp.c (parse_sub_exp): Do not shift more bits than there are + in a word, as the resulting behavior is undefined. + * regexec.c (check_dst_limits_calc_pos_1): Likewise; + in one case, a <= should have been an <, and in another case the + whole test was missing. + * regex_internal.h (BYTE_BITS): Remove. All uses changed to + the standard name CHAR_BIT. + * regexec.c (match_ctx_add_entry): Don't assume that ~0 == -1; + this is not true on one's complement and signed-magnitude hosts. + * regex_internal.h (re_sub_match_top_t): Remove unused member next_last_offset. (struct re_dfa_t): Remove unused member states_alloc. diff --git a/lib/regcomp.c b/lib/regcomp.c --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -336,7 +336,7 @@ int i, j, ch; for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) - if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) + if (dfa->nodes[node].opr.sbcset[i] & (1u << j)) re_set_fastmap (fastmap, icase, ch); } #ifdef RE_ENABLE_I18N @@ -798,7 +798,7 @@ static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len) { - int table_size; + unsigned int table_size; #ifndef _LIBC char *codeset_name; #endif @@ -812,7 +812,7 @@ dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); /* table_size = 2 ^ ceil(log pat_len) */ - for (table_size = 1; table_size > 0; table_size <<= 1) + for (table_size = 1; ; table_size <<= 1) if (table_size > pat_len) break; @@ -872,7 +872,7 @@ { wint_t wch = __btowc (ch); if (wch != WEOF) - dfa->sb_char[i] |= 1 << j; + dfa->sb_char[i] |= 1u << j; # ifndef _LIBC if (isascii (ch) && wch != ch) dfa->map_notascii = 1; @@ -899,7 +899,7 @@ for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') - dfa->word_char[i] |= 1 << j; + dfa->word_char[i] |= 1u << j; } /* Free the work area which are only used while compiling. */ @@ -1222,8 +1222,8 @@ node->left->parent = node; dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; - if (other_idx < 8 * sizeof (dfa->used_bkref_map)) - dfa->used_bkref_map &= ~(1 << other_idx); + if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map) + dfa->used_bkref_map &= ~(1u << other_idx); } return REG_NOERROR; @@ -1266,8 +1266,8 @@ very common, so we do not lose much. An example that triggers this case is the sed "script" /\(\)/x. */ && node->left != NULL - && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map) - || !(dfa->used_bkref_map & (1 << node->token.opr.idx)))) + && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map + || !(dfa->used_bkref_map & (1u << node->token.opr.idx)))) return node->left; /* Convert the SUBEXP node to the concatenation of an @@ -2380,7 +2380,9 @@ if (BE (*err != REG_NOERROR, 0)) return NULL; } - dfa->completed_bkref_map |= 1 << cur_nsub; + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; tree = create_tree (dfa, tree, NULL, SUBEXP); if (BE (tree == NULL, 0)) diff --git a/lib/regex_internal.h b/lib/regex_internal.h --- a/lib/regex_internal.h +++ b/lib/regex_internal.h @@ -90,8 +90,6 @@ # define inline #endif -/* Number of bits in a byte. */ -#define BYTE_BITS 8 /* Number of single byte character. */ #define SBC_MAX 256 @@ -124,16 +122,16 @@ extern const size_t __re_error_msgid_idx[] attribute_hidden; /* Number of bits in an unsinged int. */ -#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS) +#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT) /* Number of unsigned int in an bit_set. */ #define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS) typedef unsigned int bitset[BITSET_UINTS]; typedef unsigned int *re_bitset_ptr_t; typedef const unsigned int *re_const_bitset_ptr_t; -#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS) -#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS)) -#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS)) +#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS) +#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS)) +#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS)) #define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS) #define bitset_set_all(set) \ memset (set, 255, sizeof (unsigned int) * BITSET_UINTS) diff --git a/lib/regexec.c b/lib/regexec.c --- a/lib/regexec.c +++ b/lib/regexec.c @@ -1895,8 +1895,9 @@ if (ent->node != node) continue; - if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map) - && !(ent->eps_reachable_subexps_map & (1 << subexp_idx))) + if (subexp_idx + < CHAR_BIT * sizeof ent->eps_reachable_subexps_map + && !(ent->eps_reachable_subexps_map & (1u << subexp_idx))) continue; /* Recurse trying to reach the OP_OPEN_SUBEXP and @@ -1922,7 +1923,9 @@ if (cpos == 0 && (boundaries & 2)) return 0; - ent->eps_reachable_subexps_map &= ~(1 << subexp_idx); + if (subexp_idx + < CHAR_BIT * sizeof ent->eps_reachable_subexps_map) + ent->eps_reachable_subexps_map &= ~(1u << subexp_idx); } while (ent++->more); } @@ -2376,8 +2379,8 @@ { int node = cur_nodes->elems[node_idx]; if (dfa->nodes[node].type == OP_OPEN_SUBEXP - && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map)) - && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx)) + && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map + && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx)) { err = match_ctx_add_subtop (mctx, node, str_idx); if (BE (err != REG_NOERROR, 0)) @@ -4166,7 +4169,7 @@ A backreference does not epsilon-transition unless it is empty, so set to all zeros if FROM != TO. */ mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map - = (from == to ? ~0 : 0); + = (from == to ? -1 : 0); mctx->bkref_ents[mctx->nbkref_ents++].more = 0; if (mctx->max_mb_elem_len < to - from)