# HG changeset patch # User Jim Meyering # Date 982394071 0 # Node ID 990d29e7d4eff0186f0e36e8cd0879ca8181bb41 # Parent 1d5f75101938ef4fb9332b012b60f82fc67e8287 Update from libc. diff --git a/lib/regex.c b/lib/regex.c --- a/lib/regex.c +++ b/lib/regex.c @@ -62,7 +62,7 @@ # define US_CHAR_TYPE wchar_t/* unsigned character type */ # define COMPILED_BUFFER_VAR wc_buffer # define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ -# define CHAR_CLASS_SIZE (sizeof(wctype_t)/sizeof(CHAR_TYPE)+1) +# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_TYPE)+1) # define PUT_CHAR(c) \ do { \ if (MC_CUR_MAX == 1) \ @@ -289,6 +289,8 @@ static char re_syntax_table[CHAR_SET_SIZE]; +static void init_syntax_once PARAMS ((void)); + static void init_syntax_once () { @@ -1173,7 +1175,7 @@ static size_t convert_mbs_to_wcs (CHAR_TYPE *dest, const unsigned char* src, size_t len, int *offset_buffer, - int *is_binary); + char *is_binary); static size_t convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary) CHAR_TYPE *dest; @@ -1190,7 +1192,7 @@ = {0, 3, 4, 6} */ int *offset_buffer; - int *is_binary; + char *is_binary; { wchar_t *pdest = dest; const unsigned char *psrc = src; @@ -1905,7 +1907,8 @@ #ifndef TRANSLATE # ifdef MBS_SUPPORT # define TRANSLATE(d) \ - (translate && (sizeof(d) <= 1)? (char) translate[(unsigned char) (d)] : (d)) + ((translate && ((US_CHAR_TYPE) (d)) <= 0xff) \ + ? (char) translate[(unsigned char) (d)] : (d)) #else # define TRANSLATE(d) \ (translate ? (char) translate[(unsigned char) (d)] : (d)) @@ -2282,9 +2285,9 @@ /* offset buffer for optimizatoin. See convert_mbs_to_wc. */ int *mbs_offset = NULL; /* It hold whether each wchar_t is binary data or not. */ - int *is_binary = NULL; + char *is_binary = NULL; /* A flag whether exactn is handling binary data or not. */ - int is_exactn_bin = FALSE; + char is_exactn_bin = FALSE; #endif /* MBS_SUPPORT */ /* A random temporary spot in PATTERN. */ @@ -2344,7 +2347,7 @@ /* Initialize the wchar_t PATTERN and offset_buffer. */ p = pend = pattern = TALLOC(csize, CHAR_TYPE); mbs_offset = TALLOC(csize + 1, int); - is_binary = TALLOC(csize + 1, int); + is_binary = TALLOC(csize + 1, char); if (pattern == NULL || mbs_offset == NULL || is_binary == NULL) { if (pattern) free(pattern); @@ -2806,6 +2809,8 @@ if (c == ':' && *p == ']') { wctype_t wt; + uintptr_t alignedp; + /* Query the character class as wctype_t. */ wt = IS_CHAR_CLASS (str); if (wt == 0) @@ -2823,9 +2828,14 @@ b += CHAR_CLASS_SIZE; /* Move data which follow character classes not to violate the data. */ - insert_space(CHAR_CLASS_SIZE, laststart + 6, b - 1); + insert_space(CHAR_CLASS_SIZE, + laststart + 6 + laststart[1], + b - 1); + alignedp = ((uintptr_t)(laststart + 6 + laststart[1]) + + __alignof__(wctype_t) - 1) + & ~(uintptr_t)(__alignof__(wctype_t) - 1); /* Store the character class. */ - *((wctype_t*)(laststart + 6)) = wt; + *((wctype_t*)alignedp) = wt; /* Update length of char_classes */ laststart[1] += CHAR_CLASS_SIZE; @@ -5050,9 +5060,9 @@ } else /* Searching backwards. */ { - register char c = (size1 == 0 || startpos >= size1 - ? string2[startpos - size1] - : string1[startpos]); + register CHAR_TYPE c = (size1 == 0 || startpos >= size1 + ? string2[startpos - size1] + : string1[startpos]); if (!fastmap[(unsigned char) TRANSLATE (c)]) goto advance; @@ -5186,8 +5196,6 @@ FREE_VAR (string2); \ FREE_VAR (mbs_offset1); \ FREE_VAR (mbs_offset2); \ - FREE_VAR (is_binary1); \ - FREE_VAR (is_binary2); \ } while (0) # else /* not MBS_SUPPORT */ # define FREE_VARIABLES() \ @@ -5205,17 +5213,16 @@ } while (0) # endif /* MBS_SUPPORT */ #else +# define FREE_VAR(var) if (var) free (var); var = NULL # ifdef MBS_SUPPORT # define FREE_VARIABLES() \ do { \ - if (string1) free (string1); \ - if (string2) free (string2); \ - if (mbs_offset1) free (mbs_offset1); \ - if (mbs_offset2) free (mbs_offset2); \ - if (is_binary1) free (is_binary1); \ - if (is_binary2) free (is_binary2); \ + FREE_VAR (string1); \ + FREE_VAR (string2); \ + FREE_VAR (mbs_offset1); \ + FREE_VAR (mbs_offset2); \ } while (0) -# eles +# else # define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ # endif /* MBS_SUPPORT */ #endif /* not MATCH_MAY_ALLOCATE */ @@ -5304,10 +5311,14 @@ #endif #ifdef MBS_SUPPORT + +static int count_mbs_length PARAMS ((int *, int)); + /* This check the substring (from 0, to length) of the multibyte string, to which offset_buffer correspond. And count how many wchar_t_characters the substring occupy. We use offset_buffer to optimization. See convert_mbs_to_wcs. */ + static int count_mbs_length(offset_buffer, length) int *offset_buffer; @@ -5365,7 +5376,7 @@ /* offset buffer for optimizatoin. See convert_mbs_to_wc. */ int *mbs_offset1 = NULL, *mbs_offset2 = NULL; /* They hold whether each wchar_t is binary data or not. */ - int *is_binary1 = NULL, *is_binary2 = NULL; + char *is_binary = NULL; #endif /* MBS_SUPPORT */ /* Just past the end of the corresponding string. */ @@ -5544,38 +5555,39 @@ fill them with converted string. */ if (csize1 != 0) { - string1 = TALLOC (csize1 + 1, CHAR_TYPE); - mbs_offset1 = TALLOC (csize1 + 1, int); - is_binary1 = TALLOC (csize1 + 1, int); - if (!string1 || !mbs_offset1 || !is_binary1) + string1 = REGEX_TALLOC (csize1 + 1, CHAR_TYPE); + mbs_offset1 = REGEX_TALLOC (csize1 + 1, int); + is_binary = REGEX_TALLOC (csize1 + 1, char); + if (!string1 || !mbs_offset1 || !is_binary) { - if (string1) free(string1); - if (mbs_offset1) free(mbs_offset1); - if (is_binary1) free(is_binary1); + FREE_VAR (string1); + FREE_VAR (mbs_offset1); + FREE_VAR (is_binary); return -2; } size1 = convert_mbs_to_wcs(string1, cstring1, csize1, - mbs_offset1, is_binary1); + mbs_offset1, is_binary); string1[size1] = L'\0'; /* for a sentinel */ + FREE_VAR (is_binary); } if (csize2 != 0) { string2 = REGEX_TALLOC (csize2 + 1, CHAR_TYPE); mbs_offset2 = REGEX_TALLOC (csize2 + 1, int); - is_binary2 = TALLOC (csize2 + 1, int); - if (!string2 || !mbs_offset2 || !is_binary2) + is_binary = REGEX_TALLOC (csize2 + 1, char); + if (!string2 || !mbs_offset2 || !is_binary) { - if (string1) free(string1); - if (mbs_offset1) free(mbs_offset1); - if (is_binary1) free(is_binary1); - if (string2) free(string2); - if (mbs_offset2) free(mbs_offset2); - if (is_binary2) free(is_binary2); + FREE_VAR (string1); + FREE_VAR (mbs_offset1); + FREE_VAR (string2); + FREE_VAR (mbs_offset2); + FREE_VAR (is_binary); return -2; } size2 = convert_mbs_to_wcs(string2, cstring2, csize2, - mbs_offset2, is_binary2); + mbs_offset2, is_binary); string2[size2] = L'\0'; /* for a sentinel */ + FREE_VAR (is_binary); } /* We need to cast pattern to (wchar_t*), because we casted this compiled @@ -6000,7 +6012,11 @@ /* match with char_class? */ for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE) { - wctype_t wctype = *((wctype_t*)workp); + wctype_t wctype; + uintptr_t alignedp = ((uintptr_t)workp + + __alignof__(wctype_t) - 1) + & ~(uintptr_t)(__alignof__(wctype_t) - 1); + wctype = *((wctype_t*)alignedp); workp += CHAR_CLASS_SIZE; if (iswctype((wint_t)c, wctype)) goto char_set_matched;