diff options
author | Ulrich Drepper <drepper@redhat.com> | 2002-05-24 08:49:00 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2002-05-24 08:49:00 +0000 |
commit | c0a0f9a32c8baa6ab93d00eb42d92c02e9e146d7 (patch) | |
tree | 8cf43cdab37f80704e698ccc65b752b93320fa11 /posix | |
parent | 12b7f470d21eeb897e25b9ef6dae4bfd787168f6 (diff) | |
download | glibc-c0a0f9a32c8baa6ab93d00eb42d92c02e9e146d7.tar.gz glibc-c0a0f9a32c8baa6ab93d00eb42d92c02e9e146d7.tar.xz glibc-c0a0f9a32c8baa6ab93d00eb42d92c02e9e146d7.zip |
Update.
2002-05-21 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regex.c: Define `inline' as a macro into nothing for the compilers which lack the keyword. * posix/regex.h: (RE_SYNTAX_GNU_AWK): Remove RE_CONTEXT_INVALID_OPS for the compatibility of gawk. * posix/regcomp.c: Add fake implementation of isblank() for the environments which lack the function. Don't use free_charset() in case of non-i18n envs. (build_range_exp): Don't use i18n related code in case of non-i18n envs. (build_collating_symbol): Likewise. (build_equiv_class): Likewise. (build_charclass): Likewise. (re_compile_fastmap_iter): Likewise. (parse_bracket_exp): Likewise. (build_word_op): Likewise. (regfree): Don't use free_charset() in case of non-i18n envs. * posix/regex_internal.h: Remove COMPLEX_BRACKET from re_token_type_t in case of non-i18n envs. Don't define re_charset_t in case of non-i18n envs. Change the type of wcs of re_string_t from wchar_t to wint_t, since we store also WEOF. * posix/regex_internal.c: (re_string_realloc_buffers): Change the type of wcs of re_string_t from wchar_t to wint_t. (re_string_reconstruct): Likewise. (create_ci_newstate): Don't use i18n related code in case of non-i18n envs. (create_cd_newstate): Likewise. 2002-05-24 Ulrich Drepper <drepper@redhat.com> * iconv/loop.c: Fix typo. 2002-05-23 Jakub Jelinek <jakub@redhat.com> * inet/ether_line.c (ether_line): Fix a typo causing only lower 4 bits of each ethernet address byte being assigned. Don't modify what line points to. * inet/tst-ether_aton.c (main): Add ether_line tests. 2002-05-23 Marcus Brinkmann <marcus@gnu.org> * manual/filesys.texi: Don't make readlink example leak memory when readlink fails.
Diffstat (limited to 'posix')
-rw-r--r-- | posix/regcomp.c | 239 | ||||
-rw-r--r-- | posix/regex.c | 1 | ||||
-rw-r--r-- | posix/regex.h | 3 | ||||
-rw-r--r-- | posix/regex_internal.c | 10 | ||||
-rw-r--r-- | posix/regex_internal.h | 22 |
5 files changed, 213 insertions, 62 deletions
diff --git a/posix/regcomp.c b/posix/regcomp.c index 59836b15e0..b9b056065f 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -28,6 +28,11 @@ #include <wchar.h> #include <wctype.h> +/* In case that the system doesn't have isblank(). */ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif + #ifdef _LIBC # ifndef _RE_DEFINE_LOCALE_FUNCTIONS # define _RE_DEFINE_LOCALE_FUNCTIONS 1 @@ -65,7 +70,9 @@ static void re_compile_fastmap_iter (regex_t *bufp, char *fastmap); static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); static reg_errcode_t init_word_char (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ static void free_workarea_compile (regex_t *preg); static reg_errcode_t create_initial_state (re_dfa_t *dfa); static reg_errcode_t analyze (re_dfa_t *dfa); @@ -115,24 +122,40 @@ static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, re_token_t *token); #ifndef _LIBC -static reg_errcode_t build_range_exp (re_charset_t *mbcset, - re_bitset_ptr_t sbcset, int *range_alloc, +# ifdef RE_ENABLE_I18N +static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, + re_charset_t *mbcset, int *range_alloc, bracket_elem_t *start_elem, bracket_elem_t *end_elem); -static reg_errcode_t build_collating_symbol (re_charset_t *mbcset, - re_bitset_ptr_t sbcset, +static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, + re_charset_t *mbcset, int *coll_sym_alloc, unsigned char *name); +# else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, + bracket_elem_t *start_elem, + bracket_elem_t *end_elem); +static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, + unsigned char *name); +# endif /* not RE_ENABLE_I18N */ #endif /* not _LIBC */ -static reg_errcode_t build_equiv_class (re_charset_t *mbcset, - re_bitset_ptr_t sbcset, +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, + re_charset_t *mbcset, int *equiv_class_alloc, const unsigned char *name); -static reg_errcode_t build_charclass (re_charset_t *mbcset, - re_bitset_ptr_t sbcset, +static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, const unsigned char *class_name, reg_syntax_t syntax); +#else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, + const unsigned char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err); static void free_bin_tree (bin_tree_t *tree); static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right, @@ -340,6 +363,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) fastmap[ch] = 1; } +#ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET) { int i; @@ -347,7 +371,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes || cset->nranges || cset->nchar_classes) { -#ifdef _LIBC +# ifdef _LIBC if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) { /* In this case we want to catch the bytes which are @@ -364,14 +388,12 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) if (table[ch] < 0) fastmap[ch] = 1; } -#else -# ifdef RE_ENABLE_I18N +# else if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) fastmap[i] = 1; -# endif /* RE_ENABLE_I18N */ -#endif /* not _LIBC */ +# endif /* not _LIBC */ } for (i = 0; i < cset->nmbchars; ++i) { @@ -380,8 +402,12 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) fastmap[buf[0]] = 1; } } - else if (type == END_OF_RE || type == COMPLEX_BRACKET - || type == OP_PERIOD) +#endif /* RE_ENABLE_I18N */ + else if (type == END_OF_RE || type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == COMPLEX_BRACKET +#endif /* RE_ENABLE_I18N */ + ) { memset (fastmap, '\1', sizeof (char) * SBC_MAX); if (type == END_OF_RE) @@ -550,9 +576,12 @@ regfree (preg) for (i = 0; i < dfa->nodes_len; ++i) { re_token_t *node = dfa->nodes + i; +#ifdef RE_ENABLE_I18N if (node->type == COMPLEX_BRACKET && node->duplicated == 0) free_charset (node->opr.mbcset); - else if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + else +#endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) re_free (node->opr.sbcset); else if (node->type == OP_CONTEXT_NODE) { @@ -993,7 +1022,9 @@ calc_first (dfa, node) case OP_PERIOD: case OP_DUP_ASTERISK: case OP_DUP_QUESTION: +#ifdef RE_ENABLE_I18N case COMPLEX_BRACKET: +#endif /* RE_ENABLE_I18N */ case SIMPLE_BRACKET: case OP_BACK_REF: case ANCHOR: @@ -2233,10 +2264,14 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) update it. */ static reg_errcode_t -build_range_exp (mbcset, sbcset, range_alloc, start_elem, end_elem) +# ifdef RE_ENABLE_I18N +build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *range_alloc; +# else /* not RE_ENABLE_I18N */ +build_range_exp (sbcset, start_elem, end_elem) +# endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; bracket_elem_t *start_elem, *end_elem; { unsigned int start_ch, end_ch; @@ -2338,10 +2373,14 @@ build_range_exp (mbcset, sbcset, range_alloc, start_elem, end_elem) pointer argument since we may update it. */ static reg_errcode_t -build_collating_symbol (mbcset, sbcset, coll_sym_alloc, name) +# ifdef RE_ENABLE_I18N +build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *coll_sym_alloc; +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (sbcset, name) +# endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; unsigned char *name; { if (BE (strlen (name) != 1, 0)) @@ -2476,16 +2515,21 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) update it. */ static inline reg_errcode_t - build_range_exp (mbcset, sbcset, range_alloc, start_elem, end_elem) +# ifdef RE_ENABLE_I18N + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *range_alloc; +# else /* not RE_ENABLE_I18N */ + build_range_exp (sbcset, start_elem, end_elem) +# endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; bracket_elem_t *start_elem, *end_elem; { unsigned int ch; uint32_t start_collseq; uint32_t end_collseq; +# ifdef RE_ENABLE_I18N /* Check the space of the arrays. */ if (*range_alloc == mbcset->nranges) { @@ -2510,6 +2554,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) mbcset->range_ends = new_array_end; *range_alloc = new_nranges; } +# endif /* RE_ENABLE_I18N */ /* Equivalence Classes and Character Classes can't be a range start/end. */ @@ -2526,9 +2571,11 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) return REG_ERANGE; +# ifdef RE_ENABLE_I18N /* Got valid collation sequence values, add them as a new entry. */ mbcset->range_starts[mbcset->nranges] = start_collseq; mbcset->range_ends[mbcset->nranges++] = end_collseq; +# endif /* RE_ENABLE_I18N */ /* Build the table for single byte characters. */ for (ch = 0; ch <= SBC_MAX; ch++) @@ -2554,10 +2601,14 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) pointer argument sinse we may update it. */ static inline reg_errcode_t - build_collating_symbol (mbcset, sbcset, coll_sym_alloc, name) +# ifdef RE_ENABLE_I18N + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *coll_sym_alloc; +# else /* not RE_ENABLE_I18N */ + build_collating_symbol (sbcset, name) +# endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; unsigned char *name; { int32_t elem, idx; @@ -2581,6 +2632,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) else return REG_ECOLLATE; +# ifdef RE_ENABLE_I18N /* Got valid collation sequence, add it as a new entry. */ /* Check the space of the arrays. */ if (*coll_sym_alloc == mbcset->ncoll_syms) @@ -2596,6 +2648,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) return REG_ESPACE; } mbcset->coll_syms[mbcset->ncoll_syms++] = idx; +# endif /* RE_ENABLE_I18N */ return REG_NOERROR; } else @@ -2613,11 +2666,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) re_token_t br_token; re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N re_charset_t *mbcset; - bin_tree_t *work_tree; - int token_len, new_idx; int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; int equiv_class_alloc = 0, char_class_alloc = 0; +#else /* not RE_ENABLE_I18N */ + int non_match = 0; +#endif /* not RE_ENABLE_I18N */ + bin_tree_t *work_tree; + int token_len, new_idx; #ifdef _LIBC collseqmb = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); @@ -2635,8 +2692,14 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } #endif sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); +#ifdef RE_ENABLE_I18N mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ { *err = REG_ESPACE; return NULL; @@ -2650,8 +2713,12 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } if (token->type == OP_NON_MATCH_LIST) { +#ifdef RE_ENABLE_I18N int i; mbcset->non_match = 1; +#else /* not RE_ENABLE_I18N */ + non_match = 1; +#endif /* not RE_ENABLE_I18N */ if (syntax & RE_HAT_LISTS_NOT_NEWLINE) bitset_set (sbcset, '\0'); re_string_skip_bytes (regexp, token_len); /* Skip a token. */ @@ -2661,10 +2728,12 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) *err = REG_BADPAT; goto parse_bracket_exp_free_return; } +#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) bitset_set (sbcset, i); +#endif /* RE_ENABLE_I18N */ } /* We treat the first ']' as a normal character. */ @@ -2731,8 +2800,11 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) *err = REG_BADPAT; goto parse_bracket_exp_free_return; } - *err = build_range_exp (mbcset, sbcset, &range_alloc, &start_elem, - &end_elem); + *err = build_range_exp (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &range_alloc, +#endif /* RE_ENABLE_I18N */ + &start_elem, &end_elem); if (BE (*err != REG_NOERROR, 0)) goto parse_bracket_exp_free_return; } @@ -2743,6 +2815,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) case SB_CHAR: bitset_set (sbcset, start_elem.opr.ch); break; +#ifdef RE_ENABLE_I18N case MB_CHAR: /* Check whether the array has enough space. */ if (mbchar_alloc == mbcset->nmbchars) @@ -2758,20 +2831,30 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; break; +#endif /* RE_ENABLE_I18N */ case EQUIV_CLASS: - *err = build_equiv_class (mbcset, sbcset, &equiv_class_alloc, + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ start_elem.opr.name); if (BE (*err != REG_NOERROR, 0)) goto parse_bracket_exp_free_return; break; case COLL_SYM: - *err = build_collating_symbol (mbcset, sbcset, &coll_sym_alloc, + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ start_elem.opr.name); if (BE (*err != REG_NOERROR, 0)) goto parse_bracket_exp_free_return; break; case CHAR_CLASS: - ret = build_charclass (mbcset, sbcset, &char_class_alloc, + ret = build_charclass (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ start_elem.opr.name, syntax); if (BE (ret != REG_NOERROR, 0)) goto parse_bracket_exp_espace; @@ -2788,7 +2871,11 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) re_string_skip_bytes (regexp, token_len); /* Skip a token. */ /* If it is non-matching list. */ +#ifdef RE_ENABLE_I18N if (mbcset->non_match) +#else /* not RE_ENABLE_I18N */ + if (non_match) +#endif /* not RE_ENABLE_I18N */ bitset_not (sbcset); /* Build a tree for simple bracket. */ @@ -2799,6 +2886,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) if (BE (new_idx == -1 || work_tree == NULL, 0)) goto parse_bracket_exp_espace; +#ifdef RE_ENABLE_I18N if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes || mbcset->non_match))) @@ -2825,12 +2913,17 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) free_charset (mbcset); return work_tree; } +#else /* not RE_ENABLE_I18N */ + return work_tree; +#endif /* not RE_ENABLE_I18N */ parse_bracket_exp_espace: *err = REG_ESPACE; parse_bracket_exp_free_return: re_free (sbcset); +#ifdef RE_ENABLE_I18N free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ return NULL; } @@ -2915,13 +3008,17 @@ parse_bracket_symbol (elem, regexp, token) is a pointer argument sinse we may update it. */ static reg_errcode_t -build_equiv_class (mbcset, sbcset, equiv_class_alloc, name) +#ifdef RE_ENABLE_I18N +build_equiv_class (sbcset, mbcset, equiv_class_alloc, name) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *equiv_class_alloc; +#else /* not RE_ENABLE_I18N */ +build_equiv_class (sbcset, name) +#endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; const unsigned char *name; { -#ifdef _LIBC +#if defined _LIBC && defined RE_ENABLE_I18N uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) { @@ -2987,7 +3084,7 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name) mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; } else -#endif +#endif /* _LIBC && RE_ENABLE_I18N */ { if (BE (strlen (name) != 1, 0)) return REG_ECOLLATE; @@ -3003,15 +3100,27 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name) is a pointer argument sinse we may update it. */ static reg_errcode_t -build_charclass (mbcset, sbcset, char_class_alloc, class_name, syntax) +#ifdef RE_ENABLE_I18N +build_charclass (sbcset, mbcset, char_class_alloc, class_name, syntax) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *char_class_alloc; +#else /* not RE_ENABLE_I18N */ +build_charclass (sbcset, class_name, syntax) +#endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; const unsigned char *class_name; reg_syntax_t syntax; { int i; const unsigned char *name = class_name; + + /* In case of REG_ICASE "upper" and "lower" match the both of + upper and lower cases. */ + if ((syntax & RE_ICASE) + && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) + name = "alpha"; + +#ifdef RE_ENABLE_I18N /* Check the space of the arrays. */ if (*char_class_alloc == mbcset->nchar_classes) { @@ -3024,14 +3133,8 @@ build_charclass (mbcset, sbcset, char_class_alloc, class_name, syntax) if (BE (mbcset->char_classes == NULL, 0)) return REG_ESPACE; } - - /* In case of REG_ICASE "upper" and "lower" match the both of - upper and lower cases. */ - if ((syntax & RE_ICASE) - && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) - name = "alpha"; - mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); +#endif /* RE_ENABLE_I18N */ #define BUILD_CHARCLASS_LOOP(ctype_func)\ for (i = 0; i < SBC_MAX; ++i) \ @@ -3077,15 +3180,27 @@ build_word_op (dfa, not, err) reg_errcode_t *err; { re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N re_charset_t *mbcset; + int alloc = 0; +#else /* not RE_ENABLE_I18N */ + int non_match = 0; +#endif /* not RE_ENABLE_I18N */ reg_errcode_t ret; re_token_t br_token; bin_tree_t *tree; - int new_idx, alloc = 0; + int new_idx; sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); +#ifdef RE_ENABLE_I18N mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ + +#ifdef RE_ENABLE_I18N if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else /* not RE_ENABLE_I18N */ + if (BE (sbcset == NULL, 0)) +#endif /* not RE_ENABLE_I18N */ { *err = REG_ESPACE; return NULL; @@ -3093,26 +3208,35 @@ build_word_op (dfa, not, err) if (not) { +#ifdef RE_ENABLE_I18N int i; - mbcset->non_match = 1; /* if (syntax & RE_HAT_LISTS_NOT_NEWLINE) bitset_set(cset->sbcset, '\0'); */ -#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) bitset_set (sbcset, i); -#endif /* RE_ENABLE_I18N */ +#else /* not RE_ENABLE_I18N */ + non_match = 1; +#endif /* not RE_ENABLE_I18N */ } /* We don't care the syntax in this case. */ - ret = build_charclass (mbcset, sbcset, &alloc, "alpha", 0); + ret = build_charclass (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &alloc, +#endif /* RE_ENABLE_I18N */ + "alpha", 0); + if (BE (ret != REG_NOERROR, 0)) { re_free (sbcset); +#ifdef RE_ENABLE_I18N free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ *err = REG_ESPACE; return NULL; } @@ -3120,7 +3244,11 @@ build_word_op (dfa, not, err) bitset_set (sbcset, '_'); /* If it is non-matching list. */ +#ifdef RE_ENABLE_I18N if (mbcset->non_match) +#else /* not RE_ENABLE_I18N */ + if (non_match) +#endif /* not RE_ENABLE_I18N */ bitset_not (sbcset); /* Build a tree for simple bracket. */ @@ -3131,6 +3259,7 @@ build_word_op (dfa, not, err) if (BE (new_idx == -1 || tree == NULL, 0)) goto build_word_op_espace; +#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { re_token_t alt_token; @@ -3155,9 +3284,15 @@ build_word_op (dfa, not, err) free_charset (mbcset); return tree; } +#else /* not RE_ENABLE_I18N */ + return tree; +#endif /* not RE_ENABLE_I18N */ + build_word_op_espace: re_free (sbcset); +#ifdef RE_ENABLE_I18N free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ *err = REG_ESPACE; return NULL; } @@ -3190,19 +3325,21 @@ fetch_number (input, token, syntax) return num; } +#ifdef RE_ENABLE_I18N static void free_charset (re_charset_t *cset) { re_free (cset->mbchars); -#ifdef _LIBC +# ifdef _LIBC re_free (cset->coll_syms); re_free (cset->equiv_classes); re_free (cset->range_starts); re_free (cset->range_ends); -#endif +# endif re_free (cset->char_classes); re_free (cset); } +#endif /* RE_ENABLE_I18N */ /* Functions for binary tree operation. */ diff --git a/posix/regex.c b/posix/regex.c index 0fe38d3c63..1d5dd0f534 100644 --- a/posix/regex.c +++ b/posix/regex.c @@ -45,6 +45,7 @@ # define BE(expr, val) __builtin_expect (expr, val) #else # define BE(expr, val) (expr) +# define inline #endif #include "regcomp.c" diff --git a/posix/regex.h b/posix/regex.h index d1e4b6841a..fac441dc6d 100644 --- a/posix/regex.h +++ b/posix/regex.h @@ -189,7 +189,8 @@ extern reg_syntax_t re_syntax_options; #define RE_SYNTAX_GNU_AWK \ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ - & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS)) + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) #define RE_SYNTAX_POSIX_AWK \ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 5327c265c2..2809c45132 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -169,7 +169,7 @@ re_string_realloc_buffers (pstr, new_buf_len) #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { - pstr->wcs = re_realloc (pstr->wcs, wchar_t, new_buf_len); + pstr->wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); if (BE (pstr->wcs == NULL, 0)) return REG_ESPACE; } @@ -436,7 +436,7 @@ re_string_reconstruct (pstr, idx, eflags, newline) #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) memmove (pstr->wcs, pstr->wcs + offset, - (pstr->valid_len - offset) * sizeof (wchar_t)); + (pstr->valid_len - offset) * sizeof (wint_t)); #endif /* RE_ENABLE_I18N */ if (MBS_ALLOCATED (pstr)) memmove (pstr->mbs, pstr->mbs + offset, @@ -1166,9 +1166,11 @@ create_ci_newstate (dfa, nodes, hash) /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) newstate->halt = 1; +#ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET || (type == OP_PERIOD && MB_CUR_MAX > 1)) newstate->accept_mb = 1; +#endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) newstate->has_backref = 1; else if (type == ANCHOR || OP_CONTEXT_NODE) @@ -1213,9 +1215,11 @@ create_cd_newstate (dfa, nodes, context, hash) /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) newstate->halt = 1; +#ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET || (type == OP_PERIOD && MB_CUR_MAX > 1)) newstate->accept_mb = 1; +#endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) newstate->has_backref = 1; else if (type == ANCHOR) @@ -1228,9 +1232,11 @@ create_cd_newstate (dfa, nodes, context, hash) newstate->halt = 1; else if (ctype == OP_BACK_REF) newstate->has_backref = 1; +#ifdef RE_ENABLE_I18N else if (ctype == COMPLEX_BRACKET || (type == OP_PERIOD && MB_CUR_MAX > 1)) newstate->accept_mb = 1; +#endif /* RE_ENABLE_I18N */ } if (constraint) diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 75cc81517b..95ae46e297 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -119,7 +119,9 @@ typedef enum ALT, SUBEXP, SIMPLE_BRACKET, +#ifdef RE_ENABLE_I18N COMPLEX_BRACKET, +#endif /* RE_ENABLE_I18N */ /* Node type, These are used by token, node, tree. */ OP_PERIOD, @@ -137,6 +139,7 @@ typedef enum END_OF_RE_TOKEN_T } re_token_type_t; +#ifdef RE_ENABLE_I18N typedef struct { /* If this character set is the non-matching list. */ @@ -147,31 +150,32 @@ typedef struct int nmbchars; /* Collating symbols. */ -#ifdef _LIBC +# ifdef _LIBC int32_t *coll_syms; -#endif +# endif int ncoll_syms; /* Equivalence classes. */ -#ifdef _LIBC +# ifdef _LIBC int32_t *equiv_classes; -#endif +# endif int nequiv_classes; /* Range expressions. */ -#ifdef _LIBC +# ifdef _LIBC uint32_t *range_starts; uint32_t *range_ends; -#else /* not _LIBC */ +# else /* not _LIBC */ wchar_t *range_starts; wchar_t *range_ends; -#endif /* not _LIBC */ +# endif /* not _LIBC */ int nranges; /* Character classes. */ wctype_t *char_classes; int nchar_classes; } re_charset_t; +#endif /* RE_ENABLE_I18N */ typedef struct { @@ -180,7 +184,9 @@ typedef struct { unsigned char c; /* for CHARACTER */ re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ +#ifdef RE_ENABLE_I18N re_charset_t *mbcset; /* for COMPLEX_BRACKET */ +#endif /* RE_ENABLE_I18N */ int idx; /* for BACK_REF */ re_context_type ctx_type; /* for ANCHOR */ struct @@ -221,7 +227,7 @@ struct re_string_t unsigned char *mbs_case; #ifdef RE_ENABLE_I18N /* Store the wide character string which is corresponding to MBS. */ - wchar_t *wcs; + wint_t *wcs; mbstate_t cur_state; #endif /* The length of the valid characters in the buffers. */ |