From 04ea3b0fbb9ca56a04b437c57c2878842d331c77 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 29 Aug 2000 01:20:23 +0000 Subject: Update. 2000-08-27 Bruno Haible * string/strxfrm.c (strxfrm, wcsxfrm): Include <sys/param.h>. If nrules == 0 and srclen < n, copy only srclen + 1 characters. * sysdeps/generic/getdomain.c (getdomainname): Include <sys/param.h>. If the result fits in the buffer, copy only as many bytes as needed. * sysdeps/generic/_strerror.c (__strerror_r): Don't zero-fill the buffer after copying numbuf into it. * sysdeps/mach/_strerror.c (__strerror_r): Likewise. 2000-08-27 Bruno Haible * posix/confstr.c (confstr): When string_len > len, NUL-terminate the result. When string_len < len, don't clear the rest of the buffer. 2000-08-27 Bruno Haible Support for new LC_COLLATE format. * locale/coll-lookup.h: New file. * locale/weightwc.h (findidx): When size == 0, call collidx_table_lookup. * wcsmbs/wcscoll.c: Include coll-lookup.h. * wcsmbs/wcsxfrm.c: Likewise. * posix/fnmatch.c: Likewise. * posix/fnmatch_loop.c (internal_fnwmatch): When size == 0, call collseq_table_lookup. * locale/programs/3level.h: New file. * locale/programs/ld-ctype.c (wcwidth_table, wctrans_table): Define by including "3level.h". * locale/programs/ld-collate.c (wchead_table, collidx_table, collseq_table): New types, defined by including "3level.h". (locale_collate_t): New wcheads_3level, wcseqorder_3level fields. (encoding_mask, encoding_byte): Remove. (utf8_encode): Use simple shifts instead. (collate_finish): When !oldstyle_tables, set plane_size and plane_cnt to 0, and initialize and fill wcheads_3level and wcseqorder_3level. (collate_output): New local variable tablewc_3level. When !oldstyle_tables, set table_size to 0 and names to NULL and fill tablewc_3level instead of tablewc. Change format of TABLEWC and COLLSEQWC entries written to the file. * locale/C-collate.c (collseqwc): Change format. (_nl_C_LC_COLLATE): Set HASH_SIZE and HASH_LAYERS to 0, change format of COLLSEQWC. 
* locale/Makefile (distribute): Add coll-lookup.h, programs/3level.h. 2000-08-27 Bruno Haible * locale/programs/ld-ctype.c (MAX_CHARNAMES_IDX): New macro. (locale_ctype_t): New charnames_idx field. (ctype_startup): Initialize charnames_idx field. (find_idx): Speed up dramatically by using charnames_idx inverse table. 2000-08-27 Bruno Haible * locale/C-ctype.c: Switch to new locale format. (_nl_C_LC_CTYPE_names): Remove array. (STRUCT_CTYPE_CLASS): New macro. (_nl_C_LC_CTYPE_class_{upper,lower,alpha,digit,xdigit,space,print, graph,blank,cntrl,punct,alnum}, _nl_C_LC_CTYPE_map_{toupper,tolower}): New three-level tables. (_nl_C_LC_CTYPE_width): Change from array to three-level table. (_nl_C_LC_CTYPE): Fix nstrings value. Set HASH_SIZE and HASH_LAYERS to 0. Change WIDTH format. Set CLASS_OFFSET and MAP_OFFSET. Add 12 class tables and 2 map tables at the end. * ctype/ctype-info.c (_nl_C_LC_CTYPE_names): Remove declaration. (_nl_C_LC_CTYPE_class_{upper,lower,alpha,digit,xdigit,space,print, graph,blank,cntrl,punct,alnum}, _nl_C_LC_CTYPE_map_{toupper,tolower}): New declarations. (b): Remove trailing semicolon. (__ctype_names, __ctype_width): Don't initialize. (__ctype32_wctype, __ctype32_wctrans, __ctype32_width): Initialize. 2000-08-27 Bruno Haible * elf/dl-load.c (open_path): Add an argument telling whether *dirsp is guaranteed to be allocated with the same malloc() and may be passed to free(). (_dl_map_object): Update open_path calls. If rtld_search_dirs has been set to empty by an earlier open_path call, don't pass it again. 
--- ChangeLog | 82 ++++++ ctype/ctype-info.c | 45 +++- elf/dl-load.c | 32 ++- locale/C-collate.c | 26 +- locale/C-ctype.c | 357 ++++++++++++++++++++----- locale/Makefile | 5 +- locale/coll-lookup.h | 101 ++++++++ locale/programs/3level.h | 321 +++++++++++++++++++++++ locale/programs/ld-collate.c | 602 +++++++++++++++++++++++++------------------ locale/programs/ld-ctype.c | 507 +++--------------------------------- locale/weightwc.h | 26 +- posix/confstr.c | 20 +- posix/fnmatch.c | 1 + posix/fnmatch_loop.c | 109 +++++--- string/strxfrm.c | 3 +- sysdeps/generic/_strerror.c | 4 +- sysdeps/generic/getdomain.c | 7 +- sysdeps/mach/_strerror.c | 8 +- wcsmbs/wcscoll.c | 1 + wcsmbs/wcsxfrm.c | 1 + 20 files changed, 1406 insertions(+), 852 deletions(-) create mode 100644 locale/coll-lookup.h create mode 100644 locale/programs/3level.h diff --git a/ChangeLog b/ChangeLog index 5289a5447e..f467908703 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,85 @@ +2000-08-27 Bruno Haible + + * string/strxfrm.c (strxfrm, wcsxfrm): Include . + If nrules == 0 and srclen < n, copy only srclen + 1 characters. + + * sysdeps/generic/getdomain.c (getdomainname): Include . + If the result is fits in the buffer, copy only as many bytes as needed. + + * sysdeps/generic/_strerror.c (__strerror_r): Don't zero-fill the + buffer after copying numbuf into it. + * sysdeps/mach/_strerror.c (__strerror_r): Likewise. + +2000-08-27 Bruno Haible + + * posix/confstr.c (confstr): When string_len > len, NUL-terminate + the result. When string_len < len, don't clear the rest of the buffer. + +2000-08-27 Bruno Haible + + Support for new LC_COLLATE format. + * locale/coll-lookup.h: New file. + * locale/weightwc.h (findidx): When size == 0, call + collidx_table_lookup. + * wcsmbs/wcscoll.c: Include coll-lookup.h. + * wcsmbs/wcsxfrm.c: Likewise. + * posix/fnmatch.c: Likewise. + * posix/fnmatch_loop.c (internal_fnwmatch): When size == 0, call + collseq_table_lookup. + * locale/programs/3level.h: New file. 
+ * locale/programs/ld-ctype.c: (wcwidth_table, wctrans_table): Define + by including "3level.h". + * locale/programs/ld-collate.c (wchead_table, collidx_table, + collseq_table): New types, defined by including "3level.h". + (locale_collate_t): New wcheads_3level, wcseqorder_3level fields. + (encoding_mask, encoding_byte): Remove. + (utf8_encode): Use simple shifts instead. + (collate_finish): When !oldstyle_tables, set plane_size and plane_cnt + to 0, and initialize and fill wcheads_3level and wcseqorder_3level. + (collate_output): New local variable tablewc_3level. When + !oldstyle_tables, set table_size to 0 and names to NULL and fill + tablewc_3level instead of tablewc. Change format of TABLEWC and + COLLSEQWC entries written to the file. + * locale/C-collate.c (collseqwc): Change format. + (_nl_C_LC_COLLATE): Set HASH_SIZE and HASH_LAYERS to 0, change format + of COLLSEQWC. + * locale/Makefile (distribute): Add coll-lookup.h, programs/3level.h. + +2000-08-27 Bruno Haible + + * locale/programs/ld-ctype.c (MAX_CHARNAMES_IDX): New macro. + (locale_ctype_t): New charnames_idx field. + (ctype_startup): Initialize charnames_idx field. + (find_idx): Speed up dramatically by using charnames_idx inverse table. + +2000-08-27 Bruno Haible + + * locale/C-ctype.c: Switch to new locale format. + (_nl_C_LC_CTYPE_names): Remove array. + (STRUCT_CTYPE_CLASS): New macro. + (_nl_C_LC_CTYPE_class_{upper,lower,alpha,digit,xdigit,space,print, + graph,blank,cntrl,punct,alnum}, _nl_C_LC_CTYPE_map_{toupper,tolower}): + New three-level tables. + (_nl_C_LC_CTYPE_width): Change from array to three-level table. + (_nl_C_LC_CTYPE): Fix nstrings value. Set HASH_SIZE and HASH_LAYERS + to 0. Change WIDTH format. Set CLASS_OFFSET and MAP_OFFSET. Add + 12 class tables and 2 map tables at the end. + * ctype/ctype-info.c (_nl_C_LC_CTYPE_names): Remove declaration. 
+ (_nl_C_LC_CTYPE_class_{upper,lower,alpha,digit,xdigit,space,print, + graph,blank,cntrl,punct,alnum}, _nl_C_LC_CTYPE_map_{toupper,tolower}): + New declarations. + (b): Remove trailing semicolon. + (__ctype_names, __ctype_width): Don't initialize. + (__ctype32_wctype, __ctype32_wctrans, __ctype32_width): Initialize. + +2000-08-27 Bruno Haible + + * elf/dl-load.c (open_path): Add a argument telling whether *dirsp + is guaranteed to be allocated with the same malloc() and may be + passed to free(). + (_dl_map_object): Update open_path calls. If rtld_search_dirs has + been set to empty by an earlier open_path call, don't pass it again. + 2000-08-28 Ulrich Drepper * include/libintl.h: Include since libintl.h doesn't if diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c index 18d88d9534..57f1256572 100644 --- a/ctype/ctype-info.c +++ b/ctype/ctype-info.c @@ -24,10 +24,23 @@ extern const char _nl_C_LC_CTYPE_class[]; extern const char _nl_C_LC_CTYPE_class32[]; extern const char _nl_C_LC_CTYPE_toupper[]; extern const char _nl_C_LC_CTYPE_tolower[]; -extern const char _nl_C_LC_CTYPE_names[]; +extern const char _nl_C_LC_CTYPE_class_upper[]; +extern const char _nl_C_LC_CTYPE_class_lower[]; +extern const char _nl_C_LC_CTYPE_class_alpha[]; +extern const char _nl_C_LC_CTYPE_class_digit[]; +extern const char _nl_C_LC_CTYPE_class_xdigit[]; +extern const char _nl_C_LC_CTYPE_class_space[]; +extern const char _nl_C_LC_CTYPE_class_print[]; +extern const char _nl_C_LC_CTYPE_class_graph[]; +extern const char _nl_C_LC_CTYPE_class_blank[]; +extern const char _nl_C_LC_CTYPE_class_cntrl[]; +extern const char _nl_C_LC_CTYPE_class_punct[]; +extern const char _nl_C_LC_CTYPE_class_alnum[]; +extern const char _nl_C_LC_CTYPE_map_toupper[]; +extern const char _nl_C_LC_CTYPE_map_tolower[]; extern const char _nl_C_LC_CTYPE_width[]; -#define b(t,x,o) (((const t *) _nl_C_LC_CTYPE_##x) + o); +#define b(t,x,o) (((const t *) _nl_C_LC_CTYPE_##x) + o) const unsigned short int *__ctype_b = b (unsigned 
short int, class, 128); const __uint32_t *__ctype32_b = b (__uint32_t, class32, 0); @@ -35,8 +48,26 @@ const __int32_t *__ctype_tolower = b (__int32_t, tolower, 128); const __int32_t *__ctype_toupper = b (__int32_t, toupper, 128); const __uint32_t *__ctype32_tolower = b (__uint32_t, tolower, 128); const __uint32_t *__ctype32_toupper = b (__uint32_t, toupper, 128); -const __uint32_t *__ctype_names = b (__uint32_t, names, 0); -const unsigned char *__ctype_width = b (unsigned char, width, 0); -const char *__ctype32_wctype[12]; -const char *__ctype32_wctrans[2]; -const char *__ctype32_width; +const __uint32_t *__ctype_names; +const unsigned char *__ctype_width; +const char *__ctype32_wctype[12] = +{ + b(char, class_upper, 32), + b(char, class_lower, 32), + b(char, class_alpha, 32), + b(char, class_digit, 32), + b(char, class_xdigit, 32), + b(char, class_space, 32), + b(char, class_print, 32), + b(char, class_graph, 32), + b(char, class_blank, 32), + b(char, class_cntrl, 32), + b(char, class_punct, 32), + b(char, class_alnum, 32) +}; +const char *__ctype32_wctrans[2] = +{ + b(char, map_toupper, 0), + b(char, map_tolower, 0) +}; +const char *__ctype32_width = b (char, width, 0); diff --git a/elf/dl-load.c b/elf/dl-load.c index ea3c928169..2c1f2a5a6a 100644 --- a/elf/dl-load.c +++ b/elf/dl-load.c @@ -1204,13 +1204,16 @@ print_search_path (struct r_search_path_elem **list, _dl_debug_message (0, "\t\t(", what, ")\n", NULL); } -/* Try to open NAME in one of the directories in DIRS. +/* Try to open NAME in one of the directories in *DIRSP. Return the fd, or -1. If successful, fill in *REALNAME - with the malloc'd full directory name. */ + with the malloc'd full directory name. If it turns out + that none of the directories in *DIRSP exists, *DIRSP is + replaced with (void *) -1, and the old value is free()d + if MAY_FREE_DIRS is true. 
*/ static int open_path (const char *name, size_t namelen, int preloaded, - struct r_search_path_elem ***dirsp, + struct r_search_path_elem ***dirsp, int may_free_dirs, char **realname) { struct r_search_path_elem **dirs = *dirsp; @@ -1325,7 +1328,10 @@ open_path (const char *name, size_t namelen, int preloaded, /* Remove the whole path if none of the directories exists. */ if (! any) { - free (*dirsp); + /* Paths which were allocated using the minimal malloc() in ld.so + must not be freed using the general free() in libc. */ + if (may_free_dirs) + free (*dirsp); *dirsp = (void *) -1; } @@ -1414,12 +1420,12 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded, if (l->l_rpath_dirs != (void *) -1) fd = open_path (name, namelen, preloaded, - &l->l_rpath_dirs, &realname); + &l->l_rpath_dirs, 1, &realname); } } else if (l->l_rpath_dirs != (void *) -1) fd = open_path (name, namelen, preloaded, &l->l_rpath_dirs, - &realname); + 0, &realname); } /* If dynamically linked, try the DT_RPATH of the executable @@ -1427,13 +1433,14 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded, l = _dl_loaded; if (fd == -1 && l && l->l_type != lt_loaded && l != loader && l->l_rpath_dirs != (void *) -1) - fd = open_path (name, namelen, preloaded, &l->l_rpath_dirs, + fd = open_path (name, namelen, preloaded, &l->l_rpath_dirs, 0, &realname); } /* Try the LD_LIBRARY_PATH environment variable. */ if (fd == -1 && env_path_list != (void *) -1) - fd = open_path (name, namelen, preloaded, &env_path_list, &realname); + fd = open_path (name, namelen, preloaded, &env_path_list, 0, + &realname); /* Look at the RUNPATH informaiton for this binary. 
*/ if (loader != NULL && loader->l_runpath_dirs != (void *) -1) @@ -1453,12 +1460,12 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded, if (loader->l_runpath_dirs != (void *) -1) fd = open_path (name, namelen, preloaded, - &loader->l_runpath_dirs, &realname); + &loader->l_runpath_dirs, 1, &realname); } } else if (loader->l_runpath_dirs != (void *) -1) fd = open_path (name, namelen, preloaded, - &loader->l_runpath_dirs, &realname); + &loader->l_runpath_dirs, 0, &realname); } if (fd == -1) @@ -1518,8 +1525,9 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded, /* Finally, try the default path. */ if (fd == -1 && (l == NULL || - __builtin_expect (!(l->l_flags_1 & DF_1_NODEFLIB), 1))) - fd = open_path (name, namelen, preloaded, &rtld_search_dirs, + __builtin_expect (!(l->l_flags_1 & DF_1_NODEFLIB), 1)) + && rtld_search_dirs != (void *) -1) + fd = open_path (name, namelen, preloaded, &rtld_search_dirs, 0, &realname); /* Add another newline when we a tracing the library loading. 
*/ diff --git a/locale/C-collate.c b/locale/C-collate.c index f8d1430755..a0ba0ff691 100644 --- a/locale/C-collate.c +++ b/locale/C-collate.c @@ -58,6 +58,12 @@ static const char collseqmb[] = static const uint32_t collseqwc[] = { + 8, 1, 8, 0x0, 0xff, + /* 1st-level table */ + 6 * sizeof (uint32_t), + /* 2nd-level table */ + 7 * sizeof (uint32_t), + /* 3rd-level table */ L'\x00', L'\x01', L'\x02', L'\x03', L'\x04', L'\x05', L'\x06', L'\x07', L'\x08', L'\x09', L'\x0a', L'\x0b', L'\x0c', L'\x0d', L'\x0e', L'\x0f', L'\x10', L'\x11', L'\x12', L'\x13', L'\x14', L'\x15', L'\x16', L'\x17', @@ -101,23 +107,41 @@ const struct locale_data _nl_C_LC_COLLATE = NULL, 18, { + /* _NL_COLLATE_NRULES */ { word: 0 }, + /* _NL_COLLATE_RULESETS */ { string: NULL }, + /* _NL_COLLATE_TABLEMB */ { string: NULL }, + /* _NL_COLLATE_WEIGHTMB */ { string: NULL }, + /* _NL_COLLATE_EXTRAMB */ { string: NULL }, + /* _NL_COLLATE_INDIRECTMB */ { string: NULL }, + /* _NL_COLLATE_HASH_SIZE */ { word: 0 }, + /* _NL_COLLATE_HASH_LAYERS */ { word: 0 }, + /* _NL_COLLATE_NAMES */ { string: NULL }, + /* _NL_COLLATE_TABLEWC */ { string: NULL }, + /* _NL_COLLATE_WEIGHTWC */ { string: NULL }, + /* _NL_COLLATE_EXTRAWC */ { string: NULL }, + /* _NL_COLLATE_INDIRECTWC */ { string: NULL }, + /* _NL_COLLATE_SYMB_HASH_SIZEMB */ { string: NULL }, + /* _NL_COLLATE_SYMB_TABLEMB */ { string: NULL }, + /* _NL_COLLATE_SYMB_EXTRAMB */ { string: NULL }, + /* _NL_COLLATE_COLLSEQMB */ { string: collseqmb }, - { wstr: collseqwc } + /* _NL_COLLATE_COLLSEQWC */ + { string: (const char *) collseqwc } } }; diff --git a/locale/C-ctype.c b/locale/C-ctype.c index 23420c1bc5..4f8e204a5d 100644 --- a/locale/C-ctype.c +++ b/locale/C-ctype.c @@ -286,59 +286,248 @@ const uint32_t _nl_C_LC_CTYPE_tolower[384] = /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; -const uint32_t _nl_C_LC_CTYPE_names[256] = + +#define STRUCT_CTYPE_CLASS(p, q) \ + struct \ + { \ + uint32_t 
isctype_data[8]; \ + uint32_t header[5]; \ + uint32_t level1[1]; \ + uint32_t level2[1 << q]; \ + uint32_t level3[1 << p]; \ + } + +const STRUCT_CTYPE_CLASS(1, 1) _nl_C_LC_CTYPE_class_upper = { - /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - /* 0x08 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - /* 0x18 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, - /* 0x28 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - /* 0x38 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - /* 0x48 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - /* 0x58 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - /* 0x68 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - /* 0x78 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - /* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, - /* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - /* 0xa8 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - /* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, - /* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, - /* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, - /* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 
0xec, 0xed, 0xee, 0xef, - /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, - /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff + { 0x00000000, 0x00000000, 0x07fffffe, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x07fffffe, 0x00000000 } +}; +const STRUCT_CTYPE_CLASS(1, 1) _nl_C_LC_CTYPE_class_lower = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x07fffffe } +}; +const STRUCT_CTYPE_CLASS(1, 1) _nl_C_LC_CTYPE_class_alpha = +{ + { 0x00000000, 0x00000000, 0x07fffffe, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 6, 1, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 8 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x07fffffe, 0x07fffffe } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_digit = +{ + { 0x00000000, 0x03ff0000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x03ff0000 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_xdigit = +{ + { 0x00000000, 0x03ff0000, 0x0000007e, 0x0000007e, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x03ff0000, 0x0000007e, 0x0000007e } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_space = +{ + { 0x00003e00, 0x00000001, 0x00000000, 
0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00003e00, 0x00000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_print = +{ + { 0x00000000, 0xffffffff, 0xffffffff, 0x7fffffff, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xffffffff, 0xffffffff, 0x7fffffff } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_graph = +{ + { 0x00000000, 0xfffffffe, 0xffffffff, 0x7fffffff, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xfffffffe, 0xffffffff, 0x7fffffff } +}; +const STRUCT_CTYPE_CLASS(1, 0) _nl_C_LC_CTYPE_class_blank = +{ + { 0x00000200, 0x00000001, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 6, 1, 6, 0, 1 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000200, 0x00000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_cntrl = +{ + { 0xffffffff, 0x00000000, 0x00000000, 0x80000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0xffffffff, 0x00000000, 0x00000000, 0x80000000 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_punct = +{ + { 0x00000000, 0xfc00fffe, 0xf8000001, 0x78000001, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level 
table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0xfc00fffe, 0xf8000001, 0x78000001 } +}; +const STRUCT_CTYPE_CLASS(2, 0) _nl_C_LC_CTYPE_class_alnum = +{ + { 0x00000000, 0x03ff0000, 0x07fffffe, 0x07fffffe, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + }, + { 7, 1, 7, 0, 3 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 0x00000000, 0x03ff0000, 0x07fffffe, 0x07fffffe } }; -const char _nl_C_LC_CTYPE_width[256] = - /* 0x00 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x10 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x20 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x30 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x40 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x50 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x60 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x70 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x80 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0x90 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xa0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xb0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xc0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xd0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xe0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" - /* 0xf0 */ "\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001" -; + +const struct +{ + uint32_t header[5]; + uint32_t level1[1]; + uint32_t level2[4]; + int32_t level3[32]; +} +_nl_C_LC_CTYPE_map_toupper = +{ + { 7, 1, 5, 3, 31 }, + /* 1st-level 
table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 0, 0, 10 * sizeof (uint32_t) }, + /* 3rd-level table */ + { + 0x00000000, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0xffffffe0, + 0xffffffe0, 0xffffffe0, 0xffffffe0, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + } +}, +_nl_C_LC_CTYPE_map_tolower = +{ + { 7, 1, 5, 3, 31 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 0, 0, 10 * sizeof (uint32_t), 0 }, + /* 3rd-level table */ + { + 0x00000000, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000020, + 0x00000020, 0x00000020, 0x00000020, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + } +}; + +const struct +{ + uint32_t header[5]; + uint32_t level1[1]; + uint32_t level2[1]; + uint8_t level3[1]; +} +_nl_C_LC_CTYPE_width = +{ + { 7, 1, 0, 0, 0 }, + /* 1st-level table */ + { 6 * sizeof (uint32_t) }, + /* 2nd-level table */ + { 7 * sizeof (uint32_t) }, + /* 3rd-level table */ + { 1 } +}; + +/* Number of fields with fixed meanings, starting at 0. */ +#define NR_FIXED 70 +/* Number of class fields, starting at CLASS_OFFSET. */ +#define NR_CLASSES 12 +/* Number of map fields, starting at MAP_OFFSET. */ +#define NR_MAPS 2 + +/* Compile time verification of + NR_FIXED == _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1). 
*/ +typedef int assertion1[1 - 2 * (NR_FIXED != _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))]; const struct locale_data _nl_C_LC_CTYPE = { @@ -347,18 +536,29 @@ const struct locale_data _nl_C_LC_CTYPE = UNDELETABLE, 1, /* Enable transliteration by default. */ NULL, - 66, + NR_FIXED + NR_CLASSES + NR_MAPS, { + /* _NL_CTYPE_CLASS */ { string: _nl_C_LC_CTYPE_class }, + /* _NL_CTYPE_TOUPPER */ { string: (const char *) _nl_C_LC_CTYPE_toupper }, + /* _NL_CTYPE_GAP1 */ { string: NULL }, + /* _NL_CTYPE_TOLOWER */ { string: (const char *) _nl_C_LC_CTYPE_tolower }, + /* _NL_CTYPE_GAP2 */ { string: NULL }, + /* _NL_CTYPE_CLASS32 */ { string: _nl_C_LC_CTYPE_class32 }, - { string: (const char *) _nl_C_LC_CTYPE_names }, + /* _NL_CTYPE_NAMES */ { string: NULL }, - { word: 256 }, - { word: 1 }, + /* _NL_CTYPE_GAP3 */ + { string: NULL }, + /* _NL_CTYPE_HASH_SIZE */ + { word: 0 }, + /* _NL_CTYPE_HASH_LAYERS */ + { word: 0 }, + /* _NL_CTYPE_CLASS_NAMES */ { string: "upper\0" "lower\0" "alpha\0" "digit\0" "xdigit\0" "space\0" "print\0" "graph\0" "blank\0" "cntrl\0" "punct\0" "alnum\0" #ifdef PREDEFINED_CLASSES @@ -370,19 +570,29 @@ const struct locale_data _nl_C_LC_CTYPE = "vowel_connect\0" #endif }, + /* _NL_CTYPE_MAP_NAMES */ { string: "toupper\0" "tolower\0" #ifdef PREDEFINED_CLASSES "tosymmetric\0" #endif }, - { string: _nl_C_LC_CTYPE_width }, + /* _NL_CTYPE_WIDTH */ + { string: (const char *) _nl_C_LC_CTYPE_width.header }, + /* _NL_CTYPE_MB_CUR_MAX */ { word: 1 }, + /* _NL_CTYPE_CODESET_NAME */ { string: "ANSI_X3.4-1968" }, + /* _NL_CTYPE_TOUPPER32 */ { string: (const char *) &_nl_C_LC_CTYPE_toupper[128] }, + /* _NL_CTYPE_TOLOWER32 */ { string: (const char *) &_nl_C_LC_CTYPE_tolower[128] }, - { word: 0 }, - { word: 0 }, + /* _NL_CTYPE_CLASS_OFFSET */ + { word: NR_FIXED }, + /* _NL_CTYPE_MAP_OFFSET */ + { word: NR_FIXED + NR_CLASSES }, + /* _NL_CTYPE_INDIGITS_MB_LEN */ { word: 1 }, + /* _NL_CTYPE_INDIGITS0_MB .. 
_NL_CTYPE_INDIGITS9_MB */ { string: "0" }, { string: "1" }, { string: "2" }, @@ -393,7 +603,9 @@ const struct locale_data _nl_C_LC_CTYPE = { string: "7" }, { string: "8" }, { string: "9" }, + /* _NL_CTYPE_INDIGITS_WC_LEN */ { word: 1 }, + /* _NL_CTYPE_INDIGITS0_WC .. _NL_CTYPE_INDIGITS9_WC */ { wstr: (uint32_t *) L"0" }, { wstr: (uint32_t *) L"1" }, { wstr: (uint32_t *) L"2" }, @@ -404,6 +616,7 @@ const struct locale_data _nl_C_LC_CTYPE = { wstr: (uint32_t *) L"7" }, { wstr: (uint32_t *) L"8" }, { wstr: (uint32_t *) L"9" }, + /* _NL_CTYPE_OUTDIGIT0_MB .. _NL_CTYPE_OUTDIGIT9_MB */ { string: "0" }, { string: "1" }, { string: "2" }, @@ -414,6 +627,7 @@ const struct locale_data _nl_C_LC_CTYPE = { string: "7" }, { string: "8" }, { string: "9" }, + /* _NL_CTYPE_OUTDIGIT0_WC .. _NL_CTYPE_OUTDIGIT9_WC */ { word: L'0' }, { word: L'1' }, { word: L'2' }, @@ -424,14 +638,39 @@ const struct locale_data _nl_C_LC_CTYPE = { word: L'7' }, { word: L'8' }, { word: L'9' }, + /* _NL_CTYPE_TRANSLIT_TAB_SIZE */ { word: NTRANSLIT }, + /* _NL_CTYPE_TRANSLIT_FROM_IDX */ { wstr: translit_from_idx }, + /* _NL_CTYPE_TRANSLIT_FROM_TBL */ { wstr: (uint32_t *) translit_from_tbl }, + /* _NL_CTYPE_TRANSLIT_TO_IDX */ { wstr: translit_to_idx }, + /* _NL_CTYPE_TRANSLIT_TO_TBL */ { wstr: (uint32_t *) translit_to_tbl }, + /* _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN */ { word: 1 }, + /* _NL_CTYPE_TRANSLIT_DEFAULT_MISSING */ { wstr: (uint32_t *) L"?" 
}, + /* _NL_CTYPE_TRANSLIT_IGNORE_LEN */ { word: 0 }, - { wstr: NULL } + /* _NL_CTYPE_TRANSLIT_IGNORE */ + { wstr: NULL }, + /* NR_CLASSES wctype_tables */ + { string: (const char *) _nl_C_LC_CTYPE_class_upper.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_lower.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_alpha.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_digit.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_xdigit.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_space.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_print.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_graph.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_blank.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_cntrl.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_punct.header }, + { string: (const char *) _nl_C_LC_CTYPE_class_alnum.header }, + /* NR_MAPS wctrans_tables */ + { string: (const char *) _nl_C_LC_CTYPE_map_toupper.header }, + { string: (const char *) _nl_C_LC_CTYPE_map_tolower.header } } }; diff --git a/locale/Makefile b/locale/Makefile index 4bcdecbe74..ce9747bd00 100644 --- a/locale/Makefile +++ b/locale/Makefile @@ -25,14 +25,15 @@ headers = locale.h langinfo.h xlocale.h distribute = localeinfo.h categories.def iso-639.def iso-3166.def \ iso-4217.def weight.h weightwc.h strlen-hash.h elem-hash.h \ indigits.h indigitswc.h outdigits.h outdigitswc.h \ - C-translit.h.in C-translit.h gen-translit.pl \ + coll-lookup.h C-translit.h.in C-translit.h gen-translit.pl \ $(addprefix programs/, \ locale.c localedef.c \ $(localedef-modules:=.c) $(locale-modules:=.c) \ $(lib-modules:=.c) config.h simple-hash.h \ charmap-kw.gperf charmap-kw.h locfile-token.h \ locfile-kw.gperf locfile-kw.h linereader.h \ - locfile.h charmap.h repertoire.h localedef.h) + locfile.h charmap.h repertoire.h localedef.h \ + 3level.h) routines = setlocale findlocale loadlocale localeconv nl_langinfo \ mb_cur_max 
codeset_name \ newlocale duplocale freelocale diff --git a/locale/coll-lookup.h b/locale/coll-lookup.h new file mode 100644 index 0000000000..ad0ff66c8e --- /dev/null +++ b/locale/coll-lookup.h @@ -0,0 +1,101 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible , 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Word tables are accessed by cutting wc in three blocks of bits: + - the high 32-q-p bits, + - the next q bits, + - the next p bits. + + +------------------+-----+-----+ + wc = + 32-q-p | q | p | + +------------------+-----+-----+ + + p and q are variable. For 16-bit Unicode it is sufficient to + choose p and q such that q+p <= 16. + + The table contains the following uint32_t words: + - q+p, + - s = upper exclusive bound for wc >> (q+p), + - p, + - 2^q-1, + - 2^p-1, + - 1st-level table: s offsets, pointing into the 2nd-level table, + - 2nd-level table: k*2^q offsets, pointing into the 3rd-level table, + - 3rd-level table: j*2^p words, each containing 32 bits of data. +*/ + +#include + +/* Lookup in a table of int32_t, with default value 0. 
*/ +static inline int32_t +collidx_table_lookup (const char *table, uint32_t wc) +{ + uint32_t shift1 = ((const uint32_t *) table)[0]; + uint32_t index1 = wc >> shift1; + uint32_t bound = ((const uint32_t *) table)[1]; + if (index1 < bound) + { + uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; + if (lookup1 != 0) + { + uint32_t shift2 = ((const uint32_t *) table)[2]; + uint32_t mask2 = ((const uint32_t *) table)[3]; + uint32_t index2 = (wc >> shift2) & mask2; + uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; + if (lookup2 != 0) + { + uint32_t mask3 = ((const uint32_t *) table)[4]; + uint32_t index3 = wc & mask3; + int32_t lookup3 = ((const int32_t *)(table + lookup2))[index3]; + + return lookup3; + } + } + } + return 0; +} + +/* Lookup in a table of uint32_t, with default value 0xffffffff. */ +static inline uint32_t +collseq_table_lookup (const char *table, uint32_t wc) +{ + uint32_t shift1 = ((const uint32_t *) table)[0]; + uint32_t index1 = wc >> shift1; + uint32_t bound = ((const uint32_t *) table)[1]; + if (index1 < bound) + { + uint32_t lookup1 = ((const uint32_t *) table)[5 + index1]; + if (lookup1 != 0) + { + uint32_t shift2 = ((const uint32_t *) table)[2]; + uint32_t mask2 = ((const uint32_t *) table)[3]; + uint32_t index2 = (wc >> shift2) & mask2; + uint32_t lookup2 = ((const uint32_t *)(table + lookup1))[index2]; + if (lookup2 != 0) + { + uint32_t mask3 = ((const uint32_t *) table)[4]; + uint32_t index3 = wc & mask3; + uint32_t lookup3 = ((const uint32_t *)(table + lookup2))[index3]; + + return lookup3; + } + } + } + return ~((uint32_t) 0); +} diff --git a/locale/programs/3level.h b/locale/programs/3level.h new file mode 100644 index 0000000000..d8293322b1 --- /dev/null +++ b/locale/programs/3level.h @@ -0,0 +1,321 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible , 2000. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* Construction of sparse 3-level tables. + See wchar-lookup.h or coll-lookup.h for their structure and the + meaning of p and q. + + Before including this file, set + TABLE to the name of the structure to be defined + ELEMENT to the type of every entry + DEFAULT to the default value for empty entries + ITERATE if you want the TABLE_iterate function to be defined + NO_FINALIZE if you don't want the TABLE_finalize function to be defined + + This will define + + struct TABLE; + void TABLE_init (struct TABLE *t); + ELEMENT TABLE_get (struct TABLE *t, uint32_t wc); + void TABLE_add (struct TABLE *t, uint32_t wc, ELEMENT value); + void TABLE_iterate (struct TABLE *t, + void (*fn) (uint32_t wc, ELEMENT value)); + void TABLE_finalize (struct TABLE *t); +*/ + +#define CONCAT(a,b) CONCAT1(a,b) +#define CONCAT1(a,b) a##b + +struct TABLE +{ + /* Parameters. */ + unsigned int p; + unsigned int q; + /* Working representation. */ + size_t level1_alloc; + size_t level1_size; + uint32_t *level1; + size_t level2_alloc; + size_t level2_size; + uint32_t *level2; + size_t level3_alloc; + size_t level3_size; + ELEMENT *level3; + /* Compressed representation. */ + size_t result_size; + char *result; +}; + +/* Initialize. 
Assumes t->p and t->q have already been set. */ +static inline void +CONCAT(TABLE,_init) (struct TABLE *t) +{ + t->level1_alloc = t->level1_size = 0; + t->level2_alloc = t->level2_size = 0; + t->level3_alloc = t->level3_size = 0; +} + +/* Retrieve an entry. */ +static inline ELEMENT +CONCAT(TABLE,_get) (struct TABLE *t, uint32_t wc) +{ + uint32_t index1 = wc >> (t->q + t->p); + if (index1 < t->level1_size) + { + uint32_t lookup1 = t->level1[index1]; + if (lookup1 != ~((uint32_t) 0)) + { + uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1)) + + (lookup1 << t->q); + uint32_t lookup2 = t->level2[index2]; + if (lookup2 != ~((uint32_t) 0)) + { + uint32_t index3 = (wc & ((1 << t->p) - 1)) + + (lookup2 << t->p); + ELEMENT lookup3 = t->level3[index3]; + + return lookup3; + } + } + } + return DEFAULT; +} + +/* Add one entry. */ +static void +CONCAT(TABLE,_add) (struct TABLE *t, uint32_t wc, ELEMENT value) +{ + uint32_t index1 = wc >> (t->q + t->p); + uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1); + uint32_t index3 = wc & ((1 << t->p) - 1); + size_t i, i1, i2; + + if (value == CONCAT(TABLE,_get) (t, wc)) + return; + + if (index1 >= t->level1_size) + { + if (index1 >= t->level1_alloc) + { + size_t alloc = 2 * t->level1_alloc; + if (alloc <= index1) + alloc = index1 + 1; + t->level1 = (t->level1_alloc > 0 + ? (uint32_t *) xrealloc ((char *) t->level1, + alloc * sizeof (uint32_t)) + : (uint32_t *) xmalloc (alloc * sizeof (uint32_t))); + t->level1_alloc = alloc; + } + while (index1 >= t->level1_size) + t->level1[t->level1_size++] = ~((uint32_t) 0); + } + + if (t->level1[index1] == ~((uint32_t) 0)) + { + if (t->level2_size == t->level2_alloc) + { + size_t alloc = 2 * t->level2_alloc + 1; + t->level2 = (t->level2_alloc > 0 + ? 
(uint32_t *) xrealloc ((char *) t->level2, + (alloc << t->q) * sizeof (uint32_t)) + : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t))); + t->level2_alloc = alloc; + } + i1 = t->level2_size << t->q; + i2 = (t->level2_size + 1) << t->q; + for (i = i1; i < i2; i++) + t->level2[i] = ~((uint32_t) 0); + t->level1[index1] = t->level2_size++; + } + + index2 += t->level1[index1] << t->q; + + if (t->level2[index2] == ~((uint32_t) 0)) + { + if (t->level3_size == t->level3_alloc) + { + size_t alloc = 2 * t->level3_alloc + 1; + t->level3 = (t->level3_alloc > 0 + ? (ELEMENT *) xrealloc ((char *) t->level3, + (alloc << t->p) * sizeof (ELEMENT)) + : (ELEMENT *) xmalloc ((alloc << t->p) * sizeof (ELEMENT))); + t->level3_alloc = alloc; + } + i1 = t->level3_size << t->p; + i2 = (t->level3_size + 1) << t->p; + for (i = i1; i < i2; i++) + t->level3[i] = DEFAULT; + t->level2[index2] = t->level3_size++; + } + + index3 += t->level2[index2] << t->p; + + t->level3[index3] = value; +} + +#ifdef ITERATE +/* Apply a function to all entries in the table. */ +static void +CONCAT(TABLE,_iterate) (struct TABLE *t, + void (*fn) (uint32_t wc, ELEMENT value)) +{ + uint32_t index1; + for (index1 = 0; index1 < t->level1_size; index1++) + { + uint32_t lookup1 = t->level1[index1]; + if (lookup1 != ~((uint32_t) 0)) + { + uint32_t lookup1_shifted = lookup1 << t->q; + uint32_t index2; + for (index2 = 0; index2 < (1 << t->q); index2++) + { + uint32_t lookup2 = t->level2[index2 + lookup1_shifted]; + if (lookup2 != ~((uint32_t) 0)) + { + uint32_t lookup2_shifted = lookup2 << t->p; + uint32_t index3; + for (index3 = 0; index3 < (1 << t->p); index3++) + { + ELEMENT lookup3 = t->level3[index3 + lookup2_shifted]; + if (lookup3 != DEFAULT) + fn ((((index1 << t->q) + index2) << t->p) + index3, + lookup3); + } + } + } + } + } +} +#endif + +#ifndef NO_FINALIZE +/* Finalize and shrink. 
*/ +static void +CONCAT(TABLE,_finalize) (struct TABLE *t) +{ + size_t i, j, k; + uint32_t reorder3[t->level3_size]; + uint32_t reorder2[t->level2_size]; + uint32_t level1_offset, level2_offset, level3_offset, last_offset; + + /* Uniquify level3 blocks. */ + k = 0; + for (j = 0; j < t->level3_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (ELEMENT)) == 0) + break; + /* Relocate block j to block i. */ + reorder3[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (ELEMENT)); + k++; + } + } + t->level3_size = k; + + for (i = 0; i < (t->level2_size << t->q); i++) + if (t->level2[i] != ~((uint32_t) 0)) + t->level2[i] = reorder3[t->level2[i]]; + + /* Uniquify level2 blocks. */ + k = 0; + for (j = 0; j < t->level2_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)) == 0) + break; + /* Relocate block j to block i. */ + reorder2[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)); + k++; + } + } + t->level2_size = k; + + for (i = 0; i < t->level1_size; i++) + if (t->level1[i] != ~((uint32_t) 0)) + t->level1[i] = reorder2[t->level1[i]]; + + /* Create and fill the resulting compressed representation. 
*/ + last_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t) + + (t->level3_size << t->p) * sizeof (ELEMENT); + t->result_size = (last_offset + 3) & ~3ul; + t->result = (char *) xmalloc (t->result_size); + + level1_offset = + 5 * sizeof (uint32_t); + level2_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t); + level3_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t); + + ((uint32_t *) t->result)[0] = t->q + t->p; + ((uint32_t *) t->result)[1] = t->level1_size; + ((uint32_t *) t->result)[2] = t->p; + ((uint32_t *) t->result)[3] = (1 << t->q) - 1; + ((uint32_t *) t->result)[4] = (1 << t->p) - 1; + + for (i = 0; i < t->level1_size; i++) + ((uint32_t *) (t->result + level1_offset))[i] = + (t->level1[i] == ~((uint32_t) 0) + ? 0 + : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); + + for (i = 0; i < (t->level2_size << t->q); i++) + ((uint32_t *) (t->result + level2_offset))[i] = + (t->level2[i] == ~((uint32_t) 0) + ? 0 + : (t->level2[i] << t->p) * sizeof (ELEMENT) + level3_offset); + + for (i = 0; i < (t->level3_size << t->p); i++) + ((ELEMENT *) (t->result + level3_offset))[i] = t->level3[i]; + + if (last_offset < t->result_size) + memset (t->result + last_offset, 0, t->result_size - last_offset); + + if (t->level1_alloc > 0) + free (t->level1); + if (t->level2_alloc > 0) + free (t->level2); + if (t->level3_alloc > 0) + free (t->level3); +} +#endif + +#undef TABLE +#undef ELEMENT +#undef DEFAULT +#undef ITERATE +#undef NO_FINALIZE diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c index 96ae542256..6513d89adf 100644 --- a/locale/programs/ld-collate.c +++ b/locale/programs/ld-collate.c @@ -139,6 +139,26 @@ struct symbol_t size_t line; }; +/* Sparse table of struct element_t *. 
*/ +#define TABLE wchead_table +#define ELEMENT struct element_t * +#define DEFAULT NULL +#define ITERATE +#define NO_FINALIZE +#include "3level.h" + +/* Sparse table of int32_t. */ +#define TABLE collidx_table +#define ELEMENT int32_t +#define DEFAULT 0 +#include "3level.h" + +/* Sparse table of uint32_t. */ +#define TABLE collseq_table +#define ELEMENT uint32_t +#define DEFAULT ~((uint32_t) 0) +#include "3level.h" + /* The real definition of the struct for the LC_COLLATE locale. */ struct locale_collate_t @@ -199,10 +219,12 @@ struct locale_collate_t /* Arrays with heads of the list for each of the leading bytes in the multibyte sequences. */ struct element_t **wcheads; + struct wchead_table wcheads_3level; /* The arrays with the collation sequence order. */ unsigned char mbseqorder[256]; uint32_t *wcseqorder; + struct collseq_table wcseqorder_3level; }; @@ -211,19 +233,6 @@ struct locale_collate_t static uint32_t nrules; -/* These are definitions used by some of the functions for handling - UTF-8 encoding below. */ -static const uint32_t encoding_mask[] = -{ - ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; - - /* We need UTF-8 encoding of numbers. */ static inline int utf8_encode (char *buf, int val) @@ -240,11 +249,11 @@ utf8_encode (char *buf, int val) int step; for (step = 2; step < 6; ++step) - if ((val & encoding_mask[step - 2]) == 0) + if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0) break; retval = step; - *buf = encoding_byte[step - 2]; + *buf = (unsigned char) (~0xff >> step); --step; do { @@ -1635,109 +1644,126 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap) collate->mbheads[i] = &collate->undefined; } - /* Now to the wide character case. Here we have to find first a good - mapping function to get the wide range of wide character values - (0x00000000 to 0x7fffffff) to a managable table. This might take - some time so we issue a warning. 
- - We use a very trivial hashing function to store the sparse - table. CH % TABSIZE is used as an index. To solve multiple hits - we have N planes. This guarantees a fixed search time for a - character [N / 2]. In the following code we determine the minimum - value for TABSIZE * N, where TABSIZE >= 256. - - Some people complained that this algorithm takes too long. Well, - go on, improve it. But changing the step size is *not* an - option. Some people changed this to use only sizes of prime - numbers. Think again, do some math. We are looking for the - optimal solution, not something which works in general. Unless - somebody can provide a dynamic programming solution I think this - implementation is as good as it can get. */ - if (nr_wide_elems > 512 && !be_quiet) - fputs (_("\ + /* Now to the wide character case. */ + if (oldstyle_tables) + { + /* Here we have to find first a good mapping function to get the + wide range of wide character values (0x00000000 to 0x7fffffff) + to a manageable table. This might take some time so we issue + a warning. + + We use a very trivial hashing function to store the sparse + table. CH % TABSIZE is used as an index. To solve multiple hits + we have N planes. This guarantees a fixed search time for a + character [N / 2]. In the following code we determine the minimum + value for TABSIZE * N, where TABSIZE >= 256. + + Some people complained that this algorithm takes too long. Well, + go on, improve it. But changing the step size is *not* an + option. Some people changed this to use only sizes of prime + numbers. Think again, do some math. We are looking for the + optimal solution, not something which works in general. Unless + somebody can provide a dynamic programming solution I think this + implementation is as good as it can get.
*/ + if (nr_wide_elems > 512 && !be_quiet) + fputs (_("\ Computing table size for collation table might take a while..."), - stderr); + stderr); - min_total = UINT_MAX; - act_size = 256; + min_total = UINT_MAX; + act_size = 256; - /* While we want to have a small total size we are willing to use a - little bit larger table if this reduces the number of layers. - Therefore we add a little penalty to the number of planes. - Maybe this constant has to be adjusted a bit. */ + /* While we want to have a small total size we are willing to use a + little bit larger table if this reduces the number of layers. + Therefore we add a little penalty to the number of planes. + Maybe this constant has to be adjusted a bit. */ #define PENALTY 128 - do - { - size_t cnt[act_size]; - struct element_t *elem[act_size]; - size_t act_planes = 1; + do + { + size_t cnt[act_size]; + struct element_t *elem[act_size]; + size_t act_planes = 1; - memset (cnt, '\0', sizeof cnt); - memset (elem, '\0', sizeof elem); + memset (cnt, '\0', sizeof cnt); + memset (elem, '\0', sizeof elem); - runp = collate->start; - while (runp != NULL) - { - if (runp->wcs != NULL) + runp = collate->start; + while (runp != NULL) { - size_t nr = runp->wcs[0] % act_size; - struct element_t *elemp = elem[nr]; - - while (elemp != NULL) + if (runp->wcs != NULL) { - if (elemp->wcs[0] == runp->wcs[0]) - break; - elemp = elemp->wcnext; - } + size_t nr = runp->wcs[0] % act_size; + struct element_t *elemp = elem[nr]; - if (elemp == NULL && ++cnt[nr] > act_planes) - { - act_planes = cnt[nr]; + while (elemp != NULL) + { + if (elemp->wcs[0] == runp->wcs[0]) + break; + elemp = elemp->wcnext; + } + + if (elemp == NULL && ++cnt[nr] > act_planes) + { + act_planes = cnt[nr]; - runp->wcnext = elem[nr]; - elem[nr] = runp; + runp->wcnext = elem[nr]; + elem[nr] = runp; - if ((act_size + PENALTY) * act_planes >= min_total) - break; + if ((act_size + PENALTY) * act_planes >= min_total) + break; + } } + + /* Up to the next entry. 
*/ + runp = runp->next; } - /* Up to the next entry. */ - runp = runp->next; - } + if ((act_size + PENALTY) * act_planes < min_total) + { + min_total = (act_size + PENALTY) * act_planes; + collate->plane_size = act_size; + collate->plane_cnt = act_planes; + } - if ((act_size + PENALTY) * act_planes < min_total) - { - min_total = (act_size + PENALTY) * act_planes; - collate->plane_size = act_size; - collate->plane_cnt = act_planes; + ++act_size; } + while (act_size < min_total); + + if (nr_wide_elems > 512 && !be_quiet) + fputs (_(" done\n"), stderr); + + /* Now that we know how large the table has to be we are able to + allocate the array and start adding the characters to the lists + in the same way we did it for the multibyte characters. */ + collate->wcheads = (struct element_t **) + obstack_alloc (&collate->mempool, (collate->plane_size + * collate->plane_cnt + * sizeof (struct element_t *))); + memset (collate->wcheads, '\0', (collate->plane_size + * collate->plane_cnt + * sizeof (struct element_t *))); - ++act_size; + collate->wcseqorder = (uint32_t *) + obstack_alloc (&collate->mempool, (collate->plane_size + * collate->plane_cnt + * sizeof (uint32_t))); + memset (collate->wcseqorder, '\0', (collate->plane_size + * collate->plane_cnt + * sizeof (uint32_t))); } - while (act_size < min_total); - - if (nr_wide_elems > 512 && !be_quiet) - fputs (_(" done\n"), stderr); + else + { + collate->plane_size = 0; + collate->plane_cnt = 0; - /* Now that we know how large the table has to be we are able to - allocate the array and start adding the characters to the lists - in the same way we did it for the multibyte characters. 
*/ - collate->wcheads = (struct element_t **) - obstack_alloc (&collate->mempool, (collate->plane_size - * collate->plane_cnt - * sizeof (struct element_t *))); - memset (collate->wcheads, '\0', (collate->plane_size - * collate->plane_cnt - * sizeof (struct element_t *))); + collate->wcheads_3level.p = 6; + collate->wcheads_3level.q = 10; + wchead_table_init (&collate->wcheads_3level); - collate->wcseqorder = (uint32_t *) - obstack_alloc (&collate->mempool, (collate->plane_size - * collate->plane_cnt - * sizeof (uint32_t))); - memset (collate->wcseqorder, '\0', (collate->plane_size - * collate->plane_cnt - * sizeof (uint32_t))); + collate->wcseqorder_3level.p = 6; + collate->wcseqorder_3level.q = 10; + collseq_table_init (&collate->wcseqorder_3level); + } /* Start adding. */ runp = collate->start; @@ -1745,26 +1771,42 @@ Computing table size for collation table might take a while..."), { if (runp->wcs != NULL) { + struct element_t *e; struct element_t **eptr; - struct element_t *lastp = NULL; + struct element_t *lastp; size_t idx; - /* Find a free index. */ - idx = runp->wcs[0] % collate->plane_size; - while (collate->wcheads[idx] != NULL) + if (oldstyle_tables) { - /* Stop if this is an entry with the same starting character. */ - if (collate->wcheads[idx]->wcs[0] == runp->wcs[0]) - break; + /* Find a free index. */ + idx = runp->wcs[0] % collate->plane_size; + while (collate->wcheads[idx] != NULL) + { + /* Stop if this is an entry with the same starting character. */ + if (collate->wcheads[idx]->wcs[0] == runp->wcs[0]) + break; - idx += collate->plane_size; + idx += collate->plane_size; + } + + /* Insert the collation sequence value. */ + collate->wcseqorder[idx] = runp->wcseqorder; + + /* Find the point where to insert in the list. */ + eptr = &collate->wcheads[idx]; } + else + { + /* Insert the collation sequence value. */ + collseq_table_add (&collate->wcseqorder_3level, runp->wcs[0], + runp->wcseqorder); - /* Insert the collation sequence value. 
*/ - collate->wcseqorder[idx] = runp->wcseqorder; + /* Find the point where to insert in the list. */ + e = wchead_table_get (&collate->wcheads_3level, runp->wcs[0]); + eptr = &e; + } - /* Find the point where to insert in the list. */ - eptr = &collate->wcheads[idx]; + lastp = NULL; while (*eptr != NULL) { if ((*eptr)->nwcs < runp->nwcs) @@ -1778,7 +1820,7 @@ Computing table size for collation table might take a while..."), if (c == 0) { /* This should not happen. It means that we have - to symbols with the same byte sequence. It is + two symbols with the same byte sequence. It is of course an error. */ error_at_line (0, 0, (*eptr)->file, (*eptr)->line, _("symbol `%s' has the same encoding as"), @@ -1803,6 +1845,8 @@ Computing table size for collation table might take a while..."), if (*eptr != NULL) (*eptr)->wclast = runp; *eptr = runp; + if (!oldstyle_tables && eptr == &e) + wchead_table_add (&collate->wcheads_3level, runp->wcs[0], e); dont_insertwc: } @@ -1810,6 +1854,9 @@ Computing table size for collation table might take a while..."), runp = runp->next; } + if (!oldstyle_tables) + collseq_table_finalize (&collate->wcseqorder_3level); + /* Now determine whether the UNDEFINED entry is needed and if yes, whether it was defined. */ collate->undefined.used_in_level = need_undefined ? ~0ul : 0; @@ -1968,9 +2015,10 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, struct obstack extrapool; struct obstack indirectpool; struct section_list *sect; + size_t table_size; uint32_t *names; uint32_t *tablewc; - size_t table_size; + struct collidx_table tablewc_3level; uint32_t elem_size; uint32_t *elem_table; int i; @@ -2321,15 +2369,23 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, assert (idx[cnt] % 4 == 0); ++cnt; - /* Construct a table with the names. The size of the table is the same - as the table with the pointers. 
*/ - table_size = collate->plane_size * collate->plane_cnt; - names = (uint32_t *) alloca (table_size * sizeof (uint32_t)); - for (ch = 0; ch < table_size; ++ch) - if (collate->wcheads[ch] == NULL) - names[ch] = 0; - else - names[ch] = collate->wcheads[ch]->wcs[0]; + if (oldstyle_tables) + { + /* Construct a table with the names. The size of the table is the same + as the table with the pointers. */ + table_size = collate->plane_size * collate->plane_cnt; + names = (uint32_t *) alloca (table_size * sizeof (uint32_t)); + for (ch = 0; ch < table_size; ++ch) + if (collate->wcheads[ch] == NULL) + names[ch] = 0; + else + names[ch] = collate->wcheads[ch]->wcs[0]; + } + else + { + table_size = 0; + names = NULL; + } assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NAMES)); iov[2 + cnt].iov_base = names; @@ -2363,95 +2419,111 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, with the same wide character and add them one after the other to the table. In case we have more than one sequence starting with the same byte we have to use extra indirection. */ - tablewc = (uint32_t *) alloca (table_size * sizeof (uint32_t)); - for (ch = 0; ch < table_size; ++ch) - if (collate->wcheads[ch] == NULL) - { - /* Set the entry to zero. */ - tablewc[ch] = 0; - } - else if (collate->wcheads[ch]->wcnext == NULL - && collate->wcheads[ch]->nwcs == 1) - { - tablewc[ch] = output_weightwc (&weightpool, collate, - collate->wcheads[ch]); - } - else + { + void add_to_tablewc (uint32_t ch, struct element_t *runp) { - /* As for the singlebyte table, we recognize sequences and - compress them. */ - struct element_t *runp = collate->wcheads[ch]; - struct element_t *lastp; - - tablewc[ch] = -(obstack_object_size (&extrapool) / sizeof (uint32_t)); - - do + if (runp->wcnext == NULL && runp->nwcs == 1) { - /* Store the current index in the weight table. We know that - the current position in the `extrapool' is aligned on a - 32-bit address. 
*/ - int32_t weightidx; - int added; - - /* Find out wether this is a single entry or we have more than - one consecutive entry. */ - if (runp->wcnext != NULL - && runp->nwcs == runp->wcnext->nwcs - && wmemcmp ((wchar_t *) runp->wcs, - (wchar_t *)runp->wcnext->wcs, runp->nwcs - 1) == 0 - && (runp->wcs[runp->nwcs - 1] - == runp->wcnext->wcs[runp->nwcs - 1] + 1)) - { - int i; - struct element_t *series_startp = runp; - struct element_t *curp; + int32_t weigthidx = output_weightwc (&weightpool, collate, runp); + if (oldstyle_tables) + tablewc[ch] = weigthidx; + else + collidx_table_add (&tablewc_3level, ch, weigthidx); + } + else + { + /* As for the singlebyte table, we recognize sequences and + compress them. */ + struct element_t *lastp; - /* Now add first the initial byte sequence. */ - added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t); - if (sizeof (int32_t) == sizeof (int)) - obstack_make_room (&extrapool, added); + if (oldstyle_tables) + tablewc[ch] = -(obstack_object_size (&extrapool) / sizeof (uint32_t)); + else + collidx_table_add (&tablewc_3level, ch, + -(obstack_object_size (&extrapool) / sizeof (uint32_t))); - /* More than one consecutive entry. We mark this by having - a negative index into the indirect table. */ - if (sizeof (int32_t) == sizeof (int)) - { - obstack_int_grow_fast (&extrapool, - -(obstack_object_size (&indirectpool) - / sizeof (int32_t))); - obstack_int_grow_fast (&extrapool, runp->nwcs - 1); - } - else + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Find out whether this is a single entry or we have more than + one consecutive entry.
*/ + if (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)) { - int32_t i = -(obstack_object_size (&indirectpool) - / sizeof (int32_t)); - obstack_grow (&extrapool, &i, sizeof (int32_t)); - i = runp->nwcs - 1; - obstack_grow (&extrapool, &i, sizeof (int32_t)); - } + int i; + struct element_t *series_startp = runp; + struct element_t *curp; - do - runp = runp->wcnext; - while (runp->wcnext != NULL - && runp->nwcs == runp->wcnext->nwcs - && wmemcmp ((wchar_t *) runp->wcs, - (wchar_t *)runp->wcnext->wcs, - runp->nwcs - 1) == 0 - && (runp->wcs[runp->nwcs - 1] - == runp->wcnext->wcs[runp->nwcs - 1] + 1)); + /* Now add first the initial byte sequence. */ + added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t); + if (sizeof (int32_t) == sizeof (int)) + obstack_make_room (&extrapool, added); - /* Now walk backward from here to the beginning. */ - curp = runp; + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. 
*/ + if (sizeof (int32_t) == sizeof (int)) + { + obstack_int_grow_fast (&extrapool, + -(obstack_object_size (&indirectpool) + / sizeof (int32_t))); + obstack_int_grow_fast (&extrapool, runp->nwcs - 1); + } + else + { + int32_t i = -(obstack_object_size (&indirectpool) + / sizeof (int32_t)); + obstack_grow (&extrapool, &i, sizeof (int32_t)); + i = runp->nwcs - 1; + obstack_grow (&extrapool, &i, sizeof (int32_t)); + } - for (i = 1; i < runp->nwcs; ++i) - if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow_fast (&extrapool, curp->wcs[i]); - else - obstack_grow (&extrapool, &curp->wcs[i], sizeof (int32_t)); + do + runp = runp->wcnext; + while (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)); + + /* Now walk backward from here to the beginning. */ + curp = runp; + + for (i = 1; i < runp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, curp->wcs[i]); + else + obstack_grow (&extrapool, &curp->wcs[i], + sizeof (int32_t)); - /* Now find the end of the consecutive sequence and - add all the indeces in the indirect pool. */ - do - { + /* Now find the end of the consecutive sequence and + add all the indices in the indirect pool. */ + do + { + weightidx = output_weightwc (&weightpool, collate, + curp); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&indirectpool, weightidx); + else + obstack_grow (&indirectpool, &weightidx, + sizeof (int32_t)); + + curp = curp->wclast; + } + while (curp != series_startp); + + /* Add the final weight.
*/ weightidx = output_weightwc (&weightpool, collate, curp); if (sizeof (int32_t) == sizeof (int)) obstack_int_grow (&indirectpool, weightidx); @@ -2459,68 +2531,88 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, obstack_grow (&indirectpool, &weightidx, sizeof (int32_t)); - curp = curp->wclast; + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < curp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (&extrapool, curp->wcs[i]); + else + obstack_grow (&extrapool, &curp->wcs[i], + sizeof (int32_t)); } - while (curp != series_startp); - - /* Add the final weight. */ - weightidx = output_weightwc (&weightpool, collate, curp); - if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow (&indirectpool, weightidx); else - obstack_grow (&indirectpool, &weightidx, sizeof (int32_t)); - - /* And add the end byte sequence. Without length this - time. */ - for (i = 1; i < curp->nwcs; ++i) - if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow (&extrapool, curp->wcs[i]); - else - obstack_grow (&extrapool, &curp->wcs[i], sizeof (int32_t)); - } - else - { - /* A single entry. Simply add the index and the length and - string (except for the first character which is already - tested for). */ - int i; + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). */ + int i; - /* Output the weight info. */ - weightidx = output_weightwc (&weightpool, collate, runp); + /* Output the weight info. 
*/ + weightidx = output_weightwc (&weightpool, collate, runp); - added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t); - if (sizeof (int) == sizeof (int32_t)) - obstack_make_room (&extrapool, added); + added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t); + if (sizeof (int) == sizeof (int32_t)) + obstack_make_room (&extrapool, added); - if (sizeof (int32_t) == sizeof (int)) - { - obstack_int_grow_fast (&extrapool, weightidx); - obstack_int_grow_fast (&extrapool, runp->nwcs - 1); - } - else - { - int32_t l = runp->nwcs - 1; - obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); - obstack_grow (&extrapool, &l, sizeof (int32_t)); + if (sizeof (int32_t) == sizeof (int)) + { + obstack_int_grow_fast (&extrapool, weightidx); + obstack_int_grow_fast (&extrapool, runp->nwcs - 1); + } + else + { + int32_t l = runp->nwcs - 1; + obstack_grow (&extrapool, &weightidx, + sizeof (int32_t)); + obstack_grow (&extrapool, &l, sizeof (int32_t)); + } + for (i = 1; i < runp->nwcs; ++i) + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, runp->wcs[i]); + else + obstack_grow (&extrapool, &runp->wcs[i], + sizeof (int32_t)); } - for (i = 1; i < runp->nwcs; ++i) - if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow_fast (&extrapool, runp->wcs[i]); - else - obstack_grow (&extrapool, &runp->wcs[i], sizeof (int32_t)); - } - /* Next entry. */ - lastp = runp; - runp = runp->wcnext; + /* Next entry. */ + lastp = runp; + runp = runp->wcnext; + } + while (runp != NULL); } - while (runp != NULL); } + if (oldstyle_tables) + { + tablewc = (uint32_t *) alloca (table_size * sizeof (uint32_t)); + + for (ch = 0; ch < table_size; ++ch) + if (collate->wcheads[ch] == NULL) + /* Set the entry to zero. 
*/ + tablewc[ch] = 0; + else + add_to_tablewc (ch, collate->wcheads[ch]); + } + else + { + tablewc_3level.p = 6; + tablewc_3level.q = 10; + collidx_table_init (&tablewc_3level); + + wchead_table_iterate (&collate->wcheads_3level, add_to_tablewc); + + collidx_table_finalize (&tablewc_3level); + } + } + /* Now add the four tables. */ assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC)); - iov[2 + cnt].iov_base = tablewc; - iov[2 + cnt].iov_len = table_size * sizeof (uint32_t); + iov[2 + cnt].iov_base = (oldstyle_tables + ? (void *) tablewc + : (void *) tablewc_3level.result); + iov[2 + cnt].iov_len = (oldstyle_tables + ? table_size * sizeof (uint32_t) + : tablewc_3level.result_size); idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; assert (iov[2 + cnt].iov_len % sizeof (int32_t) == 0); assert (idx[cnt] % 4 == 0); @@ -2672,8 +2764,12 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, ++cnt; assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC)); - iov[2 + cnt].iov_base = collate->wcseqorder; - iov[2 + cnt].iov_len = table_size * sizeof (uint32_t); + iov[2 + cnt].iov_base = (oldstyle_tables + ? (void *) collate->wcseqorder + : (void *) collate->wcseqorder_3level.result); + iov[2 + cnt].iov_len = (oldstyle_tables + ? table_size * sizeof (uint32_t) + : collate->wcseqorder_3level.result_size); assert (idx[cnt] % 4 == 0); ++cnt; diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index e2d76b0002..1f40fe84ba 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -112,6 +112,9 @@ struct locale_ctype_t uint32_t *charnames; size_t charnames_max; size_t charnames_act; + /* An index lookup table, to speed up find_idx.
*/ +#define MAX_CHARNAMES_IDX 0x10000 + uint32_t *charnames_idx; struct repertoire_t *repertoire; @@ -253,6 +256,10 @@ ctype_startup (struct linereader *lr, struct localedef_t *locale, for (cnt = 0; cnt < 256; ++cnt) ctype->charnames[cnt] = cnt; ctype->charnames_act = 256; + ctype->charnames_idx = + (uint32_t *) xmalloc (MAX_CHARNAMES_IDX * sizeof (uint32_t)); + for (cnt = 0; cnt < MAX_CHARNAMES_IDX; ++cnt) + ctype->charnames_idx[cnt] = ~((uint32_t) 0); /* Fill character class information. */ ctype->last_class_char = ILLEGAL_CHAR_VALUE; @@ -1299,9 +1306,23 @@ find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, if (idx < 256) return table == NULL ? NULL : &(*table)[idx]; - for (cnt = 256; cnt < ctype->charnames_act; ++cnt) - if (ctype->charnames[cnt] == idx) - break; + /* If idx is in the usual range, use the charnames_idx lookup table + instead of the slow search loop. */ + if (idx < MAX_CHARNAMES_IDX) + { + if (ctype->charnames_idx[idx] != ~((uint32_t) 0)) + /* Found. */ + cnt = ctype->charnames_idx[idx]; + else + /* Not found. */ + cnt = ctype->charnames_act; + } + else + { + for (cnt = 256; cnt < ctype->charnames_act; ++cnt) + if (ctype->charnames[cnt] == idx) + break; + } /* We have to distinguish two cases: the name is found or not. */ if (cnt == ctype->charnames_act) @@ -1315,6 +1336,8 @@ find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, sizeof (uint32_t) * ctype->charnames_max); } ctype->charnames[ctype->charnames_act++] = idx; + if (idx < MAX_CHARNAMES_IDX) + ctype->charnames_idx[idx] = cnt; } if (table == NULL) @@ -3582,473 +3605,23 @@ wctype_table_finalize (struct wctype_table *t) free (t->level3); } -struct wcwidth_table -{ - /* Parameters. */ - unsigned int p; - unsigned int q; - /* Working representation. 
*/ - size_t level1_alloc; - size_t level1_size; - uint32_t *level1; - size_t level2_alloc; - size_t level2_size; - uint32_t *level2; - size_t level3_alloc; - size_t level3_size; - uint8_t *level3; - /* Compressed representation. */ - size_t result_size; - char *result; -}; - -/* Initialize. Assumes t->p and t->q have already been set. */ +#define TABLE wcwidth_table +#define ELEMENT uint8_t +#define DEFAULT 0xff +#include "3level.h" + +#define TABLE wctrans_table +#define ELEMENT int32_t +#define DEFAULT 0 +#define wctrans_table_add wctrans_table_add_internal +#include "3level.h" +#undef wctrans_table_add +/* The wctrans_table must actually store the difference between the + desired result and the argument. */ static inline void -wcwidth_table_init (struct wcwidth_table *t) -{ - t->level1_alloc = t->level1_size = 0; - t->level2_alloc = t->level2_size = 0; - t->level3_alloc = t->level3_size = 0; -} - -/* Retrieve an entry. */ -static inline uint8_t -wcwidth_table_get (struct wcwidth_table *t, uint32_t wc) -{ - uint32_t index1 = wc >> (t->q + t->p); - if (index1 < t->level1_size) - { - uint32_t lookup1 = t->level1[index1]; - if (lookup1 != ~((uint32_t) 0)) - { - uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1)) - + (lookup1 << t->q); - uint32_t lookup2 = t->level2[index2]; - if (lookup2 != ~((uint32_t) 0)) - { - uint32_t index3 = (wc & ((1 << t->p) - 1)) - + (lookup2 << t->p); - uint8_t lookup3 = t->level3[index3]; - - return lookup3; - } - } - } - return 0xff; -} - -/* Add one entry. 
*/ -static void -wcwidth_table_add (struct wcwidth_table *t, uint32_t wc, uint8_t width) -{ - uint32_t index1 = wc >> (t->q + t->p); - uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1); - uint32_t index3 = wc & ((1 << t->p) - 1); - size_t i, i1, i2; - - if (width == wcwidth_table_get (t, wc)) - return; - - if (index1 >= t->level1_size) - { - if (index1 >= t->level1_alloc) - { - size_t alloc = 2 * t->level1_alloc; - if (alloc <= index1) - alloc = index1 + 1; - t->level1 = (t->level1_alloc > 0 - ? (uint32_t *) xrealloc ((char *) t->level1, - alloc * sizeof (uint32_t)) - : (uint32_t *) xmalloc (alloc * sizeof (uint32_t))); - t->level1_alloc = alloc; - } - while (index1 >= t->level1_size) - t->level1[t->level1_size++] = ~((uint32_t) 0); - } - - if (t->level1[index1] == ~((uint32_t) 0)) - { - if (t->level2_size == t->level2_alloc) - { - size_t alloc = 2 * t->level2_alloc + 1; - t->level2 = (t->level2_alloc > 0 - ? (uint32_t *) xrealloc ((char *) t->level2, - (alloc << t->q) * sizeof (uint32_t)) - : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t))); - t->level2_alloc = alloc; - } - i1 = t->level2_size << t->q; - i2 = (t->level2_size + 1) << t->q; - for (i = i1; i < i2; i++) - t->level2[i] = ~((uint32_t) 0); - t->level1[index1] = t->level2_size++; - } - - index2 += t->level1[index1] << t->q; - - if (t->level2[index2] == ~((uint32_t) 0)) - { - if (t->level3_size == t->level3_alloc) - { - size_t alloc = 2 * t->level3_alloc + 1; - t->level3 = (t->level3_alloc > 0 - ? (uint8_t *) xrealloc ((char *) t->level3, - (alloc << t->p) * sizeof (uint8_t)) - : (uint8_t *) xmalloc ((alloc << t->p) * sizeof (uint8_t))); - t->level3_alloc = alloc; - } - i1 = t->level3_size << t->p; - i2 = (t->level3_size + 1) << t->p; - for (i = i1; i < i2; i++) - t->level3[i] = 0xff; - t->level2[index2] = t->level3_size++; - } - - index3 += t->level2[index2] << t->p; - - t->level3[index3] = width; -} - -/* Finalize and shrink. 
*/ -static void -wcwidth_table_finalize (struct wcwidth_table *t) -{ - size_t i, j, k; - uint32_t reorder3[t->level3_size]; - uint32_t reorder2[t->level2_size]; - uint32_t level1_offset, level2_offset, level3_offset, last_offset; - - /* Uniquify level3 blocks. */ - k = 0; - for (j = 0; j < t->level3_size; j++) - { - for (i = 0; i < k; i++) - if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], - (1 << t->p) * sizeof (uint8_t)) == 0) - break; - /* Relocate block j to block i. */ - reorder3[j] = i; - if (i == k) - { - if (i != j) - memcpy (&t->level3[i << t->p], &t->level3[j << t->p], - (1 << t->p) * sizeof (uint8_t)); - k++; - } - } - t->level3_size = k; - - for (i = 0; i < (t->level2_size << t->q); i++) - if (t->level2[i] != ~((uint32_t) 0)) - t->level2[i] = reorder3[t->level2[i]]; - - /* Uniquify level2 blocks. */ - k = 0; - for (j = 0; j < t->level2_size; j++) - { - for (i = 0; i < k; i++) - if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], - (1 << t->q) * sizeof (uint32_t)) == 0) - break; - /* Relocate block j to block i. */ - reorder2[j] = i; - if (i == k) - { - if (i != j) - memcpy (&t->level2[i << t->q], &t->level2[j << t->q], - (1 << t->q) * sizeof (uint32_t)); - k++; - } - } - t->level2_size = k; - - for (i = 0; i < t->level1_size; i++) - if (t->level1[i] != ~((uint32_t) 0)) - t->level1[i] = reorder2[t->level1[i]]; - - /* Create and fill the resulting compressed representation. 
*/ - last_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t) - + (t->level2_size << t->q) * sizeof (uint32_t) - + (t->level3_size << t->p) * sizeof (uint8_t); - t->result_size = (last_offset + 3) & ~3ul; - t->result = (char *) xmalloc (t->result_size); - - level1_offset = - 5 * sizeof (uint32_t); - level2_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t); - level3_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t) - + (t->level2_size << t->q) * sizeof (uint32_t); - - ((uint32_t *) t->result)[0] = t->q + t->p; - ((uint32_t *) t->result)[1] = t->level1_size; - ((uint32_t *) t->result)[2] = t->p; - ((uint32_t *) t->result)[3] = (1 << t->q) - 1; - ((uint32_t *) t->result)[4] = (1 << t->p) - 1; - - for (i = 0; i < t->level1_size; i++) - ((uint32_t *) (t->result + level1_offset))[i] = - (t->level1[i] == ~((uint32_t) 0) - ? 0 - : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); - - for (i = 0; i < (t->level2_size << t->q); i++) - ((uint32_t *) (t->result + level2_offset))[i] = - (t->level2[i] == ~((uint32_t) 0) - ? 0 - : (t->level2[i] << t->p) * sizeof (uint8_t) + level3_offset); - - for (i = 0; i < (t->level3_size << t->p); i++) - ((uint8_t *) (t->result + level3_offset))[i] = t->level3[i]; - - if (last_offset < t->result_size) - memset (t->result + last_offset, 0, t->result_size - last_offset); - - if (t->level1_alloc > 0) - free (t->level1); - if (t->level2_alloc > 0) - free (t->level2); - if (t->level3_alloc > 0) - free (t->level3); -} - -struct wctrans_table -{ - /* Parameters. */ - unsigned int p; - unsigned int q; - /* Working representation. */ - size_t level1_alloc; - size_t level1_size; - uint32_t *level1; - size_t level2_alloc; - size_t level2_size; - uint32_t *level2; - size_t level3_alloc; - size_t level3_size; - int32_t *level3; - /* Compressed representation. */ - size_t result_size; - char *result; -}; - -/* Initialize. Assumes t->p and t->q have already been set. 
*/ -static inline void -wctrans_table_init (struct wctrans_table *t) -{ - t->level1_alloc = t->level1_size = 0; - t->level2_alloc = t->level2_size = 0; - t->level3_alloc = t->level3_size = 0; -} - -/* Retrieve an entry. */ -static inline uint32_t -wctrans_table_get (struct wctrans_table *t, uint32_t wc) -{ - uint32_t index1 = wc >> (t->q + t->p); - if (index1 < t->level1_size) - { - uint32_t lookup1 = t->level1[index1]; - if (lookup1 != ~((uint32_t) 0)) - { - uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1)) - + (lookup1 << t->q); - uint32_t lookup2 = t->level2[index2]; - if (lookup2 != ~((uint32_t) 0)) - { - uint32_t index3 = (wc & ((1 << t->p) - 1)) - + (lookup2 << t->p); - int32_t lookup3 = t->level3[index3]; - - return wc + lookup3; - } - } - } - return wc; -} - -/* Add one entry. */ -static void wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc) { - uint32_t index1 = wc >> (t->q + t->p); - uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1); - uint32_t index3 = wc & ((1 << t->p) - 1); - int32_t value; - size_t i, i1, i2; - - if (mapped_wc == wctrans_table_get (t, wc)) - return; - - value = (int32_t) mapped_wc - (int32_t) wc; - - if (index1 >= t->level1_size) - { - if (index1 >= t->level1_alloc) - { - size_t alloc = 2 * t->level1_alloc; - if (alloc <= index1) - alloc = index1 + 1; - t->level1 = (t->level1_alloc > 0 - ? (uint32_t *) xrealloc ((char *) t->level1, - alloc * sizeof (uint32_t)) - : (uint32_t *) xmalloc (alloc * sizeof (uint32_t))); - t->level1_alloc = alloc; - } - while (index1 >= t->level1_size) - t->level1[t->level1_size++] = ~((uint32_t) 0); - } - - if (t->level1[index1] == ~((uint32_t) 0)) - { - if (t->level2_size == t->level2_alloc) - { - size_t alloc = 2 * t->level2_alloc + 1; - t->level2 = (t->level2_alloc > 0 - ? 
(uint32_t *) xrealloc ((char *) t->level2, - (alloc << t->q) * sizeof (uint32_t)) - : (uint32_t *) xmalloc ((alloc << t->q) * sizeof (uint32_t))); - t->level2_alloc = alloc; - } - i1 = t->level2_size << t->q; - i2 = (t->level2_size + 1) << t->q; - for (i = i1; i < i2; i++) - t->level2[i] = ~((uint32_t) 0); - t->level1[index1] = t->level2_size++; - } - - index2 += t->level1[index1] << t->q; - - if (t->level2[index2] == ~((uint32_t) 0)) - { - if (t->level3_size == t->level3_alloc) - { - size_t alloc = 2 * t->level3_alloc + 1; - t->level3 = (t->level3_alloc > 0 - ? (int32_t *) xrealloc ((char *) t->level3, - (alloc << t->p) * sizeof (int32_t)) - : (int32_t *) xmalloc ((alloc << t->p) * sizeof (int32_t))); - t->level3_alloc = alloc; - } - i1 = t->level3_size << t->p; - i2 = (t->level3_size + 1) << t->p; - for (i = i1; i < i2; i++) - t->level3[i] = 0; - t->level2[index2] = t->level3_size++; - } - - index3 += t->level2[index2] << t->p; - - t->level3[index3] = value; -} - -/* Finalize and shrink. */ -static void -wctrans_table_finalize (struct wctrans_table *t) -{ - size_t i, j, k; - uint32_t reorder3[t->level3_size]; - uint32_t reorder2[t->level2_size]; - uint32_t level1_offset, level2_offset, level3_offset; - - /* Uniquify level3 blocks. */ - k = 0; - for (j = 0; j < t->level3_size; j++) - { - for (i = 0; i < k; i++) - if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], - (1 << t->p) * sizeof (int32_t)) == 0) - break; - /* Relocate block j to block i. */ - reorder3[j] = i; - if (i == k) - { - if (i != j) - memcpy (&t->level3[i << t->p], &t->level3[j << t->p], - (1 << t->p) * sizeof (int32_t)); - k++; - } - } - t->level3_size = k; - - for (i = 0; i < (t->level2_size << t->q); i++) - if (t->level2[i] != ~((uint32_t) 0)) - t->level2[i] = reorder3[t->level2[i]]; - - /* Uniquify level2 blocks. 
*/ - k = 0; - for (j = 0; j < t->level2_size; j++) - { - for (i = 0; i < k; i++) - if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], - (1 << t->q) * sizeof (uint32_t)) == 0) - break; - /* Relocate block j to block i. */ - reorder2[j] = i; - if (i == k) - { - if (i != j) - memcpy (&t->level2[i << t->q], &t->level2[j << t->q], - (1 << t->q) * sizeof (uint32_t)); - k++; - } - } - t->level2_size = k; - - for (i = 0; i < t->level1_size; i++) - if (t->level1[i] != ~((uint32_t) 0)) - t->level1[i] = reorder2[t->level1[i]]; - - /* Create and fill the resulting compressed representation. */ - t->result_size = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t) - + (t->level2_size << t->q) * sizeof (uint32_t) - + (t->level3_size << t->p) * sizeof (int32_t); - t->result = (char *) xmalloc (t->result_size); - - level1_offset = - 5 * sizeof (uint32_t); - level2_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t); - level3_offset = - 5 * sizeof (uint32_t) - + t->level1_size * sizeof (uint32_t) - + (t->level2_size << t->q) * sizeof (uint32_t); - - ((uint32_t *) t->result)[0] = t->q + t->p; - ((uint32_t *) t->result)[1] = t->level1_size; - ((uint32_t *) t->result)[2] = t->p; - ((uint32_t *) t->result)[3] = (1 << t->q) - 1; - ((uint32_t *) t->result)[4] = (1 << t->p) - 1; - - for (i = 0; i < t->level1_size; i++) - ((uint32_t *) (t->result + level1_offset))[i] = - (t->level1[i] == ~((uint32_t) 0) - ? 0 - : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); - - for (i = 0; i < (t->level2_size << t->q); i++) - ((uint32_t *) (t->result + level2_offset))[i] = - (t->level2[i] == ~((uint32_t) 0) - ? 
0 - : (t->level2[i] << t->p) * sizeof (int32_t) + level3_offset); - - for (i = 0; i < (t->level3_size << t->p); i++) - ((int32_t *) (t->result + level3_offset))[i] = t->level3[i]; - - if (t->level1_alloc > 0) - free (t->level1); - if (t->level2_alloc > 0) - free (t->level2); - if (t->level3_alloc > 0) - free (t->level3); + wctrans_table_add_internal (t, wc, mapped_wc - wc); } diff --git a/locale/weightwc.h b/locale/weightwc.h index d0ca018e70..92bf47ab5f 100644 --- a/locale/weightwc.h +++ b/locale/weightwc.h @@ -24,19 +24,29 @@ findidx (const wint_t **cpp) int_fast32_t i; const wint_t *cp; wint_t ch; - size_t idx; size_t cnt = 0; ch = *(*cpp)++; - idx = ch % size; - while (names[idx] != ch) + if (size != 0) { - if (++cnt == layers) - /* We didn't find the name. It is case for UNDEFINED. */ - return 0; - idx += size; + /* Old locale format. */ + size_t idx; + + idx = ch % size; + while (names[idx] != ch) + { + if (++cnt == layers) + /* We didn't find the name. It is case for UNDEFINED. */ + return 0; + idx += size; + } + i = table[idx]; + } + else + { + /* New locale format. */ + i = collidx_table_lookup ((const char *) table, ch); } - i = table[idx]; if (i >= 0) /* This is an index into the weight table. Cool. */ diff --git a/posix/confstr.c b/posix/confstr.c index c2b12dfd33..6ab76e8bed 100644 --- a/posix/confstr.c +++ b/posix/confstr.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1991, 1996, 1997, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,9 +22,9 @@ #include #include -/* If BUF is not NULL, fill in at most LEN characters of BUF - with the value corresponding to NAME. Return the number - of characters required to hold NAME's entire value. */ +/* If BUF is not NULL and LEN > 0, fill in at most LEN - 1 bytes + of BUF with the value corresponding to NAME and zero-terminate BUF. 
+ Return the number of bytes required to hold NAME's entire value. */ size_t confstr (name, buf, len) int name; @@ -89,7 +89,15 @@ confstr (name, buf, len) return 0; } - if (buf != NULL) - (void) strncpy (buf, string, len); + if (len > 0 && buf != NULL) + { + if (string_len <= len) + memcpy (buf, string, string_len); + else + { + memcpy (buf, string, len - 1); + buf[len - 1] = '\0'; + } + } return string_len; } diff --git a/posix/fnmatch.c b/posix/fnmatch.c index 1db39d9e0d..d1777d179e 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -54,6 +54,7 @@ #ifdef _LIBC # include "../locale/localeinfo.h" # include "../locale/elem-hash.h" +# include "../locale/coll-lookup.h" # define CONCAT(a,b) __CONCAT(a,b) # define mbsinit __mbsinit diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index 86526a15ff..c933346877 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -618,44 +618,70 @@ FCT (pattern, string, no_leading_period, flags) uint32_t fcollseq; uint32_t lcollseq; UCHAR cend = *p++; -# ifdef WIDE_CHAR_VERSION - int idx; - size_t cnt; -# endif # ifdef WIDE_CHAR_VERSION /* Search in the `names' array for the characters. */ - idx = fn % size; - cnt = 0; - while (names[idx] != fn) + if (size != 0) { - if (++cnt == layers) + /* Old locale format. */ + int idx; + size_t cnt; + + idx = fn % size; + cnt = 0; + while (names[idx] != fn) + { + if (++cnt == layers) + /* XXX We don't know anything about + the character we are supposed to + match. This means we are failing. */ + goto range_not_matched; + + idx += size; + } + fcollseq = collseq[idx]; + } + else + { + /* New locale format. */ + fcollseq = + collseq_table_lookup ((const char *) collseq, fn); + if (fcollseq == ~((uint32_t) 0)) /* XXX We don't know anything about the character we are supposed to match. This means we are failing. 
*/ goto range_not_matched; - - idx += size; } - fcollseq = collseq[idx]; if (is_seqval) lcollseq = cold; else { - idx = cold % size; - cnt = 0; - while (names[idx] != cold) + if (size != 0) { - if (++cnt == layers) + /* Old locale format. */ + int idx; + size_t cnt; + + idx = cold % size; + cnt = 0; + while (names[idx] != cold) { - idx = -1; - break; + if (++cnt == layers) + { + idx = -1; + break; + } + idx += size; } - idx += size; - } - lcollseq = idx == -1 ? 0xffffffff : collseq[idx]; + lcollseq = + idx == -1 ? 0xffffffff : collseq[idx]; + } + else + /* New locale format. */ + lcollseq = + collseq_table_lookup ((const char *) collseq, cold); } # else fcollseq = collseq[fn]; @@ -817,22 +843,47 @@ FCT (pattern, string, no_leading_period, flags) else { # ifdef WIDE_CHAR_VERSION - idx = cend % size; - cnt = 0; - while (names[idx] != cend) + if (size != 0) { - if (++cnt == layers) + /* Old locale format. */ + int idx; + size_t cnt; + + idx = cend % size; + cnt = 0; + while (names[idx] != cend) + { + if (++cnt == layers) + { + /* Hum, no information about the + upper bound. The matching + succeeds if the lower bound is + matched exactly. */ + if (lcollseq != fcollseq) + goto range_not_matched; + + goto matched; + } + } + hcollseq = collseq[idx]; + } + else + { + /* New locale format. */ + hcollseq = + collseq_table_lookup ((const char *) collseq, cend); + if (hcollseq == ~((uint32_t) 0)) { - /* Hum, no information about the upper - bound. The matching succeeds if the - lower bound is matched exactly. */ - if (idx == -1 && lcollseq != fcollseq) + /* Hum, no information about the + upper bound. The matching succeeds + if the lower bound is matched + exactly. 
*/ + if (lcollseq != fcollseq) goto range_not_matched; goto matched; } } - hcollseq = collseq[idx]; # else hcollseq = collseq[cend]; # endif diff --git a/string/strxfrm.c b/string/strxfrm.c index dd0ad67835..b655daf9e4 100644 --- a/string/strxfrm.c +++ b/string/strxfrm.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifndef STRING_TYPE # define STRING_TYPE char @@ -124,7 +125,7 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) if (nrules == 0) { if (n != 0) - STPNCPY (dest, src, n); + STPNCPY (dest, src, MIN (srclen + 1, n)); return srclen; } diff --git a/sysdeps/generic/_strerror.c b/sysdeps/generic/_strerror.c index 4a9b032c86..ae520f32fc 100644 --- a/sysdeps/generic/_strerror.c +++ b/sysdeps/generic/_strerror.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 93, 95, 96, 97, 98 Free Software Foundation, Inc. +/* Copyright (C) 1991, 93, 95, 96, 97, 98, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -56,7 +56,7 @@ __strerror_r (int errnum, char *buf, size_t buflen) buffer size. */ q = __mempcpy (buf, unk, MIN (unklen, buflen)); if (unklen < buflen) - __stpncpy (q, p, buflen - unklen); + memcpy (q, p, MIN (&numbuf[21] - p, buflen - unklen)); /* Terminate the string in any case. */ if (buflen > 0) diff --git a/sysdeps/generic/getdomain.c b/sysdeps/generic/getdomain.c index 250aec68e2..40a59514f0 100644 --- a/sysdeps/generic/getdomain.c +++ b/sysdeps/generic/getdomain.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1994, 1995, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1994, 1995, 1997, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -34,11 +35,13 @@ getdomainname (name, len) size_t len; { struct utsname u; + size_t u_len; if (uname (&u) < 0) return -1; - strncpy (name, u.domainname, len); + u_len = strlen (u.domainname); + memcpy (name, u.domainname, MIN (u_len + 1, len)); return 0; } diff --git a/sysdeps/mach/_strerror.c b/sysdeps/mach/_strerror.c index 7b1599492a..0dcf264075 100644 --- a/sysdeps/mach/_strerror.c +++ b/sysdeps/mach/_strerror.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1993, 1995, 1996, 1997, 1998 Free Software Foundation, Inc. +/* Copyright (C) 1993, 1995, 1996, 1997, 1998, 2000 + Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -65,7 +66,7 @@ __strerror_r (int errnum, char *buf, size_t buflen) buffer size. */ q = __mempcpy (buf, unk, MIN (unklen, buflen)); if (unklen < buflen) - __stpncpy (q, p, buflen - unklen); + memcpy (q, p, MIN (&numbuf[21] - p, buflen - unklen)); /* Terminate the string in any case. */ if (buflen > 0) @@ -103,7 +104,8 @@ __strerror_r (int errnum, char *buf, size_t buflen) { *q++ = ' '; if (unklen + len + 1 < buflen) - __stpncpy (q, p, buflen - unklen - len - 1); + memcpy (q, p, + MIN (&numbuf[21] - p, buflen - unklen - len - 1)); } } diff --git a/wcsmbs/wcscoll.c b/wcsmbs/wcscoll.c index 6fbd13df72..fddca8e2e3 100644 --- a/wcsmbs/wcscoll.c +++ b/wcsmbs/wcscoll.c @@ -18,6 +18,7 @@ Boston, MA 02111-1307, USA. */ #include +#include "../locale/coll-lookup.h" #define STRING_TYPE wchar_t #define USTRING_TYPE wint_t diff --git a/wcsmbs/wcsxfrm.c b/wcsmbs/wcsxfrm.c index d4ac1338d9..dc0c096664 100644 --- a/wcsmbs/wcsxfrm.c +++ b/wcsmbs/wcsxfrm.c @@ -18,6 +18,7 @@ Boston, MA 02111-1307, USA. */ #include +#include "../locale/coll-lookup.h" #define STRING_TYPE wchar_t #define USTRING_TYPE wint_t -- cgit 1.4.1