diff options
Diffstat (limited to 'wcsmbs')
-rw-r--r-- | wcsmbs/Makefile | 9 | ||||
-rw-r--r-- | wcsmbs/btowc.c | 46 | ||||
-rw-r--r-- | wcsmbs/mbrtowc.c | 144 | ||||
-rw-r--r-- | wcsmbs/mbsnrtowcs.c | 193 | ||||
-rw-r--r-- | wcsmbs/mbsrtowcs.c | 172 | ||||
-rw-r--r-- | wcsmbs/wchar.h | 29 | ||||
-rw-r--r-- | wcsmbs/wcrtomb.c | 109 | ||||
-rw-r--r-- | wcsmbs/wcslen.c | 5 | ||||
-rw-r--r-- | wcsmbs/wcsmbsload.c | 132 | ||||
-rw-r--r-- | wcsmbs/wcsmbsload.h | 52 | ||||
-rw-r--r-- | wcsmbs/wcsnlen.c | 44 | ||||
-rw-r--r-- | wcsmbs/wcsnrtombs.c | 155 | ||||
-rw-r--r-- | wcsmbs/wcsrtombs.c | 157 | ||||
-rw-r--r-- | wcsmbs/wctob.c | 41 |
14 files changed, 742 insertions, 546 deletions
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile index 5dd46da551..a601489652 100644 --- a/wcsmbs/Makefile +++ b/wcsmbs/Makefile @@ -1,4 +1,4 @@ -# Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +# Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. # This file is part of the GNU C Library. # The GNU C Library is free software; you can redistribute it and/or @@ -22,21 +22,22 @@ subdir := wcsmbs headers := wchar.h -distribute := wcwidth.h +distribute := wcwidth.h wcsmbsload.h routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \ wcsncmp wcsncpy wcspbrk wcsrchr wcsspn wcstok wcsstr wmemchr \ wmemcmp wmemcpy wmemmove wmemset wcpcpy wcpncpy \ btowc wctob mbsinit \ mbrlen mbrtowc wcrtomb mbsrtowcs wcsrtombs \ - mbsnrtowcs wcsnrtombs wmemrtowcs wmemrtombs \ + mbsnrtowcs wcsnrtombs wcsnlen \ wcstol wcstoul wcstoll wcstoull wcstod wcstold wcstof \ wcstol_l wcstoul_l wcstoll_l wcstoull_l \ wcstod_l wcstold_l wcstof_l \ wcscoll wcsxfrm \ wcwidth wcswidth \ wcscoll_l wcsxfrm_l \ - wcscasecmp wcsncase wcscasecmp_l wcsncase_l + wcscasecmp wcsncase wcscasecmp_l wcsncase_l \ + wcsmbsload tests := tst-wcstof diff --git a/wcsmbs/btowc.c b/wcsmbs/btowc.c index 4c2f9df215..268b1f2378 100644 --- a/wcsmbs/btowc.c +++ b/wcsmbs/btowc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu> @@ -17,18 +17,52 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <gconv.h> #include <stdio.h> +#include <string.h> #include <wchar.h> +#include <wcsmbsload.h> -/* We use UTF8 encoding for multibyte strings and therefore a valid - one byte multibyte string only can have a value from 0 to 0x7f. 
*/ wint_t btowc (c) int c; { - if (WEOF != (wint_t) EOF || c < 0 || c > 0x7f) + char buf[sizeof (wchar_t)]; + struct gconv_step_data data; + char inbuf[1]; + size_t inbytes; + size_t converted; + int status; + + /* If the parameter does not fit into one byte or it is the EOF value + we can give the answer now. */ + if (c < -128 || c > 127 || c == EOF) + return WEOF; + + /* Tell where we want the result. */ + data.outbuf = (char *) buf; + data.outbufavail = 0; + data.outbufsize = sizeof (wchar_t); + data.is_last = 1; + data.statep = &data.__state; + + /* Make sure we start in the initial state. */ + memset (&data.__state, '\0', sizeof (mbstate_t)); + + /* Make sure we use the correct function. */ + update_conversion_ptrs (); + + /* Create the input string. */ + inbuf[0] = c; + inbytes = 1; + + status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc, + &data, inbuf, &inbytes, + &converted, 0); + /* The conversion failed. */ + if (status != GCONV_OK && status != GCONV_FULL_OUTPUT) return WEOF; - else - return (wint_t) c; + + return *(wchar_t *)buf; } diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c index cf0bbd6ce2..99bc5a4797 100644 --- a/wcsmbs/mbrtowc.c +++ b/wcsmbs/mbrtowc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. @@ -18,113 +18,77 @@ Boston, MA 02111-1307, USA. */ #include <errno.h> +#include <gconv.h> #include <wchar.h> +#include <wcsmbsload.h> + +#include <assert.h> #ifndef EILSEQ -#define EILSEQ EINVAL +# define EILSEQ EINVAL #endif -static mbstate_t internal; +/* This is the private state used if PS is NULL. 
*/ +static mbstate_t state; size_t __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) { - size_t used = 0; - - if (ps == NULL) - ps = &internal; - + wchar_t buf[1]; + struct gconv_step_data data; + size_t inbytes; + int status; + size_t result; + + /* Tell where we want the result. */ + data.outbuf = (char *) (pwc ?: buf); + data.outbufavail = 0; + data.outbufsize = sizeof (wchar_t); + data.is_last = 1; + data.statep = ps ?: &state; + + /* A first special case is if S is NULL. This means put PS in the + initial state. */ if (s == NULL) { - /* See first paragraph of description in 7.16.6.3.2. */ - ps->count = 0; - return 0; + data.outbuf = (char *) buf; + s = ""; + n = 1; } - if (n > 0) - { - if (ps->count == 0) - { - unsigned char byte = (unsigned char) *s++; - ++used; - - /* We must look for a possible first byte of a UTF8 sequence. */ - if (byte < 0x80) - { - /* One byte sequence. */ - if (pwc != NULL) - *pwc = (wchar_t) byte; - return byte ? used : 0; - } - - if ((byte & 0xc0) == 0x80 || (byte & 0xfe) == 0xfe) - { - /* Oh, oh. An encoding error. */ - __set_errno (EILSEQ); - return (size_t) -1; - } - - if ((byte & 0xe0) == 0xc0) - { - /* We expect two bytes. */ - ps->count = 1; - ps->value = byte & 0x1f; - } - else if ((byte & 0xf0) == 0xe0) - { - /* We expect three bytes. */ - ps->count = 2; - ps->value = byte & 0x0f; - } - else if ((byte & 0xf8) == 0xf0) - { - /* We expect four bytes. */ - ps->count = 3; - ps->value = byte & 0x07; - } - else if ((byte & 0xfc) == 0xf8) - { - /* We expect five bytes. */ - ps->count = 4; - ps->value = byte & 0x03; - } - else - { - /* We expect six bytes. */ - ps->count = 5; - ps->value = byte & 0x01; - } - } + /* Make sure we use the correct function. */ + update_conversion_ptrs (); - /* We know we have to handle a multibyte character and there are - some more bytes to read. */ - while (used < n) + /* Do a normal conversion. 
*/ + inbytes = n; + status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc, + &data, s, &inbytes, NULL, 0); + + /* There must not be any problems with the conversion but illegal input + characters. The output buffer must be large enough, otherwise the + definition of MB_CUR_MAX is not correct. All the other possible + errors also must not happen. */ + assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT + || status == GCONV_INCOMPLETE_INPUT); + + if (status == GCONV_OK) + { + if (*(wchar_t *)data.outbuf == L'\0') { - /* The second to sixths byte must be of the form 10xxxxxx. */ - unsigned char byte = (unsigned char) *s++; - ++used; - - if ((byte & 0xc0) != 0x80) - { - /* Oh, oh. An encoding error. */ - __set_errno (EILSEQ); - return (size_t) -1; - } - - ps->value <<= 6; - ps->value |= byte & 0x3f; - - if (--ps->count == 0) - { - /* The character is finished. */ - if (pwc != NULL) - *pwc = (wchar_t) ps->value; - return ps->value ? used : 0; - } + /* The converted character is the NUL character. */ + assert (mbsinit (data.statep)); + result = 0; } + else + result = n - inbytes; + } + else + { + result = status == GCONV_INCOMPLETE_INPUT ? (size_t) -2 : (size_t) -1; + __set_errno (EILSEQ); } - return (size_t) -2; + return result; } weak_alias (__mbrtowc, mbrtowc) diff --git a/wcsmbs/mbsnrtowcs.c b/wcsmbs/mbsnrtowcs.c index db67d5c1bb..ded15e4299 100644 --- a/wcsmbs/mbsnrtowcs.c +++ b/wcsmbs/mbsnrtowcs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. @@ -18,16 +18,20 @@ Boston, MA 02111-1307, USA. 
*/ #include <errno.h> +#include <gconv.h> +#include <string.h> #include <wchar.h> +#include <wcsmbsload.h> + +#include <assert.h> #ifndef EILSEQ -#define EILSEQ EINVAL +# define EILSEQ EINVAL #endif -/* We don't need the state really because we don't have shift states - to maintain between calls to this function. */ -static mbstate_t internal; +/* This is the private state used if PS is NULL. */ +static mbstate_t state; /* This is a non-standard function but it is very useful in the implementation of stdio because we have to deal with unterminated @@ -40,128 +44,89 @@ __mbsnrtowcs (dst, src, nmc, len, ps) size_t len; mbstate_t *ps; { - size_t written = 0; - const char *run = *src; - const char *last = run + nmc; - wchar_t value; - size_t count; + size_t inbytes_in; + struct gconv_step_data data; + size_t result = 0; + int status; - if (ps == NULL) - ps = &internal; + /* Tell where we want the result. */ + data.is_last = 1; + data.statep = ps ?: &state; - /* Get information from last use of this state. */ - count = ps->count; - value = ps->value; + if (nmc == 0) + return 0; + inbytes_in = __strnlen (*src, nmc - 1) + 1; - if (dst == NULL) - /* The LEN parameter has to be ignored if we don't actually write - anything. */ - len = ~0; + /* Make sure we use the correct function. */ + update_conversion_ptrs (); - /* Copy all words. */ - while (written < len && run < last) + /* We have to handle DST == NULL special. */ + if (dst == NULL) { - unsigned char byte; - - /* Store address of next byte to process. */ - *src = run; - - /* Start reading a new character only if we are in the initial - state. */ - if (count == 0) - { - byte = *run++; - - /* We expect a start of a new multibyte character. */ - if (byte < 0x80) - { - /* One byte sequence. */ - count = 0; - value = byte; - } - else if ((byte & 0xe0) == 0xc0) - { - count = 1; - value = byte & 0x1f; - } - else if ((byte & 0xf0) == 0xe0) - { - /* We expect three bytes. 
*/ - count = 2; - value = byte & 0x0f; - } - else if ((byte & 0xf8) == 0xf0) - { - /* We expect four bytes. */ - count = 3; - value = byte & 0x07; - } - else if ((byte & 0xfc) == 0xf8) - { - /* We expect five bytes. */ - count = 4; - value = byte & 0x03; - } - else if ((byte & 0xfe) == 0xfc) - { - /* We expect six bytes. */ - count = 5; - value = byte & 0x01; - } - else - { - /* This is an illegal encoding. */ - __set_errno (EILSEQ); - return (size_t) -1; - } - } - - /* Read the possible remaining bytes. */ - while (run < last && count > 0) - { - byte = *run++; - --count; - - if ((byte & 0xc0) != 0x80) - { - /* This is an illegal encoding. */ - __set_errno (EILSEQ); - return (size_t) -1; - } - - value <<= 6; - value |= byte & 0x3f; - } - - /* If this character is only partially available remember this. */ - if (run == last && count != 0) + wchar_t buf[64]; /* Just an arbitrary size. */ + size_t inbytes = inbytes_in; + const char *inbuf = *src; + size_t written; + + data.outbuf = (char *) buf; + data.outbufsize = sizeof (buf); + do { - ps->count = count; - ps->value = value; - break; + inbuf += inbytes_in - inbytes; + inbytes_in = inbytes; + data.outbufavail = 0; + written = 0; + + status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc, + &data, inbuf, &inbytes, + &written, 0); + result += written; } + while (status == GCONV_FULL_OUTPUT); - /* Store value is required. */ - if (dst != NULL) - *dst++ = value; - - /* The whole sequence is read. Check whether end of string is - reached. */ - if (value == L'\0') + if (status == GCONV_OK && ((wchar_t *) dst)[written - 1] == L'\0') + /* Don't count the NUL character in. */ + --result; + } + else + { + /* This code is based on the safe assumption that all internal + multi-byte encodings use the NUL byte only to mark the end + of the string. 
*/ + size_t inbytes = inbytes_in; + + data.outbuf = (char *) dst; + data.outbufsize = len * sizeof (wchar_t); + data.outbufavail = 0; + + status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc, + &data, *src, &inbytes, + &result, 0); + + /* We have to determine whether the last character converted + is the NUL character. */ + if (status == GCONV_OK && ((wchar_t *) dst)[result - 1] == L'\0') { - /* Found the end of the string. */ + assert (result > 0); + assert (mbsinit (data.statep)); *src = NULL; - ps->count = 0; - return written; + --result; } - - /* Increment counter of produced words. */ - ++written; + else + *src += inbytes_in - inbytes; } - /* Store address of next byte to process. */ - *src = run; + /* There must not be any problems with the conversion but illegal input + characters. */ + assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT + || status == GCONV_INCOMPLETE_INPUT || status == GCONV_FULL_OUTPUT); + + if (status != GCONV_OK && status != GCONV_FULL_OUTPUT) + { + result = (size_t) -1; + __set_errno (EILSEQ); + } - return written; + return result; } weak_alias (__mbsnrtowcs, mbsnrtowcs) diff --git a/wcsmbs/mbsrtowcs.c b/wcsmbs/mbsrtowcs.c index 84d4cbf66d..d0af40fb54 100644 --- a/wcsmbs/mbsrtowcs.c +++ b/wcsmbs/mbsrtowcs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. @@ -18,16 +18,20 @@ Boston, MA 02111-1307, USA. */ #include <errno.h> +#include <gconv.h> +#include <string.h> #include <wchar.h> +#include <wcsmbsload.h> + +#include <assert.h> #ifndef EILSEQ -#define EILSEQ EINVAL +# define EILSEQ EINVAL #endif -/* We don't need the state really because we don't have shift states - to maintain between calls to this function. */ -static mbstate_t internal; +/* This is the private state used if PS is NULL. 
*/ +static mbstate_t state; size_t __mbsrtowcs (dst, src, len, ps) @@ -36,108 +40,86 @@ __mbsrtowcs (dst, src, len, ps) size_t len; mbstate_t *ps; { - size_t written = 0; - const char *run = *src; + struct gconv_step_data data; + size_t result = 0; + int status; - if (ps == NULL) - ps = &internal; + /* Tell where we want the result. */ + data.is_last = 1; + data.statep = ps ?: &state; - if (dst == NULL) - /* The LEN parameter has to be ignored if we don't actually write - anything. */ - len = ~0; + /* Make sure we use the correct function. */ + update_conversion_ptrs (); - /* Copy all words. */ - while (written < len) + /* We have to handle DST == NULL special. */ + if (dst == NULL) { - wchar_t value; - size_t count; - unsigned char byte; - - /* Store address of next byte to process. */ - *src = run; - - byte = *run++; - - /* We expect a start of a new multibyte character. */ - if (byte < 0x80) - { - /* One byte sequence. */ - count = 0; - value = byte; - } - else if ((byte & 0xe0) == 0xc0) - { - count = 1; - value = byte & 0x1f; - } - else if ((byte & 0xf0) == 0xe0) - { - /* We expect three bytes. */ - count = 2; - value = byte & 0x0f; - } - else if ((byte & 0xf8) == 0xf0) - { - /* We expect four bytes. */ - count = 3; - value = byte & 0x07; - } - else if ((byte & 0xfc) == 0xf8) - { - /* We expect five bytes. */ - count = 4; - value = byte & 0x03; - } - else if ((byte & 0xfe) == 0xfc) + wchar_t buf[64]; /* Just an arbitrary size. */ + size_t inbytes_in = strlen (*src) + 1; + size_t inbytes = inbytes_in; + const char *inbuf = *src; + size_t written; + + data.outbuf = (char *) buf; + data.outbufsize = sizeof (buf); + do { - /* We expect six bytes. */ - count = 5; - value = byte & 0x01; - } - else - { - /* This is an illegal encoding. */ - __set_errno (EILSEQ); - return (size_t) -1; - } - - /* Read the possible remaining bytes. */ - while (count-- > 0) - { - byte = *run++; - - if ((byte & 0xc0) != 0x80) - { - /* This is an illegal encoding. 
*/ - __set_errno (EILSEQ); - return (size_t) -1; - } - - value <<= 6; - value |= byte & 0x3f; + inbuf += inbytes_in - inbytes; + inbytes_in = inbytes; + data.outbufavail = 0; + written = 0; + + status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc, + &data, inbuf, &inbytes, + &written, 0); + result += written; } + while (status == GCONV_FULL_OUTPUT); - /* Store value is required. */ - if (dst != NULL) - *dst++ = value; - - /* The whole sequence is read. Check whether end of string is - reached. */ - if (value == L'\0') + if (status == GCONV_OK && ((wchar_t *) dst)[written - 1] == L'\0') + /* Don't count the NUL character in. */ + --result; + } + else + { + /* This code is based on the safe assumption that all internal + multi-byte encodings use the NUL byte only to mark the end + of the string. */ + size_t inbytes_in = __strnlen (*src, len * MB_CUR_MAX) + 1; + size_t inbytes = inbytes_in; + + data.outbuf = (char *) dst; + data.outbufsize = len * sizeof (wchar_t); + data.outbufavail = 0; + + status = (*__wcsmbs_gconv_fcts.towc->fct) (__wcsmbs_gconv_fcts.towc, + &data, *src, &inbytes, + &result, 0); + + /* We have to determine whether the last character converted + is the NUL character. */ + if (status == GCONV_OK && ((wchar_t *) dst)[result - 1] == L'\0') { - /* Found the end of the string. */ + assert (result > 0); + assert (mbsinit (data.statep)); *src = NULL; - return written; + --result; } - - /* Increment counter of produced words. */ - ++written; + else + *src += inbytes_in - inbytes; } - /* Store address of next byte to process. */ - *src = run; + /* There must not be any problems with the conversion but illegal input + characters. 
*/ + assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT + || status == GCONV_INCOMPLETE_INPUT || status == GCONV_FULL_OUTPUT); + + if (status != GCONV_OK && status != GCONV_FULL_OUTPUT) + { + result = (size_t) -1; + __set_errno (EILSEQ); + } - return written; + return result; } weak_alias (__mbsrtowcs, mbsrtowcs) diff --git a/wcsmbs/wchar.h b/wcsmbs/wchar.h index 4bba959387..719de26e37 100644 --- a/wcsmbs/wchar.h +++ b/wcsmbs/wchar.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -166,8 +166,15 @@ extern wchar_t *wcstok __P ((wchar_t *__restrict __s, wchar_t **__restrict __ptr)); /* Return the number of wide characters in S. */ +extern size_t __wcslen __P ((__const wchar_t *__s)); extern size_t wcslen __P ((__const wchar_t *__s)); +#ifdef __USE_GNU +/* Return the number of wide characters in S, but at most MAXLEN. */ +extern size_t __wcsnlen __P ((__const wchar_t *__s, size_t __maxlen)); +extern size_t wcsnlen __P ((__const wchar_t *__s, size_t __maxlen)); +#endif + /* Search N wide characters of S for C. */ extern wchar_t *wmemchr __P ((__const wchar_t *__s, wchar_t __c, size_t __n)); @@ -260,16 +267,6 @@ extern size_t mbsnrtowcs __P ((wchar_t *__restrict __dst, __const char **__restrict __src, size_t __nmc, size_t __len, mbstate_t *__restrict __ps)); -/* Similar function to the above but this does not stop at NUL bytes. 
*/ -extern size_t __wmemrtowcs __P ((wchar_t *__restrict __dst, - __const char **__restrict __src, - size_t __nmc, size_t __len, - mbstate_t *__restrict __ps)); -extern size_t wmemrtowcs __P ((wchar_t *__restrict __dst, - __const char **__restrict __src, - size_t __nmc, size_t __len, - mbstate_t *__restrict __ps)); - /* Write multibyte character representation of at most NWC characters from the wide character string SRC to DST. */ extern size_t __wcsnrtombs __P ((char *__restrict __dst, @@ -280,16 +277,6 @@ extern size_t wcsnrtombs __P ((char *__restrict __dst, __const wchar_t **__restrict __src, size_t __nwc, size_t __len, mbstate_t *__restrict __ps)); - -/* Similar function to the above but this does not stop at NUL bytes. */ -extern size_t __wmemrtombs __P ((char *__restrict __dst, - __const wchar_t **__restrict __src, - size_t __nwc, size_t len, - mbstate_t *__restrict __ps)); -extern size_t wmemrtombs __P ((char *__restrict __dst, - __const wchar_t **__restrict __src, - size_t __nwc, size_t len, - mbstate_t *__restrict __ps)); #endif /* use GNU */ diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c index 63859e158a..d45ae444bb 100644 --- a/wcsmbs/wcrtomb.c +++ b/wcsmbs/wcrtomb.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. @@ -18,77 +18,86 @@ Boston, MA 02111-1307, USA. */ #include <errno.h> +#include <gconv.h> +#include <stdlib.h> #include <wchar.h> +#include <wcsmbsload.h> + +#include <assert.h> #ifndef EILSEQ -#define EILSEQ EINVAL +# define EILSEQ EINVAL #endif -static const wchar_t encoding_mask[] = -{ - ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; -/* The state is for this UTF8 encoding not used. */ -static mbstate_t internal; +/* This is the private state used if PS is NULL. 
*/ +static mbstate_t state; size_t __wcrtomb (char *s, wchar_t wc, mbstate_t *ps) { - size_t written = 0; - - if (ps == NULL) - ps = &internal; - + char buf[MB_CUR_MAX]; + struct gconv_step_data data; + int status; + size_t result; + + /* Tell where we want the result. */ + data.outbuf = s; + data.outbufavail = 0; + data.outbufsize = MB_CUR_MAX; + data.is_last = 1; + data.statep = ps ?: &state; + + /* A first special case is if S is NULL. This means put PS in the + initial state. */ if (s == NULL) { - /* This is equivalent to wcrtomb (<<internal>, L'\0', ps). We - only have to reset the state. */ - ps->count = 0; - return 1; + data.outbuf = buf; + wc = L'\0'; } - /* Store the UTF8 representation of WC. */ - if (wc < 0 || wc > 0x7fffffff) + /* Make sure we use the correct function. */ + update_conversion_ptrs (); + + /* If WC is the NUL character we write into the output buffer the byte + sequence necessary for PS to get into the initial state, followed + by a NUL byte. */ + if (wc == L'\0') { - /* This is no correct ISO 10646 character. */ - __set_errno (EILSEQ); - return (size_t) -1; - } + size_t inbytes = 0; + + status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb, + &data, NULL, &inbytes, + NULL, 1); - if (wc < 0x80) + if (status == GCONV_OK) + data.outbuf[data.outbufavail++] = '\0'; + } + else { - /* It's a one byte sequence. */ - if (s != NULL) - *s = (char) wc; - ps->count = 0; - return 1; + /* Do a normal conversion. */ + size_t inbytes = sizeof (wchar_t); + + status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb, + &data, (char *) &wc, &inbytes, + NULL, 0); } - for (written = 2; written < 6; ++written) - if ((wc & encoding_mask[written - 2]) == 0) - break; + /* There must not be any problems with the conversion but illegal input + characters. The output buffer must be large enough, otherwise the + definition of MB_CUR_MAX is not correct. All the other possible + errors also must not happen. 
*/ + assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT + || status == GCONV_INCOMPLETE_INPUT); - if (s != NULL) + if (status == GCONV_OK) + result = data.outbufavail; + else { - size_t cnt = written; - s[0] = encoding_byte[cnt - 2]; - - --cnt; - do - { - s[cnt] = 0x80 | (wc & 0x3f); - wc >>= 6; - } - while (--cnt > 0); - s[0] |= wc; + result = (size_t) -1; + __set_errno (EILSEQ); } - return written; + return result; } weak_alias (__wcrtomb, wcrtomb) diff --git a/wcsmbs/wcslen.c b/wcsmbs/wcslen.c index 113bc2a113..b0f1d29a27 100644 --- a/wcsmbs/wcslen.c +++ b/wcsmbs/wcslen.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. @@ -22,7 +22,7 @@ /* Copy SRC to DEST. */ size_t -wcslen (s) +__wcslen (s) const wchar_t *s; { size_t len = 0; @@ -40,3 +40,4 @@ wcslen (s) return len; } +weak_alias (__wcslen, wcslen) diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c new file mode 100644 index 0000000000..cf854d9125 --- /dev/null +++ b/wcsmbs/wcsmbsload.c @@ -0,0 +1,132 @@ +/* Copyright (C) 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <langinfo.h> +#include <limits.h> + +#include <wcsmbsload.h> +#include <bits/libc-lock.h> +#include <iconv/gconv_int.h> + + +/* Last loaded locale for LC_CTYPE. We initialize for the C locale + which is enabled at startup. */ +extern const struct locale_data _nl_C_LC_CTYPE; +const struct locale_data *__wcsmbs_last_locale = &_nl_C_LC_CTYPE; + + +/* These are the descriptions for the default conversion functions. */ +static struct gconv_step to_wc = +{ + shlib_handle: NULL, + modname: NULL, + counter: INT_MAX, + from_name: "ANSI_X3.4-1968", + to_name: "ISO-10646/UCS4/", + fct: __gconv_transform_ascii_ucs4, + init_fct: NULL, + end_fct: NULL, + data: NULL +}; + +static struct gconv_step to_mb = +{ + shlib_handle: NULL, + modname: NULL, + counter: INT_MAX, + from_name: "ISO-10646/UCS4/", + to_name: "ANSI_X3.4-1968", + fct: __gconv_transform_ucs4_ascii, + init_fct: NULL, + end_fct: NULL, + data: NULL +}; + + +/* For the default locale we only have to handle ANSI_X3.4-1968. */ +struct gconv_fcts __wcsmbs_gconv_fcts = +{ + towc: &to_wc, + tomb: &to_mb +}; + + +static inline struct gconv_step * +getfct (const char *to, const char *from) +{ + size_t nsteps; + struct gconv_step *result; + + if (__gconv_find_transform (to, from, &result, &nsteps) != GCONV_OK) + /* Loading the conversion step is not possible. */ + return NULL; + + /* We must only have one step in this conversion. */ + if (nsteps != 1) + return NULL; + + return result; +} + + +/* Load conversion functions for the currently selected locale. */ +void +__wcsmbs_load_conv (const struct locale_data *new_category) +{ + /* We must modify global data. */ + __libc_lock_define_initialized (static, lock) + + /* Acquire the lock. 
*/ + __libc_lock_lock (lock); + + /* We should repeat the test since while we waited some other thread + might have run this function. */ + if (__wcsmbs_last_locale != new_category) + { + if (new_category->name == _nl_C_name) /* Yes, pointer comparison. */ + { + failed: + __wcsmbs_gconv_fcts.towc = &to_wc; + __wcsmbs_gconv_fcts.tomb = &to_mb; + } + else + { + /* We must find the real functions. */ + const char *charset_name; + + /* Get name of charset of the locale. */ + charset_name = new_category->values[_NL_ITEM_INDEX(CODESET)].string; + + __wcsmbs_gconv_fcts.tomb = getfct (charset_name, "ISO-10646/UCS4/"); + __wcsmbs_gconv_fcts.towc = getfct ("ISO-10646/UCS4/", charset_name); + + /* If any of the conversion functions is not available we don't + use any since this would mean we cannot convert back and + forth. */ + if (__wcsmbs_gconv_fcts.towc == NULL + || __wcsmbs_gconv_fcts.tomb == NULL) + goto failed; + } + + /* Set last-used variable for current locale. */ + __wcsmbs_last_locale = new_category; + } + + __libc_lock_unlock (lock); +} diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h new file mode 100644 index 0000000000..df0ba7b796 --- /dev/null +++ b/wcsmbs/wcsmbsload.h @@ -0,0 +1,52 @@ +/* Copyright (C) 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <locale.h> +#include <wchar.h> +#include <locale/localeinfo.h> + + +/* Contains pointers to the used functions in the `gconv' modules. */ +struct gconv_fcts + { + struct gconv_step *towc; + struct gconv_step *tomb; + }; + +/* Set of currently active conversion functions. */ +extern struct gconv_fcts __wcsmbs_gconv_fcts; + + +/* Last loaded locale for LC_CTYPE. */ +extern const struct locale_data *__wcsmbs_last_locale; + + +/* Load conversion functions for the currently selected locale. */ +extern void __wcsmbs_load_conv (const struct locale_data *new_category) + internal_function; + + +/* Check whether the LC_CTYPE locale changed since the last call. + Update the pointers appropriately. */ +static inline void +update_conversion_ptrs (void) +{ + if (__wcsmbs_last_locale != _nl_current_LC_CTYPE) + __wcsmbs_load_conv (_nl_current_LC_CTYPE); +} diff --git a/wcsmbs/wcsnlen.c b/wcsmbs/wcsnlen.c new file mode 100644 index 0000000000..5264a66d05 --- /dev/null +++ b/wcsmbs/wcsnlen.c @@ -0,0 +1,44 @@ +/* Copyright (C) 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include <wchar.h> + + +/* Return the number of wide characters in S, but at most MAXLEN. */ +size_t +__wcsnlen (s, maxlen) + const wchar_t *s; + size_t maxlen; +{ + size_t len = 0; + + while (s[len] != L'\0' && maxlen > 0) + { + if (s[++len] == L'\0' || --maxlen == 0) + return len; + if (s[++len] == L'\0' || --maxlen == 0) + return len; + if (s[++len] == L'\0' || --maxlen == 0) + return len; + ++len; + } + + return len; +} +weak_alias (__wcsnlen, wcsnlen) diff --git a/wcsmbs/wcsnrtombs.c b/wcsmbs/wcsnrtombs.c index f6c8048295..2dd9f7c17e 100644 --- a/wcsmbs/wcsnrtombs.c +++ b/wcsmbs/wcsnrtombs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. @@ -18,26 +18,19 @@ Boston, MA 02111-1307, USA. */ #include <errno.h> +#include <gconv.h> #include <wchar.h> +#include <wcsmbsload.h> + +#include <assert.h> #ifndef EILSEQ -#define EILSEQ EINVAL +# define EILSEQ EINVAL #endif -static const wchar_t encoding_mask[] = -{ - ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; - -/* We don't need the state really because we don't have shift states - to maintain between calls to this function. */ -static mbstate_t internal; +/* This is the private state used if PS is NULL. 
*/ +static mbstate_t state; /* This is a non-standard function but it is very useful in the implementation of stdio because we have to deal with unterminated @@ -50,85 +43,91 @@ __wcsnrtombs (dst, src, nwc, len, ps) size_t len; mbstate_t *ps; { - size_t written = 0; - const wchar_t *run = *src; + struct gconv_step_data data; + size_t inbytes_in; + int status; + size_t result; - if (ps == NULL) - ps = &internal; + /* Tell where we want the result. */ + data.is_last = 1; + data.statep = ps ?: &state; - if (dst == NULL) - /* The LEN parameter has to be ignored if we don't actually write - anything. */ - len = ~0; + if (nwc == 0) + return 0; + inbytes_in = __wcsnlen (*src, nwc - 1) + 1; - while (written < len && nwc-- > 0) - { - wchar_t wc; + /* Make sure we use the correct function. */ + update_conversion_ptrs (); - /* Store position of first unprocessed word. */ - *src = run; + /* We have to handle DST == NULL special. */ + if (dst == NULL) + { + char buf[256]; /* Just an arbitrary value. */ + size_t inbytes = inbytes_in; + const wchar_t *inbuf = *src; + size_t written; - wc = *run++; + data.outbuf = buf; + data.outbufsize = sizeof (buf); - if (wc < 0 || wc > 0x7fffffff) + do { - /* This is no correct ISO 10646 character. */ - __set_errno (EILSEQ); - return (size_t) -1; + inbuf += (inbytes_in - inbytes) / sizeof (wchar_t); + inbytes_in = inbytes; + data.outbufavail = 0; + written = 0; + + status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb, + &data, + (const char *) inbuf, + &inbytes, &written, 0); + result += written; } + while (status == GCONV_FULL_OUTPUT); - if (wc == L'\0') + if (status == GCONV_OK && dst[data.outbufavail - 1] == '\0') + /* Don't count the NUL character in. */ + --result; + } + else + { + /* This code is based on the safe assumption that all internal + multi-byte encodings use the NUL byte only to mark the end + of the string. 
*/ + size_t inbytes = inbytes_in; + + data.outbuf = dst; + data.outbufavail = 0; + data.outbufsize = len; + + status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb, + &data, (const char *) *src, + &inbytes, &result, 0); + + /* We have to determine whether the last character converted + is the NUL character. */ + if (status == GCONV_OK && dst[data.outbufavail - 1] == '\0') { - /* Found the end. */ - if (dst != NULL) - *dst = '\0'; + assert (data.outbufavail > 0); + assert (mbsinit (data.statep)); *src = NULL; - return written; - } - else if (wc < 0x80) - { - /* It's an one byte sequence. */ - if (dst != NULL) - *dst++ = (char) wc; - ++written; + --result; } else - { - size_t step; - - for (step = 2; step < 6; ++step) - if ((wc & encoding_mask[step - 2]) == 0) - break; - - if (written + step >= len) - /* Too long. */ - break; - - if (dst != NULL) - { - size_t cnt = step; - - dst[0] = encoding_byte[cnt - 2]; - - --cnt; - do - { - dst[cnt] = 0x80 | (wc & 0x3f); - wc >>= 6; - } - while (--cnt > 0); - dst[0] |= wc; + *src += result; + } - dst += step; - } + /* There must not be any problems with the conversion but illegal input + characters. */ + assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT + || status == GCONV_INCOMPLETE_INPUT || status == GCONV_FULL_OUTPUT); - written += step; - } + if (status != GCONV_OK && status != GCONV_FULL_OUTPUT) + { + result = (size_t) -1; + __set_errno (EILSEQ); } - /* Store position of first unprocessed word. */ - *src = run; - - return written; + return result; } weak_alias (__wcsnrtombs, wcsnrtombs) diff --git a/wcsmbs/wcsrtombs.c b/wcsmbs/wcsrtombs.c index cc21a51eb3..35bb58e977 100644 --- a/wcsmbs/wcsrtombs.c +++ b/wcsmbs/wcsrtombs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. 
@@ -18,26 +18,20 @@ Boston, MA 02111-1307, USA. */ #include <errno.h> +#include <stdlib.h> +#include <gconv.h> #include <wchar.h> +#include <wcsmbsload.h> + +#include <assert.h> #ifndef EILSEQ -#define EILSEQ EINVAL +# define EILSEQ EINVAL #endif -static const wchar_t encoding_mask[] = -{ - ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; - -/* We don't need the state really because we don't have shift states - to maintain between calls to this function. */ -static mbstate_t internal; +/* This is the private state used if PS is NULL. */ +static mbstate_t state; size_t __wcsrtombs (dst, src, len, ps) @@ -46,89 +40,88 @@ __wcsrtombs (dst, src, len, ps) size_t len; mbstate_t *ps; { - size_t written = 0; - const wchar_t *run = *src; + struct gconv_step_data data; + int status; + size_t result; - if (ps == NULL) - ps = &internal; + /* Tell where we want the result. */ + data.is_last = 1; + data.statep = ps ?: &state; - if (dst == NULL) - /* The LEN parameter has to be ignored if we don't actually write - anything. */ - len = ~0; + /* Make sure we use the correct function. */ + update_conversion_ptrs (); - while (written < len) + /* We have to handle DST == NULL special. */ + if (dst == NULL) { - wchar_t wc; - - /* Store position of first unprocessed word. */ - *src = run; + char buf[256]; /* Just an arbitrary value. */ + size_t inbytes_in = __wcslen (*src) + 1; + size_t inbytes = inbytes_in; + const wchar_t *inbuf = *src; + size_t written; - wc = *run++; + data.outbuf = buf; + data.outbufsize = sizeof (buf); - if (wc < 0 || wc > 0x7fffffff) + do { - /* This is no correct ISO 10646 character. 
*/ - __set_errno (EILSEQ); - return (size_t) -1; + inbuf += (inbytes_in - inbytes) / sizeof (wchar_t); + inbytes_in = inbytes; + data.outbufavail = 0; + written = 0; + + status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb, + &data, + (const char *) inbuf, + &inbytes, &written, 0); + result += written; } + while (status == GCONV_FULL_OUTPUT); - if (wc == L'\0') + if (status == GCONV_OK && dst[data.outbufavail - 1] == '\0') + /* Don't count the NUL character in. */ + --result; + } + else + { + /* This code is based on the safe assumption that all internal + multi-byte encodings use the NUL byte only to mark the end + of the string. */ + size_t inbytes_in = __wcsnlen (*src, len * MB_CUR_MAX) + 1; + size_t inbytes = inbytes_in; + + data.outbuf = dst; + data.outbufavail = 0; + data.outbufsize = len; + + status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb, + &data, (const char *) *src, + &inbytes, &result, 0); + + /* We have to determine whether the last character converted + is the NUL character. */ + if (status == GCONV_OK && dst[data.outbufavail - 1] == '\0') { - /* Found the end. */ - if (dst != NULL) - *dst = '\0'; - ps->count = 0; + assert (data.outbufavail > 0); + assert (mbsinit (data.statep)); *src = NULL; - return written; - } - else if (wc < 0x80) - { - /* It's an one byte sequence. */ - if (dst != NULL) - *dst++ = (char) wc; - ++written; + --result; } else - { - size_t step; - - for (step = 2; step < 6; ++step) - if ((wc & encoding_mask[step - 2]) == 0) - break; - - if (written + step >= len) - /* Too long. */ - break; - - if (dst != NULL) - { - size_t cnt = step; - - dst[0] = encoding_byte[cnt - 2]; - - --cnt; - do - { - dst[cnt] = 0x80 | (wc & 0x3f); - wc >>= 6; - } - while (--cnt > 0); - dst[0] |= wc; - - dst += step; - } - - written += step; - } + *src += result; } - /* Store position of first unprocessed word. */ - *src = run; + /* There must not be any problems with the conversion but illegal input + characters. 
*/ + assert (status == GCONV_OK || status == GCONV_ILLEGAL_INPUT + || status == GCONV_INCOMPLETE_INPUT || status == GCONV_FULL_OUTPUT); - /* Signal that we finished correctly. */ - ps->count = 0; + if (status != GCONV_OK && status != GCONV_FULL_OUTPUT) + { + result = (size_t) -1; + __set_errno (EILSEQ); + } - return written; + return result; } weak_alias (__wcsrtombs, wcsrtombs) diff --git a/wcsmbs/wctob.c b/wcsmbs/wctob.c index ce1063a1ec..0461877d32 100644 --- a/wcsmbs/wctob.c +++ b/wcsmbs/wctob.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. @@ -17,15 +17,48 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <gconv.h> #include <stdio.h> +#include <string.h> #include <wchar.h> +#include <wcsmbsload.h> -/* We use UTF8 encoding for multibyte strings and therefore a valid - one byte multibyte string only can have a value from 0 to 0x7f. */ int wctob (c) wint_t c; { - return (c >= 0 && c <= 0x7f) ? c : EOF; + char buf[MB_LEN_MAX]; + struct gconv_step_data data; + wchar_t inbuf[1]; + size_t inbytes; + size_t converted; + int status; + + /* Tell where we want the result. */ + data.outbuf = (char *) buf; + data.outbufavail = 0; + data.outbufsize = MB_LEN_MAX; + data.is_last = 1; + data.statep = &data.__state; + + /* Make sure we start in the initial state. */ + memset (&data.__state, '\0', sizeof (mbstate_t)); + + /* Make sure we use the correct function. */ + update_conversion_ptrs (); + + /* Create the input string. */ + inbuf[0] = c; + inbytes = sizeof (wchar_t); + + status = (*__wcsmbs_gconv_fcts.tomb->fct) (__wcsmbs_gconv_fcts.tomb, &data, + (const char *) inbuf, &inbytes, + &converted, 0); + /* The conversion failed or the output is too long. 
*/ + if (status != GCONV_OK && status != GCONV_FULL_OUTPUT + || data.outbufavail != 1) + return WEOF; + + return buf[0]; } |