diff options
author | Ulrich Drepper <drepper@gmail.com> | 2012-01-08 07:19:21 -0500 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2012-01-08 07:19:21 -0500 |
commit | d3ed722566f42d3f614b1221a8e4f19092976531 (patch) | |
tree | 4a63e059ef599167cf407311188551fe72221d8d /wcsmbs/mbrtoc16.c | |
parent | a0da5fe1e49b819b4d90b77915e21cddd397d064 (diff) | |
download | glibc-d3ed722566f42d3f614b1221a8e4f19092976531.tar.gz glibc-d3ed722566f42d3f614b1221a8e4f19092976531.tar.xz glibc-d3ed722566f42d3f614b1221a8e4f19092976531.zip |
Simplify char16_t implementation
Diffstat (limited to 'wcsmbs/mbrtoc16.c')
-rw-r--r-- | wcsmbs/mbrtoc16.c | 75 |
1 files changed, 34 insertions, 41 deletions
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c index df970fba4f..f5ed2b4ac9 100644 --- a/wcsmbs/mbrtoc16.c +++ b/wcsmbs/mbrtoc16.c @@ -30,12 +30,6 @@ # define EILSEQ EINVAL #endif -#if __STDC__ >= 201000L -# define U(c) U##c -#else -# define U(c) L##c -#endif - /* This is the private state used if PS is NULL. */ static mbstate_t state; @@ -46,6 +40,11 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (ps == NULL) ps = &state; + /* The standard text does not say that S being NULL means the state + is reset even if the second half of a surrogate still have to be + returned. In fact, the error code description indicates + otherwise. Therefore always first try to return a second + half. */ if (ps->__count & 0x80000000) { /* We have to return the second word for a surrogate. */ @@ -55,13 +54,13 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) return (size_t) -3; } - char16_t buf[2]; + wchar_t wc; struct __gconv_step_data data; int status; size_t result; size_t dummy; const unsigned char *inbuf, *endbuf; - unsigned char *outbuf = (unsigned char *) buf; + unsigned char *outbuf = (unsigned char *) &wc; const struct gconv_fcts *fcts; /* Set information for this step. */ @@ -75,14 +74,14 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) initial state. */ if (s == NULL) { - outbuf = (unsigned char *) buf; + pc16 = NULL; s = ""; n = 1; } /* Tell where we want the result. */ data.__outbuf = outbuf; - data.__outbufend = outbuf + sizeof (char16_t); + data.__outbufend = outbuf + sizeof (wchar_t); /* Get the conversion functions. */ fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE)); @@ -91,28 +90,20 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) inbuf = (const unsigned char *) s; endbuf = inbuf + n; if (__builtin_expect (endbuf < inbuf, 0)) - endbuf = (const unsigned char *) ~(uintptr_t) 0; - __gconv_fct fct = fcts->toc16->__fct; + { + endbuf = (const unsigned char *) ~(uintptr_t) 0; + if (endbuf == inbuf) + goto ilseq; + } + __gconv_fct fct = fcts->towc->__fct; #ifdef PTR_DEMANGLE - if (fcts->toc16->__shlib_handle != NULL) + if (fcts->towc->__shlib_handle != NULL) PTR_DEMANGLE (fct); #endif - /* We first have to check whether the character can be represented - without a surrogate. If we immediately pass in a buffer large - enough to hold two char16_t values and the first character does - not require a surrogate the routine will try to convert more - input if N is larger then needed for the first character. */ - status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, + status = DL_CALL_FCT (fct, (fcts->towc, &data, &inbuf, endbuf, NULL, &dummy, 0, 1)); - if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf) - { - data.__outbufend = outbuf + 2 * sizeof (char16_t); - status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf, - NULL, &dummy, 0, 1)); - } - /* There must not be any problems with the conversion but illegal input characters. The output buffer must be large enough, otherwise the definition of MB_CUR_MAX is not correct. All the other possible @@ -125,33 +116,35 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps) if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT || status == __GCONV_FULL_OUTPUT) { - if (pc16 != NULL) - *pc16 = buf[0]; + result = inbuf - (const unsigned char *) s; - if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0')) + if (wc < 0x10000) { - /* The converted character is the NUL character. */ - assert (__mbsinit (data.__statep)); - result = 0; + if (pc16 != NULL) + *pc16 = wc; + + if (data.__outbuf != outbuf && wc == L'\0') + { + /* The converted character is the NUL character. */ + assert (__mbsinit (data.__statep)); + result = 0; + } } else { - result = inbuf - (const unsigned char *) s; + /* This is a surrogate. */ + if (pc16 != NULL) + *pc16 = 0xd7c0 + (wc >> 10); - if (data.__outbuf != outbuf + 2) - { - /* This is a surrogate. */ - assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff); - assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff); - ps->__count |= 0x80000000; - ps->__value.__wch = buf[1]; - } + ps->__count |= 0x80000000; + ps->__value.__wch = 0xdc00 + (wc & 0x3ff); } } else if (status == __GCONV_INCOMPLETE_INPUT) result = (size_t) -2; else { + ilseq: result = (size_t) -1; __set_errno (EILSEQ); } |