From a904b5d93a2b54c611beb9c50ed274c5d77ec7b4 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 5 Apr 1998 11:23:12 +0000 Subject: Update. 1998-04-05 Ulrich Drepper * iconv/gconv_simple.c: Rewrite to not make use of the mb*towc* and wc*tomb* functions. 1998-04-04 15:16 Philip Blundell * sysdeps/unix/start.c: Fix typo. 1998-04-04 Ulrich Drepper * iconv/gconv_db.c (__gconv_find_transform): Fix typo. --- ChangeLog | 13 ++++ iconv/gconv_db.c | 6 +- iconv/gconv_simple.c | 183 ++++++++++++++++++++++++++++++++++++++++++++------- sysdeps/unix/start.c | 4 +- 4 files changed, 176 insertions(+), 30 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3119f1efb8..7bca7bb9fc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +1998-04-05 Ulrich Drepper + + * iconv/gconv_simple.c: Rewrite to not make use of the mb*towc* + and wc*tomb* functions. + +1998-04-04 15:16 Philip Blundell + + * sysdeps/unix/start.c: Fix typo. + +1998-04-04 Ulrich Drepper + + * iconv/gconv_db.c (__gconv_find_transform): Fix typo. + 1998-04-03 23:38 Ulrich Drepper * sysdeps/libm-ieee754/e_acos.c: Optimize by splitting large diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c index e0a94e41c3..b3bdd7abc0 100644 --- a/iconv/gconv_db.c +++ b/iconv/gconv_db.c @@ -534,9 +534,9 @@ __gconv_find_transform (const char *toset, const char *fromset, do if (steps[--cnt].counter++ == 0) { - steps[--cnt].shlib_handle = - __gconv_find_shlib (steps[--cnt].modname); - if (steps[--cnt].shlib_handle == NULL) + steps[cnt].shlib_handle = + __gconv_find_shlib (steps[cnt].modname); + if (steps[cnt].shlib_handle == NULL) { /* Oops, this is the second time we use this module (after unloading) and this time loading failed!? */ diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 7fbdfbacf3..197a3d1e9c 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -26,6 +26,24 @@ #include #include +#ifndef EILSEQ +# define EILSEQ EINVAL +#endif + + +/* These are definitions used by some of the functions for handling + UTF-8 encoding below. */ +static const wchar_t encoding_mask[] = +{ + ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff +}; + +static const unsigned char encoding_byte[] = +{ + 0xc0, 0xe0, 0xf0, 0xf8, 0xfc +}; + + int __gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data, @@ -97,30 +115,70 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step, int save_errno = errno; do_write = 0; + result = GCONV_OK; do { - const char *newinbuf = inbuf; - size_t actually; + const wchar_t *newinbuf = (const wchar_t *) inbuf; + size_t actually = 0; + size_t cnt = 0; - errno = 0; - actually = __wmemrtombs (&data->outbuf[data->outbufavail], - (const wchar_t **) &newinbuf, - *inlen / sizeof (wchar_t), - data->outbufsize - data->outbufavail, - data->statep); + while (data->outbufavail < data->outbufsize + && cnt * sizeof (wchar_t) <= *inlen) + { + wchar_t wc = newinbuf[cnt]; + + if (wc < 0 && wc > 0x7fffffff) + { + /* This is no correct ISO 10646 character. */ + result = GCONV_ILLEGAL_INPUT; + break; + } + + if (wc < 0x80) + { + /* It's an one byte sequence. */ + data->outbuf[data->outbufavail++] = (char) wc; + ++actually; + } + else + { + size_t step; + size_t start; + + for (step = 2; step < 6; ++step) + if ((wc & encoding_mask[step - 2]) == 0) + break; + + if (data->outbufavail + step >= data->outbufsize) + /* Too long. */ + break; + + start = data->outbufavail; + data->outbufavail += step; + actually += step; + data->outbuf[start] = encoding_byte[step - 2]; + --step; + do + { + data->outbuf[start + step] = 0x80 | (wc & 0x3f); + wc >>= 6; + } + while (--step > 0); + data->outbuf[start] |= wc; + } + + ++cnt; + } /* Remember how much we converted. */ - do_write += newinbuf - inbuf; - *inlen -= newinbuf - inbuf; + do_write += cnt * sizeof (wchar_t); + *inlen -= cnt * sizeof (wchar_t); data->outbufavail += actually; /* Check whether an illegal character appeared. */ - if (errno != 0) - { - result = GCONV_ILLEGAL_INPUT; - break; - } + if (result != GCONV_OK) + break; if (data->is_last) { @@ -199,26 +257,101 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step, int save_errno = errno; do_write = 0; + result = GCONV_OK; do { - const char *newinbuf = inbuf; - size_t actually; + wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail]; + size_t cnt = 0; + size_t actually = 0; - errno = 0; - actually = __wmemrtowcs ((wchar_t *) &data->outbuf[data->outbufavail], - &newinbuf, *inlen, - ((data->outbufsize - - data->outbufavail) / sizeof (wchar_t)), - data->statep); + while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize + && cnt < *inlen) + { + size_t start = cnt; + wchar_t value; + unsigned char byte; + int count; + + /* Next input byte. */ + byte = inbuf[cnt++]; + + if (byte < 0x80) + { + /* One byte sequence. */ + count = 0; + value = byte; + } + else if ((byte & 0xe0) == 0xc0) + { + count = 1; + value = byte & 0x1f; + } + else if ((byte & 0xf0) == 0xe0) + { + /* We expect three bytes. */ + count = 2; + value = byte & 0x0f; + } + else if ((byte & 0xf8) == 0xf0) + { + /* We expect four bytes. */ + count = 3; + value = byte & 0x07; + } + else if ((byte & 0xfc) == 0xf8) + { + /* We expect five bytes. */ + count = 4; + value = byte & 0x03; + } + else if ((byte & 0xfe) == 0xfc) + { + /* We expect six bytes. */ + count = 5; + value = byte & 0x01; + } + else + { + /* This is an illegal encoding. */ + result = GCONV_ILLEGAL_INPUT; + break; + } + + /* Read the possible remaining bytes. */ + while (cnt < *inbuf && count > 0) + { + byte = inbuf[cnt++]; + --count; + + if ((byte & 0xc0) != 0x80) + { + /* This is an illegal encoding. */ + result = GCONV_ILLEGAL_INPUT; + break; + } + + value <<= 6; + value |= byte & 0x3f; + } + + if (result != GCONV_OK) + { + cnt = start; + break; + } + + *outbuf++ = value; + ++actually; + } /* Remember how much we converted. */ do_write += actually; - *inlen -= newinbuf - inbuf; + *inlen -= cnt; data->outbufavail += actually * sizeof (wchar_t); /* Check whether an illegal character appeared. */ - if (errno != 0) + if (result != GCONV_OK) { result = GCONV_ILLEGAL_INPUT; break; diff --git a/sysdeps/unix/start.c b/sysdeps/unix/start.c index 6868fe9a95..49d17810ad 100644 --- a/sysdeps/unix/start.c +++ b/sysdeps/unix/start.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1991, 1993, 1995, 1996, 1997 Free Software Foundation, Inc. +/* Copyright (C) 1991, 93, 95, 96, 97, 98 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -37,7 +37,7 @@ weak_alias (__data_start, data_start) #ifndef errno volatile int __errno; -string_alias (__errno, errno) +strong_alias (__errno, errno) #endif extern void __libc_init __P ((int argc, char **argv, char **envp)); -- cgit 1.4.1