diff options
-rw-r--r-- | ChangeLog | 19 | ||||
-rw-r--r-- | locale/programs/stringtrans.c | 2 | ||||
-rw-r--r-- | posix/glob.h | 7 | ||||
-rw-r--r-- | socket/sys/socket.h (renamed from sysdeps/unix/sysv/linux/sys/socket.h) | 18 | ||||
-rw-r--r-- | stdio-common/printf-parse.h | 2 | ||||
-rw-r--r-- | stdio-common/printf-prs.c | 7 | ||||
-rw-r--r-- | stdio-common/vfprintf.c | 7 | ||||
-rw-r--r-- | sysdeps/generic/machine-gmon.h | 2 | ||||
-rw-r--r-- | sysdeps/generic/socketbits.h (renamed from sysdeps/generic/sys/socket.h) | 147 | ||||
-rw-r--r-- | sysdeps/mach/hurd/connect.c | 5 | ||||
-rw-r--r-- | sysdeps/mach/hurd/send.c | 5 | ||||
-rw-r--r-- | sysdeps/unix/inet/syscalls.list | 4 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/configure | 10 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/socketbits.h | 2 | ||||
-rw-r--r-- | wcsmbs/btowc.c | 10 | ||||
-rw-r--r-- | wcsmbs/mbrlen.c | 3 | ||||
-rw-r--r-- | wcsmbs/mbrtowc.c | 111 | ||||
-rw-r--r-- | wcsmbs/mbsinit.c | 18 | ||||
-rw-r--r-- | wcsmbs/mbsrtowcs.c | 114 | ||||
-rw-r--r-- | wcsmbs/wchar.h | 20 | ||||
-rw-r--r-- | wcsmbs/wcrtomb.c | 62 | ||||
-rw-r--r-- | wcsmbs/wcsrtombs.c | 95 | ||||
-rw-r--r-- | wcsmbs/wctob.c | 9 |
23 files changed, 396 insertions, 283 deletions
diff --git a/ChangeLog b/ChangeLog index a619747dbf..bd2e083ed4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,24 @@ Tue Jul 9 09:37:55 1996 Roland McGrath <roland@delasyd.gnu.ai.mit.edu> + * posix/glob.h (__glob_opendir_hook, __glob_readdir_hook, + __glob_closedir_hook): Remove decls. + + * sysdeps/generic/machine-gmon.h: Declare mcount_internal. + + * sysdeps/unix/inet/syscalls.list: Define __ names with weak aliases + for send and connect syscalls. + + * socket/sys/socket.h: New file, taken from non-sysdep parts of + linux/sys/socket.h; break sysdeps parts out into socketbits.h. + Declare __ names for send and connect. + * sysdeps/generic/socketbits.h: New file. + * sysdeps/unix/sysv/linux/socketbits.h: New file. + * sysdeps/unix/sysv/linux/sys/socket.h: File removed. + * sysdeps/generic/sys/socket.h: File removed. + + * sysdeps/mach/hurd/connect.c: Define __ name and weak alias. + * sysdeps/mach/hurd/send.c: Likewise. + * sysdeps/mach/libc-lock.h: New file. * sysdeps/unix/readdir.c: Do locking. * sysdeps/unix/seekdir.c: Likewise. diff --git a/locale/programs/stringtrans.c b/locale/programs/stringtrans.c index bff5aa41a2..10b04fa62e 100644 --- a/locale/programs/stringtrans.c +++ b/locale/programs/stringtrans.c @@ -1,6 +1,6 @@ /* Copyright (C) 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. -COntributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. +Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as diff --git a/posix/glob.h b/posix/glob.h index 73fd018066..8607e9f04c 100644 --- a/posix/glob.h +++ b/posix/glob.h @@ -100,13 +100,6 @@ extern int glob __P ((const char *__pattern, int __flags, extern void globfree __P ((glob_t *__pglob)); -#if !defined (_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 2 || defined (_GNU_SOURCE) -/* If they are not NULL, `glob' uses these functions to read directories. */ -extern __ptr_t (*__glob_opendir_hook) __P ((const char *__directory)); -extern const char *(*__glob_readdir_hook) __P ((__ptr_t __stream)); -extern void (*__glob_closedir_hook) __P ((__ptr_t __stream)); -#endif - #ifdef __cplusplus } #endif diff --git a/sysdeps/unix/sysv/linux/sys/socket.h b/socket/sys/socket.h index a680d5fb29..172c897af7 100644 --- a/sysdeps/unix/sysv/linux/sys/socket.h +++ b/socket/sys/socket.h @@ -1,4 +1,5 @@ -/* Copyright (C) 1991, 92, 94, 95, 96 Free Software Foundation, Inc. +/* Declarations of socket constants, types, and functions. +Copyright (C) 1991, 92, 94, 95, 96 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -27,8 +28,11 @@ __BEGIN_DECLS #include <stddef.h> -/* Get Linux specific constants and data types. */ -#include <linux/socket.h> +/* This operating system-specific header file defines the SOCK_*, PF_*, + AF_*, MSG_*, SOL_*, and SO_* constants, and the `struct sockaddr', + `struct msghdr', and `struct linger' types. */ +#include <socketbits.h> + /* This is the type we use for generic socket address arguments. @@ -90,6 +94,8 @@ extern int getsockname __P ((int __fd, __SOCKADDR_ARG __addr, For connectionless socket types, just set the default address to send to and the only address from which to accept transmissions. Return 0 on success, -1 for errors. */ +extern int __connect __P ((int __fd, + __CONST_SOCKADDR_ARG __addr, size_t __len)); extern int connect __P ((int __fd, __CONST_SOCKADDR_ARG __addr, size_t __len)); @@ -100,6 +106,7 @@ extern int getpeername __P ((int __fd, __SOCKADDR_ARG __addr, /* Send N bytes of BUF to socket FD. Returns the number sent or -1. */ +extern int __send __P ((int __fd, __ptr_t __buf, size_t __n, int __flags)); extern int send __P ((int __fd, __ptr_t __buf, size_t __n, int __flags)); /* Read N bytes into BUF from socket FD. @@ -164,6 +171,11 @@ extern int accept __P ((int __fd, __SOCKADDR_ARG __addr, extern int shutdown __P ((int __fd, int __how)); +/* FDTYPE is S_IFSOCK or another S_IF* macro defined in <sys/stat.h>; + returns 1 if FD is open on an object of the indicated type, 0 if not, + or -1 for errors (setting errno). */ +extern int isfdtype __P ((int __fd, int __fdtype)); + __END_DECLS #endif /* sys/socket.h */ diff --git a/stdio-common/printf-parse.h b/stdio-common/printf-parse.h index a7960e6003..9a5cfbac7e 100644 --- a/stdio-common/printf-parse.h +++ b/stdio-common/printf-parse.h @@ -97,6 +97,8 @@ find_spec (const char *format, mbstate_t *ps) { int len; + /* Remove any hints of a wrong encoding. */ + ps->count = 0; if (isascii (*format) || (len = mbrlen (format, MB_CUR_MAX, ps)) <= 0) ++format; else diff --git a/stdio-common/printf-prs.c b/stdio-common/printf-prs.c index d0756de7d4..a15be55c48 100644 --- a/stdio-common/printf-prs.c +++ b/stdio-common/printf-prs.c @@ -81,7 +81,6 @@ parse_printf_format (fmt, n, argtypes) nargs = 0; max_ref_arg = 0; - mbstate = 0; /* Search for format specifications. */ for (fmt = find_spec (fmt, &mbstate); *fmt != '\0'; fmt = spec.next_fmt) @@ -90,14 +89,14 @@ parse_printf_format (fmt, n, argtypes) nargs += parse_one_spec (fmt, nargs, &spec, &max_ref_arg, &mbstate); /* If the width is determined by an argument this is an int. */ - if (spec.width_arg != -1 && spec.width_arg < n) + if (spec.width_arg != -1 && (size_t) spec.width_arg < n) argtypes[spec.width_arg] = PA_INT; /* If the precision is determined by an argument this is an int. */ - if (spec.prec_arg != -1 && spec.prec_arg < n) + if (spec.prec_arg != -1 && (size_t) spec.prec_arg < n) argtypes[spec.prec_arg] = PA_INT; - if (spec.data_arg < n) + if ((size_t) spec.data_arg < n) switch (spec.ndata_args) { case 0: /* No arguments. */ diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c index 8031b99ae9..3fa53a62a8 100644 --- a/stdio-common/vfprintf.c +++ b/stdio-common/vfprintf.c @@ -735,16 +735,14 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap) else \ { \ const wchar_t *s2 = (const wchar_t *) string; \ - mbstate_t mbstate = 0; \ + mbstate_t mbstate; \ \ - len = wcsrtombs (NULL, &s2, prec != -1 ? prec : UINT_MAX, \ - &mbstate); \ + len = wcsrtombs (NULL, &s2, 0, &mbstate); \ if (len == (size_t) -1) \ /* Illegal wide-character string. */ \ return -1; \ \ s2 = (const wchar_t *) string; \ - mbstate = 0; \ string = alloca (len + 1); \ (void) wcsrtombs (string, &s2, prec != -1 ? prec : UINT_MAX, \ &mbstate); \ @@ -841,7 +839,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap) /* Initialize local variables. */ done = 0; grouping = (const char *) -1; - mbstate = 0; ap_save = ap; nspecs_done = 0; diff --git a/sysdeps/generic/machine-gmon.h b/sysdeps/generic/machine-gmon.h index 80ee97fcc3..115962a601 100644 --- a/sysdeps/generic/machine-gmon.h +++ b/sysdeps/generic/machine-gmon.h @@ -38,6 +38,8 @@ void _mcount (void); weak_alias (_mcount, mcount) #endif +static void mcount_internal (u_long frompc, u_long selfpc); + #define _MCOUNT_DECL(frompc, selfpc) \ static inline void mcount_internal (frompc, selfpc) diff --git a/sysdeps/generic/sys/socket.h b/sysdeps/generic/socketbits.h index 6ee3ebe52e..770f011ffb 100644 --- a/sysdeps/generic/sys/socket.h +++ b/sysdeps/generic/socketbits.h @@ -1,4 +1,5 @@ -/* Copyright (C) 1991, 92, 94, 95, 96 Free Software Foundation, Inc. +/* System-specific socket constants and types. Generic/4.3 BSD version. +Copyright (C) 1991, 92, 94, 95, 96 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,9 +17,9 @@ License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#ifndef _SYS_SOCKET_H +#ifndef _SOCKETBITS_H -#define _SYS_SOCKET_H 1 +#define _SOCKETBITS_H 1 #include <features.h> __BEGIN_DECLS @@ -115,75 +116,6 @@ struct sockaddr char sa_data[14]; /* Address data. */ }; -/* This is the type we use for generic socket address arguments. - - With GCC 2.7 and later, the funky union causes redeclarations or uses with - any of the listed types to be allowed without complaint. */ -#if (!defined (__GNUC__) || __GNUC__ < 2 || \ - (__GNUC__ == 2 && __GNUC_MINOR__ < 7)) -#define __SOCKADDR_ARG struct sockaddr * -#define __CONST_SOCKADDR_ARG __const struct sockaddr * -#else -/* Add more `struct sockaddr_AF' types here as necessary. - These are all the ones I found on NetBSD and Linux. */ -#define __SOCKADDR_ALLTYPES \ - __SOCKADDR_ONETYPE (sockaddr) \ - __SOCKADDR_ONETYPE (sockaddr_at) \ - __SOCKADDR_ONETYPE (sockaddr_ax25) \ - __SOCKADDR_ONETYPE (sockaddr_dl) \ - __SOCKADDR_ONETYPE (sockaddr_eon) \ - __SOCKADDR_ONETYPE (sockaddr_in) \ - __SOCKADDR_ONETYPE (sockaddr_in6) \ - __SOCKADDR_ONETYPE (sockaddr_inarp) \ - __SOCKADDR_ONETYPE (sockaddr_ipx) \ - __SOCKADDR_ONETYPE (sockaddr_iso) \ - __SOCKADDR_ONETYPE (sockaddr_ns) \ - __SOCKADDR_ONETYPE (sockaddr_un) \ - __SOCKADDR_ONETYPE (sockaddr_x25) - -#define __SOCKADDR_ONETYPE(type) struct type *__##type##__; -typedef union { __SOCKADDR_ALLTYPES - } __SOCKADDR_ARG __attribute__ ((__transparent_union__)); -#undef __SOCKADDR_ONETYPE -#define __SOCKADDR_ONETYPE(type) __const struct type *__##type##__; -typedef union { __SOCKADDR_ALLTYPES - } __CONST_SOCKADDR_ARG __attribute__ ((__transparent_union__)); -#undef __SOCKADDR_ONETYPE -#endif - - -/* Create a new socket of type TYPE in domain DOMAIN, using - protocol PROTOCOL. If PROTOCOL is zero, one is chosen automatically. - Returns a file descriptor for the new socket, or -1 for errors. */ -extern int socket __P ((int __domain, enum __socket_type __type, - int __protocol)); - -/* Create two new sockets, of type TYPE in domain DOMAIN and using - protocol PROTOCOL, which are connected to each other, and put file - descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, - one will be chosen automatically. Returns 0 on success, -1 for errors. */ -extern int socketpair __P ((int __domain, enum __socket_type __type, - int __protocol, int __fds[2])); - -/* Give the socket FD the local address ADDR (which is LEN bytes long). */ -extern int bind __P ((int __fd, __CONST_SOCKADDR_ARG __addr, size_t __len)); - -/* Put the local address of FD into *ADDR and its length in *LEN. */ -extern int getsockname __P ((int __fd, __SOCKADDR_ARG __addr, - size_t *__len)); - -/* Open a connection on socket FD to peer at ADDR (which LEN bytes long). - For connectionless socket types, just set the default address to send to - and the only address from which to accept transmissions. - Return 0 on success, -1 for errors. */ -extern int connect __P ((int __fd, - __CONST_SOCKADDR_ARG __addr, size_t __len)); - -/* Put the address of the peer connected to socket FD into *ADDR - (which is *LEN bytes long), and its actual length into *LEN. */ -extern int getpeername __P ((int __fd, __SOCKADDR_ARG __addr, - size_t *__len)); - /* Bits in the FLAGS argument to `send', `recv', et al. */ enum @@ -198,26 +130,6 @@ enum MSG_DONTWAIT = 0x80, /* This message should be nonblocking. */ }; -/* Send N bytes of BUF to socket FD. Returns the number sent or -1. */ -extern int send __P ((int __fd, __ptr_t __buf, size_t __n, int __flags)); - -/* Read N bytes into BUF from socket FD. - Returns the number read or -1 for errors. */ -extern int recv __P ((int __fd, __ptr_t __buf, size_t __n, int __flags)); - -/* Send N bytes of BUF on socket FD to peer at address ADDR (which is - ADDR_LEN bytes long). Returns the number sent, or -1 for errors. */ -extern int sendto __P ((int __fd, __ptr_t __buf, size_t __n, int __flags, - __CONST_SOCKADDR_ARG __addr, size_t __addr_len)); - -/* Read N bytes into BUF through socket FD. - If ADDR is not NULL, fill in *ADDR_LEN bytes of it with tha address of - the sender, and store the actual size of the address in *ADDR_LEN. - Returns the number of bytes read or -1 for errors. */ -extern int recvfrom __P ((int __fd, __ptr_t __buf, size_t __n, int __flags, - __SOCKADDR_ARG __addr, size_t *__addr_len)); - - /* Structure describing messages sent by `sendmsg' and received by `recvmsg'. */ @@ -233,15 +145,6 @@ struct msghdr size_t msg_accrightslen; /* Length of access rights information. */ }; -/* Send a message described MESSAGE on socket FD. - Returns the number of bytes sent, or -1 for errors. */ -extern int sendmsg __P ((int __fd, __const struct msghdr *__message, - int __flags)); - -/* Receive a message as described by MESSAGE from socket FD. - Returns the number of bytes read or -1 for errors. */ -extern int recvmsg __P ((int __fd, struct msghdr *__message, int __flags)); - /* Protocol number used to manipulate socket-level options with `getsockopt' and `setsockopt'. */ @@ -285,46 +188,6 @@ struct linger int l_linger; /* Time to linger. */ }; - -/* Put the current value for socket FD's option OPTNAME at protocol level LEVEL - into OPTVAL (which is *OPTLEN bytes long), and set *OPTLEN to the value's - actual length. Returns 0 on success, -1 for errors. */ -extern int getsockopt __P ((int __fd, int __level, int __optname, - __ptr_t __optval, size_t *__optlen)); - -/* Set socket FD's option OPTNAME at protocol level LEVEL - to *OPTVAL (which is OPTLEN bytes long). - Returns 0 on success, -1 for errors. */ -extern int setsockopt __P ((int __fd, int __level, int __optname, - __ptr_t __optval, size_t __optlen)); - - -/* Prepare to accept connections on socket FD. - N connection requests will be queued before further requests are refused. - Returns 0 on success, -1 for errors. */ -extern int listen __P ((int __fd, unsigned int __n)); - -/* Await a connection on socket FD. - When a connection arrives, open a new socket to communicate with it, - set *ADDR (which is *ADDR_LEN bytes long) to the address of the connecting - peer and *ADDR_LEN to the address's actual length, and return the - new socket's descriptor, or -1 for errors. */ -extern int accept __P ((int __fd, __SOCKADDR_ARG __addr, - size_t *__addr_len)); - -/* Shut down all or part of the connection open on socket FD. - HOW determines what to shut down: - 0 = No more receptions; - 1 = No more transmissions; - 2 = No more receptions or transmissions. - Returns 0 on success, -1 for errors. */ -extern int shutdown __P ((int __fd, int __how)); - - -/* Determine whether FILDES if the property identified by the value if - FDTYPE. */ -extern int isfdtype __P ((int __fildes, int __fdtype)); - __END_DECLS -#endif /* sys/socket.h */ +#endif /* socketbits.h */ diff --git a/sysdeps/mach/hurd/connect.c b/sysdeps/mach/hurd/connect.c index 0ecf7369ed..f55a2ae591 100644 --- a/sysdeps/mach/hurd/connect.c +++ b/sysdeps/mach/hurd/connect.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. +/* Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -30,7 +30,7 @@ Cambridge, MA 02139, USA. */ and the only address from which to accept transmissions. Return 0 on success, -1 for errors. */ int -DEFUN(connect, (fd, addr, len), +DEFUN(__connect, (fd, addr, len), int fd AND const struct sockaddr_un *addr AND size_t len) { error_t err; @@ -72,3 +72,4 @@ DEFUN(connect, (fd, addr, len), return err ? __hurd_dfail (fd, err) : 0; } +weak_alias (__connect, connect) diff --git a/sysdeps/mach/hurd/send.c b/sysdeps/mach/hurd/send.c index 153ee93701..17fa66344f 100644 --- a/sysdeps/mach/hurd/send.c +++ b/sysdeps/mach/hurd/send.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1994 Free Software Foundation, Inc. +/* Copyright (C) 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -25,7 +25,7 @@ Cambridge, MA 02139, USA. */ /* Send N bytes of BUF to socket FD. Returns the number sent or -1. */ int -DEFUN(send, (fd, buf, n, flags), +DEFUN(__send, (fd, buf, n, flags), int fd AND PTR buf AND size_t n AND int flags) { error_t err; @@ -38,3 +38,4 @@ DEFUN(send, (fd, buf, n, flags), return err ? __hurd_dfail (fd, err) : wrote; } +weak_alias (__send, send) diff --git a/sysdeps/unix/inet/syscalls.list b/sysdeps/unix/inet/syscalls.list index 3b6ac59990..661a8433f5 100644 --- a/sysdeps/unix/inet/syscalls.list +++ b/sysdeps/unix/inet/syscalls.list @@ -2,7 +2,7 @@ accept - accept 3 accept bind - bind 3 bind -connect - connect 3 connect +__connect - connect 3 connect connect gethostid - gethostid 0 gethostid gethostname - gethostname 2 __gethostname gethostname getpeername - getpeername 3 getpeername @@ -12,7 +12,7 @@ listen - listen 2 listen recv - recv 4 recv recvfrom - recvfrom 6 recvfrom recvmsg - recvmsg 3 recvmsg -send - send 4 send +__send - send 4 send send sendmsg - sendmsg 3 sendmsg sendto - sendto 6 sendto sethostid - sethostid 1 sethostid diff --git a/sysdeps/unix/sysv/linux/configure b/sysdeps/unix/sysv/linux/configure index 7dbb0f9f33..fd72c092ad 100644 --- a/sysdeps/unix/sysv/linux/configure +++ b/sysdeps/unix/sysv/linux/configure @@ -8,7 +8,7 @@ test $stdio = default && stdio=libio inhibit_glue=yes echo $ac_n "checking installed Linux kernel header files""... $ac_c" 1>&6 -if eval "test \"`echo '$''{'libc_cv_linux'+set}'`\" = set"; then +if eval "test \"`echo '$''{'libc_cv_linux201'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <<EOF @@ -24,17 +24,17 @@ eat flaming death EOF if { (eval echo configure:26: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* - libc_cv_linux='2.0.1 or later' + libc_cv_linux201='2.0.1 or later' else rm -rf conftest* - libc_cv_linux='TOO OLD!' + libc_cv_linux201='TOO OLD!' fi rm -f conftest* fi -echo "$ac_t""$libc_cv_linux" 1>&6 -if test "$libc_cv_linux" != '2.0.1 or later'; then +echo "$ac_t""$libc_cv_linux201" 1>&6 +if test "$libc_cv_linux201" != '2.0.1 or later'; then { echo "configure: error: GNU libc requires kernel header files from Linux 2.0.1 or later to be installed before configuring. The kernel header files are found usually in /usr/include/asm and /usr/include/linux; make sure diff --git a/sysdeps/unix/sysv/linux/socketbits.h b/sysdeps/unix/sysv/linux/socketbits.h new file mode 100644 index 0000000000..1c7cee0dad --- /dev/null +++ b/sysdeps/unix/sysv/linux/socketbits.h @@ -0,0 +1,2 @@ +/* Get Linux specific constants and data types kernel header. */ +#include <linux/socket.h> diff --git a/wcsmbs/btowc.c b/wcsmbs/btowc.c index 062be7ec02..2f13cc7ce4 100644 --- a/wcsmbs/btowc.c +++ b/wcsmbs/btowc.c @@ -21,16 +21,14 @@ Boston, MA 02111-1307, USA. */ #include <wchar.h> +/* We use UTF8 encoding for multibyte strings and therefore a valid + one byte multibyte string only can have a value from 0 to 0x7f. */ wint_t btowc (c) int c; { - /*************************************************************\ - |* This is no complete implementation. While the multi-byte *| - |* character handling is not finished this will do. *| - \*************************************************************/ - if (WEOF != (wint_t) EOF) + if (WEOF != (wint_t) EOF || c < 0 || c > 0x7f) return WEOF; else - return c; + return (wint_t) c; } diff --git a/wcsmbs/mbrlen.c b/wcsmbs/mbrlen.c index a50631e8d1..c5a27116be 100644 --- a/wcsmbs/mbrlen.c +++ b/wcsmbs/mbrlen.c @@ -26,10 +26,11 @@ static mbstate_t internal; size_t -mbrlen (s, n, ps) +__mbrlen (s, n, ps) const char *s; size_t n; mbstate_t *ps; { return mbrtowc (NULL, s, n, ps ?: &internal); } +weak_alias (__mbrlen, mbrlen) diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c index 2c4b0779da..9e70a0b2c9 100644 --- a/wcsmbs/mbrtowc.c +++ b/wcsmbs/mbrtowc.c @@ -1,6 +1,6 @@ /* Copyright (C) 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. -Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu> +Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -17,50 +17,115 @@ License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <errno.h> #include <wchar.h> +#ifndef EILSEQ +#define EILSEQ EINVAL +#endif + static mbstate_t internal; size_t -mbrtowc (pwc, s, n, ps) - wchar_t *pwc; - const char *s; - size_t n; - mbstate_t *ps; +mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) { wchar_t to_wide; + size_t used = 0; if (ps == NULL) ps = &internal; - /*************************************************************\ - |* This is no complete implementation. While the multi-byte *| - |* character handling is not finished this will do. *| - \*************************************************************/ - if (s == NULL) { + /* See first paragraph of description in 7.16.6.3.2. */ pwc = NULL; s = ""; n = 1; } - if (n == 0) - return (size_t) -2; + if (n > 0) + { + if (ps->count == 0) + { + unsigned char byte = (unsigned char) *s++; + ++used; - /* For now. */ - to_wide = (wchar_t) *s; + /* We must look for a possible first byte of a UTF8 sequence. */ + if (byte < 0x80) + { + /* One byte sequence. */ + if (pwc != NULL) + *pwc = (wchar_t) byte; + return byte ? used : 0; + } - if (pwc != NULL) - *pwc = to_wide; + if ((byte & 0xc0) == 0x80 || (byte & 0xfe) == 0xfe) + { + /* Oh, oh. An encoding error. */ + errno = EILSEQ; + return (size_t) -1; + } - if (pwc == L'\0') - { - *ps = 0; /* This is required. */ - return 0; + if ((byte & 0xe0) == 0xc0) + { + /* We expect two bytes. */ + ps->count = 1; + ps->value = byte & 0x1f; + } + else if ((byte & 0xf0) == 0xe0) + { + /* We expect three bytes. */ + ps->count = 2; + ps->value = byte & 0x0f; + } + else if ((byte & 0xf8) == 0xf0) + { + /* We expect four bytes. */ + ps->count = 3; + ps->value = byte & 0x07; + } + else if ((byte & 0xfc) == 0xf8) + { + /* We expect five bytes. */ + ps->count = 4; + ps->value = byte & 0x03; + } + else + { + /* We expect six bytes. */ + ps->count = 5; + ps->value = byte & 0x01; + } + } + + /* We know we have to handle a multibyte character and there are + some more bytes to read. */ + while (used < n) + { + /* The second to sixths byte must be of the form 10xxxxxx. */ + unsigned char byte = (unsigned char) *s++; + ++used; + + if ((byte & 0xc0) != 0x80) + { + /* Oh, oh. An encoding error. */ + errno = EILSEQ; + return (size_t) -1; + } + + ps->value <<= 6; + ps->value |= byte & 0x3f; + + if (--ps->count == 0) + { + /* The character is finished. */ + if (pwc != NULL) + *pwc = (wchar_t) ps->value; + return ps->value ? used : 0; + } + } } - /* Return code (size_t)-1 cannot happend for now. */ - return 1; + return (size_t) -2; } diff --git a/wcsmbs/mbsinit.c b/wcsmbs/mbsinit.c index efbfd09347..f56ce20331 100644 --- a/wcsmbs/mbsinit.c +++ b/wcsmbs/mbsinit.c @@ -1,6 +1,6 @@ /* Copyright (C) 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. -Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu> +Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -20,15 +20,17 @@ Boston, MA 02111-1307, USA. */ #include <string.h> #include <wchar.h> - +/* In GNU libc the conversion functions only can convert between the + fixed wide character representation and the multibyte + representation of the same character set. Since we use ISO 10646 + in UCS4 encoding for wide characters the best solution for + multibyte characters is the UTF8 encoding. I.e., the only state + information is a counter of the processed bytes so far and the + value collected so far. Especially, we don't have different shift + states. */ int mbsinit (ps) const mbstate_t *ps; { - /*************************************************************\ - |* This is no complete implementation. While the multi-byte *| - |* character handling is not finished this will do. *| - \*************************************************************/ - - return ps == NULL || *ps == 0; + return ps == NULL || ps->count == 0; } diff --git a/wcsmbs/mbsrtowcs.c b/wcsmbs/mbsrtowcs.c index dc026b7252..712b199271 100644 --- a/wcsmbs/mbsrtowcs.c +++ b/wcsmbs/mbsrtowcs.c @@ -1,6 +1,6 @@ /* Copyright (C) 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. -Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu> +Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -17,9 +17,16 @@ License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include <errno.h> #include <wchar.h> +#ifndef EILSEQ +#define EILSEQ EINVAL +#endif + +/* We don't need the state really because we don't have shift states + to maintain between calls to this function. */ static mbstate_t internal; size_t @@ -29,35 +36,102 @@ mbsrtowcs (dst, src, len, ps) size_t len; mbstate_t *ps; { - size_t result = 0; + size_t written = 0; + const char *run = *src; if (ps == NULL) ps = &internal; - /*************************************************************\ - |* This is no complete implementation. While the multi-byte *| - |* character handling is not finished this will do. *| - \*************************************************************/ + if (dst == NULL) + /* The LEN parameter has to be ignored if we don't actually write + anything. */ + len = ~0; - while (len > 0 && **src != '\0') + /* Copy all words. */ + while (written < len) { - /* For now there is no possibly illegal MB char sequence. */ - if (dst != NULL) - dst[result] = (wchar_t) **src; - ++result; - ++(*src); - --len; - } + wchar_t value; + size_t count; + unsigned char byte = *run++; - if (len > 0) - { + /* We expect a start of a new multibyte character. */ + if (byte < 0x80) + { + /* One byte sequence. */ + count = 0; + value = byte; + } + else if ((byte & 0xe0) == 0xc0) + { + count = 1; + value = byte & 0x1f; + } + else if ((byte & 0xf0) == 0xe0) + { + /* We expect three bytes. */ + count = 2; + value = byte & 0x0f; + } + else if ((byte & 0xf8) == 0xf0) + { + /* We expect four bytes. */ + count = 3; + value = byte & 0x07; + } + else if ((byte & 0xfc) == 0xf8) + { + /* We expect five bytes. */ + count = 4; + value = byte & 0x03; + } + else if ((byte & 0xfe) == 0xfc) + { + /* We expect six bytes. */ + count = 5; + value = byte & 0x01; + } + else + { + /* This is an illegal encoding. */ + errno = EILSEQ; + return (size_t) -1; + } + + /* Read the possible remaining bytes. */ + while (count-- > 0) + { + byte = *run++; + + if ((byte & 0xc0) != 0x80) + { + /* This is an illegal encoding. */ + errno = EILSEQ; + return (size_t) -1; + } + + value <<= 6; + value |= byte & 0x3f; + } + + /* Store value is required. */ if (dst != NULL) + *dst++ = value; + + /* The whole sequence is read. Check whether end of string is + reached. */ + if (value == L'\0') { - dst[result] = L'\0'; - *ps = 0; + /* Found the end of the string. */ + *src = NULL; + return written; } - *src = NULL; + + /* Increment counter of produced words. */ + ++written; } - return result; + /* Store address of next byte to process. */ + *src = run; + + return written; } diff --git a/wcsmbs/wchar.h b/wcsmbs/wchar.h index cc821b8a50..806bafa655 100644 --- a/wcsmbs/wchar.h +++ b/wcsmbs/wchar.h @@ -48,7 +48,11 @@ typedef unsigned int wint_t; /* Conversion state information. */ -typedef int mbstate_t; /* FIXME */ +typedef struct +{ + int count; /* Number of bytes needed for the current character. */ + wint_t value; /* Value so far. */ +} mbstate_t; #define WCHAR_MIN ((wchar_t) 0) #define WCHAR_MAX (~WCHAR_MIN) @@ -145,9 +149,6 @@ extern int wctob __P ((wint_t __c)); state. */ extern int mbsinit __P ((__const mbstate_t *__ps)); -/* Return number of bytes in multibyte character pointed to by S. */ -extern size_t mbrlen __P ((__const char *__s, size_t __n, mbstate_t *ps)); - /* Write wide character representation of multibyte character pointed to by S to PWC. */ extern size_t mbrtowc __P ((wchar_t *__pwc, __const char *__s, size_t __n, @@ -156,6 +157,17 @@ extern size_t mbrtowc __P ((wchar_t *__pwc, __const char *__s, size_t __n, /* Write multibyte representation of wide character WC to S. */ extern size_t wcrtomb __P ((char *__s, wchar_t __wc, mbstate_t *__ps)); +/* Return number of bytes in multibyte character pointed to by S. */ +extern size_t __mbrlen __P ((__const char *__s, size_t __n, mbstate_t *__ps)); +extern size_t mbrlen __P ((__const char *__s, size_t __n, mbstate_t *__ps)); + +#if defined (__OPTIMIZE__) \ + && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)) +/* Define inline function as optimization. */ +extern __inline size_t mbrlen (__const char *s, size_t n, mbstate_t *ps) +{ return ps != NULL ? mbrtowc (NULL, s, n, ps) : __mbrlen (s, n, NULL); } +#endif + /* Write wide character representation of multibyte chracter string SRC to DST. */ extern size_t mbsrtowcs __P ((wchar_t *__dst, __const char **__src, diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c index 9069fb105c..eb007a69b9 100644 --- a/wcsmbs/wcrtomb.c +++ b/wcsmbs/wcrtomb.c @@ -1,6 +1,6 @@ /* Copyright (C) 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. -Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu> +Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -24,46 +24,68 @@ Boston, MA 02111-1307, USA. */ #define EILSEQ EINVAL #endif +static const wchar_t encoding_mask[] = +{ + ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff +}; + +static const unsigned char encoding_byte[] = +{ + 0xc0, 0xe0, 0xf0, 0xf8, 0xfc +}; +/* The state is for this UTF8 encoding not used. */ static mbstate_t internal; size_t -wcrtomb (s, wc, ps) - char *s; - wchar_t wc; - mbstate_t *ps; +wcrtomb (char *s, wchar_t wc, mbstate_t *ps) { char fake[1]; + size_t written = 0; if (ps == NULL) ps = &internal; - /*************************************************************\ - |* This is no complete implementation. While the multi-byte *| - |* character handling is not finished this will do. *| - \*************************************************************/ - if (s == NULL) { s = fake; wc = L'\0'; } - if (wc == L'\0') + /* Store the UTF8 representation of WC. */ + if (wc < 0 || wc > 0x7fffffff) { - /* FIXME Write any shift sequence to get to *PS == NULL. */ - *ps = 0; - *s = '\0'; + /* This is no correct ISO 10646 character. */ + errno = EILSEQ; + return (size_t) -1; + } + + if (wc < 0x80) + { + /* It's a one byte sequence. */ + if (s != NULL) + *s = (char) wc; return 1; } - /* FIXME For now we don't handle real multi-byte encodings. */ - if ((wc & ~0xff) != 0) + for (written = 2; written < 6; ++written) + if ((wc & encoding_mask[written - 2]) == 0) + break; + + if (s != NULL) { - errno = EILSEQ; - return (size_t) -1; + size_t cnt = written; + s[0] = encoding_byte[cnt - 2]; + + --cnt; + do + { + s[cnt] = 0x80 | (wc & 0x3f); + wc >>= 6; + } + while (--cnt > 0); + s[0] |= wc; } - *s = (char) wc; - return 1; + return written; } diff --git a/wcsmbs/wcsrtombs.c b/wcsmbs/wcsrtombs.c index 9f1000937b..99ca6acc5b 100644 --- a/wcsmbs/wcsrtombs.c +++ b/wcsmbs/wcsrtombs.c @@ -1,6 +1,6 @@ /* Copyright (C) 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. -Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu> +Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -25,6 +25,18 @@ Boston, MA 02111-1307, USA. */ #endif +static const wchar_t encoding_mask[] = +{ + ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff +}; + +static const unsigned char encoding_byte[] = +{ + 0xc0, 0xe0, 0xf0, 0xf8, 0xfc +}; + +/* We don't need the state really because we don't have shift states + to maintain between calls to this function. */ static mbstate_t internal; size_t @@ -34,40 +46,79 @@ wcsrtombs (dst, src, len, ps) size_t len; mbstate_t *ps; { - size_t result = 0; + size_t written = 0; + const wchar_t *run = *src; if (ps == NULL) ps = &internal; - /*************************************************************\ - |* This is no complete implementation. While the multi-byte *| - |* character handling is not finished this will do. *| - \*************************************************************/ + if (dst == NULL) + /* The LEN parameter has to be ignored if we don't actually write + anything. */ + len = ~0; - while (len > 0 && **src != L'\0') + while (written < len) { - if ((**src & ~0xff) != 0) + wchar_t wc = *run++; + + if (wc < 0 || wc > 0x7fffffff) { + /* This is no correct ISO 10646 character. */ errno = EILSEQ; return (size_t) -1; } - if (dst != NULL) - dst[result] = (char) **src; - ++result; - ++(*src); - --len; - } - - if (len > 0) - { - if (dst != NULL) + if (wc == L'\0') + { + /* Found the end. */ + if (dst != NULL) + *dst = '\0'; + *src = NULL; + return written; + } + else if (wc < 0x80) { - dst[result] = '\0'; - *ps = 0; + /* It's an one byte sequence. */ + if (dst != NULL) + *dst++ = (char) wc; + ++written; + } + else + { + size_t step; + + for (step = 2; step < 6; ++step) + if ((wc & encoding_mask[step - 2]) == 0) + break; + + if (written + step >= len) + /* Too long. */ + break; + + if (dst != NULL) + { + size_t cnt = step; + + dst[0] = encoding_byte[cnt - 2]; + + --cnt; + do + { + dst[cnt] = 0x80 | (wc & 0x3f); + wc >>= 6; + } + while (--cnt > 0); + dst[0] |= wc; + + dst += step; + } + + written += step; } - *src = NULL; } - return result; + /* Store position of first unprocessed word. */ + *src = run; + + return written; } diff --git a/wcsmbs/wctob.c b/wcsmbs/wctob.c index c27bd6baba..f541a2e97b 100644 --- a/wcsmbs/wctob.c +++ b/wcsmbs/wctob.c @@ -21,14 +21,11 @@ Boston, MA 02111-1307, USA. */ #include <wchar.h> +/* We use UTF8 encoding for multibyte strings and therefore a valid + one byte multibyte string only can have a value from 0 to 0x7f. */ int wctob (c) wint_t c; { - /*************************************************************\ - |* This is no complete implementation. While the multi-byte *| - |* character handling is not finished this will do. *| - \*************************************************************/ - - return (c & ~0xff) == 0 ? c : EOF; + return (c >= 0 && c <= 0x7f) ? c : EOF; } |