From 8f5ca04bc7fd53741d80117df992995ace8f6d2d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 16 Oct 1995 01:37:51 +0000 Subject: Sat Oct 14 02:52:36 1995 Ulrich Drepper * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include . * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. Sat Oct 14 02:52:36 1995 Ulrich Drepper * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include . * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. 
* sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. --- stdio/_itoa.c | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++++- stdio/_itoa.h | 26 ---- stdio/test_rdwr.c | 1 + 3 files changed, 396 insertions(+), 32 deletions(-) (limited to 'stdio') diff --git a/stdio/_itoa.c b/stdio/_itoa.c index 19e732dcfe..caa8179624 100644 --- a/stdio/_itoa.c +++ b/stdio/_itoa.c @@ -1,6 +1,8 @@ /* Internal function for converting integers to ASCII. Copyright (C) 1994, 1995 Free Software Foundation, Inc. This file is part of the GNU C Library. +Contributed by Torbjorn Granlund +and Ulrich Drepper . The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -17,13 +19,400 @@ License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#include +#include "../stdlib/gmp.h" +#include "../stdlib/gmp-impl.h" +#include "../stdlib/longlong.h" + +#include "_itoa.h" + + +/* Canonize environment. For some architectures not all values might + be defined in the GMP header files. */ +#ifndef UMUL_TIME +# define UMUL_TIME 1 +#endif +#ifndef UDIV_TIME +# define UDIV_TIME 1 +#endif + +/* Control memory layout. */ +#ifdef PACK +# undef PACK +# define PACK __attribute__ ((packed)) +#else +# define PACK +#endif + + +/* Declare local types. */ +struct base_table_t +{ +#if (UDIV_TIME > 2 * UMUL_TIME) + mp_limb base_multiplier; +#endif + char flag; + char post_shift; +#if BITS_PER_MP_LIMB == 32 + struct + { + char normalization_steps; + char ndigits; + mp_limb base PACK; +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb base_ninv PACK; +#endif + } big; +#endif +}; + +/* To reduce the memory needed we include some fields of the tables + only conditionally. */ +#if BITS_PER_MP_LIMB == 32 +# if UDIV_TIME > 2 * UMUL_TIME +# define SEL1(X) X, +# define SEL2(X) ,X +# else +# define SEL1(X) +# define SEL2(X) +# endif +#endif + + +/* Local variables. 
*/ +static const struct base_table_t base_table[] = +{ +#if BITS_PER_MP_LIMB == 64 + /* 2 */ {0ul, 1, 1}, + /* 3 */ {0xaaaaaaaaaaaaaaabul, 0, 1}, + /* 4 */ {0ul, 1, 2}, + /* 5 */ {0xcccccccccccccccdul, 0, 2}, + /* 6 */ {0xaaaaaaaaaaaaaaabul, 0, 2}, + /* 7 */ {0x2492492492492493ul, 1, 3}, + /* 8 */ {0ul, 1, 3}, + /* 9 */ {0xe38e38e38e38e38ful, 0, 3}, + /* 10 */ {0xcccccccccccccccdul, 0, 3}, + /* 11 */ {0x2e8ba2e8ba2e8ba3ul, 0, 1}, + /* 12 */ {0xaaaaaaaaaaaaaaabul, 0, 3}, + /* 13 */ {0x4ec4ec4ec4ec4ec5ul, 0, 2}, + /* 14 */ {0x2492492492492493ul, 1, 4}, + /* 15 */ {0x8888888888888889ul, 0, 3}, + /* 16 */ {0ul, 1, 4}, + /* 17 */ {0xf0f0f0f0f0f0f0f1ul, 0, 4}, + /* 18 */ {0xe38e38e38e38e38ful, 0, 4}, + /* 19 */ {0xd79435e50d79435ful, 0, 4}, + /* 20 */ {0xcccccccccccccccdul, 0, 4}, + /* 21 */ {0x8618618618618619ul, 1, 5}, + /* 22 */ {0x2e8ba2e8ba2e8ba3ul, 0, 2}, + /* 23 */ {0x642c8590b21642c9ul, 1, 5}, + /* 24 */ {0xaaaaaaaaaaaaaaabul, 0, 4}, + /* 25 */ {0x47ae147ae147ae15ul, 1, 5}, + /* 26 */ {0x4ec4ec4ec4ec4ec5ul, 0, 3}, + /* 27 */ {0x97b425ed097b425ful, 0, 4}, + /* 28 */ {0x2492492492492493ul, 1, 5}, + /* 29 */ {0x1a7b9611a7b9611bul, 1, 5}, + /* 30 */ {0x8888888888888889ul, 0, 4}, + /* 31 */ {0x0842108421084211ul, 1, 5}, + /* 32 */ {0ul, 1, 5}, + /* 33 */ {0x0f83e0f83e0f83e1ul, 0, 1}, + /* 34 */ {0xf0f0f0f0f0f0f0f1ul, 0, 5}, + /* 35 */ {0xea0ea0ea0ea0ea0ful, 0, 5}, + /* 36 */ {0xe38e38e38e38e38ful, 0, 5} +#endif +#if BITS_PER_MP_LIMB == 32 + /* 2 */ {SEL1(0ul) 1, 1, {0, 31, 0x80000000ul SEL2(0xfffffffful)}}, + /* 3 */ {SEL1(0xaaaaaaabul) 0, 1, {0, 20, 0xcfd41b91ul SEL2(0x3b563c24ul)}}, + /* 4 */ {SEL1(0ul) 1, 2, {1, 15, 0x40000000ul SEL2(0xfffffffful)}}, + /* 5 */ {SEL1(0xcccccccdul) 0, 2, {1, 13, 0x48c27395ul SEL2(0xc25c2684ul)}}, + /* 6 */ {SEL1(0xaaaaaaabul) 0, 2, {0, 12, 0x81bf1000ul SEL2(0xf91bd1b6ul)}}, + /* 7 */ {SEL1(0x24924925ul) 1, 3, {1, 11, 0x75db9c97ul SEL2(0x1607a2cbul)}}, + /* 8 */ {SEL1(0ul) 1, 3, {1, 10, 0x40000000ul SEL2(0xfffffffful)}}, + /* 9 */ 
{SEL1(0x38e38e39ul) 0, 1, {0, 10, 0xcfd41b91ul SEL2(0x3b563c24ul)}}, + /* 10 */ {SEL1(0xcccccccdul) 0, 3, {2, 9, 0x3b9aca00ul SEL2(0x12e0be82ul)}}, + /* 11 */ {SEL1(0xba2e8ba3ul) 0, 3, {0, 9, 0x8c8b6d2bul SEL2(0xd24cde04ul)}}, + /* 12 */ {SEL1(0xaaaaaaabul) 0, 3, {3, 8, 0x19a10000ul SEL2(0x3fa39ab5ul)}}, + /* 13 */ {SEL1(0x4ec4ec4ful) 0, 2, {2, 8, 0x309f1021ul SEL2(0x50f8ac5ful)}}, + /* 14 */ {SEL1(0x24924925ul) 1, 4, {1, 8, 0x57f6c100ul SEL2(0x74843b1eul)}}, + /* 15 */ {SEL1(0x88888889ul) 0, 3, {0, 8, 0x98c29b81ul SEL2(0xad0326c2ul)}}, + /* 16 */ {SEL1(0ul) 1, 4, {3, 7, 0x10000000ul SEL2(0xfffffffful)}}, + /* 17 */ {SEL1(0xf0f0f0f1ul) 0, 4, {3, 7, 0x18754571ul SEL2(0x4ef0b6bdul)}}, + /* 18 */ {SEL1(0x38e38e39ul) 0, 2, {2, 7, 0x247dbc80ul SEL2(0xc0fc48a1ul)}}, + /* 19 */ {SEL1(0xaf286bcbul) 1, 5, {2, 7, 0x3547667bul SEL2(0x33838942ul)}}, + /* 20 */ {SEL1(0xcccccccdul) 0, 4, {1, 7, 0x4c4b4000ul SEL2(0xad7f29abul)}}, + /* 21 */ {SEL1(0x86186187ul) 1, 5, {1, 7, 0x6b5a6e1dul SEL2(0x313c3d15ul)}}, + /* 22 */ {SEL1(0xba2e8ba3ul) 0, 4, {0, 7, 0x94ace180ul SEL2(0xb8cca9e0ul)}}, + /* 23 */ {SEL1(0xb21642c9ul) 0, 4, {0, 7, 0xcaf18367ul SEL2(0x42ed6de9ul)}}, + /* 24 */ {SEL1(0xaaaaaaabul) 0, 4, {4, 6, 0x0b640000ul SEL2(0x67980e0bul)}}, + /* 25 */ {SEL1(0x51eb851ful) 0, 3, {4, 6, 0x0e8d4a51ul SEL2(0x19799812ul)}}, + /* 26 */ {SEL1(0x4ec4ec4ful) 0, 3, {3, 6, 0x1269ae40ul SEL2(0xbce85396ul)}}, + /* 27 */ {SEL1(0x2f684bdbul) 1, 5, {3, 6, 0x17179149ul SEL2(0x62c103a9ul)}}, + /* 28 */ {SEL1(0x24924925ul) 1, 5, {3, 6, 0x1cb91000ul SEL2(0x1d353d43ul)}}, + /* 29 */ {SEL1(0x8d3dcb09ul) 0, 4, {2, 6, 0x23744899ul SEL2(0xce1deceaul)}}, + /* 30 */ {SEL1(0x88888889ul) 0, 4, {2, 6, 0x2b73a840ul SEL2(0x790fc511ul)}}, + /* 31 */ {SEL1(0x08421085ul) 1, 5, {2, 6, 0x34e63b41ul SEL2(0x35b865a0ul)}}, + /* 32 */ {SEL1(0ul) 1, 5, {1, 6, 0x40000000ul SEL2(0xfffffffful)}}, + /* 33 */ {SEL1(0x3e0f83e1ul) 0, 3, {1, 6, 0x4cfa3cc1ul SEL2(0xa9aed1b3ul)}}, + /* 34 */ {SEL1(0xf0f0f0f1ul) 0, 5, {1, 6, 
0x5c13d840ul SEL2(0x63dfc229ul)}}, + /* 35 */ {SEL1(0xd41d41d5ul) 1, 6, {1, 6, 0x6d91b519ul SEL2(0x2b0fee30ul)}}, + /* 36 */ {SEL1(0x38e38e39ul) 0, 3, {0, 6, 0x81bf1000ul SEL2(0xf91bd1b6ul)}} +#endif +}; + /* Lower-case digits. */ -const char _itoa_lower_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; +static const char _itoa_lower_digits[] + = "0123456789abcdefghijklmnopqrstuvwxyz"; /* Upper-case digits. */ -const char _itoa_upper_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; +static const char _itoa_upper_digits[] + = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -/* Cause _itoa.h to define _itoa as a real function instead of an - `extern inline'. */ -#define _EXTERN_INLINE /* empty */ -#include "_itoa.h" +char * +_itoa (value, buflim, base, upper_case) + unsigned long long int value; + char *buflim; + unsigned int base; + int upper_case; +{ + const char *digits = upper_case ? _itoa_upper_digits : _itoa_lower_digits; + char *bp = buflim; + const struct base_table_t *brec = &base_table[base - 2]; + + switch (base) + { +#define RUN_2N(BITS) \ + do \ + { \ + /* `unsigned long long int' always has 64 bits. 
*/ \ + mp_limb work_hi = value >> (64 - BITS_PER_MP_LIMB); \ + \ + if (BITS_PER_MP_LIMB == 32) \ + if (work_hi != 0) \ + { \ + mp_limb work_lo; \ + int cnt; \ + \ + work_lo = value & 0xfffffffful; \ + for (cnt = BITS_PER_MP_LIMB / BITS; cnt > 0; --cnt) \ + { \ + *--bp = digits[work_lo & ((1ul << BITS) - 1)]; \ + work_lo >>= BITS; \ + } \ + if (BITS_PER_MP_LIMB % BITS != 0) \ + { \ + work_lo |= ((work_hi \ + & ((1 << BITS - BITS_PER_MP_LIMB % BITS) \ + - 1)) \ + << BITS_PER_MP_LIMB % BITS); \ + *--bp = digits[work_lo]; \ + work_hi >>= BITS - BITS_PER_MP_LIMB % BITS; \ + } \ + } \ + else \ + work_hi = value & 0xfffffffful; \ + do \ + { \ + *--bp = digits[work_hi & ((1 << BITS) - 1)]; \ + work_hi >>= BITS; \ + } \ + while (work_hi != 0); \ + } \ + while (0) + case 8: + RUN_2N (3); + break; + + case 16: + RUN_2N (4); + break; + + default: + { +#if BITS_PER_MP_LIMB == 64 + mp_limb base_multiplier = brec->base_multiplier; + if (brec->flag) + while (value != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, value, base_multiplier); + quo = (x + ((value - x) >> 1)) >> (brec->post_shift - 1); + rem = value - quo * base; + *--bp = digits[rem]; + value = quo; + } + else + while (value != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, value, base_multiplier); + quo = x >> brec->post_shift; + rem = value - quo * base; + *--bp = digits[rem]; + value = quo; + } +#endif +#if BITS_PER_MP_LIMB == 32 + mp_limb t[3]; + int n; + + /* First convert VALUE to 1-3 words in base brec->big.base. + Optimize for frequent cases of 32 bit numbers. */ + if ((mp_limb) (value >> 32) >= 1) + { + int big_normalization_steps = brec->big.normalization_steps; + mp_limb big_base_norm = brec->big.base << big_normalization_steps; + + if ((mp_limb) (value >> 32) >= brec->big.base) + { + mp_limb x1hi, x1lo, r; + /* If you want to optimize this, take advantage of + that the quotient in the first udiv_qrnnd will + always be very small. It might be faster just to + subtract in a tight loop. 
*/ + +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb x, xh, xl; + + if (big_normalization_steps == 0) + xh = 0; + else + xh = (mp_limb) (value >> 64 - big_normalization_steps); + xl = (mp_limb) (value >> 32 - big_normalization_steps); + udiv_qrnnd_preinv (x1hi, r, xh, xl, big_base_norm, + brec->big.base_ninv); + + xl = ((mp_limb) value) << big_normalization_steps; + udiv_qrnnd_preinv (x1lo, x, r, xl, big_base_norm, + big_normalization_steps); + t[2] = x >> big_normalization_steps; + + if (big_normalization_steps == 0) + xh = x1hi; + else + xh = ((x1hi << big_normalization_steps) + | (x1lo >> 32 - big_normalization_steps)); + xl = x1lo << big_normalization_steps; + udiv_qrnnd_preinv (t[0], x, xh, xl, big_base_norm, + big_normalization_steps); + t[1] = x >> big_normalization_steps; +#elif UDIV_NEEDS_NORMALIZATION + mp_limb x, xh, xl; + + if (big_normalization_steps == 0) + xh = 0; + else + xh = (mp_limb) (value >> 64 - big_normalization_steps); + xl = (mp_limb) (value >> 32 - big_normalization_steps); + udiv_qrnnd (x1hi, r, xh, xl, big_base_norm); + + xl = ((mp_limb) value) << big_normalization_steps; + udiv_qrnnd (x1lo, x, r, xl, big_base_norm); + t[2] = x >> big_normalization_steps; + + if (big_normalization_steps == 0) + xh = x1hi; + else + xh = ((x1hi << big_normalization_steps) + | (x1lo >> 32 - big_normalization_steps)); + xl = x1lo << big_normalization_steps; + udiv_qrnnd (t[0], x, xh, xl, big_base_norm); + t[1] = x >> big_normalization_steps; +#else + udiv_qrnnd (x1hi, r, 0, (mp_limb) (value >> 32), + brec->big.base); + udiv_qrnnd (x1lo, t[2], r, (mp_limb) value, brec->big.base); + udiv_qrnnd (t[0], t[1], x1hi, x1lo, brec->big.base); +#endif + n = 3; + } + else + { +#if (UDIV_TIME > 2 * UMUL_TIME) + mp_limb x; + + value <<= brec->big.normalization_steps; + udiv_qrnnd_preinv (t[0], x, (mp_limb) (value >> 32), + (mp_limb) value, big_base_norm, + brec->big.base_ninv); + t[1] = x >> brec->big.normalization_steps; +#elif UDIV_NEEDS_NORMALIZATION + mp_limb x; + + value 
<<= big_normalization_steps; + udiv_qrnnd (t[0], x, (mp_limb) (value >> 32), + (mp_limb) value, big_base_norm); + t[1] = x >> big_normalization_steps; +#else + udiv_qrnnd (t[0], t[1], (mp_limb) (value >> 32), + (mp_limb) value, brec->big.base); +#endif + n = 2; + } + } + else + { + t[0] = value; + n = 1; + } + + /* Convert the 1-3 words in t[], word by word, to ASCII. */ + do + { + mp_limb ti = t[--n]; + int ndig_for_this_limb = 0; + +#if UDIV_TIME > 2 * UMUL_TIME + mp_limb base_multiplier = brec->base_multiplier; + if (brec->flag) + while (ti != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, ti, base_multiplier); + quo = (x + ((ti - x) >> 1)) >> (brec->post_shift - 1); + rem = ti - quo * base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } + else + while (ti != 0) + { + mp_limb quo, rem, x, dummy; + + umul_ppmm (x, dummy, ti, base_multiplier); + quo = x >> brec->post_shift; + rem = ti - quo * base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } +#else + while (ti != 0) + { + mp_limb quo, rem; + + quo = ti / base; + rem = ti % base; + *--bp = digits[rem]; + ti = quo; + ++ndig_for_this_limb; + } +#endif + /* If this wasn't the most significant word, pad with zeros. */ + if (n != 0) + while (ndig_for_this_limb < brec->big.ndigits) + { + *--bp = '0'; + ++ndig_for_this_limb; + } + } + while (n != 0); +#endif + } + break; + } + + return bp; +} diff --git a/stdio/_itoa.h b/stdio/_itoa.h index 81240507b1..ab3d1d1d3a 100644 --- a/stdio/_itoa.h +++ b/stdio/_itoa.h @@ -21,8 +21,6 @@ Cambridge, MA 02139, USA. */ #define _ITOA_H #include -extern const char _itoa_lower_digits[], _itoa_upper_digits[]; - /* Convert VALUE into ASCII in base BASE (2..36). Write backwards starting the character just before BUFLIM. Return the address of the first (left-to-right) character in the number. 
@@ -31,28 +29,4 @@ extern const char _itoa_lower_digits[], _itoa_upper_digits[]; extern char *_itoa __P ((unsigned long long int value, char *buflim, unsigned int base, int upper_case)); -#ifndef _EXTERN_INLINE -#define _EXTERN_INLINE extern __inline -#endif - -_EXTERN_INLINE -char * -_itoa (unsigned long long int value, char *buflim, - unsigned int base, int upper_case) -{ - /* Base-36 digits for numbers. */ - const char *digits = upper_case ? _itoa_upper_digits : _itoa_lower_digits; - - register char *bp = buflim; - - while (value > 0) - { - *--bp = digits[value % base]; - value /= base; - } - - return bp; -} - - #endif /* itoa.h */ diff --git a/stdio/test_rdwr.c b/stdio/test_rdwr.c index 8e0c1dfade..f987f16cd4 100644 --- a/stdio/test_rdwr.c +++ b/stdio/test_rdwr.c @@ -17,6 +17,7 @@ not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include +#include #include #include #include -- cgit 1.4.1