author     Jakub Jelinek <jakub@redhat.com>    2007-07-12 18:26:36 +0000
committer  Jakub Jelinek <jakub@redhat.com>    2007-07-12 18:26:36 +0000
commit     75c50bbb58e47bf4a0519170539ea0bc83035290 (patch)
tree       2ea1f8305970753e4a657acb2ccc15ca3eec8e2c
parent     b898e8fb2edbd994cdaa9aca94228d13f4fb26de (diff)
-rw-r--r--  fedora/glibc-rh234946.patch      |   32
-rw-r--r--  fedora/glibc-x86_64-memcpy.patch | 1439
-rw-r--r--  fedora/glibc.spec.in             |    7
3 files changed, 1477 insertions(+), 1 deletion(-)
diff --git a/fedora/glibc-rh234946.patch b/fedora/glibc-rh234946.patch
new file mode 100644
index 0000000000..add3f8023f
--- /dev/null
+++ b/fedora/glibc-rh234946.patch
@@ -0,0 +1,32 @@
+2006-12-25  Sripathi Kodi  <sripathik@in.ibm.com>
+
+	* include/link.h: Declare new flag l_fini_called in struct link_map.
+	* elf/dl-fini.c: In _dl_fini, set l_fini_called to 1 instead of
+	l_init_called to 0.
+
+--- libc/elf/dl-fini.c	2006-12-22 01:54:22.000000000 -0600
++++ libc/elf/dl-fini.c	2006-12-24 22:51:52.000000000 -0600
+@@ -215,10 +215,10 @@ _dl_fini (void)
+     {
+       l = maps[i];
+
+-      if (l->l_init_called)
++      if (l->l_init_called && !l->l_fini_called)
+ 	{
+ 	  /* Make sure nothing happens if we are called twice.  */
+-	  l->l_init_called = 0;
++	  l->l_fini_called = 1;
+
+ 	  /* Is there a destructor function?  */
+ 	  if (l->l_info[DT_FINI_ARRAY] != NULL
+--- libc/include/link.h	2006-12-22 01:54:22.000000000 -0600
++++ libc/include/link.h	2006-12-24 22:53:29.000000000 -0600
+@@ -185,6 +185,8 @@ struct link_map
+     unsigned int l_contiguous:1;	/* Nonzero if inter-segment holes are
+ 					   mprotected or if no holes are present at
+ 					   all.  */
++    unsigned int l_fini_called:1;	/* Nonzero if _dl_fini has processed
++					   this object */
+
+     /* Array with version names.  */
+     unsigned int l_nversions;
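[Editorial note] The dl-fini change above is the whole of the fix: _dl_fini used to clear l_init_called to guard against running destructors twice, which also destroyed the record that the object had ever been initialized; the dedicated l_fini_called bit makes finalization idempotent without touching the init state. A minimal sketch of that pattern, with illustrative names (this is not the real struct link_map):

    #include <stdio.h>

    struct object
    {
      unsigned int init_called:1;	/* constructors have run */
      unsigned int fini_called:1;	/* destructors have run; the new flag */
      void (*fini) (void);
    };

    static void
    run_fini (struct object *obj)
    {
      if (obj->init_called && !obj->fini_called)
        {
          /* Mark the object finalized, but leave init_called intact.  */
          obj->fini_called = 1;
          if (obj->fini != NULL)
            obj->fini ();
        }
    }

    static void dtor (void) { puts ("destructor runs exactly once"); }

    int
    main (void)
    {
      struct object obj = { .init_called = 1, .fini_called = 0, .fini = dtor };
      run_fini (&obj);
      run_fini (&obj);	/* second call is a no-op */
      return 0;
    }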
diff --git a/fedora/glibc-x86_64-memcpy.patch b/fedora/glibc-x86_64-memcpy.patch
new file mode 100644
index 0000000000..3888134df8
--- /dev/null
+++ b/fedora/glibc-x86_64-memcpy.patch
@@ -0,0 +1,1439 @@
+2007-05-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Pass correct value
+	as second parameter to handle_intel.
+
+	* sysdeps/unix/sysv/linux/x86_64/sysconf.c: Move cache information
+	handling to ...
+	* sysdeps/x86_64/cacheinfo.c: ... here.  New file.
+	* sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
+	cacheinfo.
+	* sysdeps/x86_64/memcpy.S: Complete rewrite.
+	* sysdeps/x86_64/mempcpy.S: Adjust appropriately.
+	Patch by Evandro Menezes <evandro.menezes@amd.com>.
+
+--- libc/sysdeps/x86_64/Makefile	16 Aug 2004 06:46:14 -0000	1.4
++++ libc/sysdeps/x86_64/Makefile	21 May 2007 19:20:45 -0000	1.5
+@@ -9,3 +9,7 @@ endif
+ ifeq ($(subdir),gmon)
+ sysdep_routines += _mcount
+ endif
++
++ifeq ($(subdir),string)
++sysdep_routines += cacheinfo
++endif
+--- libc/sysdeps/x86_64/cacheinfo.c	1 Jan 1970 00:00:00 -0000
++++ libc/sysdeps/x86_64/cacheinfo.c	21 May 2007 22:37:45 -0000	1.2
+@@ -0,0 +1,451 @@
++/* x86_64 cache info.
++   Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, write to the Free
++   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
++   02111-1307 USA.
++*/
++
++#include <assert.h>
++#include <stdbool.h>
++#include <stdlib.h>
++#include <unistd.h>
++
++static const struct intel_02_cache_info
++{
++  unsigned int idx;
++  int name;
++  long int size;
++  long int assoc;
++  long int linesize;
++} intel_02_known [] =
++  {
++    { 0x06, _SC_LEVEL1_ICACHE_SIZE, 8192, 4, 32 },
++    { 0x08, _SC_LEVEL1_ICACHE_SIZE, 16384, 4, 32 },
++    { 0x0a, _SC_LEVEL1_DCACHE_SIZE, 8192, 2, 32 },
++    { 0x0c, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 32 },
++    { 0x22, _SC_LEVEL3_CACHE_SIZE, 524288, 4, 64 },
++    { 0x23, _SC_LEVEL3_CACHE_SIZE, 1048576, 8, 64 },
++    { 0x25, _SC_LEVEL3_CACHE_SIZE, 2097152, 8, 64 },
++    { 0x29, _SC_LEVEL3_CACHE_SIZE, 4194304, 8, 64 },
++    { 0x2c, _SC_LEVEL1_DCACHE_SIZE, 32768, 8, 64 },
++    { 0x30, _SC_LEVEL1_ICACHE_SIZE, 32768, 8, 64 },
++    { 0x39, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 64 },
++    { 0x3a, _SC_LEVEL2_CACHE_SIZE, 196608, 6, 64 },
++    { 0x3b, _SC_LEVEL2_CACHE_SIZE, 131072, 2, 64 },
++    { 0x3c, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 64 },
++    { 0x3d, _SC_LEVEL2_CACHE_SIZE, 393216, 6, 64 },
++    { 0x3e, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 },
++    { 0x41, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 32 },
++    { 0x42, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 32 },
++    { 0x43, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 32 },
++    { 0x44, _SC_LEVEL2_CACHE_SIZE, 1048576, 4, 32 },
++    { 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 },
++    { 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 },
++    { 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 },
++    { 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 },
++    { 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 },
++    { 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
++    { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
++    { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
++    { 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 },
++    { 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 },
++    { 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },
++    { 0x68, _SC_LEVEL1_DCACHE_SIZE, 32768, 4, 64 },
++    { 0x78, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
++    { 0x79, _SC_LEVEL2_CACHE_SIZE, 131072, 8, 64 },
++    { 0x7a, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 64 },
++    { 0x7b, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 64 },
++    { 0x7c, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
++    { 0x7d, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 64 },
++    { 0x7f, _SC_LEVEL2_CACHE_SIZE, 524288, 2, 64 },
++    { 0x82, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 32 },
++    { 0x83, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 32 },
++    { 0x84, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 32 },
++    { 0x85, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 32 },
++    { 0x86, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 },
++    { 0x87, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
++  };
++
++#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
++
++static int
++intel_02_known_compare (const void *p1, const void *p2)
++{
++  const struct intel_02_cache_info *i1;
++  const struct intel_02_cache_info *i2;
++
++  i1 = (const struct intel_02_cache_info *) p1;
++  i2 = (const struct intel_02_cache_info *) p2;
++
++  if (i1->idx == i2->idx)
++    return 0;
++
++  return i1->idx < i2->idx ? -1 : 1;
++}
++
++
++static long int
++__attribute__ ((noinline))
++intel_check_word (int name, unsigned int value, bool *has_level_2,
++		  bool *no_level_2_or_3)
++{
++  if ((value & 0x80000000) != 0)
++    /* The register value is reserved.  */
++    return 0;
++
++  /* Fold the name.  The _SC_ constants are always in the order SIZE,
++     ASSOC, LINESIZE.  */
++  int folded_name = (_SC_LEVEL1_ICACHE_SIZE
++		     + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3);
++
++  while (value != 0)
++    {
++      unsigned int byte = value & 0xff;
++
++      if (byte == 0x40)
++	{
++	  *no_level_2_or_3 = true;
++
++	  if (folded_name == _SC_LEVEL3_CACHE_SIZE)
++	    /* No need to look further.  */
++	    break;
++	}
++      else
++	{
++	  if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE)
++	    {
++	      /* Intel reused this value.  For family 15, model 6 it
++		 specifies the 3rd level cache.  Otherwise the 2nd
++		 level cache.  */
++	      unsigned int eax;
++	      unsigned int ebx;
++	      unsigned int ecx;
++	      unsigned int edx;
++	      asm volatile ("cpuid"
++			    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
++			    : "0" (1));
++
++	      unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
++	      unsigned int model = ((((eax >>16) & 0xf) << 4)
++				    + ((eax >> 4) & 0xf));
++	      if (family == 15 && model == 6)
++		{
++		  /* The level 3 cache is encoded for this model like
++		     the level 2 cache is for other models.  Pretend
++		     the caller asked for the level 2 cache.  */
++		  name = (_SC_LEVEL2_CACHE_SIZE
++			  + (name - _SC_LEVEL3_CACHE_SIZE));
++		  folded_name = _SC_LEVEL3_CACHE_SIZE;
++		}
++	    }
++
++	  struct intel_02_cache_info *found;
++	  struct intel_02_cache_info search;
++
++	  search.idx = byte;
++	  found = bsearch (&search, intel_02_known, nintel_02_known,
++			   sizeof (intel_02_known[0]), intel_02_known_compare);
++	  if (found != NULL)
++	    {
++	      if (found->name == folded_name)
++		{
++		  unsigned int offset = name - folded_name;
++
++		  if (offset == 0)
++		    /* Cache size.  */
++		    return found->size;
++		  if (offset == 1)
++		    return found->assoc;
++
++		  assert (offset == 2);
++		  return found->linesize;
++		}
++
++	      if (found->name == _SC_LEVEL2_CACHE_SIZE)
++		*has_level_2 = true;
++	    }
++	}
++
++      /* Next byte for the next round.  */
++      value >>= 8;
++    }
++
++  /* Nothing found.  */
++  return 0;
++}
++
++
++static long int __attribute__ ((noinline))
++handle_intel (int name, unsigned int maxidx)
++{
++  assert (maxidx >= 2);
++
++  /* OK, we can use the CPUID instruction to get all info about the
++     caches.  */
++  unsigned int cnt = 0;
++  unsigned int max = 1;
++  long int result = 0;
++  bool no_level_2_or_3 = false;
++  bool has_level_2 = false;
++
++  while (cnt++ < max)
++    {
++      unsigned int eax;
++      unsigned int ebx;
++      unsigned int ecx;
++      unsigned int edx;
++      asm volatile ("cpuid"
++		    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
++		    : "0" (2));
++
++      /* The low byte of EAX in the first round contain the number of
++	 rounds we have to make.  At least one, the one we are already
++	 doing.  */
++      if (cnt == 1)
++	{
++	  max = eax & 0xff;
++	  eax &= 0xffffff00;
++	}
++
++      /* Process the individual registers' value.  */
++      result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
++      if (result != 0)
++	return result;
++
++      result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
++      if (result != 0)
++	return result;
++
++      result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
++      if (result != 0)
++	return result;
++
++      result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
++      if (result != 0)
++	return result;
++    }
++
++  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
++      && no_level_2_or_3)
++    return -1;
++
++  return 0;
++}
++
++
++static long int __attribute__ ((noinline))
++handle_amd (int name)
++{
++  unsigned int eax;
++  unsigned int ebx;
++  unsigned int ecx;
++  unsigned int edx;
++  asm volatile ("cpuid"
++		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
++		: "0" (0x80000000));
++
++  if (name >= _SC_LEVEL3_CACHE_SIZE)
++    return 0;
++
++  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
++  if (eax < fn)
++    return 0;
++
++  asm volatile ("cpuid"
++		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
++		: "0" (fn));
++
++  if (name < _SC_LEVEL1_DCACHE_SIZE)
++    {
++      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
++      ecx = edx;
++    }
++
++  switch (name)
++    {
++    case _SC_LEVEL1_DCACHE_SIZE:
++      return (ecx >> 14) & 0x3fc00;
++    case _SC_LEVEL1_DCACHE_ASSOC:
++      ecx >>= 16;
++      if ((ecx & 0xff) == 0xff)
++	/* Fully associative.  */
++	return (ecx << 2) & 0x3fc00;
++      return ecx & 0xff;
++    case _SC_LEVEL1_DCACHE_LINESIZE:
++      return ecx & 0xff;
++    case _SC_LEVEL2_CACHE_SIZE:
++      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
++    case _SC_LEVEL2_CACHE_ASSOC:
++      ecx >>= 12;
++      switch (ecx & 0xf)
++	{
++	case 0:
++	case 1:
++	case 2:
++	case 4:
++	  return ecx & 0xf;
++	case 6:
++	  return 8;
++	case 8:
++	  return 16;
++	case 0xf:
++	  return (ecx << 6) & 0x3fffc00;
++	default:
++	  return 0;
++	}
++    case _SC_LEVEL2_CACHE_LINESIZE:
++      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
++    default:
++      assert (! "cannot happen");
++    }
++  return -1;
++}
++
++
++/* Get the value of the system variable NAME.  */
++long int
++attribute_hidden
++__cache_sysconf (int name)
++{
++  /* Find out what brand of processor.  */
++  unsigned int eax;
++  unsigned int ebx;
++  unsigned int ecx;
++  unsigned int edx;
++  asm volatile ("cpuid"
++		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
++		: "0" (0));
++
++  /* This spells out "GenuineIntel".  */
++  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
++    return handle_intel (name, eax);
++
++  /* This spells out "AuthenticAMD".  */
++  if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
++    return handle_amd (name);
++
++  // XXX Fill in more vendors.
++
++  /* CPU not known, we have no information.  */
++  return 0;
++}
++
++
++/* Half the core cache size for use in memory and string routines, typically
++   L1 size.  */
++long int __x86_64_core_cache_size_half attribute_hidden = 32 * 1024 / 2;
++/* Shared cache size for use in memory and string routines, typically
++   L2 or L3 size.  */
++long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
++/* PREFETCHW support flag for use in memory and string routines.  */
++int __x86_64_prefetchw attribute_hidden;
++
++
++static void
++__attribute__((constructor))
++init_cacheinfo (void)
++{
++  /* Find out what brand of processor.  */
++  unsigned int eax;
++  unsigned int ebx;
++  unsigned int ecx;
++  unsigned int edx;
++  int max_cpuid;
++  int max_cpuid_ex;
++  long int core = -1;
++  long int shared = -1;
++  unsigned int level;
++  unsigned int threads = 0;
++
++  asm volatile ("cpuid"
++		: "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
++		: "0" (0));
++
++  /* This spells out "GenuineIntel".  */
++  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
++    {
++      core = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
++
++      /* Try L3 first.  */
++      level = 3;
++      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
++
++      if (shared <= 0)
++	{
++	  /* Try L2 otherwise.  */
++	  level = 2;
++	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
++	}
++
++      /* Figure out the number of logical threads that share the
++	 highest cache level.  */
++      if (max_cpuid >= 4)
++	{
++	  int i = 0;
++
++	  /* Query until desired cache level is enumerated.  */
++	  do
++	    {
++	      asm volatile ("cpuid"
++			    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
++			    : "0" (4), "2" (i++));
++	    }
++	  while (((eax >> 5) & 0x7) != level);
++
++	  threads = ((eax >> 14) & 0x3ff) + 1;
++	}
++      else
++	{
++	  /* Assume that all logical threads share the highest cache level.  */
++	  asm volatile ("cpuid"
++			: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
++			: "0" (1));
++
++	  threads = (ebx >> 16) & 0xff;
++	}
++
++      /* Cap usage of highest cache level to the number of supported
++	 threads.  */
++      if (shared > 0 && threads > 0)
++	shared /= threads;
++    }
++  /* This spells out "AuthenticAMD".  */
++  else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
++    {
++      core = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
++      shared = handle_amd (_SC_LEVEL2_CACHE_SIZE);
++
++      asm volatile ("cpuid"
++		    : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
++		    : "0" (0x80000000));
++
++      if (max_cpuid_ex >= 0x80000001)
++	{
++	  asm volatile ("cpuid"
++			: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
++			: "0" (0x80000001));
++	  /* PREFETCHW || 3DNow!  */
++	  if ((ecx & 0x100) || (edx & 0x80000000))
++	    __x86_64_prefetchw = -1;
++	}
++    }
++
++  if (core > 0)
++    __x86_64_core_cache_size_half = core / 2;
++
++  if (shared > 0)
++    __x86_64_shared_cache_size_half = shared / 2;
++}
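[Editorial note] That is the whole of the new cacheinfo.c. The _SC_LEVEL*_CACHE_* parameters it serves are glibc extensions reachable through sysconf; 0 means the value is unknown and, per handle_intel above, -1 means the cache level does not exist. A small usage sketch:

    #include <stdio.h>
    #include <unistd.h>

    int
    main (void)
    {
      /* These _SC_ names are glibc extensions declared in <unistd.h>.  */
      printf ("L1d size:     %ld\n", sysconf (_SC_LEVEL1_DCACHE_SIZE));
      printf ("L1d linesize: %ld\n", sysconf (_SC_LEVEL1_DCACHE_LINESIZE));
      printf ("L2 size:      %ld\n", sysconf (_SC_LEVEL2_CACHE_SIZE));
      printf ("L3 size:      %ld\n", sysconf (_SC_LEVEL3_CACHE_SIZE));
      return 0;
    }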
+--- libc/sysdeps/x86_64/memcpy.S	18 Oct 2004 04:17:08 -0000	1.5
++++ libc/sysdeps/x86_64/memcpy.S	21 May 2007 19:21:01 -0000	1.6
+@@ -1,7 +1,10 @@
+-/* Highly optimized version for x86-64.
+-   Copyright (C) 1997, 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
++/*
++   Optimized memcpy for x86-64.
++
++   Copyright (C) 2007 Free Software Foundation, Inc.
++   Contributed by Evandro Menezes <evandro.menezes@amd.com>, 2007.
++
+    This file is part of the GNU C Library.
+-   Based on i586 version contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+    The GNU C Library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+@@ -16,86 +19,556 @@
+    You should have received a copy of the GNU Lesser General Public
+    License along with the GNU C Library; if not, write to the Free
+    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+-   02111-1307 USA.  */
++   02111-1307 USA.
++*/
+
+ #include <sysdep.h>
+ #include "asm-syntax.h"
+-#include "bp-sym.h"
+-#include "bp-asm.h"
+
+-/* BEWARE: `#ifdef memcpy' means that memcpy is redefined as `mempcpy',
+-   and the return value is the byte after the last one copied in
+-   the destination.  */
+-#define MEMPCPY_P (defined memcpy)
++/* Stack slots in the red-zone.  */
++
++#ifdef USE_AS_MEMPCPY
++# define RETVAL	(0)
++#else
++# define RETVAL	(-8)
++#endif
++#define SAVE0	(RETVAL - 8)
++#define SAVE1	(SAVE0 - 8)
++#define SAVE2	(SAVE1 - 8)
++#define SAVE3	(SAVE2 - 8)
+
+ 	.text
++
+ #if defined PIC && !defined NOT_IN_libc
+ ENTRY (__memcpy_chk)
++
+ 	cmpq	%rdx, %rcx
+ 	jb	HIDDEN_JUMPTARGET (__chk_fail)
++
+ END (__memcpy_chk)
+ #endif
+-ENTRY (BP_SYM (memcpy))
+-	/* Cutoff for the big loop is a size of 32 bytes since otherwise
+-	   the loop will never be entered.  */
++
++ENTRY(memcpy)			/* (void *, const void*, size_t) */
++
++/* Handle tiny blocks.  */
++
++L(1try):			/* up to 32B */
+ 	cmpq	$32, %rdx
+-	movq	%rdx, %rcx
+-#if !MEMPCPY_P
+-	movq	%rdi, %r10	/* Save value.  */
++#ifndef USE_AS_MEMPCPY
++	movq	%rdi, %rax	/* save return value */
+ #endif
++	jae	L(1after)
+
+-	/* We need this in any case.  */
+-	cld
++L(1):				/* 1-byte once */
++	testb	$1, %dl
++	jz	L(1a)
+
+-	jbe	1f
++	movzbl	(%rsi), %ecx
++	movb	%cl, (%rdi)
+
+-	/* Align destination.  */
+-	movq	%rdi, %rax
+-	negq	%rax
+-	andq	$7, %rax
+-	subq	%rax, %rcx
+-	xchgq	%rax, %rcx
++	incq	%rsi
++	incq	%rdi
++
++	.p2align 4,, 4
++
++L(1a):				/* 2-byte once */
++	testb	$2, %dl
++	jz	L(1b)
++
++	movzwl	(%rsi), %ecx
++	movw	%cx, (%rdi)
+
+-	rep; movsb
++	addq	$2, %rsi
++	addq	$2, %rdi
+
+-	movq	%rax, %rcx
+-	subq	$32, %rcx
+-	js	2f
++	.p2align 4,, 4
++
++L(1b):				/* 4-byte once */
++	testb	$4, %dl
++	jz	L(1c)
++
++	movl	(%rsi), %ecx
++	movl	%ecx, (%rdi)
++
++	addq	$4, %rsi
++	addq	$4, %rdi
++
++	.p2align 4,, 4
++
++L(1c):				/* 8-byte once */
++	testb	$8, %dl
++	jz	L(1d)
++
++	movq	(%rsi), %rcx
++	movq	%rcx, (%rdi)
++
++	addq	$8, %rsi
++	addq	$8, %rdi
++
++	.p2align 4,, 4
++
++L(1d):				/* 16-byte loop */
++	andl	$0xf0, %edx
++	jz	L(exit)
+
+ 	.p2align 4
+-3:
+
+-	/* Now correct the loop counter.  Please note that in the following
+-	   code the flags are not changed anymore.  */
+-	subq	$32, %rcx
++L(1loop):
++	movq	(%rsi), %rcx
++	movq	8 (%rsi), %r8
++	movq	%rcx, (%rdi)
++	movq	%r8, 8 (%rdi)
++
++	subl	$16, %edx
++
++	leaq	16 (%rsi), %rsi
++	leaq	16 (%rdi), %rdi
++
++	jnz	L(1loop)
++
++	.p2align 4,, 4
++
++L(exit):			/* exit */
++#ifdef USE_AS_MEMPCPY
++	movq	%rdi, %rax	/* return value */
++#else
++	rep
++#endif
++	retq
++
++	.p2align 4
++
++L(1after):
++#ifndef USE_AS_MEMPCPY
++	movq	%rax, RETVAL (%rsp)	/* save return value */
++#endif
++
++/* Align to the natural word size.  */
++
++L(aligntry):
++	movl	%esi, %ecx	/* align by destination */
++
++	andl	$7, %ecx
++	jz	L(alignafter)	/* already aligned */
++
++L(align):			/* align */
++	leaq	-8 (%rcx, %rdx), %rdx	/* calculate remaining bytes */
++	subl	$8, %ecx
++
++	.p2align 4
++
++L(alignloop):			/* 1-byte alignment loop */
++	movzbl	(%rsi), %eax
++	movb	%al, (%rdi)
++
++	incl	%ecx
++
++	leaq	1 (%rsi), %rsi
++	leaq	1 (%rdi), %rdi
++
++	jnz	L(alignloop)
++
++	.p2align 4
++
++L(alignafter):
++
++/* Loop to handle mid-sized blocks.  */
++
++L(32try):			/* up to 1KB */
++	cmpq	$1024, %rdx
++	ja	L(32after)
++
++L(32):				/* 32-byte loop */
++	movl	%edx, %ecx
++	shrl	$5, %ecx
++	jz	L(32skip)
++
++	.p2align 4
++
++L(32loop):
++	decl	%ecx
+
+	movq	(%rsi), %rax
+-	movq	8(%rsi), %rdx
+-	movq	16(%rsi), %r8
+-	movq	24(%rsi), %r9
++	movq	8 (%rsi), %r8
++	movq	16 (%rsi), %r9
++	movq	24 (%rsi), %r10
++
+	movq	%rax, (%rdi)
+-	movq	%rdx, 8(%rdi)
+-	movq	%r8, 16(%rdi)
+-	movq	%r9, 24(%rdi)
++	movq	%r8, 8 (%rdi)
++	movq	%r9, 16 (%rdi)
++	movq	%r10, 24 (%rdi)
+
+	leaq	32(%rsi), %rsi
+	leaq	32(%rdi), %rdi
+
+-	jns	3b
++	jz	L(32skip)	/* help out smaller blocks */
++
++	decl	%ecx
++
++	movq	(%rsi), %rax
++	movq	8 (%rsi), %r8
++	movq	16 (%rsi), %r9
++	movq	24 (%rsi), %r10
++
++	movq	%rax, (%rdi)
++	movq	%r8, 8 (%rdi)
++	movq	%r9, 16 (%rdi)
++	movq	%r10, 24 (%rdi)
++
++	leaq	32 (%rsi), %rsi
++	leaq	32 (%rdi), %rdi
++
++	jnz	L(32loop)
++
++	.p2align 4
+
+-	/* Correct extra loop counter modification.  */
+-2:	addq	$32, %rcx
+-1:	rep; movsb
++L(32skip):
++	andl	$31, %edx	/* check for left overs */
++#ifdef USE_AS_MEMPCPY
++	jnz	L(1)
+
++	movq	%rdi, %rax
+#else
+-	movq	%r10, %rax	/* Set return value.  */
++	movq	RETVAL (%rsp), %rax
++	jnz	L(1)
+
++	rep
++#endif
++	retq			/* exit */
++
++	.p2align 4
++
++L(32after):
++
++/*
++	In order to minimize code-size in RTLD, algorithms specific for
++	larger blocks are excluded when building for RTLD.
++*/
++
++/* Handle large blocks smaller than 1/2 L1.  */
++
++L(fasttry):			/* first 1/2 L1 */
++#ifndef NOT_IN_libc		/* only up to this algorithm outside of libc.so */
++	movq	__x86_64_core_cache_size_half (%rip), %r11
++	cmpq	%rdx, %r11	/* calculate the smaller of */
++	cmovaq	%rdx, %r11	/* remaining bytes and 1/2 L1 */
++#endif
++
++L(fast):			/* good ol' MOVS */
++#ifndef NOT_IN_libc
++	movq	%r11, %rcx
++	andq	$-8, %r11
++#else
++	movq	%rdx, %rcx
++#endif
++	shrq	$3, %rcx
++	jz	L(fastskip)
++
++	rep
++	movsq
++
++	.p2align 4,, 4
++
++L(fastskip):
++#ifndef NOT_IN_libc
++	subq	%r11, %rdx	/* check for more */
++	testq	$-8, %rdx
++	jnz	L(fastafter)
+ #endif
+
++	andl	$7, %edx	/* check for left overs */
++#ifdef USE_AS_MEMPCPY
++	jnz	L(1)
++
++	movq	%rdi, %rax
++#else
++	movq	RETVAL (%rsp), %rax
++	jnz	L(1)
++
++	rep
++#endif
++	retq			/* exit */
++
++#ifndef NOT_IN_libc		/* none of the algorithms below for RTLD */
++
++	.p2align 4
++
++L(fastafter):
++
++/* Handle large blocks smaller than 1/2 L2.  */
++
++L(pretry):			/* first 1/2 L2 */
++	movq	__x86_64_shared_cache_size_half (%rip), %r8
++	cmpq	%rdx, %r8	/* calculate the lesser of */
++	cmovaq	%rdx, %r8	/* remaining bytes and 1/2 L2 */
++
++L(pre):				/* 64-byte with prefetching */
++	movq	%r8, %rcx
++	andq	$-64, %r8
++	shrq	$6, %rcx
++	jz	L(preskip)
++
++	movq	%r14, SAVE0 (%rsp)
++	cfi_rel_offset (%r14, SAVE0)
++	movq	%r13, SAVE1 (%rsp)
++	cfi_rel_offset (%r13, SAVE1)
++	movq	%r12, SAVE2 (%rsp)
++	cfi_rel_offset (%r12, SAVE2)
++	movq	%rbx, SAVE3 (%rsp)
++	cfi_rel_offset (%rbx, SAVE3)
++
++	cmpl	$0, __x86_64_prefetchw (%rip)
++	jz	L(preloop)	/* check if PREFETCHW OK */
++
++	.p2align 4
++
++/* ... when PREFETCHW is available (less cache-probe traffic in MP systems).  */
++
++L(prewloop):			/* cache-line in state M */
++	decq	%rcx
++
++	movq	(%rsi), %rax
++	movq	8 (%rsi), %rbx
++	movq	16 (%rsi), %r9
++	movq	24 (%rsi), %r10
++	movq	32 (%rsi), %r11
++	movq	40 (%rsi), %r12
++	movq	48 (%rsi), %r13
++	movq	56 (%rsi), %r14
++
++	prefetcht0	 0 + 896 (%rsi)
++	prefetcht0	64 + 896 (%rsi)
++
++	movq	%rax, (%rdi)
++	movq	%rbx, 8 (%rdi)
++	movq	%r9, 16 (%rdi)
++	movq	%r10, 24 (%rdi)
++	movq	%r11, 32 (%rdi)
++	movq	%r12, 40 (%rdi)
++	movq	%r13, 48 (%rdi)
++	movq	%r14, 56 (%rdi)
++
++	leaq	64 (%rsi), %rsi
++	leaq	64 (%rdi), %rdi
++
++	jz	L(prebail)
++
++	decq	%rcx
++
++	movq	(%rsi), %rax
++	movq	8 (%rsi), %rbx
++	movq	16 (%rsi), %r9
++	movq	24 (%rsi), %r10
++	movq	32 (%rsi), %r11
++	movq	40 (%rsi), %r12
++	movq	48 (%rsi), %r13
++	movq	56 (%rsi), %r14
++
++	movq	%rax, (%rdi)
++	movq	%rbx, 8 (%rdi)
++	movq	%r9, 16 (%rdi)
++	movq	%r10, 24 (%rdi)
++	movq	%r11, 32 (%rdi)
++	movq	%r12, 40 (%rdi)
++	movq	%r13, 48 (%rdi)
++	movq	%r14, 56 (%rdi)
++
++	prefetchw	896 - 64 (%rdi)
++	prefetchw	896 -  0 (%rdi)
++
++	leaq	64 (%rsi), %rsi
++	leaq	64 (%rdi), %rdi
++
++	jnz	L(prewloop)
++	jmp	L(prebail)
++
++	.p2align 4
++
++/* ... when PREFETCHW is not available.  */
++
++L(preloop):			/* cache-line in state E */
++	decq	%rcx
++
++	movq	(%rsi), %rax
++	movq	8 (%rsi), %rbx
++	movq	16 (%rsi), %r9
++	movq	24 (%rsi), %r10
++	movq	32 (%rsi), %r11
++	movq	40 (%rsi), %r12
++	movq	48 (%rsi), %r13
++	movq	56 (%rsi), %r14
++
++	prefetcht0	896 +  0 (%rsi)
++	prefetcht0	896 + 64 (%rsi)
++
++	movq	%rax, (%rdi)
++	movq	%rbx, 8 (%rdi)
++	movq	%r9, 16 (%rdi)
++	movq	%r10, 24 (%rdi)
++	movq	%r11, 32 (%rdi)
++	movq	%r12, 40 (%rdi)
++	movq	%r13, 48 (%rdi)
++	movq	%r14, 56 (%rdi)
++
++	leaq	64 (%rsi), %rsi
++	leaq	64 (%rdi), %rdi
++
++	jz	L(prebail)
++
++	decq	%rcx
++
++	movq	(%rsi), %rax
++	movq	8 (%rsi), %rbx
++	movq	16 (%rsi), %r9
++	movq	24 (%rsi), %r10
++	movq	32 (%rsi), %r11
++	movq	40 (%rsi), %r12
++	movq	48 (%rsi), %r13
++	movq	56 (%rsi), %r14
++
++	prefetcht0	896 - 64 (%rdi)
++	prefetcht0	896 -  0 (%rdi)
++
++	movq	%rax, (%rdi)
++	movq	%rbx, 8 (%rdi)
++	movq	%r9, 16 (%rdi)
++	movq	%r10, 24 (%rdi)
++	movq	%r11, 32 (%rdi)
++	movq	%r12, 40 (%rdi)
++	movq	%r13, 48 (%rdi)
++	movq	%r14, 56 (%rdi)
++
++	leaq	64 (%rsi), %rsi
++	leaq	64 (%rdi), %rdi
++
++	jnz	L(preloop)
++
++L(prebail):
++	movq	SAVE3 (%rsp), %rbx
++	cfi_restore (%rbx)
++	movq	SAVE2 (%rsp), %r12
++	cfi_restore (%r12)
++	movq	SAVE1 (%rsp), %r13
++	cfi_restore (%r13)
++	movq	SAVE0 (%rsp), %r14
++	cfi_restore (%r14)
++
++/*	.p2align 4 */
++
++L(preskip):
++	subq	%r8, %rdx	/* check for more */
++	testq	$-64, %rdx
++	jnz	L(preafter)
++
++	andl	$63, %edx	/* check for left overs */
++#ifdef USE_AS_MEMPCPY
++	jnz	L(1)
++
++	movq	%rdi, %rax
++#else
++	movq	RETVAL (%rsp), %rax
++	jnz	L(1)
++
++	rep
++#endif
++	retq			/* exit */
++
++	.p2align 4
++
++L(preafter):
++
++/* Loop to handle huge blocks.  */
++
++L(NTtry):
++
++L(NT):				/* non-temporal 128-byte */
++	movq	%rdx, %rcx
++	shrq	$7, %rcx
++	jz	L(NTskip)
++
++	movq	%r14, SAVE0 (%rsp)
++	cfi_rel_offset (%r14, SAVE0)
++	movq	%r13, SAVE1 (%rsp)
++	cfi_rel_offset (%r13, SAVE1)
++	movq	%r12, SAVE2 (%rsp)
++	cfi_rel_offset (%r12, SAVE2)
++
++	.p2align 4
++
++L(NTloop):
++	prefetchnta	768 (%rsi)
++	prefetchnta	832 (%rsi)
++
++	decq	%rcx
++
++	movq	(%rsi), %rax
++	movq	8 (%rsi), %r8
++	movq	16 (%rsi), %r9
++	movq	24 (%rsi), %r10
++	movq	32 (%rsi), %r11
++	movq	40 (%rsi), %r12
++	movq	48 (%rsi), %r13
++	movq	56 (%rsi), %r14
++
++	movntiq	%rax, (%rdi)
++	movntiq	%r8, 8 (%rdi)
++	movntiq	%r9, 16 (%rdi)
++	movntiq	%r10, 24 (%rdi)
++	movntiq	%r11, 32 (%rdi)
++	movntiq	%r12, 40 (%rdi)
++	movntiq	%r13, 48 (%rdi)
++	movntiq	%r14, 56 (%rdi)
++
++	movq	64 (%rsi), %rax
++	movq	72 (%rsi), %r8
++	movq	80 (%rsi), %r9
++	movq	88 (%rsi), %r10
++	movq	96 (%rsi), %r11
++	movq	104 (%rsi), %r12
++	movq	112 (%rsi), %r13
++	movq	120 (%rsi), %r14
++
++	movntiq	%rax, 64 (%rdi)
++	movntiq	%r8, 72 (%rdi)
++	movntiq	%r9, 80 (%rdi)
++	movntiq	%r10, 88 (%rdi)
++	movntiq	%r11, 96 (%rdi)
++	movntiq	%r12, 104 (%rdi)
++	movntiq	%r13, 112 (%rdi)
++	movntiq	%r14, 120 (%rdi)
++
++	leaq	128 (%rsi), %rsi
++	leaq	128 (%rdi), %rdi
++
++	jnz	L(NTloop)
++
++	sfence			/* serialize memory stores */
++
++	movq	SAVE2 (%rsp), %r12
++	cfi_restore (%r12)
++	movq	SAVE1 (%rsp), %r13
++	cfi_restore (%r13)
++	movq	SAVE0 (%rsp), %r14
++	cfi_restore (%r14)
++
++L(NTskip):
++	andl	$127, %edx	/* check for left overs */
++#ifdef USE_AS_MEMPCPY
++	jnz	L(1)
++
++	movq	%rdi, %rax
++#else
++	movq	RETVAL (%rsp), %rax
++	jnz	L(1)
++
++	rep
++#endif
++	retq			/* exit */
++
++#endif /* !NOT_IN_libc */
++
++END(memcpy)
++
++#ifndef USE_AS_MEMPCPY
+ libc_hidden_builtin_def (memcpy)
+ #endif
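[Editorial note] The rewritten memcpy picks one of five copy strategies from the block size and the cache geometry exported by cacheinfo.c. A rough C outline of that dispatch, as a reading aid only: core_half and shared_half stand in for __x86_64_core_cache_size_half and __x86_64_shared_cache_size_half, the defaults mirror the ones in cacheinfo.c, and this is not the real implementation (the assembly applies each strategy to the bytes remaining after the previous one, not a single switch):

    #include <stdio.h>
    #include <stddef.h>

    static long core_half = 32 * 1024 / 2;      /* ~1/2 L1d, set at startup */
    static long shared_half = 1024 * 1024 / 2;  /* ~1/2 L2 or L3 */

    static const char *
    copy_strategy (size_t n)
    {
      if (n < 32)
        return "tiny: 1/2/4/8/16-byte moves";
      if (n <= 1024)
        return "mid: unrolled 32-byte load/store loop";
      if (n <= (size_t) core_half)
        return "fast: rep movsq, fits in half the L1";
      if (n <= (size_t) shared_half)
        return "pre: 64-byte loop with prefetcht0/prefetchw";
      return "NT: 128-byte movnti loop, then sfence";
    }

    int
    main (void)
    {
      size_t sizes[] = { 16, 512, 8192, 262144, 8388608 };
      for (size_t i = 0; i < sizeof sizes / sizeof sizes[0]; ++i)
        printf ("%8zu bytes -> %s\n", sizes[i], copy_strategy (sizes[i]));
      return 0;
    }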
+--- libc/sysdeps/x86_64/mempcpy.S	18 Oct 2004 04:17:08 -0000	1.3
++++ libc/sysdeps/x86_64/mempcpy.S	21 May 2007 19:21:16 -0000	1.4
+@@ -1,3 +1,4 @@
++#define USE_AS_MEMPCPY
+ #define memcpy __mempcpy
+ #define __memcpy_chk __mempcpy_chk
+ #include <sysdeps/x86_64/memcpy.S>
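[Editorial note] mempcpy.S shows the single-source trick: the same memcpy.S body is assembled a second time, with USE_AS_MEMPCPY switching the entry name and the return value. The relationship it encodes is just the following portable C equivalent (hypothetical name):

    #include <string.h>

    /* mempcpy is memcpy that returns one past the last byte written.  */
    static void *
    my_mempcpy (void *dst, const void *src, size_t n)
    {
      return (char *) memcpy (dst, src, n) + n;
    }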
+--- libc/sysdeps/unix/sysv/linux/x86_64/sysconf.c	10 Nov 2006 07:31:55 -0000	1.7
++++ libc/sysdeps/unix/sysv/linux/x86_64/sysconf.c	21 May 2007 19:18:37 -0000	1.8
+@@ -24,328 +24,17 @@
+
+
+ static long int linux_sysconf (int name);
+-
+-
+-static const struct intel_02_cache_info
+-{
+-  unsigned int idx;
+-  int name;
+-  long int size;
+-  long int assoc;
+-  long int linesize;
+-} intel_02_known[] =
+-  {
+-    { 0x06, _SC_LEVEL1_ICACHE_SIZE, 8192, 4, 32 },
+-    { 0x08, _SC_LEVEL1_ICACHE_SIZE, 16384, 4, 32 },
+-    { 0x0a, _SC_LEVEL1_DCACHE_SIZE, 8192, 2, 32 },
+-    { 0x0c, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 32 },
+-    { 0x22, _SC_LEVEL3_CACHE_SIZE, 524288, 4, 64 },
+-    { 0x23, _SC_LEVEL3_CACHE_SIZE, 1048576, 8, 64 },
+-    { 0x25, _SC_LEVEL3_CACHE_SIZE, 2097152, 8, 64 },
+-    { 0x29, _SC_LEVEL3_CACHE_SIZE, 4194304, 8, 64 },
+-    { 0x2c, _SC_LEVEL1_DCACHE_SIZE, 32768, 8, 64 },
+-    { 0x30, _SC_LEVEL1_ICACHE_SIZE, 32768, 8, 64 },
+-    { 0x39, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 64 },
+-    { 0x3a, _SC_LEVEL2_CACHE_SIZE, 196608, 6, 64 },
+-    { 0x3b, _SC_LEVEL2_CACHE_SIZE, 131072, 2, 64 },
+-    { 0x3c, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 64 },
+-    { 0x3d, _SC_LEVEL2_CACHE_SIZE, 393216, 6, 64 },
+-    { 0x3e, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 },
+-    { 0x41, _SC_LEVEL2_CACHE_SIZE, 131072, 4, 32 },
+-    { 0x42, _SC_LEVEL2_CACHE_SIZE, 262144, 4, 32 },
+-    { 0x43, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 32 },
+-    { 0x44, _SC_LEVEL2_CACHE_SIZE, 1048576, 4, 32 },
+-    { 0x45, _SC_LEVEL2_CACHE_SIZE, 2097152, 4, 32 },
+-    { 0x46, _SC_LEVEL3_CACHE_SIZE, 4194304, 4, 64 },
+-    { 0x47, _SC_LEVEL3_CACHE_SIZE, 8388608, 8, 64 },
+-    { 0x49, _SC_LEVEL2_CACHE_SIZE, 4194304, 16, 64 },
+-    { 0x4a, _SC_LEVEL3_CACHE_SIZE, 6291456, 12, 64 },
+-    { 0x4b, _SC_LEVEL3_CACHE_SIZE, 8388608, 16, 64 },
+-    { 0x4c, _SC_LEVEL3_CACHE_SIZE, 12582912, 12, 64 },
+-    { 0x4d, _SC_LEVEL3_CACHE_SIZE, 16777216, 16, 64 },
+-    { 0x60, _SC_LEVEL1_DCACHE_SIZE, 16384, 8, 64 },
+-    { 0x66, _SC_LEVEL1_DCACHE_SIZE, 8192, 4, 64 },
+-    { 0x67, _SC_LEVEL1_DCACHE_SIZE, 16384, 4, 64 },
+-    { 0x68, _SC_LEVEL1_DCACHE_SIZE, 32768, 4, 64 },
+-    { 0x78, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
+-    { 0x79, _SC_LEVEL2_CACHE_SIZE, 131072, 8, 64 },
+-    { 0x7a, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 64 },
+-    { 0x7b, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 64 },
+-    { 0x7c, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
+-    { 0x7d, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 64 },
+-    { 0x7f, _SC_LEVEL2_CACHE_SIZE, 524288, 2, 64 },
+-    { 0x82, _SC_LEVEL2_CACHE_SIZE, 262144, 8, 32 },
+-    { 0x83, _SC_LEVEL2_CACHE_SIZE, 524288, 8, 32 },
+-    { 0x84, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 32 },
+-    { 0x85, _SC_LEVEL2_CACHE_SIZE, 2097152, 8, 32 },
+-    { 0x86, _SC_LEVEL2_CACHE_SIZE, 524288, 4, 64 },
+-    { 0x87, _SC_LEVEL2_CACHE_SIZE, 1048576, 8, 64 },
+-  };
+-#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known[0]))
+-
+-
+-static int
+-intel_02_known_compare (const void *p1, const void *p2)
+-{
+-  const struct intel_02_cache_info *i1;
+-  const struct intel_02_cache_info *i2;
+-
+-  i1 = (const struct intel_02_cache_info *) p1;
+-  i2 = (const struct intel_02_cache_info *) p2;
+-
+-  if (i1->idx == i2->idx)
+-    return 0;
+-
+-  return i1->idx < i2->idx ? -1 : 1;
+-}
+-
+-
+-static long int
+-__attribute__ ((noinline))
+-intel_check_word (int name, unsigned int value, bool *has_level_2,
+-		  bool *no_level_2_or_3)
+-{
+-  if ((value & 0x80000000) != 0)
+-    /* The register value is reserved.  */
+-    return 0;
+-
+-  /* Fold the name.  The _SC_ constants are always in the order SIZE,
+-     ASSOC, LINESIZE.  */
+-  int folded_name = (_SC_LEVEL1_ICACHE_SIZE
+-		     + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3);
+-
+-  while (value != 0)
+-    {
+-      unsigned int byte = value & 0xff;
+-
+-      if (byte == 0x40)
+-	{
+-	  *no_level_2_or_3 = true;
+-
+-	  if (folded_name == _SC_LEVEL3_CACHE_SIZE)
+-	    /* No need to look further.  */
+-	    break;
+-	}
+-      else
+-	{
+-	  if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE)
+-	    {
+-	      /* Intel reused this value.  For family 15, model 6 it
+-		 specifies the 3rd level cache.  Otherwise the 2nd
+-		 level cache.  */
+-	      unsigned int eax;
+-	      unsigned int ebx;
+-	      unsigned int ecx;
+-	      unsigned int edx;
+-	      asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+-			    : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+-			    : "0" (1));
+-
+-	      unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
+-	      unsigned int model = ((((eax >>16) & 0xf) << 4)
+-				    + ((eax >> 4) & 0xf));
+-	      if (family == 15 && model == 6)
+-		{
+-		  /* The level 3 cache is encoded for this model like
+-		     the level 2 cache is for other models.  Pretend
+-		     the caller asked for the level 2 cache.  */
+-		  name = (_SC_LEVEL2_CACHE_SIZE
+-			  + (name - _SC_LEVEL3_CACHE_SIZE));
+-		  folded_name = _SC_LEVEL3_CACHE_SIZE;
+-		}
+-	    }
+-
+-	  struct intel_02_cache_info *found;
+-	  struct intel_02_cache_info search;
+-
+-	  search.idx = byte;
+-	  found = bsearch (&search, intel_02_known, nintel_02_known,
+-			   sizeof (intel_02_known[0]), intel_02_known_compare);
+-	  if (found != NULL)
+-	    {
+-	      if (found->name == folded_name)
+-		{
+-		  unsigned int offset = name - folded_name;
+-
+-		  if (offset == 0)
+-		    /* Cache size.  */
+-		    return found->size;
+-		  if (offset == 1)
+-		    return found->assoc;
+-
+-		  assert (offset == 2);
+-		  return found->linesize;
+-		}
+-
+-	      if (found->name == _SC_LEVEL2_CACHE_SIZE)
+-		*has_level_2 = true;
+-	    }
+-	}
+-
+-      /* Next byte for the next round.  */
+-      value >>= 8;
+-    }
+-
+-  /* Nothing found.  */
+-  return 0;
+-}
+-
+-
+-static long int __attribute__ ((noinline))
+-handle_intel (int name, unsigned int maxidx)
+-{
+-  assert (maxidx >= 2);
+-
+-  /* OK, we can use the CPUID instruction to get all info about the
+-     caches.  */
+-  unsigned int cnt = 0;
+-  unsigned int max = 1;
+-  long int result = 0;
+-  bool no_level_2_or_3 = false;
+-  bool has_level_2 = false;
+-  while (cnt++ < max)
+-    {
+-      unsigned int eax;
+-      unsigned int ebx;
+-      unsigned int ecx;
+-      unsigned int edx;
+-      asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+-		    : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+-		    : "0" (2));
+-
+-      /* The low byte of EAX in the first round contain the number of
+-	 rounds we have to make.  At least one, the one we are already
+-	 doing.  */
+-      if (cnt == 1)
+-	{
+-	  max = eax & 0xff;
+-	  eax &= 0xffffff00;
+-	}
+-
+-      /* Process the individual registers' value.  */
+-      result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
+-      if (result != 0)
+-	return result;
+-
+-      result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
+-      if (result != 0)
+-	return result;
+-
+-      result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
+-      if (result != 0)
+-	return result;
+-
+-      result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
+-      if (result != 0)
+-	return result;
+-    }
+-
+-  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
+-      && no_level_2_or_3)
+-    return -1;
+-
+-  return 0;
+-}
+-
+-
+-static long int __attribute__ ((noinline))
+-handle_amd (int name)
+-{
+-  unsigned int eax;
+-  unsigned int ebx;
+-  unsigned int ecx;
+-  unsigned int edx;
+-  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+-		: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+-		: "0" (0x80000000));
+-
+-  if (name >= _SC_LEVEL3_CACHE_SIZE)
+-    return 0;
+-
+-  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
+-  if (eax < fn)
+-    return 0;
+-
+-  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+-		: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+-		: "0" (fn));
+-
+-  if (name < _SC_LEVEL1_DCACHE_SIZE)
+-    {
+-      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
+-      ecx = edx;
+-    }
+-
+-  switch (name)
+-    {
+-    case _SC_LEVEL1_DCACHE_SIZE:
+-      return (ecx >> 14) & 0x3fc00;
+-    case _SC_LEVEL1_DCACHE_ASSOC:
+-      ecx >>= 16;
+-      if ((ecx & 0xff) == 0xff)
+-	/* Fully associative.  */
+-	return (ecx << 2) & 0x3fc00;
+-      return ecx & 0xff;
+-    case _SC_LEVEL1_DCACHE_LINESIZE:
+-      return ecx & 0xff;
+-    case _SC_LEVEL2_CACHE_SIZE:
+-      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
+-    case _SC_LEVEL2_CACHE_ASSOC:
+-      ecx >>= 12;
+-      switch (ecx & 0xf)
+-	{
+-	case 0:
+-	case 1:
+-	case 2:
+-	case 4:
+-	  return ecx & 0xf;
+-	case 6:
+-	  return 8;
+-	case 8:
+-	  return 16;
+-	case 0xf:
+-	  return (ecx << 6) & 0x3fffc00;
+-	default:
+-	  return 0;
+-	}
+-    case _SC_LEVEL2_CACHE_LINESIZE:
+-      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
+-    default:
+-      assert (! "cannot happen");
+-    }
+-  return -1;
+-}
++extern long int __cache_sysconf (int) attribute_hidden;
+
+
+ /* Get the value of the system variable NAME.  */
+ long int
+ __sysconf (int name)
+ {
+-  /* We only handle the cache information here (for now).  */
+-  if (name < _SC_LEVEL1_ICACHE_SIZE || name > _SC_LEVEL4_CACHE_LINESIZE)
+-    return linux_sysconf (name);
+-
+-  /* Find out what brand of processor.  */
+-  unsigned int eax;
+-  unsigned int ebx;
+-  unsigned int ecx;
+-  unsigned int edx;
+-  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+-		: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+-		: "0" (0));
+-
+-  /* This spells out "GenuineIntel".  */
+-  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
+-    return handle_intel (name, eax);
+-
+-  /* This spells out "AuthenticAMD".  */
+-  if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+-    return handle_amd (name);
+-
+-  // XXX Fill in more vendors.
++  if (name >= _SC_LEVEL1_ICACHE_SIZE && name <= _SC_LEVEL4_CACHE_LINESIZE)
++    return __cache_sysconf (name);
+
+-  /* CPU not known, we have no information.  */
+-  return 0;
++  return linux_sysconf (name);
+ }
+
+ /* Now the generic Linux version.  */
diff --git a/fedora/glibc.spec.in b/fedora/glibc.spec.in
index 6710838ca5..5e13ea805a 100644
--- a/fedora/glibc.spec.in
+++ b/fedora/glibc.spec.in
@@ -1,4 +1,4 @@
-%define glibcrelease 19
+%define glibcrelease 18.1
 %define auxarches i586 i686 athlon sparcv9 alphaev6
 %define xenarches i686 athlon
 %ifarch %{xenarches}
@@ -31,6 +31,8 @@ Source2: %(echo %{glibcsrcdir} | sed s/glibc-/glibc-libidn-/).tar.bz2
 Source3: %{glibcname}-fedora-%{glibcdate}.tar.bz2
 Patch0: %{glibcname}-fedora.patch
 Patch1: %{name}-ia64-lib64.patch
+Patch2: glibc-rh234946.patch
+Patch3: glibc-x86_64-memcpy.patch
 Buildroot: %{_tmppath}/glibc-%{PACKAGE_VERSION}-root
 Obsoletes: zoneinfo, libc-static, libc-devel, libc-profile, libc-headers,
 Obsoletes: gencat, locale, ldconfig, locale-ja, glibc-profile
@@ -242,6 +244,9 @@ package or when debugging this package.
 %patch1 -p1
 %endif
 %endif
+%patch2 -p1
+# Postpone until this is enough tested in F7
+#%patch3 -p1
 # Hack till glibc-kernheaders get updated, argh
 mkdir -p override_headers/linux