From e2e4f56056adddc3c1efe676b40a4b4f2453103b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 13 Aug 2015 03:37:47 -0700 Subject: Add _dl_x86_cpu_features to rtld_global This patch adds _dl_x86_cpu_features to rtld_global in x86 ld.so and initializes it early before __libc_start_main is called so that cpu_features is always available when it is used and we can avoid calling __init_cpu_features in IFUNC selectors. * sysdeps/i386/dl-machine.h: Include . (dl_platform_init): Call init_cpu_features. * sysdeps/i386/dl-procinfo.c (_dl_x86_cpu_features): New. * sysdeps/i386/i686/cacheinfo.c (DISABLE_PREFERRED_MEMORY_INSTRUCTION): Removed. * sysdeps/i386/i686/multiarch/Makefile (aux): Remove init-arch. * sysdeps/i386/i686/multiarch/Versions: Removed. * sysdeps/i386/i686/multiarch/ifunc-defines.sym (KIND_OFFSET): Removed. * sysdeps/i386/ldsodefs.h: Include . * sysdeps/unix/sysv/linux/x86/Makefile (libpthread-sysdep_routines): Remove init-arch. * sysdeps/unix/sysv/linux/x86_64/dl-procinfo.c: Include instead of sysdeps/generic/dl-procinfo.c>. * sysdeps/x86/Makefile [$(subdir) == csu] (gen-as-const-headers): Add cpu-features-offsets.sym and rtld-global-offsets.sym. [$(subdir) == elf] (sysdep-dl-routines): Add dl-get-cpu-features. [$(subdir) == elf] (tests): Add tst-get-cpu-features. [$(subdir) == elf] (tests-static): Add tst-get-cpu-features-static. * sysdeps/x86/Versions: New file. * sysdeps/x86/cpu-features-offsets.sym: Likewise. * sysdeps/x86/cpu-features.c: Likewise. * sysdeps/x86/cpu-features.h: Likewise. * sysdeps/x86/dl-get-cpu-features.c: Likewise. * sysdeps/x86/libc-start.c: Likewise. * sysdeps/x86/rtld-global-offsets.sym: Likewise. * sysdeps/x86/tst-get-cpu-features-static.c: Likewise. * sysdeps/x86/tst-get-cpu-features.c: Likewise. * sysdeps/x86_64/dl-procinfo.c: Likewise. * sysdeps/x86_64/cacheinfo.c (__cpuid_count): Removed. Assume USE_MULTIARCH is defined and don't check it. (is_intel): Replace __cpu_features with GLRO(dl_x86_cpu_features). (is_amd): Likewise. (max_cpuid): Likewise. (intel_check_word): Likewise. (__cache_sysconf): Don't call __init_cpu_features. (__x86_preferred_memory_instruction): Removed. (init_cacheinfo): Don't call __init_cpu_features. Replace __cpu_features with GLRO(dl_x86_cpu_features). * sysdeps/x86_64/dl-machine.h: . (dl_platform_init): Call init_cpu_features. * sysdeps/x86_64/ldsodefs.h: Include . * sysdeps/x86_64/multiarch/Makefile (aux): Remove init-arch. * sysdeps/x86_64/multiarch/Versions: Removed. * sysdeps/x86_64/multiarch/cacheinfo.c: Likewise. * sysdeps/x86_64/multiarch/init-arch.c: Likewise. * sysdeps/x86_64/multiarch/ifunc-defines.sym (KIND_OFFSET): Removed. * sysdeps/x86_64/multiarch/init-arch.h: Rewrite. --- sysdeps/x86_64/multiarch/Makefile | 1 - sysdeps/x86_64/multiarch/Versions | 5 - sysdeps/x86_64/multiarch/cacheinfo.c | 2 - sysdeps/x86_64/multiarch/ifunc-defines.sym | 1 - sysdeps/x86_64/multiarch/init-arch.c | 223 ----------------------------- sysdeps/x86_64/multiarch/init-arch.h | 194 +------------------------ 6 files changed, 5 insertions(+), 421 deletions(-) delete mode 100644 sysdeps/x86_64/multiarch/Versions delete mode 100644 sysdeps/x86_64/multiarch/cacheinfo.c delete mode 100644 sysdeps/x86_64/multiarch/init-arch.c (limited to 'sysdeps/x86_64/multiarch') diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index d7002a9df3..d10b4d4fb3 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -1,5 +1,4 @@ ifeq ($(subdir),csu) -aux += init-arch tests += test-multiarch gen-as-const-headers += ifunc-defines.sym endif diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions deleted file mode 100644 index 59b185ac8d..0000000000 --- a/sysdeps/x86_64/multiarch/Versions +++ /dev/null @@ -1,5 +0,0 @@ -libc { - GLIBC_PRIVATE { - __get_cpu_features; - } -} diff --git a/sysdeps/x86_64/multiarch/cacheinfo.c b/sysdeps/x86_64/multiarch/cacheinfo.c deleted file mode 100644 index f87b8dce6b..0000000000 --- a/sysdeps/x86_64/multiarch/cacheinfo.c +++ /dev/null @@ -1,2 +0,0 @@ -#define DISABLE_PREFERRED_MEMORY_INSTRUCTION -#include "../cacheinfo.c" diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym index a410d8808f..3df946f343 100644 --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym @@ -4,7 +4,6 @@ -- CPU_FEATURES_SIZE sizeof (struct cpu_features) -KIND_OFFSET offsetof (struct cpu_features, kind) CPUID_OFFSET offsetof (struct cpu_features, cpuid) CPUID_SIZE sizeof (struct cpuid_registers) CPUID_EAX_OFFSET offsetof (struct cpuid_registers, eax) diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c deleted file mode 100644 index aaad5fa841..0000000000 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ /dev/null @@ -1,223 +0,0 @@ -/* Initialize CPU feature data. - This file is part of the GNU C Library. - Copyright (C) 2008-2015 Free Software Foundation, Inc. - Contributed by Ulrich Drepper . - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include -#include "init-arch.h" - - -struct cpu_features __cpu_features attribute_hidden; - - -static void -get_common_indeces (unsigned int *family, unsigned int *model) -{ - __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - *family = (eax >> 8) & 0x0f; - *model = (eax >> 4) & 0x0f; -} - - -void -__init_cpu_features (void) -{ - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - unsigned int family = 0; - unsigned int model = 0; - enum cpu_features_kind kind; - - __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx); - - /* This spells out "GenuineIntel". */ - if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) - { - kind = arch_kind_intel; - - get_common_indeces (&family, &model); - - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; - unsigned int extended_family = (eax >> 20) & 0xff; - unsigned int extended_model = (eax >> 12) & 0xf0; - if (family == 0x0f) - { - family += extended_family; - model += extended_model; - } - else if (family == 0x06) - { - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - model += extended_model; - switch (model) - { - case 0x1c: - case 0x26: - /* BSF is slow on Atom. */ - __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF; - break; - - case 0x37: - case 0x4a: - case 0x4d: - case 0x5a: - case 0x5d: - /* Unaligned load versions are faster than SSSE3 - on Silvermont. */ -#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop -#endif -#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 -# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 -#endif - __cpu_features.feature[index_Fast_Unaligned_Load] - |= (bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop - | bit_Slow_SSE4_2); - break; - - default: - /* Unknown family 0x06 processors. Assuming this is one - of Core i3/i5/i7 processors if AVX is available. */ - if ((ecx & bit_AVX) == 0) - break; - - case 0x1a: - case 0x1e: - case 0x1f: - case 0x25: - case 0x2c: - case 0x2e: - case 0x2f: - /* Rep string instructions, copy backward, unaligned loads - and pminub are fast on Intel Core i3, i5 and i7. */ -#if index_Fast_Rep_String != index_Fast_Copy_Backward -# error index_Fast_Rep_String != index_Fast_Copy_Backward -#endif -#if index_Fast_Rep_String != index_Fast_Unaligned_Load -# error index_Fast_Rep_String != index_Fast_Unaligned_Load -#endif -#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop -#endif - __cpu_features.feature[index_Fast_Rep_String] - |= (bit_Fast_Rep_String - | bit_Fast_Copy_Backward - | bit_Fast_Unaligned_Load - | bit_Prefer_PMINUB_for_stringop); - break; - } - } - } - /* This spells out "AuthenticAMD". */ - else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) - { - kind = arch_kind_amd; - - get_common_indeces (&family, &model); - - ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - - unsigned int eax; - __cpuid (0x80000000, eax, ebx, ecx, edx); - if (eax >= 0x80000001) - __cpuid (0x80000001, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].edx); - } - else - kind = arch_kind_other; - - if (__cpu_features.max_cpuid >= 7) - __cpuid_count (7, 0, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx, - __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx); - - /* Can we call xgetbv? */ - if (CPUID_OSXSAVE) - { - unsigned int xcrlow; - unsigned int xcrhigh; - asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); - /* Is YMM and XMM state usable? */ - if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == - (bit_YMM_state | bit_XMM_state)) - { - /* Determine if AVX is usable. */ - if (CPUID_AVX) - __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; -#if index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -# error index_AVX2_Usable != index_AVX_Fast_Unaligned_Load -#endif - /* Determine if AVX2 is usable. Unaligned load with 256-bit - AVX registers are faster on processors with AVX2. */ - if (CPUID_AVX2) - __cpu_features.feature[index_AVX2_Usable] - |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load; - /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and - ZMM16-ZMM31 state are enabled. */ - if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state - | bit_ZMM16_31_state)) == - (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) - { - /* Determine if AVX512F is usable. */ - if (CPUID_AVX512F) - { - __cpu_features.feature[index_AVX512F_Usable] - |= bit_AVX512F_Usable; - /* Determine if AVX512DQ is usable. */ - if (CPUID_AVX512DQ) - __cpu_features.feature[index_AVX512DQ_Usable] - |= bit_AVX512DQ_Usable; - } - } - /* Determine if FMA is usable. */ - if (CPUID_FMA) - __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable; - /* Determine if FMA4 is usable. */ - if (CPUID_FMA4) - __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable; - } - } - - __cpu_features.family = family; - __cpu_features.model = model; - atomic_write_barrier (); - __cpu_features.kind = kind; -} - -#undef __get_cpu_features - -const struct cpu_features * -__get_cpu_features (void) -{ - if (__cpu_features.kind == arch_kind_unknown) - __init_cpu_features (); - - return &__cpu_features; -} diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index cfc6e7049e..2b9988ebc3 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -15,192 +15,8 @@ License along with the GNU C Library; if not, see . */ -#define bit_Fast_Rep_String (1 << 0) -#define bit_Fast_Copy_Backward (1 << 1) -#define bit_Slow_BSF (1 << 2) -#define bit_Fast_Unaligned_Load (1 << 4) -#define bit_Prefer_PMINUB_for_stringop (1 << 5) -#define bit_AVX_Usable (1 << 6) -#define bit_FMA_Usable (1 << 7) -#define bit_FMA4_Usable (1 << 8) -#define bit_Slow_SSE4_2 (1 << 9) -#define bit_AVX2_Usable (1 << 10) -#define bit_AVX_Fast_Unaligned_Load (1 << 11) -#define bit_AVX512F_Usable (1 << 12) -#define bit_AVX512DQ_Usable (1 << 13) - -/* CPUID Feature flags. */ - -/* COMMON_CPUID_INDEX_1. */ -#define bit_SSE2 (1 << 26) -#define bit_SSSE3 (1 << 9) -#define bit_SSE4_1 (1 << 19) -#define bit_SSE4_2 (1 << 20) -#define bit_OSXSAVE (1 << 27) -#define bit_AVX (1 << 28) -#define bit_POPCOUNT (1 << 23) -#define bit_FMA (1 << 12) -#define bit_FMA4 (1 << 16) - -/* COMMON_CPUID_INDEX_7. */ -#define bit_RTM (1 << 11) -#define bit_AVX2 (1 << 5) -#define bit_AVX512F (1 << 16) -#define bit_AVX512DQ (1 << 17) - -/* XCR0 Feature flags. */ -#define bit_XMM_state (1 << 1) -#define bit_YMM_state (2 << 1) -#define bit_Opmask_state (1 << 5) -#define bit_ZMM0_15_state (1 << 6) -#define bit_ZMM16_31_state (1 << 7) - -/* The integer bit array index for the first set of internal feature bits. */ -# define FEATURE_INDEX_1 0 - -/* The current maximum size of the feature integer bit array. */ -# define FEATURE_INDEX_MAX 1 - -#ifdef __ASSEMBLER__ - -# include - -# define index_SSE2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET -# define index_SSSE3 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET -# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET - -# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE -# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE -# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE -# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE -# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX2_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX512F_Usable FEATURE_INDEX_1*FEATURE_SIZE -# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE - -#else /* __ASSEMBLER__ */ - -# include - -enum - { - COMMON_CPUID_INDEX_1 = 0, - COMMON_CPUID_INDEX_7, - COMMON_CPUID_INDEX_80000001, /* for AMD */ - /* Keep the following line at the end. */ - COMMON_CPUID_INDEX_MAX - }; - -extern struct cpu_features -{ - enum cpu_features_kind - { - arch_kind_unknown = 0, - arch_kind_intel, - arch_kind_amd, - arch_kind_other - } kind; - int max_cpuid; - struct cpuid_registers - { - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - } cpuid[COMMON_CPUID_INDEX_MAX]; - unsigned int family; - unsigned int model; - unsigned int feature[FEATURE_INDEX_MAX]; -} __cpu_features attribute_hidden; - - -extern void __init_cpu_features (void) attribute_hidden; -# define INIT_ARCH() \ - do \ - if (__cpu_features.kind == arch_kind_unknown) \ - __init_cpu_features (); \ - while (0) - -/* Used from outside libc.so to get access to the CPU features structure. */ -extern const struct cpu_features *__get_cpu_features (void) - __attribute__ ((const)); - -# if IS_IN (libc) -# define __get_cpu_features() (&__cpu_features) -# endif - -# define HAS_CPU_FEATURE(idx, reg, bit) \ - ((__get_cpu_features ()->cpuid[idx].reg & (bit)) != 0) - -/* Following are the feature tests used throughout libc. */ - -/* CPUID_* evaluates to true if the feature flag is enabled. - We always use &__cpu_features because the HAS_CPUID_* macros - are called only within __init_cpu_features, where we can't - call __get_cpu_features without infinite recursion. */ -# define HAS_CPUID_FLAG(idx, reg, bit) \ - (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0) - -# define CPUID_OSXSAVE \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE) -# define CPUID_AVX \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX) -# define CPUID_FMA \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_FMA) -# define CPUID_FMA4 \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) -# define CPUID_RTM \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM) -# define CPUID_AVX2 \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2) -# define CPUID_AVX512F \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512F) -# define CPUID_AVX512DQ \ - HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512DQ) - -/* HAS_* evaluates to true if we may use the feature at runtime. */ -# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) -# define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT) -# define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3) -# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) -# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) -# define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM) - -# define index_Fast_Rep_String FEATURE_INDEX_1 -# define index_Fast_Copy_Backward FEATURE_INDEX_1 -# define index_Slow_BSF FEATURE_INDEX_1 -# define index_Fast_Unaligned_Load FEATURE_INDEX_1 -# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 -# define index_AVX_Usable FEATURE_INDEX_1 -# define index_FMA_Usable FEATURE_INDEX_1 -# define index_FMA4_Usable FEATURE_INDEX_1 -# define index_Slow_SSE4_2 FEATURE_INDEX_1 -# define index_AVX2_Usable FEATURE_INDEX_1 -# define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1 -# define index_AVX512F_Usable FEATURE_INDEX_1 -# define index_AVX512DQ_Usable FEATURE_INDEX_1 - -# define HAS_ARCH_FEATURE(name) \ - ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) - -# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) -# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) -# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) -# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) -# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable) -# define HAS_AVX2 HAS_ARCH_FEATURE (AVX2_Usable) -# define HAS_AVX512F HAS_ARCH_FEATURE (AVX512F_Usable) -# define HAS_AVX512DQ HAS_ARCH_FEATURE (AVX512DQ_Usable) -# define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable) -# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable) -# define HAS_AVX_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - -#endif /* __ASSEMBLER__ */ +#ifdef __ASSEMBLER__ +# include +#else +# include +#endif -- cgit 1.4.1