summary | refs | log | tree | commit | diff | stats
path: root/patches/source/glibc/glibc-2.15_avx2.diff
diff options
context:
space:
mode:
Diffstat (limited to 'patches/source/glibc/glibc-2.15_avx2.diff')
-rw-r--r-- patches/source/glibc/glibc-2.15_avx2.diff 171
1 files changed, 171 insertions, 0 deletions
diff --git a/patches/source/glibc/glibc-2.15_avx2.diff b/patches/source/glibc/glibc-2.15_avx2.diff
new file mode 100644
index 000000000..1b1b88be9
--- /dev/null
+++ b/patches/source/glibc/glibc-2.15_avx2.diff
@@ -0,0 +1,171 @@
+From: Ulrich Drepper <drepper@gmail.com>
+Date: Thu, 26 Jan 2012 14:45:54 +0000 (-0500)
+Subject: Really fix AVX tests
+X-Git-Tag: glibc-2.16-tps~1052
+X-Git-Url: http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff_plain;h=08cf777f9e7f6d826658a99c7d77a359f73a45bf
+
+Really fix AVX tests
+
+There is no problem with strcmp, it doesn't use the YMM registers.
+The math routines might since gcc perhaps generates such code.
+Introduce bit_YMM_Usable and use it in the math routines.
+---
+
+--- sysdeps/x86_64/fpu/multiarch/e_atan2.c
++++ sysdeps/x86_64/fpu/multiarch/e_atan2.c
+@@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (double, double);
+
+ libm_ifunc (__ieee754_atan2,
+ HAS_FMA4 ? __ieee754_atan2_fma4
+- : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
++ : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
+ strong_alias (__ieee754_atan2, __atan2_finite)
+
+ # define __ieee754_atan2 __ieee754_atan2_sse2
+--- sysdeps/x86_64/fpu/multiarch/e_exp.c
++++ sysdeps/x86_64/fpu/multiarch/e_exp.c
+@@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double);
+
+ libm_ifunc (__ieee754_exp,
+ HAS_FMA4 ? __ieee754_exp_fma4
+- : (HAS_AVX ? __ieee754_exp_avx : __ieee754_exp_sse2));
++ : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2));
+ strong_alias (__ieee754_exp, __exp_finite)
+
+ # define __ieee754_exp __ieee754_exp_sse2
+--- sysdeps/x86_64/fpu/multiarch/e_log.c
++++ sysdeps/x86_64/fpu/multiarch/e_log.c
+@@ -14,7 +14,7 @@ extern double __ieee754_log_fma4 (double);
+
+ libm_ifunc (__ieee754_log,
+ HAS_FMA4 ? __ieee754_log_fma4
+- : (HAS_AVX ? __ieee754_log_avx
++ : (HAS_YMM_USABLE ? __ieee754_log_avx
+ : __ieee754_log_sse2));
+ strong_alias (__ieee754_log, __log_finite)
+
+--- sysdeps/x86_64/fpu/multiarch/s_atan.c
++++ sysdeps/x86_64/fpu/multiarch/s_atan.c
+@@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
+ # define __atan_fma4 ((void *) 0)
+ # endif
+
+-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
++libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
++ HAS_YMM_USABLE ? __atan_avx : __atan_sse2));
+
+ # define atan __atan_sse2
+ #endif
+--- sysdeps/x86_64/fpu/multiarch/s_sin.c
++++ sysdeps/x86_64/fpu/multiarch/s_sin.c
+@@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
+ # define __sin_fma4 ((void *) 0)
+ # endif
+
+-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
++libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
++ HAS_YMM_USABLE ? __cos_avx : __cos_sse2));
+ weak_alias (__cos, cos)
+
+-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
++libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
++ HAS_YMM_USABLE ? __sin_avx : __sin_sse2));
+ weak_alias (__sin, sin)
+
+ # define __cos __cos_sse2
+--- sysdeps/x86_64/fpu/multiarch/s_tan.c
++++ sysdeps/x86_64/fpu/multiarch/s_tan.c
+@@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
+ # define __tan_fma4 ((void *) 0)
+ # endif
+
+-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
++libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
++ HAS_YMM_USABLE ? __tan_avx : __tan_sse2));
+
+ # define tan __tan_sse2
+ #endif
+--- sysdeps/x86_64/multiarch/init-arch.c
++++ sysdeps/x86_64/multiarch/init-arch.c
+@@ -147,13 +147,13 @@ __init_cpu_features (void)
+ if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
+ {
+ /* Reset the AVX bit in case OSXSAVE is disabled. */
+- if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) == 0
+- || ({ unsigned int xcrlow;
+- unsigned int xcrhigh;
+- asm ("xgetbv"
+- : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+- (xcrlow & 6) != 6; }))
+- __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX;
++ if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
++ && ({ unsigned int xcrlow;
++ unsigned int xcrhigh;
++ asm ("xgetbv"
++ : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
++ (xcrlow & 6) == 6; }))
++ __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+ }
+
+ __cpu_features.family = family;
+--- sysdeps/x86_64/multiarch/init-arch.h
++++ sysdeps/x86_64/multiarch/init-arch.h
+@@ -22,6 +22,7 @@
+ #define bit_Prefer_SSE_for_memop (1 << 3)
+ #define bit_Fast_Unaligned_Load (1 << 4)
+ #define bit_Prefer_PMINUB_for_stringop (1 << 5)
++#define bit_YMM_Usable (1 << 6)
+
+ #define bit_SSE2 (1 << 26)
+ #define bit_SSSE3 (1 << 9)
+@@ -49,6 +50,7 @@
+ # define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
+ # define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
+ # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
++# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE
+
+ #else /* __ASSEMBLER__ */
+
+@@ -93,7 +95,7 @@ extern struct cpu_features
+
+
+ extern void __init_cpu_features (void) attribute_hidden;
+-#define INIT_ARCH()\
++# define INIT_ARCH() \
+ do \
+ if (__cpu_features.kind == arch_kind_unknown) \
+ __init_cpu_features (); \
+@@ -126,23 +128,21 @@ extern const struct cpu_features *__get_cpu_features (void)
+ # define index_Slow_BSF FEATURE_INDEX_1
+ # define index_Prefer_SSE_for_memop FEATURE_INDEX_1
+ # define index_Fast_Unaligned_Load FEATURE_INDEX_1
++# define index_YMM_Usable FEATURE_INDEX_1
+
+-#define HAS_ARCH_FEATURE(idx, bit) \
+- ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
++# define HAS_ARCH_FEATURE(name) \
++ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
+
+-#define HAS_FAST_REP_STRING \
+- HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
++# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String)
+
+-#define HAS_FAST_COPY_BACKWARD \
+- HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
++# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward)
+
+-#define HAS_SLOW_BSF \
+- HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
++# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF)
+
+-#define HAS_PREFER_SSE_FOR_MEMOP \
+- HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
++# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
+
+-#define HAS_FAST_UNALIGNED_LOAD \
+- HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
++# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
++
++# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable)
+
+ #endif /* __ASSEMBLER__ */