summaryrefslogtreecommitdiffstats
path: root/meta
diff options
context:
space:
mode:
Diffstat (limited to 'meta')
-rw-r--r--meta/recipes-core/eglibc/eglibc-2.13/ppc-sqrt.patch1076
1 files changed, 538 insertions, 538 deletions
diff --git a/meta/recipes-core/eglibc/eglibc-2.13/ppc-sqrt.patch b/meta/recipes-core/eglibc/eglibc-2.13/ppc-sqrt.patch
index 0c6f0cb1f0..203040c15c 100644
--- a/meta/recipes-core/eglibc/eglibc-2.13/ppc-sqrt.patch
+++ b/meta/recipes-core/eglibc/eglibc-2.13/ppc-sqrt.patch
@@ -1,538 +1,538 @@
1Upstream-Status: Pending 1Upstream-Status: Pending
2 2
32011-03-22 Joseph Myers <joseph@codesourcery.com> 32011-03-22 Joseph Myers <joseph@codesourcery.com>
4 4
5 Merge from SG++ 2.11: 5 Merge from SG++ 2.11:
6 6
7 2010-10-05 Nathan Froyd <froydnj@codesourcery.com> 7 2010-10-05 Nathan Froyd <froydnj@codesourcery.com>
8 8
9 Issue #9382 9 Issue #9382
10 10
11 * sysdeps/powerpc/powerpc32/603e/: New directory. 11 * sysdeps/powerpc/powerpc32/603e/: New directory.
12 * sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/: New directory. 12 * sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/: New directory.
13 * sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/: New directory. 13 * sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/: New directory.
14 * sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/: New directory. 14 * sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/: New directory.
15 * sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c: Update. 15 * sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c: Update.
16 * sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c: Update. 16 * sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c: Update.
17 * sysdeps/powerpc/powerpc64/e5500/fpu/Implies: New file. 17 * sysdeps/powerpc/powerpc64/e5500/fpu/Implies: New file.
18 18
19Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c 19Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
20=================================================================== 20===================================================================
21--- /dev/null 21--- /dev/null
22+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c 22+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
23@@ -0,0 +1,134 @@ 23@@ -0,0 +1,134 @@
24+/* Double-precision floating point square root. 24+/* Double-precision floating point square root.
25+ Copyright (C) 2010 Free Software Foundation, Inc. 25+ Copyright (C) 2010 Free Software Foundation, Inc.
26+ This file is part of the GNU C Library. 26+ This file is part of the GNU C Library.
27+ 27+
28+ The GNU C Library is free software; you can redistribute it and/or 28+ The GNU C Library is free software; you can redistribute it and/or
29+ modify it under the terms of the GNU Lesser General Public 29+ modify it under the terms of the GNU Lesser General Public
30+ License as published by the Free Software Foundation; either 30+ License as published by the Free Software Foundation; either
31+ version 2.1 of the License, or (at your option) any later version. 31+ version 2.1 of the License, or (at your option) any later version.
32+ 32+
33+ The GNU C Library is distributed in the hope that it will be useful, 33+ The GNU C Library is distributed in the hope that it will be useful,
34+ but WITHOUT ANY WARRANTY; without even the implied warranty of 34+ but WITHOUT ANY WARRANTY; without even the implied warranty of
35+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 35+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36+ Lesser General Public License for more details. 36+ Lesser General Public License for more details.
37+ 37+
38+ You should have received a copy of the GNU Lesser General Public 38+ You should have received a copy of the GNU Lesser General Public
39+ License along with the GNU C Library; if not, write to the Free 39+ License along with the GNU C Library; if not, write to the Free
40+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 40+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
41+ 02111-1307 USA. */ 41+ 02111-1307 USA. */
42+ 42+
43+#include <math.h> 43+#include <math.h>
44+#include <math_private.h> 44+#include <math_private.h>
45+#include <fenv_libc.h> 45+#include <fenv_libc.h>
46+#include <inttypes.h> 46+#include <inttypes.h>
47+ 47+
48+#include <sysdep.h> 48+#include <sysdep.h>
49+#include <ldsodefs.h> 49+#include <ldsodefs.h>
50+ 50+
51+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; 51+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
52+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; 52+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
53+static const float two108 = 3.245185536584267269e+32; 53+static const float two108 = 3.245185536584267269e+32;
54+static const float twom54 = 5.551115123125782702e-17; 54+static const float twom54 = 5.551115123125782702e-17;
55+static const float half = 0.5; 55+static const float half = 0.5;
56+ 56+
57+/* The method is based on the descriptions in: 57+/* The method is based on the descriptions in:
58+ 58+
59+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; 59+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
60+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 60+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
61+ 61+
62+ We find the actual square root and half of its reciprocal 62+ We find the actual square root and half of its reciprocal
63+ simultaneously. */ 63+ simultaneously. */
64+ 64+
65+#ifdef __STDC__ 65+#ifdef __STDC__
66+double 66+double
67+__ieee754_sqrt (double b) 67+__ieee754_sqrt (double b)
68+#else 68+#else
69+double 69+double
70+__ieee754_sqrt (b) 70+__ieee754_sqrt (b)
71+ double b; 71+ double b;
72+#endif 72+#endif
73+{ 73+{
74+ if (__builtin_expect (b > 0, 1)) 74+ if (__builtin_expect (b > 0, 1))
75+ { 75+ {
76+ double y, g, h, d, r; 76+ double y, g, h, d, r;
77+ ieee_double_shape_type u; 77+ ieee_double_shape_type u;
78+ 78+
79+ if (__builtin_expect (b != a_inf.value, 1)) 79+ if (__builtin_expect (b != a_inf.value, 1))
80+ { 80+ {
81+ fenv_t fe; 81+ fenv_t fe;
82+ 82+
83+ fe = fegetenv_register (); 83+ fe = fegetenv_register ();
84+ 84+
85+ u.value = b; 85+ u.value = b;
86+ 86+
87+ relax_fenv_state (); 87+ relax_fenv_state ();
88+ 88+
89+ __asm__ ("frsqrte %[estimate], %[x]\n" 89+ __asm__ ("frsqrte %[estimate], %[x]\n"
90+ : [estimate] "=f" (y) : [x] "f" (b)); 90+ : [estimate] "=f" (y) : [x] "f" (b));
91+ 91+
92+ /* Following Muller et al, page 168, equation 5.20. 92+ /* Following Muller et al, page 168, equation 5.20.
93+ 93+
94+ h goes to 1/(2*sqrt(b)) 94+ h goes to 1/(2*sqrt(b))
95+ g goes to sqrt(b). 95+ g goes to sqrt(b).
96+ 96+
97+ We need three iterations to get within 1ulp. */ 97+ We need three iterations to get within 1ulp. */
98+ 98+
99+ /* Indicate that these can be performed prior to the branch. GCC 99+ /* Indicate that these can be performed prior to the branch. GCC
100+ insists on sinking them below the branch, however; it seems like 100+ insists on sinking them below the branch, however; it seems like
101+ they'd be better before the branch so that we can cover any latency 101+ they'd be better before the branch so that we can cover any latency
102+ from storing the argument and loading its high word. Oh well. */ 102+ from storing the argument and loading its high word. Oh well. */
103+ 103+
104+ g = b * y; 104+ g = b * y;
105+ h = 0.5 * y; 105+ h = 0.5 * y;
106+ 106+
107+ /* Handle small numbers by scaling. */ 107+ /* Handle small numbers by scaling. */
108+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) 108+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
109+ return __ieee754_sqrt (b * two108) * twom54; 109+ return __ieee754_sqrt (b * two108) * twom54;
110+ 110+
111+#define FMADD(a_, c_, b_) \ 111+#define FMADD(a_, c_, b_) \
112+ ({ double __r; \ 112+ ({ double __r; \
113+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ 113+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
114+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ 114+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
115+ __r;}) 115+ __r;})
116+#define FNMSUB(a_, c_, b_) \ 116+#define FNMSUB(a_, c_, b_) \
117+ ({ double __r; \ 117+ ({ double __r; \
118+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ 118+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
119+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ 119+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
120+ __r;}) 120+ __r;})
121+ 121+
122+ r = FNMSUB (g, h, half); 122+ r = FNMSUB (g, h, half);
123+ g = FMADD (g, r, g); 123+ g = FMADD (g, r, g);
124+ h = FMADD (h, r, h); 124+ h = FMADD (h, r, h);
125+ 125+
126+ r = FNMSUB (g, h, half); 126+ r = FNMSUB (g, h, half);
127+ g = FMADD (g, r, g); 127+ g = FMADD (g, r, g);
128+ h = FMADD (h, r, h); 128+ h = FMADD (h, r, h);
129+ 129+
130+ r = FNMSUB (g, h, half); 130+ r = FNMSUB (g, h, half);
131+ g = FMADD (g, r, g); 131+ g = FMADD (g, r, g);
132+ h = FMADD (h, r, h); 132+ h = FMADD (h, r, h);
133+ 133+
134+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ 134+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
135+ 135+
136+ /* Final refinement. */ 136+ /* Final refinement. */
137+ d = FNMSUB (g, g, b); 137+ d = FNMSUB (g, g, b);
138+ 138+
139+ fesetenv_register (fe); 139+ fesetenv_register (fe);
140+ return FMADD (d, h, g); 140+ return FMADD (d, h, g);
141+ } 141+ }
142+ } 142+ }
143+ else if (b < 0) 143+ else if (b < 0)
144+ { 144+ {
145+ /* For some reason, some PowerPC32 processors don't implement 145+ /* For some reason, some PowerPC32 processors don't implement
146+ FE_INVALID_SQRT. */ 146+ FE_INVALID_SQRT. */
147+#ifdef FE_INVALID_SQRT 147+#ifdef FE_INVALID_SQRT
148+ feraiseexcept (FE_INVALID_SQRT); 148+ feraiseexcept (FE_INVALID_SQRT);
149+ 149+
150+ fenv_union_t u = { .fenv = fegetenv_register () }; 150+ fenv_union_t u = { .fenv = fegetenv_register () };
151+ if ((u.l[1] & FE_INVALID) == 0) 151+ if ((u.l[1] & FE_INVALID) == 0)
152+#endif 152+#endif
153+ feraiseexcept (FE_INVALID); 153+ feraiseexcept (FE_INVALID);
154+ b = a_nan.value; 154+ b = a_nan.value;
155+ } 155+ }
156+ return f_wash (b); 156+ return f_wash (b);
157+} 157+}
158Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c 158Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
159=================================================================== 159===================================================================
160--- /dev/null 160--- /dev/null
161+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c 161+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
162@@ -0,0 +1,101 @@ 162@@ -0,0 +1,101 @@
163+/* Single-precision floating point square root. 163+/* Single-precision floating point square root.
164+ Copyright (C) 2010 Free Software Foundation, Inc. 164+ Copyright (C) 2010 Free Software Foundation, Inc.
165+ This file is part of the GNU C Library. 165+ This file is part of the GNU C Library.
166+ 166+
167+ The GNU C Library is free software; you can redistribute it and/or 167+ The GNU C Library is free software; you can redistribute it and/or
168+ modify it under the terms of the GNU Lesser General Public 168+ modify it under the terms of the GNU Lesser General Public
169+ License as published by the Free Software Foundation; either 169+ License as published by the Free Software Foundation; either
170+ version 2.1 of the License, or (at your option) any later version. 170+ version 2.1 of the License, or (at your option) any later version.
171+ 171+
172+ The GNU C Library is distributed in the hope that it will be useful, 172+ The GNU C Library is distributed in the hope that it will be useful,
173+ but WITHOUT ANY WARRANTY; without even the implied warranty of 173+ but WITHOUT ANY WARRANTY; without even the implied warranty of
174+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 174+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
175+ Lesser General Public License for more details. 175+ Lesser General Public License for more details.
176+ 176+
177+ You should have received a copy of the GNU Lesser General Public 177+ You should have received a copy of the GNU Lesser General Public
178+ License along with the GNU C Library; if not, write to the Free 178+ License along with the GNU C Library; if not, write to the Free
179+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 179+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
180+ 02111-1307 USA. */ 180+ 02111-1307 USA. */
181+ 181+
182+#include <math.h> 182+#include <math.h>
183+#include <math_private.h> 183+#include <math_private.h>
184+#include <fenv_libc.h> 184+#include <fenv_libc.h>
185+#include <inttypes.h> 185+#include <inttypes.h>
186+ 186+
187+#include <sysdep.h> 187+#include <sysdep.h>
188+#include <ldsodefs.h> 188+#include <ldsodefs.h>
189+ 189+
190+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; 190+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
191+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; 191+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
192+static const float threehalf = 1.5; 192+static const float threehalf = 1.5;
193+ 193+
194+/* The method is based on the descriptions in: 194+/* The method is based on the descriptions in:
195+ 195+
196+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; 196+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
197+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 197+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
198+ 198+
199+ We find the reciprocal square root and use that to compute the actual 199+ We find the reciprocal square root and use that to compute the actual
200+ square root. */ 200+ square root. */
201+ 201+
202+#ifdef __STDC__ 202+#ifdef __STDC__
203+float 203+float
204+__ieee754_sqrtf (float b) 204+__ieee754_sqrtf (float b)
205+#else 205+#else
206+float 206+float
207+__ieee754_sqrtf (b) 207+__ieee754_sqrtf (b)
208+ float b; 208+ float b;
209+#endif 209+#endif
210+{ 210+{
211+ if (__builtin_expect (b > 0, 1)) 211+ if (__builtin_expect (b > 0, 1))
212+ { 212+ {
213+#define FMSUB(a_, c_, b_) \ 213+#define FMSUB(a_, c_, b_) \
214+ ({ double __r; \ 214+ ({ double __r; \
215+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ 215+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
216+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ 216+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
217+ __r;}) 217+ __r;})
218+#define FNMSUB(a_, c_, b_) \ 218+#define FNMSUB(a_, c_, b_) \
219+ ({ double __r; \ 219+ ({ double __r; \
220+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ 220+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
221+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ 221+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
222+ __r;}) 222+ __r;})
223+ 223+
224+ if (__builtin_expect (b != a_inf.value, 1)) 224+ if (__builtin_expect (b != a_inf.value, 1))
225+ { 225+ {
226+ double y, x; 226+ double y, x;
227+ fenv_t fe; 227+ fenv_t fe;
228+ 228+
229+ fe = fegetenv_register (); 229+ fe = fegetenv_register ();
230+ 230+
231+ relax_fenv_state (); 231+ relax_fenv_state ();
232+ 232+
233+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ 233+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
234+ y = FMSUB (threehalf, b, b); 234+ y = FMSUB (threehalf, b, b);
235+ 235+
236+ /* Initial estimate. */ 236+ /* Initial estimate. */
237+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); 237+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
238+ 238+
239+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ 239+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
240+ x = x * FNMSUB (y, x * x, threehalf); 240+ x = x * FNMSUB (y, x * x, threehalf);
241+ x = x * FNMSUB (y, x * x, threehalf); 241+ x = x * FNMSUB (y, x * x, threehalf);
242+ x = x * FNMSUB (y, x * x, threehalf); 242+ x = x * FNMSUB (y, x * x, threehalf);
243+ 243+
244+ /* All done. */ 244+ /* All done. */
245+ fesetenv_register (fe); 245+ fesetenv_register (fe);
246+ return x * b; 246+ return x * b;
247+ } 247+ }
248+ } 248+ }
249+ else if (b < 0) 249+ else if (b < 0)
250+ { 250+ {
251+ /* For some reason, some PowerPC32 processors don't implement 251+ /* For some reason, some PowerPC32 processors don't implement
252+ FE_INVALID_SQRT. */ 252+ FE_INVALID_SQRT. */
253+#ifdef FE_INVALID_SQRT 253+#ifdef FE_INVALID_SQRT
254+ feraiseexcept (FE_INVALID_SQRT); 254+ feraiseexcept (FE_INVALID_SQRT);
255+ 255+
256+ fenv_union_t u = { .fenv = fegetenv_register () }; 256+ fenv_union_t u = { .fenv = fegetenv_register () };
257+ if ((u.l[1] & FE_INVALID) == 0) 257+ if ((u.l[1] & FE_INVALID) == 0)
258+#endif 258+#endif
259+ feraiseexcept (FE_INVALID); 259+ feraiseexcept (FE_INVALID);
260+ b = a_nan.value; 260+ b = a_nan.value;
261+ } 261+ }
262+ return f_washf (b); 262+ return f_washf (b);
263+} 263+}
264Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c 264Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
265=================================================================== 265===================================================================
266--- /dev/null 266--- /dev/null
267+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c 267+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
268@@ -0,0 +1,134 @@ 268@@ -0,0 +1,134 @@
269+/* Double-precision floating point square root. 269+/* Double-precision floating point square root.
270+ Copyright (C) 2010 Free Software Foundation, Inc. 270+ Copyright (C) 2010 Free Software Foundation, Inc.
271+ This file is part of the GNU C Library. 271+ This file is part of the GNU C Library.
272+ 272+
273+ The GNU C Library is free software; you can redistribute it and/or 273+ The GNU C Library is free software; you can redistribute it and/or
274+ modify it under the terms of the GNU Lesser General Public 274+ modify it under the terms of the GNU Lesser General Public
275+ License as published by the Free Software Foundation; either 275+ License as published by the Free Software Foundation; either
276+ version 2.1 of the License, or (at your option) any later version. 276+ version 2.1 of the License, or (at your option) any later version.
277+ 277+
278+ The GNU C Library is distributed in the hope that it will be useful, 278+ The GNU C Library is distributed in the hope that it will be useful,
279+ but WITHOUT ANY WARRANTY; without even the implied warranty of 279+ but WITHOUT ANY WARRANTY; without even the implied warranty of
280+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 280+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
281+ Lesser General Public License for more details. 281+ Lesser General Public License for more details.
282+ 282+
283+ You should have received a copy of the GNU Lesser General Public 283+ You should have received a copy of the GNU Lesser General Public
284+ License along with the GNU C Library; if not, write to the Free 284+ License along with the GNU C Library; if not, write to the Free
285+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 285+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
286+ 02111-1307 USA. */ 286+ 02111-1307 USA. */
287+ 287+
288+#include <math.h> 288+#include <math.h>
289+#include <math_private.h> 289+#include <math_private.h>
290+#include <fenv_libc.h> 290+#include <fenv_libc.h>
291+#include <inttypes.h> 291+#include <inttypes.h>
292+ 292+
293+#include <sysdep.h> 293+#include <sysdep.h>
294+#include <ldsodefs.h> 294+#include <ldsodefs.h>
295+ 295+
296+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; 296+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
297+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; 297+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
298+static const float two108 = 3.245185536584267269e+32; 298+static const float two108 = 3.245185536584267269e+32;
299+static const float twom54 = 5.551115123125782702e-17; 299+static const float twom54 = 5.551115123125782702e-17;
300+static const float half = 0.5; 300+static const float half = 0.5;
301+ 301+
302+/* The method is based on the descriptions in: 302+/* The method is based on the descriptions in:
303+ 303+
304+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; 304+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
305+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 305+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
306+ 306+
307+ We find the actual square root and half of its reciprocal 307+ We find the actual square root and half of its reciprocal
308+ simultaneously. */ 308+ simultaneously. */
309+ 309+
310+#ifdef __STDC__ 310+#ifdef __STDC__
311+double 311+double
312+__ieee754_sqrt (double b) 312+__ieee754_sqrt (double b)
313+#else 313+#else
314+double 314+double
315+__ieee754_sqrt (b) 315+__ieee754_sqrt (b)
316+ double b; 316+ double b;
317+#endif 317+#endif
318+{ 318+{
319+ if (__builtin_expect (b > 0, 1)) 319+ if (__builtin_expect (b > 0, 1))
320+ { 320+ {
321+ double y, g, h, d, r; 321+ double y, g, h, d, r;
322+ ieee_double_shape_type u; 322+ ieee_double_shape_type u;
323+ 323+
324+ if (__builtin_expect (b != a_inf.value, 1)) 324+ if (__builtin_expect (b != a_inf.value, 1))
325+ { 325+ {
326+ fenv_t fe; 326+ fenv_t fe;
327+ 327+
328+ fe = fegetenv_register (); 328+ fe = fegetenv_register ();
329+ 329+
330+ u.value = b; 330+ u.value = b;
331+ 331+
332+ relax_fenv_state (); 332+ relax_fenv_state ();
333+ 333+
334+ __asm__ ("frsqrte %[estimate], %[x]\n" 334+ __asm__ ("frsqrte %[estimate], %[x]\n"
335+ : [estimate] "=f" (y) : [x] "f" (b)); 335+ : [estimate] "=f" (y) : [x] "f" (b));
336+ 336+
337+ /* Following Muller et al, page 168, equation 5.20. 337+ /* Following Muller et al, page 168, equation 5.20.
338+ 338+
339+ h goes to 1/(2*sqrt(b)) 339+ h goes to 1/(2*sqrt(b))
340+ g goes to sqrt(b). 340+ g goes to sqrt(b).
341+ 341+
342+ We need three iterations to get within 1ulp. */ 342+ We need three iterations to get within 1ulp. */
343+ 343+
344+ /* Indicate that these can be performed prior to the branch. GCC 344+ /* Indicate that these can be performed prior to the branch. GCC
345+ insists on sinking them below the branch, however; it seems like 345+ insists on sinking them below the branch, however; it seems like
346+ they'd be better before the branch so that we can cover any latency 346+ they'd be better before the branch so that we can cover any latency
347+ from storing the argument and loading its high word. Oh well. */ 347+ from storing the argument and loading its high word. Oh well. */
348+ 348+
349+ g = b * y; 349+ g = b * y;
350+ h = 0.5 * y; 350+ h = 0.5 * y;
351+ 351+
352+ /* Handle small numbers by scaling. */ 352+ /* Handle small numbers by scaling. */
353+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) 353+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
354+ return __ieee754_sqrt (b * two108) * twom54; 354+ return __ieee754_sqrt (b * two108) * twom54;
355+ 355+
356+#define FMADD(a_, c_, b_) \ 356+#define FMADD(a_, c_, b_) \
357+ ({ double __r; \ 357+ ({ double __r; \
358+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ 358+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
359+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ 359+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
360+ __r;}) 360+ __r;})
361+#define FNMSUB(a_, c_, b_) \ 361+#define FNMSUB(a_, c_, b_) \
362+ ({ double __r; \ 362+ ({ double __r; \
363+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ 363+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
364+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ 364+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
365+ __r;}) 365+ __r;})
366+ 366+
367+ r = FNMSUB (g, h, half); 367+ r = FNMSUB (g, h, half);
368+ g = FMADD (g, r, g); 368+ g = FMADD (g, r, g);
369+ h = FMADD (h, r, h); 369+ h = FMADD (h, r, h);
370+ 370+
371+ r = FNMSUB (g, h, half); 371+ r = FNMSUB (g, h, half);
372+ g = FMADD (g, r, g); 372+ g = FMADD (g, r, g);
373+ h = FMADD (h, r, h); 373+ h = FMADD (h, r, h);
374+ 374+
375+ r = FNMSUB (g, h, half); 375+ r = FNMSUB (g, h, half);
376+ g = FMADD (g, r, g); 376+ g = FMADD (g, r, g);
377+ h = FMADD (h, r, h); 377+ h = FMADD (h, r, h);
378+ 378+
379+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ 379+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
380+ 380+
381+ /* Final refinement. */ 381+ /* Final refinement. */
382+ d = FNMSUB (g, g, b); 382+ d = FNMSUB (g, g, b);
383+ 383+
384+ fesetenv_register (fe); 384+ fesetenv_register (fe);
385+ return FMADD (d, h, g); 385+ return FMADD (d, h, g);
386+ } 386+ }
387+ } 387+ }
388+ else if (b < 0) 388+ else if (b < 0)
389+ { 389+ {
390+ /* For some reason, some PowerPC32 processors don't implement 390+ /* For some reason, some PowerPC32 processors don't implement
391+ FE_INVALID_SQRT. */ 391+ FE_INVALID_SQRT. */
392+#ifdef FE_INVALID_SQRT 392+#ifdef FE_INVALID_SQRT
393+ feraiseexcept (FE_INVALID_SQRT); 393+ feraiseexcept (FE_INVALID_SQRT);
394+ 394+
395+ fenv_union_t u = { .fenv = fegetenv_register () }; 395+ fenv_union_t u = { .fenv = fegetenv_register () };
396+ if ((u.l[1] & FE_INVALID) == 0) 396+ if ((u.l[1] & FE_INVALID) == 0)
397+#endif 397+#endif
398+ feraiseexcept (FE_INVALID); 398+ feraiseexcept (FE_INVALID);
399+ b = a_nan.value; 399+ b = a_nan.value;
400+ } 400+ }
401+ return f_wash (b); 401+ return f_wash (b);
402+} 402+}
403Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c 403Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
404=================================================================== 404===================================================================
405--- /dev/null 405--- /dev/null
406+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c 406+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
407@@ -0,0 +1,101 @@ 407@@ -0,0 +1,101 @@
408+/* Single-precision floating point square root. 408+/* Single-precision floating point square root.
409+ Copyright (C) 2010 Free Software Foundation, Inc. 409+ Copyright (C) 2010 Free Software Foundation, Inc.
410+ This file is part of the GNU C Library. 410+ This file is part of the GNU C Library.
411+ 411+
412+ The GNU C Library is free software; you can redistribute it and/or 412+ The GNU C Library is free software; you can redistribute it and/or
413+ modify it under the terms of the GNU Lesser General Public 413+ modify it under the terms of the GNU Lesser General Public
414+ License as published by the Free Software Foundation; either 414+ License as published by the Free Software Foundation; either
415+ version 2.1 of the License, or (at your option) any later version. 415+ version 2.1 of the License, or (at your option) any later version.
416+ 416+
417+ The GNU C Library is distributed in the hope that it will be useful, 417+ The GNU C Library is distributed in the hope that it will be useful,
418+ but WITHOUT ANY WARRANTY; without even the implied warranty of 418+ but WITHOUT ANY WARRANTY; without even the implied warranty of
419+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 419+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
420+ Lesser General Public License for more details. 420+ Lesser General Public License for more details.
421+ 421+
422+ You should have received a copy of the GNU Lesser General Public 422+ You should have received a copy of the GNU Lesser General Public
423+ License along with the GNU C Library; if not, write to the Free 423+ License along with the GNU C Library; if not, write to the Free
424+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 424+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
425+ 02111-1307 USA. */ 425+ 02111-1307 USA. */
426+ 426+
427+#include <math.h> 427+#include <math.h>
428+#include <math_private.h> 428+#include <math_private.h>
429+#include <fenv_libc.h> 429+#include <fenv_libc.h>
430+#include <inttypes.h> 430+#include <inttypes.h>
431+ 431+
432+#include <sysdep.h> 432+#include <sysdep.h>
433+#include <ldsodefs.h> 433+#include <ldsodefs.h>
434+ 434+
435+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; 435+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
436+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; 436+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
437+static const float threehalf = 1.5; 437+static const float threehalf = 1.5;
438+ 438+
439+/* The method is based on the descriptions in: 439+/* The method is based on the descriptions in:
440+ 440+
441+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; 441+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
442+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 442+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
443+ 443+
444+ We find the reciprocal square root and use that to compute the actual 444+ We find the reciprocal square root and use that to compute the actual
445+ square root. */ 445+ square root. */
446+ 446+
447+#ifdef __STDC__ 447+#ifdef __STDC__
448+float 448+float
449+__ieee754_sqrtf (float b) 449+__ieee754_sqrtf (float b)
450+#else 450+#else
451+float 451+float
452+__ieee754_sqrtf (b) 452+__ieee754_sqrtf (b)
453+ float b; 453+ float b;
454+#endif 454+#endif
455+{ 455+{
456+ if (__builtin_expect (b > 0, 1)) 456+ if (__builtin_expect (b > 0, 1))
457+ { 457+ {
458+#define FMSUB(a_, c_, b_) \ 458+#define FMSUB(a_, c_, b_) \
459+ ({ double __r; \ 459+ ({ double __r; \
460+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ 460+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
461+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ 461+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
462+ __r;}) 462+ __r;})
463+#define FNMSUB(a_, c_, b_) \ 463+#define FNMSUB(a_, c_, b_) \
464+ ({ double __r; \ 464+ ({ double __r; \
465+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ 465+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
466+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ 466+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
467+ __r;}) 467+ __r;})
468+ 468+
469+ if (__builtin_expect (b != a_inf.value, 1)) 469+ if (__builtin_expect (b != a_inf.value, 1))
470+ { 470+ {
471+ double y, x; 471+ double y, x;
472+ fenv_t fe; 472+ fenv_t fe;
473+ 473+
474+ fe = fegetenv_register (); 474+ fe = fegetenv_register ();
475+ 475+
476+ relax_fenv_state (); 476+ relax_fenv_state ();
477+ 477+
478+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ 478+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
479+ y = FMSUB (threehalf, b, b); 479+ y = FMSUB (threehalf, b, b);
480+ 480+
481+ /* Initial estimate. */ 481+ /* Initial estimate. */
482+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); 482+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
483+ 483+
484+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ 484+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
485+ x = x * FNMSUB (y, x * x, threehalf); 485+ x = x * FNMSUB (y, x * x, threehalf);
486+ x = x * FNMSUB (y, x * x, threehalf); 486+ x = x * FNMSUB (y, x * x, threehalf);
487+ x = x * FNMSUB (y, x * x, threehalf); 487+ x = x * FNMSUB (y, x * x, threehalf);
488+ 488+
489+ /* All done. */ 489+ /* All done. */
490+ fesetenv_register (fe); 490+ fesetenv_register (fe);
491+ return x * b; 491+ return x * b;
492+ } 492+ }
493+ } 493+ }
494+ else if (b < 0) 494+ else if (b < 0)
495+ { 495+ {
496+ /* For some reason, some PowerPC32 processors don't implement 496+ /* For some reason, some PowerPC32 processors don't implement
497+ FE_INVALID_SQRT. */ 497+ FE_INVALID_SQRT. */
498+#ifdef FE_INVALID_SQRT 498+#ifdef FE_INVALID_SQRT
499+ feraiseexcept (FE_INVALID_SQRT); 499+ feraiseexcept (FE_INVALID_SQRT);
500+ 500+
501+ fenv_union_t u = { .fenv = fegetenv_register () }; 501+ fenv_union_t u = { .fenv = fegetenv_register () };
502+ if ((u.l[1] & FE_INVALID) == 0) 502+ if ((u.l[1] & FE_INVALID) == 0)
503+#endif 503+#endif
504+ feraiseexcept (FE_INVALID); 504+ feraiseexcept (FE_INVALID);
505+ b = a_nan.value; 505+ b = a_nan.value;
506+ } 506+ }
507+ return f_washf (b); 507+ return f_washf (b);
508+} 508+}
509Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies 509Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
510=================================================================== 510===================================================================
511--- /dev/null 511--- /dev/null
512+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies 512+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
513@@ -0,0 +1 @@ 513@@ -0,0 +1 @@
514+powerpc/powerpc32/603e/fpu 514+powerpc/powerpc32/603e/fpu
515Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/fpu/Implies 515Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/fpu/Implies
516=================================================================== 516===================================================================
517--- /dev/null 517--- /dev/null
518+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/fpu/Implies 518+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/7400/fpu/Implies
519@@ -0,0 +1 @@ 519@@ -0,0 +1 @@
520+powerpc/powerpc32/603e/fpu 520+powerpc/powerpc32/603e/fpu
521Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies 521Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
522=================================================================== 522===================================================================
523--- /dev/null 523--- /dev/null
524+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies 524+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
525@@ -0,0 +1 @@ 525@@ -0,0 +1 @@
526+powerpc/powerpc32/603e/fpu 526+powerpc/powerpc32/603e/fpu
527Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies 527Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
528=================================================================== 528===================================================================
529--- /dev/null 529--- /dev/null
530+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies 530+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
531@@ -0,0 +1 @@ 531@@ -0,0 +1 @@
532+powerpc/powerpc64/e5500/fpu 532+powerpc/powerpc64/e5500/fpu
533Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies 533Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
534=================================================================== 534===================================================================
535--- /dev/null 535--- /dev/null
536+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies 536+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
537@@ -0,0 +1 @@ 537@@ -0,0 +1 @@
538+powerpc/powerpc32/603e/fpu 538+powerpc/powerpc32/603e/fpu