summaryrefslogtreecommitdiffstats
path: root/meta/recipes-core/eglibc/eglibc-2.17/glibc.fix_sqrt2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-core/eglibc/eglibc-2.17/glibc.fix_sqrt2.patch')
-rw-r--r--meta/recipes-core/eglibc/eglibc-2.17/glibc.fix_sqrt2.patch1491
1 files changed, 1491 insertions, 0 deletions
diff --git a/meta/recipes-core/eglibc/eglibc-2.17/glibc.fix_sqrt2.patch b/meta/recipes-core/eglibc/eglibc-2.17/glibc.fix_sqrt2.patch
new file mode 100644
index 0000000000..a66972998c
--- /dev/null
+++ b/meta/recipes-core/eglibc/eglibc-2.17/glibc.fix_sqrt2.patch
@@ -0,0 +1,1491 @@
1Signed-of-by: Edmar Wienskoski <edmar@freescale.com>
2Upstream-Status: Pending
3
4diff -ruN libc-orig/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
5--- libc-orig/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c 1969-12-31 18:00:00.000000000 -0600
6+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c 2012-06-14 14:51:50.452001745 -0500
7@@ -0,0 +1,134 @@
8+/* Double-precision floating point square root.
9+ Copyright (C) 2010 Free Software Foundation, Inc.
10+ This file is part of the GNU C Library.
11+
12+ The GNU C Library is free software; you can redistribute it and/or
13+ modify it under the terms of the GNU Lesser General Public
14+ License as published by the Free Software Foundation; either
15+ version 2.1 of the License, or (at your option) any later version.
16+
17+ The GNU C Library is distributed in the hope that it will be useful,
18+ but WITHOUT ANY WARRANTY; without even the implied warranty of
19+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20+ Lesser General Public License for more details.
21+
22+ You should have received a copy of the GNU Lesser General Public
23+ License along with the GNU C Library; if not, write to the Free
24+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25+ 02111-1307 USA. */
26+
27+#include <math.h>
28+#include <math_private.h>
29+#include <fenv_libc.h>
30+#include <inttypes.h>
31+
32+#include <sysdep.h>
33+#include <ldsodefs.h>
34+
35+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
36+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
37+static const float two108 = 3.245185536584267269e+32;
38+static const float twom54 = 5.551115123125782702e-17;
39+static const float half = 0.5;
40+
41+/* The method is based on the descriptions in:
42+
43+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
44+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
45+
46+ We find the actual square root and half of its reciprocal
47+ simultaneously. */
48+
49+#ifdef __STDC__
50+double
51+__ieee754_sqrt (double b)
52+#else
53+double
54+__ieee754_sqrt (b)
55+ double b;
56+#endif
57+{
58+ if (__builtin_expect (b > 0, 1))
59+ {
60+ double y, g, h, d, r;
61+ ieee_double_shape_type u;
62+
63+ if (__builtin_expect (b != a_inf.value, 1))
64+ {
65+ fenv_t fe;
66+
67+ fe = fegetenv_register ();
68+
69+ u.value = b;
70+
71+ relax_fenv_state ();
72+
73+ __asm__ ("frsqrte %[estimate], %[x]\n"
74+ : [estimate] "=f" (y) : [x] "f" (b));
75+
76+ /* Following Muller et al, page 168, equation 5.20.
77+
78+ h goes to 1/(2*sqrt(b))
79+ g goes to sqrt(b).
80+
81+ We need three iterations to get within 1ulp. */
82+
83+ /* Indicate that these can be performed prior to the branch. GCC
84+ insists on sinking them below the branch, however; it seems like
85+ they'd be better before the branch so that we can cover any latency
86+ from storing the argument and loading its high word. Oh well. */
87+
88+ g = b * y;
89+ h = 0.5 * y;
90+
91+ /* Handle small numbers by scaling. */
92+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
93+ return __ieee754_sqrt (b * two108) * twom54;
94+
95+#define FMADD(a_, c_, b_) \
96+ ({ double __r; \
97+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
98+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
99+ __r;})
100+#define FNMSUB(a_, c_, b_) \
101+ ({ double __r; \
102+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
103+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
104+ __r;})
105+
106+ r = FNMSUB (g, h, half);
107+ g = FMADD (g, r, g);
108+ h = FMADD (h, r, h);
109+
110+ r = FNMSUB (g, h, half);
111+ g = FMADD (g, r, g);
112+ h = FMADD (h, r, h);
113+
114+ r = FNMSUB (g, h, half);
115+ g = FMADD (g, r, g);
116+ h = FMADD (h, r, h);
117+
118+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
119+
120+ /* Final refinement. */
121+ d = FNMSUB (g, g, b);
122+
123+ fesetenv_register (fe);
124+ return FMADD (d, h, g);
125+ }
126+ }
127+ else if (b < 0)
128+ {
129+ /* For some reason, some PowerPC32 processors don't implement
130+ FE_INVALID_SQRT. */
131+#ifdef FE_INVALID_SQRT
132+ feraiseexcept (FE_INVALID_SQRT);
133+
134+ fenv_union_t u = { .fenv = fegetenv_register () };
135+ if ((u.l[1] & FE_INVALID) == 0)
136+#endif
137+ feraiseexcept (FE_INVALID);
138+ b = a_nan.value;
139+ }
140+ return f_wash (b);
141+}
142diff -ruN libc-orig/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
143--- libc-orig/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c 1969-12-31 18:00:00.000000000 -0600
144+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c 2012-06-14 14:51:50.452001745 -0500
145@@ -0,0 +1,101 @@
146+/* Single-precision floating point square root.
147+ Copyright (C) 2010 Free Software Foundation, Inc.
148+ This file is part of the GNU C Library.
149+
150+ The GNU C Library is free software; you can redistribute it and/or
151+ modify it under the terms of the GNU Lesser General Public
152+ License as published by the Free Software Foundation; either
153+ version 2.1 of the License, or (at your option) any later version.
154+
155+ The GNU C Library is distributed in the hope that it will be useful,
156+ but WITHOUT ANY WARRANTY; without even the implied warranty of
157+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
158+ Lesser General Public License for more details.
159+
160+ You should have received a copy of the GNU Lesser General Public
161+ License along with the GNU C Library; if not, write to the Free
162+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
163+ 02111-1307 USA. */
164+
165+#include <math.h>
166+#include <math_private.h>
167+#include <fenv_libc.h>
168+#include <inttypes.h>
169+
170+#include <sysdep.h>
171+#include <ldsodefs.h>
172+
173+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
174+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
175+static const float threehalf = 1.5;
176+
177+/* The method is based on the descriptions in:
178+
179+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
180+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
181+
182+ We find the reciprocal square root and use that to compute the actual
183+ square root. */
184+
185+#ifdef __STDC__
186+float
187+__ieee754_sqrtf (float b)
188+#else
189+float
190+__ieee754_sqrtf (b)
191+ float b;
192+#endif
193+{
194+ if (__builtin_expect (b > 0, 1))
195+ {
196+#define FMSUB(a_, c_, b_) \
197+ ({ double __r; \
198+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
199+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
200+ __r;})
201+#define FNMSUB(a_, c_, b_) \
202+ ({ double __r; \
203+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
204+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
205+ __r;})
206+
207+ if (__builtin_expect (b != a_inf.value, 1))
208+ {
209+ double y, x;
210+ fenv_t fe;
211+
212+ fe = fegetenv_register ();
213+
214+ relax_fenv_state ();
215+
216+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
217+ y = FMSUB (threehalf, b, b);
218+
219+ /* Initial estimate. */
220+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
221+
222+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
223+ x = x * FNMSUB (y, x * x, threehalf);
224+ x = x * FNMSUB (y, x * x, threehalf);
225+ x = x * FNMSUB (y, x * x, threehalf);
226+
227+ /* All done. */
228+ fesetenv_register (fe);
229+ return x * b;
230+ }
231+ }
232+ else if (b < 0)
233+ {
234+ /* For some reason, some PowerPC32 processors don't implement
235+ FE_INVALID_SQRT. */
236+#ifdef FE_INVALID_SQRT
237+ feraiseexcept (FE_INVALID_SQRT);
238+
239+ fenv_union_t u = { .fenv = fegetenv_register () };
240+ if ((u.l[1] & FE_INVALID) == 0)
241+#endif
242+ feraiseexcept (FE_INVALID);
243+ b = a_nan.value;
244+ }
245+ return f_washf (b);
246+}
247diff -ruN libc-orig/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
248--- libc-orig/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c 1969-12-31 18:00:00.000000000 -0600
249+++ libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c 2012-06-14 14:55:14.749001061 -0500
250@@ -0,0 +1,134 @@
251+/* Double-precision floating point square root.
252+ Copyright (C) 2010 Free Software Foundation, Inc.
253+ This file is part of the GNU C Library.
254+
255+ The GNU C Library is free software; you can redistribute it and/or
256+ modify it under the terms of the GNU Lesser General Public
257+ License as published by the Free Software Foundation; either
258+ version 2.1 of the License, or (at your option) any later version.
259+
260+ The GNU C Library is distributed in the hope that it will be useful,
261+ but WITHOUT ANY WARRANTY; without even the implied warranty of
262+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
263+ Lesser General Public License for more details.
264+
265+ You should have received a copy of the GNU Lesser General Public
266+ License along with the GNU C Library; if not, write to the Free
267+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
268+ 02111-1307 USA. */
269+
270+#include <math.h>
271+#include <math_private.h>
272+#include <fenv_libc.h>
273+#include <inttypes.h>
274+
275+#include <sysdep.h>
276+#include <ldsodefs.h>
277+
278+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
279+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
280+static const float two108 = 3.245185536584267269e+32;
281+static const float twom54 = 5.551115123125782702e-17;
282+static const float half = 0.5;
283+
284+/* The method is based on the descriptions in:
285+
286+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
287+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
288+
289+ We find the actual square root and half of its reciprocal
290+ simultaneously. */
291+
292+#ifdef __STDC__
293+double
294+__ieee754_sqrt (double b)
295+#else
296+double
297+__ieee754_sqrt (b)
298+ double b;
299+#endif
300+{
301+ if (__builtin_expect (b > 0, 1))
302+ {
303+ double y, g, h, d, r;
304+ ieee_double_shape_type u;
305+
306+ if (__builtin_expect (b != a_inf.value, 1))
307+ {
308+ fenv_t fe;
309+
310+ fe = fegetenv_register ();
311+
312+ u.value = b;
313+
314+ relax_fenv_state ();
315+
316+ __asm__ ("frsqrte %[estimate], %[x]\n"
317+ : [estimate] "=f" (y) : [x] "f" (b));
318+
319+ /* Following Muller et al, page 168, equation 5.20.
320+
321+ h goes to 1/(2*sqrt(b))
322+ g goes to sqrt(b).
323+
324+ We need three iterations to get within 1ulp. */
325+
326+ /* Indicate that these can be performed prior to the branch. GCC
327+ insists on sinking them below the branch, however; it seems like
328+ they'd be better before the branch so that we can cover any latency
329+ from storing the argument and loading its high word. Oh well. */
330+
331+ g = b * y;
332+ h = 0.5 * y;
333+
334+ /* Handle small numbers by scaling. */
335+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
336+ return __ieee754_sqrt (b * two108) * twom54;
337+
338+#define FMADD(a_, c_, b_) \
339+ ({ double __r; \
340+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
341+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
342+ __r;})
343+#define FNMSUB(a_, c_, b_) \
344+ ({ double __r; \
345+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
346+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
347+ __r;})
348+
349+ r = FNMSUB (g, h, half);
350+ g = FMADD (g, r, g);
351+ h = FMADD (h, r, h);
352+
353+ r = FNMSUB (g, h, half);
354+ g = FMADD (g, r, g);
355+ h = FMADD (h, r, h);
356+
357+ r = FNMSUB (g, h, half);
358+ g = FMADD (g, r, g);
359+ h = FMADD (h, r, h);
360+
361+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
362+
363+ /* Final refinement. */
364+ d = FNMSUB (g, g, b);
365+
366+ fesetenv_register (fe);
367+ return FMADD (d, h, g);
368+ }
369+ }
370+ else if (b < 0)
371+ {
372+ /* For some reason, some PowerPC32 processors don't implement
373+ FE_INVALID_SQRT. */
374+#ifdef FE_INVALID_SQRT
375+ feraiseexcept (FE_INVALID_SQRT);
376+
377+ fenv_union_t u = { .fenv = fegetenv_register () };
378+ if ((u.l[1] & FE_INVALID) == 0)
379+#endif
380+ feraiseexcept (FE_INVALID);
381+ b = a_nan.value;
382+ }
383+ return f_wash (b);
384+}
385diff -ruN libc-orig/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
386--- libc-orig/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c 1969-12-31 18:00:00.000000000 -0600
387+++ libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c 2012-06-14 14:55:14.749001061 -0500
388@@ -0,0 +1,101 @@
389+/* Single-precision floating point square root.
390+ Copyright (C) 2010 Free Software Foundation, Inc.
391+ This file is part of the GNU C Library.
392+
393+ The GNU C Library is free software; you can redistribute it and/or
394+ modify it under the terms of the GNU Lesser General Public
395+ License as published by the Free Software Foundation; either
396+ version 2.1 of the License, or (at your option) any later version.
397+
398+ The GNU C Library is distributed in the hope that it will be useful,
399+ but WITHOUT ANY WARRANTY; without even the implied warranty of
400+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
401+ Lesser General Public License for more details.
402+
403+ You should have received a copy of the GNU Lesser General Public
404+ License along with the GNU C Library; if not, write to the Free
405+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
406+ 02111-1307 USA. */
407+
408+#include <math.h>
409+#include <math_private.h>
410+#include <fenv_libc.h>
411+#include <inttypes.h>
412+
413+#include <sysdep.h>
414+#include <ldsodefs.h>
415+
416+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
417+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
418+static const float threehalf = 1.5;
419+
420+/* The method is based on the descriptions in:
421+
422+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
423+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
424+
425+ We find the reciprocal square root and use that to compute the actual
426+ square root. */
427+
428+#ifdef __STDC__
429+float
430+__ieee754_sqrtf (float b)
431+#else
432+float
433+__ieee754_sqrtf (b)
434+ float b;
435+#endif
436+{
437+ if (__builtin_expect (b > 0, 1))
438+ {
439+#define FMSUB(a_, c_, b_) \
440+ ({ double __r; \
441+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
442+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
443+ __r;})
444+#define FNMSUB(a_, c_, b_) \
445+ ({ double __r; \
446+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
447+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
448+ __r;})
449+
450+ if (__builtin_expect (b != a_inf.value, 1))
451+ {
452+ double y, x;
453+ fenv_t fe;
454+
455+ fe = fegetenv_register ();
456+
457+ relax_fenv_state ();
458+
459+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
460+ y = FMSUB (threehalf, b, b);
461+
462+ /* Initial estimate. */
463+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
464+
465+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
466+ x = x * FNMSUB (y, x * x, threehalf);
467+ x = x * FNMSUB (y, x * x, threehalf);
468+ x = x * FNMSUB (y, x * x, threehalf);
469+
470+ /* All done. */
471+ fesetenv_register (fe);
472+ return x * b;
473+ }
474+ }
475+ else if (b < 0)
476+ {
477+ /* For some reason, some PowerPC32 processors don't implement
478+ FE_INVALID_SQRT. */
479+#ifdef FE_INVALID_SQRT
480+ feraiseexcept (FE_INVALID_SQRT);
481+
482+ fenv_union_t u = { .fenv = fegetenv_register () };
483+ if ((u.l[1] & FE_INVALID) == 0)
484+#endif
485+ feraiseexcept (FE_INVALID);
486+ b = a_nan.value;
487+ }
488+ return f_washf (b);
489+}
490diff -ruN libc-orig/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
491--- libc-orig/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c 1969-12-31 18:00:00.000000000 -0600
492+++ libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c 2012-06-14 14:55:21.812002270 -0500
493@@ -0,0 +1,134 @@
494+/* Double-precision floating point square root.
495+ Copyright (C) 2010 Free Software Foundation, Inc.
496+ This file is part of the GNU C Library.
497+
498+ The GNU C Library is free software; you can redistribute it and/or
499+ modify it under the terms of the GNU Lesser General Public
500+ License as published by the Free Software Foundation; either
501+ version 2.1 of the License, or (at your option) any later version.
502+
503+ The GNU C Library is distributed in the hope that it will be useful,
504+ but WITHOUT ANY WARRANTY; without even the implied warranty of
505+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
506+ Lesser General Public License for more details.
507+
508+ You should have received a copy of the GNU Lesser General Public
509+ License along with the GNU C Library; if not, write to the Free
510+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
511+ 02111-1307 USA. */
512+
513+#include <math.h>
514+#include <math_private.h>
515+#include <fenv_libc.h>
516+#include <inttypes.h>
517+
518+#include <sysdep.h>
519+#include <ldsodefs.h>
520+
521+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
522+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
523+static const float two108 = 3.245185536584267269e+32;
524+static const float twom54 = 5.551115123125782702e-17;
525+static const float half = 0.5;
526+
527+/* The method is based on the descriptions in:
528+
529+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
530+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
531+
532+ We find the actual square root and half of its reciprocal
533+ simultaneously. */
534+
535+#ifdef __STDC__
536+double
537+__ieee754_sqrt (double b)
538+#else
539+double
540+__ieee754_sqrt (b)
541+ double b;
542+#endif
543+{
544+ if (__builtin_expect (b > 0, 1))
545+ {
546+ double y, g, h, d, r;
547+ ieee_double_shape_type u;
548+
549+ if (__builtin_expect (b != a_inf.value, 1))
550+ {
551+ fenv_t fe;
552+
553+ fe = fegetenv_register ();
554+
555+ u.value = b;
556+
557+ relax_fenv_state ();
558+
559+ __asm__ ("frsqrte %[estimate], %[x]\n"
560+ : [estimate] "=f" (y) : [x] "f" (b));
561+
562+ /* Following Muller et al, page 168, equation 5.20.
563+
564+ h goes to 1/(2*sqrt(b))
565+ g goes to sqrt(b).
566+
567+ We need three iterations to get within 1ulp. */
568+
569+ /* Indicate that these can be performed prior to the branch. GCC
570+ insists on sinking them below the branch, however; it seems like
571+ they'd be better before the branch so that we can cover any latency
572+ from storing the argument and loading its high word. Oh well. */
573+
574+ g = b * y;
575+ h = 0.5 * y;
576+
577+ /* Handle small numbers by scaling. */
578+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
579+ return __ieee754_sqrt (b * two108) * twom54;
580+
581+#define FMADD(a_, c_, b_) \
582+ ({ double __r; \
583+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
584+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
585+ __r;})
586+#define FNMSUB(a_, c_, b_) \
587+ ({ double __r; \
588+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
589+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
590+ __r;})
591+
592+ r = FNMSUB (g, h, half);
593+ g = FMADD (g, r, g);
594+ h = FMADD (h, r, h);
595+
596+ r = FNMSUB (g, h, half);
597+ g = FMADD (g, r, g);
598+ h = FMADD (h, r, h);
599+
600+ r = FNMSUB (g, h, half);
601+ g = FMADD (g, r, g);
602+ h = FMADD (h, r, h);
603+
604+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
605+
606+ /* Final refinement. */
607+ d = FNMSUB (g, g, b);
608+
609+ fesetenv_register (fe);
610+ return FMADD (d, h, g);
611+ }
612+ }
613+ else if (b < 0)
614+ {
615+ /* For some reason, some PowerPC32 processors don't implement
616+ FE_INVALID_SQRT. */
617+#ifdef FE_INVALID_SQRT
618+ feraiseexcept (FE_INVALID_SQRT);
619+
620+ fenv_union_t u = { .fenv = fegetenv_register () };
621+ if ((u.l[1] & FE_INVALID) == 0)
622+#endif
623+ feraiseexcept (FE_INVALID);
624+ b = a_nan.value;
625+ }
626+ return f_wash (b);
627+}
628diff -ruN libc-orig/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
629--- libc-orig/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c 1969-12-31 18:00:00.000000000 -0600
630+++ libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c 2012-06-14 14:55:21.812002270 -0500
631@@ -0,0 +1,101 @@
632+/* Single-precision floating point square root.
633+ Copyright (C) 2010 Free Software Foundation, Inc.
634+ This file is part of the GNU C Library.
635+
636+ The GNU C Library is free software; you can redistribute it and/or
637+ modify it under the terms of the GNU Lesser General Public
638+ License as published by the Free Software Foundation; either
639+ version 2.1 of the License, or (at your option) any later version.
640+
641+ The GNU C Library is distributed in the hope that it will be useful,
642+ but WITHOUT ANY WARRANTY; without even the implied warranty of
643+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
644+ Lesser General Public License for more details.
645+
646+ You should have received a copy of the GNU Lesser General Public
647+ License along with the GNU C Library; if not, write to the Free
648+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
649+ 02111-1307 USA. */
650+
651+#include <math.h>
652+#include <math_private.h>
653+#include <fenv_libc.h>
654+#include <inttypes.h>
655+
656+#include <sysdep.h>
657+#include <ldsodefs.h>
658+
659+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
660+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
661+static const float threehalf = 1.5;
662+
663+/* The method is based on the descriptions in:
664+
665+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
666+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
667+
668+ We find the reciprocal square root and use that to compute the actual
669+ square root. */
670+
671+#ifdef __STDC__
672+float
673+__ieee754_sqrtf (float b)
674+#else
675+float
676+__ieee754_sqrtf (b)
677+ float b;
678+#endif
679+{
680+ if (__builtin_expect (b > 0, 1))
681+ {
682+#define FMSUB(a_, c_, b_) \
683+ ({ double __r; \
684+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
685+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
686+ __r;})
687+#define FNMSUB(a_, c_, b_) \
688+ ({ double __r; \
689+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
690+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
691+ __r;})
692+
693+ if (__builtin_expect (b != a_inf.value, 1))
694+ {
695+ double y, x;
696+ fenv_t fe;
697+
698+ fe = fegetenv_register ();
699+
700+ relax_fenv_state ();
701+
702+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
703+ y = FMSUB (threehalf, b, b);
704+
705+ /* Initial estimate. */
706+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
707+
708+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
709+ x = x * FNMSUB (y, x * x, threehalf);
710+ x = x * FNMSUB (y, x * x, threehalf);
711+ x = x * FNMSUB (y, x * x, threehalf);
712+
713+ /* All done. */
714+ fesetenv_register (fe);
715+ return x * b;
716+ }
717+ }
718+ else if (b < 0)
719+ {
720+ /* For some reason, some PowerPC32 processors don't implement
721+ FE_INVALID_SQRT. */
722+#ifdef FE_INVALID_SQRT
723+ feraiseexcept (FE_INVALID_SQRT);
724+
725+ fenv_union_t u = { .fenv = fegetenv_register () };
726+ if ((u.l[1] & FE_INVALID) == 0)
727+#endif
728+ feraiseexcept (FE_INVALID);
729+ b = a_nan.value;
730+ }
731+ return f_washf (b);
732+}
733diff -ruN libc-orig/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
734--- libc-orig/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c 1969-12-31 18:00:00.000000000 -0600
735+++ libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c 2012-06-14 14:55:24.620001266 -0500
736@@ -0,0 +1,134 @@
737+/* Double-precision floating point square root.
738+ Copyright (C) 2010 Free Software Foundation, Inc.
739+ This file is part of the GNU C Library.
740+
741+ The GNU C Library is free software; you can redistribute it and/or
742+ modify it under the terms of the GNU Lesser General Public
743+ License as published by the Free Software Foundation; either
744+ version 2.1 of the License, or (at your option) any later version.
745+
746+ The GNU C Library is distributed in the hope that it will be useful,
747+ but WITHOUT ANY WARRANTY; without even the implied warranty of
748+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
749+ Lesser General Public License for more details.
750+
751+ You should have received a copy of the GNU Lesser General Public
752+ License along with the GNU C Library; if not, write to the Free
753+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
754+ 02111-1307 USA. */
755+
756+#include <math.h>
757+#include <math_private.h>
758+#include <fenv_libc.h>
759+#include <inttypes.h>
760+
761+#include <sysdep.h>
762+#include <ldsodefs.h>
763+
764+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
765+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
766+static const float two108 = 3.245185536584267269e+32;
767+static const float twom54 = 5.551115123125782702e-17;
768+static const float half = 0.5;
769+
770+/* The method is based on the descriptions in:
771+
772+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
773+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
774+
775+ We find the actual square root and half of its reciprocal
776+ simultaneously. */
777+
778+#ifdef __STDC__
779+double
780+__ieee754_sqrt (double b)
781+#else
782+double
783+__ieee754_sqrt (b)
784+ double b;
785+#endif
786+{
787+ if (__builtin_expect (b > 0, 1))
788+ {
789+ double y, g, h, d, r;
790+ ieee_double_shape_type u;
791+
792+ if (__builtin_expect (b != a_inf.value, 1))
793+ {
794+ fenv_t fe;
795+
796+ fe = fegetenv_register ();
797+
798+ u.value = b;
799+
800+ relax_fenv_state ();
801+
802+ __asm__ ("frsqrte %[estimate], %[x]\n"
803+ : [estimate] "=f" (y) : [x] "f" (b));
804+
805+ /* Following Muller et al, page 168, equation 5.20.
806+
807+ h goes to 1/(2*sqrt(b))
808+ g goes to sqrt(b).
809+
810+ We need three iterations to get within 1ulp. */
811+
812+ /* Indicate that these can be performed prior to the branch. GCC
813+ insists on sinking them below the branch, however; it seems like
814+ they'd be better before the branch so that we can cover any latency
815+ from storing the argument and loading its high word. Oh well. */
816+
817+ g = b * y;
818+ h = 0.5 * y;
819+
820+ /* Handle small numbers by scaling. */
821+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
822+ return __ieee754_sqrt (b * two108) * twom54;
823+
824+#define FMADD(a_, c_, b_) \
825+ ({ double __r; \
826+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
827+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
828+ __r;})
829+#define FNMSUB(a_, c_, b_) \
830+ ({ double __r; \
831+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
832+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
833+ __r;})
834+
835+ r = FNMSUB (g, h, half);
836+ g = FMADD (g, r, g);
837+ h = FMADD (h, r, h);
838+
839+ r = FNMSUB (g, h, half);
840+ g = FMADD (g, r, g);
841+ h = FMADD (h, r, h);
842+
843+ r = FNMSUB (g, h, half);
844+ g = FMADD (g, r, g);
845+ h = FMADD (h, r, h);
846+
847+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
848+
849+ /* Final refinement. */
850+ d = FNMSUB (g, g, b);
851+
852+ fesetenv_register (fe);
853+ return FMADD (d, h, g);
854+ }
855+ }
856+ else if (b < 0)
857+ {
858+ /* For some reason, some PowerPC32 processors don't implement
859+ FE_INVALID_SQRT. */
860+#ifdef FE_INVALID_SQRT
861+ feraiseexcept (FE_INVALID_SQRT);
862+
863+ fenv_union_t u = { .fenv = fegetenv_register () };
864+ if ((u.l[1] & FE_INVALID) == 0)
865+#endif
866+ feraiseexcept (FE_INVALID);
867+ b = a_nan.value;
868+ }
869+ return f_wash (b);
870+}
871diff -ruN libc-orig/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
872--- libc-orig/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c 1969-12-31 18:00:00.000000000 -0600
873+++ libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c 2012-06-14 14:55:24.620001266 -0500
874@@ -0,0 +1,101 @@
875+/* Single-precision floating point square root.
876+ Copyright (C) 2010 Free Software Foundation, Inc.
877+ This file is part of the GNU C Library.
878+
879+ The GNU C Library is free software; you can redistribute it and/or
880+ modify it under the terms of the GNU Lesser General Public
881+ License as published by the Free Software Foundation; either
882+ version 2.1 of the License, or (at your option) any later version.
883+
884+ The GNU C Library is distributed in the hope that it will be useful,
885+ but WITHOUT ANY WARRANTY; without even the implied warranty of
886+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
887+ Lesser General Public License for more details.
888+
889+ You should have received a copy of the GNU Lesser General Public
890+ License along with the GNU C Library; if not, write to the Free
891+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
892+ 02111-1307 USA. */
893+
894+#include <math.h>
895+#include <math_private.h>
896+#include <fenv_libc.h>
897+#include <inttypes.h>
898+
899+#include <sysdep.h>
900+#include <ldsodefs.h>
901+
902+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
903+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
904+static const float threehalf = 1.5;
905+
906+/* The method is based on the descriptions in:
907+
908+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
909+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
910+
911+ We find the reciprocal square root and use that to compute the actual
912+ square root. */
913+
914+#ifdef __STDC__
915+float
916+__ieee754_sqrtf (float b)
917+#else
918+float
919+__ieee754_sqrtf (b)
920+ float b;
921+#endif
922+{
923+ if (__builtin_expect (b > 0, 1))
924+ {
925+#define FMSUB(a_, c_, b_) \
926+ ({ double __r; \
927+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
928+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
929+ __r;})
930+#define FNMSUB(a_, c_, b_) \
931+ ({ double __r; \
932+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
933+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
934+ __r;})
935+
936+ if (__builtin_expect (b != a_inf.value, 1))
937+ {
938+ double y, x;
939+ fenv_t fe;
940+
941+ fe = fegetenv_register ();
942+
943+ relax_fenv_state ();
944+
945+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
946+ y = FMSUB (threehalf, b, b);
947+
948+ /* Initial estimate. */
949+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
950+
951+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
952+ x = x * FNMSUB (y, x * x, threehalf);
953+ x = x * FNMSUB (y, x * x, threehalf);
954+ x = x * FNMSUB (y, x * x, threehalf);
955+
956+ /* All done. */
957+ fesetenv_register (fe);
958+ return x * b;
959+ }
960+ }
961+ else if (b < 0)
962+ {
963+ /* For some reason, some PowerPC32 processors don't implement
964+ FE_INVALID_SQRT. */
965+#ifdef FE_INVALID_SQRT
966+ feraiseexcept (FE_INVALID_SQRT);
967+
968+ fenv_union_t u = { .fenv = fegetenv_register () };
969+ if ((u.l[1] & FE_INVALID) == 0)
970+#endif
971+ feraiseexcept (FE_INVALID);
972+ b = a_nan.value;
973+ }
974+ return f_washf (b);
975+}
976diff -ruN libc-orig/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
977--- libc-orig/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c 1969-12-31 18:00:00.000000000 -0600
978+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c 2012-06-14 14:51:50.452001745 -0500
979@@ -0,0 +1,134 @@
980+/* Double-precision floating point square root.
981+ Copyright (C) 2010 Free Software Foundation, Inc.
982+ This file is part of the GNU C Library.
983+
984+ The GNU C Library is free software; you can redistribute it and/or
985+ modify it under the terms of the GNU Lesser General Public
986+ License as published by the Free Software Foundation; either
987+ version 2.1 of the License, or (at your option) any later version.
988+
989+ The GNU C Library is distributed in the hope that it will be useful,
990+ but WITHOUT ANY WARRANTY; without even the implied warranty of
991+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
992+ Lesser General Public License for more details.
993+
994+ You should have received a copy of the GNU Lesser General Public
995+ License along with the GNU C Library; if not, write to the Free
996+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
997+ 02111-1307 USA. */
998+
999+#include <math.h>
1000+#include <math_private.h>
1001+#include <fenv_libc.h>
1002+#include <inttypes.h>
1003+
1004+#include <sysdep.h>
1005+#include <ldsodefs.h>
1006+
1007+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1008+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1009+static const float two108 = 3.245185536584267269e+32;
1010+static const float twom54 = 5.551115123125782702e-17;
1011+static const float half = 0.5;
1012+
1013+/* The method is based on the descriptions in:
1014+
1015+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1016+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1017+
1018+ We find the actual square root and half of its reciprocal
1019+ simultaneously. */
1020+
1021+#ifdef __STDC__
1022+double
1023+__ieee754_sqrt (double b)
1024+#else
1025+double
1026+__ieee754_sqrt (b)
1027+ double b;
1028+#endif
1029+{
1030+ if (__builtin_expect (b > 0, 1))
1031+ {
1032+ double y, g, h, d, r;
1033+ ieee_double_shape_type u;
1034+
1035+ if (__builtin_expect (b != a_inf.value, 1))
1036+ {
1037+ fenv_t fe;
1038+
1039+ fe = fegetenv_register ();
1040+
1041+ u.value = b;
1042+
1043+ relax_fenv_state ();
1044+
1045+ __asm__ ("frsqrte %[estimate], %[x]\n"
1046+ : [estimate] "=f" (y) : [x] "f" (b));
1047+
1048+ /* Following Muller et al, page 168, equation 5.20.
1049+
1050+ h goes to 1/(2*sqrt(b))
1051+ g goes to sqrt(b).
1052+
1053+ We need three iterations to get within 1ulp. */
1054+
1055+ /* Indicate that these can be performed prior to the branch. GCC
1056+ insists on sinking them below the branch, however; it seems like
1057+ they'd be better before the branch so that we can cover any latency
1058+ from storing the argument and loading its high word. Oh well. */
1059+
1060+ g = b * y;
1061+ h = 0.5 * y;
1062+
1063+ /* Handle small numbers by scaling. */
1064+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
1065+ return __ieee754_sqrt (b * two108) * twom54;
1066+
1067+#define FMADD(a_, c_, b_) \
1068+ ({ double __r; \
1069+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
1070+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1071+ __r;})
1072+#define FNMSUB(a_, c_, b_) \
1073+ ({ double __r; \
1074+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1075+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1076+ __r;})
1077+
1078+ r = FNMSUB (g, h, half);
1079+ g = FMADD (g, r, g);
1080+ h = FMADD (h, r, h);
1081+
1082+ r = FNMSUB (g, h, half);
1083+ g = FMADD (g, r, g);
1084+ h = FMADD (h, r, h);
1085+
1086+ r = FNMSUB (g, h, half);
1087+ g = FMADD (g, r, g);
1088+ h = FMADD (h, r, h);
1089+
1090+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
1091+
1092+ /* Final refinement. */
1093+ d = FNMSUB (g, g, b);
1094+
1095+ fesetenv_register (fe);
1096+ return FMADD (d, h, g);
1097+ }
1098+ }
1099+ else if (b < 0)
1100+ {
1101+ /* For some reason, some PowerPC32 processors don't implement
1102+ FE_INVALID_SQRT. */
1103+#ifdef FE_INVALID_SQRT
1104+ feraiseexcept (FE_INVALID_SQRT);
1105+
1106+ fenv_union_t u = { .fenv = fegetenv_register () };
1107+ if ((u.l[1] & FE_INVALID) == 0)
1108+#endif
1109+ feraiseexcept (FE_INVALID);
1110+ b = a_nan.value;
1111+ }
1112+ return f_wash (b);
1113+}
1114diff -ruN libc-orig/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
1115--- libc-orig/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c 1969-12-31 18:00:00.000000000 -0600
1116+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c 2012-06-14 14:51:50.452001745 -0500
1117@@ -0,0 +1,101 @@
1118+/* Single-precision floating point square root.
1119+ Copyright (C) 2010 Free Software Foundation, Inc.
1120+ This file is part of the GNU C Library.
1121+
1122+ The GNU C Library is free software; you can redistribute it and/or
1123+ modify it under the terms of the GNU Lesser General Public
1124+ License as published by the Free Software Foundation; either
1125+ version 2.1 of the License, or (at your option) any later version.
1126+
1127+ The GNU C Library is distributed in the hope that it will be useful,
1128+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1129+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1130+ Lesser General Public License for more details.
1131+
1132+ You should have received a copy of the GNU Lesser General Public
1133+ License along with the GNU C Library; if not, write to the Free
1134+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1135+ 02111-1307 USA. */
1136+
1137+#include <math.h>
1138+#include <math_private.h>
1139+#include <fenv_libc.h>
1140+#include <inttypes.h>
1141+
1142+#include <sysdep.h>
1143+#include <ldsodefs.h>
1144+
1145+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1146+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1147+static const float threehalf = 1.5;
1148+
1149+/* The method is based on the descriptions in:
1150+
1151+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1152+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1153+
1154+ We find the reciprocal square root and use that to compute the actual
1155+ square root. */
1156+
1157+#ifdef __STDC__
1158+float
1159+__ieee754_sqrtf (float b)
1160+#else
1161+float
1162+__ieee754_sqrtf (b)
1163+ float b;
1164+#endif
1165+{
1166+ if (__builtin_expect (b > 0, 1))
1167+ {
1168+#define FMSUB(a_, c_, b_) \
1169+ ({ double __r; \
1170+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
1171+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1172+ __r;})
1173+#define FNMSUB(a_, c_, b_) \
1174+ ({ double __r; \
1175+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1176+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1177+ __r;})
1178+
1179+ if (__builtin_expect (b != a_inf.value, 1))
1180+ {
1181+ double y, x;
1182+ fenv_t fe;
1183+
1184+ fe = fegetenv_register ();
1185+
1186+ relax_fenv_state ();
1187+
1188+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
1189+ y = FMSUB (threehalf, b, b);
1190+
1191+ /* Initial estimate. */
1192+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
1193+
1194+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
1195+ x = x * FNMSUB (y, x * x, threehalf);
1196+ x = x * FNMSUB (y, x * x, threehalf);
1197+ x = x * FNMSUB (y, x * x, threehalf);
1198+
1199+ /* All done. */
1200+ fesetenv_register (fe);
1201+ return x * b;
1202+ }
1203+ }
1204+ else if (b < 0)
1205+ {
1206+ /* For some reason, some PowerPC32 processors don't implement
1207+ FE_INVALID_SQRT. */
1208+#ifdef FE_INVALID_SQRT
1209+ feraiseexcept (FE_INVALID_SQRT);
1210+
1211+ fenv_union_t u = { .fenv = fegetenv_register () };
1212+ if ((u.l[1] & FE_INVALID) == 0)
1213+#endif
1214+ feraiseexcept (FE_INVALID);
1215+ b = a_nan.value;
1216+ }
1217+ return f_washf (b);
1218+}
1219diff -ruN libc-orig/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
1220--- libc-orig/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c 1969-12-31 18:00:00.000000000 -0600
1221+++ libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c 2012-06-14 14:56:02.080000985 -0500
1222@@ -0,0 +1,134 @@
1223+/* Double-precision floating point square root.
1224+ Copyright (C) 2010 Free Software Foundation, Inc.
1225+ This file is part of the GNU C Library.
1226+
1227+ The GNU C Library is free software; you can redistribute it and/or
1228+ modify it under the terms of the GNU Lesser General Public
1229+ License as published by the Free Software Foundation; either
1230+ version 2.1 of the License, or (at your option) any later version.
1231+
1232+ The GNU C Library is distributed in the hope that it will be useful,
1233+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1234+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1235+ Lesser General Public License for more details.
1236+
1237+ You should have received a copy of the GNU Lesser General Public
1238+ License along with the GNU C Library; if not, write to the Free
1239+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1240+ 02111-1307 USA. */
1241+
1242+#include <math.h>
1243+#include <math_private.h>
1244+#include <fenv_libc.h>
1245+#include <inttypes.h>
1246+
1247+#include <sysdep.h>
1248+#include <ldsodefs.h>
1249+
1250+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1251+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1252+static const float two108 = 3.245185536584267269e+32;
1253+static const float twom54 = 5.551115123125782702e-17;
1254+static const float half = 0.5;
1255+
1256+/* The method is based on the descriptions in:
1257+
1258+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1259+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1260+
1261+ We find the actual square root and half of its reciprocal
1262+ simultaneously. */
1263+
1264+#ifdef __STDC__
1265+double
1266+__ieee754_sqrt (double b)
1267+#else
1268+double
1269+__ieee754_sqrt (b)
1270+ double b;
1271+#endif
1272+{
1273+ if (__builtin_expect (b > 0, 1))
1274+ {
1275+ double y, g, h, d, r;
1276+ ieee_double_shape_type u;
1277+
1278+ if (__builtin_expect (b != a_inf.value, 1))
1279+ {
1280+ fenv_t fe;
1281+
1282+ fe = fegetenv_register ();
1283+
1284+ u.value = b;
1285+
1286+ relax_fenv_state ();
1287+
1288+ __asm__ ("frsqrte %[estimate], %[x]\n"
1289+ : [estimate] "=f" (y) : [x] "f" (b));
1290+
1291+ /* Following Muller et al, page 168, equation 5.20.
1292+
1293+ h goes to 1/(2*sqrt(b))
1294+ g goes to sqrt(b).
1295+
1296+ We need three iterations to get within 1ulp. */
1297+
1298+ /* Indicate that these can be performed prior to the branch. GCC
1299+ insists on sinking them below the branch, however; it seems like
1300+ they'd be better before the branch so that we can cover any latency
1301+ from storing the argument and loading its high word. Oh well. */
1302+
1303+ g = b * y;
1304+ h = 0.5 * y;
1305+
1306+ /* Handle small numbers by scaling. */
1307+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
1308+ return __ieee754_sqrt (b * two108) * twom54;
1309+
1310+#define FMADD(a_, c_, b_) \
1311+ ({ double __r; \
1312+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
1313+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1314+ __r;})
1315+#define FNMSUB(a_, c_, b_) \
1316+ ({ double __r; \
1317+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1318+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1319+ __r;})
1320+
1321+ r = FNMSUB (g, h, half);
1322+ g = FMADD (g, r, g);
1323+ h = FMADD (h, r, h);
1324+
1325+ r = FNMSUB (g, h, half);
1326+ g = FMADD (g, r, g);
1327+ h = FMADD (h, r, h);
1328+
1329+ r = FNMSUB (g, h, half);
1330+ g = FMADD (g, r, g);
1331+ h = FMADD (h, r, h);
1332+
1333+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
1334+
1335+ /* Final refinement. */
1336+ d = FNMSUB (g, g, b);
1337+
1338+ fesetenv_register (fe);
1339+ return FMADD (d, h, g);
1340+ }
1341+ }
1342+ else if (b < 0)
1343+ {
1344+ /* For some reason, some PowerPC32 processors don't implement
1345+ FE_INVALID_SQRT. */
1346+#ifdef FE_INVALID_SQRT
1347+ feraiseexcept (FE_INVALID_SQRT);
1348+
1349+ fenv_union_t u = { .fenv = fegetenv_register () };
1350+ if ((u.l[1] & FE_INVALID) == 0)
1351+#endif
1352+ feraiseexcept (FE_INVALID);
1353+ b = a_nan.value;
1354+ }
1355+ return f_wash (b);
1356+}
1357diff -ruN libc-orig/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
1358--- libc-orig/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c 1969-12-31 18:00:00.000000000 -0600
1359+++ libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c 2012-06-14 14:56:02.080000985 -0500
1360@@ -0,0 +1,101 @@
1361+/* Single-precision floating point square root.
1362+ Copyright (C) 2010 Free Software Foundation, Inc.
1363+ This file is part of the GNU C Library.
1364+
1365+ The GNU C Library is free software; you can redistribute it and/or
1366+ modify it under the terms of the GNU Lesser General Public
1367+ License as published by the Free Software Foundation; either
1368+ version 2.1 of the License, or (at your option) any later version.
1369+
1370+ The GNU C Library is distributed in the hope that it will be useful,
1371+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1372+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1373+ Lesser General Public License for more details.
1374+
1375+ You should have received a copy of the GNU Lesser General Public
1376+ License along with the GNU C Library; if not, write to the Free
1377+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1378+ 02111-1307 USA. */
1379+
1380+#include <math.h>
1381+#include <math_private.h>
1382+#include <fenv_libc.h>
1383+#include <inttypes.h>
1384+
1385+#include <sysdep.h>
1386+#include <ldsodefs.h>
1387+
1388+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1389+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1390+static const float threehalf = 1.5;
1391+
1392+/* The method is based on the descriptions in:
1393+
1394+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1395+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1396+
1397+ We find the reciprocal square root and use that to compute the actual
1398+ square root. */
1399+
1400+#ifdef __STDC__
1401+float
1402+__ieee754_sqrtf (float b)
1403+#else
1404+float
1405+__ieee754_sqrtf (b)
1406+ float b;
1407+#endif
1408+{
1409+ if (__builtin_expect (b > 0, 1))
1410+ {
1411+#define FMSUB(a_, c_, b_) \
1412+ ({ double __r; \
1413+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
1414+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1415+ __r;})
1416+#define FNMSUB(a_, c_, b_) \
1417+ ({ double __r; \
1418+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1419+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1420+ __r;})
1421+
1422+ if (__builtin_expect (b != a_inf.value, 1))
1423+ {
1424+ double y, x;
1425+ fenv_t fe;
1426+
1427+ fe = fegetenv_register ();
1428+
1429+ relax_fenv_state ();
1430+
1431+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
1432+ y = FMSUB (threehalf, b, b);
1433+
1434+ /* Initial estimate. */
1435+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
1436+
1437+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
1438+ x = x * FNMSUB (y, x * x, threehalf);
1439+ x = x * FNMSUB (y, x * x, threehalf);
1440+ x = x * FNMSUB (y, x * x, threehalf);
1441+
1442+ /* All done. */
1443+ fesetenv_register (fe);
1444+ return x * b;
1445+ }
1446+ }
1447+ else if (b < 0)
1448+ {
1449+ /* For some reason, some PowerPC32 processors don't implement
1450+ FE_INVALID_SQRT. */
1451+#ifdef FE_INVALID_SQRT
1452+ feraiseexcept (FE_INVALID_SQRT);
1453+
1454+ fenv_union_t u = { .fenv = fegetenv_register () };
1455+ if ((u.l[1] & FE_INVALID) == 0)
1456+#endif
1457+ feraiseexcept (FE_INVALID);
1458+ b = a_nan.value;
1459+ }
1460+ return f_washf (b);
1461+}
1462diff -ruN libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
1463--- libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies 1969-12-31 18:00:00.000000000 -0600
1464+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies 2012-06-14 14:51:50.452001745 -0500
1465@@ -0,0 +1 @@
1466+powerpc/powerpc32/603e/fpu
1467diff -ruN libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
1468--- libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies 1969-12-31 18:00:00.000000000 -0600
1469+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies 2012-06-14 14:54:00.481000876 -0500
1470@@ -0,0 +1 @@
1471+powerpc/powerpc32/e500mc/fpu
1472diff -ruN libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
1473--- libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies 1969-12-31 18:00:00.000000000 -0600
1474+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies 2012-06-14 14:54:17.000001007 -0500
1475@@ -0,0 +1 @@
1476+powerpc/powerpc32/e5500/fpu
1477diff -ruN libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
1478--- libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies 1969-12-31 18:00:00.000000000 -0600
1479+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies 2012-06-14 14:54:31.054001299 -0500
1480@@ -0,0 +1 @@
1481+powerpc/powerpc32/e6500/fpu
1482diff -ruN libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
1483--- libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies 1969-12-31 18:00:00.000000000 -0600
1484+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies 2012-06-14 14:51:50.453001709 -0500
1485@@ -0,0 +1 @@
1486+powerpc/powerpc64/e5500/fpu
1487diff -ruN libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
1488--- libc-orig/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies 1969-12-31 18:00:00.000000000 -0600
1489+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies 2012-06-14 14:58:14.298001288 -0500
1490@@ -0,0 +1 @@
1491+powerpc/powerpc64/e6500/fpu