summaryrefslogtreecommitdiffstats
path: root/meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch')
-rw-r--r--meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch1516
1 files changed, 1516 insertions, 0 deletions
diff --git a/meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch b/meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch
new file mode 100644
index 0000000000..689b79c61c
--- /dev/null
+++ b/meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch
@@ -0,0 +1,1516 @@
1Signed-of-by: Edmar Wienskoski <edmar@freescale.com>
2Upstream-Status: Pending
3
4Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
5===================================================================
6--- /dev/null
7+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c
8@@ -0,0 +1,134 @@
9+/* Double-precision floating point square root.
10+ Copyright (C) 2010 Free Software Foundation, Inc.
11+ This file is part of the GNU C Library.
12+
13+ The GNU C Library is free software; you can redistribute it and/or
14+ modify it under the terms of the GNU Lesser General Public
15+ License as published by the Free Software Foundation; either
16+ version 2.1 of the License, or (at your option) any later version.
17+
18+ The GNU C Library is distributed in the hope that it will be useful,
19+ but WITHOUT ANY WARRANTY; without even the implied warranty of
20+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21+ Lesser General Public License for more details.
22+
23+ You should have received a copy of the GNU Lesser General Public
24+ License along with the GNU C Library; if not, write to the Free
25+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26+ 02111-1307 USA. */
27+
28+#include <math.h>
29+#include <math_private.h>
30+#include <fenv_libc.h>
31+#include <inttypes.h>
32+
33+#include <sysdep.h>
34+#include <ldsodefs.h>
35+
36+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
37+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
38+static const float two108 = 3.245185536584267269e+32;
39+static const float twom54 = 5.551115123125782702e-17;
40+static const float half = 0.5;
41+
42+/* The method is based on the descriptions in:
43+
44+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
45+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
46+
47+ We find the actual square root and half of its reciprocal
48+ simultaneously. */
49+
50+#ifdef __STDC__
51+double
52+__ieee754_sqrt (double b)
53+#else
54+double
55+__ieee754_sqrt (b)
56+ double b;
57+#endif
58+{
59+ if (__builtin_expect (b > 0, 1))
60+ {
61+ double y, g, h, d, r;
62+ ieee_double_shape_type u;
63+
64+ if (__builtin_expect (b != a_inf.value, 1))
65+ {
66+ fenv_t fe;
67+
68+ fe = fegetenv_register ();
69+
70+ u.value = b;
71+
72+ relax_fenv_state ();
73+
74+ __asm__ ("frsqrte %[estimate], %[x]\n"
75+ : [estimate] "=f" (y) : [x] "f" (b));
76+
77+ /* Following Muller et al, page 168, equation 5.20.
78+
79+ h goes to 1/(2*sqrt(b))
80+ g goes to sqrt(b).
81+
82+ We need three iterations to get within 1ulp. */
83+
84+ /* Indicate that these can be performed prior to the branch. GCC
85+ insists on sinking them below the branch, however; it seems like
86+ they'd be better before the branch so that we can cover any latency
87+ from storing the argument and loading its high word. Oh well. */
88+
89+ g = b * y;
90+ h = 0.5 * y;
91+
92+ /* Handle small numbers by scaling. */
93+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
94+ return __ieee754_sqrt (b * two108) * twom54;
95+
96+#define FMADD(a_, c_, b_) \
97+ ({ double __r; \
98+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
99+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
100+ __r;})
101+#define FNMSUB(a_, c_, b_) \
102+ ({ double __r; \
103+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
104+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
105+ __r;})
106+
107+ r = FNMSUB (g, h, half);
108+ g = FMADD (g, r, g);
109+ h = FMADD (h, r, h);
110+
111+ r = FNMSUB (g, h, half);
112+ g = FMADD (g, r, g);
113+ h = FMADD (h, r, h);
114+
115+ r = FNMSUB (g, h, half);
116+ g = FMADD (g, r, g);
117+ h = FMADD (h, r, h);
118+
119+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
120+
121+ /* Final refinement. */
122+ d = FNMSUB (g, g, b);
123+
124+ fesetenv_register (fe);
125+ return FMADD (d, h, g);
126+ }
127+ }
128+ else if (b < 0)
129+ {
130+ /* For some reason, some PowerPC32 processors don't implement
131+ FE_INVALID_SQRT. */
132+#ifdef FE_INVALID_SQRT
133+ feraiseexcept (FE_INVALID_SQRT);
134+
135+ fenv_union_t u = { .fenv = fegetenv_register () };
136+ if ((u.l & FE_INVALID) == 0)
137+#endif
138+ feraiseexcept (FE_INVALID);
139+ b = a_nan.value;
140+ }
141+ return f_wash (b);
142+}
143Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
144===================================================================
145--- /dev/null
146+++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c
147@@ -0,0 +1,101 @@
148+/* Single-precision floating point square root.
149+ Copyright (C) 2010 Free Software Foundation, Inc.
150+ This file is part of the GNU C Library.
151+
152+ The GNU C Library is free software; you can redistribute it and/or
153+ modify it under the terms of the GNU Lesser General Public
154+ License as published by the Free Software Foundation; either
155+ version 2.1 of the License, or (at your option) any later version.
156+
157+ The GNU C Library is distributed in the hope that it will be useful,
158+ but WITHOUT ANY WARRANTY; without even the implied warranty of
159+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
160+ Lesser General Public License for more details.
161+
162+ You should have received a copy of the GNU Lesser General Public
163+ License along with the GNU C Library; if not, write to the Free
164+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
165+ 02111-1307 USA. */
166+
167+#include <math.h>
168+#include <math_private.h>
169+#include <fenv_libc.h>
170+#include <inttypes.h>
171+
172+#include <sysdep.h>
173+#include <ldsodefs.h>
174+
175+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
176+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
177+static const float threehalf = 1.5;
178+
179+/* The method is based on the descriptions in:
180+
181+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
182+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
183+
184+ We find the reciprocal square root and use that to compute the actual
185+ square root. */
186+
187+#ifdef __STDC__
188+float
189+__ieee754_sqrtf (float b)
190+#else
191+float
192+__ieee754_sqrtf (b)
193+ float b;
194+#endif
195+{
196+ if (__builtin_expect (b > 0, 1))
197+ {
198+#define FMSUB(a_, c_, b_) \
199+ ({ double __r; \
200+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
201+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
202+ __r;})
203+#define FNMSUB(a_, c_, b_) \
204+ ({ double __r; \
205+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
206+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
207+ __r;})
208+
209+ if (__builtin_expect (b != a_inf.value, 1))
210+ {
211+ double y, x;
212+ fenv_t fe;
213+
214+ fe = fegetenv_register ();
215+
216+ relax_fenv_state ();
217+
218+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
219+ y = FMSUB (threehalf, b, b);
220+
221+ /* Initial estimate. */
222+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
223+
224+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
225+ x = x * FNMSUB (y, x * x, threehalf);
226+ x = x * FNMSUB (y, x * x, threehalf);
227+ x = x * FNMSUB (y, x * x, threehalf);
228+
229+ /* All done. */
230+ fesetenv_register (fe);
231+ return x * b;
232+ }
233+ }
234+ else if (b < 0)
235+ {
236+ /* For some reason, some PowerPC32 processors don't implement
237+ FE_INVALID_SQRT. */
238+#ifdef FE_INVALID_SQRT
239+ feraiseexcept (FE_INVALID_SQRT);
240+
241+ fenv_union_t u = { .fenv = fegetenv_register () };
242+ if ((u.l & FE_INVALID) == 0)
243+#endif
244+ feraiseexcept (FE_INVALID);
245+ b = a_nan.value;
246+ }
247+ return f_washf (b);
248+}
249Index: libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
250===================================================================
251--- /dev/null
252+++ libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c
253@@ -0,0 +1,134 @@
254+/* Double-precision floating point square root.
255+ Copyright (C) 2010 Free Software Foundation, Inc.
256+ This file is part of the GNU C Library.
257+
258+ The GNU C Library is free software; you can redistribute it and/or
259+ modify it under the terms of the GNU Lesser General Public
260+ License as published by the Free Software Foundation; either
261+ version 2.1 of the License, or (at your option) any later version.
262+
263+ The GNU C Library is distributed in the hope that it will be useful,
264+ but WITHOUT ANY WARRANTY; without even the implied warranty of
265+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
266+ Lesser General Public License for more details.
267+
268+ You should have received a copy of the GNU Lesser General Public
269+ License along with the GNU C Library; if not, write to the Free
270+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
271+ 02111-1307 USA. */
272+
273+#include <math.h>
274+#include <math_private.h>
275+#include <fenv_libc.h>
276+#include <inttypes.h>
277+
278+#include <sysdep.h>
279+#include <ldsodefs.h>
280+
281+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
282+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
283+static const float two108 = 3.245185536584267269e+32;
284+static const float twom54 = 5.551115123125782702e-17;
285+static const float half = 0.5;
286+
287+/* The method is based on the descriptions in:
288+
289+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
290+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
291+
292+ We find the actual square root and half of its reciprocal
293+ simultaneously. */
294+
295+#ifdef __STDC__
296+double
297+__ieee754_sqrt (double b)
298+#else
299+double
300+__ieee754_sqrt (b)
301+ double b;
302+#endif
303+{
304+ if (__builtin_expect (b > 0, 1))
305+ {
306+ double y, g, h, d, r;
307+ ieee_double_shape_type u;
308+
309+ if (__builtin_expect (b != a_inf.value, 1))
310+ {
311+ fenv_t fe;
312+
313+ fe = fegetenv_register ();
314+
315+ u.value = b;
316+
317+ relax_fenv_state ();
318+
319+ __asm__ ("frsqrte %[estimate], %[x]\n"
320+ : [estimate] "=f" (y) : [x] "f" (b));
321+
322+ /* Following Muller et al, page 168, equation 5.20.
323+
324+ h goes to 1/(2*sqrt(b))
325+ g goes to sqrt(b).
326+
327+ We need three iterations to get within 1ulp. */
328+
329+ /* Indicate that these can be performed prior to the branch. GCC
330+ insists on sinking them below the branch, however; it seems like
331+ they'd be better before the branch so that we can cover any latency
332+ from storing the argument and loading its high word. Oh well. */
333+
334+ g = b * y;
335+ h = 0.5 * y;
336+
337+ /* Handle small numbers by scaling. */
338+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
339+ return __ieee754_sqrt (b * two108) * twom54;
340+
341+#define FMADD(a_, c_, b_) \
342+ ({ double __r; \
343+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
344+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
345+ __r;})
346+#define FNMSUB(a_, c_, b_) \
347+ ({ double __r; \
348+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
349+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
350+ __r;})
351+
352+ r = FNMSUB (g, h, half);
353+ g = FMADD (g, r, g);
354+ h = FMADD (h, r, h);
355+
356+ r = FNMSUB (g, h, half);
357+ g = FMADD (g, r, g);
358+ h = FMADD (h, r, h);
359+
360+ r = FNMSUB (g, h, half);
361+ g = FMADD (g, r, g);
362+ h = FMADD (h, r, h);
363+
364+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
365+
366+ /* Final refinement. */
367+ d = FNMSUB (g, g, b);
368+
369+ fesetenv_register (fe);
370+ return FMADD (d, h, g);
371+ }
372+ }
373+ else if (b < 0)
374+ {
375+ /* For some reason, some PowerPC32 processors don't implement
376+ FE_INVALID_SQRT. */
377+#ifdef FE_INVALID_SQRT
378+ feraiseexcept (FE_INVALID_SQRT);
379+
380+ fenv_union_t u = { .fenv = fegetenv_register () };
381+ if ((u.l & FE_INVALID) == 0)
382+#endif
383+ feraiseexcept (FE_INVALID);
384+ b = a_nan.value;
385+ }
386+ return f_wash (b);
387+}
388Index: libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
389===================================================================
390--- /dev/null
391+++ libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c
392@@ -0,0 +1,101 @@
393+/* Single-precision floating point square root.
394+ Copyright (C) 2010 Free Software Foundation, Inc.
395+ This file is part of the GNU C Library.
396+
397+ The GNU C Library is free software; you can redistribute it and/or
398+ modify it under the terms of the GNU Lesser General Public
399+ License as published by the Free Software Foundation; either
400+ version 2.1 of the License, or (at your option) any later version.
401+
402+ The GNU C Library is distributed in the hope that it will be useful,
403+ but WITHOUT ANY WARRANTY; without even the implied warranty of
404+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
405+ Lesser General Public License for more details.
406+
407+ You should have received a copy of the GNU Lesser General Public
408+ License along with the GNU C Library; if not, write to the Free
409+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
410+ 02111-1307 USA. */
411+
412+#include <math.h>
413+#include <math_private.h>
414+#include <fenv_libc.h>
415+#include <inttypes.h>
416+
417+#include <sysdep.h>
418+#include <ldsodefs.h>
419+
420+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
421+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
422+static const float threehalf = 1.5;
423+
424+/* The method is based on the descriptions in:
425+
426+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
427+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
428+
429+ We find the reciprocal square root and use that to compute the actual
430+ square root. */
431+
432+#ifdef __STDC__
433+float
434+__ieee754_sqrtf (float b)
435+#else
436+float
437+__ieee754_sqrtf (b)
438+ float b;
439+#endif
440+{
441+ if (__builtin_expect (b > 0, 1))
442+ {
443+#define FMSUB(a_, c_, b_) \
444+ ({ double __r; \
445+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
446+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
447+ __r;})
448+#define FNMSUB(a_, c_, b_) \
449+ ({ double __r; \
450+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
451+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
452+ __r;})
453+
454+ if (__builtin_expect (b != a_inf.value, 1))
455+ {
456+ double y, x;
457+ fenv_t fe;
458+
459+ fe = fegetenv_register ();
460+
461+ relax_fenv_state ();
462+
463+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
464+ y = FMSUB (threehalf, b, b);
465+
466+ /* Initial estimate. */
467+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
468+
469+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
470+ x = x * FNMSUB (y, x * x, threehalf);
471+ x = x * FNMSUB (y, x * x, threehalf);
472+ x = x * FNMSUB (y, x * x, threehalf);
473+
474+ /* All done. */
475+ fesetenv_register (fe);
476+ return x * b;
477+ }
478+ }
479+ else if (b < 0)
480+ {
481+ /* For some reason, some PowerPC32 processors don't implement
482+ FE_INVALID_SQRT. */
483+#ifdef FE_INVALID_SQRT
484+ feraiseexcept (FE_INVALID_SQRT);
485+
486+ fenv_union_t u = { .fenv = fegetenv_register () };
487+ if ((u.l & FE_INVALID) == 0)
488+#endif
489+ feraiseexcept (FE_INVALID);
490+ b = a_nan.value;
491+ }
492+ return f_washf (b);
493+}
494Index: libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
495===================================================================
496--- /dev/null
497+++ libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c
498@@ -0,0 +1,134 @@
499+/* Double-precision floating point square root.
500+ Copyright (C) 2010 Free Software Foundation, Inc.
501+ This file is part of the GNU C Library.
502+
503+ The GNU C Library is free software; you can redistribute it and/or
504+ modify it under the terms of the GNU Lesser General Public
505+ License as published by the Free Software Foundation; either
506+ version 2.1 of the License, or (at your option) any later version.
507+
508+ The GNU C Library is distributed in the hope that it will be useful,
509+ but WITHOUT ANY WARRANTY; without even the implied warranty of
510+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
511+ Lesser General Public License for more details.
512+
513+ You should have received a copy of the GNU Lesser General Public
514+ License along with the GNU C Library; if not, write to the Free
515+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
516+ 02111-1307 USA. */
517+
518+#include <math.h>
519+#include <math_private.h>
520+#include <fenv_libc.h>
521+#include <inttypes.h>
522+
523+#include <sysdep.h>
524+#include <ldsodefs.h>
525+
526+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
527+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
528+static const float two108 = 3.245185536584267269e+32;
529+static const float twom54 = 5.551115123125782702e-17;
530+static const float half = 0.5;
531+
532+/* The method is based on the descriptions in:
533+
534+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
535+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
536+
537+ We find the actual square root and half of its reciprocal
538+ simultaneously. */
539+
540+#ifdef __STDC__
541+double
542+__ieee754_sqrt (double b)
543+#else
544+double
545+__ieee754_sqrt (b)
546+ double b;
547+#endif
548+{
549+ if (__builtin_expect (b > 0, 1))
550+ {
551+ double y, g, h, d, r;
552+ ieee_double_shape_type u;
553+
554+ if (__builtin_expect (b != a_inf.value, 1))
555+ {
556+ fenv_t fe;
557+
558+ fe = fegetenv_register ();
559+
560+ u.value = b;
561+
562+ relax_fenv_state ();
563+
564+ __asm__ ("frsqrte %[estimate], %[x]\n"
565+ : [estimate] "=f" (y) : [x] "f" (b));
566+
567+ /* Following Muller et al, page 168, equation 5.20.
568+
569+ h goes to 1/(2*sqrt(b))
570+ g goes to sqrt(b).
571+
572+ We need three iterations to get within 1ulp. */
573+
574+ /* Indicate that these can be performed prior to the branch. GCC
575+ insists on sinking them below the branch, however; it seems like
576+ they'd be better before the branch so that we can cover any latency
577+ from storing the argument and loading its high word. Oh well. */
578+
579+ g = b * y;
580+ h = 0.5 * y;
581+
582+ /* Handle small numbers by scaling. */
583+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
584+ return __ieee754_sqrt (b * two108) * twom54;
585+
586+#define FMADD(a_, c_, b_) \
587+ ({ double __r; \
588+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
589+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
590+ __r;})
591+#define FNMSUB(a_, c_, b_) \
592+ ({ double __r; \
593+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
594+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
595+ __r;})
596+
597+ r = FNMSUB (g, h, half);
598+ g = FMADD (g, r, g);
599+ h = FMADD (h, r, h);
600+
601+ r = FNMSUB (g, h, half);
602+ g = FMADD (g, r, g);
603+ h = FMADD (h, r, h);
604+
605+ r = FNMSUB (g, h, half);
606+ g = FMADD (g, r, g);
607+ h = FMADD (h, r, h);
608+
609+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
610+
611+ /* Final refinement. */
612+ d = FNMSUB (g, g, b);
613+
614+ fesetenv_register (fe);
615+ return FMADD (d, h, g);
616+ }
617+ }
618+ else if (b < 0)
619+ {
620+ /* For some reason, some PowerPC32 processors don't implement
621+ FE_INVALID_SQRT. */
622+#ifdef FE_INVALID_SQRT
623+ feraiseexcept (FE_INVALID_SQRT);
624+
625+ fenv_union_t u = { .fenv = fegetenv_register () };
626+ if ((u.l & FE_INVALID) == 0)
627+#endif
628+ feraiseexcept (FE_INVALID);
629+ b = a_nan.value;
630+ }
631+ return f_wash (b);
632+}
633Index: libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
634===================================================================
635--- /dev/null
636+++ libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c
637@@ -0,0 +1,101 @@
638+/* Single-precision floating point square root.
639+ Copyright (C) 2010 Free Software Foundation, Inc.
640+ This file is part of the GNU C Library.
641+
642+ The GNU C Library is free software; you can redistribute it and/or
643+ modify it under the terms of the GNU Lesser General Public
644+ License as published by the Free Software Foundation; either
645+ version 2.1 of the License, or (at your option) any later version.
646+
647+ The GNU C Library is distributed in the hope that it will be useful,
648+ but WITHOUT ANY WARRANTY; without even the implied warranty of
649+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
650+ Lesser General Public License for more details.
651+
652+ You should have received a copy of the GNU Lesser General Public
653+ License along with the GNU C Library; if not, write to the Free
654+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
655+ 02111-1307 USA. */
656+
657+#include <math.h>
658+#include <math_private.h>
659+#include <fenv_libc.h>
660+#include <inttypes.h>
661+
662+#include <sysdep.h>
663+#include <ldsodefs.h>
664+
665+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
666+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
667+static const float threehalf = 1.5;
668+
669+/* The method is based on the descriptions in:
670+
671+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
672+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
673+
674+ We find the reciprocal square root and use that to compute the actual
675+ square root. */
676+
677+#ifdef __STDC__
678+float
679+__ieee754_sqrtf (float b)
680+#else
681+float
682+__ieee754_sqrtf (b)
683+ float b;
684+#endif
685+{
686+ if (__builtin_expect (b > 0, 1))
687+ {
688+#define FMSUB(a_, c_, b_) \
689+ ({ double __r; \
690+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
691+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
692+ __r;})
693+#define FNMSUB(a_, c_, b_) \
694+ ({ double __r; \
695+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
696+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
697+ __r;})
698+
699+ if (__builtin_expect (b != a_inf.value, 1))
700+ {
701+ double y, x;
702+ fenv_t fe;
703+
704+ fe = fegetenv_register ();
705+
706+ relax_fenv_state ();
707+
708+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
709+ y = FMSUB (threehalf, b, b);
710+
711+ /* Initial estimate. */
712+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
713+
714+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
715+ x = x * FNMSUB (y, x * x, threehalf);
716+ x = x * FNMSUB (y, x * x, threehalf);
717+ x = x * FNMSUB (y, x * x, threehalf);
718+
719+ /* All done. */
720+ fesetenv_register (fe);
721+ return x * b;
722+ }
723+ }
724+ else if (b < 0)
725+ {
726+ /* For some reason, some PowerPC32 processors don't implement
727+ FE_INVALID_SQRT. */
728+#ifdef FE_INVALID_SQRT
729+ feraiseexcept (FE_INVALID_SQRT);
730+
731+ fenv_union_t u = { .fenv = fegetenv_register () };
732+ if ((u.l & FE_INVALID) == 0)
733+#endif
734+ feraiseexcept (FE_INVALID);
735+ b = a_nan.value;
736+ }
737+ return f_washf (b);
738+}
739Index: libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
740===================================================================
741--- /dev/null
742+++ libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c
743@@ -0,0 +1,134 @@
744+/* Double-precision floating point square root.
745+ Copyright (C) 2010 Free Software Foundation, Inc.
746+ This file is part of the GNU C Library.
747+
748+ The GNU C Library is free software; you can redistribute it and/or
749+ modify it under the terms of the GNU Lesser General Public
750+ License as published by the Free Software Foundation; either
751+ version 2.1 of the License, or (at your option) any later version.
752+
753+ The GNU C Library is distributed in the hope that it will be useful,
754+ but WITHOUT ANY WARRANTY; without even the implied warranty of
755+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
756+ Lesser General Public License for more details.
757+
758+ You should have received a copy of the GNU Lesser General Public
759+ License along with the GNU C Library; if not, write to the Free
760+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
761+ 02111-1307 USA. */
762+
763+#include <math.h>
764+#include <math_private.h>
765+#include <fenv_libc.h>
766+#include <inttypes.h>
767+
768+#include <sysdep.h>
769+#include <ldsodefs.h>
770+
771+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
772+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
773+static const float two108 = 3.245185536584267269e+32;
774+static const float twom54 = 5.551115123125782702e-17;
775+static const float half = 0.5;
776+
777+/* The method is based on the descriptions in:
778+
779+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
780+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
781+
782+ We find the actual square root and half of its reciprocal
783+ simultaneously. */
784+
785+#ifdef __STDC__
786+double
787+__ieee754_sqrt (double b)
788+#else
789+double
790+__ieee754_sqrt (b)
791+ double b;
792+#endif
793+{
794+ if (__builtin_expect (b > 0, 1))
795+ {
796+ double y, g, h, d, r;
797+ ieee_double_shape_type u;
798+
799+ if (__builtin_expect (b != a_inf.value, 1))
800+ {
801+ fenv_t fe;
802+
803+ fe = fegetenv_register ();
804+
805+ u.value = b;
806+
807+ relax_fenv_state ();
808+
809+ __asm__ ("frsqrte %[estimate], %[x]\n"
810+ : [estimate] "=f" (y) : [x] "f" (b));
811+
812+ /* Following Muller et al, page 168, equation 5.20.
813+
814+ h goes to 1/(2*sqrt(b))
815+ g goes to sqrt(b).
816+
817+ We need three iterations to get within 1ulp. */
818+
819+ /* Indicate that these can be performed prior to the branch. GCC
820+ insists on sinking them below the branch, however; it seems like
821+ they'd be better before the branch so that we can cover any latency
822+ from storing the argument and loading its high word. Oh well. */
823+
824+ g = b * y;
825+ h = 0.5 * y;
826+
827+ /* Handle small numbers by scaling. */
828+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
829+ return __ieee754_sqrt (b * two108) * twom54;
830+
831+#define FMADD(a_, c_, b_) \
832+ ({ double __r; \
833+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
834+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
835+ __r;})
836+#define FNMSUB(a_, c_, b_) \
837+ ({ double __r; \
838+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
839+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
840+ __r;})
841+
842+ r = FNMSUB (g, h, half);
843+ g = FMADD (g, r, g);
844+ h = FMADD (h, r, h);
845+
846+ r = FNMSUB (g, h, half);
847+ g = FMADD (g, r, g);
848+ h = FMADD (h, r, h);
849+
850+ r = FNMSUB (g, h, half);
851+ g = FMADD (g, r, g);
852+ h = FMADD (h, r, h);
853+
854+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
855+
856+ /* Final refinement. */
857+ d = FNMSUB (g, g, b);
858+
859+ fesetenv_register (fe);
860+ return FMADD (d, h, g);
861+ }
862+ }
863+ else if (b < 0)
864+ {
865+ /* For some reason, some PowerPC32 processors don't implement
866+ FE_INVALID_SQRT. */
867+#ifdef FE_INVALID_SQRT
868+ feraiseexcept (FE_INVALID_SQRT);
869+
870+ fenv_union_t u = { .fenv = fegetenv_register () };
871+ if ((u.l & FE_INVALID) == 0)
872+#endif
873+ feraiseexcept (FE_INVALID);
874+ b = a_nan.value;
875+ }
876+ return f_wash (b);
877+}
878Index: libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
879===================================================================
880--- /dev/null
881+++ libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c
882@@ -0,0 +1,101 @@
883+/* Single-precision floating point square root.
884+ Copyright (C) 2010 Free Software Foundation, Inc.
885+ This file is part of the GNU C Library.
886+
887+ The GNU C Library is free software; you can redistribute it and/or
888+ modify it under the terms of the GNU Lesser General Public
889+ License as published by the Free Software Foundation; either
890+ version 2.1 of the License, or (at your option) any later version.
891+
892+ The GNU C Library is distributed in the hope that it will be useful,
893+ but WITHOUT ANY WARRANTY; without even the implied warranty of
894+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
895+ Lesser General Public License for more details.
896+
897+ You should have received a copy of the GNU Lesser General Public
898+ License along with the GNU C Library; if not, write to the Free
899+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
900+ 02111-1307 USA. */
901+
902+#include <math.h>
903+#include <math_private.h>
904+#include <fenv_libc.h>
905+#include <inttypes.h>
906+
907+#include <sysdep.h>
908+#include <ldsodefs.h>
909+
910+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
911+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
912+static const float threehalf = 1.5;
913+
914+/* The method is based on the descriptions in:
915+
916+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
917+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
918+
919+ We find the reciprocal square root and use that to compute the actual
920+ square root. */
921+
922+#ifdef __STDC__
923+float
924+__ieee754_sqrtf (float b)
925+#else
926+float
927+__ieee754_sqrtf (b)
928+ float b;
929+#endif
930+{
931+ if (__builtin_expect (b > 0, 1))
932+ {
933+#define FMSUB(a_, c_, b_) \
934+ ({ double __r; \
935+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
936+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
937+ __r;})
938+#define FNMSUB(a_, c_, b_) \
939+ ({ double __r; \
940+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
941+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
942+ __r;})
943+
944+ if (__builtin_expect (b != a_inf.value, 1))
945+ {
946+ double y, x;
947+ fenv_t fe;
948+
949+ fe = fegetenv_register ();
950+
951+ relax_fenv_state ();
952+
953+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
954+ y = FMSUB (threehalf, b, b);
955+
956+ /* Initial estimate. */
957+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
958+
959+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
960+ x = x * FNMSUB (y, x * x, threehalf);
961+ x = x * FNMSUB (y, x * x, threehalf);
962+ x = x * FNMSUB (y, x * x, threehalf);
963+
964+ /* All done. */
965+ fesetenv_register (fe);
966+ return x * b;
967+ }
968+ }
969+ else if (b < 0)
970+ {
971+ /* For some reason, some PowerPC32 processors don't implement
972+ FE_INVALID_SQRT. */
973+#ifdef FE_INVALID_SQRT
974+ feraiseexcept (FE_INVALID_SQRT);
975+
976+ fenv_union_t u = { .fenv = fegetenv_register () };
977+ if ((u.l & FE_INVALID) == 0)
978+#endif
979+ feraiseexcept (FE_INVALID);
980+ b = a_nan.value;
981+ }
982+ return f_washf (b);
983+}
984Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
985===================================================================
986--- /dev/null
987+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c
988@@ -0,0 +1,134 @@
989+/* Double-precision floating point square root.
990+ Copyright (C) 2010 Free Software Foundation, Inc.
991+ This file is part of the GNU C Library.
992+
993+ The GNU C Library is free software; you can redistribute it and/or
994+ modify it under the terms of the GNU Lesser General Public
995+ License as published by the Free Software Foundation; either
996+ version 2.1 of the License, or (at your option) any later version.
997+
998+ The GNU C Library is distributed in the hope that it will be useful,
999+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1000+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1001+ Lesser General Public License for more details.
1002+
1003+ You should have received a copy of the GNU Lesser General Public
1004+ License along with the GNU C Library; if not, write to the Free
1005+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1006+ 02111-1307 USA. */
1007+
1008+#include <math.h>
1009+#include <math_private.h>
1010+#include <fenv_libc.h>
1011+#include <inttypes.h>
1012+
1013+#include <sysdep.h>
1014+#include <ldsodefs.h>
1015+
1016+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1017+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1018+static const float two108 = 3.245185536584267269e+32;
1019+static const float twom54 = 5.551115123125782702e-17;
1020+static const float half = 0.5;
1021+
1022+/* The method is based on the descriptions in:
1023+
1024+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1025+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1026+
1027+ We find the actual square root and half of its reciprocal
1028+ simultaneously. */
1029+
1030+#ifdef __STDC__
1031+double
1032+__ieee754_sqrt (double b)
1033+#else
1034+double
1035+__ieee754_sqrt (b)
1036+ double b;
1037+#endif
1038+{
1039+ if (__builtin_expect (b > 0, 1))
1040+ {
1041+ double y, g, h, d, r;
1042+ ieee_double_shape_type u;
1043+
1044+ if (__builtin_expect (b != a_inf.value, 1))
1045+ {
1046+ fenv_t fe;
1047+
1048+ fe = fegetenv_register ();
1049+
1050+ u.value = b;
1051+
1052+ relax_fenv_state ();
1053+
1054+ __asm__ ("frsqrte %[estimate], %[x]\n"
1055+ : [estimate] "=f" (y) : [x] "f" (b));
1056+
1057+ /* Following Muller et al, page 168, equation 5.20.
1058+
1059+ h goes to 1/(2*sqrt(b))
1060+ g goes to sqrt(b).
1061+
1062+ We need three iterations to get within 1ulp. */
1063+
1064+ /* Indicate that these can be performed prior to the branch. GCC
1065+ insists on sinking them below the branch, however; it seems like
1066+ they'd be better before the branch so that we can cover any latency
1067+ from storing the argument and loading its high word. Oh well. */
1068+
1069+ g = b * y;
1070+ h = 0.5 * y;
1071+
1072+ /* Handle small numbers by scaling. */
1073+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
1074+ return __ieee754_sqrt (b * two108) * twom54;
1075+
1076+#define FMADD(a_, c_, b_) \
1077+ ({ double __r; \
1078+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
1079+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1080+ __r;})
1081+#define FNMSUB(a_, c_, b_) \
1082+ ({ double __r; \
1083+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1084+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1085+ __r;})
1086+
1087+ r = FNMSUB (g, h, half);
1088+ g = FMADD (g, r, g);
1089+ h = FMADD (h, r, h);
1090+
1091+ r = FNMSUB (g, h, half);
1092+ g = FMADD (g, r, g);
1093+ h = FMADD (h, r, h);
1094+
1095+ r = FNMSUB (g, h, half);
1096+ g = FMADD (g, r, g);
1097+ h = FMADD (h, r, h);
1098+
1099+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
1100+
1101+ /* Final refinement. */
1102+ d = FNMSUB (g, g, b);
1103+
1104+ fesetenv_register (fe);
1105+ return FMADD (d, h, g);
1106+ }
1107+ }
1108+ else if (b < 0)
1109+ {
1110+ /* For some reason, some PowerPC32 processors don't implement
1111+ FE_INVALID_SQRT. */
1112+#ifdef FE_INVALID_SQRT
1113+ feraiseexcept (FE_INVALID_SQRT);
1114+
1115+ fenv_union_t u = { .fenv = fegetenv_register () };
1116+ if ((u.l & FE_INVALID) == 0)
1117+#endif
1118+ feraiseexcept (FE_INVALID);
1119+ b = a_nan.value;
1120+ }
1121+ return f_wash (b);
1122+}
1123Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
1124===================================================================
1125--- /dev/null
1126+++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c
1127@@ -0,0 +1,101 @@
1128+/* Single-precision floating point square root.
1129+ Copyright (C) 2010 Free Software Foundation, Inc.
1130+ This file is part of the GNU C Library.
1131+
1132+ The GNU C Library is free software; you can redistribute it and/or
1133+ modify it under the terms of the GNU Lesser General Public
1134+ License as published by the Free Software Foundation; either
1135+ version 2.1 of the License, or (at your option) any later version.
1136+
1137+ The GNU C Library is distributed in the hope that it will be useful,
1138+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1139+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1140+ Lesser General Public License for more details.
1141+
1142+ You should have received a copy of the GNU Lesser General Public
1143+ License along with the GNU C Library; if not, write to the Free
1144+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1145+ 02111-1307 USA. */
1146+
1147+#include <math.h>
1148+#include <math_private.h>
1149+#include <fenv_libc.h>
1150+#include <inttypes.h>
1151+
1152+#include <sysdep.h>
1153+#include <ldsodefs.h>
1154+
1155+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1156+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1157+static const float threehalf = 1.5;
1158+
1159+/* The method is based on the descriptions in:
1160+
1161+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1162+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1163+
1164+ We find the reciprocal square root and use that to compute the actual
1165+ square root. */
1166+
1167+#ifdef __STDC__
1168+float
1169+__ieee754_sqrtf (float b)
1170+#else
1171+float
1172+__ieee754_sqrtf (b)
1173+ float b;
1174+#endif
1175+{
1176+ if (__builtin_expect (b > 0, 1))
1177+ {
1178+#define FMSUB(a_, c_, b_) \
1179+ ({ double __r; \
1180+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
1181+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1182+ __r;})
1183+#define FNMSUB(a_, c_, b_) \
1184+ ({ double __r; \
1185+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1186+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1187+ __r;})
1188+
1189+ if (__builtin_expect (b != a_inf.value, 1))
1190+ {
1191+ double y, x;
1192+ fenv_t fe;
1193+
1194+ fe = fegetenv_register ();
1195+
1196+ relax_fenv_state ();
1197+
1198+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
1199+ y = FMSUB (threehalf, b, b);
1200+
1201+ /* Initial estimate. */
1202+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
1203+
1204+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
1205+ x = x * FNMSUB (y, x * x, threehalf);
1206+ x = x * FNMSUB (y, x * x, threehalf);
1207+ x = x * FNMSUB (y, x * x, threehalf);
1208+
1209+ /* All done. */
1210+ fesetenv_register (fe);
1211+ return x * b;
1212+ }
1213+ }
1214+ else if (b < 0)
1215+ {
1216+ /* For some reason, some PowerPC32 processors don't implement
1217+ FE_INVALID_SQRT. */
1218+#ifdef FE_INVALID_SQRT
1219+ feraiseexcept (FE_INVALID_SQRT);
1220+
1221+ fenv_union_t u = { .fenv = fegetenv_register () };
1222+ if ((u.l & FE_INVALID) == 0)
1223+#endif
1224+ feraiseexcept (FE_INVALID);
1225+ b = a_nan.value;
1226+ }
1227+ return f_washf (b);
1228+}
1229Index: libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
1230===================================================================
1231--- /dev/null
1232+++ libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c
1233@@ -0,0 +1,134 @@
1234+/* Double-precision floating point square root.
1235+ Copyright (C) 2010 Free Software Foundation, Inc.
1236+ This file is part of the GNU C Library.
1237+
1238+ The GNU C Library is free software; you can redistribute it and/or
1239+ modify it under the terms of the GNU Lesser General Public
1240+ License as published by the Free Software Foundation; either
1241+ version 2.1 of the License, or (at your option) any later version.
1242+
1243+ The GNU C Library is distributed in the hope that it will be useful,
1244+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1245+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1246+ Lesser General Public License for more details.
1247+
1248+ You should have received a copy of the GNU Lesser General Public
1249+ License along with the GNU C Library; if not, write to the Free
1250+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1251+ 02111-1307 USA. */
1252+
1253+#include <math.h>
1254+#include <math_private.h>
1255+#include <fenv_libc.h>
1256+#include <inttypes.h>
1257+
1258+#include <sysdep.h>
1259+#include <ldsodefs.h>
1260+
1261+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1262+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1263+static const float two108 = 3.245185536584267269e+32;
1264+static const float twom54 = 5.551115123125782702e-17;
1265+static const float half = 0.5;
1266+
1267+/* The method is based on the descriptions in:
1268+
1269+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1270+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1271+
1272+ We find the actual square root and half of its reciprocal
1273+ simultaneously. */
1274+
1275+#ifdef __STDC__
1276+double
1277+__ieee754_sqrt (double b)
1278+#else
1279+double
1280+__ieee754_sqrt (b)
1281+ double b;
1282+#endif
1283+{
1284+ if (__builtin_expect (b > 0, 1))
1285+ {
1286+ double y, g, h, d, r;
1287+ ieee_double_shape_type u;
1288+
1289+ if (__builtin_expect (b != a_inf.value, 1))
1290+ {
1291+ fenv_t fe;
1292+
1293+ fe = fegetenv_register ();
1294+
1295+ u.value = b;
1296+
1297+ relax_fenv_state ();
1298+
1299+ __asm__ ("frsqrte %[estimate], %[x]\n"
1300+ : [estimate] "=f" (y) : [x] "f" (b));
1301+
1302+ /* Following Muller et al, page 168, equation 5.20.
1303+
1304+ h goes to 1/(2*sqrt(b))
1305+ g goes to sqrt(b).
1306+
1307+ We need three iterations to get within 1ulp. */
1308+
1309+ /* Indicate that these can be performed prior to the branch. GCC
1310+ insists on sinking them below the branch, however; it seems like
1311+ they'd be better before the branch so that we can cover any latency
1312+ from storing the argument and loading its high word. Oh well. */
1313+
1314+ g = b * y;
1315+ h = 0.5 * y;
1316+
1317+ /* Handle small numbers by scaling. */
1318+ if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0))
1319+ return __ieee754_sqrt (b * two108) * twom54;
1320+
1321+#define FMADD(a_, c_, b_) \
1322+ ({ double __r; \
1323+ __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \
1324+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1325+ __r;})
1326+#define FNMSUB(a_, c_, b_) \
1327+ ({ double __r; \
1328+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1329+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1330+ __r;})
1331+
1332+ r = FNMSUB (g, h, half);
1333+ g = FMADD (g, r, g);
1334+ h = FMADD (h, r, h);
1335+
1336+ r = FNMSUB (g, h, half);
1337+ g = FMADD (g, r, g);
1338+ h = FMADD (h, r, h);
1339+
1340+ r = FNMSUB (g, h, half);
1341+ g = FMADD (g, r, g);
1342+ h = FMADD (h, r, h);
1343+
1344+ /* g is now +/- 1ulp, or exactly equal to, the square root of b. */
1345+
1346+ /* Final refinement. */
1347+ d = FNMSUB (g, g, b);
1348+
1349+ fesetenv_register (fe);
1350+ return FMADD (d, h, g);
1351+ }
1352+ }
1353+ else if (b < 0)
1354+ {
1355+ /* For some reason, some PowerPC32 processors don't implement
1356+ FE_INVALID_SQRT. */
1357+#ifdef FE_INVALID_SQRT
1358+ feraiseexcept (FE_INVALID_SQRT);
1359+
1360+ fenv_union_t u = { .fenv = fegetenv_register () };
1361+ if ((u.l & FE_INVALID) == 0)
1362+#endif
1363+ feraiseexcept (FE_INVALID);
1364+ b = a_nan.value;
1365+ }
1366+ return f_wash (b);
1367+}
1368Index: libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
1369===================================================================
1370--- /dev/null
1371+++ libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c
1372@@ -0,0 +1,101 @@
1373+/* Single-precision floating point square root.
1374+ Copyright (C) 2010 Free Software Foundation, Inc.
1375+ This file is part of the GNU C Library.
1376+
1377+ The GNU C Library is free software; you can redistribute it and/or
1378+ modify it under the terms of the GNU Lesser General Public
1379+ License as published by the Free Software Foundation; either
1380+ version 2.1 of the License, or (at your option) any later version.
1381+
1382+ The GNU C Library is distributed in the hope that it will be useful,
1383+ but WITHOUT ANY WARRANTY; without even the implied warranty of
1384+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1385+ Lesser General Public License for more details.
1386+
1387+ You should have received a copy of the GNU Lesser General Public
1388+ License along with the GNU C Library; if not, write to the Free
1389+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1390+ 02111-1307 USA. */
1391+
1392+#include <math.h>
1393+#include <math_private.h>
1394+#include <fenv_libc.h>
1395+#include <inttypes.h>
1396+
1397+#include <sysdep.h>
1398+#include <ldsodefs.h>
1399+
1400+static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
1401+static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
1402+static const float threehalf = 1.5;
1403+
1404+/* The method is based on the descriptions in:
1405+
1406+ _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5;
1407+ _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9
1408+
1409+ We find the reciprocal square root and use that to compute the actual
1410+ square root. */
1411+
1412+#ifdef __STDC__
1413+float
1414+__ieee754_sqrtf (float b)
1415+#else
1416+float
1417+__ieee754_sqrtf (b)
1418+ float b;
1419+#endif
1420+{
1421+ if (__builtin_expect (b > 0, 1))
1422+ {
1423+#define FMSUB(a_, c_, b_) \
1424+ ({ double __r; \
1425+ __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \
1426+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1427+ __r;})
1428+#define FNMSUB(a_, c_, b_) \
1429+ ({ double __r; \
1430+ __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \
1431+ : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \
1432+ __r;})
1433+
1434+ if (__builtin_expect (b != a_inf.value, 1))
1435+ {
1436+ double y, x;
1437+ fenv_t fe;
1438+
1439+ fe = fegetenv_register ();
1440+
1441+ relax_fenv_state ();
1442+
1443+ /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */
1444+ y = FMSUB (threehalf, b, b);
1445+
1446+ /* Initial estimate. */
1447+ __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));
1448+
1449+ /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */
1450+ x = x * FNMSUB (y, x * x, threehalf);
1451+ x = x * FNMSUB (y, x * x, threehalf);
1452+ x = x * FNMSUB (y, x * x, threehalf);
1453+
1454+ /* All done. */
1455+ fesetenv_register (fe);
1456+ return x * b;
1457+ }
1458+ }
1459+ else if (b < 0)
1460+ {
1461+ /* For some reason, some PowerPC32 processors don't implement
1462+ FE_INVALID_SQRT. */
1463+#ifdef FE_INVALID_SQRT
1464+ feraiseexcept (FE_INVALID_SQRT);
1465+
1466+ fenv_union_t u = { .fenv = fegetenv_register () };
1467+ if ((u.l & FE_INVALID) == 0)
1468+#endif
1469+ feraiseexcept (FE_INVALID);
1470+ b = a_nan.value;
1471+ }
1472+ return f_washf (b);
1473+}
1474Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
1475===================================================================
1476--- /dev/null
1477+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies
1478@@ -0,0 +1 @@
1479+powerpc/powerpc32/603e/fpu
1480Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
1481===================================================================
1482--- /dev/null
1483+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies
1484@@ -0,0 +1,2 @@
1485+# e300c3 is a variant of 603e so use the same optimizations for sqrt
1486+powerpc/powerpc32/603e/fpu
1487Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
1488===================================================================
1489--- /dev/null
1490+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies
1491@@ -0,0 +1 @@
1492+powerpc/powerpc32/e500mc/fpu
1493Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
1494===================================================================
1495--- /dev/null
1496+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies
1497@@ -0,0 +1 @@
1498+powerpc/powerpc32/e5500/fpu
1499Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
1500===================================================================
1501--- /dev/null
1502+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies
1503@@ -0,0 +1 @@
1504+powerpc/powerpc32/e6500/fpu
1505Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
1506===================================================================
1507--- /dev/null
1508+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies
1509@@ -0,0 +1 @@
1510+powerpc/powerpc64/e5500/fpu
1511Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
1512===================================================================
1513--- /dev/null
1514+++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies
1515@@ -0,0 +1 @@
1516+powerpc/powerpc64/e6500/fpu