diff options
Diffstat (limited to 'meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch')
-rw-r--r-- | meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch | 1516 |
1 files changed, 1516 insertions, 0 deletions
diff --git a/meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch b/meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch new file mode 100644 index 0000000000..689b79c61c --- /dev/null +++ b/meta/recipes-core/eglibc/eglibc-2.19/glibc.fix_sqrt2.patch | |||
@@ -0,0 +1,1516 @@ | |||
1 | Signed-of-by: Edmar Wienskoski <edmar@freescale.com> | ||
2 | Upstream-Status: Pending | ||
3 | |||
4 | Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | ||
5 | =================================================================== | ||
6 | --- /dev/null | ||
7 | +++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | ||
8 | @@ -0,0 +1,134 @@ | ||
9 | +/* Double-precision floating point square root. | ||
10 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
11 | + This file is part of the GNU C Library. | ||
12 | + | ||
13 | + The GNU C Library is free software; you can redistribute it and/or | ||
14 | + modify it under the terms of the GNU Lesser General Public | ||
15 | + License as published by the Free Software Foundation; either | ||
16 | + version 2.1 of the License, or (at your option) any later version. | ||
17 | + | ||
18 | + The GNU C Library is distributed in the hope that it will be useful, | ||
19 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
21 | + Lesser General Public License for more details. | ||
22 | + | ||
23 | + You should have received a copy of the GNU Lesser General Public | ||
24 | + License along with the GNU C Library; if not, write to the Free | ||
25 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
26 | + 02111-1307 USA. */ | ||
27 | + | ||
28 | +#include <math.h> | ||
29 | +#include <math_private.h> | ||
30 | +#include <fenv_libc.h> | ||
31 | +#include <inttypes.h> | ||
32 | + | ||
33 | +#include <sysdep.h> | ||
34 | +#include <ldsodefs.h> | ||
35 | + | ||
36 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
37 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
38 | +static const float two108 = 3.245185536584267269e+32; | ||
39 | +static const float twom54 = 5.551115123125782702e-17; | ||
40 | +static const float half = 0.5; | ||
41 | + | ||
42 | +/* The method is based on the descriptions in: | ||
43 | + | ||
44 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
45 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
46 | + | ||
47 | + We find the actual square root and half of its reciprocal | ||
48 | + simultaneously. */ | ||
49 | + | ||
50 | +#ifdef __STDC__ | ||
51 | +double | ||
52 | +__ieee754_sqrt (double b) | ||
53 | +#else | ||
54 | +double | ||
55 | +__ieee754_sqrt (b) | ||
56 | + double b; | ||
57 | +#endif | ||
58 | +{ | ||
59 | + if (__builtin_expect (b > 0, 1)) | ||
60 | + { | ||
61 | + double y, g, h, d, r; | ||
62 | + ieee_double_shape_type u; | ||
63 | + | ||
64 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
65 | + { | ||
66 | + fenv_t fe; | ||
67 | + | ||
68 | + fe = fegetenv_register (); | ||
69 | + | ||
70 | + u.value = b; | ||
71 | + | ||
72 | + relax_fenv_state (); | ||
73 | + | ||
74 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
75 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
76 | + | ||
77 | + /* Following Muller et al, page 168, equation 5.20. | ||
78 | + | ||
79 | + h goes to 1/(2*sqrt(b)) | ||
80 | + g goes to sqrt(b). | ||
81 | + | ||
82 | + We need three iterations to get within 1ulp. */ | ||
83 | + | ||
84 | + /* Indicate that these can be performed prior to the branch. GCC | ||
85 | + insists on sinking them below the branch, however; it seems like | ||
86 | + they'd be better before the branch so that we can cover any latency | ||
87 | + from storing the argument and loading its high word. Oh well. */ | ||
88 | + | ||
89 | + g = b * y; | ||
90 | + h = 0.5 * y; | ||
91 | + | ||
92 | + /* Handle small numbers by scaling. */ | ||
93 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
94 | + return __ieee754_sqrt (b * two108) * twom54; | ||
95 | + | ||
96 | +#define FMADD(a_, c_, b_) \ | ||
97 | + ({ double __r; \ | ||
98 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
99 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
100 | + __r;}) | ||
101 | +#define FNMSUB(a_, c_, b_) \ | ||
102 | + ({ double __r; \ | ||
103 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
104 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
105 | + __r;}) | ||
106 | + | ||
107 | + r = FNMSUB (g, h, half); | ||
108 | + g = FMADD (g, r, g); | ||
109 | + h = FMADD (h, r, h); | ||
110 | + | ||
111 | + r = FNMSUB (g, h, half); | ||
112 | + g = FMADD (g, r, g); | ||
113 | + h = FMADD (h, r, h); | ||
114 | + | ||
115 | + r = FNMSUB (g, h, half); | ||
116 | + g = FMADD (g, r, g); | ||
117 | + h = FMADD (h, r, h); | ||
118 | + | ||
119 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
120 | + | ||
121 | + /* Final refinement. */ | ||
122 | + d = FNMSUB (g, g, b); | ||
123 | + | ||
124 | + fesetenv_register (fe); | ||
125 | + return FMADD (d, h, g); | ||
126 | + } | ||
127 | + } | ||
128 | + else if (b < 0) | ||
129 | + { | ||
130 | + /* For some reason, some PowerPC32 processors don't implement | ||
131 | + FE_INVALID_SQRT. */ | ||
132 | +#ifdef FE_INVALID_SQRT | ||
133 | + feraiseexcept (FE_INVALID_SQRT); | ||
134 | + | ||
135 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
136 | + if ((u.l & FE_INVALID) == 0) | ||
137 | +#endif | ||
138 | + feraiseexcept (FE_INVALID); | ||
139 | + b = a_nan.value; | ||
140 | + } | ||
141 | + return f_wash (b); | ||
142 | +} | ||
143 | Index: libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | ||
144 | =================================================================== | ||
145 | --- /dev/null | ||
146 | +++ libc/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | ||
147 | @@ -0,0 +1,101 @@ | ||
148 | +/* Single-precision floating point square root. | ||
149 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
150 | + This file is part of the GNU C Library. | ||
151 | + | ||
152 | + The GNU C Library is free software; you can redistribute it and/or | ||
153 | + modify it under the terms of the GNU Lesser General Public | ||
154 | + License as published by the Free Software Foundation; either | ||
155 | + version 2.1 of the License, or (at your option) any later version. | ||
156 | + | ||
157 | + The GNU C Library is distributed in the hope that it will be useful, | ||
158 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
159 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
160 | + Lesser General Public License for more details. | ||
161 | + | ||
162 | + You should have received a copy of the GNU Lesser General Public | ||
163 | + License along with the GNU C Library; if not, write to the Free | ||
164 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
165 | + 02111-1307 USA. */ | ||
166 | + | ||
167 | +#include <math.h> | ||
168 | +#include <math_private.h> | ||
169 | +#include <fenv_libc.h> | ||
170 | +#include <inttypes.h> | ||
171 | + | ||
172 | +#include <sysdep.h> | ||
173 | +#include <ldsodefs.h> | ||
174 | + | ||
175 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
176 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
177 | +static const float threehalf = 1.5; | ||
178 | + | ||
179 | +/* The method is based on the descriptions in: | ||
180 | + | ||
181 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
182 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
183 | + | ||
184 | + We find the reciprocal square root and use that to compute the actual | ||
185 | + square root. */ | ||
186 | + | ||
187 | +#ifdef __STDC__ | ||
188 | +float | ||
189 | +__ieee754_sqrtf (float b) | ||
190 | +#else | ||
191 | +float | ||
192 | +__ieee754_sqrtf (b) | ||
193 | + float b; | ||
194 | +#endif | ||
195 | +{ | ||
196 | + if (__builtin_expect (b > 0, 1)) | ||
197 | + { | ||
198 | +#define FMSUB(a_, c_, b_) \ | ||
199 | + ({ double __r; \ | ||
200 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
201 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
202 | + __r;}) | ||
203 | +#define FNMSUB(a_, c_, b_) \ | ||
204 | + ({ double __r; \ | ||
205 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
206 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
207 | + __r;}) | ||
208 | + | ||
209 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
210 | + { | ||
211 | + double y, x; | ||
212 | + fenv_t fe; | ||
213 | + | ||
214 | + fe = fegetenv_register (); | ||
215 | + | ||
216 | + relax_fenv_state (); | ||
217 | + | ||
218 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
219 | + y = FMSUB (threehalf, b, b); | ||
220 | + | ||
221 | + /* Initial estimate. */ | ||
222 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
223 | + | ||
224 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
225 | + x = x * FNMSUB (y, x * x, threehalf); | ||
226 | + x = x * FNMSUB (y, x * x, threehalf); | ||
227 | + x = x * FNMSUB (y, x * x, threehalf); | ||
228 | + | ||
229 | + /* All done. */ | ||
230 | + fesetenv_register (fe); | ||
231 | + return x * b; | ||
232 | + } | ||
233 | + } | ||
234 | + else if (b < 0) | ||
235 | + { | ||
236 | + /* For some reason, some PowerPC32 processors don't implement | ||
237 | + FE_INVALID_SQRT. */ | ||
238 | +#ifdef FE_INVALID_SQRT | ||
239 | + feraiseexcept (FE_INVALID_SQRT); | ||
240 | + | ||
241 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
242 | + if ((u.l & FE_INVALID) == 0) | ||
243 | +#endif | ||
244 | + feraiseexcept (FE_INVALID); | ||
245 | + b = a_nan.value; | ||
246 | + } | ||
247 | + return f_washf (b); | ||
248 | +} | ||
249 | Index: libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | ||
250 | =================================================================== | ||
251 | --- /dev/null | ||
252 | +++ libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | ||
253 | @@ -0,0 +1,134 @@ | ||
254 | +/* Double-precision floating point square root. | ||
255 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
256 | + This file is part of the GNU C Library. | ||
257 | + | ||
258 | + The GNU C Library is free software; you can redistribute it and/or | ||
259 | + modify it under the terms of the GNU Lesser General Public | ||
260 | + License as published by the Free Software Foundation; either | ||
261 | + version 2.1 of the License, or (at your option) any later version. | ||
262 | + | ||
263 | + The GNU C Library is distributed in the hope that it will be useful, | ||
264 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
265 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
266 | + Lesser General Public License for more details. | ||
267 | + | ||
268 | + You should have received a copy of the GNU Lesser General Public | ||
269 | + License along with the GNU C Library; if not, write to the Free | ||
270 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
271 | + 02111-1307 USA. */ | ||
272 | + | ||
273 | +#include <math.h> | ||
274 | +#include <math_private.h> | ||
275 | +#include <fenv_libc.h> | ||
276 | +#include <inttypes.h> | ||
277 | + | ||
278 | +#include <sysdep.h> | ||
279 | +#include <ldsodefs.h> | ||
280 | + | ||
281 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
282 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
283 | +static const float two108 = 3.245185536584267269e+32; | ||
284 | +static const float twom54 = 5.551115123125782702e-17; | ||
285 | +static const float half = 0.5; | ||
286 | + | ||
287 | +/* The method is based on the descriptions in: | ||
288 | + | ||
289 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
290 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
291 | + | ||
292 | + We find the actual square root and half of its reciprocal | ||
293 | + simultaneously. */ | ||
294 | + | ||
295 | +#ifdef __STDC__ | ||
296 | +double | ||
297 | +__ieee754_sqrt (double b) | ||
298 | +#else | ||
299 | +double | ||
300 | +__ieee754_sqrt (b) | ||
301 | + double b; | ||
302 | +#endif | ||
303 | +{ | ||
304 | + if (__builtin_expect (b > 0, 1)) | ||
305 | + { | ||
306 | + double y, g, h, d, r; | ||
307 | + ieee_double_shape_type u; | ||
308 | + | ||
309 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
310 | + { | ||
311 | + fenv_t fe; | ||
312 | + | ||
313 | + fe = fegetenv_register (); | ||
314 | + | ||
315 | + u.value = b; | ||
316 | + | ||
317 | + relax_fenv_state (); | ||
318 | + | ||
319 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
320 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
321 | + | ||
322 | + /* Following Muller et al, page 168, equation 5.20. | ||
323 | + | ||
324 | + h goes to 1/(2*sqrt(b)) | ||
325 | + g goes to sqrt(b). | ||
326 | + | ||
327 | + We need three iterations to get within 1ulp. */ | ||
328 | + | ||
329 | + /* Indicate that these can be performed prior to the branch. GCC | ||
330 | + insists on sinking them below the branch, however; it seems like | ||
331 | + they'd be better before the branch so that we can cover any latency | ||
332 | + from storing the argument and loading its high word. Oh well. */ | ||
333 | + | ||
334 | + g = b * y; | ||
335 | + h = 0.5 * y; | ||
336 | + | ||
337 | + /* Handle small numbers by scaling. */ | ||
338 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
339 | + return __ieee754_sqrt (b * two108) * twom54; | ||
340 | + | ||
341 | +#define FMADD(a_, c_, b_) \ | ||
342 | + ({ double __r; \ | ||
343 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
344 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
345 | + __r;}) | ||
346 | +#define FNMSUB(a_, c_, b_) \ | ||
347 | + ({ double __r; \ | ||
348 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
349 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
350 | + __r;}) | ||
351 | + | ||
352 | + r = FNMSUB (g, h, half); | ||
353 | + g = FMADD (g, r, g); | ||
354 | + h = FMADD (h, r, h); | ||
355 | + | ||
356 | + r = FNMSUB (g, h, half); | ||
357 | + g = FMADD (g, r, g); | ||
358 | + h = FMADD (h, r, h); | ||
359 | + | ||
360 | + r = FNMSUB (g, h, half); | ||
361 | + g = FMADD (g, r, g); | ||
362 | + h = FMADD (h, r, h); | ||
363 | + | ||
364 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
365 | + | ||
366 | + /* Final refinement. */ | ||
367 | + d = FNMSUB (g, g, b); | ||
368 | + | ||
369 | + fesetenv_register (fe); | ||
370 | + return FMADD (d, h, g); | ||
371 | + } | ||
372 | + } | ||
373 | + else if (b < 0) | ||
374 | + { | ||
375 | + /* For some reason, some PowerPC32 processors don't implement | ||
376 | + FE_INVALID_SQRT. */ | ||
377 | +#ifdef FE_INVALID_SQRT | ||
378 | + feraiseexcept (FE_INVALID_SQRT); | ||
379 | + | ||
380 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
381 | + if ((u.l & FE_INVALID) == 0) | ||
382 | +#endif | ||
383 | + feraiseexcept (FE_INVALID); | ||
384 | + b = a_nan.value; | ||
385 | + } | ||
386 | + return f_wash (b); | ||
387 | +} | ||
388 | Index: libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | ||
389 | =================================================================== | ||
390 | --- /dev/null | ||
391 | +++ libc/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | ||
392 | @@ -0,0 +1,101 @@ | ||
393 | +/* Single-precision floating point square root. | ||
394 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
395 | + This file is part of the GNU C Library. | ||
396 | + | ||
397 | + The GNU C Library is free software; you can redistribute it and/or | ||
398 | + modify it under the terms of the GNU Lesser General Public | ||
399 | + License as published by the Free Software Foundation; either | ||
400 | + version 2.1 of the License, or (at your option) any later version. | ||
401 | + | ||
402 | + The GNU C Library is distributed in the hope that it will be useful, | ||
403 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
404 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
405 | + Lesser General Public License for more details. | ||
406 | + | ||
407 | + You should have received a copy of the GNU Lesser General Public | ||
408 | + License along with the GNU C Library; if not, write to the Free | ||
409 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
410 | + 02111-1307 USA. */ | ||
411 | + | ||
412 | +#include <math.h> | ||
413 | +#include <math_private.h> | ||
414 | +#include <fenv_libc.h> | ||
415 | +#include <inttypes.h> | ||
416 | + | ||
417 | +#include <sysdep.h> | ||
418 | +#include <ldsodefs.h> | ||
419 | + | ||
420 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
421 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
422 | +static const float threehalf = 1.5; | ||
423 | + | ||
424 | +/* The method is based on the descriptions in: | ||
425 | + | ||
426 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
427 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
428 | + | ||
429 | + We find the reciprocal square root and use that to compute the actual | ||
430 | + square root. */ | ||
431 | + | ||
432 | +#ifdef __STDC__ | ||
433 | +float | ||
434 | +__ieee754_sqrtf (float b) | ||
435 | +#else | ||
436 | +float | ||
437 | +__ieee754_sqrtf (b) | ||
438 | + float b; | ||
439 | +#endif | ||
440 | +{ | ||
441 | + if (__builtin_expect (b > 0, 1)) | ||
442 | + { | ||
443 | +#define FMSUB(a_, c_, b_) \ | ||
444 | + ({ double __r; \ | ||
445 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
446 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
447 | + __r;}) | ||
448 | +#define FNMSUB(a_, c_, b_) \ | ||
449 | + ({ double __r; \ | ||
450 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
451 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
452 | + __r;}) | ||
453 | + | ||
454 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
455 | + { | ||
456 | + double y, x; | ||
457 | + fenv_t fe; | ||
458 | + | ||
459 | + fe = fegetenv_register (); | ||
460 | + | ||
461 | + relax_fenv_state (); | ||
462 | + | ||
463 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
464 | + y = FMSUB (threehalf, b, b); | ||
465 | + | ||
466 | + /* Initial estimate. */ | ||
467 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
468 | + | ||
469 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
470 | + x = x * FNMSUB (y, x * x, threehalf); | ||
471 | + x = x * FNMSUB (y, x * x, threehalf); | ||
472 | + x = x * FNMSUB (y, x * x, threehalf); | ||
473 | + | ||
474 | + /* All done. */ | ||
475 | + fesetenv_register (fe); | ||
476 | + return x * b; | ||
477 | + } | ||
478 | + } | ||
479 | + else if (b < 0) | ||
480 | + { | ||
481 | + /* For some reason, some PowerPC32 processors don't implement | ||
482 | + FE_INVALID_SQRT. */ | ||
483 | +#ifdef FE_INVALID_SQRT | ||
484 | + feraiseexcept (FE_INVALID_SQRT); | ||
485 | + | ||
486 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
487 | + if ((u.l & FE_INVALID) == 0) | ||
488 | +#endif | ||
489 | + feraiseexcept (FE_INVALID); | ||
490 | + b = a_nan.value; | ||
491 | + } | ||
492 | + return f_washf (b); | ||
493 | +} | ||
494 | Index: libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | ||
495 | =================================================================== | ||
496 | --- /dev/null | ||
497 | +++ libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | ||
498 | @@ -0,0 +1,134 @@ | ||
499 | +/* Double-precision floating point square root. | ||
500 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
501 | + This file is part of the GNU C Library. | ||
502 | + | ||
503 | + The GNU C Library is free software; you can redistribute it and/or | ||
504 | + modify it under the terms of the GNU Lesser General Public | ||
505 | + License as published by the Free Software Foundation; either | ||
506 | + version 2.1 of the License, or (at your option) any later version. | ||
507 | + | ||
508 | + The GNU C Library is distributed in the hope that it will be useful, | ||
509 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
510 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
511 | + Lesser General Public License for more details. | ||
512 | + | ||
513 | + You should have received a copy of the GNU Lesser General Public | ||
514 | + License along with the GNU C Library; if not, write to the Free | ||
515 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
516 | + 02111-1307 USA. */ | ||
517 | + | ||
518 | +#include <math.h> | ||
519 | +#include <math_private.h> | ||
520 | +#include <fenv_libc.h> | ||
521 | +#include <inttypes.h> | ||
522 | + | ||
523 | +#include <sysdep.h> | ||
524 | +#include <ldsodefs.h> | ||
525 | + | ||
526 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
527 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
528 | +static const float two108 = 3.245185536584267269e+32; | ||
529 | +static const float twom54 = 5.551115123125782702e-17; | ||
530 | +static const float half = 0.5; | ||
531 | + | ||
532 | +/* The method is based on the descriptions in: | ||
533 | + | ||
534 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
535 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
536 | + | ||
537 | + We find the actual square root and half of its reciprocal | ||
538 | + simultaneously. */ | ||
539 | + | ||
540 | +#ifdef __STDC__ | ||
541 | +double | ||
542 | +__ieee754_sqrt (double b) | ||
543 | +#else | ||
544 | +double | ||
545 | +__ieee754_sqrt (b) | ||
546 | + double b; | ||
547 | +#endif | ||
548 | +{ | ||
549 | + if (__builtin_expect (b > 0, 1)) | ||
550 | + { | ||
551 | + double y, g, h, d, r; | ||
552 | + ieee_double_shape_type u; | ||
553 | + | ||
554 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
555 | + { | ||
556 | + fenv_t fe; | ||
557 | + | ||
558 | + fe = fegetenv_register (); | ||
559 | + | ||
560 | + u.value = b; | ||
561 | + | ||
562 | + relax_fenv_state (); | ||
563 | + | ||
564 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
565 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
566 | + | ||
567 | + /* Following Muller et al, page 168, equation 5.20. | ||
568 | + | ||
569 | + h goes to 1/(2*sqrt(b)) | ||
570 | + g goes to sqrt(b). | ||
571 | + | ||
572 | + We need three iterations to get within 1ulp. */ | ||
573 | + | ||
574 | + /* Indicate that these can be performed prior to the branch. GCC | ||
575 | + insists on sinking them below the branch, however; it seems like | ||
576 | + they'd be better before the branch so that we can cover any latency | ||
577 | + from storing the argument and loading its high word. Oh well. */ | ||
578 | + | ||
579 | + g = b * y; | ||
580 | + h = 0.5 * y; | ||
581 | + | ||
582 | + /* Handle small numbers by scaling. */ | ||
583 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
584 | + return __ieee754_sqrt (b * two108) * twom54; | ||
585 | + | ||
586 | +#define FMADD(a_, c_, b_) \ | ||
587 | + ({ double __r; \ | ||
588 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
589 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
590 | + __r;}) | ||
591 | +#define FNMSUB(a_, c_, b_) \ | ||
592 | + ({ double __r; \ | ||
593 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
594 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
595 | + __r;}) | ||
596 | + | ||
597 | + r = FNMSUB (g, h, half); | ||
598 | + g = FMADD (g, r, g); | ||
599 | + h = FMADD (h, r, h); | ||
600 | + | ||
601 | + r = FNMSUB (g, h, half); | ||
602 | + g = FMADD (g, r, g); | ||
603 | + h = FMADD (h, r, h); | ||
604 | + | ||
605 | + r = FNMSUB (g, h, half); | ||
606 | + g = FMADD (g, r, g); | ||
607 | + h = FMADD (h, r, h); | ||
608 | + | ||
609 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
610 | + | ||
611 | + /* Final refinement. */ | ||
612 | + d = FNMSUB (g, g, b); | ||
613 | + | ||
614 | + fesetenv_register (fe); | ||
615 | + return FMADD (d, h, g); | ||
616 | + } | ||
617 | + } | ||
618 | + else if (b < 0) | ||
619 | + { | ||
620 | + /* For some reason, some PowerPC32 processors don't implement | ||
621 | + FE_INVALID_SQRT. */ | ||
622 | +#ifdef FE_INVALID_SQRT | ||
623 | + feraiseexcept (FE_INVALID_SQRT); | ||
624 | + | ||
625 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
626 | + if ((u.l & FE_INVALID) == 0) | ||
627 | +#endif | ||
628 | + feraiseexcept (FE_INVALID); | ||
629 | + b = a_nan.value; | ||
630 | + } | ||
631 | + return f_wash (b); | ||
632 | +} | ||
633 | Index: libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | ||
634 | =================================================================== | ||
635 | --- /dev/null | ||
636 | +++ libc/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | ||
637 | @@ -0,0 +1,101 @@ | ||
638 | +/* Single-precision floating point square root. | ||
639 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
640 | + This file is part of the GNU C Library. | ||
641 | + | ||
642 | + The GNU C Library is free software; you can redistribute it and/or | ||
643 | + modify it under the terms of the GNU Lesser General Public | ||
644 | + License as published by the Free Software Foundation; either | ||
645 | + version 2.1 of the License, or (at your option) any later version. | ||
646 | + | ||
647 | + The GNU C Library is distributed in the hope that it will be useful, | ||
648 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
649 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
650 | + Lesser General Public License for more details. | ||
651 | + | ||
652 | + You should have received a copy of the GNU Lesser General Public | ||
653 | + License along with the GNU C Library; if not, write to the Free | ||
654 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
655 | + 02111-1307 USA. */ | ||
656 | + | ||
657 | +#include <math.h> | ||
658 | +#include <math_private.h> | ||
659 | +#include <fenv_libc.h> | ||
660 | +#include <inttypes.h> | ||
661 | + | ||
662 | +#include <sysdep.h> | ||
663 | +#include <ldsodefs.h> | ||
664 | + | ||
665 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
666 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
667 | +static const float threehalf = 1.5; | ||
668 | + | ||
669 | +/* The method is based on the descriptions in: | ||
670 | + | ||
671 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
672 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
673 | + | ||
674 | + We find the reciprocal square root and use that to compute the actual | ||
675 | + square root. */ | ||
676 | + | ||
677 | +#ifdef __STDC__ | ||
678 | +float | ||
679 | +__ieee754_sqrtf (float b) | ||
680 | +#else | ||
681 | +float | ||
682 | +__ieee754_sqrtf (b) | ||
683 | + float b; | ||
684 | +#endif | ||
685 | +{ | ||
686 | + if (__builtin_expect (b > 0, 1)) | ||
687 | + { | ||
688 | +#define FMSUB(a_, c_, b_) \ | ||
689 | + ({ double __r; \ | ||
690 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
691 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
692 | + __r;}) | ||
693 | +#define FNMSUB(a_, c_, b_) \ | ||
694 | + ({ double __r; \ | ||
695 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
696 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
697 | + __r;}) | ||
698 | + | ||
699 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
700 | + { | ||
701 | + double y, x; | ||
702 | + fenv_t fe; | ||
703 | + | ||
704 | + fe = fegetenv_register (); | ||
705 | + | ||
706 | + relax_fenv_state (); | ||
707 | + | ||
708 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
709 | + y = FMSUB (threehalf, b, b); | ||
710 | + | ||
711 | + /* Initial estimate. */ | ||
712 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
713 | + | ||
714 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
715 | + x = x * FNMSUB (y, x * x, threehalf); | ||
716 | + x = x * FNMSUB (y, x * x, threehalf); | ||
717 | + x = x * FNMSUB (y, x * x, threehalf); | ||
718 | + | ||
719 | + /* All done. */ | ||
720 | + fesetenv_register (fe); | ||
721 | + return x * b; | ||
722 | + } | ||
723 | + } | ||
724 | + else if (b < 0) | ||
725 | + { | ||
726 | + /* For some reason, some PowerPC32 processors don't implement | ||
727 | + FE_INVALID_SQRT. */ | ||
728 | +#ifdef FE_INVALID_SQRT | ||
729 | + feraiseexcept (FE_INVALID_SQRT); | ||
730 | + | ||
731 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
732 | + if ((u.l & FE_INVALID) == 0) | ||
733 | +#endif | ||
734 | + feraiseexcept (FE_INVALID); | ||
735 | + b = a_nan.value; | ||
736 | + } | ||
737 | + return f_washf (b); | ||
738 | +} | ||
739 | Index: libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | ||
740 | =================================================================== | ||
741 | --- /dev/null | ||
742 | +++ libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | ||
743 | @@ -0,0 +1,134 @@ | ||
744 | +/* Double-precision floating point square root. | ||
745 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
746 | + This file is part of the GNU C Library. | ||
747 | + | ||
748 | + The GNU C Library is free software; you can redistribute it and/or | ||
749 | + modify it under the terms of the GNU Lesser General Public | ||
750 | + License as published by the Free Software Foundation; either | ||
751 | + version 2.1 of the License, or (at your option) any later version. | ||
752 | + | ||
753 | + The GNU C Library is distributed in the hope that it will be useful, | ||
754 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
755 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
756 | + Lesser General Public License for more details. | ||
757 | + | ||
758 | + You should have received a copy of the GNU Lesser General Public | ||
759 | + License along with the GNU C Library; if not, write to the Free | ||
760 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
761 | + 02111-1307 USA. */ | ||
762 | + | ||
763 | +#include <math.h> | ||
764 | +#include <math_private.h> | ||
765 | +#include <fenv_libc.h> | ||
766 | +#include <inttypes.h> | ||
767 | + | ||
768 | +#include <sysdep.h> | ||
769 | +#include <ldsodefs.h> | ||
770 | + | ||
771 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
772 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
773 | +static const float two108 = 3.245185536584267269e+32; | ||
774 | +static const float twom54 = 5.551115123125782702e-17; | ||
775 | +static const float half = 0.5; | ||
776 | + | ||
777 | +/* The method is based on the descriptions in: | ||
778 | + | ||
779 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
780 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
781 | + | ||
782 | + We find the actual square root and half of its reciprocal | ||
783 | + simultaneously. */ | ||
784 | + | ||
785 | +#ifdef __STDC__ | ||
786 | +double | ||
787 | +__ieee754_sqrt (double b) | ||
788 | +#else | ||
789 | +double | ||
790 | +__ieee754_sqrt (b) | ||
791 | + double b; | ||
792 | +#endif | ||
793 | +{ | ||
794 | + if (__builtin_expect (b > 0, 1)) | ||
795 | + { | ||
796 | + double y, g, h, d, r; | ||
797 | + ieee_double_shape_type u; | ||
798 | + | ||
799 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
800 | + { | ||
801 | + fenv_t fe; | ||
802 | + | ||
803 | + fe = fegetenv_register (); | ||
804 | + | ||
805 | + u.value = b; | ||
806 | + | ||
807 | + relax_fenv_state (); | ||
808 | + | ||
809 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
810 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
811 | + | ||
812 | + /* Following Muller et al, page 168, equation 5.20. | ||
813 | + | ||
814 | + h goes to 1/(2*sqrt(b)) | ||
815 | + g goes to sqrt(b). | ||
816 | + | ||
817 | + We need three iterations to get within 1ulp. */ | ||
818 | + | ||
819 | + /* Indicate that these can be performed prior to the branch. GCC | ||
820 | + insists on sinking them below the branch, however; it seems like | ||
821 | + they'd be better before the branch so that we can cover any latency | ||
822 | + from storing the argument and loading its high word. Oh well. */ | ||
823 | + | ||
824 | + g = b * y; | ||
825 | + h = 0.5 * y; | ||
826 | + | ||
827 | + /* Handle small numbers by scaling. */ | ||
828 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
829 | + return __ieee754_sqrt (b * two108) * twom54; | ||
830 | + | ||
831 | +#define FMADD(a_, c_, b_) \ | ||
832 | + ({ double __r; \ | ||
833 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
834 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
835 | + __r;}) | ||
836 | +#define FNMSUB(a_, c_, b_) \ | ||
837 | + ({ double __r; \ | ||
838 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
839 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
840 | + __r;}) | ||
841 | + | ||
842 | + r = FNMSUB (g, h, half); | ||
843 | + g = FMADD (g, r, g); | ||
844 | + h = FMADD (h, r, h); | ||
845 | + | ||
846 | + r = FNMSUB (g, h, half); | ||
847 | + g = FMADD (g, r, g); | ||
848 | + h = FMADD (h, r, h); | ||
849 | + | ||
850 | + r = FNMSUB (g, h, half); | ||
851 | + g = FMADD (g, r, g); | ||
852 | + h = FMADD (h, r, h); | ||
853 | + | ||
854 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
855 | + | ||
856 | + /* Final refinement. */ | ||
857 | + d = FNMSUB (g, g, b); | ||
858 | + | ||
859 | + fesetenv_register (fe); | ||
860 | + return FMADD (d, h, g); | ||
861 | + } | ||
862 | + } | ||
863 | + else if (b < 0) | ||
864 | + { | ||
865 | + /* For some reason, some PowerPC32 processors don't implement | ||
866 | + FE_INVALID_SQRT. */ | ||
867 | +#ifdef FE_INVALID_SQRT | ||
868 | + feraiseexcept (FE_INVALID_SQRT); | ||
869 | + | ||
870 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
871 | + if ((u.l & FE_INVALID) == 0) | ||
872 | +#endif | ||
873 | + feraiseexcept (FE_INVALID); | ||
874 | + b = a_nan.value; | ||
875 | + } | ||
876 | + return f_wash (b); | ||
877 | +} | ||
878 | Index: libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | ||
879 | =================================================================== | ||
880 | --- /dev/null | ||
881 | +++ libc/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | ||
882 | @@ -0,0 +1,101 @@ | ||
883 | +/* Single-precision floating point square root. | ||
884 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
885 | + This file is part of the GNU C Library. | ||
886 | + | ||
887 | + The GNU C Library is free software; you can redistribute it and/or | ||
888 | + modify it under the terms of the GNU Lesser General Public | ||
889 | + License as published by the Free Software Foundation; either | ||
890 | + version 2.1 of the License, or (at your option) any later version. | ||
891 | + | ||
892 | + The GNU C Library is distributed in the hope that it will be useful, | ||
893 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
894 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
895 | + Lesser General Public License for more details. | ||
896 | + | ||
897 | + You should have received a copy of the GNU Lesser General Public | ||
898 | + License along with the GNU C Library; if not, write to the Free | ||
899 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
900 | + 02111-1307 USA. */ | ||
901 | + | ||
902 | +#include <math.h> | ||
903 | +#include <math_private.h> | ||
904 | +#include <fenv_libc.h> | ||
905 | +#include <inttypes.h> | ||
906 | + | ||
907 | +#include <sysdep.h> | ||
908 | +#include <ldsodefs.h> | ||
909 | + | ||
910 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
911 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
912 | +static const float threehalf = 1.5; | ||
913 | + | ||
914 | +/* The method is based on the descriptions in: | ||
915 | + | ||
916 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
917 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
918 | + | ||
919 | + We find the reciprocal square root and use that to compute the actual | ||
920 | + square root. */ | ||
921 | + | ||
922 | +#ifdef __STDC__ | ||
923 | +float | ||
924 | +__ieee754_sqrtf (float b) | ||
925 | +#else | ||
926 | +float | ||
927 | +__ieee754_sqrtf (b) | ||
928 | + float b; | ||
929 | +#endif | ||
930 | +{ | ||
931 | + if (__builtin_expect (b > 0, 1)) | ||
932 | + { | ||
933 | +#define FMSUB(a_, c_, b_) \ | ||
934 | + ({ double __r; \ | ||
935 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
936 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
937 | + __r;}) | ||
938 | +#define FNMSUB(a_, c_, b_) \ | ||
939 | + ({ double __r; \ | ||
940 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
941 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
942 | + __r;}) | ||
943 | + | ||
944 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
945 | + { | ||
946 | + double y, x; | ||
947 | + fenv_t fe; | ||
948 | + | ||
949 | + fe = fegetenv_register (); | ||
950 | + | ||
951 | + relax_fenv_state (); | ||
952 | + | ||
953 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
954 | + y = FMSUB (threehalf, b, b); | ||
955 | + | ||
956 | + /* Initial estimate. */ | ||
957 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
958 | + | ||
959 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
960 | + x = x * FNMSUB (y, x * x, threehalf); | ||
961 | + x = x * FNMSUB (y, x * x, threehalf); | ||
962 | + x = x * FNMSUB (y, x * x, threehalf); | ||
963 | + | ||
964 | + /* All done. */ | ||
965 | + fesetenv_register (fe); | ||
966 | + return x * b; | ||
967 | + } | ||
968 | + } | ||
969 | + else if (b < 0) | ||
970 | + { | ||
971 | + /* For some reason, some PowerPC32 processors don't implement | ||
972 | + FE_INVALID_SQRT. */ | ||
973 | +#ifdef FE_INVALID_SQRT | ||
974 | + feraiseexcept (FE_INVALID_SQRT); | ||
975 | + | ||
976 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
977 | + if ((u.l & FE_INVALID) == 0) | ||
978 | +#endif | ||
979 | + feraiseexcept (FE_INVALID); | ||
980 | + b = a_nan.value; | ||
981 | + } | ||
982 | + return f_washf (b); | ||
983 | +} | ||
984 | Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | ||
985 | =================================================================== | ||
986 | --- /dev/null | ||
987 | +++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | ||
988 | @@ -0,0 +1,134 @@ | ||
989 | +/* Double-precision floating point square root. | ||
990 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
991 | + This file is part of the GNU C Library. | ||
992 | + | ||
993 | + The GNU C Library is free software; you can redistribute it and/or | ||
994 | + modify it under the terms of the GNU Lesser General Public | ||
995 | + License as published by the Free Software Foundation; either | ||
996 | + version 2.1 of the License, or (at your option) any later version. | ||
997 | + | ||
998 | + The GNU C Library is distributed in the hope that it will be useful, | ||
999 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
1000 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
1001 | + Lesser General Public License for more details. | ||
1002 | + | ||
1003 | + You should have received a copy of the GNU Lesser General Public | ||
1004 | + License along with the GNU C Library; if not, write to the Free | ||
1005 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
1006 | + 02111-1307 USA. */ | ||
1007 | + | ||
1008 | +#include <math.h> | ||
1009 | +#include <math_private.h> | ||
1010 | +#include <fenv_libc.h> | ||
1011 | +#include <inttypes.h> | ||
1012 | + | ||
1013 | +#include <sysdep.h> | ||
1014 | +#include <ldsodefs.h> | ||
1015 | + | ||
1016 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
1017 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
1018 | +static const float two108 = 3.245185536584267269e+32; | ||
1019 | +static const float twom54 = 5.551115123125782702e-17; | ||
1020 | +static const float half = 0.5; | ||
1021 | + | ||
1022 | +/* The method is based on the descriptions in: | ||
1023 | + | ||
1024 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
1025 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
1026 | + | ||
1027 | + We find the actual square root and half of its reciprocal | ||
1028 | + simultaneously. */ | ||
1029 | + | ||
1030 | +#ifdef __STDC__ | ||
1031 | +double | ||
1032 | +__ieee754_sqrt (double b) | ||
1033 | +#else | ||
1034 | +double | ||
1035 | +__ieee754_sqrt (b) | ||
1036 | + double b; | ||
1037 | +#endif | ||
1038 | +{ | ||
1039 | + if (__builtin_expect (b > 0, 1)) | ||
1040 | + { | ||
1041 | + double y, g, h, d, r; | ||
1042 | + ieee_double_shape_type u; | ||
1043 | + | ||
1044 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
1045 | + { | ||
1046 | + fenv_t fe; | ||
1047 | + | ||
1048 | + fe = fegetenv_register (); | ||
1049 | + | ||
1050 | + u.value = b; | ||
1051 | + | ||
1052 | + relax_fenv_state (); | ||
1053 | + | ||
1054 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
1055 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
1056 | + | ||
1057 | + /* Following Muller et al, page 168, equation 5.20. | ||
1058 | + | ||
1059 | + h goes to 1/(2*sqrt(b)) | ||
1060 | + g goes to sqrt(b). | ||
1061 | + | ||
1062 | + We need three iterations to get within 1ulp. */ | ||
1063 | + | ||
1064 | + /* Indicate that these can be performed prior to the branch. GCC | ||
1065 | + insists on sinking them below the branch, however; it seems like | ||
1066 | + they'd be better before the branch so that we can cover any latency | ||
1067 | + from storing the argument and loading its high word. Oh well. */ | ||
1068 | + | ||
1069 | + g = b * y; | ||
1070 | + h = 0.5 * y; | ||
1071 | + | ||
1072 | + /* Handle small numbers by scaling. */ | ||
1073 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
1074 | + return __ieee754_sqrt (b * two108) * twom54; | ||
1075 | + | ||
1076 | +#define FMADD(a_, c_, b_) \ | ||
1077 | + ({ double __r; \ | ||
1078 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
1079 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1080 | + __r;}) | ||
1081 | +#define FNMSUB(a_, c_, b_) \ | ||
1082 | + ({ double __r; \ | ||
1083 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
1084 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1085 | + __r;}) | ||
1086 | + | ||
1087 | + r = FNMSUB (g, h, half); | ||
1088 | + g = FMADD (g, r, g); | ||
1089 | + h = FMADD (h, r, h); | ||
1090 | + | ||
1091 | + r = FNMSUB (g, h, half); | ||
1092 | + g = FMADD (g, r, g); | ||
1093 | + h = FMADD (h, r, h); | ||
1094 | + | ||
1095 | + r = FNMSUB (g, h, half); | ||
1096 | + g = FMADD (g, r, g); | ||
1097 | + h = FMADD (h, r, h); | ||
1098 | + | ||
1099 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
1100 | + | ||
1101 | + /* Final refinement. */ | ||
1102 | + d = FNMSUB (g, g, b); | ||
1103 | + | ||
1104 | + fesetenv_register (fe); | ||
1105 | + return FMADD (d, h, g); | ||
1106 | + } | ||
1107 | + } | ||
1108 | + else if (b < 0) | ||
1109 | + { | ||
1110 | + /* For some reason, some PowerPC32 processors don't implement | ||
1111 | + FE_INVALID_SQRT. */ | ||
1112 | +#ifdef FE_INVALID_SQRT | ||
1113 | + feraiseexcept (FE_INVALID_SQRT); | ||
1114 | + | ||
1115 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1116 | + if ((u.l & FE_INVALID) == 0) | ||
1117 | +#endif | ||
1118 | + feraiseexcept (FE_INVALID); | ||
1119 | + b = a_nan.value; | ||
1120 | + } | ||
1121 | + return f_wash (b); | ||
1122 | +} | ||
1123 | Index: libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | ||
1124 | =================================================================== | ||
1125 | --- /dev/null | ||
1126 | +++ libc/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | ||
1127 | @@ -0,0 +1,101 @@ | ||
1128 | +/* Single-precision floating point square root. | ||
1129 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
1130 | + This file is part of the GNU C Library. | ||
1131 | + | ||
1132 | + The GNU C Library is free software; you can redistribute it and/or | ||
1133 | + modify it under the terms of the GNU Lesser General Public | ||
1134 | + License as published by the Free Software Foundation; either | ||
1135 | + version 2.1 of the License, or (at your option) any later version. | ||
1136 | + | ||
1137 | + The GNU C Library is distributed in the hope that it will be useful, | ||
1138 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
1139 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
1140 | + Lesser General Public License for more details. | ||
1141 | + | ||
1142 | + You should have received a copy of the GNU Lesser General Public | ||
1143 | + License along with the GNU C Library; if not, write to the Free | ||
1144 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
1145 | + 02111-1307 USA. */ | ||
1146 | + | ||
1147 | +#include <math.h> | ||
1148 | +#include <math_private.h> | ||
1149 | +#include <fenv_libc.h> | ||
1150 | +#include <inttypes.h> | ||
1151 | + | ||
1152 | +#include <sysdep.h> | ||
1153 | +#include <ldsodefs.h> | ||
1154 | + | ||
1155 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
1156 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
1157 | +static const float threehalf = 1.5; | ||
1158 | + | ||
1159 | +/* The method is based on the descriptions in: | ||
1160 | + | ||
1161 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
1162 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
1163 | + | ||
1164 | + We find the reciprocal square root and use that to compute the actual | ||
1165 | + square root. */ | ||
1166 | + | ||
1167 | +#ifdef __STDC__ | ||
1168 | +float | ||
1169 | +__ieee754_sqrtf (float b) | ||
1170 | +#else | ||
1171 | +float | ||
1172 | +__ieee754_sqrtf (b) | ||
1173 | + float b; | ||
1174 | +#endif | ||
1175 | +{ | ||
1176 | + if (__builtin_expect (b > 0, 1)) | ||
1177 | + { | ||
1178 | +#define FMSUB(a_, c_, b_) \ | ||
1179 | + ({ double __r; \ | ||
1180 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
1181 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1182 | + __r;}) | ||
1183 | +#define FNMSUB(a_, c_, b_) \ | ||
1184 | + ({ double __r; \ | ||
1185 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
1186 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1187 | + __r;}) | ||
1188 | + | ||
1189 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
1190 | + { | ||
1191 | + double y, x; | ||
1192 | + fenv_t fe; | ||
1193 | + | ||
1194 | + fe = fegetenv_register (); | ||
1195 | + | ||
1196 | + relax_fenv_state (); | ||
1197 | + | ||
1198 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
1199 | + y = FMSUB (threehalf, b, b); | ||
1200 | + | ||
1201 | + /* Initial estimate. */ | ||
1202 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
1203 | + | ||
1204 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
1205 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1206 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1207 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1208 | + | ||
1209 | + /* All done. */ | ||
1210 | + fesetenv_register (fe); | ||
1211 | + return x * b; | ||
1212 | + } | ||
1213 | + } | ||
1214 | + else if (b < 0) | ||
1215 | + { | ||
1216 | + /* For some reason, some PowerPC32 processors don't implement | ||
1217 | + FE_INVALID_SQRT. */ | ||
1218 | +#ifdef FE_INVALID_SQRT | ||
1219 | + feraiseexcept (FE_INVALID_SQRT); | ||
1220 | + | ||
1221 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1222 | + if ((u.l & FE_INVALID) == 0) | ||
1223 | +#endif | ||
1224 | + feraiseexcept (FE_INVALID); | ||
1225 | + b = a_nan.value; | ||
1226 | + } | ||
1227 | + return f_washf (b); | ||
1228 | +} | ||
1229 | Index: libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | ||
1230 | =================================================================== | ||
1231 | --- /dev/null | ||
1232 | +++ libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | ||
1233 | @@ -0,0 +1,134 @@ | ||
1234 | +/* Double-precision floating point square root. | ||
1235 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
1236 | + This file is part of the GNU C Library. | ||
1237 | + | ||
1238 | + The GNU C Library is free software; you can redistribute it and/or | ||
1239 | + modify it under the terms of the GNU Lesser General Public | ||
1240 | + License as published by the Free Software Foundation; either | ||
1241 | + version 2.1 of the License, or (at your option) any later version. | ||
1242 | + | ||
1243 | + The GNU C Library is distributed in the hope that it will be useful, | ||
1244 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
1245 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
1246 | + Lesser General Public License for more details. | ||
1247 | + | ||
1248 | + You should have received a copy of the GNU Lesser General Public | ||
1249 | + License along with the GNU C Library; if not, write to the Free | ||
1250 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
1251 | + 02111-1307 USA. */ | ||
1252 | + | ||
1253 | +#include <math.h> | ||
1254 | +#include <math_private.h> | ||
1255 | +#include <fenv_libc.h> | ||
1256 | +#include <inttypes.h> | ||
1257 | + | ||
1258 | +#include <sysdep.h> | ||
1259 | +#include <ldsodefs.h> | ||
1260 | + | ||
1261 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
1262 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
1263 | +static const float two108 = 3.245185536584267269e+32; | ||
1264 | +static const float twom54 = 5.551115123125782702e-17; | ||
1265 | +static const float half = 0.5; | ||
1266 | + | ||
1267 | +/* The method is based on the descriptions in: | ||
1268 | + | ||
1269 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
1270 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
1271 | + | ||
1272 | + We find the actual square root and half of its reciprocal | ||
1273 | + simultaneously. */ | ||
1274 | + | ||
1275 | +#ifdef __STDC__ | ||
1276 | +double | ||
1277 | +__ieee754_sqrt (double b) | ||
1278 | +#else | ||
1279 | +double | ||
1280 | +__ieee754_sqrt (b) | ||
1281 | + double b; | ||
1282 | +#endif | ||
1283 | +{ | ||
1284 | + if (__builtin_expect (b > 0, 1)) | ||
1285 | + { | ||
1286 | + double y, g, h, d, r; | ||
1287 | + ieee_double_shape_type u; | ||
1288 | + | ||
1289 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
1290 | + { | ||
1291 | + fenv_t fe; | ||
1292 | + | ||
1293 | + fe = fegetenv_register (); | ||
1294 | + | ||
1295 | + u.value = b; | ||
1296 | + | ||
1297 | + relax_fenv_state (); | ||
1298 | + | ||
1299 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
1300 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
1301 | + | ||
1302 | + /* Following Muller et al, page 168, equation 5.20. | ||
1303 | + | ||
1304 | + h goes to 1/(2*sqrt(b)) | ||
1305 | + g goes to sqrt(b). | ||
1306 | + | ||
1307 | + We need three iterations to get within 1ulp. */ | ||
1308 | + | ||
1309 | + /* Indicate that these can be performed prior to the branch. GCC | ||
1310 | + insists on sinking them below the branch, however; it seems like | ||
1311 | + they'd be better before the branch so that we can cover any latency | ||
1312 | + from storing the argument and loading its high word. Oh well. */ | ||
1313 | + | ||
1314 | + g = b * y; | ||
1315 | + h = 0.5 * y; | ||
1316 | + | ||
1317 | + /* Handle small numbers by scaling. */ | ||
1318 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
1319 | + return __ieee754_sqrt (b * two108) * twom54; | ||
1320 | + | ||
1321 | +#define FMADD(a_, c_, b_) \ | ||
1322 | + ({ double __r; \ | ||
1323 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
1324 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1325 | + __r;}) | ||
1326 | +#define FNMSUB(a_, c_, b_) \ | ||
1327 | + ({ double __r; \ | ||
1328 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
1329 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1330 | + __r;}) | ||
1331 | + | ||
1332 | + r = FNMSUB (g, h, half); | ||
1333 | + g = FMADD (g, r, g); | ||
1334 | + h = FMADD (h, r, h); | ||
1335 | + | ||
1336 | + r = FNMSUB (g, h, half); | ||
1337 | + g = FMADD (g, r, g); | ||
1338 | + h = FMADD (h, r, h); | ||
1339 | + | ||
1340 | + r = FNMSUB (g, h, half); | ||
1341 | + g = FMADD (g, r, g); | ||
1342 | + h = FMADD (h, r, h); | ||
1343 | + | ||
1344 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
1345 | + | ||
1346 | + /* Final refinement. */ | ||
1347 | + d = FNMSUB (g, g, b); | ||
1348 | + | ||
1349 | + fesetenv_register (fe); | ||
1350 | + return FMADD (d, h, g); | ||
1351 | + } | ||
1352 | + } | ||
1353 | + else if (b < 0) | ||
1354 | + { | ||
1355 | + /* For some reason, some PowerPC32 processors don't implement | ||
1356 | + FE_INVALID_SQRT. */ | ||
1357 | +#ifdef FE_INVALID_SQRT | ||
1358 | + feraiseexcept (FE_INVALID_SQRT); | ||
1359 | + | ||
1360 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1361 | + if ((u.l & FE_INVALID) == 0) | ||
1362 | +#endif | ||
1363 | + feraiseexcept (FE_INVALID); | ||
1364 | + b = a_nan.value; | ||
1365 | + } | ||
1366 | + return f_wash (b); | ||
1367 | +} | ||
1368 | Index: libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | ||
1369 | =================================================================== | ||
1370 | --- /dev/null | ||
1371 | +++ libc/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | ||
1372 | @@ -0,0 +1,101 @@ | ||
1373 | +/* Single-precision floating point square root. | ||
1374 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
1375 | + This file is part of the GNU C Library. | ||
1376 | + | ||
1377 | + The GNU C Library is free software; you can redistribute it and/or | ||
1378 | + modify it under the terms of the GNU Lesser General Public | ||
1379 | + License as published by the Free Software Foundation; either | ||
1380 | + version 2.1 of the License, or (at your option) any later version. | ||
1381 | + | ||
1382 | + The GNU C Library is distributed in the hope that it will be useful, | ||
1383 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
1384 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
1385 | + Lesser General Public License for more details. | ||
1386 | + | ||
1387 | + You should have received a copy of the GNU Lesser General Public | ||
1388 | + License along with the GNU C Library; if not, write to the Free | ||
1389 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
1390 | + 02111-1307 USA. */ | ||
1391 | + | ||
1392 | +#include <math.h> | ||
1393 | +#include <math_private.h> | ||
1394 | +#include <fenv_libc.h> | ||
1395 | +#include <inttypes.h> | ||
1396 | + | ||
1397 | +#include <sysdep.h> | ||
1398 | +#include <ldsodefs.h> | ||
1399 | + | ||
1400 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
1401 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
1402 | +static const float threehalf = 1.5; | ||
1403 | + | ||
1404 | +/* The method is based on the descriptions in: | ||
1405 | + | ||
1406 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
1407 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
1408 | + | ||
1409 | + We find the reciprocal square root and use that to compute the actual | ||
1410 | + square root. */ | ||
1411 | + | ||
1412 | +#ifdef __STDC__ | ||
1413 | +float | ||
1414 | +__ieee754_sqrtf (float b) | ||
1415 | +#else | ||
1416 | +float | ||
1417 | +__ieee754_sqrtf (b) | ||
1418 | + float b; | ||
1419 | +#endif | ||
1420 | +{ | ||
1421 | + if (__builtin_expect (b > 0, 1)) | ||
1422 | + { | ||
1423 | +#define FMSUB(a_, c_, b_) \ | ||
1424 | + ({ double __r; \ | ||
1425 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
1426 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1427 | + __r;}) | ||
1428 | +#define FNMSUB(a_, c_, b_) \ | ||
1429 | + ({ double __r; \ | ||
1430 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
1431 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1432 | + __r;}) | ||
1433 | + | ||
1434 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
1435 | + { | ||
1436 | + double y, x; | ||
1437 | + fenv_t fe; | ||
1438 | + | ||
1439 | + fe = fegetenv_register (); | ||
1440 | + | ||
1441 | + relax_fenv_state (); | ||
1442 | + | ||
1443 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
1444 | + y = FMSUB (threehalf, b, b); | ||
1445 | + | ||
1446 | + /* Initial estimate. */ | ||
1447 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
1448 | + | ||
1449 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
1450 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1451 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1452 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1453 | + | ||
1454 | + /* All done. */ | ||
1455 | + fesetenv_register (fe); | ||
1456 | + return x * b; | ||
1457 | + } | ||
1458 | + } | ||
1459 | + else if (b < 0) | ||
1460 | + { | ||
1461 | + /* For some reason, some PowerPC32 processors don't implement | ||
1462 | + FE_INVALID_SQRT. */ | ||
1463 | +#ifdef FE_INVALID_SQRT | ||
1464 | + feraiseexcept (FE_INVALID_SQRT); | ||
1465 | + | ||
1466 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1467 | + if ((u.l & FE_INVALID) == 0) | ||
1468 | +#endif | ||
1469 | + feraiseexcept (FE_INVALID); | ||
1470 | + b = a_nan.value; | ||
1471 | + } | ||
1472 | + return f_washf (b); | ||
1473 | +} | ||
1474 | Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies | ||
1475 | =================================================================== | ||
1476 | --- /dev/null | ||
1477 | +++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies | ||
1478 | @@ -0,0 +1 @@ | ||
1479 | +powerpc/powerpc32/603e/fpu | ||
1480 | Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies | ||
1481 | =================================================================== | ||
1482 | --- /dev/null | ||
1483 | +++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies | ||
1484 | @@ -0,0 +1,2 @@ | ||
1485 | +# e300c3 is a variant of 603e so use the same optimizations for sqrt | ||
1486 | +powerpc/powerpc32/603e/fpu | ||
1487 | Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies | ||
1488 | =================================================================== | ||
1489 | --- /dev/null | ||
1490 | +++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies | ||
1491 | @@ -0,0 +1 @@ | ||
1492 | +powerpc/powerpc32/e500mc/fpu | ||
1493 | Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies | ||
1494 | =================================================================== | ||
1495 | --- /dev/null | ||
1496 | +++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies | ||
1497 | @@ -0,0 +1 @@ | ||
1498 | +powerpc/powerpc32/e5500/fpu | ||
1499 | Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies | ||
1500 | =================================================================== | ||
1501 | --- /dev/null | ||
1502 | +++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies | ||
1503 | @@ -0,0 +1 @@ | ||
1504 | +powerpc/powerpc32/e6500/fpu | ||
1505 | Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies | ||
1506 | =================================================================== | ||
1507 | --- /dev/null | ||
1508 | +++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies | ||
1509 | @@ -0,0 +1 @@ | ||
1510 | +powerpc/powerpc64/e5500/fpu | ||
1511 | Index: libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies | ||
1512 | =================================================================== | ||
1513 | --- /dev/null | ||
1514 | +++ libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies | ||
1515 | @@ -0,0 +1 @@ | ||
1516 | +powerpc/powerpc64/e6500/fpu | ||