diff options
Diffstat (limited to 'meta/recipes-core/glibc/glibc/0008-fsl-e500-e5500-e6500-603e-fsqrt-implementation.patch')
-rw-r--r-- | meta/recipes-core/glibc/glibc/0008-fsl-e500-e5500-e6500-603e-fsqrt-implementation.patch | 1581 |
1 files changed, 0 insertions, 1581 deletions
diff --git a/meta/recipes-core/glibc/glibc/0008-fsl-e500-e5500-e6500-603e-fsqrt-implementation.patch b/meta/recipes-core/glibc/glibc/0008-fsl-e500-e5500-e6500-603e-fsqrt-implementation.patch deleted file mode 100644 index 2162bf38c2..0000000000 --- a/meta/recipes-core/glibc/glibc/0008-fsl-e500-e5500-e6500-603e-fsqrt-implementation.patch +++ /dev/null | |||
@@ -1,1581 +0,0 @@ | |||
1 | From 3d58330390a7d4f4ed32f4a9c25628af3e0dd5c1 Mon Sep 17 00:00:00 2001 | ||
2 | From: Khem Raj <raj.khem@gmail.com> | ||
3 | Date: Wed, 18 Mar 2015 00:01:50 +0000 | ||
4 | Subject: [PATCH] fsl e500/e5500/e6500/603e fsqrt implementation | ||
5 | |||
6 | Upstream-Status: Pending | ||
7 | Signed-off-by: Edmar Wienskoski <edmar@freescale.com> | ||
8 | Signed-off-by: Khem Raj <raj.khem@gmail.com> | ||
9 | --- | ||
10 | sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | 134 ++++++++++++++++++ | ||
11 | sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | 101 +++++++++++++ | ||
12 | sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | 134 ++++++++++++++++++ | ||
13 | .../powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | 101 +++++++++++++ | ||
14 | sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | 134 ++++++++++++++++++ | ||
15 | sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | 101 +++++++++++++ | ||
16 | sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | 134 ++++++++++++++++++ | ||
17 | sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | 101 +++++++++++++ | ||
18 | sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | 134 ++++++++++++++++++ | ||
19 | sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | 101 +++++++++++++ | ||
20 | sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | 134 ++++++++++++++++++ | ||
21 | sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | 101 +++++++++++++ | ||
22 | .../linux/powerpc/powerpc32/603e/fpu/Implies | 1 + | ||
23 | .../powerpc/powerpc32/e300c3/fpu/Implies | 2 + | ||
24 | .../powerpc/powerpc32/e500mc/fpu/Implies | 1 + | ||
25 | .../linux/powerpc/powerpc32/e5500/fpu/Implies | 1 + | ||
26 | .../linux/powerpc/powerpc32/e6500/fpu/Implies | 1 + | ||
27 | .../linux/powerpc/powerpc64/e5500/fpu/Implies | 1 + | ||
28 | .../linux/powerpc/powerpc64/e6500/fpu/Implies | 1 + | ||
29 | 19 files changed, 1418 insertions(+) | ||
30 | create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | ||
31 | create mode 100644 sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | ||
32 | create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | ||
33 | create mode 100644 sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | ||
34 | create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | ||
35 | create mode 100644 sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | ||
36 | create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | ||
37 | create mode 100644 sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | ||
38 | create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | ||
39 | create mode 100644 sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | ||
40 | create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | ||
41 | create mode 100644 sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | ||
42 | create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies | ||
43 | create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies | ||
44 | create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies | ||
45 | create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies | ||
46 | create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies | ||
47 | create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies | ||
48 | create mode 100644 sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies | ||
49 | |||
50 | diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | ||
51 | new file mode 100644 | ||
52 | index 0000000000..71e516d1c8 | ||
53 | --- /dev/null | ||
54 | +++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrt.c | ||
55 | @@ -0,0 +1,134 @@ | ||
56 | +/* Double-precision floating point square root. | ||
57 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
58 | + This file is part of the GNU C Library. | ||
59 | + | ||
60 | + The GNU C Library is free software; you can redistribute it and/or | ||
61 | + modify it under the terms of the GNU Lesser General Public | ||
62 | + License as published by the Free Software Foundation; either | ||
63 | + version 2.1 of the License, or (at your option) any later version. | ||
64 | + | ||
65 | + The GNU C Library is distributed in the hope that it will be useful, | ||
66 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
67 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
68 | + Lesser General Public License for more details. | ||
69 | + | ||
70 | + You should have received a copy of the GNU Lesser General Public | ||
71 | + License along with the GNU C Library; if not, write to the Free | ||
72 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
73 | + 02111-1307 USA. */ | ||
74 | + | ||
75 | +#include <math.h> | ||
76 | +#include <math_private.h> | ||
77 | +#include <fenv_libc.h> | ||
78 | +#include <inttypes.h> | ||
79 | + | ||
80 | +#include <sysdep.h> | ||
81 | +#include <ldsodefs.h> | ||
82 | + | ||
83 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
84 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
85 | +static const float two108 = 3.245185536584267269e+32; | ||
86 | +static const float twom54 = 5.551115123125782702e-17; | ||
87 | +static const float half = 0.5; | ||
88 | + | ||
89 | +/* The method is based on the descriptions in: | ||
90 | + | ||
91 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
92 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
93 | + | ||
94 | + We find the actual square root and half of its reciprocal | ||
95 | + simultaneously. */ | ||
96 | + | ||
97 | +#ifdef __STDC__ | ||
98 | +double | ||
99 | +__ieee754_sqrt (double b) | ||
100 | +#else | ||
101 | +double | ||
102 | +__ieee754_sqrt (b) | ||
103 | + double b; | ||
104 | +#endif | ||
105 | +{ | ||
106 | + if (__builtin_expect (b > 0, 1)) | ||
107 | + { | ||
108 | + double y, g, h, d, r; | ||
109 | + ieee_double_shape_type u; | ||
110 | + | ||
111 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
112 | + { | ||
113 | + fenv_t fe; | ||
114 | + | ||
115 | + fe = fegetenv_register (); | ||
116 | + | ||
117 | + u.value = b; | ||
118 | + | ||
119 | + relax_fenv_state (); | ||
120 | + | ||
121 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
122 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
123 | + | ||
124 | + /* Following Muller et al, page 168, equation 5.20. | ||
125 | + | ||
126 | + h goes to 1/(2*sqrt(b)) | ||
127 | + g goes to sqrt(b). | ||
128 | + | ||
129 | + We need three iterations to get within 1ulp. */ | ||
130 | + | ||
131 | + /* Indicate that these can be performed prior to the branch. GCC | ||
132 | + insists on sinking them below the branch, however; it seems like | ||
133 | + they'd be better before the branch so that we can cover any latency | ||
134 | + from storing the argument and loading its high word. Oh well. */ | ||
135 | + | ||
136 | + g = b * y; | ||
137 | + h = 0.5 * y; | ||
138 | + | ||
139 | + /* Handle small numbers by scaling. */ | ||
140 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
141 | + return __ieee754_sqrt (b * two108) * twom54; | ||
142 | + | ||
143 | +#define FMADD(a_, c_, b_) \ | ||
144 | + ({ double __r; \ | ||
145 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
146 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
147 | + __r;}) | ||
148 | +#define FNMSUB(a_, c_, b_) \ | ||
149 | + ({ double __r; \ | ||
150 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
151 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
152 | + __r;}) | ||
153 | + | ||
154 | + r = FNMSUB (g, h, half); | ||
155 | + g = FMADD (g, r, g); | ||
156 | + h = FMADD (h, r, h); | ||
157 | + | ||
158 | + r = FNMSUB (g, h, half); | ||
159 | + g = FMADD (g, r, g); | ||
160 | + h = FMADD (h, r, h); | ||
161 | + | ||
162 | + r = FNMSUB (g, h, half); | ||
163 | + g = FMADD (g, r, g); | ||
164 | + h = FMADD (h, r, h); | ||
165 | + | ||
166 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
167 | + | ||
168 | + /* Final refinement. */ | ||
169 | + d = FNMSUB (g, g, b); | ||
170 | + | ||
171 | + fesetenv_register (fe); | ||
172 | + return FMADD (d, h, g); | ||
173 | + } | ||
174 | + } | ||
175 | + else if (b < 0) | ||
176 | + { | ||
177 | + /* For some reason, some PowerPC32 processors don't implement | ||
178 | + FE_INVALID_SQRT. */ | ||
179 | +#ifdef FE_INVALID_SQRT | ||
180 | + feraiseexcept (FE_INVALID_SQRT); | ||
181 | + | ||
182 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
183 | + if ((u.l & FE_INVALID) == 0) | ||
184 | +#endif | ||
185 | + feraiseexcept (FE_INVALID); | ||
186 | + b = a_nan.value; | ||
187 | + } | ||
188 | + return f_wash (b); | ||
189 | +} | ||
190 | diff --git a/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | ||
191 | new file mode 100644 | ||
192 | index 0000000000..26fa067abf | ||
193 | --- /dev/null | ||
194 | +++ b/sysdeps/powerpc/powerpc32/603e/fpu/e_sqrtf.c | ||
195 | @@ -0,0 +1,101 @@ | ||
196 | +/* Single-precision floating point square root. | ||
197 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
198 | + This file is part of the GNU C Library. | ||
199 | + | ||
200 | + The GNU C Library is free software; you can redistribute it and/or | ||
201 | + modify it under the terms of the GNU Lesser General Public | ||
202 | + License as published by the Free Software Foundation; either | ||
203 | + version 2.1 of the License, or (at your option) any later version. | ||
204 | + | ||
205 | + The GNU C Library is distributed in the hope that it will be useful, | ||
206 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
207 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
208 | + Lesser General Public License for more details. | ||
209 | + | ||
210 | + You should have received a copy of the GNU Lesser General Public | ||
211 | + License along with the GNU C Library; if not, write to the Free | ||
212 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
213 | + 02111-1307 USA. */ | ||
214 | + | ||
215 | +#include <math.h> | ||
216 | +#include <math_private.h> | ||
217 | +#include <fenv_libc.h> | ||
218 | +#include <inttypes.h> | ||
219 | + | ||
220 | +#include <sysdep.h> | ||
221 | +#include <ldsodefs.h> | ||
222 | + | ||
223 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
224 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
225 | +static const float threehalf = 1.5; | ||
226 | + | ||
227 | +/* The method is based on the descriptions in: | ||
228 | + | ||
229 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
230 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
231 | + | ||
232 | + We find the reciprocal square root and use that to compute the actual | ||
233 | + square root. */ | ||
234 | + | ||
235 | +#ifdef __STDC__ | ||
236 | +float | ||
237 | +__ieee754_sqrtf (float b) | ||
238 | +#else | ||
239 | +float | ||
240 | +__ieee754_sqrtf (b) | ||
241 | + float b; | ||
242 | +#endif | ||
243 | +{ | ||
244 | + if (__builtin_expect (b > 0, 1)) | ||
245 | + { | ||
246 | +#define FMSUB(a_, c_, b_) \ | ||
247 | + ({ double __r; \ | ||
248 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
249 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
250 | + __r;}) | ||
251 | +#define FNMSUB(a_, c_, b_) \ | ||
252 | + ({ double __r; \ | ||
253 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
254 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
255 | + __r;}) | ||
256 | + | ||
257 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
258 | + { | ||
259 | + double y, x; | ||
260 | + fenv_t fe; | ||
261 | + | ||
262 | + fe = fegetenv_register (); | ||
263 | + | ||
264 | + relax_fenv_state (); | ||
265 | + | ||
266 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
267 | + y = FMSUB (threehalf, b, b); | ||
268 | + | ||
269 | + /* Initial estimate. */ | ||
270 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
271 | + | ||
272 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
273 | + x = x * FNMSUB (y, x * x, threehalf); | ||
274 | + x = x * FNMSUB (y, x * x, threehalf); | ||
275 | + x = x * FNMSUB (y, x * x, threehalf); | ||
276 | + | ||
277 | + /* All done. */ | ||
278 | + fesetenv_register (fe); | ||
279 | + return x * b; | ||
280 | + } | ||
281 | + } | ||
282 | + else if (b < 0) | ||
283 | + { | ||
284 | + /* For some reason, some PowerPC32 processors don't implement | ||
285 | + FE_INVALID_SQRT. */ | ||
286 | +#ifdef FE_INVALID_SQRT | ||
287 | + feraiseexcept (FE_INVALID_SQRT); | ||
288 | + | ||
289 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
290 | + if ((u.l & FE_INVALID) == 0) | ||
291 | +#endif | ||
292 | + feraiseexcept (FE_INVALID); | ||
293 | + b = a_nan.value; | ||
294 | + } | ||
295 | + return f_washf (b); | ||
296 | +} | ||
297 | diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | ||
298 | new file mode 100644 | ||
299 | index 0000000000..71e516d1c8 | ||
300 | --- /dev/null | ||
301 | +++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrt.c | ||
302 | @@ -0,0 +1,134 @@ | ||
303 | +/* Double-precision floating point square root. | ||
304 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
305 | + This file is part of the GNU C Library. | ||
306 | + | ||
307 | + The GNU C Library is free software; you can redistribute it and/or | ||
308 | + modify it under the terms of the GNU Lesser General Public | ||
309 | + License as published by the Free Software Foundation; either | ||
310 | + version 2.1 of the License, or (at your option) any later version. | ||
311 | + | ||
312 | + The GNU C Library is distributed in the hope that it will be useful, | ||
313 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
314 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
315 | + Lesser General Public License for more details. | ||
316 | + | ||
317 | + You should have received a copy of the GNU Lesser General Public | ||
318 | + License along with the GNU C Library; if not, write to the Free | ||
319 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
320 | + 02111-1307 USA. */ | ||
321 | + | ||
322 | +#include <math.h> | ||
323 | +#include <math_private.h> | ||
324 | +#include <fenv_libc.h> | ||
325 | +#include <inttypes.h> | ||
326 | + | ||
327 | +#include <sysdep.h> | ||
328 | +#include <ldsodefs.h> | ||
329 | + | ||
330 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
331 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
332 | +static const float two108 = 3.245185536584267269e+32; | ||
333 | +static const float twom54 = 5.551115123125782702e-17; | ||
334 | +static const float half = 0.5; | ||
335 | + | ||
336 | +/* The method is based on the descriptions in: | ||
337 | + | ||
338 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
339 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
340 | + | ||
341 | + We find the actual square root and half of its reciprocal | ||
342 | + simultaneously. */ | ||
343 | + | ||
344 | +#ifdef __STDC__ | ||
345 | +double | ||
346 | +__ieee754_sqrt (double b) | ||
347 | +#else | ||
348 | +double | ||
349 | +__ieee754_sqrt (b) | ||
350 | + double b; | ||
351 | +#endif | ||
352 | +{ | ||
353 | + if (__builtin_expect (b > 0, 1)) | ||
354 | + { | ||
355 | + double y, g, h, d, r; | ||
356 | + ieee_double_shape_type u; | ||
357 | + | ||
358 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
359 | + { | ||
360 | + fenv_t fe; | ||
361 | + | ||
362 | + fe = fegetenv_register (); | ||
363 | + | ||
364 | + u.value = b; | ||
365 | + | ||
366 | + relax_fenv_state (); | ||
367 | + | ||
368 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
369 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
370 | + | ||
371 | + /* Following Muller et al, page 168, equation 5.20. | ||
372 | + | ||
373 | + h goes to 1/(2*sqrt(b)) | ||
374 | + g goes to sqrt(b). | ||
375 | + | ||
376 | + We need three iterations to get within 1ulp. */ | ||
377 | + | ||
378 | + /* Indicate that these can be performed prior to the branch. GCC | ||
379 | + insists on sinking them below the branch, however; it seems like | ||
380 | + they'd be better before the branch so that we can cover any latency | ||
381 | + from storing the argument and loading its high word. Oh well. */ | ||
382 | + | ||
383 | + g = b * y; | ||
384 | + h = 0.5 * y; | ||
385 | + | ||
386 | + /* Handle small numbers by scaling. */ | ||
387 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
388 | + return __ieee754_sqrt (b * two108) * twom54; | ||
389 | + | ||
390 | +#define FMADD(a_, c_, b_) \ | ||
391 | + ({ double __r; \ | ||
392 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
393 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
394 | + __r;}) | ||
395 | +#define FNMSUB(a_, c_, b_) \ | ||
396 | + ({ double __r; \ | ||
397 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
398 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
399 | + __r;}) | ||
400 | + | ||
401 | + r = FNMSUB (g, h, half); | ||
402 | + g = FMADD (g, r, g); | ||
403 | + h = FMADD (h, r, h); | ||
404 | + | ||
405 | + r = FNMSUB (g, h, half); | ||
406 | + g = FMADD (g, r, g); | ||
407 | + h = FMADD (h, r, h); | ||
408 | + | ||
409 | + r = FNMSUB (g, h, half); | ||
410 | + g = FMADD (g, r, g); | ||
411 | + h = FMADD (h, r, h); | ||
412 | + | ||
413 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
414 | + | ||
415 | + /* Final refinement. */ | ||
416 | + d = FNMSUB (g, g, b); | ||
417 | + | ||
418 | + fesetenv_register (fe); | ||
419 | + return FMADD (d, h, g); | ||
420 | + } | ||
421 | + } | ||
422 | + else if (b < 0) | ||
423 | + { | ||
424 | + /* For some reason, some PowerPC32 processors don't implement | ||
425 | + FE_INVALID_SQRT. */ | ||
426 | +#ifdef FE_INVALID_SQRT | ||
427 | + feraiseexcept (FE_INVALID_SQRT); | ||
428 | + | ||
429 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
430 | + if ((u.l & FE_INVALID) == 0) | ||
431 | +#endif | ||
432 | + feraiseexcept (FE_INVALID); | ||
433 | + b = a_nan.value; | ||
434 | + } | ||
435 | + return f_wash (b); | ||
436 | +} | ||
437 | diff --git a/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | ||
438 | new file mode 100644 | ||
439 | index 0000000000..26fa067abf | ||
440 | --- /dev/null | ||
441 | +++ b/sysdeps/powerpc/powerpc32/e500mc/fpu/e_sqrtf.c | ||
442 | @@ -0,0 +1,101 @@ | ||
443 | +/* Single-precision floating point square root. | ||
444 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
445 | + This file is part of the GNU C Library. | ||
446 | + | ||
447 | + The GNU C Library is free software; you can redistribute it and/or | ||
448 | + modify it under the terms of the GNU Lesser General Public | ||
449 | + License as published by the Free Software Foundation; either | ||
450 | + version 2.1 of the License, or (at your option) any later version. | ||
451 | + | ||
452 | + The GNU C Library is distributed in the hope that it will be useful, | ||
453 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
454 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
455 | + Lesser General Public License for more details. | ||
456 | + | ||
457 | + You should have received a copy of the GNU Lesser General Public | ||
458 | + License along with the GNU C Library; if not, write to the Free | ||
459 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
460 | + 02111-1307 USA. */ | ||
461 | + | ||
462 | +#include <math.h> | ||
463 | +#include <math_private.h> | ||
464 | +#include <fenv_libc.h> | ||
465 | +#include <inttypes.h> | ||
466 | + | ||
467 | +#include <sysdep.h> | ||
468 | +#include <ldsodefs.h> | ||
469 | + | ||
470 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
471 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
472 | +static const float threehalf = 1.5; | ||
473 | + | ||
474 | +/* The method is based on the descriptions in: | ||
475 | + | ||
476 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
477 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
478 | + | ||
479 | + We find the reciprocal square root and use that to compute the actual | ||
480 | + square root. */ | ||
481 | + | ||
482 | +#ifdef __STDC__ | ||
483 | +float | ||
484 | +__ieee754_sqrtf (float b) | ||
485 | +#else | ||
486 | +float | ||
487 | +__ieee754_sqrtf (b) | ||
488 | + float b; | ||
489 | +#endif | ||
490 | +{ | ||
491 | + if (__builtin_expect (b > 0, 1)) | ||
492 | + { | ||
493 | +#define FMSUB(a_, c_, b_) \ | ||
494 | + ({ double __r; \ | ||
495 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
496 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
497 | + __r;}) | ||
498 | +#define FNMSUB(a_, c_, b_) \ | ||
499 | + ({ double __r; \ | ||
500 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
501 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
502 | + __r;}) | ||
503 | + | ||
504 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
505 | + { | ||
506 | + double y, x; | ||
507 | + fenv_t fe; | ||
508 | + | ||
509 | + fe = fegetenv_register (); | ||
510 | + | ||
511 | + relax_fenv_state (); | ||
512 | + | ||
513 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
514 | + y = FMSUB (threehalf, b, b); | ||
515 | + | ||
516 | + /* Initial estimate. */ | ||
517 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
518 | + | ||
519 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
520 | + x = x * FNMSUB (y, x * x, threehalf); | ||
521 | + x = x * FNMSUB (y, x * x, threehalf); | ||
522 | + x = x * FNMSUB (y, x * x, threehalf); | ||
523 | + | ||
524 | + /* All done. */ | ||
525 | + fesetenv_register (fe); | ||
526 | + return x * b; | ||
527 | + } | ||
528 | + } | ||
529 | + else if (b < 0) | ||
530 | + { | ||
531 | + /* For some reason, some PowerPC32 processors don't implement | ||
532 | + FE_INVALID_SQRT. */ | ||
533 | +#ifdef FE_INVALID_SQRT | ||
534 | + feraiseexcept (FE_INVALID_SQRT); | ||
535 | + | ||
536 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
537 | + if ((u.l & FE_INVALID) == 0) | ||
538 | +#endif | ||
539 | + feraiseexcept (FE_INVALID); | ||
540 | + b = a_nan.value; | ||
541 | + } | ||
542 | + return f_washf (b); | ||
543 | +} | ||
544 | diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | ||
545 | new file mode 100644 | ||
546 | index 0000000000..71e516d1c8 | ||
547 | --- /dev/null | ||
548 | +++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrt.c | ||
549 | @@ -0,0 +1,134 @@ | ||
550 | +/* Double-precision floating point square root. | ||
551 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
552 | + This file is part of the GNU C Library. | ||
553 | + | ||
554 | + The GNU C Library is free software; you can redistribute it and/or | ||
555 | + modify it under the terms of the GNU Lesser General Public | ||
556 | + License as published by the Free Software Foundation; either | ||
557 | + version 2.1 of the License, or (at your option) any later version. | ||
558 | + | ||
559 | + The GNU C Library is distributed in the hope that it will be useful, | ||
560 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
561 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
562 | + Lesser General Public License for more details. | ||
563 | + | ||
564 | + You should have received a copy of the GNU Lesser General Public | ||
565 | + License along with the GNU C Library; if not, write to the Free | ||
566 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
567 | + 02111-1307 USA. */ | ||
568 | + | ||
569 | +#include <math.h> | ||
570 | +#include <math_private.h> | ||
571 | +#include <fenv_libc.h> | ||
572 | +#include <inttypes.h> | ||
573 | + | ||
574 | +#include <sysdep.h> | ||
575 | +#include <ldsodefs.h> | ||
576 | + | ||
577 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
578 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
579 | +static const float two108 = 3.245185536584267269e+32; | ||
580 | +static const float twom54 = 5.551115123125782702e-17; | ||
581 | +static const float half = 0.5; | ||
582 | + | ||
583 | +/* The method is based on the descriptions in: | ||
584 | + | ||
585 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
586 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
587 | + | ||
588 | + We find the actual square root and half of its reciprocal | ||
589 | + simultaneously. */ | ||
590 | + | ||
591 | +#ifdef __STDC__ | ||
592 | +double | ||
593 | +__ieee754_sqrt (double b) | ||
594 | +#else | ||
595 | +double | ||
596 | +__ieee754_sqrt (b) | ||
597 | + double b; | ||
598 | +#endif | ||
599 | +{ | ||
600 | + if (__builtin_expect (b > 0, 1)) | ||
601 | + { | ||
602 | + double y, g, h, d, r; | ||
603 | + ieee_double_shape_type u; | ||
604 | + | ||
605 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
606 | + { | ||
607 | + fenv_t fe; | ||
608 | + | ||
609 | + fe = fegetenv_register (); | ||
610 | + | ||
611 | + u.value = b; | ||
612 | + | ||
613 | + relax_fenv_state (); | ||
614 | + | ||
615 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
616 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
617 | + | ||
618 | + /* Following Muller et al, page 168, equation 5.20. | ||
619 | + | ||
620 | + h goes to 1/(2*sqrt(b)) | ||
621 | + g goes to sqrt(b). | ||
622 | + | ||
623 | + We need three iterations to get within 1ulp. */ | ||
624 | + | ||
625 | + /* Indicate that these can be performed prior to the branch. GCC | ||
626 | + insists on sinking them below the branch, however; it seems like | ||
627 | + they'd be better before the branch so that we can cover any latency | ||
628 | + from storing the argument and loading its high word. Oh well. */ | ||
629 | + | ||
630 | + g = b * y; | ||
631 | + h = 0.5 * y; | ||
632 | + | ||
633 | + /* Handle small numbers by scaling. */ | ||
634 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
635 | + return __ieee754_sqrt (b * two108) * twom54; | ||
636 | + | ||
637 | +#define FMADD(a_, c_, b_) \ | ||
638 | + ({ double __r; \ | ||
639 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
640 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
641 | + __r;}) | ||
642 | +#define FNMSUB(a_, c_, b_) \ | ||
643 | + ({ double __r; \ | ||
644 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
645 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
646 | + __r;}) | ||
647 | + | ||
648 | + r = FNMSUB (g, h, half); | ||
649 | + g = FMADD (g, r, g); | ||
650 | + h = FMADD (h, r, h); | ||
651 | + | ||
652 | + r = FNMSUB (g, h, half); | ||
653 | + g = FMADD (g, r, g); | ||
654 | + h = FMADD (h, r, h); | ||
655 | + | ||
656 | + r = FNMSUB (g, h, half); | ||
657 | + g = FMADD (g, r, g); | ||
658 | + h = FMADD (h, r, h); | ||
659 | + | ||
660 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
661 | + | ||
662 | + /* Final refinement. */ | ||
663 | + d = FNMSUB (g, g, b); | ||
664 | + | ||
665 | + fesetenv_register (fe); | ||
666 | + return FMADD (d, h, g); | ||
667 | + } | ||
668 | + } | ||
669 | + else if (b < 0) | ||
670 | + { | ||
671 | + /* For some reason, some PowerPC32 processors don't implement | ||
672 | + FE_INVALID_SQRT. */ | ||
673 | +#ifdef FE_INVALID_SQRT | ||
674 | + feraiseexcept (FE_INVALID_SQRT); | ||
675 | + | ||
676 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
677 | + if ((u.l & FE_INVALID) == 0) | ||
678 | +#endif | ||
679 | + feraiseexcept (FE_INVALID); | ||
680 | + b = a_nan.value; | ||
681 | + } | ||
682 | + return f_wash (b); | ||
683 | +} | ||
684 | diff --git a/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | ||
685 | new file mode 100644 | ||
686 | index 0000000000..26fa067abf | ||
687 | --- /dev/null | ||
688 | +++ b/sysdeps/powerpc/powerpc32/e5500/fpu/e_sqrtf.c | ||
689 | @@ -0,0 +1,101 @@ | ||
690 | +/* Single-precision floating point square root. | ||
691 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
692 | + This file is part of the GNU C Library. | ||
693 | + | ||
694 | + The GNU C Library is free software; you can redistribute it and/or | ||
695 | + modify it under the terms of the GNU Lesser General Public | ||
696 | + License as published by the Free Software Foundation; either | ||
697 | + version 2.1 of the License, or (at your option) any later version. | ||
698 | + | ||
699 | + The GNU C Library is distributed in the hope that it will be useful, | ||
700 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
701 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
702 | + Lesser General Public License for more details. | ||
703 | + | ||
704 | + You should have received a copy of the GNU Lesser General Public | ||
705 | + License along with the GNU C Library; if not, write to the Free | ||
706 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
707 | + 02111-1307 USA. */ | ||
708 | + | ||
709 | +#include <math.h> | ||
710 | +#include <math_private.h> | ||
711 | +#include <fenv_libc.h> | ||
712 | +#include <inttypes.h> | ||
713 | + | ||
714 | +#include <sysdep.h> | ||
715 | +#include <ldsodefs.h> | ||
716 | + | ||
717 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
718 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
719 | +static const float threehalf = 1.5; | ||
720 | + | ||
721 | +/* The method is based on the descriptions in: | ||
722 | + | ||
723 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
724 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
725 | + | ||
726 | + We find the reciprocal square root and use that to compute the actual | ||
727 | + square root. */ | ||
728 | + | ||
729 | +#ifdef __STDC__ | ||
730 | +float | ||
731 | +__ieee754_sqrtf (float b) | ||
732 | +#else | ||
733 | +float | ||
734 | +__ieee754_sqrtf (b) | ||
735 | + float b; | ||
736 | +#endif | ||
737 | +{ | ||
738 | + if (__builtin_expect (b > 0, 1)) | ||
739 | + { | ||
740 | +#define FMSUB(a_, c_, b_) \ | ||
741 | + ({ double __r; \ | ||
742 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
743 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
744 | + __r;}) | ||
745 | +#define FNMSUB(a_, c_, b_) \ | ||
746 | + ({ double __r; \ | ||
747 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
748 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
749 | + __r;}) | ||
750 | + | ||
751 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
752 | + { | ||
753 | + double y, x; | ||
754 | + fenv_t fe; | ||
755 | + | ||
756 | + fe = fegetenv_register (); | ||
757 | + | ||
758 | + relax_fenv_state (); | ||
759 | + | ||
760 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
761 | + y = FMSUB (threehalf, b, b); | ||
762 | + | ||
763 | + /* Initial estimate. */ | ||
764 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
765 | + | ||
766 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
767 | + x = x * FNMSUB (y, x * x, threehalf); | ||
768 | + x = x * FNMSUB (y, x * x, threehalf); | ||
769 | + x = x * FNMSUB (y, x * x, threehalf); | ||
770 | + | ||
771 | + /* All done. */ | ||
772 | + fesetenv_register (fe); | ||
773 | + return x * b; | ||
774 | + } | ||
775 | + } | ||
776 | + else if (b < 0) | ||
777 | + { | ||
778 | + /* For some reason, some PowerPC32 processors don't implement | ||
779 | + FE_INVALID_SQRT. */ | ||
780 | +#ifdef FE_INVALID_SQRT | ||
781 | + feraiseexcept (FE_INVALID_SQRT); | ||
782 | + | ||
783 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
784 | + if ((u.l & FE_INVALID) == 0) | ||
785 | +#endif | ||
786 | + feraiseexcept (FE_INVALID); | ||
787 | + b = a_nan.value; | ||
788 | + } | ||
789 | + return f_washf (b); | ||
790 | +} | ||
791 | diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | ||
792 | new file mode 100644 | ||
793 | index 0000000000..71e516d1c8 | ||
794 | --- /dev/null | ||
795 | +++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrt.c | ||
796 | @@ -0,0 +1,134 @@ | ||
797 | +/* Double-precision floating point square root. | ||
798 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
799 | + This file is part of the GNU C Library. | ||
800 | + | ||
801 | + The GNU C Library is free software; you can redistribute it and/or | ||
802 | + modify it under the terms of the GNU Lesser General Public | ||
803 | + License as published by the Free Software Foundation; either | ||
804 | + version 2.1 of the License, or (at your option) any later version. | ||
805 | + | ||
806 | + The GNU C Library is distributed in the hope that it will be useful, | ||
807 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
808 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
809 | + Lesser General Public License for more details. | ||
810 | + | ||
811 | + You should have received a copy of the GNU Lesser General Public | ||
812 | + License along with the GNU C Library; if not, write to the Free | ||
813 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
814 | + 02111-1307 USA. */ | ||
815 | + | ||
816 | +#include <math.h> | ||
817 | +#include <math_private.h> | ||
818 | +#include <fenv_libc.h> | ||
819 | +#include <inttypes.h> | ||
820 | + | ||
821 | +#include <sysdep.h> | ||
822 | +#include <ldsodefs.h> | ||
823 | + | ||
824 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
825 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
826 | +static const float two108 = 3.245185536584267269e+32; | ||
827 | +static const float twom54 = 5.551115123125782702e-17; | ||
828 | +static const float half = 0.5; | ||
829 | + | ||
830 | +/* The method is based on the descriptions in: | ||
831 | + | ||
832 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
833 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
834 | + | ||
835 | + We find the actual square root and half of its reciprocal | ||
836 | + simultaneously. */ | ||
837 | + | ||
838 | +#ifdef __STDC__ | ||
839 | +double | ||
840 | +__ieee754_sqrt (double b) | ||
841 | +#else | ||
842 | +double | ||
843 | +__ieee754_sqrt (b) | ||
844 | + double b; | ||
845 | +#endif | ||
846 | +{ | ||
847 | + if (__builtin_expect (b > 0, 1)) | ||
848 | + { | ||
849 | + double y, g, h, d, r; | ||
850 | + ieee_double_shape_type u; | ||
851 | + | ||
852 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
853 | + { | ||
854 | + fenv_t fe; | ||
855 | + | ||
856 | + fe = fegetenv_register (); | ||
857 | + | ||
858 | + u.value = b; | ||
859 | + | ||
860 | + relax_fenv_state (); | ||
861 | + | ||
862 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
863 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
864 | + | ||
865 | + /* Following Muller et al, page 168, equation 5.20. | ||
866 | + | ||
867 | + h goes to 1/(2*sqrt(b)) | ||
868 | + g goes to sqrt(b). | ||
869 | + | ||
870 | + We need three iterations to get within 1ulp. */ | ||
871 | + | ||
872 | + /* Indicate that these can be performed prior to the branch. GCC | ||
873 | + insists on sinking them below the branch, however; it seems like | ||
874 | + they'd be better before the branch so that we can cover any latency | ||
875 | + from storing the argument and loading its high word. Oh well. */ | ||
876 | + | ||
877 | + g = b * y; | ||
878 | + h = 0.5 * y; | ||
879 | + | ||
880 | + /* Handle small numbers by scaling. */ | ||
881 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
882 | + return __ieee754_sqrt (b * two108) * twom54; | ||
883 | + | ||
884 | +#define FMADD(a_, c_, b_) \ | ||
885 | + ({ double __r; \ | ||
886 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
887 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
888 | + __r;}) | ||
889 | +#define FNMSUB(a_, c_, b_) \ | ||
890 | + ({ double __r; \ | ||
891 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
892 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
893 | + __r;}) | ||
894 | + | ||
895 | + r = FNMSUB (g, h, half); | ||
896 | + g = FMADD (g, r, g); | ||
897 | + h = FMADD (h, r, h); | ||
898 | + | ||
899 | + r = FNMSUB (g, h, half); | ||
900 | + g = FMADD (g, r, g); | ||
901 | + h = FMADD (h, r, h); | ||
902 | + | ||
903 | + r = FNMSUB (g, h, half); | ||
904 | + g = FMADD (g, r, g); | ||
905 | + h = FMADD (h, r, h); | ||
906 | + | ||
907 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
908 | + | ||
909 | + /* Final refinement. */ | ||
910 | + d = FNMSUB (g, g, b); | ||
911 | + | ||
912 | + fesetenv_register (fe); | ||
913 | + return FMADD (d, h, g); | ||
914 | + } | ||
915 | + } | ||
916 | + else if (b < 0) | ||
917 | + { | ||
918 | + /* For some reason, some PowerPC32 processors don't implement | ||
919 | + FE_INVALID_SQRT. */ | ||
920 | +#ifdef FE_INVALID_SQRT | ||
921 | + feraiseexcept (FE_INVALID_SQRT); | ||
922 | + | ||
923 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
924 | + if ((u.l & FE_INVALID) == 0) | ||
925 | +#endif | ||
926 | + feraiseexcept (FE_INVALID); | ||
927 | + b = a_nan.value; | ||
928 | + } | ||
929 | + return f_wash (b); | ||
930 | +} | ||
931 | diff --git a/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | ||
932 | new file mode 100644 | ||
933 | index 0000000000..26fa067abf | ||
934 | --- /dev/null | ||
935 | +++ b/sysdeps/powerpc/powerpc32/e6500/fpu/e_sqrtf.c | ||
936 | @@ -0,0 +1,101 @@ | ||
937 | +/* Single-precision floating point square root. | ||
938 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
939 | + This file is part of the GNU C Library. | ||
940 | + | ||
941 | + The GNU C Library is free software; you can redistribute it and/or | ||
942 | + modify it under the terms of the GNU Lesser General Public | ||
943 | + License as published by the Free Software Foundation; either | ||
944 | + version 2.1 of the License, or (at your option) any later version. | ||
945 | + | ||
946 | + The GNU C Library is distributed in the hope that it will be useful, | ||
947 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
948 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
949 | + Lesser General Public License for more details. | ||
950 | + | ||
951 | + You should have received a copy of the GNU Lesser General Public | ||
952 | + License along with the GNU C Library; if not, write to the Free | ||
953 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
954 | + 02111-1307 USA. */ | ||
955 | + | ||
956 | +#include <math.h> | ||
957 | +#include <math_private.h> | ||
958 | +#include <fenv_libc.h> | ||
959 | +#include <inttypes.h> | ||
960 | + | ||
961 | +#include <sysdep.h> | ||
962 | +#include <ldsodefs.h> | ||
963 | + | ||
964 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
965 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
966 | +static const float threehalf = 1.5; | ||
967 | + | ||
968 | +/* The method is based on the descriptions in: | ||
969 | + | ||
970 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
971 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
972 | + | ||
973 | + We find the reciprocal square root and use that to compute the actual | ||
974 | + square root. */ | ||
975 | + | ||
976 | +#ifdef __STDC__ | ||
977 | +float | ||
978 | +__ieee754_sqrtf (float b) | ||
979 | +#else | ||
980 | +float | ||
981 | +__ieee754_sqrtf (b) | ||
982 | + float b; | ||
983 | +#endif | ||
984 | +{ | ||
985 | + if (__builtin_expect (b > 0, 1)) | ||
986 | + { | ||
987 | +#define FMSUB(a_, c_, b_) \ | ||
988 | + ({ double __r; \ | ||
989 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
990 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
991 | + __r;}) | ||
992 | +#define FNMSUB(a_, c_, b_) \ | ||
993 | + ({ double __r; \ | ||
994 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
995 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
996 | + __r;}) | ||
997 | + | ||
998 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
999 | + { | ||
1000 | + double y, x; | ||
1001 | + fenv_t fe; | ||
1002 | + | ||
1003 | + fe = fegetenv_register (); | ||
1004 | + | ||
1005 | + relax_fenv_state (); | ||
1006 | + | ||
1007 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
1008 | + y = FMSUB (threehalf, b, b); | ||
1009 | + | ||
1010 | + /* Initial estimate. */ | ||
1011 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
1012 | + | ||
1013 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
1014 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1015 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1016 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1017 | + | ||
1018 | + /* All done. */ | ||
1019 | + fesetenv_register (fe); | ||
1020 | + return x * b; | ||
1021 | + } | ||
1022 | + } | ||
1023 | + else if (b < 0) | ||
1024 | + { | ||
1025 | + /* For some reason, some PowerPC32 processors don't implement | ||
1026 | + FE_INVALID_SQRT. */ | ||
1027 | +#ifdef FE_INVALID_SQRT | ||
1028 | + feraiseexcept (FE_INVALID_SQRT); | ||
1029 | + | ||
1030 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1031 | + if ((u.l & FE_INVALID) == 0) | ||
1032 | +#endif | ||
1033 | + feraiseexcept (FE_INVALID); | ||
1034 | + b = a_nan.value; | ||
1035 | + } | ||
1036 | + return f_washf (b); | ||
1037 | +} | ||
1038 | diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | ||
1039 | new file mode 100644 | ||
1040 | index 0000000000..71e516d1c8 | ||
1041 | --- /dev/null | ||
1042 | +++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrt.c | ||
1043 | @@ -0,0 +1,134 @@ | ||
1044 | +/* Double-precision floating point square root. | ||
1045 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
1046 | + This file is part of the GNU C Library. | ||
1047 | + | ||
1048 | + The GNU C Library is free software; you can redistribute it and/or | ||
1049 | + modify it under the terms of the GNU Lesser General Public | ||
1050 | + License as published by the Free Software Foundation; either | ||
1051 | + version 2.1 of the License, or (at your option) any later version. | ||
1052 | + | ||
1053 | + The GNU C Library is distributed in the hope that it will be useful, | ||
1054 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
1055 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
1056 | + Lesser General Public License for more details. | ||
1057 | + | ||
1058 | + You should have received a copy of the GNU Lesser General Public | ||
1059 | + License along with the GNU C Library; if not, write to the Free | ||
1060 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
1061 | + 02111-1307 USA. */ | ||
1062 | + | ||
1063 | +#include <math.h> | ||
1064 | +#include <math_private.h> | ||
1065 | +#include <fenv_libc.h> | ||
1066 | +#include <inttypes.h> | ||
1067 | + | ||
1068 | +#include <sysdep.h> | ||
1069 | +#include <ldsodefs.h> | ||
1070 | + | ||
1071 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
1072 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
1073 | +static const float two108 = 3.245185536584267269e+32; | ||
1074 | +static const float twom54 = 5.551115123125782702e-17; | ||
1075 | +static const float half = 0.5; | ||
1076 | + | ||
1077 | +/* The method is based on the descriptions in: | ||
1078 | + | ||
1079 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
1080 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
1081 | + | ||
1082 | + We find the actual square root and half of its reciprocal | ||
1083 | + simultaneously. */ | ||
1084 | + | ||
1085 | +#ifdef __STDC__ | ||
1086 | +double | ||
1087 | +__ieee754_sqrt (double b) | ||
1088 | +#else | ||
1089 | +double | ||
1090 | +__ieee754_sqrt (b) | ||
1091 | + double b; | ||
1092 | +#endif | ||
1093 | +{ | ||
1094 | + if (__builtin_expect (b > 0, 1)) | ||
1095 | + { | ||
1096 | + double y, g, h, d, r; | ||
1097 | + ieee_double_shape_type u; | ||
1098 | + | ||
1099 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
1100 | + { | ||
1101 | + fenv_t fe; | ||
1102 | + | ||
1103 | + fe = fegetenv_register (); | ||
1104 | + | ||
1105 | + u.value = b; | ||
1106 | + | ||
1107 | + relax_fenv_state (); | ||
1108 | + | ||
1109 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
1110 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
1111 | + | ||
1112 | + /* Following Muller et al, page 168, equation 5.20. | ||
1113 | + | ||
1114 | + h goes to 1/(2*sqrt(b)) | ||
1115 | + g goes to sqrt(b). | ||
1116 | + | ||
1117 | + We need three iterations to get within 1ulp. */ | ||
1118 | + | ||
1119 | + /* Indicate that these can be performed prior to the branch. GCC | ||
1120 | + insists on sinking them below the branch, however; it seems like | ||
1121 | + they'd be better before the branch so that we can cover any latency | ||
1122 | + from storing the argument and loading its high word. Oh well. */ | ||
1123 | + | ||
1124 | + g = b * y; | ||
1125 | + h = 0.5 * y; | ||
1126 | + | ||
1127 | + /* Handle small numbers by scaling. */ | ||
1128 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
1129 | + return __ieee754_sqrt (b * two108) * twom54; | ||
1130 | + | ||
1131 | +#define FMADD(a_, c_, b_) \ | ||
1132 | + ({ double __r; \ | ||
1133 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
1134 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1135 | + __r;}) | ||
1136 | +#define FNMSUB(a_, c_, b_) \ | ||
1137 | + ({ double __r; \ | ||
1138 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
1139 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1140 | + __r;}) | ||
1141 | + | ||
1142 | + r = FNMSUB (g, h, half); | ||
1143 | + g = FMADD (g, r, g); | ||
1144 | + h = FMADD (h, r, h); | ||
1145 | + | ||
1146 | + r = FNMSUB (g, h, half); | ||
1147 | + g = FMADD (g, r, g); | ||
1148 | + h = FMADD (h, r, h); | ||
1149 | + | ||
1150 | + r = FNMSUB (g, h, half); | ||
1151 | + g = FMADD (g, r, g); | ||
1152 | + h = FMADD (h, r, h); | ||
1153 | + | ||
1154 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
1155 | + | ||
1156 | + /* Final refinement. */ | ||
1157 | + d = FNMSUB (g, g, b); | ||
1158 | + | ||
1159 | + fesetenv_register (fe); | ||
1160 | + return FMADD (d, h, g); | ||
1161 | + } | ||
1162 | + } | ||
1163 | + else if (b < 0) | ||
1164 | + { | ||
1165 | + /* For some reason, some PowerPC32 processors don't implement | ||
1166 | + FE_INVALID_SQRT. */ | ||
1167 | +#ifdef FE_INVALID_SQRT | ||
1168 | + feraiseexcept (FE_INVALID_SQRT); | ||
1169 | + | ||
1170 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1171 | + if ((u.l & FE_INVALID) == 0) | ||
1172 | +#endif | ||
1173 | + feraiseexcept (FE_INVALID); | ||
1174 | + b = a_nan.value; | ||
1175 | + } | ||
1176 | + return f_wash (b); | ||
1177 | +} | ||
1178 | diff --git a/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | ||
1179 | new file mode 100644 | ||
1180 | index 0000000000..26fa067abf | ||
1181 | --- /dev/null | ||
1182 | +++ b/sysdeps/powerpc/powerpc64/e5500/fpu/e_sqrtf.c | ||
1183 | @@ -0,0 +1,101 @@ | ||
1184 | +/* Single-precision floating point square root. | ||
1185 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
1186 | + This file is part of the GNU C Library. | ||
1187 | + | ||
1188 | + The GNU C Library is free software; you can redistribute it and/or | ||
1189 | + modify it under the terms of the GNU Lesser General Public | ||
1190 | + License as published by the Free Software Foundation; either | ||
1191 | + version 2.1 of the License, or (at your option) any later version. | ||
1192 | + | ||
1193 | + The GNU C Library is distributed in the hope that it will be useful, | ||
1194 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
1195 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
1196 | + Lesser General Public License for more details. | ||
1197 | + | ||
1198 | + You should have received a copy of the GNU Lesser General Public | ||
1199 | + License along with the GNU C Library; if not, write to the Free | ||
1200 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
1201 | + 02111-1307 USA. */ | ||
1202 | + | ||
1203 | +#include <math.h> | ||
1204 | +#include <math_private.h> | ||
1205 | +#include <fenv_libc.h> | ||
1206 | +#include <inttypes.h> | ||
1207 | + | ||
1208 | +#include <sysdep.h> | ||
1209 | +#include <ldsodefs.h> | ||
1210 | + | ||
1211 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
1212 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
1213 | +static const float threehalf = 1.5; | ||
1214 | + | ||
1215 | +/* The method is based on the descriptions in: | ||
1216 | + | ||
1217 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
1218 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
1219 | + | ||
1220 | + We find the reciprocal square root and use that to compute the actual | ||
1221 | + square root. */ | ||
1222 | + | ||
1223 | +#ifdef __STDC__ | ||
1224 | +float | ||
1225 | +__ieee754_sqrtf (float b) | ||
1226 | +#else | ||
1227 | +float | ||
1228 | +__ieee754_sqrtf (b) | ||
1229 | + float b; | ||
1230 | +#endif | ||
1231 | +{ | ||
1232 | + if (__builtin_expect (b > 0, 1)) | ||
1233 | + { | ||
1234 | +#define FMSUB(a_, c_, b_) \ | ||
1235 | + ({ double __r; \ | ||
1236 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
1237 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1238 | + __r;}) | ||
1239 | +#define FNMSUB(a_, c_, b_) \ | ||
1240 | + ({ double __r; \ | ||
1241 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
1242 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1243 | + __r;}) | ||
1244 | + | ||
1245 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
1246 | + { | ||
1247 | + double y, x; | ||
1248 | + fenv_t fe; | ||
1249 | + | ||
1250 | + fe = fegetenv_register (); | ||
1251 | + | ||
1252 | + relax_fenv_state (); | ||
1253 | + | ||
1254 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
1255 | + y = FMSUB (threehalf, b, b); | ||
1256 | + | ||
1257 | + /* Initial estimate. */ | ||
1258 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
1259 | + | ||
1260 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
1261 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1262 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1263 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1264 | + | ||
1265 | + /* All done. */ | ||
1266 | + fesetenv_register (fe); | ||
1267 | + return x * b; | ||
1268 | + } | ||
1269 | + } | ||
1270 | + else if (b < 0) | ||
1271 | + { | ||
1272 | + /* For some reason, some PowerPC32 processors don't implement | ||
1273 | + FE_INVALID_SQRT. */ | ||
1274 | +#ifdef FE_INVALID_SQRT | ||
1275 | + feraiseexcept (FE_INVALID_SQRT); | ||
1276 | + | ||
1277 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1278 | + if ((u.l & FE_INVALID) == 0) | ||
1279 | +#endif | ||
1280 | + feraiseexcept (FE_INVALID); | ||
1281 | + b = a_nan.value; | ||
1282 | + } | ||
1283 | + return f_washf (b); | ||
1284 | +} | ||
1285 | diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | ||
1286 | new file mode 100644 | ||
1287 | index 0000000000..71e516d1c8 | ||
1288 | --- /dev/null | ||
1289 | +++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrt.c | ||
1290 | @@ -0,0 +1,134 @@ | ||
1291 | +/* Double-precision floating point square root. | ||
1292 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
1293 | + This file is part of the GNU C Library. | ||
1294 | + | ||
1295 | + The GNU C Library is free software; you can redistribute it and/or | ||
1296 | + modify it under the terms of the GNU Lesser General Public | ||
1297 | + License as published by the Free Software Foundation; either | ||
1298 | + version 2.1 of the License, or (at your option) any later version. | ||
1299 | + | ||
1300 | + The GNU C Library is distributed in the hope that it will be useful, | ||
1301 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
1302 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
1303 | + Lesser General Public License for more details. | ||
1304 | + | ||
1305 | + You should have received a copy of the GNU Lesser General Public | ||
1306 | + License along with the GNU C Library; if not, write to the Free | ||
1307 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
1308 | + 02111-1307 USA. */ | ||
1309 | + | ||
1310 | +#include <math.h> | ||
1311 | +#include <math_private.h> | ||
1312 | +#include <fenv_libc.h> | ||
1313 | +#include <inttypes.h> | ||
1314 | + | ||
1315 | +#include <sysdep.h> | ||
1316 | +#include <ldsodefs.h> | ||
1317 | + | ||
1318 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
1319 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
1320 | +static const float two108 = 3.245185536584267269e+32; | ||
1321 | +static const float twom54 = 5.551115123125782702e-17; | ||
1322 | +static const float half = 0.5; | ||
1323 | + | ||
1324 | +/* The method is based on the descriptions in: | ||
1325 | + | ||
1326 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
1327 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
1328 | + | ||
1329 | + We find the actual square root and half of its reciprocal | ||
1330 | + simultaneously. */ | ||
1331 | + | ||
1332 | +#ifdef __STDC__ | ||
1333 | +double | ||
1334 | +__ieee754_sqrt (double b) | ||
1335 | +#else | ||
1336 | +double | ||
1337 | +__ieee754_sqrt (b) | ||
1338 | + double b; | ||
1339 | +#endif | ||
1340 | +{ | ||
1341 | + if (__builtin_expect (b > 0, 1)) | ||
1342 | + { | ||
1343 | + double y, g, h, d, r; | ||
1344 | + ieee_double_shape_type u; | ||
1345 | + | ||
1346 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
1347 | + { | ||
1348 | + fenv_t fe; | ||
1349 | + | ||
1350 | + fe = fegetenv_register (); | ||
1351 | + | ||
1352 | + u.value = b; | ||
1353 | + | ||
1354 | + relax_fenv_state (); | ||
1355 | + | ||
1356 | + __asm__ ("frsqrte %[estimate], %[x]\n" | ||
1357 | + : [estimate] "=f" (y) : [x] "f" (b)); | ||
1358 | + | ||
1359 | + /* Following Muller et al, page 168, equation 5.20. | ||
1360 | + | ||
1361 | + h goes to 1/(2*sqrt(b)) | ||
1362 | + g goes to sqrt(b). | ||
1363 | + | ||
1364 | + We need three iterations to get within 1ulp. */ | ||
1365 | + | ||
1366 | + /* Indicate that these can be performed prior to the branch. GCC | ||
1367 | + insists on sinking them below the branch, however; it seems like | ||
1368 | + they'd be better before the branch so that we can cover any latency | ||
1369 | + from storing the argument and loading its high word. Oh well. */ | ||
1370 | + | ||
1371 | + g = b * y; | ||
1372 | + h = 0.5 * y; | ||
1373 | + | ||
1374 | + /* Handle small numbers by scaling. */ | ||
1375 | + if (__builtin_expect ((u.parts.msw & 0x7ff00000) <= 0x02000000, 0)) | ||
1376 | + return __ieee754_sqrt (b * two108) * twom54; | ||
1377 | + | ||
1378 | +#define FMADD(a_, c_, b_) \ | ||
1379 | + ({ double __r; \ | ||
1380 | + __asm__ ("fmadd %[r], %[a], %[c], %[b]\n" \ | ||
1381 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1382 | + __r;}) | ||
1383 | +#define FNMSUB(a_, c_, b_) \ | ||
1384 | + ({ double __r; \ | ||
1385 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
1386 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1387 | + __r;}) | ||
1388 | + | ||
1389 | + r = FNMSUB (g, h, half); | ||
1390 | + g = FMADD (g, r, g); | ||
1391 | + h = FMADD (h, r, h); | ||
1392 | + | ||
1393 | + r = FNMSUB (g, h, half); | ||
1394 | + g = FMADD (g, r, g); | ||
1395 | + h = FMADD (h, r, h); | ||
1396 | + | ||
1397 | + r = FNMSUB (g, h, half); | ||
1398 | + g = FMADD (g, r, g); | ||
1399 | + h = FMADD (h, r, h); | ||
1400 | + | ||
1401 | + /* g is now +/- 1ulp, or exactly equal to, the square root of b. */ | ||
1402 | + | ||
1403 | + /* Final refinement. */ | ||
1404 | + d = FNMSUB (g, g, b); | ||
1405 | + | ||
1406 | + fesetenv_register (fe); | ||
1407 | + return FMADD (d, h, g); | ||
1408 | + } | ||
1409 | + } | ||
1410 | + else if (b < 0) | ||
1411 | + { | ||
1412 | + /* For some reason, some PowerPC32 processors don't implement | ||
1413 | + FE_INVALID_SQRT. */ | ||
1414 | +#ifdef FE_INVALID_SQRT | ||
1415 | + feraiseexcept (FE_INVALID_SQRT); | ||
1416 | + | ||
1417 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1418 | + if ((u.l & FE_INVALID) == 0) | ||
1419 | +#endif | ||
1420 | + feraiseexcept (FE_INVALID); | ||
1421 | + b = a_nan.value; | ||
1422 | + } | ||
1423 | + return f_wash (b); | ||
1424 | +} | ||
1425 | diff --git a/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | ||
1426 | new file mode 100644 | ||
1427 | index 0000000000..26fa067abf | ||
1428 | --- /dev/null | ||
1429 | +++ b/sysdeps/powerpc/powerpc64/e6500/fpu/e_sqrtf.c | ||
1430 | @@ -0,0 +1,101 @@ | ||
1431 | +/* Single-precision floating point square root. | ||
1432 | + Copyright (C) 2010 Free Software Foundation, Inc. | ||
1433 | + This file is part of the GNU C Library. | ||
1434 | + | ||
1435 | + The GNU C Library is free software; you can redistribute it and/or | ||
1436 | + modify it under the terms of the GNU Lesser General Public | ||
1437 | + License as published by the Free Software Foundation; either | ||
1438 | + version 2.1 of the License, or (at your option) any later version. | ||
1439 | + | ||
1440 | + The GNU C Library is distributed in the hope that it will be useful, | ||
1441 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
1442 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
1443 | + Lesser General Public License for more details. | ||
1444 | + | ||
1445 | + You should have received a copy of the GNU Lesser General Public | ||
1446 | + License along with the GNU C Library; if not, write to the Free | ||
1447 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | ||
1448 | + 02111-1307 USA. */ | ||
1449 | + | ||
1450 | +#include <math.h> | ||
1451 | +#include <math_private.h> | ||
1452 | +#include <fenv_libc.h> | ||
1453 | +#include <inttypes.h> | ||
1454 | + | ||
1455 | +#include <sysdep.h> | ||
1456 | +#include <ldsodefs.h> | ||
1457 | + | ||
1458 | +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; | ||
1459 | +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; | ||
1460 | +static const float threehalf = 1.5; | ||
1461 | + | ||
1462 | +/* The method is based on the descriptions in: | ||
1463 | + | ||
1464 | + _The Handbook of Floating-Pointer Arithmetic_ by Muller et al., chapter 5; | ||
1465 | + _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9 | ||
1466 | + | ||
1467 | + We find the reciprocal square root and use that to compute the actual | ||
1468 | + square root. */ | ||
1469 | + | ||
1470 | +#ifdef __STDC__ | ||
1471 | +float | ||
1472 | +__ieee754_sqrtf (float b) | ||
1473 | +#else | ||
1474 | +float | ||
1475 | +__ieee754_sqrtf (b) | ||
1476 | + float b; | ||
1477 | +#endif | ||
1478 | +{ | ||
1479 | + if (__builtin_expect (b > 0, 1)) | ||
1480 | + { | ||
1481 | +#define FMSUB(a_, c_, b_) \ | ||
1482 | + ({ double __r; \ | ||
1483 | + __asm__ ("fmsub %[r], %[a], %[c], %[b]\n" \ | ||
1484 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1485 | + __r;}) | ||
1486 | +#define FNMSUB(a_, c_, b_) \ | ||
1487 | + ({ double __r; \ | ||
1488 | + __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n" \ | ||
1489 | + : [r] "=f" (__r) : [a] "f" (a_), [c] "f" (c_), [b] "f" (b_)); \ | ||
1490 | + __r;}) | ||
1491 | + | ||
1492 | + if (__builtin_expect (b != a_inf.value, 1)) | ||
1493 | + { | ||
1494 | + double y, x; | ||
1495 | + fenv_t fe; | ||
1496 | + | ||
1497 | + fe = fegetenv_register (); | ||
1498 | + | ||
1499 | + relax_fenv_state (); | ||
1500 | + | ||
1501 | + /* Compute y = 1.5 * b - b. Uses fewer constants than y = 0.5 * b. */ | ||
1502 | + y = FMSUB (threehalf, b, b); | ||
1503 | + | ||
1504 | + /* Initial estimate. */ | ||
1505 | + __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b)); | ||
1506 | + | ||
1507 | + /* Iterate. x_{n+1} = x_n * (1.5 - y * (x_n * x_n)). */ | ||
1508 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1509 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1510 | + x = x * FNMSUB (y, x * x, threehalf); | ||
1511 | + | ||
1512 | + /* All done. */ | ||
1513 | + fesetenv_register (fe); | ||
1514 | + return x * b; | ||
1515 | + } | ||
1516 | + } | ||
1517 | + else if (b < 0) | ||
1518 | + { | ||
1519 | + /* For some reason, some PowerPC32 processors don't implement | ||
1520 | + FE_INVALID_SQRT. */ | ||
1521 | +#ifdef FE_INVALID_SQRT | ||
1522 | + feraiseexcept (FE_INVALID_SQRT); | ||
1523 | + | ||
1524 | + fenv_union_t u = { .fenv = fegetenv_register () }; | ||
1525 | + if ((u.l & FE_INVALID) == 0) | ||
1526 | +#endif | ||
1527 | + feraiseexcept (FE_INVALID); | ||
1528 | + b = a_nan.value; | ||
1529 | + } | ||
1530 | + return f_washf (b); | ||
1531 | +} | ||
1532 | diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies | ||
1533 | new file mode 100644 | ||
1534 | index 0000000000..b103b4dea5 | ||
1535 | --- /dev/null | ||
1536 | +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/603e/fpu/Implies | ||
1537 | @@ -0,0 +1 @@ | ||
1538 | +powerpc/powerpc32/603e/fpu | ||
1539 | diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies | ||
1540 | new file mode 100644 | ||
1541 | index 0000000000..64db17fada | ||
1542 | --- /dev/null | ||
1543 | +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e300c3/fpu/Implies | ||
1544 | @@ -0,0 +1,2 @@ | ||
1545 | +# e300c3 is a variant of 603e so use the same optimizations for sqrt | ||
1546 | +powerpc/powerpc32/603e/fpu | ||
1547 | diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies | ||
1548 | new file mode 100644 | ||
1549 | index 0000000000..7eac5fcf02 | ||
1550 | --- /dev/null | ||
1551 | +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500mc/fpu/Implies | ||
1552 | @@ -0,0 +1 @@ | ||
1553 | +powerpc/powerpc32/e500mc/fpu | ||
1554 | diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies | ||
1555 | new file mode 100644 | ||
1556 | index 0000000000..264b2a7700 | ||
1557 | --- /dev/null | ||
1558 | +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e5500/fpu/Implies | ||
1559 | @@ -0,0 +1 @@ | ||
1560 | +powerpc/powerpc32/e5500/fpu | ||
1561 | diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies | ||
1562 | new file mode 100644 | ||
1563 | index 0000000000..a25934467b | ||
1564 | --- /dev/null | ||
1565 | +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/e6500/fpu/Implies | ||
1566 | @@ -0,0 +1 @@ | ||
1567 | +powerpc/powerpc32/e6500/fpu | ||
1568 | diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies | ||
1569 | new file mode 100644 | ||
1570 | index 0000000000..a7bc854be8 | ||
1571 | --- /dev/null | ||
1572 | +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e5500/fpu/Implies | ||
1573 | @@ -0,0 +1 @@ | ||
1574 | +powerpc/powerpc64/e5500/fpu | ||
1575 | diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies | ||
1576 | new file mode 100644 | ||
1577 | index 0000000000..04ff8cc181 | ||
1578 | --- /dev/null | ||
1579 | +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/e6500/fpu/Implies | ||
1580 | @@ -0,0 +1 @@ | ||
1581 | +powerpc/powerpc64/e6500/fpu | ||