diff options
Diffstat (limited to 'meta/packages/glibc/glibc-2.4/arm-memcpy.patch')
-rw-r--r-- | meta/packages/glibc/glibc-2.4/arm-memcpy.patch | 758 |
1 files changed, 758 insertions, 0 deletions
diff --git a/meta/packages/glibc/glibc-2.4/arm-memcpy.patch b/meta/packages/glibc/glibc-2.4/arm-memcpy.patch new file mode 100644 index 0000000000..bc2b3dab84 --- /dev/null +++ b/meta/packages/glibc/glibc-2.4/arm-memcpy.patch | |||
@@ -0,0 +1,758 @@ | |||
1 | --- /dev/null 2004-02-02 20:32:13.000000000 +0000 | ||
2 | +++ sysdeps/arm/memmove.S 2004-03-20 18:37:23.000000000 +0000 | ||
3 | @@ -0,0 +1,251 @@ | ||
4 | +/* | ||
5 | + * Optimized memmove implementation for ARM processors | ||
6 | + * | ||
7 | + * Author: Nicolas Pitre | ||
8 | + * Created: Dec 23, 2003 | ||
9 | + * Copyright: (C) MontaVista Software, Inc. | ||
10 | + * | ||
11 | + * This file is free software; you can redistribute it and/or | ||
12 | + * modify it under the terms of the GNU Lesser General Public | ||
13 | + * License as published by the Free Software Foundation; either | ||
14 | + * version 2.1 of the License, or (at your option) any later version. | ||
15 | + * | ||
16 | + * This file is distributed in the hope that it will be useful, | ||
17 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | + * Lesser General Public License for more details. | ||
20 | + */ | ||
21 | + | ||
22 | +#include <sysdep.h> | ||
23 | + | ||
24 | + | ||
25 | +/* | ||
26 | + * Endian independent macros for shifting bytes within registers. | ||
27 | + */ | ||
28 | +#ifndef __ARMEB__ /* little-endian build */ | ||
29 | +#define pull lsr /* moves bytes toward the lower-address end of a word */ | ||
30 | +#define push lsl /* moves bytes toward the higher-address end of a word */ | ||
31 | +#else /* big-endian build: the two shift directions swap */ | ||
32 | +#define pull lsl | ||
33 | +#define push lsr | ||
34 | +#endif | ||
35 | + | ||
36 | +/* | ||
37 | + * Enable data preload only when one of the three ARMv5 macros tested below is defined | ||
38 | + */ | ||
39 | +#if defined(__ARM_ARCH_5__) || \ | ||
40 | + defined(__ARM_ARCH_5T__) || \ | ||
41 | + defined(__ARM_ARCH_5TE__) | ||
42 | +#define PLD(code...) code /* keep the wrapped instruction */ | ||
43 | +#else /* any other target: PLD() lines expand to nothing */ | ||
44 | +#define PLD(code...) | ||
45 | +#endif | ||
46 | + | ||
47 | + | ||
48 | +/* void *memmove (void *dst, const void *src, size_t n) -- r0 = dst, r1 = src, r2 = n */ | ||
49 | +ENTRY(memmove) | ||
50 | + subs ip, r0, r1 /* ip = dst - src */ | ||
51 | + cmphi r2, ip /* if dst > src, compare n with that distance */ | ||
52 | + bls memcpy(PLT) /* dst <= src, or n <= dst - src: hand over to memcpy */ | ||
53 | + /* Otherwise copy backward, starting at the end of both buffers. */ | ||
54 | + stmfd sp!, {r0, r4, lr} /* saved r0 is reloaded on exit as the return value */ | ||
55 | + add r1, r1, r2 /* r1 = src + n */ | ||
56 | + add r0, r0, r2 /* r0 = dst + n */ | ||
57 | + subs r2, r2, #4 | ||
58 | + blt 25f /* fewer than 4 bytes in total */ | ||
59 | + ands ip, r0, #3 | ||
60 | + PLD( pld [r1, #-4] ) | ||
61 | + bne 26f /* end of dst is not word aligned */ | ||
62 | + ands ip, r1, #3 | ||
63 | + bne 27f /* end of src is not word aligned */ | ||
64 | + /* 19: both pointers are word aligned. */ | ||
65 | +19: subs r2, r2, #4 | ||
66 | + blt 24f | ||
67 | + subs r2, r2, #8 | ||
68 | + blt 23f | ||
69 | + subs r2, r2, #16 | ||
70 | + blt 22f | ||
71 | + /* At least 32 bytes remain; r5 - r8 are saved here and restored after the loop. */ | ||
72 | + PLD( pld [r1, #-32] ) | ||
73 | + PLD( subs r2, r2, #96 ) | ||
74 | + stmfd sp!, {r5 - r8} | ||
75 | + PLD( blt 21f ) | ||
76 | + | ||
77 | + PLD( @ cache alignment ) | ||
78 | + PLD( ands ip, r1, #31 ) | ||
79 | + PLD( pld [r1, #-64] ) | ||
80 | + PLD( beq 20f ) | ||
81 | + PLD( cmp r2, ip ) | ||
82 | + PLD( pld [r1, #-96] ) | ||
83 | + PLD( blt 20f ) | ||
84 | + PLD( cmp ip, #16 ) | ||
85 | + PLD( sub r2, r2, ip ) | ||
86 | + PLD( ldmgedb r1!, {r3 - r6} ) | ||
87 | + PLD( stmgedb r0!, {r3 - r6} ) | ||
88 | + PLD( beq 20f ) | ||
89 | + PLD( and ip, ip, #15 ) | ||
90 | + PLD( cmp ip, #8 ) | ||
91 | + PLD( ldr r3, [r1, #-4]! ) | ||
92 | + PLD( ldrge r4, [r1, #-4]! ) | ||
93 | + PLD( ldrgt r5, [r1, #-4]! ) | ||
94 | + PLD( str r3, [r0, #-4]! ) | ||
95 | + PLD( strge r4, [r0, #-4]! ) | ||
96 | + PLD( strgt r5, [r0, #-4]! ) | ||
97 | + | ||
98 | +20: PLD( pld [r1, #-96] ) | ||
99 | + PLD( pld [r1, #-128] ) | ||
100 | +21: ldmdb r1!, {r3, r4, ip, lr} /* each pass moves 32 bytes, or 64 when the subs below leaves r2 >= 0 */ | ||
101 | + subs r2, r2, #32 | ||
102 | + stmdb r0!, {r3, r4, ip, lr} | ||
103 | + ldmdb r1!, {r3, r4, ip, lr} | ||
104 | + stmgedb r0!, {r3, r4, ip, lr} | ||
105 | + ldmgedb r1!, {r3, r4, ip, lr} | ||
106 | + stmgedb r0!, {r3, r4, ip, lr} | ||
107 | + ldmgedb r1!, {r3, r4, ip, lr} | ||
108 | + subges r2, r2, #32 | ||
109 | + stmdb r0!, {r3, r4, ip, lr} | ||
110 | + bge 20b | ||
111 | + PLD( cmn r2, #96 ) | ||
112 | + PLD( bge 21b ) | ||
113 | + PLD( add r2, r2, #96 ) | ||
114 | + tst r2, #31 | ||
115 | + ldmfd sp!, {r5 - r8} | ||
116 | + ldmeqfd sp!, {r0, r4, pc} /* no remainder below 32 bytes: return dst */ | ||
117 | + | ||
118 | + tst r2, #16 | ||
119 | +22: ldmnedb r1!, {r3, r4, ip, lr} | ||
120 | + stmnedb r0!, {r3, r4, ip, lr} | ||
121 | + | ||
122 | + tst r2, #8 | ||
123 | +23: ldmnedb r1!, {r3, r4} | ||
124 | + stmnedb r0!, {r3, r4} | ||
125 | + | ||
126 | + tst r2, #4 | ||
127 | +24: ldrne r3, [r1, #-4]! | ||
128 | + strne r3, [r0, #-4]! | ||
129 | + /* 25: copy the last 0 - 3 bytes, then return dst. */ | ||
130 | +25: ands r2, r2, #3 | ||
131 | + ldmeqfd sp!, {r0, r4, pc} | ||
132 | + | ||
133 | + cmp r2, #2 | ||
134 | + ldrb r3, [r1, #-1] | ||
135 | + ldrgeb r4, [r1, #-2] | ||
136 | + ldrgtb ip, [r1, #-3] | ||
137 | + strb r3, [r0, #-1] | ||
138 | + strgeb r4, [r0, #-2] | ||
139 | + strgtb ip, [r0, #-3] | ||
140 | + ldmfd sp!, {r0, r4, pc} | ||
141 | + /* 26: copy ip = 1 - 3 bytes so that dst becomes word aligned. */ | ||
142 | +26: cmp ip, #2 | ||
143 | + ldrb r3, [r1, #-1]! | ||
144 | + ldrgeb r4, [r1, #-1]! | ||
145 | + ldrgtb lr, [r1, #-1]! | ||
146 | + strb r3, [r0, #-1]! | ||
147 | + strgeb r4, [r0, #-1]! | ||
148 | + strgtb lr, [r0, #-1]! | ||
149 | + subs r2, r2, ip | ||
150 | + blt 25b | ||
151 | + ands ip, r1, #3 | ||
152 | + beq 19b | ||
153 | + /* 27: src is misaligned by ip bytes; read it as whole words and shift. */ | ||
154 | +27: bic r1, r1, #3 | ||
155 | + cmp ip, #2 | ||
156 | + ldr r3, [r1] | ||
157 | + beq 35f /* offset 2 */ | ||
158 | + blt 36f /* offset 1; offset 3 falls through */ | ||
159 | + | ||
160 | + | ||
161 | + .macro backward_copy_shift push pull | ||
162 | + /* r3 holds the last src word loaded; each stored word merges it with the next lower one. */ | ||
163 | + cmp r2, #12 | ||
164 | + PLD( pld [r1, #-4] ) | ||
165 | + blt 33f | ||
166 | + subs r2, r2, #28 | ||
167 | + stmfd sp!, {r5 - r9} | ||
168 | + blt 31f | ||
169 | + | ||
170 | + PLD( subs r2, r2, #96 ) | ||
171 | + PLD( pld [r1, #-32] ) | ||
172 | + PLD( blt 30f ) | ||
173 | + PLD( pld [r1, #-64] ) | ||
174 | + | ||
175 | + PLD( @ cache alignment ) | ||
176 | + PLD( ands ip, r1, #31 ) | ||
177 | + PLD( pld [r1, #-96] ) | ||
178 | + PLD( beq 29f ) | ||
179 | + PLD( cmp r2, ip ) | ||
180 | + PLD( pld [r1, #-128] ) | ||
181 | + PLD( blt 29f ) | ||
182 | + PLD( sub r2, r2, ip ) | ||
183 | +28: PLD( mov r4, r3, push #\push ) | ||
184 | + PLD( ldr r3, [r1, #-4]! ) | ||
185 | + PLD( subs ip, ip, #4 ) | ||
186 | + PLD( orr r4, r4, r3, pull #\pull ) | ||
187 | + PLD( str r4, [r0, #-4]! ) | ||
188 | + PLD( bgt 28b ) | ||
189 | + | ||
190 | +29: PLD( pld [r1, #-128] ) | ||
191 | +30: mov lr, r3, push #\push | ||
192 | + ldmdb r1!, {r3 - r9, ip} | ||
193 | + subs r2, r2, #32 | ||
194 | + orr lr, lr, ip, pull #\pull | ||
195 | + mov ip, ip, push #\push | ||
196 | + orr ip, ip, r9, pull #\pull | ||
197 | + mov r9, r9, push #\push | ||
198 | + orr r9, r9, r8, pull #\pull | ||
199 | + mov r8, r8, push #\push | ||
200 | + orr r8, r8, r7, pull #\pull | ||
201 | + mov r7, r7, push #\push | ||
202 | + orr r7, r7, r6, pull #\pull | ||
203 | + mov r6, r6, push #\push | ||
204 | + orr r6, r6, r5, pull #\pull | ||
205 | + mov r5, r5, push #\push | ||
206 | + orr r5, r5, r4, pull #\pull | ||
207 | + mov r4, r4, push #\push | ||
208 | + orr r4, r4, r3, pull #\pull | ||
209 | + stmdb r0!, {r4 - r9, ip, lr} | ||
210 | + bge 29b | ||
211 | + PLD( cmn r2, #96 ) | ||
212 | + PLD( bge 30b ) | ||
213 | + PLD( add r2, r2, #96 ) | ||
214 | + cmn r2, #16 | ||
215 | + blt 32f | ||
216 | +31: mov r7, r3, push #\push | ||
217 | + ldmdb r1!, {r3 - r6} | ||
218 | + sub r2, r2, #16 | ||
219 | + orr r7, r7, r6, pull #\pull | ||
220 | + mov r6, r6, push #\push | ||
221 | + orr r6, r6, r5, pull #\pull | ||
222 | + mov r5, r5, push #\push | ||
223 | + orr r5, r5, r4, pull #\pull | ||
224 | + mov r4, r4, push #\push | ||
225 | + orr r4, r4, r3, pull #\pull | ||
226 | + stmdb r0!, {r4 - r7} | ||
227 | +32: adds r2, r2, #28 | ||
228 | + ldmfd sp!, {r5 - r9} | ||
229 | + blt 34f | ||
230 | +33: mov r4, r3, push #\push | ||
231 | + ldr r3, [r1, #-4]! | ||
232 | + subs r2, r2, #4 | ||
233 | + orr r4, r4, r3, pull #\pull | ||
234 | + str r4, [r0, #-4]! | ||
235 | + bge 33b | ||
236 | +34: | ||
237 | + .endm | ||
238 | + | ||
239 | + | ||
240 | + backward_copy_shift push=8 pull=24 | ||
241 | + add r1, r1, #3 /* restore the unaligned src pointer for the byte tail */ | ||
242 | + b 25b | ||
243 | + | ||
244 | +35: backward_copy_shift push=16 pull=16 | ||
245 | + add r1, r1, #2 /* restore the unaligned src pointer for the byte tail */ | ||
246 | + b 25b | ||
247 | + | ||
248 | +36: backward_copy_shift push=24 pull=8 | ||
249 | + add r1, r1, #1 /* restore the unaligned src pointer for the byte tail */ | ||
250 | + b 25b | ||
251 | + | ||
252 | + .size memmove, . - memmove | ||
253 | +END(memmove) | ||
254 | +libc_hidden_builtin_def (memmove) | ||
255 | --- /dev/null 2004-02-02 20:32:13.000000000 +0000 | ||
256 | +++ sysdeps/arm/bcopy.S 2004-03-20 18:37:48.000000000 +0000 | ||
257 | @@ -0,0 +1,255 @@ | ||
258 | +/* | ||
259 | + * Optimized bcopy implementation for ARM processors | ||
260 | + * | ||
261 | + * Author: Nicolas Pitre | ||
262 | + * Created: Dec 23, 2003 | ||
263 | + * Copyright: (C) MontaVista Software, Inc. | ||
264 | + * | ||
265 | + * This file is free software; you can redistribute it and/or | ||
266 | + * modify it under the terms of the GNU Lesser General Public | ||
267 | + * License as published by the Free Software Foundation; either | ||
268 | + * version 2.1 of the License, or (at your option) any later version. | ||
269 | + * | ||
270 | + * This file is distributed in the hope that it will be useful, | ||
271 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
272 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
273 | + * Lesser General Public License for more details. | ||
274 | + */ | ||
275 | + | ||
276 | +#include <sysdep.h> | ||
277 | + | ||
278 | + | ||
279 | +/* | ||
280 | + * Endian independent macros for shifting bytes within registers. | ||
281 | + */ | ||
282 | +#ifndef __ARMEB__ /* little-endian build */ | ||
283 | +#define pull lsr /* moves bytes toward the lower-address end of a word */ | ||
284 | +#define push lsl /* moves bytes toward the higher-address end of a word */ | ||
285 | +#else /* big-endian build: the two shift directions swap */ | ||
286 | +#define pull lsl | ||
287 | +#define push lsr | ||
288 | +#endif | ||
289 | + | ||
290 | +/* | ||
291 | + * Enable data preload only when one of the three ARMv5 macros tested below is defined | ||
292 | + */ | ||
293 | +#if defined(__ARM_ARCH_5__) || \ | ||
294 | + defined(__ARM_ARCH_5T__) || \ | ||
295 | + defined(__ARM_ARCH_5TE__) | ||
296 | +#define PLD(code...) code /* keep the wrapped instruction */ | ||
297 | +#else /* any other target: PLD() lines expand to nothing */ | ||
298 | +#define PLD(code...) | ||
299 | +#endif | ||
300 | + | ||
301 | +dst .req r1 | ||
302 | +src .req r0 | ||
303 | + | ||
304 | +/* void bcopy (const void *src, void *dst, size_t size) -- r0 = src, r1 = dst, r2 = size */ | ||
305 | +ENTRY(bcopy) | ||
306 | + subs ip, dst, src /* ip = dst - src */ | ||
307 | + cmphi r2, ip /* if dst > src, compare size with that distance */ | ||
308 | + movls r3, r0 /* forward copy is safe: swap r0 and r1 ... */ | ||
309 | + movls r0, r1 | ||
310 | + movls r1, r3 /* ... into memcpy argument order (dst, src) */ | ||
311 | + bls memcpy(PLT) | ||
312 | + /* Otherwise copy backward, starting at the end of both buffers. */ | ||
313 | + stmfd sp!, {r4, lr} /* two registers saved: every exit must pop exactly {r4, pc} */ | ||
314 | + add src, src, r2 | ||
315 | + add dst, dst, r2 | ||
316 | + subs r2, r2, #4 | ||
317 | + blt 25f /* fewer than 4 bytes in total */ | ||
318 | + ands ip, dst, #3 | ||
319 | + PLD( pld [src, #-4] ) | ||
320 | + bne 26f /* end of dst is not word aligned */ | ||
321 | + ands ip, src, #3 | ||
322 | + bne 27f /* end of src is not word aligned */ | ||
323 | + /* 19: both pointers are word aligned. */ | ||
324 | +19: subs r2, r2, #4 | ||
325 | + blt 24f | ||
326 | + subs r2, r2, #8 | ||
327 | + blt 23f | ||
328 | + subs r2, r2, #16 | ||
329 | + blt 22f | ||
330 | + /* At least 32 bytes remain; r5 - r8 are saved here and restored after the loop. */ | ||
331 | + PLD( pld [src, #-32] ) | ||
332 | + PLD( subs r2, r2, #96 ) | ||
333 | + stmfd sp!, {r5 - r8} | ||
334 | + PLD( blt 21f ) | ||
335 | + | ||
336 | + PLD( @ cache alignment ) | ||
337 | + PLD( ands ip, src, #31 ) | ||
338 | + PLD( pld [src, #-64] ) | ||
339 | + PLD( beq 20f ) | ||
340 | + PLD( cmp r2, ip ) | ||
341 | + PLD( pld [src, #-96] ) | ||
342 | + PLD( blt 20f ) | ||
343 | + PLD( cmp ip, #16 ) | ||
344 | + PLD( sub r2, r2, ip ) | ||
345 | + PLD( ldmgedb src!, {r3 - r6} ) | ||
346 | + PLD( stmgedb dst!, {r3 - r6} ) | ||
347 | + PLD( beq 20f ) | ||
348 | + PLD( and ip, ip, #15 ) | ||
349 | + PLD( cmp ip, #8 ) | ||
350 | + PLD( ldr r3, [src, #-4]! ) | ||
351 | + PLD( ldrge r4, [src, #-4]! ) | ||
352 | + PLD( ldrgt r5, [src, #-4]! ) | ||
353 | + PLD( str r3, [dst, #-4]! ) | ||
354 | + PLD( strge r4, [dst, #-4]! ) | ||
355 | + PLD( strgt r5, [dst, #-4]! ) | ||
356 | + | ||
357 | +20: PLD( pld [src, #-96] ) | ||
358 | + PLD( pld [src, #-128] ) | ||
359 | +21: ldmdb src!, {r3, r4, ip, lr} /* each pass moves 32 bytes, or 64 when the subs below leaves r2 >= 0 */ | ||
360 | + subs r2, r2, #32 | ||
361 | + stmdb dst!, {r3, r4, ip, lr} | ||
362 | + ldmdb src!, {r3, r4, ip, lr} | ||
363 | + stmgedb dst!, {r3, r4, ip, lr} | ||
364 | + ldmgedb src!, {r3, r4, ip, lr} | ||
365 | + stmgedb dst!, {r3, r4, ip, lr} | ||
366 | + ldmgedb src!, {r3, r4, ip, lr} | ||
367 | + subges r2, r2, #32 | ||
368 | + stmdb dst!, {r3, r4, ip, lr} | ||
369 | + bge 20b | ||
370 | + PLD( cmn r2, #96 ) | ||
371 | + PLD( bge 21b ) | ||
372 | + PLD( add r2, r2, #96 ) | ||
373 | + tst r2, #31 | ||
374 | + ldmfd sp!, {r5 - r8} | ||
375 | + ldmeqfd sp!, {r4, pc} /* no remainder below 32 bytes: return */ | ||
376 | + | ||
377 | + tst r2, #16 | ||
378 | +22: ldmnedb src!, {r3, r4, ip, lr} | ||
379 | + stmnedb dst!, {r3, r4, ip, lr} | ||
380 | + | ||
381 | + tst r2, #8 | ||
382 | +23: ldmnedb src!, {r3, r4} | ||
383 | + stmnedb dst!, {r3, r4} | ||
384 | + | ||
385 | + tst r2, #4 | ||
386 | +24: ldrne r3, [src, #-4]! | ||
387 | + strne r3, [dst, #-4]! | ||
388 | + /* 25: copy the last 0 - 3 bytes, then return. */ | ||
389 | +25: ands r2, r2, #3 | ||
390 | + ldmeqfd sp!, {r4, pc} /* must match the two-register push at entry */ | ||
391 | + | ||
392 | + cmp r2, #2 | ||
393 | + ldrb r3, [src, #-1] | ||
394 | + ldrgeb r4, [src, #-2] | ||
395 | + ldrgtb ip, [src, #-3] | ||
396 | + strb r3, [dst, #-1] | ||
397 | + strgeb r4, [dst, #-2] | ||
398 | + strgtb ip, [dst, #-3] | ||
399 | + ldmfd sp!, {r4, pc} /* must match the two-register push at entry */ | ||
400 | + /* 26: copy ip = 1 - 3 bytes so that dst becomes word aligned. */ | ||
401 | +26: cmp ip, #2 | ||
402 | + ldrb r3, [src, #-1]! | ||
403 | + ldrgeb r4, [src, #-1]! | ||
404 | + ldrgtb lr, [src, #-1]! | ||
405 | + strb r3, [dst, #-1]! | ||
406 | + strgeb r4, [dst, #-1]! | ||
407 | + strgtb lr, [dst, #-1]! | ||
408 | + subs r2, r2, ip | ||
409 | + blt 25b | ||
410 | + ands ip, src, #3 | ||
411 | + beq 19b | ||
412 | + /* 27: src is misaligned by ip bytes; read it as whole words and shift. */ | ||
413 | +27: bic src, src, #3 | ||
414 | + cmp ip, #2 | ||
415 | + ldr r3, [src] | ||
416 | + beq 35f /* offset 2 */ | ||
417 | + blt 36f /* offset 1; offset 3 falls through */ | ||
418 | + | ||
419 | + | ||
420 | + .macro backward_copy_shift push pull | ||
421 | + /* r3 holds the last src word loaded; each stored word merges it with the next lower one. */ | ||
422 | + cmp r2, #12 | ||
423 | + PLD( pld [src, #-4] ) | ||
424 | + blt 33f | ||
425 | + subs r2, r2, #28 | ||
426 | + stmfd sp!, {r5 - r9} | ||
427 | + blt 31f | ||
428 | + | ||
429 | + PLD( subs r2, r2, #96 ) | ||
430 | + PLD( pld [src, #-32] ) | ||
431 | + PLD( blt 30f ) | ||
432 | + PLD( pld [src, #-64] ) | ||
433 | + | ||
434 | + PLD( @ cache alignment ) | ||
435 | + PLD( ands ip, src, #31 ) | ||
436 | + PLD( pld [src, #-96] ) | ||
437 | + PLD( beq 29f ) | ||
438 | + PLD( cmp r2, ip ) | ||
439 | + PLD( pld [src, #-128] ) | ||
440 | + PLD( blt 29f ) | ||
441 | + PLD( sub r2, r2, ip ) | ||
442 | +28: PLD( mov r4, r3, push #\push ) | ||
443 | + PLD( ldr r3, [src, #-4]! ) | ||
444 | + PLD( subs ip, ip, #4 ) | ||
445 | + PLD( orr r4, r4, r3, pull #\pull ) | ||
446 | + PLD( str r4, [dst, #-4]! ) | ||
447 | + PLD( bgt 28b ) | ||
448 | + | ||
449 | +29: PLD( pld [src, #-128] ) | ||
450 | +30: mov lr, r3, push #\push | ||
451 | + ldmdb src!, {r3 - r9, ip} | ||
452 | + subs r2, r2, #32 | ||
453 | + orr lr, lr, ip, pull #\pull | ||
454 | + mov ip, ip, push #\push | ||
455 | + orr ip, ip, r9, pull #\pull | ||
456 | + mov r9, r9, push #\push | ||
457 | + orr r9, r9, r8, pull #\pull | ||
458 | + mov r8, r8, push #\push | ||
459 | + orr r8, r8, r7, pull #\pull | ||
460 | + mov r7, r7, push #\push | ||
461 | + orr r7, r7, r6, pull #\pull | ||
462 | + mov r6, r6, push #\push | ||
463 | + orr r6, r6, r5, pull #\pull | ||
464 | + mov r5, r5, push #\push | ||
465 | + orr r5, r5, r4, pull #\pull | ||
466 | + mov r4, r4, push #\push | ||
467 | + orr r4, r4, r3, pull #\pull | ||
468 | + stmdb dst!, {r4 - r9, ip, lr} | ||
469 | + bge 29b | ||
470 | + PLD( cmn r2, #96 ) | ||
471 | + PLD( bge 30b ) | ||
472 | + PLD( add r2, r2, #96 ) | ||
473 | + cmn r2, #16 | ||
474 | + blt 32f | ||
475 | +31: mov r7, r3, push #\push | ||
476 | + ldmdb src!, {r3 - r6} | ||
477 | + sub r2, r2, #16 | ||
478 | + orr r7, r7, r6, pull #\pull | ||
479 | + mov r6, r6, push #\push | ||
480 | + orr r6, r6, r5, pull #\pull | ||
481 | + mov r5, r5, push #\push | ||
482 | + orr r5, r5, r4, pull #\pull | ||
483 | + mov r4, r4, push #\push | ||
484 | + orr r4, r4, r3, pull #\pull | ||
485 | + stmdb dst!, {r4 - r7} | ||
486 | +32: adds r2, r2, #28 | ||
487 | + ldmfd sp!, {r5 - r9} | ||
488 | + blt 34f | ||
489 | +33: mov r4, r3, push #\push | ||
490 | + ldr r3, [src, #-4]! | ||
491 | + subs r2, r2, #4 | ||
492 | + orr r4, r4, r3, pull #\pull | ||
493 | + str r4, [dst, #-4]! | ||
494 | + bge 33b | ||
495 | +34: | ||
496 | + .endm | ||
497 | + | ||
498 | + | ||
499 | + backward_copy_shift push=8 pull=24 | ||
500 | + add src, src, #3 /* restore the unaligned src pointer for the byte tail */ | ||
501 | + b 25b | ||
502 | + | ||
503 | +35: backward_copy_shift push=16 pull=16 | ||
504 | + add src, src, #2 /* restore the unaligned src pointer for the byte tail */ | ||
505 | + b 25b | ||
506 | + | ||
507 | +36: backward_copy_shift push=24 pull=8 | ||
508 | + add src, src, #1 /* restore the unaligned src pointer for the byte tail */ | ||
509 | + b 25b | ||
510 | + | ||
511 | + .size bcopy, . - bcopy | ||
512 | +END(bcopy) | ||
513 | |||
514 | --- /dev/null 2004-02-02 20:32:13.000000000 +0000 | ||
515 | +++ sysdeps/arm/memcpy.S 2004-05-02 14:33:22.000000000 +0100 | ||
516 | @@ -0,0 +1,242 @@ | ||
517 | +/* | ||
518 | + * Optimized memcpy implementation for ARM processors | ||
519 | + * | ||
520 | + * Author: Nicolas Pitre | ||
521 | + * Created: Dec 23, 2003 | ||
522 | + * Copyright: (C) MontaVista Software, Inc. | ||
523 | + * | ||
524 | + * This file is free software; you can redistribute it and/or | ||
525 | + * modify it under the terms of the GNU Lesser General Public | ||
526 | + * License as published by the Free Software Foundation; either | ||
527 | + * version 2.1 of the License, or (at your option) any later version. | ||
528 | + * | ||
529 | + * This file is distributed in the hope that it will be useful, | ||
530 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
531 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
532 | + * Lesser General Public License for more details. | ||
533 | + */ | ||
534 | + | ||
535 | +#include <sysdep.h> | ||
536 | + | ||
537 | + | ||
538 | +/* | ||
539 | + * Endian independent macros for shifting bytes within registers. | ||
540 | + */ | ||
541 | +#ifndef __ARMEB__ /* little-endian build */ | ||
542 | +#define pull lsr /* moves bytes toward the lower-address end of a word */ | ||
543 | +#define push lsl /* moves bytes toward the higher-address end of a word */ | ||
544 | +#else /* big-endian build: the two shift directions swap */ | ||
545 | +#define pull lsl | ||
546 | +#define push lsr | ||
547 | +#endif | ||
548 | + | ||
549 | +/* | ||
550 | + * Enable data preload only when one of the three ARMv5 macros tested below is defined | ||
551 | + */ | ||
552 | +#if defined(__ARM_ARCH_5__) || \ | ||
553 | + defined(__ARM_ARCH_5T__) || \ | ||
554 | + defined(__ARM_ARCH_5TE__) | ||
555 | +#define PLD(code...) code /* keep the wrapped instruction */ | ||
556 | +#else /* any other target: PLD() lines expand to nothing */ | ||
557 | +#define PLD(code...) | ||
558 | +#endif | ||
559 | + | ||
560 | + | ||
561 | +/* void *memcpy (void *dst, const void *src, size_t n) -- r0 = dst, r1 = src, r2 = n */ | ||
562 | + | ||
563 | +ENTRY(memcpy) | ||
564 | + subs r2, r2, #4 | ||
565 | + stmfd sp!, {r0, r4, lr} /* saved r0 is reloaded on exit as the return value */ | ||
566 | + blt 7f /* fewer than 4 bytes in total */ | ||
567 | + ands ip, r0, #3 | ||
568 | + PLD( pld [r1, #0] ) | ||
569 | + bne 8f /* dst is not word aligned */ | ||
570 | + ands ip, r1, #3 | ||
571 | + bne 9f /* src is not word aligned */ | ||
572 | + /* 1: both pointers are word aligned. */ | ||
573 | +1: subs r2, r2, #4 | ||
574 | + blt 6f | ||
575 | + subs r2, r2, #8 | ||
576 | + blt 5f | ||
577 | + subs r2, r2, #16 | ||
578 | + blt 4f | ||
579 | + /* At least 32 bytes remain; r5 - r8 are saved here and restored after the loop. */ | ||
580 | + PLD( subs r2, r2, #65 ) | ||
581 | + stmfd sp!, {r5 - r8} | ||
582 | + PLD( blt 3f ) | ||
583 | + PLD( pld [r1, #32] ) | ||
584 | + | ||
585 | + PLD( @ cache alignment ) | ||
586 | + PLD( ands ip, r1, #31 ) | ||
587 | + PLD( pld [r1, #64] ) | ||
588 | + PLD( beq 2f ) | ||
589 | + PLD( rsb ip, ip, #32 ) | ||
590 | + PLD( cmp r2, ip ) | ||
591 | + PLD( pld [r1, #96] ) | ||
592 | + PLD( blt 2f ) | ||
593 | + PLD( cmp ip, #16 ) | ||
594 | + PLD( sub r2, r2, ip ) | ||
595 | + PLD( ldmgeia r1!, {r3 - r6} ) | ||
596 | + PLD( stmgeia r0!, {r3 - r6} ) | ||
597 | + PLD( beq 2f ) | ||
598 | + PLD( and ip, ip, #15 ) | ||
599 | + PLD( cmp ip, #8 ) | ||
600 | + PLD( ldr r3, [r1], #4 ) | ||
601 | + PLD( ldrge r4, [r1], #4 ) | ||
602 | + PLD( ldrgt r5, [r1], #4 ) | ||
603 | + PLD( str r3, [r0], #4 ) | ||
604 | + PLD( strge r4, [r0], #4 ) | ||
605 | + PLD( strgt r5, [r0], #4 ) | ||
606 | + | ||
607 | +2: PLD( pld [r1, #96] ) | ||
608 | +3: ldmia r1!, {r3 - r8, ip, lr} /* 32 bytes per pass */ | ||
609 | + subs r2, r2, #32 | ||
610 | + stmia r0!, {r3 - r8, ip, lr} | ||
611 | + bge 2b | ||
612 | + PLD( cmn r2, #65 ) | ||
613 | + PLD( bge 3b ) | ||
614 | + PLD( add r2, r2, #65 ) | ||
615 | + tst r2, #31 | ||
616 | + ldmfd sp!, {r5 - r8} | ||
617 | + ldmeqfd sp!, {r0, r4, pc} /* no remainder below 32 bytes: return dst */ | ||
618 | + | ||
619 | + tst r2, #16 | ||
620 | +4: ldmneia r1!, {r3, r4, ip, lr} | ||
621 | + stmneia r0!, {r3, r4, ip, lr} | ||
622 | + | ||
623 | + tst r2, #8 | ||
624 | +5: ldmneia r1!, {r3, r4} | ||
625 | + stmneia r0!, {r3, r4} | ||
626 | + | ||
627 | + tst r2, #4 | ||
628 | +6: ldrne r3, [r1], #4 | ||
629 | + strne r3, [r0], #4 | ||
630 | + /* 7: copy the last 0 - 3 bytes, then return dst. */ | ||
631 | +7: ands r2, r2, #3 | ||
632 | + ldmeqfd sp!, {r0, r4, pc} | ||
633 | + | ||
634 | + cmp r2, #2 | ||
635 | + ldrb r3, [r1], #1 | ||
636 | + ldrgeb r4, [r1], #1 | ||
637 | + ldrgtb ip, [r1] | ||
638 | + strb r3, [r0], #1 | ||
639 | + strgeb r4, [r0], #1 | ||
640 | + strgtb ip, [r0] | ||
641 | + ldmfd sp!, {r0, r4, pc} | ||
642 | + /* 8: copy 4 - (dst & 3) bytes so that dst becomes word aligned. */ | ||
643 | +8: rsb ip, ip, #4 | ||
644 | + cmp ip, #2 | ||
645 | + ldrb r3, [r1], #1 | ||
646 | + ldrgeb r4, [r1], #1 | ||
647 | + ldrgtb lr, [r1], #1 | ||
648 | + strb r3, [r0], #1 | ||
649 | + strgeb r4, [r0], #1 | ||
650 | + strgtb lr, [r0], #1 | ||
651 | + subs r2, r2, ip | ||
652 | + blt 7b | ||
653 | + ands ip, r1, #3 | ||
654 | + beq 1b | ||
655 | + /* 9: src is misaligned by ip bytes; read it as whole words and shift. */ | ||
656 | +9: bic r1, r1, #3 | ||
657 | + cmp ip, #2 | ||
658 | + ldr lr, [r1], #4 | ||
659 | + beq 17f /* offset 2 */ | ||
660 | + bgt 18f /* offset 3; offset 1 falls through */ | ||
661 | + | ||
662 | + | ||
663 | + .macro forward_copy_shift pull push | ||
664 | + /* lr holds the last src word loaded; each stored word merges it with the next higher one. */ | ||
665 | + cmp r2, #12 | ||
666 | + PLD( pld [r1, #0] ) | ||
667 | + blt 15f | ||
668 | + subs r2, r2, #28 | ||
669 | + stmfd sp!, {r5 - r9} | ||
670 | + blt 13f | ||
671 | + | ||
672 | + PLD( subs r2, r2, #97 ) | ||
673 | + PLD( blt 12f ) | ||
674 | + PLD( pld [r1, #32] ) | ||
675 | + | ||
676 | + PLD( @ cache alignment ) | ||
677 | + PLD( rsb ip, r1, #36 ) | ||
678 | + PLD( pld [r1, #64] ) | ||
679 | + PLD( ands ip, ip, #31 ) | ||
680 | + PLD( pld [r1, #96] ) | ||
681 | + PLD( beq 11f ) | ||
682 | + PLD( cmp r2, ip ) | ||
683 | + PLD( pld [r1, #128] ) | ||
684 | + PLD( blt 11f ) | ||
685 | + PLD( sub r2, r2, ip ) | ||
686 | +10: PLD( mov r3, lr, pull #\pull ) | ||
687 | + PLD( ldr lr, [r1], #4 ) | ||
688 | + PLD( subs ip, ip, #4 ) | ||
689 | + PLD( orr r3, r3, lr, push #\push ) | ||
690 | + PLD( str r3, [r0], #4 ) | ||
691 | + PLD( bgt 10b ) | ||
692 | + | ||
693 | +11: PLD( pld [r1, #128] ) | ||
694 | +12: mov r3, lr, pull #\pull | ||
695 | + ldmia r1!, {r4 - r9, ip, lr} | ||
696 | + subs r2, r2, #32 | ||
697 | + orr r3, r3, r4, push #\push | ||
698 | + mov r4, r4, pull #\pull | ||
699 | + orr r4, r4, r5, push #\push | ||
700 | + mov r5, r5, pull #\pull | ||
701 | + orr r5, r5, r6, push #\push | ||
702 | + mov r6, r6, pull #\pull | ||
703 | + orr r6, r6, r7, push #\push | ||
704 | + mov r7, r7, pull #\pull | ||
705 | + orr r7, r7, r8, push #\push | ||
706 | + mov r8, r8, pull #\pull | ||
707 | + orr r8, r8, r9, push #\push | ||
708 | + mov r9, r9, pull #\pull | ||
709 | + orr r9, r9, ip, push #\push | ||
710 | + mov ip, ip, pull #\pull | ||
711 | + orr ip, ip, lr, push #\push | ||
712 | + stmia r0!, {r3 - r9, ip} | ||
713 | + bge 11b | ||
714 | + PLD( cmn r2, #97 ) | ||
715 | + PLD( bge 12b ) | ||
716 | + PLD( add r2, r2, #97 ) | ||
717 | + cmn r2, #16 | ||
718 | + blt 14f | ||
719 | +13: mov r3, lr, pull #\pull | ||
720 | + ldmia r1!, {r4 - r6, lr} | ||
721 | + sub r2, r2, #16 | ||
722 | + orr r3, r3, r4, push #\push | ||
723 | + mov r4, r4, pull #\pull | ||
724 | + orr r4, r4, r5, push #\push | ||
725 | + mov r5, r5, pull #\pull | ||
726 | + orr r5, r5, r6, push #\push | ||
727 | + mov r6, r6, pull #\pull | ||
728 | + orr r6, r6, lr, push #\push | ||
729 | + stmia r0!, {r3 - r6} | ||
730 | +14: adds r2, r2, #28 | ||
731 | + ldmfd sp!, {r5 - r9} | ||
732 | + blt 16f | ||
733 | +15: mov r3, lr, pull #\pull | ||
734 | + ldr lr, [r1], #4 | ||
735 | + subs r2, r2, #4 | ||
736 | + orr r3, r3, lr, push #\push | ||
737 | + str r3, [r0], #4 | ||
738 | + bge 15b | ||
739 | +16: | ||
740 | + .endm | ||
741 | + | ||
742 | + | ||
743 | + forward_copy_shift pull=8 push=24 | ||
744 | + sub r1, r1, #3 /* step back to the unaligned src position for the byte tail */ | ||
745 | + b 7b | ||
746 | + | ||
747 | +17: forward_copy_shift pull=16 push=16 | ||
748 | + sub r1, r1, #2 /* step back to the unaligned src position for the byte tail */ | ||
749 | + b 7b | ||
750 | + | ||
751 | +18: forward_copy_shift pull=24 push=8 | ||
752 | + sub r1, r1, #1 /* step back to the unaligned src position for the byte tail */ | ||
753 | + b 7b | ||
754 | + | ||
755 | + .size memcpy, . - memcpy | ||
756 | +END(memcpy) | ||
757 | +libc_hidden_builtin_def (memcpy) | ||
758 | + | ||