summaryrefslogtreecommitdiffstats
path: root/meta/packages/glibc/glibc-cvs/arm-memcpy.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/packages/glibc/glibc-cvs/arm-memcpy.patch')
-rw-r--r--meta/packages/glibc/glibc-cvs/arm-memcpy.patch758
1 files changed, 758 insertions, 0 deletions
diff --git a/meta/packages/glibc/glibc-cvs/arm-memcpy.patch b/meta/packages/glibc/glibc-cvs/arm-memcpy.patch
new file mode 100644
index 0000000000..bc2b3dab84
--- /dev/null
+++ b/meta/packages/glibc/glibc-cvs/arm-memcpy.patch
@@ -0,0 +1,758 @@
1--- /dev/null 2004-02-02 20:32:13.000000000 +0000
2+++ sysdeps/arm/memmove.S 2004-03-20 18:37:23.000000000 +0000
3@@ -0,0 +1,251 @@
4+/*
5+ * Optimized memmove implementation for ARM processors
6+ *
7+ * Author: Nicolas Pitre
8+ * Created: Dec 23, 2003
9+ * Copyright: (C) MontaVista Software, Inc.
10+ *
11+ * This file is free software; you can redistribute it and/or
12+ * modify it under the terms of the GNU Lesser General Public
13+ * License as published by the Free Software Foundation; either
14+ * version 2.1 of the License, or (at your option) any later version.
15+ *
16+ * This file is distributed in the hope that it will be useful,
17+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19+ * Lesser General Public License for more details.
20+ */
21+
22+#include <sysdep.h>
23+
24+
25+/*
26+ * Endian independent macros for shifting bytes within registers.
27+ */
28+#ifndef __ARMEB__
29+#define pull lsr
30+#define push lsl
31+#else
32+#define pull lsl
33+#define push lsr
34+#endif
35+
36+/*
37+ * Enable data preload for architectures that support it (ARMv5 and above)
38+ */
39+#if defined(__ARM_ARCH_5__) || \
40+ defined(__ARM_ARCH_5T__) || \
41+ defined(__ARM_ARCH_5TE__)
42+#define PLD(code...) code
43+#else
44+#define PLD(code...)
45+#endif
46+
47+
48+/* void *memmove (void *dst, const void *src, size_t n) */
49+ENTRY(memmove) /* r0 = dst, r1 = src, r2 = byte count; returns the original dst in r0 */
50+ subs ip, r0, r1 /* ip = dst - src */
51+ cmphi r2, ip /* only when dst > src (unsigned): compare count with that distance */
52+ bls memcpy(PLT) /* dst <= src, or count <= distance: branch (no link) to memcpy, which returns to our caller */
53+ /* Otherwise copy backwards, starting from the end of both buffers. */
54+ stmfd sp!, {r0, r4, lr} /* save dst (reloaded into r0 by every exit), r4 and lr */
55+ add r1, r1, r2 /* r1 = one past the last source byte */
56+ add r0, r0, r2 /* r0 = one past the last destination byte */
57+ subs r2, r2, #4
58+ blt 25f /* fewer than 4 bytes: byte tail only */
59+ ands ip, r0, #3 /* ip = destination end address modulo 4 */
60+ PLD( pld [r1, #-4] )
61+ bne 26f /* destination is not word aligned */
62+ ands ip, r1, #3 /* ip = source end address modulo 4 */
63+ bne 27f /* source is not word aligned: shifting copy */
64+ /* Both pointers are word aligned from here on. */
65+19: subs r2, r2, #4
66+ blt 24f /* fewer than 8 bytes left */
67+ subs r2, r2, #8
68+ blt 23f /* fewer than 16 bytes left */
69+ subs r2, r2, #16
70+ blt 22f /* fewer than 32 bytes left */
71+
72+ PLD( pld [r1, #-32] )
73+ PLD( subs r2, r2, #96 ) /* extra bias, removed again by the add after the loop */
74+ stmfd sp!, {r5 - r8} /* popped again right after the bulk loop */
75+ PLD( blt 21f )
76+
77+ PLD( @ cache alignment )
78+ PLD( ands ip, r1, #31 ) /* ip = source address modulo 32 */
79+ PLD( pld [r1, #-64] )
80+ PLD( beq 20f ) /* already on a 32-byte boundary */
81+ PLD( cmp r2, ip )
82+ PLD( pld [r1, #-96] )
83+ PLD( blt 20f )
84+ PLD( cmp ip, #16 )
85+ PLD( sub r2, r2, ip ) /* account for the ip bytes moved below (flags untouched) */
86+ PLD( ldmgedb r1!, {r3 - r6} ) /* ip >= 16: move 16 bytes */
87+ PLD( stmgedb r0!, {r3 - r6} )
88+ PLD( beq 20f ) /* ip was exactly 16 */
89+ PLD( and ip, ip, #15 )
90+ PLD( cmp ip, #8 )
91+ PLD( ldr r3, [r1, #-4]! ) /* move one, two (ge) or three (gt) more words */
92+ PLD( ldrge r4, [r1, #-4]! )
93+ PLD( ldrgt r5, [r1, #-4]! )
94+ PLD( str r3, [r0, #-4]! )
95+ PLD( strge r4, [r0, #-4]! )
96+ PLD( strgt r5, [r0, #-4]! )
97+ /* Bulk loop: 32 bytes per pass, or 64 when the subs below leaves r2 >= 0. */
98+20: PLD( pld [r1, #-96] )
99+ PLD( pld [r1, #-128] )
100+21: ldmdb r1!, {r3, r4, ip, lr} /* first 16 bytes */
101+ subs r2, r2, #32
102+ stmdb r0!, {r3, r4, ip, lr}
103+ ldmdb r1!, {r3, r4, ip, lr} /* second 16 bytes: stored by the stmge below, else by the final stmdb */
104+ stmgedb r0!, {r3, r4, ip, lr}
105+ ldmgedb r1!, {r3, r4, ip, lr}
106+ stmgedb r0!, {r3, r4, ip, lr}
107+ ldmgedb r1!, {r3, r4, ip, lr}
108+ subges r2, r2, #32 /* second decrement only on the 64-byte pass */
109+ stmdb r0!, {r3, r4, ip, lr}
110+ bge 20b
111+ PLD( cmn r2, #96 )
112+ PLD( bge 21b ) /* keep looping without further preloads */
113+ PLD( add r2, r2, #96 ) /* undo the bias applied before the loop */
114+ tst r2, #31 /* any of the low five count bits set? */
115+ ldmfd sp!, {r5 - r8}
116+ ldmeqfd sp!, {r0, r4, pc} /* nothing left: return with r0 = original dst */
117+ /* Tail: the low bits of r2 select 16, 8 and 4 byte moves. */
118+ tst r2, #16
119+22: ldmnedb r1!, {r3, r4, ip, lr} /* ne also holds when entered through blt 22f */
120+ stmnedb r0!, {r3, r4, ip, lr}
121+
122+ tst r2, #8
123+23: ldmnedb r1!, {r3, r4} /* ne also holds when entered through blt 23f */
124+ stmnedb r0!, {r3, r4}
125+
126+ tst r2, #4
127+24: ldrne r3, [r1, #-4]! /* ne also holds when entered through blt 24f */
128+ strne r3, [r0, #-4]!
129+ /* Byte tail: the low two bits of r2 give the number of bytes still to move. */
130+25: ands r2, r2, #3
131+ ldmeqfd sp!, {r0, r4, pc} /* none left: return */
132+
133+ cmp r2, #2
134+ ldrb r3, [r1, #-1] /* one, two (ge) or three (gt) bytes */
135+ ldrgeb r4, [r1, #-2]
136+ ldrgtb ip, [r1, #-3]
137+ strb r3, [r0, #-1]
138+ strgeb r4, [r0, #-2]
139+ strgtb ip, [r0, #-3]
140+ ldmfd sp!, {r0, r4, pc}
141+ /* Destination end not word aligned: move ip (1..3) bytes so that r0 becomes aligned. */
142+26: cmp ip, #2
143+ ldrb r3, [r1, #-1]!
144+ ldrgeb r4, [r1, #-1]!
145+ ldrgtb lr, [r1, #-1]!
146+ strb r3, [r0, #-1]!
147+ strgeb r4, [r0, #-1]!
148+ strgtb lr, [r0, #-1]!
149+ subs r2, r2, ip /* account for the bytes just moved */
150+ blt 25b /* fewer than 4 bytes left */
151+ ands ip, r1, #3
152+ beq 19b /* source is word aligned too */
153+ /* Source not word aligned: ip (1..3) is its offset within the word. */
154+27: bic r1, r1, #3 /* round the source pointer down to a word boundary */
155+ cmp ip, #2
156+ ldr r3, [r1] /* r3 = the word containing the next source bytes */
157+ beq 35f /* ip == 2 */
158+ blt 36f /* ip == 1; ip == 3 falls through */
159+
160+
161+ .macro backward_copy_shift push pull /* arguments are shift amounts in bits */
162+ /* Backward copy, r1 word aligned, r0 aligned: r3 holds the last word loaded; each stored word is (higher word shifted by push) | (next lower word shifted by pull). */
163+ cmp r2, #12
164+ PLD( pld [r1, #-4] )
165+ blt 33f /* short copy: single-word loop only, nothing pushed */
166+ subs r2, r2, #28
167+ stmfd sp!, {r5 - r9} /* scratch for the multi-word blocks; popped at 32 */
168+ blt 31f /* too short for the 8-word loop */
169+
170+ PLD( subs r2, r2, #96 ) /* extra bias, removed again after the loop */
171+ PLD( pld [r1, #-32] )
172+ PLD( blt 30f )
173+ PLD( pld [r1, #-64] )
174+
175+ PLD( @ cache alignment )
176+ PLD( ands ip, r1, #31 ) /* ip = source address modulo 32 */
177+ PLD( pld [r1, #-96] )
178+ PLD( beq 29f )
179+ PLD( cmp r2, ip )
180+ PLD( pld [r1, #-128] )
181+ PLD( blt 29f )
182+ PLD( sub r2, r2, ip ) /* account for the words moved by the loop at 28 */
183+28: PLD( mov r4, r3, push #\push ) /* one word per pass until ip is used up */
184+ PLD( ldr r3, [r1, #-4]! )
185+ PLD( subs ip, ip, #4 )
186+ PLD( orr r4, r4, r3, pull #\pull )
187+ PLD( str r4, [r0, #-4]! )
188+ PLD( bgt 28b )
189+ /* 8-word loop: load r3-r9 and ip, store the eight merged words from r4-r9, ip and lr. */
190+29: PLD( pld [r1, #-128] )
191+30: mov lr, r3, push #\push /* start the highest output word from the carried-over r3 */
192+ ldmdb r1!, {r3 - r9, ip}
193+ subs r2, r2, #32
194+ orr lr, lr, ip, pull #\pull
195+ mov ip, ip, push #\push
196+ orr ip, ip, r9, pull #\pull
197+ mov r9, r9, push #\push
198+ orr r9, r9, r8, pull #\pull
199+ mov r8, r8, push #\push
200+ orr r8, r8, r7, pull #\pull
201+ mov r7, r7, push #\push
202+ orr r7, r7, r6, pull #\pull
203+ mov r6, r6, push #\push
204+ orr r6, r6, r5, pull #\pull
205+ mov r5, r5, push #\push
206+ orr r5, r5, r4, pull #\pull
207+ mov r4, r4, push #\push
208+ orr r4, r4, r3, pull #\pull /* r3 itself is kept for the next block */
209+ stmdb r0!, {r4 - r9, ip, lr}
210+ bge 29b
211+ PLD( cmn r2, #96 )
212+ PLD( bge 30b ) /* keep looping without further preloads */
213+ PLD( add r2, r2, #96 ) /* undo the bias applied before the loop */
214+ cmn r2, #16
215+ blt 32f /* r2 + 16 < 0: skip the 4-word block */
216+31: mov r7, r3, push #\push /* 4-word block, same merge scheme */
217+ ldmdb r1!, {r3 - r6}
218+ sub r2, r2, #16
219+ orr r7, r7, r6, pull #\pull
220+ mov r6, r6, push #\push
221+ orr r6, r6, r5, pull #\pull
222+ mov r5, r5, push #\push
223+ orr r5, r5, r4, pull #\pull
224+ mov r4, r4, push #\push
225+ orr r4, r4, r3, pull #\pull
226+ stmdb r0!, {r4 - r7}
227+32: adds r2, r2, #28 /* undo the subs #28 made before the push */
228+ ldmfd sp!, {r5 - r9}
229+ blt 34f /* no whole word left */
230+33: mov r4, r3, push #\push /* single-word loop */
231+ ldr r3, [r1, #-4]!
232+ subs r2, r2, #4
233+ orr r4, r4, r3, pull #\pull
234+ str r4, [r0, #-4]!
235+ bge 33b
236+34: /* falls out of the macro; r1 still points at the word held in r3 */
237+ .endm
238+
239+
240+ backward_copy_shift push=8 pull=24 /* source offset 3 (fall-through from label 27) */
241+ add r1, r1, #3 /* put back the byte offset that bic cleared */
242+ b 25b /* finish with the byte tail */
243+
244+35: backward_copy_shift push=16 pull=16 /* source offset 2 */
245+ add r1, r1, #2
246+ b 25b
247+
248+36: backward_copy_shift push=24 pull=8 /* source offset 1 */
249+ add r1, r1, #1
250+ b 25b
251+
252+ .size memmove, . - memmove
253+END(memmove)
254+libc_hidden_builtin_def (memmove)
255--- /dev/null 2004-02-02 20:32:13.000000000 +0000
256+++ sysdeps/arm/bcopy.S 2004-03-20 18:37:48.000000000 +0000
257@@ -0,0 +1,255 @@
258+/*
259+ * Optimized bcopy implementation for ARM processors
260+ *
261+ * Author: Nicolas Pitre
262+ * Created: Dec 23, 2003
263+ * Copyright: (C) MontaVista Software, Inc.
264+ *
265+ * This file is free software; you can redistribute it and/or
266+ * modify it under the terms of the GNU Lesser General Public
267+ * License as published by the Free Software Foundation; either
268+ * version 2.1 of the License, or (at your option) any later version.
269+ *
270+ * This file is distributed in the hope that it will be useful,
271+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
272+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
273+ * Lesser General Public License for more details.
274+ */
275+
276+#include <sysdep.h>
277+
278+
279+/*
280+ * Endian independent macros for shifting bytes within registers.
281+ */
282+#ifndef __ARMEB__
283+#define pull lsr
284+#define push lsl
285+#else
286+#define pull lsl
287+#define push lsr
288+#endif
289+
290+/*
291+ * Enable data preload for architectures that support it (ARMv5 and above)
292+ */
293+#if defined(__ARM_ARCH_5__) || \
294+ defined(__ARM_ARCH_5T__) || \
295+ defined(__ARM_ARCH_5TE__)
296+#define PLD(code...) code
297+#else
298+#define PLD(code...)
299+#endif
300+
301+dst .req r1 /* bcopy takes (src, dst, size): the destination arrives in r1 */
302+src .req r0 /* ... and the source in r0 */
303+
304+/* void bcopy (const void *src, void *dst, size_t size) */
305+ENTRY(bcopy) /* r0 = src, r1 = dst, r2 = byte count; no return value */
306+ subs ip, dst, src /* ip = dst - src */
307+ cmphi r2, ip /* only when dst > src (unsigned): compare count with that distance */
308+ movls r3, r0 /* forward copy is safe: swap r0/r1 into memcpy (dst, src) order */
309+ movls r0, r1
310+ movls r1, r3
311+ bls memcpy(PLT) /* branch (no link): memcpy returns to our caller */
312+ /* Otherwise copy backwards, starting from the end of both buffers. */
313+ stmfd sp!, {r4, lr} /* exactly two words are saved; every exit must pop exactly {r4, pc} */
314+ add src, src, r2 /* src = one past the last source byte */
315+ add dst, dst, r2 /* dst = one past the last destination byte */
316+ subs r2, r2, #4
317+ blt 25f /* fewer than 4 bytes: byte tail only */
318+ ands ip, dst, #3 /* ip = destination end address modulo 4 */
319+ PLD( pld [src, #-4] )
320+ bne 26f /* destination is not word aligned */
321+ ands ip, src, #3 /* ip = source end address modulo 4 */
322+ bne 27f /* source is not word aligned: shifting copy */
323+ /* Both pointers are word aligned from here on. */
324+19: subs r2, r2, #4
325+ blt 24f /* fewer than 8 bytes left */
326+ subs r2, r2, #8
327+ blt 23f /* fewer than 16 bytes left */
328+ subs r2, r2, #16
329+ blt 22f /* fewer than 32 bytes left */
330+
331+ PLD( pld [src, #-32] )
332+ PLD( subs r2, r2, #96 ) /* extra bias, removed again by the add after the loop */
333+ stmfd sp!, {r5 - r8} /* popped again right after the bulk loop */
334+ PLD( blt 21f )
335+
336+ PLD( @ cache alignment )
337+ PLD( ands ip, src, #31 ) /* ip = source address modulo 32 */
338+ PLD( pld [src, #-64] )
339+ PLD( beq 20f ) /* already on a 32-byte boundary */
340+ PLD( cmp r2, ip )
341+ PLD( pld [src, #-96] )
342+ PLD( blt 20f )
343+ PLD( cmp ip, #16 )
344+ PLD( sub r2, r2, ip ) /* account for the ip bytes moved below (flags untouched) */
345+ PLD( ldmgedb src!, {r3 - r6} ) /* ip >= 16: move 16 bytes */
346+ PLD( stmgedb dst!, {r3 - r6} )
347+ PLD( beq 20f ) /* ip was exactly 16 */
348+ PLD( and ip, ip, #15 )
349+ PLD( cmp ip, #8 )
350+ PLD( ldr r3, [src, #-4]! ) /* move one, two (ge) or three (gt) more words */
351+ PLD( ldrge r4, [src, #-4]! )
352+ PLD( ldrgt r5, [src, #-4]! )
353+ PLD( str r3, [dst, #-4]! )
354+ PLD( strge r4, [dst, #-4]! )
355+ PLD( strgt r5, [dst, #-4]! )
356+ /* Bulk loop: 32 bytes per pass, or 64 when the subs below leaves r2 >= 0. */
357+20: PLD( pld [src, #-96] )
358+ PLD( pld [src, #-128] )
359+21: ldmdb src!, {r3, r4, ip, lr} /* first 16 bytes */
360+ subs r2, r2, #32
361+ stmdb dst!, {r3, r4, ip, lr}
362+ ldmdb src!, {r3, r4, ip, lr} /* second 16 bytes: stored by the stmge below, else by the final stmdb */
363+ stmgedb dst!, {r3, r4, ip, lr}
364+ ldmgedb src!, {r3, r4, ip, lr}
365+ stmgedb dst!, {r3, r4, ip, lr}
366+ ldmgedb src!, {r3, r4, ip, lr}
367+ subges r2, r2, #32 /* second decrement only on the 64-byte pass */
368+ stmdb dst!, {r3, r4, ip, lr}
369+ bge 20b
370+ PLD( cmn r2, #96 )
371+ PLD( bge 21b ) /* keep looping without further preloads */
372+ PLD( add r2, r2, #96 ) /* undo the bias applied before the loop */
373+ tst r2, #31 /* any of the low five count bits set? */
374+ ldmfd sp!, {r5 - r8}
375+ ldmeqfd sp!, {r4, pc} /* nothing left: return */
376+ /* Tail: the low bits of r2 select 16, 8 and 4 byte moves. */
377+ tst r2, #16
378+22: ldmnedb src!, {r3, r4, ip, lr} /* ne also holds when entered through blt 22f */
379+ stmnedb dst!, {r3, r4, ip, lr}
380+
381+ tst r2, #8
382+23: ldmnedb src!, {r3, r4} /* ne also holds when entered through blt 23f */
383+ stmnedb dst!, {r3, r4}
384+
385+ tst r2, #4
386+24: ldrne r3, [src, #-4]! /* ne also holds when entered through blt 24f */
387+ strne r3, [dst, #-4]!
388+ /* Byte tail: the low two bits of r2 give the number of bytes still to move. */
389+25: ands r2, r2, #3
390+ ldmeqfd sp!, {r4, pc} /* none left: return (pop matches the two-word push at entry) */
391+
392+ cmp r2, #2
393+ ldrb r3, [src, #-1] /* one, two (ge) or three (gt) bytes */
394+ ldrgeb r4, [src, #-2]
395+ ldrgtb ip, [src, #-3]
396+ strb r3, [dst, #-1]
397+ strgeb r4, [dst, #-2]
398+ strgtb ip, [dst, #-3]
399+ ldmfd sp!, {r4, pc} /* return (pop matches the two-word push at entry) */
400+ /* Destination end not word aligned: move ip (1..3) bytes so that dst becomes aligned. */
401+26: cmp ip, #2
402+ ldrb r3, [src, #-1]!
403+ ldrgeb r4, [src, #-1]!
404+ ldrgtb lr, [src, #-1]!
405+ strb r3, [dst, #-1]!
406+ strgeb r4, [dst, #-1]!
407+ strgtb lr, [dst, #-1]!
408+ subs r2, r2, ip /* account for the bytes just moved */
409+ blt 25b /* fewer than 4 bytes left */
410+ ands ip, src, #3
411+ beq 19b /* source is word aligned too */
412+ /* Source not word aligned: ip (1..3) is its offset within the word. */
413+27: bic src, src, #3 /* round the source pointer down to a word boundary */
414+ cmp ip, #2
415+ ldr r3, [src] /* r3 = the word containing the next source bytes */
416+ beq 35f /* ip == 2 */
417+ blt 36f /* ip == 1; ip == 3 falls through */
419+
420+ .macro backward_copy_shift push pull /* arguments are shift amounts in bits */
421+ /* Backward copy, src word aligned, dst aligned: r3 holds the last word loaded; each stored word is (higher word shifted by push) | (next lower word shifted by pull). */
422+ cmp r2, #12
423+ PLD( pld [src, #-4] )
424+ blt 33f /* short copy: single-word loop only, nothing pushed */
425+ subs r2, r2, #28
426+ stmfd sp!, {r5 - r9} /* scratch for the multi-word blocks; popped at 32 */
427+ blt 31f /* too short for the 8-word loop */
428+
429+ PLD( subs r2, r2, #96 ) /* extra bias, removed again after the loop */
430+ PLD( pld [src, #-32] )
431+ PLD( blt 30f )
432+ PLD( pld [src, #-64] )
433+
434+ PLD( @ cache alignment )
435+ PLD( ands ip, src, #31 ) /* ip = source address modulo 32 */
436+ PLD( pld [src, #-96] )
437+ PLD( beq 29f )
438+ PLD( cmp r2, ip )
439+ PLD( pld [src, #-128] )
440+ PLD( blt 29f )
441+ PLD( sub r2, r2, ip ) /* account for the words moved by the loop at 28 */
442+28: PLD( mov r4, r3, push #\push ) /* one word per pass until ip is used up */
443+ PLD( ldr r3, [src, #-4]! )
444+ PLD( subs ip, ip, #4 )
445+ PLD( orr r4, r4, r3, pull #\pull )
446+ PLD( str r4, [dst, #-4]! )
447+ PLD( bgt 28b )
448+ /* 8-word loop: load r3-r9 and ip, store the eight merged words from r4-r9, ip and lr. */
449+29: PLD( pld [src, #-128] )
450+30: mov lr, r3, push #\push /* start the highest output word from the carried-over r3 */
451+ ldmdb src!, {r3 - r9, ip}
452+ subs r2, r2, #32
453+ orr lr, lr, ip, pull #\pull
454+ mov ip, ip, push #\push
455+ orr ip, ip, r9, pull #\pull
456+ mov r9, r9, push #\push
457+ orr r9, r9, r8, pull #\pull
458+ mov r8, r8, push #\push
459+ orr r8, r8, r7, pull #\pull
460+ mov r7, r7, push #\push
461+ orr r7, r7, r6, pull #\pull
462+ mov r6, r6, push #\push
463+ orr r6, r6, r5, pull #\pull
464+ mov r5, r5, push #\push
465+ orr r5, r5, r4, pull #\pull
466+ mov r4, r4, push #\push
467+ orr r4, r4, r3, pull #\pull /* r3 itself is kept for the next block */
468+ stmdb dst!, {r4 - r9, ip, lr}
469+ bge 29b
470+ PLD( cmn r2, #96 )
471+ PLD( bge 30b ) /* keep looping without further preloads */
472+ PLD( add r2, r2, #96 ) /* undo the bias applied before the loop */
473+ cmn r2, #16
474+ blt 32f /* r2 + 16 < 0: skip the 4-word block */
475+31: mov r7, r3, push #\push /* 4-word block, same merge scheme */
476+ ldmdb src!, {r3 - r6}
477+ sub r2, r2, #16
478+ orr r7, r7, r6, pull #\pull
479+ mov r6, r6, push #\push
480+ orr r6, r6, r5, pull #\pull
481+ mov r5, r5, push #\push
482+ orr r5, r5, r4, pull #\pull
483+ mov r4, r4, push #\push
484+ orr r4, r4, r3, pull #\pull
485+ stmdb dst!, {r4 - r7}
486+32: adds r2, r2, #28 /* undo the subs #28 made before the push */
487+ ldmfd sp!, {r5 - r9}
488+ blt 34f /* no whole word left */
489+33: mov r4, r3, push #\push /* single-word loop */
490+ ldr r3, [src, #-4]!
491+ subs r2, r2, #4
492+ orr r4, r4, r3, pull #\pull
493+ str r4, [dst, #-4]!
494+ bge 33b
495+34: /* falls out of the macro; src still points at the word held in r3 */
496+ .endm
497+
498+
499+ backward_copy_shift push=8 pull=24 /* source offset 3 (fall-through from label 27) */
500+ add src, src, #3 /* put back the byte offset that bic cleared */
501+ b 25b /* finish with the byte tail */
502+
503+35: backward_copy_shift push=16 pull=16 /* source offset 2 */
504+ add src, src, #2
505+ b 25b
506+
507+36: backward_copy_shift push=24 pull=8 /* source offset 1 */
508+ add src, src, #1
509+ b 25b
510+
511+ .size bcopy, . - bcopy
512+END(bcopy)
513
514--- /dev/null 2004-02-02 20:32:13.000000000 +0000
515+++ sysdeps/arm/memcpy.S 2004-05-02 14:33:22.000000000 +0100
516@@ -0,0 +1,242 @@
517+/*
518+ * Optimized memcpy implementation for ARM processors
519+ *
520+ * Author: Nicolas Pitre
521+ * Created: Dec 23, 2003
522+ * Copyright: (C) MontaVista Software, Inc.
523+ *
524+ * This file is free software; you can redistribute it and/or
525+ * modify it under the terms of the GNU Lesser General Public
526+ * License as published by the Free Software Foundation; either
527+ * version 2.1 of the License, or (at your option) any later version.
528+ *
529+ * This file is distributed in the hope that it will be useful,
530+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
531+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
532+ * Lesser General Public License for more details.
533+ */
534+
535+#include <sysdep.h>
536+
537+
538+/*
539+ * Endian independent macros for shifting bytes within registers.
540+ */
541+#ifndef __ARMEB__
542+#define pull lsr
543+#define push lsl
544+#else
545+#define pull lsl
546+#define push lsr
547+#endif
548+
549+/*
550+ * Enable data preload for architectures that support it (ARMv5 and above)
551+ */
552+#if defined(__ARM_ARCH_5__) || \
553+ defined(__ARM_ARCH_5T__) || \
554+ defined(__ARM_ARCH_5TE__)
555+#define PLD(code...) code
556+#else
557+#define PLD(code...)
558+#endif
559+
560+
561+/* void *memcpy (void *dst, const void *src, size_t n) */
562+
563+ENTRY(memcpy) /* r0 = dst, r1 = src, r2 = byte count; returns the original dst in r0 */
564+ subs r2, r2, #4
565+ stmfd sp!, {r0, r4, lr} /* save dst (reloaded into r0 by every exit), r4 and lr */
566+ blt 7f /* fewer than 4 bytes (flags still from the subs): byte tail only */
567+ ands ip, r0, #3 /* ip = destination address modulo 4 */
568+ PLD( pld [r1, #0] )
569+ bne 8f /* destination is not word aligned */
570+ ands ip, r1, #3 /* ip = source address modulo 4 */
571+ bne 9f /* source is not word aligned: shifting copy */
572+ /* Both pointers are word aligned from here on. */
573+1: subs r2, r2, #4
574+ blt 6f /* fewer than 8 bytes left */
575+ subs r2, r2, #8
576+ blt 5f /* fewer than 16 bytes left */
577+ subs r2, r2, #16
578+ blt 4f /* fewer than 32 bytes left */
579+
580+ PLD( subs r2, r2, #65 ) /* extra bias, removed again by the add after the loop */
581+ stmfd sp!, {r5 - r8} /* popped again right after the bulk loop */
582+ PLD( blt 3f )
583+ PLD( pld [r1, #32] )
584+
585+ PLD( @ cache alignment )
586+ PLD( ands ip, r1, #31 ) /* source address modulo 32 */
587+ PLD( pld [r1, #64] )
588+ PLD( beq 2f ) /* already on a 32-byte boundary */
589+ PLD( rsb ip, ip, #32 ) /* ip = bytes up to the next 32-byte boundary */
590+ PLD( cmp r2, ip )
591+ PLD( pld [r1, #96] )
592+ PLD( blt 2f )
593+ PLD( cmp ip, #16 )
594+ PLD( sub r2, r2, ip ) /* account for the ip bytes moved below (flags untouched) */
595+ PLD( ldmgeia r1!, {r3 - r6} ) /* ip >= 16: move 16 bytes */
596+ PLD( stmgeia r0!, {r3 - r6} )
597+ PLD( beq 2f ) /* ip was exactly 16 */
598+ PLD( and ip, ip, #15 )
599+ PLD( cmp ip, #8 )
600+ PLD( ldr r3, [r1], #4 ) /* move one, two (ge) or three (gt) more words */
601+ PLD( ldrge r4, [r1], #4 )
602+ PLD( ldrgt r5, [r1], #4 )
603+ PLD( str r3, [r0], #4 )
604+ PLD( strge r4, [r0], #4 )
605+ PLD( strgt r5, [r0], #4 )
606+ /* Bulk loop: 32 bytes per pass. */
607+2: PLD( pld [r1, #96] )
608+3: ldmia r1!, {r3 - r8, ip, lr}
609+ subs r2, r2, #32
610+ stmia r0!, {r3 - r8, ip, lr}
611+ bge 2b
612+ PLD( cmn r2, #65 )
613+ PLD( bge 3b ) /* keep looping without further preloads */
614+ PLD( add r2, r2, #65 ) /* undo the bias applied before the loop */
615+ tst r2, #31 /* any of the low five count bits set? */
616+ ldmfd sp!, {r5 - r8}
617+ ldmeqfd sp!, {r0, r4, pc} /* nothing left: return with r0 = original dst */
618+ /* Tail: the low bits of r2 select 16, 8 and 4 byte moves. */
619+ tst r2, #16
620+4: ldmneia r1!, {r3, r4, ip, lr} /* ne also holds when entered through blt 4f */
621+ stmneia r0!, {r3, r4, ip, lr}
622+
623+ tst r2, #8
624+5: ldmneia r1!, {r3, r4} /* ne also holds when entered through blt 5f */
625+ stmneia r0!, {r3, r4}
626+
627+ tst r2, #4
628+6: ldrne r3, [r1], #4 /* ne also holds when entered through blt 6f */
629+ strne r3, [r0], #4
630+ /* Byte tail: the low two bits of r2 give the number of bytes still to move. */
631+7: ands r2, r2, #3
632+ ldmeqfd sp!, {r0, r4, pc} /* none left: return */
633+
634+ cmp r2, #2
635+ ldrb r3, [r1], #1 /* one, two (ge) or three (gt) bytes */
636+ ldrgeb r4, [r1], #1
637+ ldrgtb ip, [r1]
638+ strb r3, [r0], #1
639+ strgeb r4, [r0], #1
640+ strgtb ip, [r0]
641+ ldmfd sp!, {r0, r4, pc}
642+ /* Destination not word aligned: move 4 - ip (1..3) bytes so that r0 becomes aligned. */
643+8: rsb ip, ip, #4
644+ cmp ip, #2
645+ ldrb r3, [r1], #1
646+ ldrgeb r4, [r1], #1
647+ ldrgtb lr, [r1], #1
648+ strb r3, [r0], #1
649+ strgeb r4, [r0], #1
650+ strgtb lr, [r0], #1
651+ subs r2, r2, ip /* account for the bytes just moved */
652+ blt 7b /* fewer than 4 bytes left */
653+ ands ip, r1, #3
654+ beq 1b /* source is word aligned too */
655+ /* Source not word aligned: ip (1..3) is its offset within the word. */
656+9: bic r1, r1, #3 /* round the source pointer down to a word boundary */
657+ cmp ip, #2
658+ ldr lr, [r1], #4 /* lr = the word containing the next source bytes; r1 moves to the following word */
659+ beq 17f /* ip == 2 */
660+ bgt 18f /* ip == 3; ip == 1 falls through */
661+
662+
663+ .macro forward_copy_shift pull push /* arguments are shift amounts in bits */
664+ /* Forward copy, r1 word aligned, r0 aligned: lr holds the last word loaded; each stored word is (lower word shifted by pull) | (next higher word shifted by push). */
665+ cmp r2, #12
666+ PLD( pld [r1, #0] )
667+ blt 15f /* short copy: single-word loop only, nothing pushed */
668+ subs r2, r2, #28
669+ stmfd sp!, {r5 - r9} /* scratch for the multi-word blocks; popped at 14 */
670+ blt 13f /* too short for the 8-word loop */
671+
672+ PLD( subs r2, r2, #97 ) /* extra bias, removed again after the loop */
673+ PLD( blt 12f )
674+ PLD( pld [r1, #32] )
675+
676+ PLD( @ cache alignment )
677+ PLD( rsb ip, r1, #36 )
678+ PLD( pld [r1, #64] )
679+ PLD( ands ip, ip, #31 ) /* ip = (36 - r1) modulo 32: byte budget for the loop at 10 */
680+ PLD( pld [r1, #96] )
681+ PLD( beq 11f )
682+ PLD( cmp r2, ip )
683+ PLD( pld [r1, #128] )
684+ PLD( blt 11f )
685+ PLD( sub r2, r2, ip ) /* account for the words moved by the loop at 10 */
686+10: PLD( mov r3, lr, pull #\pull ) /* one word per pass until ip is used up */
687+ PLD( ldr lr, [r1], #4 )
688+ PLD( subs ip, ip, #4 )
689+ PLD( orr r3, r3, lr, push #\push )
690+ PLD( str r3, [r0], #4 )
691+ PLD( bgt 10b )
692+ /* 8-word loop: load r4-r9, ip and lr, store the eight merged words from r3-r9 and ip. */
693+11: PLD( pld [r1, #128] )
694+12: mov r3, lr, pull #\pull /* start the lowest output word from the carried-over lr */
695+ ldmia r1!, {r4 - r9, ip, lr}
696+ subs r2, r2, #32
697+ orr r3, r3, r4, push #\push
698+ mov r4, r4, pull #\pull
699+ orr r4, r4, r5, push #\push
700+ mov r5, r5, pull #\pull
701+ orr r5, r5, r6, push #\push
702+ mov r6, r6, pull #\pull
703+ orr r6, r6, r7, push #\push
704+ mov r7, r7, pull #\pull
705+ orr r7, r7, r8, push #\push
706+ mov r8, r8, pull #\pull
707+ orr r8, r8, r9, push #\push
708+ mov r9, r9, pull #\pull
709+ orr r9, r9, ip, push #\push
710+ mov ip, ip, pull #\pull
711+ orr ip, ip, lr, push #\push /* lr itself is kept for the next block */
712+ stmia r0!, {r3 - r9, ip}
713+ bge 11b
714+ PLD( cmn r2, #97 )
715+ PLD( bge 12b ) /* keep looping without further preloads */
716+ PLD( add r2, r2, #97 ) /* undo the bias applied before the loop */
717+ cmn r2, #16
718+ blt 14f /* r2 + 16 < 0: skip the 4-word block */
719+13: mov r3, lr, pull #\pull /* 4-word block, same merge scheme */
720+ ldmia r1!, {r4 - r6, lr}
721+ sub r2, r2, #16
722+ orr r3, r3, r4, push #\push
723+ mov r4, r4, pull #\pull
724+ orr r4, r4, r5, push #\push
725+ mov r5, r5, pull #\pull
726+ orr r5, r5, r6, push #\push
727+ mov r6, r6, pull #\pull
728+ orr r6, r6, lr, push #\push
729+ stmia r0!, {r3 - r6}
730+14: adds r2, r2, #28 /* undo the subs #28 made before the push */
731+ ldmfd sp!, {r5 - r9}
732+ blt 16f /* no whole word left */
733+15: mov r3, lr, pull #\pull /* single-word loop */
734+ ldr lr, [r1], #4
735+ subs r2, r2, #4
736+ orr r3, r3, lr, push #\push
737+ str r3, [r0], #4
738+ bge 15b
739+16: /* falls out of the macro; r1 points just past the word held in lr */
740+ .endm
741+
742+
743+ forward_copy_shift pull=8 push=24 /* source offset 1 (fall-through from label 9) */
744+ sub r1, r1, #3 /* step back from the word boundary to the first byte not yet stored */
745+ b 7b /* finish with the byte tail */
746+
747+17: forward_copy_shift pull=16 push=16 /* source offset 2 */
748+ sub r1, r1, #2
749+ b 7b
750+
751+18: forward_copy_shift pull=24 push=8 /* source offset 3 */
752+ sub r1, r1, #1
753+ b 7b
754+
755+ .size memcpy, . - memcpy
756+END(memcpy)
757+libc_hidden_builtin_def (memcpy)
758+