diff options
Diffstat (limited to 'meta-architech-extras/recipes/linux/linux-imx-3.0.35/0002-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch')
-rw-r--r-- | meta-architech-extras/recipes/linux/linux-imx-3.0.35/0002-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch | 259 |
1 files changed, 259 insertions, 0 deletions
diff --git a/meta-architech-extras/recipes/linux/linux-imx-3.0.35/0002-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch b/meta-architech-extras/recipes/linux/linux-imx-3.0.35/0002-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch new file mode 100644 index 0000000..4c31e74 --- /dev/null +++ b/meta-architech-extras/recipes/linux/linux-imx-3.0.35/0002-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch | |||
@@ -0,0 +1,259 @@ | |||
1 | From 2235b85f1c76d98b5f1e160cbd0a61a84c15e125 Mon Sep 17 00:00:00 2001 | ||
2 | From: Ivan Djelic <ivan.djelic@parrot.com> | ||
3 | Date: Wed, 6 Mar 2013 20:09:27 +0100 | ||
4 | Subject: [PATCH] ARM: 7668/1: fix memset-related crashes caused by recent GCC | ||
5 | (4.7.2) optimizations | ||
6 | Organization: O.S. Systems Software LTDA. | ||
7 | |||
8 | Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on | ||
9 | assumptions about the implementation of memset and similar functions. | ||
10 | The current ARM optimized memset code does not return the value of | ||
11 | its first argument, as is usually expected from standard implementations. | ||
12 | |||
13 | For instance in the following function: | ||
14 | |||
15 | void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) | ||
16 | { | ||
17 | memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); | ||
18 | waiter->magic = waiter; | ||
19 | INIT_LIST_HEAD(&waiter->list); | ||
20 | } | ||
21 | |||
22 | compiled as: | ||
23 | |||
24 | 800554d0 <debug_mutex_lock_common>: | ||
25 | 800554d0: e92d4008 push {r3, lr} | ||
26 | 800554d4: e1a00001 mov r0, r1 | ||
27 | 800554d8: e3a02010 mov r2, #16 ; 0x10 | ||
28 | 800554dc: e3a01011 mov r1, #17 ; 0x11 | ||
29 | 800554e0: eb04426e bl 80165ea0 <memset> | ||
30 | 800554e4: e1a03000 mov r3, r0 | ||
31 | 800554e8: e583000c str r0, [r3, #12] | ||
32 | 800554ec: e5830000 str r0, [r3] | ||
33 | 800554f0: e5830004 str r0, [r3, #4] | ||
34 | 800554f4: e8bd8008 pop {r3, pc} | ||
35 | |||
36 | GCC assumes memset returns the value of pointer 'waiter' in register r0, causing | |
37 | register/memory corruption. | |
38 | |||
39 | This patch fixes the return value of the assembly version of memset. | ||
40 | It adds a 'mov' instruction and merges an additional load+store into | ||
41 | existing load/store instructions. | ||
42 | For ease of review, here is a breakdown of the patch into 4 simple steps: | ||
43 | |||
44 | Step 1 | ||
45 | ====== | ||
46 | Perform the following substitutions: | ||
47 | ip -> r8, then | ||
48 | r0 -> ip, | ||
49 | and insert 'mov ip, r0' as the first statement of the function. | ||
50 | At this point, we have a memset() implementation returning the proper result, | ||
51 | but corrupting r8 on some paths (the ones that were using ip). | ||
52 | |||
53 | Step 2 | ||
54 | ====== | ||
55 | Make sure r8 is saved and restored when (! CALGN(1)+0) == 1: | ||
56 | |||
57 | save r8: | ||
58 | - str lr, [sp, #-4]! | ||
59 | + stmfd sp!, {r8, lr} | ||
60 | |||
61 | and restore r8 on both exit paths: | ||
62 | - ldmeqfd sp!, {pc} @ Now <64 bytes to go. | ||
63 | + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. | ||
64 | (...) | ||
65 | tst r2, #16 | ||
66 | stmneia ip!, {r1, r3, r8, lr} | ||
67 | - ldr lr, [sp], #4 | ||
68 | + ldmfd sp!, {r8, lr} | ||
69 | |||
70 | Step 3 | ||
71 | ====== | ||
72 | Make sure r8 is saved and restored when (! CALGN(1)+0) == 0: | ||
73 | |||
74 | save r8: | ||
75 | - stmfd sp!, {r4-r7, lr} | ||
76 | + stmfd sp!, {r4-r8, lr} | ||
77 | |||
78 | and restore r8 on both exit paths: | ||
79 | bgt 3b | ||
80 | - ldmeqfd sp!, {r4-r7, pc} | ||
81 | + ldmeqfd sp!, {r4-r8, pc} | ||
82 | (...) | ||
83 | tst r2, #16 | ||
84 | stmneia ip!, {r4-r7} | ||
85 | - ldmfd sp!, {r4-r7, lr} | ||
86 | + ldmfd sp!, {r4-r8, lr} | ||
87 | |||
88 | Step 4 | ||
89 | ====== | ||
90 | Rewrite register list "r4-r7, r8" as "r4-r8". | ||
91 | |||
92 | Upstream-Status: Backport | |
93 | |||
94 | Signed-off-by: Ivan Djelic <ivan.djelic@parrot.com> | ||
95 | Reviewed-by: Nicolas Pitre <nico@linaro.org> | ||
96 | Signed-off-by: Dirk Behme <dirk.behme@gmail.com> | ||
97 | Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> | ||
98 | (cherry picked from commit 455bd4c430b0c0a361f38e8658a0d6cb469942b5) | ||
99 | --- | ||
100 | arch/arm/lib/memset.S | 85 ++++++++++++++++++++++++++------------------------- | ||
101 | 1 file changed, 44 insertions(+), 41 deletions(-) | ||
102 | |||
103 | diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S | ||
104 | index 650d592..d912e73 100644 | ||
105 | --- a/arch/arm/lib/memset.S | ||
106 | +++ b/arch/arm/lib/memset.S | ||
107 | @@ -19,9 +19,9 @@ | ||
108 | 1: subs r2, r2, #4 @ 1 do we have enough | ||
109 | blt 5f @ 1 bytes to align with? | ||
110 | cmp r3, #2 @ 1 | ||
111 | - strltb r1, [r0], #1 @ 1 | ||
112 | - strleb r1, [r0], #1 @ 1 | ||
113 | - strb r1, [r0], #1 @ 1 | ||
114 | + strltb r1, [ip], #1 @ 1 | ||
115 | + strleb r1, [ip], #1 @ 1 | ||
116 | + strb r1, [ip], #1 @ 1 | ||
117 | add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) | ||
118 | /* | ||
119 | * The pointer is now aligned and the length is adjusted. Try doing the | ||
120 | @@ -29,10 +29,14 @@ | ||
121 | */ | ||
122 | |||
123 | ENTRY(memset) | ||
124 | - ands r3, r0, #3 @ 1 unaligned? | ||
125 | +/* | ||
126 | + * Preserve the contents of r0 for the return value. | ||
127 | + */ | ||
128 | + mov ip, r0 | ||
129 | + ands r3, ip, #3 @ 1 unaligned? | ||
130 | bne 1b @ 1 | ||
131 | /* | ||
132 | - * we know that the pointer in r0 is aligned to a word boundary. | ||
133 | + * we know that the pointer in ip is aligned to a word boundary. | ||
134 | */ | ||
135 | orr r1, r1, r1, lsl #8 | ||
136 | orr r1, r1, r1, lsl #16 | ||
137 | @@ -43,29 +47,28 @@ ENTRY(memset) | ||
138 | #if ! CALGN(1)+0 | ||
139 | |||
140 | /* | ||
141 | - * We need an extra register for this loop - save the return address and | ||
142 | - * use the LR | ||
143 | + * We need 2 extra registers for this loop - use r8 and the LR | ||
144 | */ | ||
145 | - str lr, [sp, #-4]! | ||
146 | - mov ip, r1 | ||
147 | + stmfd sp!, {r8, lr} | ||
148 | + mov r8, r1 | ||
149 | mov lr, r1 | ||
150 | |||
151 | 2: subs r2, r2, #64 | ||
152 | - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. | ||
153 | - stmgeia r0!, {r1, r3, ip, lr} | ||
154 | - stmgeia r0!, {r1, r3, ip, lr} | ||
155 | - stmgeia r0!, {r1, r3, ip, lr} | ||
156 | + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. | ||
157 | + stmgeia ip!, {r1, r3, r8, lr} | ||
158 | + stmgeia ip!, {r1, r3, r8, lr} | ||
159 | + stmgeia ip!, {r1, r3, r8, lr} | ||
160 | bgt 2b | ||
161 | - ldmeqfd sp!, {pc} @ Now <64 bytes to go. | ||
162 | + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. | ||
163 | /* | ||
164 | * No need to correct the count; we're only testing bits from now on | ||
165 | */ | ||
166 | tst r2, #32 | ||
167 | - stmneia r0!, {r1, r3, ip, lr} | ||
168 | - stmneia r0!, {r1, r3, ip, lr} | ||
169 | + stmneia ip!, {r1, r3, r8, lr} | ||
170 | + stmneia ip!, {r1, r3, r8, lr} | ||
171 | tst r2, #16 | ||
172 | - stmneia r0!, {r1, r3, ip, lr} | ||
173 | - ldr lr, [sp], #4 | ||
174 | + stmneia ip!, {r1, r3, r8, lr} | ||
175 | + ldmfd sp!, {r8, lr} | ||
176 | |||
177 | #else | ||
178 | |||
179 | @@ -74,54 +77,54 @@ ENTRY(memset) | ||
180 | * whole cache lines at once. | ||
181 | */ | ||
182 | |||
183 | - stmfd sp!, {r4-r7, lr} | ||
184 | + stmfd sp!, {r4-r8, lr} | ||
185 | mov r4, r1 | ||
186 | mov r5, r1 | ||
187 | mov r6, r1 | ||
188 | mov r7, r1 | ||
189 | - mov ip, r1 | ||
190 | + mov r8, r1 | ||
191 | mov lr, r1 | ||
192 | |||
193 | cmp r2, #96 | ||
194 | - tstgt r0, #31 | ||
195 | + tstgt ip, #31 | ||
196 | ble 3f | ||
197 | |||
198 | - and ip, r0, #31 | ||
199 | - rsb ip, ip, #32 | ||
200 | - sub r2, r2, ip | ||
201 | - movs ip, ip, lsl #(32 - 4) | ||
202 | - stmcsia r0!, {r4, r5, r6, r7} | ||
203 | - stmmiia r0!, {r4, r5} | ||
204 | - tst ip, #(1 << 30) | ||
205 | - mov ip, r1 | ||
206 | - strne r1, [r0], #4 | ||
207 | + and r8, ip, #31 | ||
208 | + rsb r8, r8, #32 | ||
209 | + sub r2, r2, r8 | ||
210 | + movs r8, r8, lsl #(32 - 4) | ||
211 | + stmcsia ip!, {r4, r5, r6, r7} | ||
212 | + stmmiia ip!, {r4, r5} | ||
213 | + tst r8, #(1 << 30) | ||
214 | + mov r8, r1 | ||
215 | + strne r1, [ip], #4 | ||
216 | |||
217 | 3: subs r2, r2, #64 | ||
218 | - stmgeia r0!, {r1, r3-r7, ip, lr} | ||
219 | - stmgeia r0!, {r1, r3-r7, ip, lr} | ||
220 | + stmgeia ip!, {r1, r3-r8, lr} | ||
221 | + stmgeia ip!, {r1, r3-r8, lr} | ||
222 | bgt 3b | ||
223 | - ldmeqfd sp!, {r4-r7, pc} | ||
224 | + ldmeqfd sp!, {r4-r8, pc} | ||
225 | |||
226 | tst r2, #32 | ||
227 | - stmneia r0!, {r1, r3-r7, ip, lr} | ||
228 | + stmneia ip!, {r1, r3-r8, lr} | ||
229 | tst r2, #16 | ||
230 | - stmneia r0!, {r4-r7} | ||
231 | - ldmfd sp!, {r4-r7, lr} | ||
232 | + stmneia ip!, {r4-r7} | ||
233 | + ldmfd sp!, {r4-r8, lr} | ||
234 | |||
235 | #endif | ||
236 | |||
237 | 4: tst r2, #8 | ||
238 | - stmneia r0!, {r1, r3} | ||
239 | + stmneia ip!, {r1, r3} | ||
240 | tst r2, #4 | ||
241 | - strne r1, [r0], #4 | ||
242 | + strne r1, [ip], #4 | ||
243 | /* | ||
244 | * When we get here, we've got less than 4 bytes to zero. We | ||
245 | * may have an unaligned pointer as well. | ||
246 | */ | ||
247 | 5: tst r2, #2 | ||
248 | - strneb r1, [r0], #1 | ||
249 | - strneb r1, [r0], #1 | ||
250 | + strneb r1, [ip], #1 | ||
251 | + strneb r1, [ip], #1 | ||
252 | tst r2, #1 | ||
253 | - strneb r1, [r0], #1 | ||
254 | + strneb r1, [ip], #1 | ||
255 | mov pc, lr | ||
256 | ENDPROC(memset) | ||
257 | -- | ||
258 | 1.8.4.rc3 | ||
259 | |||