diff options
Diffstat (limited to 'recipes-kernel/linux/linux-omap4-3.4/pandaboard/0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch')
-rw-r--r-- | recipes-kernel/linux/linux-omap4-3.4/pandaboard/0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch | 256 |
1 files changed, 0 insertions, 256 deletions
diff --git a/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch b/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch deleted file mode 100644 index 7b4e6528..00000000 --- a/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch +++ /dev/null | |||
@@ -1,256 +0,0 @@ | |||
1 | From b9ebe46e251c5c8becd02f92893ef514e834bf67 Mon Sep 17 00:00:00 2001 | ||
2 | From: Ivan Djelic <ivan.djelic@parrot.com> | ||
3 | Date: Wed, 6 Mar 2013 20:09:27 +0100 | ||
4 | Subject: [PATCH 3/4] ARM: 7668/1: fix memset-related crashes caused by recent | ||
5 | GCC (4.7.2) optimizations | ||
6 | |||
7 | Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on | ||
8 | assumptions about the implementation of memset and similar functions. | ||
9 | The current ARM optimized memset code does not return the value of | ||
10 | its first argument, as is usually expected from standard implementations. | ||
11 | |||
12 | For instance in the following function: | ||
13 | |||
14 | void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) | ||
15 | { | ||
16 | memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); | ||
17 | waiter->magic = waiter; | ||
18 | INIT_LIST_HEAD(&waiter->list); | ||
19 | } | ||
20 | |||
21 | compiled as: | ||
22 | |||
23 | 800554d0 <debug_mutex_lock_common>: | ||
24 | 800554d0: e92d4008 push {r3, lr} | ||
25 | 800554d4: e1a00001 mov r0, r1 | ||
26 | 800554d8: e3a02010 mov r2, #16 ; 0x10 | ||
27 | 800554dc: e3a01011 mov r1, #17 ; 0x11 | ||
28 | 800554e0: eb04426e bl 80165ea0 <memset> | ||
29 | 800554e4: e1a03000 mov r3, r0 | ||
30 | 800554e8: e583000c str r0, [r3, #12] | ||
31 | 800554ec: e5830000 str r0, [r3] | ||
32 | 800554f0: e5830004 str r0, [r3, #4] | ||
33 | 800554f4: e8bd8008 pop {r3, pc} | ||
34 | |||
35 | GCC assumes memset returns the value of pointer 'waiter' in register r0, which | ||
36 | causes register/memory corruption. | ||
37 | |||
38 | This patch fixes the return value of the assembly version of memset. | ||
39 | It adds a 'mov' instruction and merges an additional load+store into | ||
40 | existing load/store instructions. | ||
41 | For ease of review, here is a breakdown of the patch into 4 simple steps: | ||
42 | |||
43 | Step 1 | ||
44 | ====== | ||
45 | Perform the following substitutions: | ||
46 | ip -> r8, then | ||
47 | r0 -> ip, | ||
48 | and insert 'mov ip, r0' as the first statement of the function. | ||
49 | At this point, we have a memset() implementation returning the proper result, | ||
50 | but corrupting r8 on some paths (the ones that were using ip). | ||
51 | |||
52 | Step 2 | ||
53 | ====== | ||
54 | Make sure r8 is saved and restored when (! CALGN(1)+0) == 1: | ||
55 | |||
56 | save r8: | ||
57 | - str lr, [sp, #-4]! | ||
58 | + stmfd sp!, {r8, lr} | ||
59 | |||
60 | and restore r8 on both exit paths: | ||
61 | - ldmeqfd sp!, {pc} @ Now <64 bytes to go. | ||
62 | + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. | ||
63 | (...) | ||
64 | tst r2, #16 | ||
65 | stmneia ip!, {r1, r3, r8, lr} | ||
66 | - ldr lr, [sp], #4 | ||
67 | + ldmfd sp!, {r8, lr} | ||
68 | |||
69 | Step 3 | ||
70 | ====== | ||
71 | Make sure r8 is saved and restored when (! CALGN(1)+0) == 0: | ||
72 | |||
73 | save r8: | ||
74 | - stmfd sp!, {r4-r7, lr} | ||
75 | + stmfd sp!, {r4-r8, lr} | ||
76 | |||
77 | and restore r8 on both exit paths: | ||
78 | bgt 3b | ||
79 | - ldmeqfd sp!, {r4-r7, pc} | ||
80 | + ldmeqfd sp!, {r4-r8, pc} | ||
81 | (...) | ||
82 | tst r2, #16 | ||
83 | stmneia ip!, {r4-r7} | ||
84 | - ldmfd sp!, {r4-r7, lr} | ||
85 | + ldmfd sp!, {r4-r8, lr} | ||
86 | |||
87 | Step 4 | ||
88 | ====== | ||
89 | Rewrite register list "r4-r7, r8" as "r4-r8". | ||
90 | |||
91 | Signed-off-by: Ivan Djelic <ivan.djelic@parrot.com> | ||
92 | Reviewed-by: Nicolas Pitre <nico@linaro.org> | ||
93 | Signed-off-by: Dirk Behme <dirk.behme@gmail.com> | ||
94 | Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> | ||
95 | Signed-off-by: Khem Raj <raj.khem@gmail.com> | ||
96 | --- | ||
97 | arch/arm/lib/memset.S | 85 ++++++++++++++++++++++++++------------------------- | ||
98 | 1 file changed, 44 insertions(+), 41 deletions(-) | ||
99 | |||
100 | diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S | ||
101 | index 650d592..d912e73 100644 | ||
102 | --- a/arch/arm/lib/memset.S | ||
103 | +++ b/arch/arm/lib/memset.S | ||
104 | @@ -19,9 +19,9 @@ | ||
105 | 1: subs r2, r2, #4 @ 1 do we have enough | ||
106 | blt 5f @ 1 bytes to align with? | ||
107 | cmp r3, #2 @ 1 | ||
108 | - strltb r1, [r0], #1 @ 1 | ||
109 | - strleb r1, [r0], #1 @ 1 | ||
110 | - strb r1, [r0], #1 @ 1 | ||
111 | + strltb r1, [ip], #1 @ 1 | ||
112 | + strleb r1, [ip], #1 @ 1 | ||
113 | + strb r1, [ip], #1 @ 1 | ||
114 | add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) | ||
115 | /* | ||
116 | * The pointer is now aligned and the length is adjusted. Try doing the | ||
117 | @@ -29,10 +29,14 @@ | ||
118 | */ | ||
119 | |||
120 | ENTRY(memset) | ||
121 | - ands r3, r0, #3 @ 1 unaligned? | ||
122 | +/* | ||
123 | + * Preserve the contents of r0 for the return value. | ||
124 | + */ | ||
125 | + mov ip, r0 | ||
126 | + ands r3, ip, #3 @ 1 unaligned? | ||
127 | bne 1b @ 1 | ||
128 | /* | ||
129 | - * we know that the pointer in r0 is aligned to a word boundary. | ||
130 | + * we know that the pointer in ip is aligned to a word boundary. | ||
131 | */ | ||
132 | orr r1, r1, r1, lsl #8 | ||
133 | orr r1, r1, r1, lsl #16 | ||
134 | @@ -43,29 +47,28 @@ ENTRY(memset) | ||
135 | #if ! CALGN(1)+0 | ||
136 | |||
137 | /* | ||
138 | - * We need an extra register for this loop - save the return address and | ||
139 | - * use the LR | ||
140 | + * We need 2 extra registers for this loop - use r8 and the LR | ||
141 | */ | ||
142 | - str lr, [sp, #-4]! | ||
143 | - mov ip, r1 | ||
144 | + stmfd sp!, {r8, lr} | ||
145 | + mov r8, r1 | ||
146 | mov lr, r1 | ||
147 | |||
148 | 2: subs r2, r2, #64 | ||
149 | - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. | ||
150 | - stmgeia r0!, {r1, r3, ip, lr} | ||
151 | - stmgeia r0!, {r1, r3, ip, lr} | ||
152 | - stmgeia r0!, {r1, r3, ip, lr} | ||
153 | + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. | ||
154 | + stmgeia ip!, {r1, r3, r8, lr} | ||
155 | + stmgeia ip!, {r1, r3, r8, lr} | ||
156 | + stmgeia ip!, {r1, r3, r8, lr} | ||
157 | bgt 2b | ||
158 | - ldmeqfd sp!, {pc} @ Now <64 bytes to go. | ||
159 | + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. | ||
160 | /* | ||
161 | * No need to correct the count; we're only testing bits from now on | ||
162 | */ | ||
163 | tst r2, #32 | ||
164 | - stmneia r0!, {r1, r3, ip, lr} | ||
165 | - stmneia r0!, {r1, r3, ip, lr} | ||
166 | + stmneia ip!, {r1, r3, r8, lr} | ||
167 | + stmneia ip!, {r1, r3, r8, lr} | ||
168 | tst r2, #16 | ||
169 | - stmneia r0!, {r1, r3, ip, lr} | ||
170 | - ldr lr, [sp], #4 | ||
171 | + stmneia ip!, {r1, r3, r8, lr} | ||
172 | + ldmfd sp!, {r8, lr} | ||
173 | |||
174 | #else | ||
175 | |||
176 | @@ -74,54 +77,54 @@ ENTRY(memset) | ||
177 | * whole cache lines at once. | ||
178 | */ | ||
179 | |||
180 | - stmfd sp!, {r4-r7, lr} | ||
181 | + stmfd sp!, {r4-r8, lr} | ||
182 | mov r4, r1 | ||
183 | mov r5, r1 | ||
184 | mov r6, r1 | ||
185 | mov r7, r1 | ||
186 | - mov ip, r1 | ||
187 | + mov r8, r1 | ||
188 | mov lr, r1 | ||
189 | |||
190 | cmp r2, #96 | ||
191 | - tstgt r0, #31 | ||
192 | + tstgt ip, #31 | ||
193 | ble 3f | ||
194 | |||
195 | - and ip, r0, #31 | ||
196 | - rsb ip, ip, #32 | ||
197 | - sub r2, r2, ip | ||
198 | - movs ip, ip, lsl #(32 - 4) | ||
199 | - stmcsia r0!, {r4, r5, r6, r7} | ||
200 | - stmmiia r0!, {r4, r5} | ||
201 | - tst ip, #(1 << 30) | ||
202 | - mov ip, r1 | ||
203 | - strne r1, [r0], #4 | ||
204 | + and r8, ip, #31 | ||
205 | + rsb r8, r8, #32 | ||
206 | + sub r2, r2, r8 | ||
207 | + movs r8, r8, lsl #(32 - 4) | ||
208 | + stmcsia ip!, {r4, r5, r6, r7} | ||
209 | + stmmiia ip!, {r4, r5} | ||
210 | + tst r8, #(1 << 30) | ||
211 | + mov r8, r1 | ||
212 | + strne r1, [ip], #4 | ||
213 | |||
214 | 3: subs r2, r2, #64 | ||
215 | - stmgeia r0!, {r1, r3-r7, ip, lr} | ||
216 | - stmgeia r0!, {r1, r3-r7, ip, lr} | ||
217 | + stmgeia ip!, {r1, r3-r8, lr} | ||
218 | + stmgeia ip!, {r1, r3-r8, lr} | ||
219 | bgt 3b | ||
220 | - ldmeqfd sp!, {r4-r7, pc} | ||
221 | + ldmeqfd sp!, {r4-r8, pc} | ||
222 | |||
223 | tst r2, #32 | ||
224 | - stmneia r0!, {r1, r3-r7, ip, lr} | ||
225 | + stmneia ip!, {r1, r3-r8, lr} | ||
226 | tst r2, #16 | ||
227 | - stmneia r0!, {r4-r7} | ||
228 | - ldmfd sp!, {r4-r7, lr} | ||
229 | + stmneia ip!, {r4-r7} | ||
230 | + ldmfd sp!, {r4-r8, lr} | ||
231 | |||
232 | #endif | ||
233 | |||
234 | 4: tst r2, #8 | ||
235 | - stmneia r0!, {r1, r3} | ||
236 | + stmneia ip!, {r1, r3} | ||
237 | tst r2, #4 | ||
238 | - strne r1, [r0], #4 | ||
239 | + strne r1, [ip], #4 | ||
240 | /* | ||
241 | * When we get here, we've got less than 4 bytes to zero. We | ||
242 | * may have an unaligned pointer as well. | ||
243 | */ | ||
244 | 5: tst r2, #2 | ||
245 | - strneb r1, [r0], #1 | ||
246 | - strneb r1, [r0], #1 | ||
247 | + strneb r1, [ip], #1 | ||
248 | + strneb r1, [ip], #1 | ||
249 | tst r2, #1 | ||
250 | - strneb r1, [r0], #1 | ||
251 | + strneb r1, [ip], #1 | ||
252 | mov pc, lr | ||
253 | ENDPROC(memset) | ||
254 | -- | ||
255 | 1.8.2.1 | ||
256 | |||