diff options
author | Baptiste DURAND <baptiste.durand@gmail.com> | 2014-02-04 21:40:01 +0100 |
---|---|---|
committer | Denys Dmytriyenko <denys@ti.com> | 2014-02-05 18:35:44 -0500 |
commit | 07809e70f4f277d894323b7d2ed39400e0cca8e3 (patch) | |
tree | f1d0793dd5b7d949d4210c4e68ddf0a41b1de068 | |
parent | 06c1b593d2902d2011fde8d7ddfe07d2574d06ea (diff) | |
download | meta-ti-07809e70f4f277d894323b7d2ed39400e0cca8e3.tar.gz |
linux-omap4 3.4: Fix random segfaults and boot issues seen with gcc 4.8
Based on the work in:
https://github.com/koenkooi/meta-beagleboard/commit/87efc663c4f76d3051cecf1e2c3e2bbd461d200b
Signed-off-by: Baptiste DURAND <baptiste.durand@gmail.com>
Signed-off-by: Denys Dmytriyenko <denys@ti.com>
3 files changed, 341 insertions, 0 deletions
diff --git a/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch b/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch new file mode 100644 index 00000000..7b4e6528 --- /dev/null +++ b/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch | |||
@@ -0,0 +1,256 @@ | |||
1 | From b9ebe46e251c5c8becd02f92893ef514e834bf67 Mon Sep 17 00:00:00 2001 | ||
2 | From: Ivan Djelic <ivan.djelic@parrot.com> | ||
3 | Date: Wed, 6 Mar 2013 20:09:27 +0100 | ||
4 | Subject: [PATCH 3/4] ARM: 7668/1: fix memset-related crashes caused by recent | ||
5 | GCC (4.7.2) optimizations | ||
6 | |||
7 | Recent GCC versions (e.g. GCC-4.7.2) perform optimizations based on | ||
8 | assumptions about the implementation of memset and similar functions. | ||
9 | The current ARM optimized memset code does not return the value of | ||
10 | its first argument, as is usually expected from standard implementations. | ||
11 | |||
12 | For instance in the following function: | ||
13 | |||
14 | void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) | ||
15 | { | ||
16 | memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); | ||
17 | waiter->magic = waiter; | ||
18 | INIT_LIST_HEAD(&waiter->list); | ||
19 | } | ||
20 | |||
21 | compiled as: | ||
22 | |||
23 | 800554d0 <debug_mutex_lock_common>: | ||
24 | 800554d0: e92d4008 push {r3, lr} | ||
25 | 800554d4: e1a00001 mov r0, r1 | ||
26 | 800554d8: e3a02010 mov r2, #16 ; 0x10 | ||
27 | 800554dc: e3a01011 mov r1, #17 ; 0x11 | ||
28 | 800554e0: eb04426e bl 80165ea0 <memset> | ||
29 | 800554e4: e1a03000 mov r3, r0 | ||
30 | 800554e8: e583000c str r0, [r3, #12] | ||
31 | 800554ec: e5830000 str r0, [r3] | ||
32 | 800554f0: e5830004 str r0, [r3, #4] | ||
33 | 800554f4: e8bd8008 pop {r3, pc} | ||
34 | |||
35 | GCC assumes memset returns the value of pointer 'waiter' in register r0, causing | ||
36 | register/memory corruption. | ||
37 | |||
38 | This patch fixes the return value of the assembly version of memset. | ||
39 | It adds a 'mov' instruction and merges an additional load+store into | ||
40 | existing load/store instructions. | ||
41 | For ease of review, here is a breakdown of the patch into 4 simple steps: | ||
42 | |||
43 | Step 1 | ||
44 | ====== | ||
45 | Perform the following substitutions: | ||
46 | ip -> r8, then | ||
47 | r0 -> ip, | ||
48 | and insert 'mov ip, r0' as the first statement of the function. | ||
49 | At this point, we have a memset() implementation returning the proper result, | ||
50 | but corrupting r8 on some paths (the ones that were using ip). | ||
51 | |||
52 | Step 2 | ||
53 | ====== | ||
54 | Make sure r8 is saved and restored when (! CALGN(1)+0) == 1: | ||
55 | |||
56 | save r8: | ||
57 | - str lr, [sp, #-4]! | ||
58 | + stmfd sp!, {r8, lr} | ||
59 | |||
60 | and restore r8 on both exit paths: | ||
61 | - ldmeqfd sp!, {pc} @ Now <64 bytes to go. | ||
62 | + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. | ||
63 | (...) | ||
64 | tst r2, #16 | ||
65 | stmneia ip!, {r1, r3, r8, lr} | ||
66 | - ldr lr, [sp], #4 | ||
67 | + ldmfd sp!, {r8, lr} | ||
68 | |||
69 | Step 3 | ||
70 | ====== | ||
71 | Make sure r8 is saved and restored when (! CALGN(1)+0) == 0: | ||
72 | |||
73 | save r8: | ||
74 | - stmfd sp!, {r4-r7, lr} | ||
75 | + stmfd sp!, {r4-r8, lr} | ||
76 | |||
77 | and restore r8 on both exit paths: | ||
78 | bgt 3b | ||
79 | - ldmeqfd sp!, {r4-r7, pc} | ||
80 | + ldmeqfd sp!, {r4-r8, pc} | ||
81 | (...) | ||
82 | tst r2, #16 | ||
83 | stmneia ip!, {r4-r7} | ||
84 | - ldmfd sp!, {r4-r7, lr} | ||
85 | + ldmfd sp!, {r4-r8, lr} | ||
86 | |||
87 | Step 4 | ||
88 | ====== | ||
89 | Rewrite register list "r4-r7, r8" as "r4-r8". | ||
90 | |||
91 | Signed-off-by: Ivan Djelic <ivan.djelic@parrot.com> | ||
92 | Reviewed-by: Nicolas Pitre <nico@linaro.org> | ||
93 | Signed-off-by: Dirk Behme <dirk.behme@gmail.com> | ||
94 | Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> | ||
95 | Signed-off-by: Khem Raj <raj.khem@gmail.com> | ||
96 | --- | ||
97 | arch/arm/lib/memset.S | 85 ++++++++++++++++++++++++++------------------------- | ||
98 | 1 file changed, 44 insertions(+), 41 deletions(-) | ||
99 | |||
100 | diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S | ||
101 | index 650d592..d912e73 100644 | ||
102 | --- a/arch/arm/lib/memset.S | ||
103 | +++ b/arch/arm/lib/memset.S | ||
104 | @@ -19,9 +19,9 @@ | ||
105 | 1: subs r2, r2, #4 @ 1 do we have enough | ||
106 | blt 5f @ 1 bytes to align with? | ||
107 | cmp r3, #2 @ 1 | ||
108 | - strltb r1, [r0], #1 @ 1 | ||
109 | - strleb r1, [r0], #1 @ 1 | ||
110 | - strb r1, [r0], #1 @ 1 | ||
111 | + strltb r1, [ip], #1 @ 1 | ||
112 | + strleb r1, [ip], #1 @ 1 | ||
113 | + strb r1, [ip], #1 @ 1 | ||
114 | add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) | ||
115 | /* | ||
116 | * The pointer is now aligned and the length is adjusted. Try doing the | ||
117 | @@ -29,10 +29,14 @@ | ||
118 | */ | ||
119 | |||
120 | ENTRY(memset) | ||
121 | - ands r3, r0, #3 @ 1 unaligned? | ||
122 | +/* | ||
123 | + * Preserve the contents of r0 for the return value. | ||
124 | + */ | ||
125 | + mov ip, r0 | ||
126 | + ands r3, ip, #3 @ 1 unaligned? | ||
127 | bne 1b @ 1 | ||
128 | /* | ||
129 | - * we know that the pointer in r0 is aligned to a word boundary. | ||
130 | + * we know that the pointer in ip is aligned to a word boundary. | ||
131 | */ | ||
132 | orr r1, r1, r1, lsl #8 | ||
133 | orr r1, r1, r1, lsl #16 | ||
134 | @@ -43,29 +47,28 @@ ENTRY(memset) | ||
135 | #if ! CALGN(1)+0 | ||
136 | |||
137 | /* | ||
138 | - * We need an extra register for this loop - save the return address and | ||
139 | - * use the LR | ||
140 | + * We need 2 extra registers for this loop - use r8 and the LR | ||
141 | */ | ||
142 | - str lr, [sp, #-4]! | ||
143 | - mov ip, r1 | ||
144 | + stmfd sp!, {r8, lr} | ||
145 | + mov r8, r1 | ||
146 | mov lr, r1 | ||
147 | |||
148 | 2: subs r2, r2, #64 | ||
149 | - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. | ||
150 | - stmgeia r0!, {r1, r3, ip, lr} | ||
151 | - stmgeia r0!, {r1, r3, ip, lr} | ||
152 | - stmgeia r0!, {r1, r3, ip, lr} | ||
153 | + stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. | ||
154 | + stmgeia ip!, {r1, r3, r8, lr} | ||
155 | + stmgeia ip!, {r1, r3, r8, lr} | ||
156 | + stmgeia ip!, {r1, r3, r8, lr} | ||
157 | bgt 2b | ||
158 | - ldmeqfd sp!, {pc} @ Now <64 bytes to go. | ||
159 | + ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. | ||
160 | /* | ||
161 | * No need to correct the count; we're only testing bits from now on | ||
162 | */ | ||
163 | tst r2, #32 | ||
164 | - stmneia r0!, {r1, r3, ip, lr} | ||
165 | - stmneia r0!, {r1, r3, ip, lr} | ||
166 | + stmneia ip!, {r1, r3, r8, lr} | ||
167 | + stmneia ip!, {r1, r3, r8, lr} | ||
168 | tst r2, #16 | ||
169 | - stmneia r0!, {r1, r3, ip, lr} | ||
170 | - ldr lr, [sp], #4 | ||
171 | + stmneia ip!, {r1, r3, r8, lr} | ||
172 | + ldmfd sp!, {r8, lr} | ||
173 | |||
174 | #else | ||
175 | |||
176 | @@ -74,54 +77,54 @@ ENTRY(memset) | ||
177 | * whole cache lines at once. | ||
178 | */ | ||
179 | |||
180 | - stmfd sp!, {r4-r7, lr} | ||
181 | + stmfd sp!, {r4-r8, lr} | ||
182 | mov r4, r1 | ||
183 | mov r5, r1 | ||
184 | mov r6, r1 | ||
185 | mov r7, r1 | ||
186 | - mov ip, r1 | ||
187 | + mov r8, r1 | ||
188 | mov lr, r1 | ||
189 | |||
190 | cmp r2, #96 | ||
191 | - tstgt r0, #31 | ||
192 | + tstgt ip, #31 | ||
193 | ble 3f | ||
194 | |||
195 | - and ip, r0, #31 | ||
196 | - rsb ip, ip, #32 | ||
197 | - sub r2, r2, ip | ||
198 | - movs ip, ip, lsl #(32 - 4) | ||
199 | - stmcsia r0!, {r4, r5, r6, r7} | ||
200 | - stmmiia r0!, {r4, r5} | ||
201 | - tst ip, #(1 << 30) | ||
202 | - mov ip, r1 | ||
203 | - strne r1, [r0], #4 | ||
204 | + and r8, ip, #31 | ||
205 | + rsb r8, r8, #32 | ||
206 | + sub r2, r2, r8 | ||
207 | + movs r8, r8, lsl #(32 - 4) | ||
208 | + stmcsia ip!, {r4, r5, r6, r7} | ||
209 | + stmmiia ip!, {r4, r5} | ||
210 | + tst r8, #(1 << 30) | ||
211 | + mov r8, r1 | ||
212 | + strne r1, [ip], #4 | ||
213 | |||
214 | 3: subs r2, r2, #64 | ||
215 | - stmgeia r0!, {r1, r3-r7, ip, lr} | ||
216 | - stmgeia r0!, {r1, r3-r7, ip, lr} | ||
217 | + stmgeia ip!, {r1, r3-r8, lr} | ||
218 | + stmgeia ip!, {r1, r3-r8, lr} | ||
219 | bgt 3b | ||
220 | - ldmeqfd sp!, {r4-r7, pc} | ||
221 | + ldmeqfd sp!, {r4-r8, pc} | ||
222 | |||
223 | tst r2, #32 | ||
224 | - stmneia r0!, {r1, r3-r7, ip, lr} | ||
225 | + stmneia ip!, {r1, r3-r8, lr} | ||
226 | tst r2, #16 | ||
227 | - stmneia r0!, {r4-r7} | ||
228 | - ldmfd sp!, {r4-r7, lr} | ||
229 | + stmneia ip!, {r4-r7} | ||
230 | + ldmfd sp!, {r4-r8, lr} | ||
231 | |||
232 | #endif | ||
233 | |||
234 | 4: tst r2, #8 | ||
235 | - stmneia r0!, {r1, r3} | ||
236 | + stmneia ip!, {r1, r3} | ||
237 | tst r2, #4 | ||
238 | - strne r1, [r0], #4 | ||
239 | + strne r1, [ip], #4 | ||
240 | /* | ||
241 | * When we get here, we've got less than 4 bytes to zero. We | ||
242 | * may have an unaligned pointer as well. | ||
243 | */ | ||
244 | 5: tst r2, #2 | ||
245 | - strneb r1, [r0], #1 | ||
246 | - strneb r1, [r0], #1 | ||
247 | + strneb r1, [ip], #1 | ||
248 | + strneb r1, [ip], #1 | ||
249 | tst r2, #1 | ||
250 | - strneb r1, [r0], #1 | ||
251 | + strneb r1, [ip], #1 | ||
252 | mov pc, lr | ||
253 | ENDPROC(memset) | ||
254 | -- | ||
255 | 1.8.2.1 | ||
256 | |||
diff --git a/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0004-ARM-7670-1-fix-the-memset-fix.patch b/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0004-ARM-7670-1-fix-the-memset-fix.patch new file mode 100644 index 00000000..06584135 --- /dev/null +++ b/recipes-kernel/linux/linux-omap4-3.4/pandaboard/0004-ARM-7670-1-fix-the-memset-fix.patch | |||
@@ -0,0 +1,83 @@ | |||
1 | From 78429023fbec3428238e8dbdd81fa67e4619d04c Mon Sep 17 00:00:00 2001 | ||
2 | From: Nicolas Pitre <nicolas.pitre@linaro.org> | ||
3 | Date: Tue, 12 Mar 2013 13:00:42 +0100 | ||
4 | Subject: [PATCH 4/4] ARM: 7670/1: fix the memset fix | ||
5 | |||
6 | Commit 455bd4c430b0 ("ARM: 7668/1: fix memset-related crashes caused by | ||
7 | recent GCC (4.7.2) optimizations") attempted to fix a compliance issue | ||
8 | with the memset return value. However, memset itself was broken | ||
9 | by that patch for misaligned pointers. | ||
10 | |||
11 | This fixes the above by branching over the entry code from the | ||
12 | misaligned fixup code to avoid reloading the original pointer. | ||
13 | |||
14 | Also, because the function entry alignment is wrong in the Thumb mode | ||
15 | compilation, that fixup code is moved to the end. | ||
16 | |||
17 | While at it, the entry instructions are slightly reworked to help dual | ||
18 | issue pipelines. | ||
19 | |||
20 | Signed-off-by: Nicolas Pitre <nico@linaro.org> | ||
21 | Tested-by: Alexander Holler <holler@ahsoftware.de> | ||
22 | Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> | ||
23 | --- | ||
24 | arch/arm/lib/memset.S | 33 +++++++++++++-------------------- | ||
25 | 1 file changed, 13 insertions(+), 20 deletions(-) | ||
26 | |||
27 | diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S | ||
28 | index d912e73..94b0650 100644 | ||
29 | --- a/arch/arm/lib/memset.S | ||
30 | +++ b/arch/arm/lib/memset.S | ||
31 | @@ -14,31 +14,15 @@ | ||
32 | |||
33 | .text | ||
34 | .align 5 | ||
35 | - .word 0 | ||
36 | - | ||
37 | -1: subs r2, r2, #4 @ 1 do we have enough | ||
38 | - blt 5f @ 1 bytes to align with? | ||
39 | - cmp r3, #2 @ 1 | ||
40 | - strltb r1, [ip], #1 @ 1 | ||
41 | - strleb r1, [ip], #1 @ 1 | ||
42 | - strb r1, [ip], #1 @ 1 | ||
43 | - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) | ||
44 | -/* | ||
45 | - * The pointer is now aligned and the length is adjusted. Try doing the | ||
46 | - * memset again. | ||
47 | - */ | ||
48 | |||
49 | ENTRY(memset) | ||
50 | -/* | ||
51 | - * Preserve the contents of r0 for the return value. | ||
52 | - */ | ||
53 | - mov ip, r0 | ||
54 | - ands r3, ip, #3 @ 1 unaligned? | ||
55 | - bne 1b @ 1 | ||
56 | + ands r3, r0, #3 @ 1 unaligned? | ||
57 | + mov ip, r0 @ preserve r0 as return value | ||
58 | + bne 6f @ 1 | ||
59 | /* | ||
60 | * we know that the pointer in ip is aligned to a word boundary. | ||
61 | */ | ||
62 | - orr r1, r1, r1, lsl #8 | ||
63 | +1: orr r1, r1, r1, lsl #8 | ||
64 | orr r1, r1, r1, lsl #16 | ||
65 | mov r3, r1 | ||
66 | cmp r2, #16 | ||
67 | @@ -127,4 +111,13 @@ ENTRY(memset) | ||
68 | tst r2, #1 | ||
69 | strneb r1, [ip], #1 | ||
70 | mov pc, lr | ||
71 | + | ||
72 | +6: subs r2, r2, #4 @ 1 do we have enough | ||
73 | + blt 5b @ 1 bytes to align with? | ||
74 | + cmp r3, #2 @ 1 | ||
75 | + strltb r1, [ip], #1 @ 1 | ||
76 | + strleb r1, [ip], #1 @ 1 | ||
77 | + strb r1, [ip], #1 @ 1 | ||
78 | + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) | ||
79 | + b 1b | ||
80 | ENDPROC(memset) | ||
81 | -- | ||
82 | 1.8.2.1 | ||
83 | |||
diff --git a/recipes-kernel/linux/linux-omap4_3.4.bb b/recipes-kernel/linux/linux-omap4_3.4.bb index 449e4eb8..52629de2 100644 --- a/recipes-kernel/linux/linux-omap4_3.4.bb +++ b/recipes-kernel/linux/linux-omap4_3.4.bb | |||
@@ -13,6 +13,8 @@ SRCREV = "c34a43ec74168b892a948b45695486f1a3d700af" | |||
13 | 13 | ||
14 | SRC_URI = "git://dev.omapzoom.org/pub/scm/integration/kernel-ubuntu.git;protocol=git;branch=ti-ubuntu-3.4-1487 \ | 14 | SRC_URI = "git://dev.omapzoom.org/pub/scm/integration/kernel-ubuntu.git;protocol=git;branch=ti-ubuntu-3.4-1487 \ |
15 | file://defconfig \ | 15 | file://defconfig \ |
16 | file://0003-ARM-7668-1-fix-memset-related-crashes-caused-by-rece.patch \ | ||
17 | file://0004-ARM-7670-1-fix-the-memset-fix.patch \ | ||
16 | " | 18 | " |
17 | 19 | ||
18 | S = "${WORKDIR}/git" | 20 | S = "${WORKDIR}/git" |