summary | refs | log | tree | commit | diff | stats
path: root/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.22/0007-mm-pmd_read_atomic-fix-32bit-PAE-pmd-walk-vs-pmd_pop.patch
diff options
context:
space:
mode:
Diffstat (limited to 'recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.22/0007-mm-pmd_read_atomic-fix-32bit-PAE-pmd-walk-vs-pmd_pop.patch')
-rw-r--r-- recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.22/0007-mm-pmd_read_atomic-fix-32bit-PAE-pmd-walk-vs-pmd_pop.patch | 218
1 files changed, 218 insertions, 0 deletions
diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.22/0007-mm-pmd_read_atomic-fix-32bit-PAE-pmd-walk-vs-pmd_pop.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.22/0007-mm-pmd_read_atomic-fix-32bit-PAE-pmd-walk-vs-pmd_pop.patch
new file mode 100644
index 00000000..9cac18dd
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.22/0007-mm-pmd_read_atomic-fix-32bit-PAE-pmd-walk-vs-pmd_pop.patch
@@ -0,0 +1,218 @@
1From 8bf1709b6925d4e05120bdfed73992d50e7f11bf Mon Sep 17 00:00:00 2001
2From: Andrea Arcangeli <aarcange@redhat.com>
3Date: Tue, 29 May 2012 15:06:49 -0700
4Subject: [PATCH 07/46] mm: pmd_read_atomic: fix 32bit PAE pmd walk vs
5 pmd_populate SMP race condition
6
7commit 26c191788f18129af0eb32a358cdaea0c7479626 upstream.
8
9When holding the mmap_sem for reading, pte_offset_map_lock should only
10run on a pmd_t that has been read atomically from the pmdp pointer,
11otherwise we may read only half of it leading to this crash.
12
13PID: 11679 TASK: f06e8000 CPU: 3 COMMAND: "do_race_2_panic"
14 #0 [f06a9dd8] crash_kexec at c049b5ec
15 #1 [f06a9e2c] oops_end at c083d1c2
16 #2 [f06a9e40] no_context at c0433ded
17 #3 [f06a9e64] bad_area_nosemaphore at c043401a
18 #4 [f06a9e6c] __do_page_fault at c0434493
19 #5 [f06a9eec] do_page_fault at c083eb45
20 #6 [f06a9f04] error_code (via page_fault) at c083c5d5
21 EAX: 01fb470c EBX: fff35000 ECX: 00000003 EDX: 00000100 EBP:
22 00000000
23 DS: 007b ESI: 9e201000 ES: 007b EDI: 01fb4700 GS: 00e0
24 CS: 0060 EIP: c083bc14 ERR: ffffffff EFLAGS: 00010246
25 #7 [f06a9f38] _spin_lock at c083bc14
26 #8 [f06a9f44] sys_mincore at c0507b7d
27 #9 [f06a9fb0] system_call at c083becd
28 start len
29 EAX: ffffffda EBX: 9e200000 ECX: 00001000 EDX: 6228537f
30 DS: 007b ESI: 00000000 ES: 007b EDI: 003d0f00
31 SS: 007b ESP: 62285354 EBP: 62285388 GS: 0033
32 CS: 0073 EIP: 00291416 ERR: 000000da EFLAGS: 00000286
33
34This should be a longstanding bug affecting x86 32bit PAE without THP.
35Only archs with 64bit large pmd_t and 32bit unsigned long should be
36affected.
37
38With THP enabled the barrier() in pmd_none_or_trans_huge_or_clear_bad()
39would partly hide the bug when the pmd transition from none to stable,
40by forcing a re-read of the *pmd in pte_offset_map_lock, but when THP is
41enabled a new set of problems arises from the fact that the pmd could then
42transition freely between the none, pmd_trans_huge and pmd_trans_stable states.
43So making the barrier in pmd_none_or_trans_huge_or_clear_bad()
44unconditional isn't a good idea and it would be a flaky solution.
45
46This should be fully fixed by introducing a pmd_read_atomic that reads
47the pmd in order with THP disabled, or by reading the pmd atomically
48with cmpxchg8b with THP enabled.
49
50Luckily this new race condition only triggers in the places that must
51already be covered by pmd_none_or_trans_huge_or_clear_bad() so the fix
52is localized there but this bug is not related to THP.
53
54NOTE: this can only trigger on x86 32bit systems with PAE enabled and more
55than 4G of RAM; otherwise the high part of the pmd is zero at all times,
56so a torn read cannot truncate it and the SMP race stays
57hidden.
58
59This bug was discovered and fully debugged by Ulrich, quote:
60
61----
62[..]
63pmd_none_or_trans_huge_or_clear_bad() loads the content of edx and
64eax.
65
66 496 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t
67 *pmd)
68 497 {
69 498 /* depend on compiler for an atomic pmd read */
70 499 pmd_t pmdval = *pmd;
71
72 // edi = pmd pointer
730xc0507a74 <sys_mincore+548>: mov 0x8(%esp),%edi
74...
75 // edx = PTE page table high address
760xc0507a84 <sys_mincore+564>: mov 0x4(%edi),%edx
77...
78 // eax = PTE page table low address
790xc0507a8e <sys_mincore+574>: mov (%edi),%eax
80
81[..]
82
83Please note that the PMD is not read atomically. These are two "mov"
84instructions where the high order bits of the PMD entry are fetched
85first. Hence, the above machine code is prone to the following race.
86
87- The PMD entry {high|low} is 0x0000000000000000.
88 The "mov" at 0xc0507a84 loads 0x00000000 into edx.
89
90- A page fault (on another CPU) sneaks in between the two "mov"
91 instructions and instantiates the PMD.
92
93- The PMD entry {high|low} is now 0x00000003fda38067.
94 The "mov" at 0xc0507a8e loads 0xfda38067 into eax.
95----
96
97Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
98Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
99Cc: Mel Gorman <mgorman@suse.de>
100Cc: Hugh Dickins <hughd@google.com>
101Cc: Larry Woodman <lwoodman@redhat.com>
102Cc: Petr Matousek <pmatouse@redhat.com>
103Cc: Rik van Riel <riel@redhat.com>
104Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
105Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
106Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
107---
108 arch/x86/include/asm/pgtable-3level.h | 50 +++++++++++++++++++++++++++++++++
109 include/asm-generic/pgtable.h | 22 +++++++++++++--
110 2 files changed, 70 insertions(+), 2 deletions(-)
111
112diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
113index effff47..43876f1 100644
114--- a/arch/x86/include/asm/pgtable-3level.h
115+++ b/arch/x86/include/asm/pgtable-3level.h
116@@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
117 ptep->pte_low = pte.pte_low;
118 }
119
120+#define pmd_read_atomic pmd_read_atomic
121+/*
122+ * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
123+ * a "*pmdp" dereference done by gcc. Problem is, in certain places
124+ * where pte_offset_map_lock is called, concurrent page faults are
125+ * allowed, if the mmap_sem is held for reading. An example is mincore
126+ * vs page faults vs MADV_DONTNEED. On the page fault side
127+ * pmd_populate rightfully does a set_64bit, but if we're reading the
128+ * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
129+ * because gcc will not read the 64bit of the pmd atomically. To fix
130+ * this all places running pte_offset_map_lock() while holding the
131+ * mmap_sem in read mode, shall read the pmdp pointer using this
132+ * function to know if the pmd is null or not, and in turn to know if
133+ * they can run pte_offset_map_lock or pmd_trans_huge or other pmd
134+ * operations.
135+ *
136+ * Without THP if the mmap_sem is held for reading, the
137+ * pmd can only transition from null to not null while pmd_read_atomic runs.
138+ * So there's no need for literally reading it atomically.
139+ *
140+ * With THP if the mmap_sem is held for reading, the pmd can become
141+ * THP or null or point to a pte (and in turn become "stable") at any
142+ * time under pmd_read_atomic, so it's mandatory to read it atomically
143+ * with cmpxchg8b.
144+ */
145+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
146+static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
147+{
148+ pmdval_t ret;
149+ u32 *tmp = (u32 *)pmdp;
150+
151+ ret = (pmdval_t) (*tmp);
152+ if (ret) {
153+ /*
154+ * If the low part is null, we must not read the high part
155+ * or we can end up with a partial pmd.
156+ */
157+ smp_rmb();
158+ ret |= ((pmdval_t)*(tmp + 1)) << 32;
159+ }
160+
161+ return (pmd_t) { ret };
162+}
163+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
164+static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
165+{
166+ return (pmd_t) { atomic64_read((atomic64_t *)pmdp) };
167+}
168+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
169+
170 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
171 {
172 set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
173diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
174index a03c098..831924a 100644
175--- a/include/asm-generic/pgtable.h
176+++ b/include/asm-generic/pgtable.h
177@@ -445,6 +445,18 @@ static inline int pmd_write(pmd_t pmd)
178 #endif /* __HAVE_ARCH_PMD_WRITE */
179 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
180
181+#ifndef pmd_read_atomic
182+static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
183+{
184+ /*
185+ * Depend on compiler for an atomic pmd read. NOTE: this is
186+ * only going to work, if the pmdval_t isn't larger than
187+ * an unsigned long.
188+ */
189+ return *pmdp;
190+}
191+#endif
192+
193 /*
194 * This function is meant to be used by sites walking pagetables with
195 * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
196@@ -458,11 +470,17 @@ static inline int pmd_write(pmd_t pmd)
197 * undefined so behaving like if the pmd was none is safe (because it
198 * can return none anyway). The compiler level barrier() is critically
199 * important to compute the two checks atomically on the same pmdval.
200+ *
201+ * For 32bit kernels with a 64bit large pmd_t this automatically takes
202+ * care of reading the pmd atomically to avoid SMP race conditions
203+ * against pmd_populate() when the mmap_sem is held for reading by the
204+ * caller (a special atomic read not done by "gcc" as in the generic
205+ * version above, is also needed when THP is disabled because the page
206+ * fault can populate the pmd from under us).
207 */
208 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
209 {
210- /* depend on compiler for an atomic pmd read */
211- pmd_t pmdval = *pmd;
212+ pmd_t pmdval = pmd_read_atomic(pmd);
213 /*
214 * The barrier will stabilize the pmdval in a register or on
215 * the stack so that it will stop changing under the code.
216--
2171.7.10
218