Diffstat (limited to 'recipes-kernel/linux/files/0002-shmem-CVE-2014-4171.patch')
-rw-r--r--  recipes-kernel/linux/files/0002-shmem-CVE-2014-4171.patch  200
1 file changed, 0 insertions(+), 200 deletions(-)
diff --git a/recipes-kernel/linux/files/0002-shmem-CVE-2014-4171.patch b/recipes-kernel/linux/files/0002-shmem-CVE-2014-4171.patch
deleted file mode 100644
index a43b895..0000000
--- a/recipes-kernel/linux/files/0002-shmem-CVE-2014-4171.patch
+++ /dev/null
@@ -1,200 +0,0 @@
From 38d05809df1ea5272a658e7f4d5f2a3027ad2fd2 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 23 Jul 2014 14:00:10 -0700
Subject: [PATCH 2/3] shmem: fix faulting into a hole, not taking i_mutex

commit 8e205f779d1443a94b5ae81aa359cb535dd3021e upstream.

Commit f00cdc6df7d7 ("shmem: fix faulting into a hole while it's
punched") was buggy: Sasha sent a lockdep report to remind us that
grabbing i_mutex in the fault path is a no-no (write syscall may already
hold i_mutex while faulting user buffer).

We tried a completely different approach (see following patch) but that
proved inadequate: good enough for a rational workload, but not good
enough against trinity - which forks off so many mappings of the object
that contention on i_mmap_mutex while hole-puncher holds i_mutex builds
into serious starvation when concurrent faults force the puncher to fall
back to single-page unmap_mapping_range() searches of the i_mmap tree.

So return to the original umbrella approach, but keep away from i_mutex
this time. We really don't want to bloat every shmem inode with a new
mutex or completion, just to protect this unlikely case from trinity.
So extend the original with wait_queue_head on stack at the hole-punch
end, and wait_queue item on the stack at the fault end.

This involves further use of i_lock to guard against the races: lockdep
has been happy so far, and I see fs/inode.c:unlock_new_inode() holds
i_lock around wake_up_bit(), which is comparable to what we do here.
i_lock is more convenient, but we could switch to shmem's info->lock.

This issue has been tagged with CVE-2014-4171, which will require commit
f00cdc6df7d7 and this and the following patch to be backported: we
suggest to 3.1+, though in fact the trinity forkbomb effect might go
back as far as 2.6.16, when madvise(,,MADV_REMOVE) came in - or might
not, since much has changed, with i_mmap_mutex a spinlock before 3.0.
Anyone running trinity on 3.0 and earlier? I don't think we need care.

Upstream-Status: Backport

Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Lukas Czerner <lczerner@redhat.com>
Cc: Dave Jones <davej@redhat.com>
Cc: <stable@vger.kernel.org> [3.1+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Sona Sarmadi <sona.sarmadi@enea.com>
---
 mm/shmem.c | 78 +++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 26 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 00d412f..6f5626f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -85,7 +85,7 @@ static struct vfsmount *shm_mnt;
  * a time): we would prefer not to enlarge the shmem inode just for that.
  */
 struct shmem_falloc {
-	int mode;		/* FALLOC_FL mode currently operating */
+	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
 	pgoff_t start;		/* start of range currently being fallocated */
 	pgoff_t next;		/* the next page offset to be fallocated */
 	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
@@ -827,7 +827,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		spin_lock(&inode->i_lock);
 		shmem_falloc = inode->i_private;
 		if (shmem_falloc &&
-		    !shmem_falloc->mode &&
+		    !shmem_falloc->waitq &&
 		    index >= shmem_falloc->start &&
 		    index < shmem_falloc->next)
 			shmem_falloc->nr_unswapped++;
@@ -1306,38 +1306,58 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * Trinity finds that probing a hole which tmpfs is punching can
 	 * prevent the hole-punch from ever completing: which in turn
 	 * locks writers out with its hold on i_mutex. So refrain from
-	 * faulting pages into the hole while it's being punched, and
-	 * wait on i_mutex to be released if vmf->flags permits.
+	 * faulting pages into the hole while it's being punched. Although
+	 * shmem_undo_range() does remove the additions, it may be unable to
+	 * keep up, as each new page needs its own unmap_mapping_range() call,
+	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
+	 *
+	 * It does not matter if we sometimes reach this check just before the
+	 * hole-punch begins, so that one fault then races with the punch:
+	 * we just need to make racing faults a rare case.
+	 *
+	 * The implementation below would be much simpler if we just used a
+	 * standard mutex or completion: but we cannot take i_mutex in fault,
+	 * and bloating every shmem inode for this unlikely case would be sad.
 	 */
 	if (unlikely(inode->i_private)) {
 		struct shmem_falloc *shmem_falloc;
 
 		spin_lock(&inode->i_lock);
 		shmem_falloc = inode->i_private;
-		if (!shmem_falloc ||
-		    shmem_falloc->mode != FALLOC_FL_PUNCH_HOLE ||
-		    vmf->pgoff < shmem_falloc->start ||
-		    vmf->pgoff >= shmem_falloc->next)
-			shmem_falloc = NULL;
-		spin_unlock(&inode->i_lock);
-		/*
-		 * i_lock has protected us from taking shmem_falloc seriously
-		 * once return from shmem_fallocate() went back up that stack.
-		 * i_lock does not serialize with i_mutex at all, but it does
-		 * not matter if sometimes we wait unnecessarily, or sometimes
-		 * miss out on waiting: we just need to make those cases rare.
-		 */
-		if (shmem_falloc) {
+		if (shmem_falloc &&
+		    shmem_falloc->waitq &&
+		    vmf->pgoff >= shmem_falloc->start &&
+		    vmf->pgoff < shmem_falloc->next) {
+			wait_queue_head_t *shmem_falloc_waitq;
+			DEFINE_WAIT(shmem_fault_wait);
+
+			ret = VM_FAULT_NOPAGE;
 			if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
 			   !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+				/* It's polite to up mmap_sem if we can */
 				up_read(&vma->vm_mm->mmap_sem);
-				mutex_lock(&inode->i_mutex);
-				mutex_unlock(&inode->i_mutex);
-				return VM_FAULT_RETRY;
+				ret = VM_FAULT_RETRY;
 			}
-			/* cond_resched? Leave that to GUP or return to user */
-			return VM_FAULT_NOPAGE;
+
+			shmem_falloc_waitq = shmem_falloc->waitq;
+			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
+					TASK_UNINTERRUPTIBLE);
+			spin_unlock(&inode->i_lock);
+			schedule();
+
+			/*
+			 * shmem_falloc_waitq points into the shmem_fallocate()
+			 * stack of the hole-punching task: shmem_falloc_waitq
+			 * is usually invalid by the time we reach here, but
+			 * finish_wait() does not dereference it in that case;
+			 * though i_lock needed lest racing with wake_up_all().
+			 */
+			spin_lock(&inode->i_lock);
+			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
+			spin_unlock(&inode->i_lock);
+			return ret;
 		}
+		spin_unlock(&inode->i_lock);
 	}
 
 	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
@@ -1855,13 +1875,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 
 	mutex_lock(&inode->i_mutex);
 
-	shmem_falloc.mode = mode & ~FALLOC_FL_KEEP_SIZE;
-
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		struct address_space *mapping = file->f_mapping;
 		loff_t unmap_start = round_up(offset, PAGE_SIZE);
 		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
+		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
+		shmem_falloc.waitq = &shmem_falloc_waitq;
 		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
 		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
 		spin_lock(&inode->i_lock);
@@ -1873,8 +1893,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 					    1 + unmap_end - unmap_start, 0);
 		shmem_truncate_range(inode, offset, offset + len - 1);
 		/* No need to unmap again: hole-punching leaves COWed pages */
+
+		spin_lock(&inode->i_lock);
+		inode->i_private = NULL;
+		wake_up_all(&shmem_falloc_waitq);
+		spin_unlock(&inode->i_lock);
 		error = 0;
-		goto undone;
+		goto out;
 	}
 
 	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
@@ -1890,6 +1915,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		goto out;
 	}
 
+	shmem_falloc.waitq = NULL;
 	shmem_falloc.start = start;
 	shmem_falloc.next = start;
 	shmem_falloc.nr_falloced = 0;
--
1.9.1
