Diffstat (limited to 'recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0013-hugepages-fix-use-after-free-bug-in-quota-handling.patch')
-rw-r--r-- recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0013-hugepages-fix-use-after-free-bug-in-quota-handling.patch | 464
1 file changed, 464 insertions(+), 0 deletions(-)
diff --git a/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0013-hugepages-fix-use-after-free-bug-in-quota-handling.patch b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0013-hugepages-fix-use-after-free-bug-in-quota-handling.patch
new file mode 100644
index 00000000..bdfa3864
--- /dev/null
+++ b/recipes-kernel/linux/linux-ti33x-psp-3.2/3.2.24/0013-hugepages-fix-use-after-free-bug-in-quota-handling.patch
@@ -0,0 +1,464 @@
From 5babdc7487f6c78c06d8e085efe841d91a77ff48 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Wed, 21 Mar 2012 16:34:12 -0700
Subject: [PATCH 013/109] hugepages: fix use after free bug in "quota"
 handling

commit 90481622d75715bfcb68501280a917dbfe516029 upstream.

hugetlbfs_{get,put}_quota() are badly named. They don't interact with the
general quota handling code, and they don't much resemble its behaviour.
Rather than being about maintaining limits on on-disk block usage by
particular users, they are instead about maintaining limits on in-memory
page usage (including anonymous MAP_PRIVATE copied-on-write pages)
associated with a particular hugetlbfs filesystem instance.

Worse, they work by having callbacks to the hugetlbfs filesystem code
from the low-level page handling code, in particular from
free_huge_page(). This is a layering violation in itself, but more
importantly, if the kernel does a get_user_pages() on hugepages (which
can happen from KVM amongst others), then free_huge_page() can be
delayed until after the associated inode has already been freed. If an
unmount occurs at the wrong time, even the hugetlbfs superblock where
the "quota" limits are stored may have been freed.

Andrew Barry proposed a patch to fix this by having hugepages store
a pointer directly to the superblock, bumping its reference count as
appropriate to avoid it being freed, instead of storing a pointer to
their address_space and reaching the superblock from there. Andrew
Morton rejected that version, however, on the grounds that it made the
existing layering violation worse.

This is a reworked version of Andrew's patch, which removes the extra,
and some of the existing, layering violation. It works by introducing
the concept of a hugepage "subpool" at the lower hugepage mm layer -
that is a finite logical pool of hugepages to allocate from. hugetlbfs
now creates a subpool for each filesystem instance with a page limit
set, and a pointer to the subpool gets added to each allocated
hugepage, instead of the address_space pointer used now. The subpool
has its own lifetime and is only freed once all pages in it _and_ all
other references to it (i.e. superblocks) are gone.

Subpools are optional - a NULL subpool pointer is taken by the code to
mean that no subpool limits are in effect.
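
To illustrate the lifetime rule, here is a standalone userspace model
of the accounting (illustrative only - the names, the pthread mutex and
plain malloc/free are stand-ins; the real kernel implementation is in
the mm/hugetlb.c hunks below):

    #include <assert.h>
    #include <pthread.h>
    #include <stdlib.h>

    /* Model of struct hugepage_subpool: it stays alive while the
     * filesystem still references it (count) or hugepages are still
     * charged to it (used_hpages). */
    struct subpool {
            pthread_mutex_t lock;
            long count, max_hpages, used_hpages;
    };

    /* Mirrors unlock_or_release_subpool(): whoever drops the last
     * reference or uncharges the last page frees the pool. */
    static void unlock_or_release(struct subpool *sp)
    {
            int free_it = sp->count == 0 && sp->used_hpages == 0;

            pthread_mutex_unlock(&sp->lock);
            if (free_it)
                    free(sp);
    }

    static int get_pages(struct subpool *sp, long delta)
    {
            int ret = 0;

            pthread_mutex_lock(&sp->lock);
            if (sp->used_hpages + delta <= sp->max_hpages)
                    sp->used_hpages += delta;
            else
                    ret = -1;       /* over the pool limit */
            pthread_mutex_unlock(&sp->lock);
            return ret;
    }

    static void put_pages(struct subpool *sp, long delta)
    {
            pthread_mutex_lock(&sp->lock);
            sp->used_hpages -= delta;
            unlock_or_release(sp);  /* may free sp */
    }

    int main(void)
    {
            struct subpool *sp = calloc(1, sizeof(*sp));

            pthread_mutex_init(&sp->lock, NULL);
            sp->count = 1;
            sp->max_hpages = 2;

            assert(get_pages(sp, 2) == 0);  /* within the limit */
            assert(get_pages(sp, 1) < 0);   /* third page refused */

            /* "unmount" while pages are still charged: pool survives */
            pthread_mutex_lock(&sp->lock);
            sp->count--;
            unlock_or_release(sp);

            put_pages(sp, 2);       /* last page gone - pool is freed */
            return 0;
    }

The point of the model: dropping the superblock reference while pages
are still charged (the delayed free_huge_page() case described above)
leaves the pool, and the limits stored in it, valid until the last
page is returned.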

Previous discussion of this bug can be found in: "Fix refcounting in
hugetlbfs quota handling.". See: https://lkml.org/lkml/2011/8/11/28 or
http://marc.info/?l=linux-mm&m=126928970510627&w=1

v2: Fixed a bug spotted by Hillf Danton, and removed the extra parameter to
alloc_huge_page() - since it already takes the vma, it is not necessary.

Signed-off-by: Andrew Barry <abarry@cray.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[bwh: Backported to 3.2: adjust context to apply after commit
 c50ac050811d6485616a193eb0f37bfbd191cc89 'hugetlb: fix resv_map leak in
 error path', backported in 3.2.20]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 fs/hugetlbfs/inode.c    |   54 +++++++-----------
 include/linux/hugetlb.h |   14 ++++--
 mm/hugetlb.c            |  135 +++++++++++++++++++++++++++++++++++++---------
 3 files changed, 139 insertions(+), 64 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2d0ca24..ebc2f4d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -592,9 +592,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	spin_lock(&sbinfo->stat_lock);
 	/* If no limits set, just report 0 for max/free/used
 	 * blocks, like simple_statfs() */
-	if (sbinfo->max_blocks >= 0) {
-		buf->f_blocks = sbinfo->max_blocks;
-		buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+	if (sbinfo->spool) {
+		long free_pages;
+
+		spin_lock(&sbinfo->spool->lock);
+		buf->f_blocks = sbinfo->spool->max_hpages;
+		free_pages = sbinfo->spool->max_hpages
+			- sbinfo->spool->used_hpages;
+		buf->f_bavail = buf->f_bfree = free_pages;
+		spin_unlock(&sbinfo->spool->lock);
 		buf->f_files = sbinfo->max_inodes;
 		buf->f_ffree = sbinfo->free_inodes;
 	}
@@ -610,6 +616,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
 
 	if (sbi) {
 		sb->s_fs_info = NULL;
+
+		if (sbi->spool)
+			hugepage_put_subpool(sbi->spool);
+
 		kfree(sbi);
 	}
 }
@@ -841,10 +851,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = sbinfo;
 	sbinfo->hstate = config.hstate;
 	spin_lock_init(&sbinfo->stat_lock);
-	sbinfo->max_blocks = config.nr_blocks;
-	sbinfo->free_blocks = config.nr_blocks;
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
+	sbinfo->spool = NULL;
+	if (config.nr_blocks != -1) {
+		sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+		if (!sbinfo->spool)
+			goto out_free;
+	}
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_blocksize = huge_page_size(config.hstate);
 	sb->s_blocksize_bits = huge_page_shift(config.hstate);
@@ -864,38 +878,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_root = root;
 	return 0;
 out_free:
+	if (sbinfo->spool)
+		kfree(sbinfo->spool);
 	kfree(sbinfo);
 	return -ENOMEM;
 }
 
-int hugetlb_get_quota(struct address_space *mapping, long delta)
-{
-	int ret = 0;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
-	if (sbinfo->free_blocks > -1) {
-		spin_lock(&sbinfo->stat_lock);
-		if (sbinfo->free_blocks - delta >= 0)
-			sbinfo->free_blocks -= delta;
-		else
-			ret = -ENOMEM;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
-	return ret;
-}
-
-void hugetlb_put_quota(struct address_space *mapping, long delta)
-{
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
-	if (sbinfo->free_blocks > -1) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_blocks += delta;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-}
-
 static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d9d6c86..c5ed2f1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,15 @@ struct user_struct;
 #include <linux/shm.h>
 #include <asm/tlbflush.h>
 
+struct hugepage_subpool {
+	spinlock_t lock;
+	long count;
+	long max_hpages, used_hpages;
+};
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
+void hugepage_put_subpool(struct hugepage_subpool *spool);
+
 int PageHuge(struct page *page);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
@@ -138,12 +147,11 @@ struct hugetlbfs_config {
 };
 
 struct hugetlbfs_sb_info {
-	long	max_blocks;   /* blocks allowed */
-	long	free_blocks;  /* blocks free */
 	long	max_inodes;   /* inodes allowed */
 	long	free_inodes;  /* inodes free */
 	spinlock_t	stat_lock;
 	struct hstate *hstate;
+	struct hugepage_subpool *spool;
 };
 
 
@@ -166,8 +174,6 @@ extern const struct file_operations hugetlbfs_file_operations;
 extern const struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
 				struct user_struct **user, int creat_flags);
-int hugetlb_get_quota(struct address_space *mapping, long delta);
-void hugetlb_put_quota(struct address_space *mapping, long delta);
 
 static inline int is_file_hugepages(struct file *file)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5f5c545..7c535b0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -53,6 +53,84 @@ static unsigned long __initdata default_hstate_size;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+{
+	bool free = (spool->count == 0) && (spool->used_hpages == 0);
+
+	spin_unlock(&spool->lock);
+
+	/* If no pages are used, and no other handles to the subpool
+	 * remain, free the subpool */
+	if (free)
+		kfree(spool);
+}
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
+{
+	struct hugepage_subpool *spool;
+
+	spool = kmalloc(sizeof(*spool), GFP_KERNEL);
+	if (!spool)
+		return NULL;
+
+	spin_lock_init(&spool->lock);
+	spool->count = 1;
+	spool->max_hpages = nr_blocks;
+	spool->used_hpages = 0;
+
+	return spool;
+}
+
+void hugepage_put_subpool(struct hugepage_subpool *spool)
+{
+	spin_lock(&spool->lock);
+	BUG_ON(!spool->count);
+	spool->count--;
+	unlock_or_release_subpool(spool);
+}
+
+static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
+				      long delta)
+{
+	int ret = 0;
+
+	if (!spool)
+		return 0;
+
+	spin_lock(&spool->lock);
+	if ((spool->used_hpages + delta) <= spool->max_hpages) {
+		spool->used_hpages += delta;
+	} else {
+		ret = -ENOMEM;
+	}
+	spin_unlock(&spool->lock);
+
+	return ret;
+}
+
+static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
+				       long delta)
+{
+	if (!spool)
+		return;
+
+	spin_lock(&spool->lock);
+	spool->used_hpages -= delta;
+	/* If hugetlbfs_put_super couldn't free spool due to
+	 * an outstanding quota reference, free it now. */
+	unlock_or_release_subpool(spool);
+}
+
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
+{
+	return HUGETLBFS_SB(inode->i_sb)->spool;
+}
+
+static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
+{
+	return subpool_inode(vma->vm_file->f_dentry->d_inode);
+}
+
 /*
  * Region tracking -- allows tracking of reservations and instantiated pages
  *                    across the pages in a mapping.
@@ -533,9 +611,9 @@ static void free_huge_page(struct page *page)
 	 */
 	struct hstate *h = page_hstate(page);
 	int nid = page_to_nid(page);
-	struct address_space *mapping;
+	struct hugepage_subpool *spool =
+		(struct hugepage_subpool *)page_private(page);
 
-	mapping = (struct address_space *) page_private(page);
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	BUG_ON(page_count(page));
@@ -551,8 +629,7 @@ static void free_huge_page(struct page *page)
 		enqueue_huge_page(h, page);
 	}
 	spin_unlock(&hugetlb_lock);
-	if (mapping)
-		hugetlb_put_quota(mapping, 1);
+	hugepage_subpool_put_pages(spool, 1);
 }
 
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -966,11 +1043,12 @@ static void return_unused_surplus_pages(struct hstate *h,
 /*
  * Determine if the huge page at addr within the vma has an associated
  * reservation.  Where it does not we will need to logically increase
- * reservation and actually increase quota before an allocation can occur.
- * Where any new reservation would be required the reservation change is
- * prepared, but not committed.  Once the page has been quota'd allocated
- * an instantiated the change should be committed via vma_commit_reservation.
- * No action is required on failure.
+ * reservation and actually increase subpool usage before an allocation
+ * can occur. Where any new reservation would be required the
+ * reservation change is prepared, but not committed. Once the page
+ * has been allocated from the subpool and instantiated the change should
+ * be committed via vma_commit_reservation. No action is required on
+ * failure.
  */
 static long vma_needs_reservation(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long addr)
@@ -1019,24 +1097,24 @@ static void vma_commit_reservation(struct hstate *h,
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
+	struct hugepage_subpool *spool = subpool_vma(vma);
 	struct hstate *h = hstate_vma(vma);
 	struct page *page;
-	struct address_space *mapping = vma->vm_file->f_mapping;
-	struct inode *inode = mapping->host;
 	long chg;
 
 	/*
-	 * Processes that did not create the mapping will have no reserves and
-	 * will not have accounted against quota. Check that the quota can be
-	 * made before satisfying the allocation
-	 * MAP_NORESERVE mappings may also need pages and quota allocated
-	 * if no reserve mapping overlaps.
+	 * Processes that did not create the mapping will have no
+	 * reserves and will not have accounted against subpool
+	 * limit. Check that the subpool limit can be made before
+	 * satisfying the allocation. MAP_NORESERVE mappings may also
+	 * need pages and subpool limit allocated if no reserve
+	 * mapping overlaps.
 	 */
 	chg = vma_needs_reservation(h, vma, addr);
 	if (chg < 0)
 		return ERR_PTR(-VM_FAULT_OOM);
 	if (chg)
-		if (hugetlb_get_quota(inode->i_mapping, chg))
+		if (hugepage_subpool_get_pages(spool, chg))
 			return ERR_PTR(-VM_FAULT_SIGBUS);
 
 	spin_lock(&hugetlb_lock);
@@ -1046,12 +1124,12 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	if (!page) {
 		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
-			hugetlb_put_quota(inode->i_mapping, chg);
+			hugepage_subpool_put_pages(spool, chg);
 			return ERR_PTR(-VM_FAULT_SIGBUS);
 		}
 	}
 
-	set_page_private(page, (unsigned long) mapping);
+	set_page_private(page, (unsigned long)spool);
 
 	vma_commit_reservation(h, vma, addr);
 
@@ -2081,6 +2159,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct resv_map *reservations = vma_resv_map(vma);
+	struct hugepage_subpool *spool = subpool_vma(vma);
 	unsigned long reserve;
 	unsigned long start;
 	unsigned long end;
@@ -2096,7 +2175,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 
 		if (reserve) {
 			hugetlb_acct_memory(h, -reserve);
-			hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
+			hugepage_subpool_put_pages(spool, reserve);
 		}
 	}
 }
@@ -2326,7 +2405,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	address = address & huge_page_mask(h);
 	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
 		+ (vma->vm_pgoff >> PAGE_SHIFT);
-	mapping = (struct address_space *)page_private(page);
+	mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
 
 	/*
 	 * Take the mapping lock for the duration of the table walk. As
@@ -2865,11 +2944,12 @@ int hugetlb_reserve_pages(struct inode *inode,
 {
 	long ret, chg;
 	struct hstate *h = hstate_inode(inode);
+	struct hugepage_subpool *spool = subpool_inode(inode);
 
 	/*
 	 * Only apply hugepage reservation if asked. At fault time, an
 	 * attempt will be made for VM_NORESERVE to allocate a page
-	 * and filesystem quota without using reserves
+	 * without using reserves
 	 */
 	if (vm_flags & VM_NORESERVE)
 		return 0;
@@ -2898,19 +2978,19 @@ int hugetlb_reserve_pages(struct inode *inode,
 		goto out_err;
 	}
 
-	/* There must be enough filesystem quota for the mapping */
-	if (hugetlb_get_quota(inode->i_mapping, chg)) {
+	/* There must be enough pages in the subpool for the mapping */
+	if (hugepage_subpool_get_pages(spool, chg)) {
 		ret = -ENOSPC;
 		goto out_err;
 	}
 
 	/*
 	 * Check enough hugepages are available for the reservation.
-	 * Hand back the quota if there are not
+	 * Hand the pages back to the subpool if there are not
 	 */
 	ret = hugetlb_acct_memory(h, chg);
 	if (ret < 0) {
-		hugetlb_put_quota(inode->i_mapping, chg);
+		hugepage_subpool_put_pages(spool, chg);
 		goto out_err;
 	}
 
@@ -2938,12 +3018,13 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 {
 	struct hstate *h = hstate_inode(inode);
 	long chg = region_truncate(&inode->i_mapping->private_list, offset);
+	struct hugepage_subpool *spool = subpool_inode(inode);
 
 	spin_lock(&inode->i_lock);
 	inode->i_blocks -= (blocks_per_huge_page(h) * freed);
 	spin_unlock(&inode->i_lock);
 
-	hugetlb_put_quota(inode->i_mapping, (chg - freed));
+	hugepage_subpool_put_pages(spool, (chg - freed));
 	hugetlb_acct_memory(h, -(chg - freed));
 }
 
-- 
1.7.7.6
