Diffstat (limited to 'meta/recipes-kernel/lttng/lttng-modules/0006-Add-kmalloc-failover-to-vmalloc.patch')
-rw-r--r--  meta/recipes-kernel/lttng/lttng-modules/0006-Add-kmalloc-failover-to-vmalloc.patch | 519
1 file changed, 519 insertions(+), 0 deletions(-)
diff --git a/meta/recipes-kernel/lttng/lttng-modules/0006-Add-kmalloc-failover-to-vmalloc.patch b/meta/recipes-kernel/lttng/lttng-modules/0006-Add-kmalloc-failover-to-vmalloc.patch
new file mode 100644
index 0000000000..82007691a6
--- /dev/null
+++ b/meta/recipes-kernel/lttng/lttng-modules/0006-Add-kmalloc-failover-to-vmalloc.patch
@@ -0,0 +1,519 @@
From df57c35ddc8772652d8daa1e53da07f4c7819d8d Mon Sep 17 00:00:00 2001
From: Michael Jeanson <mjeanson@efficios.com>
Date: Mon, 25 Sep 2017 10:56:20 -0400
Subject: [PATCH 6/8] Add kmalloc failover to vmalloc
Organization: O.S. Systems Software LTDA.

This patch is based on the kvmalloc helpers introduced in kernel 4.12.

It will gracefully fail over memory allocations of more than one page to
vmalloc on systems under high memory pressure or fragmentation.

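For background, the pattern these helpers standardize looks roughly as
follows. This is a minimal illustrative sketch of the kvmalloc idea;
the helper name alloc_big is hypothetical, not code from this patch:

	#include <linux/slab.h>
	#include <linux/vmalloc.h>
	#include <linux/mm.h>	/* kvfree() */

	static void *alloc_big(size_t size)
	{
		void *buf;

		/*
		 * Try physically contiguous memory first, suppressing
		 * failure warnings and retry loops: with a fallback at
		 * hand, failing fast under pressure is fine.
		 */
		buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
		if (!buf)
			buf = vmalloc(size);	/* virtually contiguous fallback */
		return buf;
	}

Callers release such a buffer with kvfree(), which dispatches to kfree()
or vfree() as appropriate.
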
See Linux kernel commit:
  commit a7c3e901a46ff54c016d040847eda598a9e3e653
  Author: Michal Hocko <mhocko@suse.com>
  Date:   Mon May 8 15:57:09 2017 -0700

    mm: introduce kv[mz]alloc helpers

    Patch series "kvmalloc", v5.

    There are many open-coded kmalloc-with-vmalloc-fallback instances in
    the tree. Most of them are not careful enough or simply do not care
    about the underlying semantics of the kmalloc/page allocator, which
    means that a) some vmalloc fallbacks are basically unreachable
    because the kmalloc part will keep retrying until it succeeds, and
    b) the page allocator can invoke really disruptive steps like the
    OOM killer to move forward, which doesn't sound appropriate when we
    consider that the vmalloc fallback is available.

    As can be seen, implementing kvmalloc requires quite an intimate
    knowledge of the page allocator and the memory reclaim internals,
    which strongly suggests that a helper should be implemented in the
    memory subsystem proper.

    Most callers I could find have been converted to use the helper
    instead. This is patch 6. There are some more relying on __GFP_REPEAT
    in the networking stack which I have converted as well, and Eric
    Dumazet was not opposed [2] to converting them too.

    [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org
    [2] http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com

    This patch (of 9):

    Using kmalloc with the vmalloc fallback for larger allocations is a
    common pattern in the kernel code. Yet we do not have any common
    helper for that, so users have invented their own helpers. Some of
    them are really creative when doing so. Let's just add kv[mz]alloc
    and make sure it is implemented properly. This implementation makes
    sure not to create large memory pressure for > PAGE_SIZE requests
    (__GFP_NORETRY) and also not to warn about allocation failures. This
    also rules out the OOM killer, as the vmalloc is a more appropriate
    fallback than a disruptive user-visible action.

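The caller-side conversion performed in the hunks below is mechanical;
schematically (an illustrative before/after sketch, not an actual hunk
of this patch):

	/* before: a multi-page kzalloc that can fail under fragmentation */
	events = kzalloc(num_possible_cpus() * sizeof(*events), GFP_KERNEL);
	/* ... */
	kfree(events);

	/* after: same semantics, but fails over to vmalloc when needed */
	events = lttng_kvzalloc(num_possible_cpus() * sizeof(*events), GFP_KERNEL);
	/* ... */
	lttng_kvfree(events);

The one invariant is that memory obtained from lttng_kv[mz]alloc*() must
be released with lttng_kvfree(), never plain kfree(), since it may have
come from vmalloc.
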
Upstream-Status: Backport [2.9.4]

Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 lib/prio_heap/lttng_prio_heap.c       |   7 +-
 lib/ringbuffer/ring_buffer_backend.c  |  22 ++---
 lib/ringbuffer/ring_buffer_frontend.c |  13 +--
 lttng-context-perf-counters.c         |   6 +-
 lttng-context.c                       |   6 +-
 lttng-events.c                        |   6 +-
 wrapper/vmalloc.h                     | 169 +++++++++++++++++++++++++++++++++-
 7 files changed, 198 insertions(+), 31 deletions(-)

diff --git a/lib/prio_heap/lttng_prio_heap.c b/lib/prio_heap/lttng_prio_heap.c
index 6db7f52..01ed69f 100644
--- a/lib/prio_heap/lttng_prio_heap.c
+++ b/lib/prio_heap/lttng_prio_heap.c
@@ -26,6 +26,7 @@

 #include <linux/slab.h>
 #include <lib/prio_heap/lttng_prio_heap.h>
+#include <wrapper/vmalloc.h>

 #ifdef DEBUG_HEAP
 void lttng_check_heap(const struct lttng_ptr_heap *heap)
@@ -70,12 +71,12 @@ int heap_grow(struct lttng_ptr_heap *heap, size_t new_len)
 		return 0;

 	heap->alloc_len = max_t(size_t, new_len, heap->alloc_len << 1);
-	new_ptrs = kmalloc(heap->alloc_len * sizeof(void *), heap->gfpmask);
+	new_ptrs = lttng_kvmalloc(heap->alloc_len * sizeof(void *), heap->gfpmask);
 	if (!new_ptrs)
 		return -ENOMEM;
 	if (heap->ptrs)
 		memcpy(new_ptrs, heap->ptrs, heap->len * sizeof(void *));
-	kfree(heap->ptrs);
+	lttng_kvfree(heap->ptrs);
 	heap->ptrs = new_ptrs;
 	return 0;
 }
@@ -109,7 +110,7 @@ int lttng_heap_init(struct lttng_ptr_heap *heap, size_t alloc_len,

 void lttng_heap_free(struct lttng_ptr_heap *heap)
 {
-	kfree(heap->ptrs);
+	lttng_kvfree(heap->ptrs);
 }

 static void heapify(struct lttng_ptr_heap *heap, size_t i)
diff --git a/lib/ringbuffer/ring_buffer_backend.c b/lib/ringbuffer/ring_buffer_backend.c
index f760836..3efa1d1 100644
--- a/lib/ringbuffer/ring_buffer_backend.c
+++ b/lib/ringbuffer/ring_buffer_backend.c
@@ -71,7 +71,7 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 	if (unlikely(!pages))
 		goto pages_error;

-	bufb->array = kmalloc_node(ALIGN(sizeof(*bufb->array)
+	bufb->array = lttng_kvmalloc_node(ALIGN(sizeof(*bufb->array)
 				 * num_subbuf_alloc,
 				 1 << INTERNODE_CACHE_SHIFT),
 			GFP_KERNEL | __GFP_NOWARN,
@@ -90,7 +90,7 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 	/* Allocate backend pages array elements */
 	for (i = 0; i < num_subbuf_alloc; i++) {
 		bufb->array[i] =
-			kzalloc_node(ALIGN(
+			lttng_kvzalloc_node(ALIGN(
 				sizeof(struct lib_ring_buffer_backend_pages) +
 				sizeof(struct lib_ring_buffer_backend_page)
 				* num_pages_per_subbuf,
@@ -102,7 +102,7 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 	}

 	/* Allocate write-side subbuffer table */
-	bufb->buf_wsb = kzalloc_node(ALIGN(
+	bufb->buf_wsb = lttng_kvzalloc_node(ALIGN(
 				sizeof(struct lib_ring_buffer_backend_subbuffer)
 				* num_subbuf,
 				1 << INTERNODE_CACHE_SHIFT),
@@ -122,7 +122,7 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 	bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0);

 	/* Allocate subbuffer packet counter table */
-	bufb->buf_cnt = kzalloc_node(ALIGN(
+	bufb->buf_cnt = lttng_kvzalloc_node(ALIGN(
 				sizeof(struct lib_ring_buffer_backend_counts)
 				* num_subbuf,
 				1 << INTERNODE_CACHE_SHIFT),
@@ -154,15 +154,15 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 	return 0;

 free_wsb:
-	kfree(bufb->buf_wsb);
+	lttng_kvfree(bufb->buf_wsb);
 free_array:
 	for (i = 0; (i < num_subbuf_alloc && bufb->array[i]); i++)
-		kfree(bufb->array[i]);
+		lttng_kvfree(bufb->array[i]);
 depopulate:
 	/* Free all allocated pages */
 	for (i = 0; (i < num_pages && pages[i]); i++)
 		__free_page(pages[i]);
-	kfree(bufb->array);
+	lttng_kvfree(bufb->array);
 array_error:
 	vfree(pages);
 pages_error:
@@ -191,14 +191,14 @@ void lib_ring_buffer_backend_free(struct lib_ring_buffer_backend *bufb)
 	if (chanb->extra_reader_sb)
 		num_subbuf_alloc++;

-	kfree(bufb->buf_wsb);
-	kfree(bufb->buf_cnt);
+	lttng_kvfree(bufb->buf_wsb);
+	lttng_kvfree(bufb->buf_cnt);
 	for (i = 0; i < num_subbuf_alloc; i++) {
 		for (j = 0; j < bufb->num_pages_per_subbuf; j++)
 			__free_page(pfn_to_page(bufb->array[i]->p[j].pfn));
-		kfree(bufb->array[i]);
+		lttng_kvfree(bufb->array[i]);
 	}
-	kfree(bufb->array);
+	lttng_kvfree(bufb->array);
 	bufb->allocated = 0;
 }

diff --git a/lib/ringbuffer/ring_buffer_frontend.c b/lib/ringbuffer/ring_buffer_frontend.c
index e77d789..1e43980 100644
--- a/lib/ringbuffer/ring_buffer_frontend.c
+++ b/lib/ringbuffer/ring_buffer_frontend.c
@@ -65,6 +65,7 @@
 #include <wrapper/kref.h>
 #include <wrapper/percpu-defs.h>
 #include <wrapper/timer.h>
+#include <wrapper/vmalloc.h>

 /*
  * Internal structure representing offsets to use at a sub-buffer switch.
@@ -147,8 +148,8 @@ void lib_ring_buffer_free(struct lib_ring_buffer *buf)
 	struct channel *chan = buf->backend.chan;

 	lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu);
-	kfree(buf->commit_hot);
-	kfree(buf->commit_cold);
+	lttng_kvfree(buf->commit_hot);
+	lttng_kvfree(buf->commit_cold);

 	lib_ring_buffer_backend_free(&buf->backend);
 }
@@ -245,7 +246,7 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
 		return ret;

 	buf->commit_hot =
-		kzalloc_node(ALIGN(sizeof(*buf->commit_hot)
+		lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_hot)
 					* chan->backend.num_subbuf,
 					1 << INTERNODE_CACHE_SHIFT),
 				GFP_KERNEL | __GFP_NOWARN,
@@ -256,7 +257,7 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
 	}

 	buf->commit_cold =
-		kzalloc_node(ALIGN(sizeof(*buf->commit_cold)
+		lttng_kvzalloc_node(ALIGN(sizeof(*buf->commit_cold)
 					* chan->backend.num_subbuf,
 					1 << INTERNODE_CACHE_SHIFT),
 				GFP_KERNEL | __GFP_NOWARN,
@@ -305,9 +306,9 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,

 	/* Error handling */
 free_init:
-	kfree(buf->commit_cold);
+	lttng_kvfree(buf->commit_cold);
 free_commit:
-	kfree(buf->commit_hot);
+	lttng_kvfree(buf->commit_hot);
 free_chanbuf:
 	lib_ring_buffer_backend_free(&buf->backend);
 	return ret;
diff --git a/lttng-context-perf-counters.c b/lttng-context-perf-counters.c
index 8afc11f..260e5d0 100644
--- a/lttng-context-perf-counters.c
+++ b/lttng-context-perf-counters.c
@@ -119,7 +119,7 @@ void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
 #endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
 	kfree(field->event_field.name);
 	kfree(field->u.perf_counter->attr);
-	kfree(events);
+	lttng_kvfree(events);
 	kfree(field->u.perf_counter);
 }

@@ -237,7 +237,7 @@ int lttng_add_perf_counter_to_ctx(uint32_t type,
 	int ret;
 	char *name_alloc;

-	events = kzalloc(num_possible_cpus() * sizeof(*events), GFP_KERNEL);
+	events = lttng_kvzalloc(num_possible_cpus() * sizeof(*events), GFP_KERNEL);
 	if (!events)
 		return -ENOMEM;

@@ -372,6 +372,6 @@ name_alloc_error:
 error_alloc_perf_field:
 	kfree(attr);
 error_attr:
-	kfree(events);
+	lttng_kvfree(events);
 	return ret;
 }
diff --git a/lttng-context.c b/lttng-context.c
index 406f479..544e95f 100644
--- a/lttng-context.c
+++ b/lttng-context.c
@@ -95,12 +95,12 @@ struct lttng_ctx_field *lttng_append_context(struct lttng_ctx **ctx_p)
 		struct lttng_ctx_field *new_fields;

 		ctx->allocated_fields = max_t(size_t, 1, 2 * ctx->allocated_fields);
-		new_fields = kzalloc(ctx->allocated_fields * sizeof(struct lttng_ctx_field), GFP_KERNEL);
+		new_fields = lttng_kvzalloc(ctx->allocated_fields * sizeof(struct lttng_ctx_field), GFP_KERNEL);
 		if (!new_fields)
 			return NULL;
 		if (ctx->fields)
 			memcpy(new_fields, ctx->fields, sizeof(*ctx->fields) * ctx->nr_fields);
-		kfree(ctx->fields);
+		lttng_kvfree(ctx->fields);
 		ctx->fields = new_fields;
 	}
 	field = &ctx->fields[ctx->nr_fields];
@@ -240,7 +240,7 @@ void lttng_destroy_context(struct lttng_ctx *ctx)
 		if (ctx->fields[i].destroy)
 			ctx->fields[i].destroy(&ctx->fields[i]);
 	}
-	kfree(ctx->fields);
+	lttng_kvfree(ctx->fields);
 	kfree(ctx);
 }

diff --git a/lttng-events.c b/lttng-events.c
index c86a756..7132485 100644
--- a/lttng-events.c
+++ b/lttng-events.c
@@ -130,7 +130,7 @@ struct lttng_session *lttng_session_create(void)
 	int i;

 	mutex_lock(&sessions_mutex);
-	session = kzalloc(sizeof(struct lttng_session), GFP_KERNEL);
+	session = lttng_kvzalloc(sizeof(struct lttng_session), GFP_KERNEL);
 	if (!session)
 		goto err;
 	INIT_LIST_HEAD(&session->chan);
@@ -161,7 +161,7 @@ struct lttng_session *lttng_session_create(void)
 err_free_cache:
 	kfree(metadata_cache);
 err_free_session:
-	kfree(session);
+	lttng_kvfree(session);
 err:
 	mutex_unlock(&sessions_mutex);
 	return NULL;
@@ -210,7 +210,7 @@ void lttng_session_destroy(struct lttng_session *session)
 	kref_put(&session->metadata_cache->refcount, metadata_cache_destroy);
 	list_del(&session->list);
 	mutex_unlock(&sessions_mutex);
-	kfree(session);
+	lttng_kvfree(session);
 }

 int lttng_session_statedump(struct lttng_session *session)
diff --git a/wrapper/vmalloc.h b/wrapper/vmalloc.h
index 2332439..2dd06cb 100644
--- a/wrapper/vmalloc.h
+++ b/wrapper/vmalloc.h
@@ -25,6 +25,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */

+#include <linux/version.h>
+#include <linux/vmalloc.h>
+
 #ifdef CONFIG_KALLSYMS

 #include <linux/kallsyms.h>
@@ -51,8 +54,6 @@ void wrapper_vmalloc_sync_all(void)
 }
 #else

-#include <linux/vmalloc.h>
-
 static inline
 void wrapper_vmalloc_sync_all(void)
 {
@@ -60,4 +61,168 @@ void wrapper_vmalloc_sync_all(void)
 }
 #endif

+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0))
+static inline
+void *lttng_kvmalloc_node(unsigned long size, gfp_t flags, int node)
+{
+	void *ret;
+
+	ret = kvmalloc_node(size, flags, node);
+	if (is_vmalloc_addr(ret)) {
+		/*
+		 * Make sure we don't trigger recursive page faults in the
+		 * tracing fast path.
+		 */
+		wrapper_vmalloc_sync_all();
+	}
+	return ret;
+}
+
+static inline
+void *lttng_kvzalloc_node(unsigned long size, gfp_t flags, int node)
+{
+	return lttng_kvmalloc_node(size, flags | __GFP_ZERO, node);
+}
+
+static inline
+void *lttng_kvmalloc(unsigned long size, gfp_t flags)
+{
+	return lttng_kvmalloc_node(size, flags, NUMA_NO_NODE);
+}
+
+static inline
+void *lttng_kvzalloc(unsigned long size, gfp_t flags)
+{
+	return lttng_kvzalloc_node(size, flags, NUMA_NO_NODE);
+}
+
+static inline
+void lttng_kvfree(const void *addr)
+{
+	kvfree(addr);
+}
+
+#else
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+/*
+ * kallsyms wrapper of __vmalloc_node with a fallback to kmalloc_node.
+ */
+static inline
+void *__lttng_vmalloc_node_fallback(unsigned long size, unsigned long align,
+		gfp_t gfp_mask, pgprot_t prot, int node, void *caller)
+{
+	void *ret;
+
+#ifdef CONFIG_KALLSYMS
+	/*
+	 * If we have KALLSYMS, get __vmalloc_node, which is not exported.
+	 */
+	void *(*lttng__vmalloc_node)(unsigned long size, unsigned long align,
+			gfp_t gfp_mask, pgprot_t prot, int node, void *caller);
+
+	lttng__vmalloc_node = (void *) kallsyms_lookup_funcptr("__vmalloc_node");
+	ret = lttng__vmalloc_node(size, align, gfp_mask, prot, node, caller);
+#else
+	/*
+	 * If we don't have KALLSYMS, fall back to kmalloc_node.
+	 */
+	ret = kmalloc_node(size, gfp_mask, node);
+#endif
+
+	return ret;
+}
+
+/**
+ * lttng_kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * failure, fall back to non-contiguous (vmalloc) allocation.
+ * @size: size of the request.
+ * @flags: gfp mask for the allocation - must be compatible with GFP_KERNEL.
+ *
+ * Uses kmalloc to get the memory, but if the allocation fails then falls back
+ * to the vmalloc allocator. Use lttng_kvfree to free the memory.
+ *
+ * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not supported.
+ */
+static inline
+void *lttng_kvmalloc_node(unsigned long size, gfp_t flags, int node)
+{
+	void *ret;
+
+	/*
+	 * vmalloc uses GFP_KERNEL for some internal allocations (e.g. page tables)
+	 * so the given set of flags has to be compatible.
+	 */
+	WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
+
+	/*
+	 * If the allocation fits in a single page, do not fall back.
+	 */
+	if (size <= PAGE_SIZE) {
+		return kmalloc_node(size, flags, node);
+	}
+
+	/*
+	 * Make sure that larger requests are not too disruptive - no OOM
+	 * killer and no allocation failure warnings, as we have a fallback.
+	 */
+	ret = kmalloc_node(size, flags | __GFP_NOWARN | __GFP_NORETRY, node);
+	if (!ret) {
+		if (node == NUMA_NO_NODE) {
+			/*
+			 * If no node was specified, use __vmalloc, which is
+			 * always exported.
+			 */
+			ret = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
+		} else {
+			/*
+			 * Otherwise, we need to select a node, but __vmalloc_node
+			 * is not exported; use this fallback wrapper, which uses
+			 * kallsyms if available or falls back to kmalloc_node.
+			 */
+			ret = __lttng_vmalloc_node_fallback(size, 1,
+				flags | __GFP_HIGHMEM, PAGE_KERNEL, node,
+				__builtin_return_address(0));
+		}
+
+		/*
+		 * Make sure we don't trigger recursive page faults in the
+		 * tracing fast path.
+		 */
+		wrapper_vmalloc_sync_all();
+	}
+	return ret;
+}
+
+static inline
+void *lttng_kvzalloc_node(unsigned long size, gfp_t flags, int node)
+{
+	return lttng_kvmalloc_node(size, flags | __GFP_ZERO, node);
+}
+
+static inline
+void *lttng_kvmalloc(unsigned long size, gfp_t flags)
+{
+	return lttng_kvmalloc_node(size, flags, NUMA_NO_NODE);
+}
+
+static inline
+void *lttng_kvzalloc(unsigned long size, gfp_t flags)
+{
+	return lttng_kvzalloc_node(size, flags, NUMA_NO_NODE);
+}
+
+static inline
+void lttng_kvfree(const void *addr)
+{
+	if (is_vmalloc_addr(addr)) {
+		vfree(addr);
+	} else {
+		kfree(addr);
+	}
+}
+#endif
+
 #endif /* _LTTNG_WRAPPER_VMALLOC_H */
-- 
2.14.1
