summaryrefslogtreecommitdiffstats
path: root/patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch
diff options
context:
space:
mode:
Diffstat (limited to 'patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch')
-rw-r--r--patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch155
1 files changed, 155 insertions, 0 deletions
diff --git a/patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch b/patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch
new file mode 100644
index 0000000..cdb876a
--- /dev/null
+++ b/patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch
@@ -0,0 +1,155 @@
1From 855ef164854307839c08c60688eaeac14f9a649e Mon Sep 17 00:00:00 2001
2From: Sebastien Boeuf <sebastien.boeuf@intel.com>
3Date: Mon, 23 Jan 2017 15:26:13 -0800
4Subject: [PATCH 153/154] x86: Return memory from guest to host kernel
5
6All virtual machines need memory to perform various tasks, but this
7memory is not released to the host after it is not used anymore. We
8have to wait for the termination of the virtual machine to get this
9memory back into the host.
10
11The ballooning mechanism is close but not designed for the same purpose.
12In case we hit memory limits of the system, the host predicts how much
13memory can be asked back from a guest, and it issues a hypercall to
14retrieve this memory.
15
16The solution proposed is different because it does not wait for host
17needs before returning memory, and it knows precisely how much memory
18it can return.
19
20The way to notify the host side about such a return is to rely on
21the new hypercall KVM_HC_RETURN_MEM. In order to avoid overloading
22the CPU with too many hypercalls, we only return memory blocks of
23order 7 (512k blocks) and higher. This value has been found running
24memory tests using multiple threads allocating/freeing high amount
25of memory. Those tests were run for different order values, and 7 was
26the best tradeoff between the number of hypercalls issued and the
27amount of memory returned to the host.
28
29In order to limit the performance impact related to this code addition,
30we check for blocks of order 7 or higher. This means it only costs an
31additional function call and a branch to perform this check.
32
33Furthermore, this code has been added to the "merge" codepath of the
34buddy allocator, which is not as sensitive as the "free" codepath.
35Not all blocks going through the "free" codepath will end up in the
36"merge" codepath because some of them won't find their free buddy.
37But this is a negligible amount since the kernel does not use many
38high order blocks directly. Instead, those bigger blocks are often
39broken into smaller chunks used as low order blocks. At the time
40those small blocks are released, they go through the merge path.
41
42Benchmarks such as ebizzy and will-it-scale have been run in order
43to make sure this patch does not affect kernel performance and no
44significant differences were observed.
45
46Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com>
47Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
48---
49 arch/x86/include/asm/kvm_para.h | 22 ++++++++++++++++++++++
50 arch/x86/kernel/kvm.c | 10 ++++++++++
51 include/linux/mm-arch-hooks.h | 8 ++++++++
52 mm/page_alloc.c | 2 ++
53 4 files changed, 42 insertions(+)
54
55diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
56index bc62e7cbf1b1..4a2f6d1adbd2 100644
57--- a/arch/x86/include/asm/kvm_para.h
58+++ b/arch/x86/include/asm/kvm_para.h
59@@ -92,6 +92,28 @@ void kvm_async_pf_task_wait(u32 token);
60 void kvm_async_pf_task_wake(u32 token);
61 u32 kvm_read_and_reset_pf_reason(void);
62 extern void kvm_disable_steal_time(void);
63+void kvm_arch_return_memory(struct page *page, unsigned int order);
64+
65+/*
66+ * This order has been found in an empirical way, running memory tests
67+ * through many iterations to assess the number of hypercalls issued
68+ * and the amount of memory returned. In case you change this order to
69+ * 6 or 8, it should not impact your performances significantly.
70+ *
71+ * Smaller values lead to less memory waste, but consume more CPU on
72+ * hypercalls. Larger values use less CPU, but do not as precisely
73+ * inform the hypervisor of which memory is free.
74+ */
75+#define RET_MEM_BUDDY_ORDER 7
76+
77+static inline void arch_buddy_merge(struct page *page, unsigned int order)
78+{
79+ if (order < RET_MEM_BUDDY_ORDER)
80+ return;
81+
82+ kvm_arch_return_memory(page, order);
83+}
84+#define arch_buddy_merge arch_buddy_merge
85
86 #ifdef CONFIG_PARAVIRT_SPINLOCKS
87 void __init kvm_spinlock_init(void);
88diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
89index edbbfc854e39..14167b3f6514 100644
90--- a/arch/x86/kernel/kvm.c
91+++ b/arch/x86/kernel/kvm.c
92@@ -552,6 +552,16 @@ static __init int activate_jump_labels(void)
93 }
94 arch_initcall(activate_jump_labels);
95
96+void kvm_arch_return_memory(struct page *page, unsigned int order)
97+{
98+ if (!kvm_para_available())
99+ return;
100+
101+ kvm_hypercall2(KVM_HC_RETURN_MEM,
102+ page_to_phys(page),
103+ PAGE_SIZE << order);
104+}
105+
106 #ifdef CONFIG_PARAVIRT_SPINLOCKS
107
108 /* Kick a cpu by its apicid. Used to wake up a halted vcpu */
109diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h
110index 4efc3f56e6df..26eb3a05a8a3 100644
111--- a/include/linux/mm-arch-hooks.h
112+++ b/include/linux/mm-arch-hooks.h
113@@ -12,6 +12,7 @@
114 #define _LINUX_MM_ARCH_HOOKS_H
115
116 #include <asm/mm-arch-hooks.h>
117+#include <asm/kvm_para.h>
118
119 #ifndef arch_remap
120 static inline void arch_remap(struct mm_struct *mm,
121@@ -22,4 +23,11 @@ static inline void arch_remap(struct mm_struct *mm,
122 #define arch_remap arch_remap
123 #endif
124
125+#ifndef arch_buddy_merge
126+static inline void arch_buddy_merge(struct page *page, unsigned int order)
127+{
128+}
129+#define arch_buddy_merge arch_buddy_merge
130+#endif
131+
132 #endif /* _LINUX_MM_ARCH_HOOKS_H */
133diff --git a/mm/page_alloc.c b/mm/page_alloc.c
134index 1460e6ad5e14..5f6e6371bc6f 100644
135--- a/mm/page_alloc.c
136+++ b/mm/page_alloc.c
137@@ -64,6 +64,7 @@
138 #include <linux/page_owner.h>
139 #include <linux/kthread.h>
140 #include <linux/memcontrol.h>
141+#include <linux/mm-arch-hooks.h>
142
143 #include <asm/sections.h>
144 #include <asm/tlbflush.h>
145@@ -855,6 +856,7 @@ static inline void __free_one_page(struct page *page,
146 }
147
148 done_merging:
149+ arch_buddy_merge(page, order);
150 set_page_order(page, order);
151
152 /*
153--
1542.12.1
155