diff options
author | Adrian Calianu <adrian.calianu@enea.com> | 2017-09-27 10:37:36 +0200 |
---|---|---|
committer | Adrian Calianu <adrian.calianu@enea.com> | 2017-09-27 10:37:36 +0200 |
commit | ad4d4e5cbad3ad875eaa59e0a919c014dfb6a39e (patch) | |
tree | 9f4ec55e714e2be67dab4b775775b704af454f74 /patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch | |
parent | 7579efbdb49529f36652b69d4630c6c43907f77b (diff) | |
download | enea-kernel-cache-ad4d4e5cbad3ad875eaa59e0a919c014dfb6a39e.tar.gz |
add guest features from yocto branch
Signed-off-by: Adrian Calianu <adrian.calianu@enea.com>
Diffstat (limited to 'patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch')
-rw-r--r-- | patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch b/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch new file mode 100644 index 0000000..ff9d8c0 --- /dev/null +++ b/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch | |||
@@ -0,0 +1,180 @@ | |||
1 | From d28921b5f797829e4e676f7968ae688ef96b7992 Mon Sep 17 00:00:00 2001 | ||
2 | From: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
3 | Date: Mon, 23 Jan 2017 15:08:55 -0800 | ||
4 | Subject: [PATCH 152/154] x86: kvm: Notify host to release pages | ||
5 | |||
6 | In the context of hypervisors managing several virtual machines, we | ||
7 | want those virtual machines to give the memory they used back to | ||
8 | the host when they don't need it anymore. | ||
9 | |||
10 | This patch introduces a new hypercall KVM_HC_RETURN_MEM, allowing | ||
11 | the guest kernel to notify the host kernel when such an event occurs. | ||
12 | Relying on the do_madvise() function that we have previously exported, | ||
13 | the host calls this function when it receives the new hypercall. | ||
14 | |||
15 | Use of do_madvise() with MADV_DONTNEED flag will allow the guest to | ||
16 | ask for a new page without going through a new hypercall. Instead, | ||
17 | it will be able to start using that memory again as it will get | ||
18 | faulted back in as a fresh new page. That's why do_madvise() is more | ||
19 | efficient than doing vm_unmap() to return some memory to the host. | ||
20 | |||
21 | This patch also introduces a new sysctl, kvm_madv_instant_free, | ||
22 | allowing the user to set MADV_FREE advice instead of MADV_DONTNEED. | ||
23 | Indeed, MADV_FREE gives better performance than MADV_DONTNEED | ||
24 | because it does not zero the pages in case the memory has not been | ||
25 | freed by the kernel. This can happen when there was no need for the | ||
26 | kernel to get this memory back, meaning it was keeping those pages | ||
27 | in the right state to be re-used by the same application. | ||
28 | Since MADV_FREE is a very recent advice, introduced in kernel 4.5, | ||
29 | we only enable it through a sysctl, for users who want to | ||
30 | use it. | ||
31 | |||
32 | Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> | ||
33 | Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
34 | --- | ||
35 | arch/x86/kvm/x86.c | 17 +++++++++++++++++ | ||
36 | include/linux/mm.h | 5 +++++ | ||
37 | include/uapi/linux/kvm_para.h | 3 +++ | ||
38 | kernel/sysctl.c | 7 +++++++ | ||
39 | mm/Makefile | 2 +- | ||
40 | mm/kvm.c | 25 +++++++++++++++++++++++++ | ||
41 | 6 files changed, 58 insertions(+), 1 deletion(-) | ||
42 | create mode 100644 mm/kvm.c | ||
43 | |||
44 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | ||
45 | index 582c75311f95..683a94dd5f03 100644 | ||
46 | --- a/arch/x86/kvm/x86.c | ||
47 | +++ b/arch/x86/kvm/x86.c | ||
48 | @@ -46,6 +46,7 @@ | ||
49 | #include <linux/user-return-notifier.h> | ||
50 | #include <linux/srcu.h> | ||
51 | #include <linux/slab.h> | ||
52 | +#include <linux/mm.h> | ||
53 | #include <linux/perf_event.h> | ||
54 | #include <linux/uaccess.h> | ||
55 | #include <linux/hash.h> | ||
56 | @@ -6019,6 +6020,19 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | ||
57 | kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); | ||
58 | } | ||
59 | |||
60 | +static int kvm_pv_return_mem_op(struct kvm *kvm, gpa_t gpa, size_t len) | ||
61 | +{ | ||
62 | + unsigned long start = gfn_to_hva(kvm, gpa_to_gfn(gpa)); | ||
63 | + | ||
64 | + if (len > KVM_MAX_RET_MEM_SIZE) | ||
65 | + return -KVM_EPERM; | ||
66 | + /* Negative codes, like -KVM_ENOSYS in kvm_emulate_hypercall(). */ | ||
67 | + if (kvm_is_error_hva(start + len)) | ||
68 | + return -KVM_EFAULT; | ||
69 | + | ||
70 | + return do_madvise(start, len, kvm_ret_mem_advice); | ||
71 | +} | ||
72 | + | ||
73 | void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) | ||
74 | { | ||
75 | vcpu->arch.apicv_active = false; | ||
76 | @@ -6065,6 +6079,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | ||
77 | kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); | ||
78 | ret = 0; | ||
79 | break; | ||
80 | + case KVM_HC_RETURN_MEM: | ||
81 | + ret = kvm_pv_return_mem_op(vcpu->kvm, a0, a1); | ||
82 | + break; | ||
83 | default: | ||
84 | ret = -KVM_ENOSYS; | ||
85 | break; | ||
86 | diff --git a/include/linux/mm.h b/include/linux/mm.h | ||
87 | index 925ec25f99a8..833f23d98baa 100644 | ||
88 | --- a/include/linux/mm.h | ||
89 | +++ b/include/linux/mm.h | ||
90 | @@ -2303,6 +2303,11 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); | ||
91 | extern int sysctl_drop_caches; | ||
92 | int drop_caches_sysctl_handler(struct ctl_table *, int, | ||
93 | void __user *, size_t *, loff_t *); | ||
94 | +extern int sysctl_kvm_madv_instant_free; | ||
95 | +extern int kvm_ret_mem_advice; | ||
96 | +int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, | ||
97 | + void __user *buffer, size_t *length, | ||
98 | + loff_t *ppos); | ||
99 | #endif | ||
100 | |||
101 | void drop_slab(void); | ||
102 | diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h | ||
103 | index bf6cd7d5cac2..7d90f77d87d0 100644 | ||
104 | --- a/include/uapi/linux/kvm_para.h | ||
105 | +++ b/include/uapi/linux/kvm_para.h | ||
106 | @@ -23,6 +23,9 @@ | ||
107 | #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 | ||
108 | #define KVM_HC_MIPS_EXIT_VM 7 | ||
109 | #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 | ||
110 | +#define KVM_HC_RETURN_MEM 10 | ||
111 | + | ||
112 | +#define KVM_MAX_RET_MEM_SIZE (1 << 22) // 4MiB | ||
113 | |||
114 | /* | ||
115 | * hypercalls use architecture specific | ||
116 | diff --git a/kernel/sysctl.c b/kernel/sysctl.c | ||
117 | index c1095cdc0fe2..d8ae774fa042 100644 | ||
118 | --- a/kernel/sysctl.c | ||
119 | +++ b/kernel/sysctl.c | ||
120 | @@ -1398,6 +1398,13 @@ static struct ctl_table vm_table[] = { | ||
121 | .extra1 = &one, | ||
122 | .extra2 = &four, | ||
123 | }, | ||
124 | + { | ||
125 | + .procname = "kvm_madv_instant_free", | ||
126 | + .data = &sysctl_kvm_madv_instant_free, | ||
127 | + .maxlen = sizeof(int), | ||
128 | + .mode = 0644, | ||
129 | + .proc_handler = kvm_madv_instant_free_sysctl_handler, | ||
130 | + }, | ||
131 | #ifdef CONFIG_COMPACTION | ||
132 | { | ||
133 | .procname = "compact_memory", | ||
134 | diff --git a/mm/Makefile b/mm/Makefile | ||
135 | index 295bd7a9f76b..651ce0aff140 100644 | ||
136 | --- a/mm/Makefile | ||
137 | +++ b/mm/Makefile | ||
138 | @@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ | ||
139 | mm_init.o mmu_context.o percpu.o slab_common.o \ | ||
140 | compaction.o vmacache.o \ | ||
141 | interval_tree.o list_lru.o workingset.o \ | ||
142 | - prfile.o debug.o $(mmu-y) | ||
143 | + prfile.o debug.o kvm.o $(mmu-y) | ||
144 | |||
145 | obj-y += init-mm.o | ||
146 | |||
147 | diff --git a/mm/kvm.c b/mm/kvm.c | ||
148 | new file mode 100644 | ||
149 | index 000000000000..8945f6a311b9 | ||
150 | --- /dev/null | ||
151 | +++ b/mm/kvm.c | ||
152 | @@ -0,0 +1,25 @@ | ||
153 | +#include <linux/mman.h> | ||
154 | + | ||
155 | +int sysctl_kvm_madv_instant_free; | ||
156 | + | ||
157 | +int kvm_ret_mem_advice = MADV_DONTNEED; | ||
158 | +EXPORT_SYMBOL_GPL(kvm_ret_mem_advice); | ||
159 | + | ||
160 | +int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, | ||
161 | + void __user *buffer, size_t *length, loff_t *ppos) | ||
162 | +{ | ||
163 | + int ret; | ||
164 | + | ||
165 | + ret = proc_dointvec(table, write, buffer, length, ppos); | ||
166 | + if (ret) | ||
167 | + return ret; | ||
168 | + /* When MADV_FREE is not defined, the advice is left unchanged. */ | ||
169 | +#ifdef MADV_FREE | ||
170 | + if (sysctl_kvm_madv_instant_free > 0) | ||
171 | + kvm_ret_mem_advice = MADV_FREE; | ||
172 | + else | ||
173 | + kvm_ret_mem_advice = MADV_DONTNEED; | ||
174 | +#endif | ||
175 | + | ||
176 | + return 0; | ||
177 | +} | ||
178 | -- | ||
179 | 2.12.1 | ||
180 | |||