patches/boot_time_opt/0154-sysctl-vm-Fine-grained-cache-shrinking.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

From 130d5d976b920aec243e0fa63273f3143660054b Mon Sep 17 00:00:00 2001
From: Sebastien Boeuf <sebastien.boeuf@intel.com>
Date: Mon, 23 Jan 2017 15:32:39 -0800
Subject: [PATCH 154/154] sysctl: vm: Fine-grained cache shrinking

Lots of virtual machines are let in idle state for days until they
are terminated, and they can keep a large amount of memory in their
cache, meaning this memory cannot be used by other processes.

We tried to release this memory using existing drop_caches sysctl,
but it led to the complete cache loss while it could have been used
whether the idle process wakes up. Indeed, the process can't find any
available cached data and it directly affects performances to rebuild
it from scratch.

Instead, the solution we want is based on shrinking gradually system
cache over time. This patch adds a new sysctl shrink_caches_mb so as
to allow userspace applications indicating the kernel it should shrink
system cache up to the amount (in MiB) specified.

There is an application called "memshrinker" which uses this new
mechanism. It runs in the background and periodically releases a
specified amount of cache. This amount is based on the remaining
cache on the system, and period is computed to follow a shrinking
model. It results in saving a lot of memory for other processes
running on the system.

Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com>
Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
---
 fs/drop_caches.c   | 25 +++++++++++++++++++++++++
 include/linux/mm.h |  4 ++++
 kernel/sysctl.c    |  8 ++++++++
 mm/vmscan.c        |  2 --
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 82377017130f..f8de1383498b 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -9,10 +9,12 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include <linux/swap.h>
 #include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
+int sysctl_shrink_caches_mb;
 
 static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
@@ -68,3 +70,26 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write,
 	}
 	return 0;
 }
+
+int shrink_caches_sysctl_handler(struct ctl_table *table, int write,
+	void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+	unsigned long nr_to_reclaim, page_reclaimed;
+
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (ret)
+		return ret;
+
+	nr_to_reclaim = sysctl_shrink_caches_mb * (1 << 20) / PAGE_SIZE;
+	if (write) {
+		page_reclaimed = shrink_all_memory(nr_to_reclaim);
+		if (page_reclaimed > 0)
+			lru_add_drain_all();
+
+		if (page_reclaimed != nr_to_reclaim)
+			return page_reclaimed;
+	}
+
+	return 0;
+}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 15e02bf3a6b3..9f9b967ad2c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2457,6 +2457,10 @@ extern int kvm_ret_mem_advice;
 int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write,
 					 void __user *buffer, size_t *length,
 					 loff_t *ppos);
+extern int sysctl_shrink_caches_mb;
+int shrink_caches_sysctl_handler(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *length,
+				 loff_t *ppos);
 #endif
 
 void drop_slab(void);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9a1611f92a2a..9b74b4f0251d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1417,6 +1417,14 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= kvm_madv_instant_free_sysctl_handler,
 	},
+	{
+		.procname       = "shrink_caches_mb",
+		.data           = &sysctl_shrink_caches_mb,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = shrink_caches_sysctl_handler,
+		.extra1         = &one,
+	},
 #ifdef CONFIG_COMPACTION
 	{
 		.procname	= "compact_memory",
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eb2f0315b8c0..b16f327b0211 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3646,7 +3646,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-#ifdef CONFIG_HIBERNATION
 /*
  * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
  * freed pages.
@@ -3686,7 +3685,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 
 	return nr_reclaimed;
 }
-#endif /* CONFIG_HIBERNATION */
 
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
    not required for correctness.  So if the last cpu in a node goes
-- 
2.15.0