From 130d5d976b920aec243e0fa63273f3143660054b Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Mon, 23 Jan 2017 15:32:39 -0800 Subject: [PATCH 154/154] sysctl: vm: Fine-grained cache shrinking Lots of virtual machines are left in an idle state for days until they are terminated, and they can keep a large amount of memory in their cache, meaning this memory cannot be used by other processes. We tried to release this memory using the existing drop_caches sysctl, but it led to complete cache loss, even though the cached data could have been reused if the idle process woke up. Indeed, the process then can't find any cached data, and rebuilding the cache from scratch directly hurts performance. Instead, the solution we want is based on gradually shrinking the system cache over time. This patch adds a new sysctl, shrink_caches_mb, to allow userspace applications to tell the kernel that it should shrink the system cache by up to the specified amount (in MiB). There is an application called "memshrinker" which uses this new mechanism. It runs in the background and periodically releases a specified amount of cache. This amount is based on the remaining cache on the system, and the period is computed to follow a shrinking model. This saves a lot of memory for other processes running on the system. 
Suggested-by: Arjan van de Ven Signed-off-by: Sebastien Boeuf --- fs/drop_caches.c | 25 +++++++++++++++++++++++++ include/linux/mm.h | 4 ++++ kernel/sysctl.c | 8 ++++++++ mm/vmscan.c | 2 -- 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 82377017130f..f8de1383498b 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -9,10 +9,12 @@ #include #include #include +#include #include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ int sysctl_drop_caches; +int sysctl_shrink_caches_mb; static void drop_pagecache_sb(struct super_block *sb, void *unused) { @@ -68,3 +70,26 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write, } return 0; } + +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int ret; + unsigned long nr_to_reclaim, page_reclaimed; + + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (ret) + return ret; + + nr_to_reclaim = sysctl_shrink_caches_mb * (1 << 20) / PAGE_SIZE; + if (write) { + page_reclaimed = shrink_all_memory(nr_to_reclaim); + if (page_reclaimed > 0) + lru_add_drain_all(); + + if (page_reclaimed != nr_to_reclaim) + return page_reclaimed; + } + + return 0; +} diff --git a/include/linux/mm.h b/include/linux/mm.h index 15e02bf3a6b3..9f9b967ad2c9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2457,6 +2457,10 @@ extern int kvm_ret_mem_advice; int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); +extern int sysctl_shrink_caches_mb; +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, + loff_t *ppos); #endif void drop_slab(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9a1611f92a2a..9b74b4f0251d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1417,6 +1417,14 @@ static struct ctl_table 
vm_table[] = { .mode = 0644, .proc_handler = kvm_madv_instant_free_sysctl_handler, }, + { + .procname = "shrink_caches_mb", + .data = &sysctl_shrink_caches_mb, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = shrink_caches_sysctl_handler, + .extra1 = &one, + }, #ifdef CONFIG_COMPACTION { .procname = "compact_memory", diff --git a/mm/vmscan.c b/mm/vmscan.c index eb2f0315b8c0..b16f327b0211 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3646,7 +3646,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) wake_up_interruptible(&pgdat->kswapd_wait); } -#ifdef CONFIG_HIBERNATION /* * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of * freed pages. @@ -3686,7 +3685,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) return nr_reclaimed; } -#endif /* CONFIG_HIBERNATION */ /* It's optimal to keep kswapds on the same CPUs as their memory, but not required for correctness. So if the last cpu in a node goes -- 2.15.0