From 2c145b5233b504f5226a0f4bc44baeef33b444d8 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Mon, 23 Jan 2017 15:32:39 -0800 Subject: [PATCH 154/154] sysctl: vm: Fine-grained cache shrinking Lots of virtual machines are let in idle state for days until they are terminated, and they can keep a large amount of memory in their cache, meaning this memory cannot be used by other processes. We tried to release this memory using existing drop_caches sysctl, but it led to the complete cache loss while it could have been used whether the idle process wakes up. Indeed, the process can't find any available cached data and it directly affects performances to rebuild it from scratch. Instead, the solution we want is based on shrinking gradually system cache over time. This patch adds a new sysctl shrink_caches_mb so as to allow userspace applications indicating the kernel it should shrink system cache up to the amount (in MiB) specified. There is an application called "memshrinker" which uses this new mechanism. It runs in the background and periodically releases a specified amount of cache. This amount is based on the remaining cache on the system, and period is computed to follow a shrinking model. It results in saving a lot of memory for other processes running on the system. Suggested-by: Arjan van de Ven Signed-off-by: Sebastien Boeuf --- fs/drop_caches.c | 25 +++++++++++++++++++++++++ include/linux/mm.h | 4 ++++ kernel/sysctl.c | 8 ++++++++ mm/vmscan.c | 2 -- 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d72d52b90433..f564dfcc13a4 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -8,10 +8,12 @@ #include #include #include +#include #include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ int sysctl_drop_caches; +int sysctl_shrink_caches_mb; static void drop_pagecache_sb(struct super_block *sb, void *unused) { @@ -67,3 +69,26 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write, } return 0; } + +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int ret; + unsigned long nr_to_reclaim, page_reclaimed; + + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (ret) + return ret; + + nr_to_reclaim = sysctl_shrink_caches_mb * (1 << 20) / PAGE_SIZE; + if (write) { + page_reclaimed = shrink_all_memory(nr_to_reclaim); + if (page_reclaimed > 0) + lru_add_drain_all(); + + if (page_reclaimed != nr_to_reclaim) + return page_reclaimed; + } + + return 0; +} diff --git a/include/linux/mm.h b/include/linux/mm.h index 833f23d98baa..0bb66c1c31c9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2308,6 +2308,10 @@ extern int kvm_ret_mem_advice; int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); +extern int sysctl_shrink_caches_mb; +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, + loff_t *ppos); #endif void drop_slab(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d8ae774fa042..5dc9a46ae212 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1405,6 +1405,14 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = kvm_madv_instant_free_sysctl_handler, }, + { + .procname = "shrink_caches_mb", + .data = &sysctl_shrink_caches_mb, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = shrink_caches_sysctl_handler, + .extra1 = &one, + }, #ifdef CONFIG_COMPACTION { .procname = "compact_memory", diff --git a/mm/vmscan.c b/mm/vmscan.c index 30a88b945a44..1198e74d1860 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3525,7 +3525,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) wake_up_interruptible(&pgdat->kswapd_wait); } -#ifdef CONFIG_HIBERNATION /* * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of * freed pages. @@ -3564,7 +3563,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) return nr_reclaimed; } -#endif /* CONFIG_HIBERNATION */ /* It's optimal to keep kswapds on the same CPUs as their memory, but not required for correctness. So if the last cpu in a node goes -- 2.12.1