From b1abf1299c2f635db21370f989ac9dc2ecbe4189 Mon Sep 17 00:00:00 2001 From: Martin Borg Date: Wed, 9 May 2018 15:29:03 +0200 Subject: boot_time_opt_guest: update guest boot time optimization patches for 4.14 The new patches are based on: https://github.com/clearlinux-pkgs/linux-kvm commit 5a0a30430116735fecb22b269e4ca10a08147d8d Signed-off-by: Martin Borg Signed-off-by: Adrian Dudau --- ...kip-synchronize_rcu-on-single-CPU-systems.patch | 34 ---- ...kip-synchronize_rcu-if-there-is-no-old-op.patch | 38 +++++ .../0103-fbcon-enable-no-blink-by-default.patch | 26 +++ ...kip-synchronize_rcu-if-there-is-no-old-op.patch | 38 ----- .../0104-fbcon-enable-no-blink-by-default.patch | 26 --- .../0104-mm-reduce-vmstat-wakeups.patch | 25 +++ .../boot_time_opt_guest/0105-vmstats-wakeups.patch | 28 --- .../0106-cgroup-delayed-work.patch | 133 +++++++++++++++ patches/boot_time_opt_guest/0106-pci-probe.patch | 123 ------------- patches/boot_time_opt_guest/0107-cgroup.patch | 107 ------------ .../0107-smpboot-reuse-timer-calibration.patch | 45 +++++ patches/boot_time_opt_guest/0108-perf.patch | 28 +++ .../0108-smpboot-reuse-timer-calibration.patch | 45 ----- .../0109-pci-probe-identify-known-devices.patch | 190 +++++++++++++++++++++ patches/boot_time_opt_guest/0109-perf.patch | 28 --- .../0110-init-no-wait-for-the-known-devices.patch | 39 +++++ .../0110-pci-probe-identify-known-devices.patch | 190 --------------------- .../0111-init-no-wait-for-the-known-devices.patch | 39 ----- patches/boot_time_opt_guest/0111-ksm-wakeups.patch | 32 ++++ patches/boot_time_opt_guest/0112-ksm-wakeups.patch | 32 ---- .../0113-init-do_mounts-recreate-dev-root.patch | 42 ----- ...-setting-user.-attributes-on-symlinks-by-.patch | 56 ------ .../0151-mm-Export-do_madvise.patch | 16 +- ...0152-x86-kvm-Notify-host-to-release-pages.patch | 57 ++++--- ...6-Return-memory-from-guest-to-host-kernel.patch | 16 +- ...54-sysctl-vm-Fine-grained-cache-shrinking.patch | 18 +- .../boot_time_opt_guest/guest_boot_time_opt.scc | 20 +-- 27 files changed, 619 insertions(+), 852 deletions(-) delete mode 100644 patches/boot_time_opt_guest/0102-cpuidle-skip-synchronize_rcu-on-single-CPU-systems.patch create mode 100644 patches/boot_time_opt_guest/0102-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch create mode 100644 patches/boot_time_opt_guest/0103-fbcon-enable-no-blink-by-default.patch delete mode 100644 patches/boot_time_opt_guest/0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch delete mode 100644 patches/boot_time_opt_guest/0104-fbcon-enable-no-blink-by-default.patch create mode 100644 patches/boot_time_opt_guest/0104-mm-reduce-vmstat-wakeups.patch delete mode 100644 patches/boot_time_opt_guest/0105-vmstats-wakeups.patch create mode 100644 patches/boot_time_opt_guest/0106-cgroup-delayed-work.patch delete mode 100644 patches/boot_time_opt_guest/0106-pci-probe.patch delete mode 100644 patches/boot_time_opt_guest/0107-cgroup.patch create mode 100644 patches/boot_time_opt_guest/0107-smpboot-reuse-timer-calibration.patch create mode 100644 patches/boot_time_opt_guest/0108-perf.patch delete mode 100644 patches/boot_time_opt_guest/0108-smpboot-reuse-timer-calibration.patch create mode 100644 patches/boot_time_opt_guest/0109-pci-probe-identify-known-devices.patch delete mode 100644 patches/boot_time_opt_guest/0109-perf.patch create mode 100644 patches/boot_time_opt_guest/0110-init-no-wait-for-the-known-devices.patch delete mode 100644 patches/boot_time_opt_guest/0110-pci-probe-identify-known-devices.patch delete mode 100644 patches/boot_time_opt_guest/0111-init-no-wait-for-the-known-devices.patch create mode 100644 patches/boot_time_opt_guest/0111-ksm-wakeups.patch delete mode 100644 patches/boot_time_opt_guest/0112-ksm-wakeups.patch delete mode 100644 patches/boot_time_opt_guest/0113-init-do_mounts-recreate-dev-root.patch delete mode 100644 patches/boot_time_opt_guest/0114-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch diff --git a/patches/boot_time_opt_guest/0102-cpuidle-skip-synchronize_rcu-on-single-CPU-systems.patch b/patches/boot_time_opt_guest/0102-cpuidle-skip-synchronize_rcu-on-single-CPU-systems.patch deleted file mode 100644 index 1de2a6b..0000000 --- a/patches/boot_time_opt_guest/0102-cpuidle-skip-synchronize_rcu-on-single-CPU-systems.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 6b0fb5b2a7a157c04d8ab6ad71b092034d0048bf Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Wed, 11 Feb 2015 16:19:26 -0600 -Subject: [PATCH 102/114] cpuidle: skip synchronize_rcu() on single CPU systems - -synchronize_rcu() is pretty expensive, and on single CPU systems we don't need -it in this specific case, so skip it. - -Signed-off-by: Arjan van de Ven -Signed-off-by: Miguel Bernal Marin ---- - drivers/cpuidle/cpuidle.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c -index 62810ff3b00f..f1d110411098 100644 ---- a/drivers/cpuidle/cpuidle.c -+++ b/drivers/cpuidle/cpuidle.c -@@ -324,8 +324,11 @@ void cpuidle_uninstall_idle_handler(void) - /* - * Make sure external observers (such as the scheduler) - * are done looking at pointed idle states. -+ * This is only relevant if there is more than one cpu, -+ * if there is only one CPU, that is us... and we're -+ * coherent to ourselves. - */ -- synchronize_rcu(); -+ - } - - /** --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0102-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch b/patches/boot_time_opt_guest/0102-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch new file mode 100644 index 0000000..9cc08f3 --- /dev/null +++ b/patches/boot_time_opt_guest/0102-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch @@ -0,0 +1,38 @@ +From 092250008c4240d58086102bd63baf56ad400feb Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 11 Feb 2015 16:25:16 -0600 +Subject: [PATCH 102/114] sysrq: skip synchronize_rcu() if there is no old op + +synchronize_rcu() is expensive. Currently it is called as part of the sysrq +registration/unregistration, which happens during boot several times. +Now, the reason for the synchronize_rcu() is to allow an old registered +operation to expire properly... which is pointless if the old operation +is NULL... +So we can save the common case of the old operation being NULL a lot of time +by just checking for non-NULL prior to the synchronize_rcu() + +Signed-off-by: Arjan van de Ven +Signed-off-by: Miguel Bernal Marin +--- + drivers/tty/sysrq.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c +index 3ffc1ce29023..5a1d87b99e62 100644 +--- a/drivers/tty/sysrq.c ++++ b/drivers/tty/sysrq.c +@@ -1067,8 +1067,10 @@ static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p, + * A concurrent __handle_sysrq either got the old op or the new op. + * Wait for it to go away before returning, so the code for an old + * op is not freed (eg. on module unload) while it is in use. ++ * This is only relevant if the old op is not NULL of course. + */ +- synchronize_rcu(); ++ if (remove_op_p) ++ synchronize_rcu(); + + return retval; + } +-- +2.13.2 + diff --git a/patches/boot_time_opt_guest/0103-fbcon-enable-no-blink-by-default.patch b/patches/boot_time_opt_guest/0103-fbcon-enable-no-blink-by-default.patch new file mode 100644 index 0000000..b678102 --- /dev/null +++ b/patches/boot_time_opt_guest/0103-fbcon-enable-no-blink-by-default.patch @@ -0,0 +1,26 @@ +From 63fd40ed7d52c48d4f1edef2e928692e0f2ffac2 Mon Sep 17 00:00:00 2001 +From: Jose Carlos Venegas Munoz +Date: Mon, 13 Apr 2015 11:26:36 -0500 +Subject: [PATCH 103/114] fbcon: enable no blink by default + +Author: Arjan van de Ven +--- + drivers/video/fbdev/core/fbcon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c +index 04612f9..a82cde1 100644 +--- a/drivers/video/fbdev/core/fbcon.c ++++ b/drivers/video/fbdev/core/fbcon.c +@@ -148,7 +148,7 @@ static const struct consw fb_con; + + static int fbcon_set_origin(struct vc_data *); + +-static int fbcon_cursor_noblink; ++static int fbcon_cursor_noblink = 1; + + #define divides(a, b) ((!(a) || (b)%(a)) ? 0 : 1) + +-- +2.15.1 + diff --git a/patches/boot_time_opt_guest/0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch b/patches/boot_time_opt_guest/0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch deleted file mode 100644 index d3a20fb..0000000 --- a/patches/boot_time_opt_guest/0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 7be707833bb35c295eb702d13cf73ac9390e4b31 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Wed, 11 Feb 2015 16:25:16 -0600 -Subject: [PATCH 103/114] sysrq: skip synchronize_rcu() if there is no old op - -synchronize_rcu() is expensive. Currently it is called as part of the sysrq -registration/unregistration, which happens during boot several times. -Now, the reason for the synchronize_rcu() is to allow an old registered -operation to expire properly... which is pointless if the old operation -is NULL... -So we can save the common case of the old operation being NULL a lot of time -by just checking for non-NULL prior to the synchronize_rcu() - -Signed-off-by: Arjan van de Ven -Signed-off-by: Miguel Bernal Marin ---- - drivers/tty/sysrq.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c -index 701c085bb19b..c60c7ba57ad9 100644 ---- a/drivers/tty/sysrq.c -+++ b/drivers/tty/sysrq.c -@@ -1065,8 +1065,10 @@ static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p, - * A concurrent __handle_sysrq either got the old op or the new op. - * Wait for it to go away before returning, so the code for an old - * op is not freed (eg. on module unload) while it is in use. -+ * This is only relevant if the old op is not NULL of course. - */ -- synchronize_rcu(); -+ if (remove_op_p) -+ synchronize_rcu(); - - return retval; - } --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0104-fbcon-enable-no-blink-by-default.patch b/patches/boot_time_opt_guest/0104-fbcon-enable-no-blink-by-default.patch deleted file mode 100644 index 715c195..0000000 --- a/patches/boot_time_opt_guest/0104-fbcon-enable-no-blink-by-default.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 5899ff79ed4e3514420e1530a3588a922832dae5 Mon Sep 17 00:00:00 2001 -From: Jose Carlos Venegas Munoz -Date: Mon, 13 Apr 2015 11:26:36 -0500 -Subject: [PATCH 104/114] fbcon: enable no blink by default - -Author: Arjan van de Ven ---- - drivers/video/console/fbcon.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c -index a44f5627b82a..95b73366b86f 100644 ---- a/drivers/video/console/fbcon.c -+++ b/drivers/video/console/fbcon.c -@@ -146,7 +146,7 @@ static const struct consw fb_con; - - static int fbcon_set_origin(struct vc_data *); - --static int fbcon_cursor_noblink; -+static int fbcon_cursor_noblink = 1; - - #define divides(a, b) ((!(a) || (b)%(a)) ? 0 : 1) - --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0104-mm-reduce-vmstat-wakeups.patch b/patches/boot_time_opt_guest/0104-mm-reduce-vmstat-wakeups.patch new file mode 100644 index 0000000..d9c42b6 --- /dev/null +++ b/patches/boot_time_opt_guest/0104-mm-reduce-vmstat-wakeups.patch @@ -0,0 +1,25 @@ +From e09e490bbc40c9e10047af76577efb099eb915d1 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Mon, 1 May 2017 01:00:51 +0000 +Subject: [PATCH 104/114] mm: reduce vmstat wakeups + +--- + mm/vmstat.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/vmstat.c b/mm/vmstat.c +index 76f73670200a..3cc9788e0be2 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1562,7 +1562,7 @@ static const struct file_operations proc_vmstat_file_operations = { + + #ifdef CONFIG_SMP + static DEFINE_PER_CPU(struct delayed_work, vmstat_work); +-int sysctl_stat_interval __read_mostly = HZ; ++int sysctl_stat_interval __read_mostly = 8 * HZ; + + #ifdef CONFIG_PROC_FS + static void refresh_vm_stats(struct work_struct *work) +-- +2.13.2 + diff --git a/patches/boot_time_opt_guest/0105-vmstats-wakeups.patch b/patches/boot_time_opt_guest/0105-vmstats-wakeups.patch deleted file mode 100644 index 09b109a..0000000 --- a/patches/boot_time_opt_guest/0105-vmstats-wakeups.patch +++ /dev/null @@ -1,28 +0,0 @@ -From ff47b4e9be8113b4ba05d6f2afee3db6904bc10f Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Wed, 11 Feb 2015 16:47:20 -0600 -Subject: [PATCH 105/114] vmstats: wakeups - -Author: Arjan van de Ven - -Signed-off-by: Miguel Bernal Marin ---- - mm/vmstat.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/mm/vmstat.c b/mm/vmstat.c -index 7c28df36f50f..efe1b6797139 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -1549,7 +1549,7 @@ static const struct file_operations proc_vmstat_file_operations = { - #ifdef CONFIG_SMP - static struct workqueue_struct *vmstat_wq; - static DEFINE_PER_CPU(struct delayed_work, vmstat_work); --int sysctl_stat_interval __read_mostly = HZ; -+int sysctl_stat_interval __read_mostly = 8 * HZ; - - #ifdef CONFIG_PROC_FS - static void refresh_vm_stats(struct work_struct *work) --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0106-cgroup-delayed-work.patch b/patches/boot_time_opt_guest/0106-cgroup-delayed-work.patch new file mode 100644 index 0000000..438ed97 --- /dev/null +++ b/patches/boot_time_opt_guest/0106-cgroup-delayed-work.patch @@ -0,0 +1,133 @@ +From f80cc54895e35a762036382c73bc48ac813e05a5 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Fri, 28 Aug 2015 11:00:36 -0500 +Subject: [PATCH] cgroup: delayed work + +--- + include/linux/cgroup-defs.h | 2 +- + kernel/cgroup/cgroup-internal.h | 8 ++++++++ + kernel/cgroup/cgroup-v1.c | 8 -------- + kernel/cgroup/cgroup.c | 21 ++++++++++++--------- + 4 files changed, 21 insertions(+), 18 deletions(-) + +diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h +index ec47101cb1bf..4827bf0809d7 100644 +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -136,7 +136,7 @@ struct cgroup_subsys_state { + + /* percpu_ref killing and RCU release */ + struct rcu_head rcu_head; +- struct work_struct destroy_work; ++ struct delayed_work destroy_work; + + /* + * PI: the parent css. Placed here for cache proximity to following +diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h +index 00f4d6bf048f..854ef3216429 100644 +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -8,6 +8,14 @@ + #include + + /* ++ * pidlists linger the following amount before being destroyed. The goal ++ * is avoiding frequent destruction in the middle of consecutive read calls ++ * Expiring in the middle is a performance problem not a correctness one. ++ * 1 sec should be enough. ++ */ ++#define CGROUP_PIDLIST_DESTROY_DELAY round_jiffies_relative(HZ) ++ ++/* + * A cgroup can be associated with multiple css_sets as different tasks may + * belong to different cgroups on different hierarchies. In the other + * direction, a css_set is naturally associated with multiple cgroups. +diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c +index 85d75152402d..60bb59d44d01 100644 +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -16,14 +16,6 @@ + + #include + +-/* +- * pidlists linger the following amount before being destroyed. The goal +- * is avoiding frequent destruction in the middle of consecutive read calls +- * Expiring in the middle is a performance problem not a correctness one. +- * 1 sec should be enough. +- */ +-#define CGROUP_PIDLIST_DESTROY_DELAY HZ +- + /* Controllers blocked by the commandline in v1 */ + static u16 cgroup_no_v1_mask; + +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index 8d4e85eae42c..7c8294298983 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -3841,8 +3841,9 @@ static struct cftype cgroup_base_files[] = { + */ + static void css_free_work_fn(struct work_struct *work) + { ++ struct delayed_work *dwork = to_delayed_work(work); + struct cgroup_subsys_state *css = +- container_of(work, struct cgroup_subsys_state, destroy_work); ++ container_of(dwork, struct cgroup_subsys_state, destroy_work); + struct cgroup_subsys *ss = css->ss; + struct cgroup *cgrp = css->cgroup; + +@@ -3891,14 +3892,15 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) + struct cgroup_subsys_state *css = + container_of(rcu_head, struct cgroup_subsys_state, rcu_head); + +- INIT_WORK(&css->destroy_work, css_free_work_fn); +- queue_work(cgroup_destroy_wq, &css->destroy_work); ++ INIT_DELAYED_WORK(&css->destroy_work, css_free_work_fn); ++ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); + } + + static void css_release_work_fn(struct work_struct *work) + { ++ struct delayed_work *dwork = to_delayed_work(work); + struct cgroup_subsys_state *css = +- container_of(work, struct cgroup_subsys_state, destroy_work); ++ container_of(dwork, struct cgroup_subsys_state, destroy_work); + struct cgroup_subsys *ss = css->ss; + struct cgroup *cgrp = css->cgroup; + +@@ -3943,8 +3945,8 @@ static void css_release(struct percpu_ref *ref) + struct cgroup_subsys_state *css = + container_of(ref, struct cgroup_subsys_state, refcnt); + +- INIT_WORK(&css->destroy_work, css_release_work_fn); +- queue_work(cgroup_destroy_wq, &css->destroy_work); ++ INIT_DELAYED_WORK(&css->destroy_work, css_release_work_fn); ++ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); + } + + static void init_and_link_css(struct cgroup_subsys_state *css, +@@ -4225,8 +4227,9 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) + */ + static void css_killed_work_fn(struct work_struct *work) + { ++ struct delayed_work *dwork = to_delayed_work(work); + struct cgroup_subsys_state *css = +- container_of(work, struct cgroup_subsys_state, destroy_work); ++ container_of(dwork, struct cgroup_subsys_state, destroy_work); + + mutex_lock(&cgroup_mutex); + +@@ -4247,8 +4250,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref) + container_of(ref, struct cgroup_subsys_state, refcnt); + + if (atomic_dec_and_test(&css->online_cnt)) { +- INIT_WORK(&css->destroy_work, css_killed_work_fn); +- queue_work(cgroup_destroy_wq, &css->destroy_work); ++ INIT_DELAYED_WORK(&css->destroy_work, css_killed_work_fn); ++ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); + } + } + +-- +2.13.2 + diff --git a/patches/boot_time_opt_guest/0106-pci-probe.patch b/patches/boot_time_opt_guest/0106-pci-probe.patch deleted file mode 100644 index 5045926..0000000 --- a/patches/boot_time_opt_guest/0106-pci-probe.patch +++ /dev/null @@ -1,123 +0,0 @@ -From b225caf8f743b9f5f9e84d0df711ee0c17e049ae Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Wed, 11 Feb 2015 16:53:08 -0600 -Subject: [PATCH 106/114] pci: probe - -Author: Arjan van de Ven - -Signed-off-by: Miguel Bernal Marin ---- - drivers/pci/probe.c | 43 ++++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 40 insertions(+), 3 deletions(-) - -diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c -index 204960e70333..7399a06698da 100644 ---- a/drivers/pci/probe.c -+++ b/drivers/pci/probe.c -@@ -182,6 +182,10 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, - - mask = type ? PCI_ROM_ADDRESS_MASK : ~0; - -+ res->name = pci_name(dev); -+ -+ printk("clr: Starting probe for %s\n", res->name); -+ - /* No printks while decoding is disabled! */ - if (!dev->mmio_always_on) { - pci_read_config_word(dev, PCI_COMMAND, &orig_cmd); -@@ -191,8 +195,6 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, - } - } - -- res->name = pci_name(dev); -- - pci_read_config_dword(dev, pos, &l); - pci_write_config_dword(dev, pos, l | mask); - pci_read_config_dword(dev, pos, &sz); -@@ -324,6 +326,8 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) - if (dev->non_compliant_bars) - return; - -+ printk("clr: pci_read_bases start\n"); -+ - for (pos = 0; pos < howmany; pos++) { - struct resource *res = &dev->resource[pos]; - reg = PCI_BASE_ADDRESS_0 + (pos << 2); -@@ -332,11 +336,13 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) - - if (rom) { - struct resource *res = &dev->resource[PCI_ROM_RESOURCE]; -+ printk("clr: rom path\n"); - dev->rom_base_reg = rom; - res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | - IORESOURCE_READONLY | IORESOURCE_SIZEALIGN; - __pci_read_base(dev, pci_bar_mem32, res, rom); - } -+ printk("clr: pci_read_bases end\n"); - } - - static void pci_read_bridge_io(struct pci_bus *child) -@@ -1311,6 +1317,28 @@ static void pci_msi_setup_pci_dev(struct pci_dev *dev) - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); - } - -+static int guess_bar_count(int class) -+{ -+ if (class == 0x068000) -+ return 0; -+ if (class == 0x020000) -+ return 2; -+ if (class == 0x010000) -+ return 2; -+ if (class == 0x00ff00) -+ return 1; -+ return 6; -+} -+ -+static int has_rom(int class, int rom) -+{ -+ if (class == 0x020000) -+ return 0; -+ if (class == 0x010000 || class == 0x00ff00) -+ return 0; -+ return rom; -+} -+ - /** - * pci_setup_device - fill in class and map information of a device - * @dev: the device structure to fill -@@ -1329,6 +1357,9 @@ int pci_setup_device(struct pci_dev *dev) - int pos = 0; - struct pci_bus_region region; - struct resource *res; -+ int maxbar; -+ -+ printk("clr: pci_setup_device start\n"); - - if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type)) - return -EIO; -@@ -1383,7 +1414,11 @@ int pci_setup_device(struct pci_dev *dev) - if (class == PCI_CLASS_BRIDGE_PCI) - goto bad; - pci_read_irq(dev); -- pci_read_bases(dev, 6, PCI_ROM_ADDRESS); -+ -+ maxbar = guess_bar_count(dev->class); -+ -+ if (class != PCI_CLASS_STORAGE_IDE) -+ pci_read_bases(dev, maxbar, has_rom(dev->class, PCI_ROM_ADDRESS)); - pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor); - pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device); - -@@ -1468,6 +1503,8 @@ int pci_setup_device(struct pci_dev *dev) - dev->class = PCI_CLASS_NOT_DEFINED << 8; - } - -+ printk("clr: pci_setup_device end\n"); -+ - /* We found a fine healthy device, go go go... */ - return 0; - } --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0107-cgroup.patch b/patches/boot_time_opt_guest/0107-cgroup.patch deleted file mode 100644 index d68c686..0000000 --- a/patches/boot_time_opt_guest/0107-cgroup.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 0adc5bfd84939d11d3c172eab0a00bfab4aadb46 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Fri, 28 Aug 2015 11:00:36 -0500 -Subject: [PATCH 107/114] cgroup - -Author: Arjan van de Ven - -Signed-off-by: Miguel Bernal Marin -Signed-off-by: Jose Carlos Venegas Munoz ---- - include/linux/cgroup-defs.h | 2 +- - kernel/cgroup.c | 24 ++++++++++++++---------- - 2 files changed, 15 insertions(+), 11 deletions(-) - -diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h -index 861b4677fc5b..5d3c345ee60c 100644 ---- a/include/linux/cgroup-defs.h -+++ b/include/linux/cgroup-defs.h -@@ -137,7 +137,7 @@ struct cgroup_subsys_state { - - /* percpu_ref killing and RCU release */ - struct rcu_head rcu_head; -- struct work_struct destroy_work; -+ struct delayed_work destroy_work; - }; - - /* -diff --git a/kernel/cgroup.c b/kernel/cgroup.c -index 53bbca7c4859..6de39d8213ed 100644 ---- a/kernel/cgroup.c -+++ b/kernel/cgroup.c -@@ -73,7 +73,7 @@ - * Expiring in the middle is a performance problem not a correctness one. - * 1 sec should be enough. - */ --#define CGROUP_PIDLIST_DESTROY_DELAY HZ -+#define CGROUP_PIDLIST_DESTROY_DELAY round_jiffies_relative(HZ) - - #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \ - MAX_CFTYPE_NAME + 2) -@@ -4986,8 +4986,9 @@ static struct cftype cgroup_legacy_base_files[] = { - */ - static void css_free_work_fn(struct work_struct *work) - { -+ struct delayed_work *dwork = to_delayed_work(work); - struct cgroup_subsys_state *css = -- container_of(work, struct cgroup_subsys_state, destroy_work); -+ container_of(dwork, struct cgroup_subsys_state, destroy_work); - struct cgroup_subsys *ss = css->ss; - struct cgroup *cgrp = css->cgroup; - -@@ -5036,14 +5037,15 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) - struct cgroup_subsys_state *css = - container_of(rcu_head, struct cgroup_subsys_state, rcu_head); - -- INIT_WORK(&css->destroy_work, css_free_work_fn); -- queue_work(cgroup_destroy_wq, &css->destroy_work); -+ INIT_DELAYED_WORK(&css->destroy_work, css_free_work_fn); -+ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); - } - - static void css_release_work_fn(struct work_struct *work) - { -+ struct delayed_work *dwork = to_delayed_work(work); - struct cgroup_subsys_state *css = -- container_of(work, struct cgroup_subsys_state, destroy_work); -+ container_of(dwork, struct cgroup_subsys_state, destroy_work); - struct cgroup_subsys *ss = css->ss; - struct cgroup *cgrp = css->cgroup; - -@@ -5088,8 +5090,9 @@ static void css_release(struct percpu_ref *ref) - struct cgroup_subsys_state *css = - container_of(ref, struct cgroup_subsys_state, refcnt); - -- INIT_WORK(&css->destroy_work, css_release_work_fn); -- queue_work(cgroup_destroy_wq, &css->destroy_work); -+ INIT_DELAYED_WORK(&css->destroy_work, css_release_work_fn); -+ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); -+ - } - - static void init_and_link_css(struct cgroup_subsys_state *css, -@@ -5371,8 +5374,9 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, - */ - static void css_killed_work_fn(struct work_struct *work) - { -+ struct delayed_work *dwork = to_delayed_work(work); - struct cgroup_subsys_state *css = -- container_of(work, struct cgroup_subsys_state, destroy_work); -+ container_of(dwork, struct cgroup_subsys_state, destroy_work); - - mutex_lock(&cgroup_mutex); - -@@ -5393,8 +5397,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref) - container_of(ref, struct cgroup_subsys_state, refcnt); - - if (atomic_dec_and_test(&css->online_cnt)) { -- INIT_WORK(&css->destroy_work, css_killed_work_fn); -- queue_work(cgroup_destroy_wq, &css->destroy_work); -+ INIT_DELAYED_WORK(&css->destroy_work, css_killed_work_fn); -+ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); - } - } - --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0107-smpboot-reuse-timer-calibration.patch b/patches/boot_time_opt_guest/0107-smpboot-reuse-timer-calibration.patch new file mode 100644 index 0000000..23b873f --- /dev/null +++ b/patches/boot_time_opt_guest/0107-smpboot-reuse-timer-calibration.patch @@ -0,0 +1,45 @@ +From 12409839a8fcf30e6b83290f30f1187efe4a58b6 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 11 Feb 2015 17:28:14 -0600 +Subject: [PATCH 107/114] smpboot: reuse timer calibration + +NO point recalibrating for known-constant tsc... saves 200ms+ of boot time. + +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +--- + arch/x86/kernel/smpboot.c | 2 +- + arch/x86/kernel/tsc.c | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index f04479a8f74f..6f41d74350ec 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -764,7 +764,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) + pr_debug("Waiting for send to finish...\n"); + send_status = safe_apic_wait_icr_idle(); + +- udelay(init_udelay); ++ udelay(100); + + pr_debug("Deasserting INIT\n"); + +diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c +index 714dfba6a1e7..96cddaf0f48b 100644 +--- a/arch/x86/kernel/tsc.c ++++ b/arch/x86/kernel/tsc.c +@@ -1438,6 +1438,9 @@ unsigned long calibrate_delay_is_known(void) + if (!mask) + return 0; + ++ if (cpu !=0) ++ return cpu_data(0).loops_per_jiffy; ++ + sibling = cpumask_any_but(mask, cpu); + if (sibling < nr_cpu_ids) + return cpu_data(sibling).loops_per_jiffy; +-- +2.13.2 + diff --git a/patches/boot_time_opt_guest/0108-perf.patch b/patches/boot_time_opt_guest/0108-perf.patch new file mode 100644 index 0000000..085e99e --- /dev/null +++ b/patches/boot_time_opt_guest/0108-perf.patch @@ -0,0 +1,28 @@ +From 8340ac120b7c7b86cdb7eb06d31d767683ad7413 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 4 Nov 2015 15:17:10 -0600 +Subject: [PATCH 108/114] perf + +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +--- + arch/x86/events/intel/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c +index 110ce8238466..98ee5d338a16 100644 +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -4062,7 +4062,7 @@ __init int intel_pmu_init(void) + */ + if (x86_pmu.extra_regs) { + for (er = x86_pmu.extra_regs; er->msr; er++) { +- er->extra_msr_access = check_msr(er->msr, 0x11UL); ++ er->extra_msr_access = false; + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; +-- +2.13.2 + diff --git a/patches/boot_time_opt_guest/0108-smpboot-reuse-timer-calibration.patch b/patches/boot_time_opt_guest/0108-smpboot-reuse-timer-calibration.patch deleted file mode 100644 index 48be94a..0000000 --- a/patches/boot_time_opt_guest/0108-smpboot-reuse-timer-calibration.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 634947be6c24d844af5f6ecf59453f2ddc09e032 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Wed, 11 Feb 2015 17:28:14 -0600 -Subject: [PATCH 108/114] smpboot: reuse timer calibration - -NO point recalibrating for known-constant tsc... saves 200ms+ of boot time. - -Author: Arjan van de Ven - -Signed-off-by: Miguel Bernal Marin ---- - arch/x86/kernel/smpboot.c | 2 +- - arch/x86/kernel/tsc.c | 3 +++ - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c -index 99b920d0e516..e17bb425bb52 100644 ---- a/arch/x86/kernel/smpboot.c -+++ b/arch/x86/kernel/smpboot.c -@@ -761,7 +761,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - -- udelay(init_udelay); -+ udelay(100); - - pr_debug("Deasserting INIT\n"); - -diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c -index 37e7cf544e51..e99be8a6a132 100644 ---- a/arch/x86/kernel/tsc.c -+++ b/arch/x86/kernel/tsc.c -@@ -1413,6 +1413,9 @@ unsigned long calibrate_delay_is_known(void) - if (!mask) - return 0; - -+ if (cpu !=0) -+ return cpu_data(0).loops_per_jiffy; -+ - sibling = cpumask_any_but(mask, cpu); - if (sibling < nr_cpu_ids) - return cpu_data(sibling).loops_per_jiffy; --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0109-pci-probe-identify-known-devices.patch b/patches/boot_time_opt_guest/0109-pci-probe-identify-known-devices.patch new file mode 100644 index 0000000..264ef3e --- /dev/null +++ b/patches/boot_time_opt_guest/0109-pci-probe-identify-known-devices.patch @@ -0,0 +1,190 @@ +From 25e68c5e04b0eec5490a1c7d3887990cd33ccf1b Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Sat, 14 Feb 2015 09:49:41 -0600 +Subject: [PATCH 109/114] pci: probe: identify known devices + +Author: Arjan van de Ven +Modify-by: Miguel Bernal Marin + +Signed-off-by: Miguel Bernal Marin +--- + drivers/pci/probe.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 156 insertions(+) + +diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c +index 010c5e1c43dd..69cc7224fec2 100644 +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -163,6 +163,159 @@ static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar) + + #define PCI_COMMAND_DECODE_ENABLE (PCI_COMMAND_MEMORY | PCI_COMMAND_IO) + ++/* shortcut version of __pci_read_base where we know the sizes already */ ++int __pci_read_base_shortcut(struct pci_dev *dev, enum pci_bar_type type, ++ struct resource *res, unsigned int pos, u32 sz_in, u32 sz2_in) ++{ ++ u32 l, sz; ++ u64 l64, sz64, mask64; ++ struct pci_bus_region region, inverted_region; ++ ++ res->name = pci_name(dev); ++ ++ pci_read_config_dword(dev, pos, &l); ++ ++ sz = sz_in; ++ ++ /* ++ * All bits set in sz means the device isn't working properly. ++ * If the BAR isn't implemented, all bits must be 0. If it's a ++ * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit ++ * 1 must be clear. ++ * Here we set the size and is not 0xffffffff ++ */ ++ ++ /* ++ * I don't know how l can have all bits set. Copied from old code. ++ * Maybe it fixes a bug on some ancient platform. ++ */ ++ if (l == 0xffffffff) ++ l = 0; ++ ++ if (type == pci_bar_unknown) { ++ res->flags = decode_bar(dev, l); ++ res->flags |= IORESOURCE_SIZEALIGN; ++ if (res->flags & IORESOURCE_IO) { ++ l64 = l & PCI_BASE_ADDRESS_IO_MASK; ++ sz64 = sz & PCI_BASE_ADDRESS_IO_MASK; ++ mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT; ++ } else { ++ l64 = l & PCI_BASE_ADDRESS_MEM_MASK; ++ sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK; ++ mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK; ++ } ++ } else { ++ res->flags |= (l & IORESOURCE_ROM_ENABLE); ++ l64 = l & PCI_ROM_ADDRESS_MASK; ++ sz64 = sz & PCI_ROM_ADDRESS_MASK; ++ mask64 = (u32)PCI_ROM_ADDRESS_MASK; ++ } ++ ++ if (res->flags & IORESOURCE_MEM_64) { ++ pci_read_config_dword(dev, pos + 4, &l); ++ sz = sz2_in; ++ ++ l64 |= ((u64)l << 32); ++ sz64 |= ((u64)sz << 32); ++ mask64 |= ((u64)~0 << 32); ++ } ++ ++ if (!sz64) ++ goto fail; ++ ++ sz64 = pci_size(l64, sz64, mask64); ++ if (!sz64) { ++ dev_info(&dev->dev, FW_BUG "reg 0x%x: invalid BAR (can't size)\n", ++ pos); ++ goto fail; ++ } ++ ++ if (res->flags & IORESOURCE_MEM_64) { ++ if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) && ++ sz64 > 0x100000000ULL) { ++ res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; ++ res->start = 0; ++ res->end = 0; ++ dev_err(&dev->dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n", ++ pos, (unsigned long long)sz64); ++ goto out; ++ } ++ ++ if ((sizeof(dma_addr_t) < 8) && l) { ++ /* Above 32-bit boundary; try to reallocate */ ++ res->flags |= IORESOURCE_UNSET; ++ res->start = 0; ++ res->end = sz64; ++ dev_info(&dev->dev, "reg 0x%x: can't handle BAR above 4GB (bus address %#010llx)\n", ++ pos, (unsigned long long)l64); ++ goto out; ++ } ++ } ++ ++ region.start = l64; ++ region.end = l64 + sz64; ++ ++ pcibios_bus_to_resource(dev->bus, res, ®ion); ++ pcibios_resource_to_bus(dev->bus, &inverted_region, res); ++ ++ /* ++ * If "A" is a BAR value (a bus address), "bus_to_resource(A)" is ++ * the corresponding resource address (the physical address used by ++ * the CPU. Converting that resource address back to a bus address ++ * should yield the original BAR value: ++ * ++ * resource_to_bus(bus_to_resource(A)) == A ++ * ++ * If it doesn't, CPU accesses to "bus_to_resource(A)" will not ++ * be claimed by the device. ++ */ ++ if (inverted_region.start != region.start) { ++ res->flags |= IORESOURCE_UNSET; ++ res->start = 0; ++ res->end = region.end - region.start; ++ dev_info(&dev->dev, "reg 0x%x: initial BAR value %#010llx invalid\n", ++ pos, (unsigned long long)region.start); ++ } ++ ++ goto out; ++ ++ ++fail: ++ res->flags = 0; ++out: ++ if (res->flags) ++ dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x: %pR\n", pos, res); ++ ++ return (res->flags & IORESOURCE_MEM_64) ? 1 : 0; ++} ++ ++static int is_known_device(struct pci_dev *dev, int pos, int *sz) ++{ ++ /* Red Hat, Inc : Virtio network device */ ++ if (dev->vendor == 0x1af4 && dev->device == 0x1000) { ++ if (pos == 0x10) { ++ *sz = 0xffffffe1; ++ return 1; ++ } ++ if (pos == 0x14) { ++ *sz = 0xfffff000; ++ return 1; ++ } ++ } ++ /* Red Hat, Inc : Virtio block device */ ++ if (dev->vendor == 0x1af4 && dev->device == 0x1001) { ++ if (pos == 0x10) { ++ *sz = 0xffffffc1; ++ return 1; ++ } ++ if (pos == 0x14) { ++ *sz = 0xfffff000; ++ return 1; ++ } ++ } ++ return 0; ++} ++ + /** + * pci_read_base - read a PCI BAR + * @dev: the PCI device +@@ -182,6 +335,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, + + mask = type ? PCI_ROM_ADDRESS_MASK : ~0; + ++ if (is_known_device(dev, pos, &sz)) ++ return __pci_read_base_shortcut(dev, type, res, pos, sz, 0); ++ + res->name = pci_name(dev); + + printk("clr: Starting probe for %s\n", res->name); +-- +2.13.2 + diff --git a/patches/boot_time_opt_guest/0109-perf.patch b/patches/boot_time_opt_guest/0109-perf.patch deleted file mode 100644 index 75f50f6..0000000 --- a/patches/boot_time_opt_guest/0109-perf.patch +++ /dev/null @@ -1,28 +0,0 @@ -From cce700dfbd5fdbf72b96e6479ca539ab4d880ce2 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Wed, 4 Nov 2015 15:17:10 -0600 -Subject: [PATCH 109/114] perf - -Author: Arjan van de Ven - -Signed-off-by: Miguel Bernal Marin ---- - arch/x86/events/intel/core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c -index eb1484c86bb4..c13ea26ac066 100644 ---- a/arch/x86/events/intel/core.c -+++ b/arch/x86/events/intel/core.c -@@ -4040,7 +4040,7 @@ __init int intel_pmu_init(void) - */ - if (x86_pmu.extra_regs) { - for (er = x86_pmu.extra_regs; er->msr; er++) { -- er->extra_msr_access = check_msr(er->msr, 0x11UL); -+ er->extra_msr_access = false; - /* Disable LBR select mapping */ - if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) - x86_pmu.lbr_sel_map = NULL; --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0110-init-no-wait-for-the-known-devices.patch b/patches/boot_time_opt_guest/0110-init-no-wait-for-the-known-devices.patch new file mode 100644 index 0000000..3021ab2 --- /dev/null +++ b/patches/boot_time_opt_guest/0110-init-no-wait-for-the-known-devices.patch @@ -0,0 +1,39 @@ +From 33a759b3c86e05a2d3e9d201edc8caa123beb16a Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Mon, 22 Jun 2015 09:33:33 -0500 +Subject: [PATCH 110/114] init: no wait for the known devices + +No wait for the known devices to complete their probing + +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +--- + init/do_mounts.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/init/do_mounts.c b/init/do_mounts.c +index c2de5104aad2..40725f0f5fb3 100644 +--- a/init/do_mounts.c ++++ b/init/do_mounts.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -563,7 +564,8 @@ void __init prepare_namespace(void) + * For example, it is not atypical to wait 5 seconds here + * for the touchpad of a laptop to initialize. + */ +- wait_for_device_probe(); ++ //wait_for_device_probe(); ++ async_synchronize_full(); + + md_run_setup(); + +-- +2.13.2 + diff --git a/patches/boot_time_opt_guest/0110-pci-probe-identify-known-devices.patch b/patches/boot_time_opt_guest/0110-pci-probe-identify-known-devices.patch deleted file mode 100644 index 742a045..0000000 --- a/patches/boot_time_opt_guest/0110-pci-probe-identify-known-devices.patch +++ /dev/null @@ -1,190 +0,0 @@ -From c662d99134b67c58e63ecc17c2531588a3a51596 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Sat, 14 Feb 2015 09:49:41 -0600 -Subject: [PATCH 110/114] pci: probe: identify known devices - -Author: Arjan van de Ven -Modify-by: Miguel Bernal Marin - -Signed-off-by: Miguel Bernal Marin ---- - drivers/pci/probe.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 156 insertions(+) - -diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c -index 7399a06698da..4fb2d7fed4c5 100644 ---- a/drivers/pci/probe.c -+++ b/drivers/pci/probe.c -@@ -163,6 +163,159 @@ static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar) - - #define PCI_COMMAND_DECODE_ENABLE (PCI_COMMAND_MEMORY | PCI_COMMAND_IO) - -+/* shortcut version of __pci_read_base where we know the sizes already */ -+int __pci_read_base_shortcut(struct pci_dev *dev, enum pci_bar_type type, -+ struct resource *res, unsigned int pos, u32 sz_in, u32 sz2_in) -+{ -+ u32 l, sz; -+ u64 l64, sz64, mask64; -+ struct pci_bus_region region, inverted_region; -+ -+ res->name = pci_name(dev); -+ -+ pci_read_config_dword(dev, pos, &l); -+ -+ sz = sz_in; -+ -+ /* -+ * All bits set in sz means the device isn't working properly. -+ * If the BAR isn't implemented, all bits must be 0. If it's a -+ * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit -+ * 1 must be clear. -+ * Here we set the size and is not 0xffffffff -+ */ -+ -+ /* -+ * I don't know how l can have all bits set. Copied from old code. -+ * Maybe it fixes a bug on some ancient platform. -+ */ -+ if (l == 0xffffffff) -+ l = 0; -+ -+ if (type == pci_bar_unknown) { -+ res->flags = decode_bar(dev, l); -+ res->flags |= IORESOURCE_SIZEALIGN; -+ if (res->flags & IORESOURCE_IO) { -+ l64 = l & PCI_BASE_ADDRESS_IO_MASK; -+ sz64 = sz & PCI_BASE_ADDRESS_IO_MASK; -+ mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT; -+ } else { -+ l64 = l & PCI_BASE_ADDRESS_MEM_MASK; -+ sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK; -+ mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK; -+ } -+ } else { -+ res->flags |= (l & IORESOURCE_ROM_ENABLE); -+ l64 = l & PCI_ROM_ADDRESS_MASK; -+ sz64 = sz & PCI_ROM_ADDRESS_MASK; -+ mask64 = (u32)PCI_ROM_ADDRESS_MASK; -+ } -+ -+ if (res->flags & IORESOURCE_MEM_64) { -+ pci_read_config_dword(dev, pos + 4, &l); -+ sz = sz2_in; -+ -+ l64 |= ((u64)l << 32); -+ sz64 |= ((u64)sz << 32); -+ mask64 |= ((u64)~0 << 32); -+ } -+ -+ if (!sz64) -+ goto fail; -+ -+ sz64 = pci_size(l64, sz64, mask64); -+ if (!sz64) { -+ dev_info(&dev->dev, FW_BUG "reg 0x%x: invalid BAR (can't size)\n", -+ pos); -+ goto fail; -+ } -+ -+ if (res->flags & IORESOURCE_MEM_64) { -+ if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) && -+ sz64 > 0x100000000ULL) { -+ res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; -+ res->start = 0; -+ res->end = 0; -+ dev_err(&dev->dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n", -+ pos, (unsigned long long)sz64); -+ goto out; -+ } -+ -+ if ((sizeof(dma_addr_t) < 8) && l) { -+ /* Above 32-bit boundary; try to reallocate */ -+ res->flags |= IORESOURCE_UNSET; -+ res->start = 0; -+ res->end = sz64; -+ dev_info(&dev->dev, "reg 0x%x: can't handle BAR above 4GB (bus address %#010llx)\n", -+ pos, (unsigned long long)l64); -+ goto out; -+ } -+ } -+ -+ region.start = l64; -+ region.end = l64 + sz64; -+ -+ pcibios_bus_to_resource(dev->bus, res, ®ion); -+ pcibios_resource_to_bus(dev->bus, &inverted_region, res); -+ -+ /* -+ * If "A" is a BAR value (a bus address), "bus_to_resource(A)" is -+ * the corresponding resource address (the physical address used by -+ * the CPU. Converting that resource address back to a bus address -+ * should yield the original BAR value: -+ * -+ * resource_to_bus(bus_to_resource(A)) == A -+ * -+ * If it doesn't, CPU accesses to "bus_to_resource(A)" will not -+ * be claimed by the device. -+ */ -+ if (inverted_region.start != region.start) { -+ res->flags |= IORESOURCE_UNSET; -+ res->start = 0; -+ res->end = region.end - region.start; -+ dev_info(&dev->dev, "reg 0x%x: initial BAR value %#010llx invalid\n", -+ pos, (unsigned long long)region.start); -+ } -+ -+ goto out; -+ -+ -+fail: -+ res->flags = 0; -+out: -+ if (res->flags) -+ dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x: %pR\n", pos, res); -+ -+ return (res->flags & IORESOURCE_MEM_64) ? 1 : 0; -+} -+ -+static int is_known_device(struct pci_dev *dev, int pos, int *sz) -+{ -+ /* Red Hat, Inc : Virtio network device */ -+ if (dev->vendor == 0x1af4 && dev->device == 0x1000) { -+ if (pos == 0x10) { -+ *sz = 0xffffffe1; -+ return 1; -+ } -+ if (pos == 0x14) { -+ *sz = 0xfffff000; -+ return 1; -+ } -+ } -+ /* Red Hat, Inc : Virtio block device */ -+ if (dev->vendor == 0x1af4 && dev->device == 0x1001) { -+ if (pos == 0x10) { -+ *sz = 0xffffffc1; -+ return 1; -+ } -+ if (pos == 0x14) { -+ *sz = 0xfffff000; -+ return 1; -+ } -+ } -+ return 0; -+} -+ - /** - * pci_read_base - read a PCI BAR - * @dev: the PCI device -@@ -182,6 +335,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, - - mask = type ? PCI_ROM_ADDRESS_MASK : ~0; - -+ if (is_known_device(dev, pos, &sz)) -+ return __pci_read_base_shortcut(dev, type, res, pos, sz, 0); -+ - res->name = pci_name(dev); - - printk("clr: Starting probe for %s\n", res->name); --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0111-init-no-wait-for-the-known-devices.patch b/patches/boot_time_opt_guest/0111-init-no-wait-for-the-known-devices.patch deleted file mode 100644 index 701a18d..0000000 --- a/patches/boot_time_opt_guest/0111-init-no-wait-for-the-known-devices.patch +++ /dev/null @@ -1,39 +0,0 @@ -From be2ab4809c6b5058fbf3cd54c0f59c56416e572c Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Mon, 22 Jun 2015 09:33:33 -0500 -Subject: [PATCH 111/114] init: no wait for the known devices - -No wait for the known devices to complete their probing - -Author: Arjan van de Ven - -Signed-off-by: Miguel Bernal Marin ---- - init/do_mounts.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/init/do_mounts.c b/init/do_mounts.c -index c2de5104aad2..40725f0f5fb3 100644 ---- a/init/do_mounts.c -+++ b/init/do_mounts.c -@@ -28,6 +28,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -563,7 +564,8 @@ void __init prepare_namespace(void) - * For example, it is not atypical to wait 5 seconds here - * for the touchpad of a laptop to initialize. - */ -- wait_for_device_probe(); -+ //wait_for_device_probe(); -+ async_synchronize_full(); - - md_run_setup(); - --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0111-ksm-wakeups.patch b/patches/boot_time_opt_guest/0111-ksm-wakeups.patch new file mode 100644 index 0000000..c4472a1 --- /dev/null +++ b/patches/boot_time_opt_guest/0111-ksm-wakeups.patch @@ -0,0 +1,32 @@ +From 7cb71dfb076d5201c7cee4b4c3e85d98f9695cc3 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Mon, 14 Mar 2016 11:06:46 -0600 +Subject: [PATCH 111/114] ksm-wakeups + +reduce wakeups in ksm +--- + mm/ksm.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/mm/ksm.c b/mm/ksm.c +index 216184af0e19..1e7e4e8bb5f9 100644 +--- a/mm/ksm.c ++++ b/mm/ksm.c +@@ -1767,8 +1767,12 @@ static int ksm_scan_thread(void *nothing) + try_to_freeze(); + + if (ksmd_should_run()) { +- schedule_timeout_interruptible( +- msecs_to_jiffies(ksm_thread_sleep_millisecs)); ++ if (ksm_thread_sleep_millisecs >= 1000) ++ schedule_timeout_interruptible( ++ msecs_to_jiffies(round_jiffies_relative(ksm_thread_sleep_millisecs))); ++ else ++ schedule_timeout_interruptible( ++ msecs_to_jiffies(ksm_thread_sleep_millisecs)); + } else { + wait_event_freezable(ksm_thread_wait, + ksmd_should_run() || kthread_should_stop()); +-- +2.13.2 + diff --git a/patches/boot_time_opt_guest/0112-ksm-wakeups.patch b/patches/boot_time_opt_guest/0112-ksm-wakeups.patch deleted file mode 100644 index b131e3f..0000000 --- a/patches/boot_time_opt_guest/0112-ksm-wakeups.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 2dc48e4b5c651691b7028991b64c935047b41b19 Mon Sep 17 00:00:00 2001 -From: Arjan van de Ven -Date: Mon, 14 Mar 2016 11:06:46 -0600 -Subject: [PATCH 112/114] ksm-wakeups - -reduce wakeups in ksm ---- - mm/ksm.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/mm/ksm.c b/mm/ksm.c -index 9ae6011a41f8..eecd3ff669e2 100644 ---- a/mm/ksm.c -+++ b/mm/ksm.c -@@ -1725,8 +1725,12 @@ static int ksm_scan_thread(void *nothing) - try_to_freeze(); - - if (ksmd_should_run()) { -- schedule_timeout_interruptible( -- msecs_to_jiffies(ksm_thread_sleep_millisecs)); -+ if (ksm_thread_sleep_millisecs >= 1000) -+ schedule_timeout_interruptible( -+ msecs_to_jiffies(round_jiffies_relative(ksm_thread_sleep_millisecs))); -+ else -+ schedule_timeout_interruptible( -+ msecs_to_jiffies(ksm_thread_sleep_millisecs)); - } else { - wait_event_freezable(ksm_thread_wait, - ksmd_should_run() || kthread_should_stop()); --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0113-init-do_mounts-recreate-dev-root.patch b/patches/boot_time_opt_guest/0113-init-do_mounts-recreate-dev-root.patch deleted file mode 100644 index 047eddb..0000000 --- a/patches/boot_time_opt_guest/0113-init-do_mounts-recreate-dev-root.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 179b7f41d5509f93cd297cc81c5d8da4a3123d9d Mon Sep 17 00:00:00 2001 -From: Miguel Bernal Marin -Date: Fri, 20 Nov 2015 14:01:26 -0600 -Subject: [PATCH 113/114] init: do_mounts: recreate /dev/root - -Rootfs shows as is mounted in /dev/root, but this devices is not present in -/dev directory. - -Signed-off-by: Miguel Bernal Marin ---- - init/do_mounts.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/init/do_mounts.c b/init/do_mounts.c -index 40725f0f5fb3..78b5b1dba8ca 100644 ---- a/init/do_mounts.c -+++ b/init/do_mounts.c -@@ -550,6 +550,7 @@ void __init mount_root(void) - void __init prepare_namespace(void) - { - int is_floppy; -+ int err; - - if (root_delay) { - printk(KERN_INFO "Waiting %d sec before mounting root device...\n", -@@ -604,6 +605,13 @@ void __init prepare_namespace(void) - devtmpfs_mount("dev"); - sys_mount(".", "/", NULL, MS_MOVE, NULL); - sys_chroot("."); -+#ifdef CONFIG_BLOCK -+ /* recreate the /dev/root */ -+ err = create_dev("/dev/root", ROOT_DEV); -+ -+ if (err < 0) -+ pr_emerg("Failed to create /dev/root: %d\n", err); -+#endif - } - - static bool is_tmpfs; --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0114-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch b/patches/boot_time_opt_guest/0114-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch deleted file mode 100644 index dee9058..0000000 --- a/patches/boot_time_opt_guest/0114-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 02fd2e6a7c708bf973209f9b238c5c61cbf15239 Mon Sep 17 00:00:00 2001 -From: Alan Cox -Date: Thu, 10 Mar 2016 15:11:28 +0000 -Subject: [PATCH 114/114] xattr: allow setting user.* attributes on symlinks by - owner - -Kvmtool and clear containers supports using user attributes to label host -files with the virtual uid/guid of the file in the container. This allows an -end user to manage their files and a complete uid space without all the ugly -namespace stuff. - -The one gap in the support is symlinks because an end user can change the -ownership of a symbolic link. We support attributes on these files as you -can already (as root) set security attributes on them. - -The current rules seem slightly over-paranoid and as we have a use case this -patch enables updating the attributes on a symbolic link IFF you are the -owner of the synlink (as permissions are not usually meaningful on the link -itself). - -Signed-off-by: Alan Cox ---- - fs/xattr.c | 14 ++++++++------ - 1 file changed, 8 insertions(+), 6 deletions(-) - -diff --git a/fs/xattr.c b/fs/xattr.c -index 7e3317cf4045..e005c30acb2c 100644 ---- a/fs/xattr.c -+++ b/fs/xattr.c -@@ -118,15 +118,17 @@ xattr_permission(struct inode *inode, const char *name, int mask) - } - - /* -- * In the user.* namespace, only regular files and directories can have -- * extended attributes. For sticky directories, only the owner and -- * privileged users can write attributes. -+ * In the user.* namespace, only regular files, symbolic links, and -+ * directories can have extended attributes. For symbolic links and -+ * sticky directories, only the owner and privileged users can write -+ * attributes. - */ - if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { -- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) -+ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) - return (mask & MAY_WRITE) ? -EPERM : -ENODATA; -- if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && -- (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) -+ if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX)) -+ || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE) -+ && !inode_owner_or_capable(inode)) - return -EPERM; - } - --- -2.11.1 - diff --git a/patches/boot_time_opt_guest/0151-mm-Export-do_madvise.patch b/patches/boot_time_opt_guest/0151-mm-Export-do_madvise.patch index a6dbff7..5a37ea3 100644 --- a/patches/boot_time_opt_guest/0151-mm-Export-do_madvise.patch +++ b/patches/boot_time_opt_guest/0151-mm-Export-do_madvise.patch @@ -1,4 +1,4 @@ -From 99b4cdcce43ad0f706120bef26fef8c628c572cf Mon Sep 17 00:00:00 2001 +From c8193d2eb7b27a1250d810d49d2f2114912972c8 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Mon, 23 Jan 2017 15:03:52 -0800 Subject: [PATCH 151/154] mm: Export do_madvise() @@ -21,10 +21,10 @@ Signed-off-by: Sebastien Boeuf 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h -index 0b5b2e4df14e..925ec25f99a8 100644 +index 6f543a47fc92..bf52e0498247 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -2450,5 +2450,7 @@ void __init setup_nr_node_ids(void); +@@ -2557,5 +2557,7 @@ void __init setup_nr_node_ids(void); static inline void setup_nr_node_ids(void) {} #endif @@ -33,10 +33,10 @@ index 0b5b2e4df14e..925ec25f99a8 100644 #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/madvise.c b/mm/madvise.c -index 93fb63e88b5e..c8bbf93d4978 100644 +index 25b78ee4fc2c..77cfd2580e1b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c -@@ -618,9 +618,7 @@ madvise_behavior_valid(int behavior) +@@ -699,9 +699,7 @@ madvise_behavior_valid(int behavior) } /* @@ -47,7 +47,7 @@ index 93fb63e88b5e..c8bbf93d4978 100644 * handle paging I/O in this VM area. The idea is to help the kernel * use appropriate read-ahead and caching techniques. The information * provided is advisory only, and can be safely disregarded by the -@@ -673,7 +671,7 @@ madvise_behavior_valid(int behavior) +@@ -754,7 +752,7 @@ madvise_behavior_valid(int behavior) * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. */ @@ -56,7 +56,7 @@ index 93fb63e88b5e..c8bbf93d4978 100644 { unsigned long end, tmp; struct vm_area_struct *vma, *prev; -@@ -767,3 +765,22 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) +@@ -849,3 +847,22 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) return error; } @@ -80,5 +80,5 @@ index 93fb63e88b5e..c8bbf93d4978 100644 + return do_madvise(start, len_in, behavior); +} -- -2.12.1 +2.13.2 diff --git a/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch b/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch index 5f44930..7197ce7 100644 --- a/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch +++ b/patches/boot_time_opt_guest/0152-x86-kvm-Notify-host-to-release-pages.patch @@ -1,4 +1,4 @@ -From d28921b5f797829e4e676f7968ae688ef96b7992 Mon Sep 17 00:00:00 2001 +From c08f0e4d768db796098c8bdc64c3358baee076e7 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Mon, 23 Jan 2017 15:08:55 -0800 Subject: [PATCH 152/154] x86: kvm: Notify host to release pages @@ -37,15 +37,15 @@ Signed-off-by: Sebastien Boeuf include/uapi/linux/kvm_para.h | 3 +++ kernel/sysctl.c | 7 +++++++ mm/Makefile | 2 +- - mm/kvm.c | 25 +++++++++++++++++++++++++ - 6 files changed, 58 insertions(+), 1 deletion(-) + mm/kvm.c | 26 ++++++++++++++++++++++++++ + 6 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 mm/kvm.c diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 582c75311f95..683a94dd5f03 100644 +index 0e846f0cb83b..7bd380ff8dfa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -46,6 +46,7 @@ +@@ -45,6 +45,7 @@ #include #include #include @@ -53,7 +53,7 @@ index 582c75311f95..683a94dd5f03 100644 #include #include #include -@@ -6019,6 +6020,19 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) +@@ -6206,6 +6207,19 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } @@ -73,10 +73,10 @@ index 582c75311f95..683a94dd5f03 100644 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) { vcpu->arch.apicv_active = false; -@@ -6065,6 +6079,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) - kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); - ret = 0; +@@ -6257,6 +6271,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) + ret = kvm_pv_clock_pairing(vcpu, a0, a1); break; + #endif + case KVM_HC_RETURN_MEM: + ret = kvm_pv_return_mem_op(vcpu->kvm, a0, a1); + break; @@ -84,10 +84,10 @@ index 582c75311f95..683a94dd5f03 100644 ret = -KVM_ENOSYS; break; diff --git a/include/linux/mm.h b/include/linux/mm.h -index 925ec25f99a8..833f23d98baa 100644 +index bf52e0498247..d8bcf5c4b996 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -2303,6 +2303,11 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); +@@ -2406,6 +2406,11 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); extern int sysctl_drop_caches; int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); @@ -100,13 +100,13 @@ index 925ec25f99a8..833f23d98baa 100644 void drop_slab(void); diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h -index bf6cd7d5cac2..7d90f77d87d0 100644 +index fed506aeff62..ebc482ce7d38 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h -@@ -23,6 +23,9 @@ - #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 +@@ -25,6 +25,9 @@ #define KVM_HC_MIPS_EXIT_VM 7 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 + #define KVM_HC_CLOCK_PAIRING 9 +#define KVM_HC_RETURN_MEM 10 + +#define KVM_MAX_RET_MEM_SIZE (1 << 22) // 4MiB @@ -114,10 +114,10 @@ index bf6cd7d5cac2..7d90f77d87d0 100644 /* * hypercalls use architecture specific diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index c1095cdc0fe2..d8ae774fa042 100644 +index 4dfba1a76cc3..771a930cadfa 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c -@@ -1398,6 +1398,13 @@ static struct ctl_table vm_table[] = { +@@ -1387,6 +1387,13 @@ static struct ctl_table vm_table[] = { .extra1 = &one, .extra2 = &four, }, @@ -132,25 +132,26 @@ index c1095cdc0fe2..d8ae774fa042 100644 { .procname = "compact_memory", diff --git a/mm/Makefile b/mm/Makefile -index 295bd7a..6455723 100644 +index 026f6a828a50..28d95bed7e1d 100644 --- a/mm/Makefile +++ b/mm/Makefile -@@ -47,6 +47,8 @@ else - obj-y += bootmem.o - endif +@@ -39,7 +39,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ + mm_init.o mmu_context.o percpu.o slab_common.o \ + compaction.o vmacache.o swap_slots.o \ + interval_tree.o list_lru.o workingset.o \ +- debug.o $(mmu-y) ++ debug.o kvm.o $(mmu-y) + + obj-y += init-mm.o -+obj-y += kvm.o -+ - obj-$(CONFIG_ADVISE_SYSCALLS) += fadvise.o - ifdef CONFIG_MMU - obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o diff --git a/mm/kvm.c b/mm/kvm.c new file mode 100644 -index 000000000000..8945f6a311b9 +index 000000000000..1c5600788221 --- /dev/null +++ b/mm/kvm.c -@@ -0,0 +1,25 @@ +@@ -0,0 +1,26 @@ +#include ++#include + +int sysctl_kvm_madv_instant_free; + @@ -176,5 +177,5 @@ index 000000000000..8945f6a311b9 + return 0; +} -- -2.12.1 +2.13.2 diff --git a/patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch b/patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch index cdb876a..e0364ef 100644 --- a/patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch +++ b/patches/boot_time_opt_guest/0153-x86-Return-memory-from-guest-to-host-kernel.patch @@ -1,4 +1,4 @@ -From 855ef164854307839c08c60688eaeac14f9a649e Mon Sep 17 00:00:00 2001 +From 986184ccf3e478cf9eeffc635e444ea025eb4052 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Mon, 23 Jan 2017 15:26:13 -0800 Subject: [PATCH 153/154] x86: Return memory from guest to host kernel @@ -86,10 +86,10 @@ index bc62e7cbf1b1..4a2f6d1adbd2 100644 #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c -index edbbfc854e39..14167b3f6514 100644 +index 43e10d6fdbed..8479d3fb6206 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c -@@ -552,6 +552,16 @@ static __init int activate_jump_labels(void) +@@ -549,6 +549,16 @@ static __init int activate_jump_labels(void) } arch_initcall(activate_jump_labels); @@ -131,18 +131,18 @@ index 4efc3f56e6df..26eb3a05a8a3 100644 + #endif /* _LINUX_MM_ARCH_HOOKS_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 1460e6ad5e14..5f6e6371bc6f 100644 +index 2302f250d6b1..65e9121583e1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -64,6 +64,7 @@ +@@ -65,6 +65,7 @@ #include #include #include +#include + #include #include - #include -@@ -855,6 +856,7 @@ static inline void __free_one_page(struct page *page, +@@ -869,6 +870,7 @@ static inline void __free_one_page(struct page *page, } done_merging: @@ -151,5 +151,5 @@ index 1460e6ad5e14..5f6e6371bc6f 100644 /* -- -2.12.1 +2.13.2 diff --git a/patches/boot_time_opt_guest/0154-sysctl-vm-Fine-grained-cache-shrinking.patch b/patches/boot_time_opt_guest/0154-sysctl-vm-Fine-grained-cache-shrinking.patch index 07d4a83..a5d2b29 100644 --- a/patches/boot_time_opt_guest/0154-sysctl-vm-Fine-grained-cache-shrinking.patch +++ b/patches/boot_time_opt_guest/0154-sysctl-vm-Fine-grained-cache-shrinking.patch @@ -1,4 +1,4 @@ -From 2c145b5233b504f5226a0f4bc44baeef33b444d8 Mon Sep 17 00:00:00 2001 +From c7d8564d5d34c615e5ab03aa1e270888f49ff8b5 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Mon, 23 Jan 2017 15:32:39 -0800 Subject: [PATCH 154/154] sysctl: vm: Fine-grained cache shrinking @@ -79,10 +79,10 @@ index d72d52b90433..f564dfcc13a4 100644 + return 0; +} diff --git a/include/linux/mm.h b/include/linux/mm.h -index 833f23d98baa..0bb66c1c31c9 100644 +index d8bcf5c4b996..9a1fc3cecac8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -2308,6 +2308,10 @@ extern int kvm_ret_mem_advice; +@@ -2411,6 +2411,10 @@ extern int kvm_ret_mem_advice; int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); @@ -94,10 +94,10 @@ index 833f23d98baa..0bb66c1c31c9 100644 void drop_slab(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index d8ae774fa042..5dc9a46ae212 100644 +index 771a930cadfa..3bdd8030b7af 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c -@@ -1405,6 +1405,14 @@ static struct ctl_table vm_table[] = { +@@ -1394,6 +1394,14 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = kvm_madv_instant_free_sysctl_handler, }, @@ -113,10 +113,10 @@ index d8ae774fa042..5dc9a46ae212 100644 { .procname = "compact_memory", diff --git a/mm/vmscan.c b/mm/vmscan.c -index 30a88b945a44..1198e74d1860 100644 +index 8ad39bbc79e6..d977e489d7f1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -3525,7 +3525,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) +@@ -3574,7 +3574,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) wake_up_interruptible(&pgdat->kswapd_wait); } @@ -124,7 +124,7 @@ index 30a88b945a44..1198e74d1860 100644 /* * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of * freed pages. -@@ -3564,7 +3563,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) +@@ -3614,7 +3613,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) return nr_reclaimed; } @@ -133,5 +133,5 @@ index 30a88b945a44..1198e74d1860 100644 /* It's optimal to keep kswapds on the same CPUs as their memory, but not required for correctness. So if the last cpu in a node goes -- -2.12.1 +2.13.2 diff --git a/patches/boot_time_opt_guest/guest_boot_time_opt.scc b/patches/boot_time_opt_guest/guest_boot_time_opt.scc index 3636c01..c571cbf 100644 --- a/patches/boot_time_opt_guest/guest_boot_time_opt.scc +++ b/patches/boot_time_opt_guest/guest_boot_time_opt.scc @@ -1,17 +1,15 @@ define KFEATURE_DESCRIPTION "Boot time optimization changes ported from ClearLinux , https://github.com/clearlinux-pkgs/linux-kvm" define KFEATURE_COMPATIBILITY all -patch 0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch -patch 0104-fbcon-enable-no-blink-by-default.patch -patch 0105-vmstats-wakeups.patch -# Remove patch because it causes ixgvbevf to not initialize correctly in the guest -#patch 0106-pci-probe.patch -patch 0107-cgroup.patch -patch 0108-smpboot-reuse-timer-calibration.patch -patch 0109-perf.patch -patch 0110-pci-probe-identify-known-devices.patch -patch 0111-init-no-wait-for-the-known-devices.patch -patch 0112-ksm-wakeups.patch +patch 0102-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch +patch 0103-fbcon-enable-no-blink-by-default.patch +patch 0104-mm-reduce-vmstat-wakeups.patch +patch 0106-cgroup-delayed-work.patch +patch 0107-smpboot-reuse-timer-calibration.patch +patch 0108-perf.patch +patch 0109-pci-probe-identify-known-devices.patch +patch 0110-init-no-wait-for-the-known-devices.patch +patch 0111-ksm-wakeups.patch patch 0151-mm-Export-do_madvise.patch patch 0152-x86-kvm-Notify-host-to-release-pages.patch -- cgit v1.2.3-54-g00ecf