From c47f2bfcefee5d746dd72af26faec2b44ff553b5 Mon Sep 17 00:00:00 2001 From: Adrian Calianu Date: Tue, 5 Sep 2017 17:22:26 +0200 Subject: add new kernel features needed by guest Signed-off-by: Adrian Calianu --- ...kip-synchronize_rcu-on-single-CPU-systems.patch | 34 ++++ ...kip-synchronize_rcu-if-there-is-no-old-op.patch | 38 +++++ .../0104-fbcon-enable-no-blink-by-default.patch | 26 +++ patches/boot_time_opt/0105-vmstats-wakeups.patch | 28 +++ patches/boot_time_opt/0106-pci-probe.patch | 123 +++++++++++++ patches/boot_time_opt/0107-cgroup.patch | 107 ++++++++++++ .../0108-smpboot-reuse-timer-calibration.patch | 45 +++++ patches/boot_time_opt/0109-perf.patch | 28 +++ .../0110-pci-probe-identify-known-devices.patch | 190 +++++++++++++++++++++ .../0111-init-no-wait-for-the-known-devices.patch | 39 +++++ patches/boot_time_opt/0112-ksm-wakeups.patch | 32 ++++ .../0113-init-do_mounts-recreate-dev-root.patch | 42 +++++ ...-setting-user.-attributes-on-symlinks-by-.patch | 56 ++++++ ...0152-x86-kvm-Notify-host-to-release-pages.patch | 18 +- patches/boot_time_opt/guest_boot_time_opt.scc | 19 +++ ...-Enea-Linux-guest-boot-start-end-messages.patch | 95 +++++++++++ .../kernel_startend_msg/kernel_startend_msg.scc | 2 +- 17 files changed, 912 insertions(+), 10 deletions(-) create mode 100644 patches/boot_time_opt/0102-cpuidle-skip-synchronize_rcu-on-single-CPU-systems.patch create mode 100644 patches/boot_time_opt/0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch create mode 100644 patches/boot_time_opt/0104-fbcon-enable-no-blink-by-default.patch create mode 100644 patches/boot_time_opt/0105-vmstats-wakeups.patch create mode 100644 patches/boot_time_opt/0106-pci-probe.patch create mode 100644 patches/boot_time_opt/0107-cgroup.patch create mode 100644 patches/boot_time_opt/0108-smpboot-reuse-timer-calibration.patch create mode 100644 patches/boot_time_opt/0109-perf.patch create mode 100644 patches/boot_time_opt/0110-pci-probe-identify-known-devices.patch create mode 
100644 patches/boot_time_opt/0111-init-no-wait-for-the-known-devices.patch create mode 100644 patches/boot_time_opt/0112-ksm-wakeups.patch create mode 100644 patches/boot_time_opt/0113-init-do_mounts-recreate-dev-root.patch create mode 100644 patches/boot_time_opt/0114-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch create mode 100644 patches/boot_time_opt/guest_boot_time_opt.scc create mode 100644 patches/kernel_startend_msg/0001-printk-add-Enea-Linux-guest-boot-start-end-messages.patch (limited to 'patches') diff --git a/patches/boot_time_opt/0102-cpuidle-skip-synchronize_rcu-on-single-CPU-systems.patch b/patches/boot_time_opt/0102-cpuidle-skip-synchronize_rcu-on-single-CPU-systems.patch new file mode 100644 index 0000000..1de2a6b --- /dev/null +++ b/patches/boot_time_opt/0102-cpuidle-skip-synchronize_rcu-on-single-CPU-systems.patch @@ -0,0 +1,34 @@ +From 6b0fb5b2a7a157c04d8ab6ad71b092034d0048bf Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 11 Feb 2015 16:19:26 -0600 +Subject: [PATCH 102/114] cpuidle: skip synchronize_rcu() on single CPU systems + +synchronize_rcu() is pretty expensive, and on single CPU systems we don't need +it in this specific case, so skip it. + +Signed-off-by: Arjan van de Ven +Signed-off-by: Miguel Bernal Marin +--- + drivers/cpuidle/cpuidle.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c +index 62810ff3b00f..f1d110411098 100644 +--- a/drivers/cpuidle/cpuidle.c ++++ b/drivers/cpuidle/cpuidle.c +@@ -324,8 +324,11 @@ void cpuidle_uninstall_idle_handler(void) + /* + * Make sure external observers (such as the scheduler) + * are done looking at pointed idle states. ++ * This is only relevant if there is more than one cpu, ++ * if there is only one CPU, that is us... and we're ++ * coherent to ourselves. 
+ */ +- synchronize_rcu(); ++ + } + + /** +-- +2.11.1 + diff --git a/patches/boot_time_opt/0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch b/patches/boot_time_opt/0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch new file mode 100644 index 0000000..d3a20fb --- /dev/null +++ b/patches/boot_time_opt/0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch @@ -0,0 +1,38 @@ +From 7be707833bb35c295eb702d13cf73ac9390e4b31 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 11 Feb 2015 16:25:16 -0600 +Subject: [PATCH 103/114] sysrq: skip synchronize_rcu() if there is no old op + +synchronize_rcu() is expensive. Currently it is called as part of the sysrq +registration/unregistration, which happens during boot several times. +Now, the reason for the synchronize_rcu() is to allow an old registered +operation to expire properly... which is pointless if the old operation +is NULL... +So we can save the common case of the old operation being NULL a lot of time +by just checking for non-NULL prior to the synchronize_rcu() + +Signed-off-by: Arjan van de Ven +Signed-off-by: Miguel Bernal Marin +--- + drivers/tty/sysrq.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c +index 701c085bb19b..c60c7ba57ad9 100644 +--- a/drivers/tty/sysrq.c ++++ b/drivers/tty/sysrq.c +@@ -1065,8 +1065,10 @@ static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p, + * A concurrent __handle_sysrq either got the old op or the new op. + * Wait for it to go away before returning, so the code for an old + * op is not freed (eg. on module unload) while it is in use. ++ * This is only relevant if the old op is not NULL of course. 
+ */ +- synchronize_rcu(); ++ if (remove_op_p) ++ synchronize_rcu(); + + return retval; + } +-- +2.11.1 + diff --git a/patches/boot_time_opt/0104-fbcon-enable-no-blink-by-default.patch b/patches/boot_time_opt/0104-fbcon-enable-no-blink-by-default.patch new file mode 100644 index 0000000..715c195 --- /dev/null +++ b/patches/boot_time_opt/0104-fbcon-enable-no-blink-by-default.patch @@ -0,0 +1,26 @@ +From 5899ff79ed4e3514420e1530a3588a922832dae5 Mon Sep 17 00:00:00 2001 +From: Jose Carlos Venegas Munoz +Date: Mon, 13 Apr 2015 11:26:36 -0500 +Subject: [PATCH 104/114] fbcon: enable no blink by default + +Author: Arjan van de Ven +--- + drivers/video/console/fbcon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c +index a44f5627b82a..95b73366b86f 100644 +--- a/drivers/video/console/fbcon.c ++++ b/drivers/video/console/fbcon.c +@@ -146,7 +146,7 @@ static const struct consw fb_con; + + static int fbcon_set_origin(struct vc_data *); + +-static int fbcon_cursor_noblink; ++static int fbcon_cursor_noblink = 1; + + #define divides(a, b) ((!(a) || (b)%(a)) ? 
0 : 1) + +-- +2.11.1 + diff --git a/patches/boot_time_opt/0105-vmstats-wakeups.patch b/patches/boot_time_opt/0105-vmstats-wakeups.patch new file mode 100644 index 0000000..09b109a --- /dev/null +++ b/patches/boot_time_opt/0105-vmstats-wakeups.patch @@ -0,0 +1,28 @@ +From ff47b4e9be8113b4ba05d6f2afee3db6904bc10f Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 11 Feb 2015 16:47:20 -0600 +Subject: [PATCH 105/114] vmstats: wakeups + +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +--- + mm/vmstat.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/vmstat.c b/mm/vmstat.c +index 7c28df36f50f..efe1b6797139 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1549,7 +1549,7 @@ static const struct file_operations proc_vmstat_file_operations = { + #ifdef CONFIG_SMP + static struct workqueue_struct *vmstat_wq; + static DEFINE_PER_CPU(struct delayed_work, vmstat_work); +-int sysctl_stat_interval __read_mostly = HZ; ++int sysctl_stat_interval __read_mostly = 8 * HZ; + + #ifdef CONFIG_PROC_FS + static void refresh_vm_stats(struct work_struct *work) +-- +2.11.1 + diff --git a/patches/boot_time_opt/0106-pci-probe.patch b/patches/boot_time_opt/0106-pci-probe.patch new file mode 100644 index 0000000..5045926 --- /dev/null +++ b/patches/boot_time_opt/0106-pci-probe.patch @@ -0,0 +1,123 @@ +From b225caf8f743b9f5f9e84d0df711ee0c17e049ae Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 11 Feb 2015 16:53:08 -0600 +Subject: [PATCH 106/114] pci: probe + +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +--- + drivers/pci/probe.c | 43 ++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 40 insertions(+), 3 deletions(-) + +diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c +index 204960e70333..7399a06698da 100644 +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -182,6 +182,10 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, + + mask = type ? 
PCI_ROM_ADDRESS_MASK : ~0; + ++ res->name = pci_name(dev); ++ ++ printk("clr: Starting probe for %s\n", res->name); ++ + /* No printks while decoding is disabled! */ + if (!dev->mmio_always_on) { + pci_read_config_word(dev, PCI_COMMAND, &orig_cmd); +@@ -191,8 +195,6 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, + } + } + +- res->name = pci_name(dev); +- + pci_read_config_dword(dev, pos, &l); + pci_write_config_dword(dev, pos, l | mask); + pci_read_config_dword(dev, pos, &sz); +@@ -324,6 +326,8 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) + if (dev->non_compliant_bars) + return; + ++ printk("clr: pci_read_bases start\n"); ++ + for (pos = 0; pos < howmany; pos++) { + struct resource *res = &dev->resource[pos]; + reg = PCI_BASE_ADDRESS_0 + (pos << 2); +@@ -332,11 +336,13 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) + + if (rom) { + struct resource *res = &dev->resource[PCI_ROM_RESOURCE]; ++ printk("clr: rom path\n"); + dev->rom_base_reg = rom; + res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | + IORESOURCE_READONLY | IORESOURCE_SIZEALIGN; + __pci_read_base(dev, pci_bar_mem32, res, rom); + } ++ printk("clr: pci_read_bases end\n"); + } + + static void pci_read_bridge_io(struct pci_bus *child) +@@ -1311,6 +1317,28 @@ static void pci_msi_setup_pci_dev(struct pci_dev *dev) + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + } + ++static int guess_bar_count(int class) ++{ ++ if (class == 0x068000) ++ return 0; ++ if (class == 0x020000) ++ return 2; ++ if (class == 0x010000) ++ return 2; ++ if (class == 0x00ff00) ++ return 1; ++ return 6; ++} ++ ++static int has_rom(int class, int rom) ++{ ++ if (class == 0x020000) ++ return 0; ++ if (class == 0x010000 || class == 0x00ff00) ++ return 0; ++ return rom; ++} ++ + /** + * pci_setup_device - fill in class and map information of a device + * @dev: the device structure to fill +@@ -1329,6 +1357,9 @@ int 
pci_setup_device(struct pci_dev *dev) + int pos = 0; + struct pci_bus_region region; + struct resource *res; ++ int maxbar; ++ ++ printk("clr: pci_setup_device start\n"); + + if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type)) + return -EIO; +@@ -1383,7 +1414,11 @@ int pci_setup_device(struct pci_dev *dev) + if (class == PCI_CLASS_BRIDGE_PCI) + goto bad; + pci_read_irq(dev); +- pci_read_bases(dev, 6, PCI_ROM_ADDRESS); ++ ++ maxbar = guess_bar_count(dev->class); ++ ++ if (class != PCI_CLASS_STORAGE_IDE) ++ pci_read_bases(dev, maxbar, has_rom(dev->class, PCI_ROM_ADDRESS)); + pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor); + pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device); + +@@ -1468,6 +1503,8 @@ int pci_setup_device(struct pci_dev *dev) + dev->class = PCI_CLASS_NOT_DEFINED << 8; + } + ++ printk("clr: pci_setup_device end\n"); ++ + /* We found a fine healthy device, go go go... */ + return 0; + } +-- +2.11.1 + diff --git a/patches/boot_time_opt/0107-cgroup.patch b/patches/boot_time_opt/0107-cgroup.patch new file mode 100644 index 0000000..d68c686 --- /dev/null +++ b/patches/boot_time_opt/0107-cgroup.patch @@ -0,0 +1,107 @@ +From 0adc5bfd84939d11d3c172eab0a00bfab4aadb46 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Fri, 28 Aug 2015 11:00:36 -0500 +Subject: [PATCH 107/114] cgroup + +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +Signed-off-by: Jose Carlos Venegas Munoz +--- + include/linux/cgroup-defs.h | 2 +- + kernel/cgroup.c | 24 ++++++++++++++---------- + 2 files changed, 15 insertions(+), 11 deletions(-) + +diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h +index 861b4677fc5b..5d3c345ee60c 100644 +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -137,7 +137,7 @@ struct cgroup_subsys_state { + + /* percpu_ref killing and RCU release */ + struct rcu_head rcu_head; +- struct work_struct destroy_work; ++ struct delayed_work destroy_work; + }; 
+ + /* +diff --git a/kernel/cgroup.c b/kernel/cgroup.c +index 53bbca7c4859..6de39d8213ed 100644 +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -73,7 +73,7 @@ + * Expiring in the middle is a performance problem not a correctness one. + * 1 sec should be enough. + */ +-#define CGROUP_PIDLIST_DESTROY_DELAY HZ ++#define CGROUP_PIDLIST_DESTROY_DELAY round_jiffies_relative(HZ) + + #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \ + MAX_CFTYPE_NAME + 2) +@@ -4986,8 +4986,9 @@ static struct cftype cgroup_legacy_base_files[] = { + */ + static void css_free_work_fn(struct work_struct *work) + { ++ struct delayed_work *dwork = to_delayed_work(work); + struct cgroup_subsys_state *css = +- container_of(work, struct cgroup_subsys_state, destroy_work); ++ container_of(dwork, struct cgroup_subsys_state, destroy_work); + struct cgroup_subsys *ss = css->ss; + struct cgroup *cgrp = css->cgroup; + +@@ -5036,14 +5037,15 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) + struct cgroup_subsys_state *css = + container_of(rcu_head, struct cgroup_subsys_state, rcu_head); + +- INIT_WORK(&css->destroy_work, css_free_work_fn); +- queue_work(cgroup_destroy_wq, &css->destroy_work); ++ INIT_DELAYED_WORK(&css->destroy_work, css_free_work_fn); ++ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); + } + + static void css_release_work_fn(struct work_struct *work) + { ++ struct delayed_work *dwork = to_delayed_work(work); + struct cgroup_subsys_state *css = +- container_of(work, struct cgroup_subsys_state, destroy_work); ++ container_of(dwork, struct cgroup_subsys_state, destroy_work); + struct cgroup_subsys *ss = css->ss; + struct cgroup *cgrp = css->cgroup; + +@@ -5088,8 +5090,9 @@ static void css_release(struct percpu_ref *ref) + struct cgroup_subsys_state *css = + container_of(ref, struct cgroup_subsys_state, refcnt); + +- INIT_WORK(&css->destroy_work, css_release_work_fn); +- queue_work(cgroup_destroy_wq, &css->destroy_work); ++ 
INIT_DELAYED_WORK(&css->destroy_work, css_release_work_fn); ++ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); ++ + } + + static void init_and_link_css(struct cgroup_subsys_state *css, +@@ -5371,8 +5374,9 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, + */ + static void css_killed_work_fn(struct work_struct *work) + { ++ struct delayed_work *dwork = to_delayed_work(work); + struct cgroup_subsys_state *css = +- container_of(work, struct cgroup_subsys_state, destroy_work); ++ container_of(dwork, struct cgroup_subsys_state, destroy_work); + + mutex_lock(&cgroup_mutex); + +@@ -5393,8 +5397,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref) + container_of(ref, struct cgroup_subsys_state, refcnt); + + if (atomic_dec_and_test(&css->online_cnt)) { +- INIT_WORK(&css->destroy_work, css_killed_work_fn); +- queue_work(cgroup_destroy_wq, &css->destroy_work); ++ INIT_DELAYED_WORK(&css->destroy_work, css_killed_work_fn); ++ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY); + } + } + +-- +2.11.1 + diff --git a/patches/boot_time_opt/0108-smpboot-reuse-timer-calibration.patch b/patches/boot_time_opt/0108-smpboot-reuse-timer-calibration.patch new file mode 100644 index 0000000..48be94a --- /dev/null +++ b/patches/boot_time_opt/0108-smpboot-reuse-timer-calibration.patch @@ -0,0 +1,45 @@ +From 634947be6c24d844af5f6ecf59453f2ddc09e032 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 11 Feb 2015 17:28:14 -0600 +Subject: [PATCH 108/114] smpboot: reuse timer calibration + +NO point recalibrating for known-constant tsc... saves 200ms+ of boot time. 
+ +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +--- + arch/x86/kernel/smpboot.c | 2 +- + arch/x86/kernel/tsc.c | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c +index 99b920d0e516..e17bb425bb52 100644 +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -761,7 +761,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) + pr_debug("Waiting for send to finish...\n"); + send_status = safe_apic_wait_icr_idle(); + +- udelay(init_udelay); ++ udelay(100); + + pr_debug("Deasserting INIT\n"); + +diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c +index 37e7cf544e51..e99be8a6a132 100644 +--- a/arch/x86/kernel/tsc.c ++++ b/arch/x86/kernel/tsc.c +@@ -1413,6 +1413,9 @@ unsigned long calibrate_delay_is_known(void) + if (!mask) + return 0; + ++ if (cpu !=0) ++ return cpu_data(0).loops_per_jiffy; ++ + sibling = cpumask_any_but(mask, cpu); + if (sibling < nr_cpu_ids) + return cpu_data(sibling).loops_per_jiffy; +-- +2.11.1 + diff --git a/patches/boot_time_opt/0109-perf.patch b/patches/boot_time_opt/0109-perf.patch new file mode 100644 index 0000000..75f50f6 --- /dev/null +++ b/patches/boot_time_opt/0109-perf.patch @@ -0,0 +1,28 @@ +From cce700dfbd5fdbf72b96e6479ca539ab4d880ce2 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Wed, 4 Nov 2015 15:17:10 -0600 +Subject: [PATCH 109/114] perf + +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +--- + arch/x86/events/intel/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c +index eb1484c86bb4..c13ea26ac066 100644 +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -4040,7 +4040,7 @@ __init int intel_pmu_init(void) + */ + if (x86_pmu.extra_regs) { + for (er = x86_pmu.extra_regs; er->msr; er++) { +- er->extra_msr_access = check_msr(er->msr, 0x11UL); ++ 
er->extra_msr_access = false; + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; +-- +2.11.1 + diff --git a/patches/boot_time_opt/0110-pci-probe-identify-known-devices.patch b/patches/boot_time_opt/0110-pci-probe-identify-known-devices.patch new file mode 100644 index 0000000..742a045 --- /dev/null +++ b/patches/boot_time_opt/0110-pci-probe-identify-known-devices.patch @@ -0,0 +1,190 @@ +From c662d99134b67c58e63ecc17c2531588a3a51596 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Sat, 14 Feb 2015 09:49:41 -0600 +Subject: [PATCH 110/114] pci: probe: identify known devices + +Author: Arjan van de Ven +Modify-by: Miguel Bernal Marin + +Signed-off-by: Miguel Bernal Marin +--- + drivers/pci/probe.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 156 insertions(+) + +diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c +index 7399a06698da..4fb2d7fed4c5 100644 +--- a/drivers/pci/probe.c ++++ b/drivers/pci/probe.c +@@ -163,6 +163,159 @@ static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar) + + #define PCI_COMMAND_DECODE_ENABLE (PCI_COMMAND_MEMORY | PCI_COMMAND_IO) + ++/* shortcut version of __pci_read_base where we know the sizes already */ ++int __pci_read_base_shortcut(struct pci_dev *dev, enum pci_bar_type type, ++ struct resource *res, unsigned int pos, u32 sz_in, u32 sz2_in) ++{ ++ u32 l, sz; ++ u64 l64, sz64, mask64; ++ struct pci_bus_region region, inverted_region; ++ ++ res->name = pci_name(dev); ++ ++ pci_read_config_dword(dev, pos, &l); ++ ++ sz = sz_in; ++ ++ /* ++ * All bits set in sz means the device isn't working properly. ++ * If the BAR isn't implemented, all bits must be 0. If it's a ++ * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit ++ * 1 must be clear. ++ * Here we set the size and is not 0xffffffff ++ */ ++ ++ /* ++ * I don't know how l can have all bits set. Copied from old code. 
++ * Maybe it fixes a bug on some ancient platform. ++ */ ++ if (l == 0xffffffff) ++ l = 0; ++ ++ if (type == pci_bar_unknown) { ++ res->flags = decode_bar(dev, l); ++ res->flags |= IORESOURCE_SIZEALIGN; ++ if (res->flags & IORESOURCE_IO) { ++ l64 = l & PCI_BASE_ADDRESS_IO_MASK; ++ sz64 = sz & PCI_BASE_ADDRESS_IO_MASK; ++ mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT; ++ } else { ++ l64 = l & PCI_BASE_ADDRESS_MEM_MASK; ++ sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK; ++ mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK; ++ } ++ } else { ++ res->flags |= (l & IORESOURCE_ROM_ENABLE); ++ l64 = l & PCI_ROM_ADDRESS_MASK; ++ sz64 = sz & PCI_ROM_ADDRESS_MASK; ++ mask64 = (u32)PCI_ROM_ADDRESS_MASK; ++ } ++ ++ if (res->flags & IORESOURCE_MEM_64) { ++ pci_read_config_dword(dev, pos + 4, &l); ++ sz = sz2_in; ++ ++ l64 |= ((u64)l << 32); ++ sz64 |= ((u64)sz << 32); ++ mask64 |= ((u64)~0 << 32); ++ } ++ ++ if (!sz64) ++ goto fail; ++ ++ sz64 = pci_size(l64, sz64, mask64); ++ if (!sz64) { ++ dev_info(&dev->dev, FW_BUG "reg 0x%x: invalid BAR (can't size)\n", ++ pos); ++ goto fail; ++ } ++ ++ if (res->flags & IORESOURCE_MEM_64) { ++ if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) && ++ sz64 > 0x100000000ULL) { ++ res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED; ++ res->start = 0; ++ res->end = 0; ++ dev_err(&dev->dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n", ++ pos, (unsigned long long)sz64); ++ goto out; ++ } ++ ++ if ((sizeof(dma_addr_t) < 8) && l) { ++ /* Above 32-bit boundary; try to reallocate */ ++ res->flags |= IORESOURCE_UNSET; ++ res->start = 0; ++ res->end = sz64; ++ dev_info(&dev->dev, "reg 0x%x: can't handle BAR above 4GB (bus address %#010llx)\n", ++ pos, (unsigned long long)l64); ++ goto out; ++ } ++ } ++ ++ region.start = l64; ++ region.end = l64 + sz64; ++ ++ pcibios_bus_to_resource(dev->bus, res, ®ion); ++ pcibios_resource_to_bus(dev->bus, &inverted_region, res); ++ ++ /* ++ * If "A" is a BAR value (a bus address), 
"bus_to_resource(A)" is ++ * the corresponding resource address (the physical address used by ++ * the CPU. Converting that resource address back to a bus address ++ * should yield the original BAR value: ++ * ++ * resource_to_bus(bus_to_resource(A)) == A ++ * ++ * If it doesn't, CPU accesses to "bus_to_resource(A)" will not ++ * be claimed by the device. ++ */ ++ if (inverted_region.start != region.start) { ++ res->flags |= IORESOURCE_UNSET; ++ res->start = 0; ++ res->end = region.end - region.start; ++ dev_info(&dev->dev, "reg 0x%x: initial BAR value %#010llx invalid\n", ++ pos, (unsigned long long)region.start); ++ } ++ ++ goto out; ++ ++ ++fail: ++ res->flags = 0; ++out: ++ if (res->flags) ++ dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x: %pR\n", pos, res); ++ ++ return (res->flags & IORESOURCE_MEM_64) ? 1 : 0; ++} ++ ++static int is_known_device(struct pci_dev *dev, int pos, int *sz) ++{ ++ /* Red Hat, Inc : Virtio network device */ ++ if (dev->vendor == 0x1af4 && dev->device == 0x1000) { ++ if (pos == 0x10) { ++ *sz = 0xffffffe1; ++ return 1; ++ } ++ if (pos == 0x14) { ++ *sz = 0xfffff000; ++ return 1; ++ } ++ } ++ /* Red Hat, Inc : Virtio block device */ ++ if (dev->vendor == 0x1af4 && dev->device == 0x1001) { ++ if (pos == 0x10) { ++ *sz = 0xffffffc1; ++ return 1; ++ } ++ if (pos == 0x14) { ++ *sz = 0xfffff000; ++ return 1; ++ } ++ } ++ return 0; ++} ++ + /** + * pci_read_base - read a PCI BAR + * @dev: the PCI device +@@ -182,6 +335,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, + + mask = type ? 
PCI_ROM_ADDRESS_MASK : ~0; + ++ if (is_known_device(dev, pos, &sz)) ++ return __pci_read_base_shortcut(dev, type, res, pos, sz, 0); ++ + res->name = pci_name(dev); + + printk("clr: Starting probe for %s\n", res->name); +-- +2.11.1 + diff --git a/patches/boot_time_opt/0111-init-no-wait-for-the-known-devices.patch b/patches/boot_time_opt/0111-init-no-wait-for-the-known-devices.patch new file mode 100644 index 0000000..701a18d --- /dev/null +++ b/patches/boot_time_opt/0111-init-no-wait-for-the-known-devices.patch @@ -0,0 +1,39 @@ +From be2ab4809c6b5058fbf3cd54c0f59c56416e572c Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Mon, 22 Jun 2015 09:33:33 -0500 +Subject: [PATCH 111/114] init: no wait for the known devices + +No wait for the known devices to complete their probing + +Author: Arjan van de Ven + +Signed-off-by: Miguel Bernal Marin +--- + init/do_mounts.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/init/do_mounts.c b/init/do_mounts.c +index c2de5104aad2..40725f0f5fb3 100644 +--- a/init/do_mounts.c ++++ b/init/do_mounts.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -563,7 +564,8 @@ void __init prepare_namespace(void) + * For example, it is not atypical to wait 5 seconds here + * for the touchpad of a laptop to initialize. 
+ */ +- wait_for_device_probe(); ++ //wait_for_device_probe(); ++ async_synchronize_full(); + + md_run_setup(); + +-- +2.11.1 + diff --git a/patches/boot_time_opt/0112-ksm-wakeups.patch b/patches/boot_time_opt/0112-ksm-wakeups.patch new file mode 100644 index 0000000..b131e3f --- /dev/null +++ b/patches/boot_time_opt/0112-ksm-wakeups.patch @@ -0,0 +1,32 @@ +From 2dc48e4b5c651691b7028991b64c935047b41b19 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven +Date: Mon, 14 Mar 2016 11:06:46 -0600 +Subject: [PATCH 112/114] ksm-wakeups + +reduce wakeups in ksm: sleeps of 1000 ms or more are converted to jiffies and then rounded with round_jiffies_relative() +--- + mm/ksm.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/mm/ksm.c b/mm/ksm.c +index 9ae6011a41f8..eecd3ff669e2 100644 +--- a/mm/ksm.c ++++ b/mm/ksm.c +@@ -1725,8 +1725,12 @@ static int ksm_scan_thread(void *nothing) + try_to_freeze(); + + if (ksmd_should_run()) { +- schedule_timeout_interruptible( +- msecs_to_jiffies(ksm_thread_sleep_millisecs)); ++ if (ksm_thread_sleep_millisecs >= 1000) ++ schedule_timeout_interruptible( ++ round_jiffies_relative(msecs_to_jiffies(ksm_thread_sleep_millisecs))); ++ else ++ schedule_timeout_interruptible( ++ msecs_to_jiffies(ksm_thread_sleep_millisecs)); + } else { + wait_event_freezable(ksm_thread_wait, + ksmd_should_run() || kthread_should_stop()); +-- +2.11.1 + diff --git a/patches/boot_time_opt/0113-init-do_mounts-recreate-dev-root.patch b/patches/boot_time_opt/0113-init-do_mounts-recreate-dev-root.patch new file mode 100644 index 0000000..047eddb --- /dev/null +++ b/patches/boot_time_opt/0113-init-do_mounts-recreate-dev-root.patch @@ -0,0 +1,42 @@ +From 179b7f41d5509f93cd297cc81c5d8da4a3123d9d Mon Sep 17 00:00:00 2001 +From: Miguel Bernal Marin +Date: Fri, 20 Nov 2015 14:01:26 -0600 +Subject: [PATCH 113/114] init: do_mounts: recreate /dev/root + +Rootfs shows as is mounted in /dev/root, but this devices is not present in +/dev directory. 
+ +Signed-off-by: Miguel Bernal Marin +--- + init/do_mounts.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/init/do_mounts.c b/init/do_mounts.c +index 40725f0f5fb3..78b5b1dba8ca 100644 +--- a/init/do_mounts.c ++++ b/init/do_mounts.c +@@ -550,6 +550,7 @@ void __init mount_root(void) + void __init prepare_namespace(void) + { + int is_floppy; ++ int err; + + if (root_delay) { + printk(KERN_INFO "Waiting %d sec before mounting root device...\n", +@@ -604,6 +605,13 @@ void __init prepare_namespace(void) + devtmpfs_mount("dev"); + sys_mount(".", "/", NULL, MS_MOVE, NULL); + sys_chroot("."); ++#ifdef CONFIG_BLOCK ++ /* recreate the /dev/root */ ++ err = create_dev("/dev/root", ROOT_DEV); ++ ++ if (err < 0) ++ pr_emerg("Failed to create /dev/root: %d\n", err); ++#endif + } + + static bool is_tmpfs; +-- +2.11.1 + diff --git a/patches/boot_time_opt/0114-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch b/patches/boot_time_opt/0114-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch new file mode 100644 index 0000000..dee9058 --- /dev/null +++ b/patches/boot_time_opt/0114-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch @@ -0,0 +1,56 @@ +From 02fd2e6a7c708bf973209f9b238c5c61cbf15239 Mon Sep 17 00:00:00 2001 +From: Alan Cox +Date: Thu, 10 Mar 2016 15:11:28 +0000 +Subject: [PATCH 114/114] xattr: allow setting user.* attributes on symlinks by + owner + +Kvmtool and clear containers supports using user attributes to label host +files with the virtual uid/guid of the file in the container. This allows an +end user to manage their files and a complete uid space without all the ugly +namespace stuff. + +The one gap in the support is symlinks because an end user can change the +ownership of a symbolic link. We support attributes on these files as you +can already (as root) set security attributes on them. 
+ +The current rules seem slightly over-paranoid and as we have a use case this +patch enables updating the attributes on a symbolic link IFF you are the +owner of the symlink (as permissions are not usually meaningful on the link +itself). + +Signed-off-by: Alan Cox +--- + fs/xattr.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/fs/xattr.c b/fs/xattr.c +index 7e3317cf4045..e005c30acb2c 100644 +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -118,15 +118,17 @@ xattr_permission(struct inode *inode, const char *name, int mask) + } + + /* +- * In the user.* namespace, only regular files and directories can have +- * extended attributes. For sticky directories, only the owner and +- * privileged users can write attributes. ++ * In the user.* namespace, only regular files, symbolic links, and ++ * directories can have extended attributes. For symbolic links and ++ * sticky directories, only the owner and privileged users can write ++ * attributes. + */ + if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { +- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) ++ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) + return (mask & MAY_WRITE) ? 
-EPERM : -ENODATA; +- if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && +- (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) ++ if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX)) ++ || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE) ++ && !inode_owner_or_capable(inode)) + return -EPERM; + } + +-- +2.11.1 + diff --git a/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch b/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch index 5f44930..ff9d8c0 100644 --- a/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch +++ b/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch @@ -132,18 +132,18 @@ index c1095cdc0fe2..d8ae774fa042 100644 { .procname = "compact_memory", diff --git a/mm/Makefile b/mm/Makefile -index 295bd7a..6455723 100644 +index 295bd7a9f76b..651ce0aff140 100644 --- a/mm/Makefile +++ b/mm/Makefile -@@ -47,6 +47,8 @@ else - obj-y += bootmem.o - endif +@@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ + mm_init.o mmu_context.o percpu.o slab_common.o \ + compaction.o vmacache.o \ + interval_tree.o list_lru.o workingset.o \ +- prfile.o debug.o $(mmu-y) ++ prfile.o debug.o kvm.o $(mmu-y) + + obj-y += init-mm.o -+obj-y += kvm.o -+ - obj-$(CONFIG_ADVISE_SYSCALLS) += fadvise.o - ifdef CONFIG_MMU - obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o diff --git a/mm/kvm.c b/mm/kvm.c new file mode 100644 index 000000000000..8945f6a311b9 diff --git a/patches/boot_time_opt/guest_boot_time_opt.scc b/patches/boot_time_opt/guest_boot_time_opt.scc new file mode 100644 index 0000000..3636c01 --- /dev/null +++ b/patches/boot_time_opt/guest_boot_time_opt.scc @@ -0,0 +1,19 @@ +define KFEATURE_DESCRIPTION "Boot time optimization changes ported from ClearLinux , https://github.com/clearlinux-pkgs/linux-kvm" +define KFEATURE_COMPATIBILITY all + +patch 0103-sysrq-skip-synchronize_rcu-if-there-is-no-old-op.patch +patch 0104-fbcon-enable-no-blink-by-default.patch +patch 0105-vmstats-wakeups.patch 
+# Remove patch because it causes ixgbevf to not initialize correctly in the guest +#patch 0106-pci-probe.patch +patch 0107-cgroup.patch +patch 0108-smpboot-reuse-timer-calibration.patch +patch 0109-perf.patch +patch 0110-pci-probe-identify-known-devices.patch +patch 0111-init-no-wait-for-the-known-devices.patch +patch 0112-ksm-wakeups.patch + +patch 0151-mm-Export-do_madvise.patch +patch 0152-x86-kvm-Notify-host-to-release-pages.patch +patch 0153-x86-Return-memory-from-guest-to-host-kernel.patch +patch 0154-sysctl-vm-Fine-grained-cache-shrinking.patch diff --git a/patches/kernel_startend_msg/0001-printk-add-Enea-Linux-guest-boot-start-end-messages.patch b/patches/kernel_startend_msg/0001-printk-add-Enea-Linux-guest-boot-start-end-messages.patch new file mode 100644 index 0000000..ce5fb21 --- /dev/null +++ b/patches/kernel_startend_msg/0001-printk-add-Enea-Linux-guest-boot-start-end-messages.patch @@ -0,0 +1,95 @@ +From b91730ba705d151577974d5fb9f5371a4569b467 Mon Sep 17 00:00:00 2001 +From: Adrian Calianu +Date: Tue, 6 Jun 2017 15:47:54 +0200 +Subject: [PATCH 1/1] printk: add Enea Linux boot start/end messages + +Signed-off-by: Adrian Calianu +--- + arch/x86/boot/compressed/misc.c | 23 ++++++++++++----------- + init/main.c | 4 +++- + 2 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c +index b3c5a5f0..9fdf3c6 100644 +--- a/arch/x86/boot/compressed/misc.c ++++ b/arch/x86/boot/compressed/misc.c +@@ -202,10 +202,10 @@ static void handle_relocations(void *output, unsigned long output_len, + delta = virt_addr - LOAD_PHYSICAL_ADDR; + + if (!delta) { +- debug_putstr("No relocation needed... "); ++ /* debug_putstr("No relocation needed... "); */ + return; + } +- debug_putstr("Performing relocations... "); ++ /* debug_putstr("Performing relocations... "); */ + + /* + * Process relocations: 32 bit relocations first then 64 bit after. 
+@@ -286,7 +286,7 @@ static void parse_elf(void *output) + return; + } + +- debug_putstr("Parsing ELF... "); ++ /* debug_putstr("Parsing ELF... ");*/ + + phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); + if (!phdrs) +@@ -360,17 +360,18 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, + cols = boot_params->screen_info.orig_video_cols; + + console_init(); +- debug_putstr("early console in extract_kernel\n"); ++ /* debug_putstr("early console in extract_kernel\n");*/ ++ debug_putstr("\n");debug_putstr("\nEnea Linux guest kernel boot start\n"); + + free_mem_ptr = heap; /* Heap */ + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + + /* Report initial kernel position details. */ +- debug_putaddr(input_data); +- debug_putaddr(input_len); +- debug_putaddr(output); +- debug_putaddr(output_len); +- debug_putaddr(kernel_total_size); ++ /*debug_putaddr(input_data); */ ++ /*debug_putaddr(input_len); */ ++ /*debug_putaddr(output); */ ++ /*debug_putaddr(output_len); */ ++ /*debug_putaddr(kernel_total_size);*/ + + /* + * The memory hole needed for the kernel is the larger of either +@@ -401,11 +402,11 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, + error("Destination virtual address changed when not relocatable"); + #endif + +- debug_putstr("\nDecompressing Linux... "); ++ /*debug_putstr("\nDecompressing Linux... 
");*/ + __decompress(input_data, input_len, NULL, NULL, output, output_len, + NULL, error); + parse_elf(output); + handle_relocations(output, output_len, virt_addr); +- debug_putstr("done.\nBooting the kernel.\n"); ++ /*debug_putstr("done.\nBooting the kernel.\n");*/ + return output; + } +diff --git a/init/main.c b/init/main.c +index 8358cbe..613caa1 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -976,8 +976,10 @@ static int __ref kernel_init(void *unused) + if (!try_to_run_init_process("/sbin/init") || + !try_to_run_init_process("/etc/init") || + !try_to_run_init_process("/bin/init") || +- !try_to_run_init_process("/bin/sh")) ++ !try_to_run_init_process("/bin/sh")) { ++ printk(KERN_EMERG "Enea Linux guest kernel boot end\n"); + return 0; ++ } + + panic("No working init found. Try passing init= option to kernel. " + "See Linux Documentation/init.txt for guidance."); +-- +2.7.4 + diff --git a/patches/kernel_startend_msg/kernel_startend_msg.scc b/patches/kernel_startend_msg/kernel_startend_msg.scc index e6da49c..ae49e89 100644 --- a/patches/kernel_startend_msg/kernel_startend_msg.scc +++ b/patches/kernel_startend_msg/kernel_startend_msg.scc @@ -1,4 +1,4 @@ define KFEATURE_DESCRIPTION "Enable the kernel to output messages when it starts and ends booting" define KFEATURE_COMPATIBILITY all -patch 0001-printk-add-Enea-Linux-boot-start-end-messages.patch +patch 0001-printk-add-Enea-Linux-guest-boot-start-end-messages.patch -- cgit v1.2.3-54-g00ecf