diff options
Diffstat (limited to 'patches/boot_time_opt')
32 files changed, 2238 insertions, 0 deletions
diff --git a/patches/boot_time_opt/0011-drm-i915-fbc-sanitize-fbc-GEN-greater-than-9.patch b/patches/boot_time_opt/0011-drm-i915-fbc-sanitize-fbc-GEN-greater-than-9.patch new file mode 100644 index 0000000..33debcd --- /dev/null +++ b/patches/boot_time_opt/0011-drm-i915-fbc-sanitize-fbc-GEN-greater-than-9.patch | |||
@@ -0,0 +1,25 @@ | |||
1 | From 07639791f247ae7a807444106b9b7611f070d02b Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Fri, 6 Jan 2017 13:28:29 +0000 | ||
4 | Subject: [PATCH] drm/i915/fbc: sanitize fbc GEN greater than 9 | ||
5 | |||
6 | --- | ||
7 | drivers/gpu/drm/i915/intel_fbc.c | 2 +- | ||
8 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
9 | |||
10 | diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c | ||
11 | index c43dd9abce79..f5a2560840f3 100644 | ||
12 | --- a/drivers/gpu/drm/i915/intel_fbc.c | ||
13 | +++ b/drivers/gpu/drm/i915/intel_fbc.c | ||
14 | @@ -1262,7 +1262,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) | ||
15 | if (!HAS_FBC(dev_priv)) | ||
16 | return 0; | ||
17 | |||
18 | - if (IS_BROADWELL(dev_priv)) | ||
19 | + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) | ||
20 | return 1; | ||
21 | |||
22 | return 0; | ||
23 | -- | ||
24 | 2.11.1 | ||
25 | |||
diff --git a/patches/boot_time_opt/0101-kvm-silence-kvm-unhandled-rdmsr.patch b/patches/boot_time_opt/0101-kvm-silence-kvm-unhandled-rdmsr.patch new file mode 100644 index 0000000..aeb3abf --- /dev/null +++ b/patches/boot_time_opt/0101-kvm-silence-kvm-unhandled-rdmsr.patch | |||
@@ -0,0 +1,29 @@ | |||
1 | From f45c353859fc0ceb75fef3a2f4a2c179dfa378d7 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Tue, 23 Jun 2015 01:16:45 -0500 | ||
4 | Subject: [PATCH 101/124] kvm: silence kvm unhandled rdmsr | ||
5 | |||
6 | Author: Arjan van de Ven <arjan@linux.intel.com> | ||
7 | |||
8 | Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> | ||
9 | Signed-off-by: Jose Carlos Venegas Munoz <jos.c.venegas.munoz@intel.com> | ||
10 | --- | ||
11 | arch/x86/kvm/x86.c | 2 +- | ||
12 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
13 | |||
14 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | ||
15 | index 731044efb195..582c75311f95 100644 | ||
16 | --- a/arch/x86/kvm/x86.c | ||
17 | +++ b/arch/x86/kvm/x86.c | ||
18 | @@ -2506,7 +2506,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
19 | if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) | ||
20 | return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); | ||
21 | if (!ignore_msrs) { | ||
22 | - vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index); | ||
23 | +// vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index); | ||
24 | return 1; | ||
25 | } else { | ||
26 | vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index); | ||
27 | -- | ||
28 | 2.11.1 | ||
29 | |||
diff --git a/patches/boot_time_opt/0102-i8042-decrease-debug-message-level-to-info.patch b/patches/boot_time_opt/0102-i8042-decrease-debug-message-level-to-info.patch new file mode 100644 index 0000000..96fd92b --- /dev/null +++ b/patches/boot_time_opt/0102-i8042-decrease-debug-message-level-to-info.patch | |||
@@ -0,0 +1,65 @@ | |||
1 | From 7e847b13b753ec632fef2f1ffa0d8f5b444c967b Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Tue, 23 Jun 2015 01:26:52 -0500 | ||
4 | Subject: [PATCH 102/124] i8042: decrease debug message level to info | ||
5 | |||
6 | Author: Arjan van de Ven <arjan@linux.intel.com> | ||
7 | |||
8 | Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> | ||
9 | Signed-off-by: Jose Carlos Venegas Munoz <jos.c.venegas.munoz@intel.com> | ||
10 | --- | ||
11 | drivers/input/serio/i8042.c | 10 +++++----- | ||
12 | 1 file changed, 5 insertions(+), 5 deletions(-) | ||
13 | |||
14 | diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c | ||
15 | index 89abfdb539ac..5317c41b049e 100644 | ||
16 | --- a/drivers/input/serio/i8042.c | ||
17 | +++ b/drivers/input/serio/i8042.c | ||
18 | @@ -593,7 +593,7 @@ static int i8042_enable_kbd_port(void) | ||
19 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { | ||
20 | i8042_ctr &= ~I8042_CTR_KBDINT; | ||
21 | i8042_ctr |= I8042_CTR_KBDDIS; | ||
22 | - pr_err("Failed to enable KBD port\n"); | ||
23 | + pr_info("Failed to enable KBD port\n"); | ||
24 | return -EIO; | ||
25 | } | ||
26 | |||
27 | @@ -612,7 +612,7 @@ static int i8042_enable_aux_port(void) | ||
28 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { | ||
29 | i8042_ctr &= ~I8042_CTR_AUXINT; | ||
30 | i8042_ctr |= I8042_CTR_AUXDIS; | ||
31 | - pr_err("Failed to enable AUX port\n"); | ||
32 | + pr_info("Failed to enable AUX port\n"); | ||
33 | return -EIO; | ||
34 | } | ||
35 | |||
36 | @@ -704,7 +704,7 @@ static int __init i8042_check_mux(void) | ||
37 | i8042_ctr &= ~I8042_CTR_AUXINT; | ||
38 | |||
39 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { | ||
40 | - pr_err("Failed to disable AUX port, can't use MUX\n"); | ||
41 | + pr_info("Failed to disable AUX port, can't use MUX\n"); | ||
42 | return -EIO; | ||
43 | } | ||
44 | |||
45 | @@ -927,7 +927,7 @@ static int i8042_controller_selftest(void) | ||
46 | do { | ||
47 | |||
48 | if (i8042_command(¶m, I8042_CMD_CTL_TEST)) { | ||
49 | - pr_err("i8042 controller selftest timeout\n"); | ||
50 | + pr_info("i8042 controller selftest timeout\n"); | ||
51 | return -ENODEV; | ||
52 | } | ||
53 | |||
54 | @@ -949,7 +949,7 @@ static int i8042_controller_selftest(void) | ||
55 | pr_info("giving up on controller selftest, continuing anyway...\n"); | ||
56 | return 0; | ||
57 | #else | ||
58 | - pr_err("i8042 controller selftest failed\n"); | ||
59 | + pr_info("i8042 controller selftest failed\n"); | ||
60 | return -EIO; | ||
61 | #endif | ||
62 | } | ||
63 | -- | ||
64 | 2.11.1 | ||
65 | |||
diff --git a/patches/boot_time_opt/0103-init-do_mounts-recreate-dev-root.patch b/patches/boot_time_opt/0103-init-do_mounts-recreate-dev-root.patch new file mode 100644 index 0000000..bb7bb9f --- /dev/null +++ b/patches/boot_time_opt/0103-init-do_mounts-recreate-dev-root.patch | |||
@@ -0,0 +1,42 @@ | |||
1 | From 838abc7e5f43ea40a2cc05ebd6c7321b6d84b057 Mon Sep 17 00:00:00 2001 | ||
2 | From: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> | ||
3 | Date: Fri, 20 Nov 2015 14:01:26 -0600 | ||
4 | Subject: [PATCH 103/124] init: do_mounts: recreate /dev/root | ||
5 | |||
6 | Rootfs shows as mounted on /dev/root, but this device is not present in | ||
7 | the /dev directory. | ||
8 | |||
9 | Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> | ||
10 | --- | ||
11 | init/do_mounts.c | 8 ++++++++ | ||
12 | 1 file changed, 8 insertions(+) | ||
13 | |||
14 | diff --git a/init/do_mounts.c b/init/do_mounts.c | ||
15 | index dea5de95c2dd..d74a346b2dfa 100644 | ||
16 | --- a/init/do_mounts.c | ||
17 | +++ b/init/do_mounts.c | ||
18 | @@ -549,6 +549,7 @@ void __init mount_root(void) | ||
19 | void __init prepare_namespace(void) | ||
20 | { | ||
21 | int is_floppy; | ||
22 | + int err; | ||
23 | |||
24 | if (root_delay) { | ||
25 | printk(KERN_INFO "Waiting %d sec before mounting root device...\n", | ||
26 | @@ -602,6 +603,13 @@ void __init prepare_namespace(void) | ||
27 | devtmpfs_mount("dev"); | ||
28 | sys_mount(".", "/", NULL, MS_MOVE, NULL); | ||
29 | sys_chroot("."); | ||
30 | +#ifdef CONFIG_BLOCK | ||
31 | + /* recreate the /dev/root */ | ||
32 | + err = create_dev("/dev/root", ROOT_DEV); | ||
33 | + | ||
34 | + if (err < 0) | ||
35 | + pr_emerg("Failed to create /dev/root: %d\n", err); | ||
36 | +#endif | ||
37 | } | ||
38 | |||
39 | static bool is_tmpfs; | ||
40 | -- | ||
41 | 2.11.1 | ||
42 | |||
diff --git a/patches/boot_time_opt/0104-Increase-the-ext4-default-commit-age.patch b/patches/boot_time_opt/0104-Increase-the-ext4-default-commit-age.patch new file mode 100644 index 0000000..fb709b4 --- /dev/null +++ b/patches/boot_time_opt/0104-Increase-the-ext4-default-commit-age.patch | |||
@@ -0,0 +1,35 @@ | |||
1 | From b6970d43f97325c9acc7bd942dcd192586d8d407 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Mon, 11 Jan 2016 10:01:44 -0600 | ||
4 | Subject: [PATCH 104/124] Increase the ext4 default commit age | ||
5 | |||
6 | Both the VM and EXT4 have a "commit to disk after X seconds" time. | ||
7 | Currently the EXT4 time is shorter than our VM time, which is a bit | ||
8 | suboptimal, | ||
9 | it's better for performance to let the VM do the writeouts in bulk | ||
10 | rather than something deep in the journalling layer. | ||
11 | |||
12 | (DISTRO TWEAK -- NOT FOR UPSTREAM) | ||
13 | |||
14 | Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> | ||
15 | Signed-off-by: Jose Carlos Venegas Munoz <jose.carlos.venegas.munoz@intel.com> | ||
16 | --- | ||
17 | include/linux/jbd2.h | 2 +- | ||
18 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
19 | |||
20 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h | ||
21 | index dfaa1f4dcb0c..9955fd6c6159 100644 | ||
22 | --- a/include/linux/jbd2.h | ||
23 | +++ b/include/linux/jbd2.h | ||
24 | @@ -47,7 +47,7 @@ | ||
25 | /* | ||
26 | * The default maximum commit age, in seconds. | ||
27 | */ | ||
28 | -#define JBD2_DEFAULT_MAX_COMMIT_AGE 5 | ||
29 | +#define JBD2_DEFAULT_MAX_COMMIT_AGE 30 | ||
30 | |||
31 | #ifdef CONFIG_JBD2_DEBUG | ||
32 | /* | ||
33 | -- | ||
34 | 2.11.1 | ||
35 | |||
diff --git a/patches/boot_time_opt/0105-silence-rapl.patch b/patches/boot_time_opt/0105-silence-rapl.patch new file mode 100644 index 0000000..4dd78fc --- /dev/null +++ b/patches/boot_time_opt/0105-silence-rapl.patch | |||
@@ -0,0 +1,25 @@ | |||
1 | From 558d32869c8d8e302dd3810610d62e1c69a8ebce Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Mon, 14 Mar 2016 11:22:09 -0600 | ||
4 | Subject: [PATCH 105/124] silence rapl | ||
5 | |||
6 | --- | ||
7 | drivers/powercap/intel_rapl.c | 2 +- | ||
8 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
9 | |||
10 | diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c | ||
11 | index 3c71f608b444..450aff027d42 100644 | ||
12 | --- a/drivers/powercap/intel_rapl.c | ||
13 | +++ b/drivers/powercap/intel_rapl.c | ||
14 | @@ -1684,7 +1684,7 @@ static int __init rapl_init(void) | ||
15 | |||
16 | id = x86_match_cpu(rapl_ids); | ||
17 | if (!id) { | ||
18 | - pr_err("driver does not support CPU family %d model %d\n", | ||
19 | + pr_info("driver does not support CPU family %d model %d\n", | ||
20 | boot_cpu_data.x86, boot_cpu_data.x86_model); | ||
21 | |||
22 | return -ENODEV; | ||
23 | -- | ||
24 | 2.11.1 | ||
25 | |||
diff --git a/patches/boot_time_opt/0106-pci-pme-wakeups.patch b/patches/boot_time_opt/0106-pci-pme-wakeups.patch new file mode 100644 index 0000000..f0a4799 --- /dev/null +++ b/patches/boot_time_opt/0106-pci-pme-wakeups.patch | |||
@@ -0,0 +1,27 @@ | |||
1 | From 1f44219cd74f5c3b97e2c85af87141e1bddf0555 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Mon, 14 Mar 2016 11:10:58 -0600 | ||
4 | Subject: [PATCH 106/124] pci pme wakeups | ||
5 | |||
6 | Reduce wakeups for PME checks, which are a workaround for miswired | ||
7 | boards (sadly, too many of them) in laptops. | ||
8 | --- | ||
9 | drivers/pci/pci.c | 2 +- | ||
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c | ||
13 | index eda6a7cf0e54..82a623255059 100644 | ||
14 | --- a/drivers/pci/pci.c | ||
15 | +++ b/drivers/pci/pci.c | ||
16 | @@ -57,7 +57,7 @@ struct pci_pme_device { | ||
17 | struct pci_dev *dev; | ||
18 | }; | ||
19 | |||
20 | -#define PME_TIMEOUT 1000 /* How long between PME checks */ | ||
21 | +#define PME_TIMEOUT 4000 /* How long between PME checks */ | ||
22 | |||
23 | static void pci_dev_d3_sleep(struct pci_dev *dev) | ||
24 | { | ||
25 | -- | ||
26 | 2.11.1 | ||
27 | |||
diff --git a/patches/boot_time_opt/0107-ksm-wakeups.patch b/patches/boot_time_opt/0107-ksm-wakeups.patch new file mode 100644 index 0000000..2b25625 --- /dev/null +++ b/patches/boot_time_opt/0107-ksm-wakeups.patch | |||
@@ -0,0 +1,34 @@ | |||
1 | From a5de04044d428bf54472365e7dc07958aa184daf Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Mon, 14 Mar 2016 11:06:46 -0600 | ||
4 | Subject: [PATCH 107/124] ksm-wakeups | ||
5 | |||
6 | reduce wakeups in ksm by adding rounding (aligning) when the sleep times are 1 second or longer | ||
7 | |||
8 | Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> | ||
9 | --- | ||
10 | mm/ksm.c | 8 ++++++-- | ||
11 | 1 file changed, 6 insertions(+), 2 deletions(-) | ||
12 | |||
13 | diff --git a/mm/ksm.c b/mm/ksm.c | ||
14 | index 9ae6011a41f8..eecd3ff669e2 100644 | ||
15 | --- a/mm/ksm.c | ||
16 | +++ b/mm/ksm.c | ||
17 | @@ -1725,8 +1725,12 @@ static int ksm_scan_thread(void *nothing) | ||
18 | try_to_freeze(); | ||
19 | |||
20 | if (ksmd_should_run()) { | ||
21 | - schedule_timeout_interruptible( | ||
22 | - msecs_to_jiffies(ksm_thread_sleep_millisecs)); | ||
23 | + if (ksm_thread_sleep_millisecs >= 1000) | ||
24 | + schedule_timeout_interruptible( | ||
25 | + msecs_to_jiffies(round_jiffies_relative(ksm_thread_sleep_millisecs))); | ||
26 | + else | ||
27 | + schedule_timeout_interruptible( | ||
28 | + msecs_to_jiffies(ksm_thread_sleep_millisecs)); | ||
29 | } else { | ||
30 | wait_event_freezable(ksm_thread_wait, | ||
31 | ksmd_should_run() || kthread_should_stop()); | ||
32 | -- | ||
33 | 2.11.1 | ||
34 | |||
diff --git a/patches/boot_time_opt/0108-intel_idle-tweak-cpuidle-cstates.patch b/patches/boot_time_opt/0108-intel_idle-tweak-cpuidle-cstates.patch new file mode 100644 index 0000000..da5396c --- /dev/null +++ b/patches/boot_time_opt/0108-intel_idle-tweak-cpuidle-cstates.patch | |||
@@ -0,0 +1,227 @@ | |||
1 | From bf7e0cebaafe790f62cbc5815648d556847b7d27 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Sat, 19 Mar 2016 21:32:19 -0400 | ||
4 | Subject: [PATCH 108/124] intel_idle: tweak cpuidle cstates | ||
5 | |||
6 | Increase target_residency in cpuidle cstate | ||
7 | |||
8 | Tune intel_idle to be a bit less aggressive; | ||
9 | Clear linux is cleaner in hygiene (wakeups) than the average linux, | ||
10 | so we can afford changing these in a way that increases | ||
11 | performance while keeping power efficiency | ||
12 | --- | ||
13 | drivers/idle/intel_idle.c | 74 +++++++++++------------------------------------ | ||
14 | 1 file changed, 17 insertions(+), 57 deletions(-) | ||
15 | |||
16 | diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c | ||
17 | index 4466a2f969d7..cbab050b83f0 100644 | ||
18 | --- a/drivers/idle/intel_idle.c | ||
19 | +++ b/drivers/idle/intel_idle.c | ||
20 | @@ -475,7 +475,7 @@ static struct cpuidle_state hsw_cstates[] = { | ||
21 | .desc = "MWAIT 0x10", | ||
22 | .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
23 | .exit_latency = 33, | ||
24 | - .target_residency = 100, | ||
25 | + .target_residency = 1000, | ||
26 | .enter = &intel_idle, | ||
27 | .enter_freeze = intel_idle_freeze, }, | ||
28 | { | ||
29 | @@ -483,7 +483,7 @@ static struct cpuidle_state hsw_cstates[] = { | ||
30 | .desc = "MWAIT 0x20", | ||
31 | .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
32 | .exit_latency = 133, | ||
33 | - .target_residency = 400, | ||
34 | + .target_residency = 4000, | ||
35 | .enter = &intel_idle, | ||
36 | .enter_freeze = intel_idle_freeze, }, | ||
37 | { | ||
38 | @@ -491,7 +491,7 @@ static struct cpuidle_state hsw_cstates[] = { | ||
39 | .desc = "MWAIT 0x32", | ||
40 | .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
41 | .exit_latency = 166, | ||
42 | - .target_residency = 500, | ||
43 | + .target_residency = 5000, | ||
44 | .enter = &intel_idle, | ||
45 | .enter_freeze = intel_idle_freeze, }, | ||
46 | { | ||
47 | @@ -499,7 +499,7 @@ static struct cpuidle_state hsw_cstates[] = { | ||
48 | .desc = "MWAIT 0x40", | ||
49 | .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
50 | .exit_latency = 300, | ||
51 | - .target_residency = 900, | ||
52 | + .target_residency = 9000, | ||
53 | .enter = &intel_idle, | ||
54 | .enter_freeze = intel_idle_freeze, }, | ||
55 | { | ||
56 | @@ -507,7 +507,7 @@ static struct cpuidle_state hsw_cstates[] = { | ||
57 | .desc = "MWAIT 0x50", | ||
58 | .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
59 | .exit_latency = 600, | ||
60 | - .target_residency = 1800, | ||
61 | + .target_residency = 18000, | ||
62 | .enter = &intel_idle, | ||
63 | .enter_freeze = intel_idle_freeze, }, | ||
64 | { | ||
65 | @@ -515,7 +515,7 @@ static struct cpuidle_state hsw_cstates[] = { | ||
66 | .desc = "MWAIT 0x60", | ||
67 | .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
68 | .exit_latency = 2600, | ||
69 | - .target_residency = 7700, | ||
70 | + .target_residency = 77000, | ||
71 | .enter = &intel_idle, | ||
72 | .enter_freeze = intel_idle_freeze, }, | ||
73 | { | ||
74 | @@ -531,27 +531,11 @@ static struct cpuidle_state bdw_cstates[] = { | ||
75 | .enter = &intel_idle, | ||
76 | .enter_freeze = intel_idle_freeze, }, | ||
77 | { | ||
78 | - .name = "C1E-BDW", | ||
79 | - .desc = "MWAIT 0x01", | ||
80 | - .flags = MWAIT2flg(0x01), | ||
81 | - .exit_latency = 10, | ||
82 | - .target_residency = 20, | ||
83 | - .enter = &intel_idle, | ||
84 | - .enter_freeze = intel_idle_freeze, }, | ||
85 | - { | ||
86 | - .name = "C3-BDW", | ||
87 | - .desc = "MWAIT 0x10", | ||
88 | - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
89 | - .exit_latency = 40, | ||
90 | - .target_residency = 100, | ||
91 | - .enter = &intel_idle, | ||
92 | - .enter_freeze = intel_idle_freeze, }, | ||
93 | - { | ||
94 | .name = "C6-BDW", | ||
95 | .desc = "MWAIT 0x20", | ||
96 | .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
97 | .exit_latency = 133, | ||
98 | - .target_residency = 400, | ||
99 | + .target_residency = 4000, | ||
100 | .enter = &intel_idle, | ||
101 | .enter_freeze = intel_idle_freeze, }, | ||
102 | { | ||
103 | @@ -559,7 +543,7 @@ static struct cpuidle_state bdw_cstates[] = { | ||
104 | .desc = "MWAIT 0x32", | ||
105 | .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
106 | .exit_latency = 166, | ||
107 | - .target_residency = 500, | ||
108 | + .target_residency = 5000, | ||
109 | .enter = &intel_idle, | ||
110 | .enter_freeze = intel_idle_freeze, }, | ||
111 | { | ||
112 | @@ -567,7 +551,7 @@ static struct cpuidle_state bdw_cstates[] = { | ||
113 | .desc = "MWAIT 0x40", | ||
114 | .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
115 | .exit_latency = 300, | ||
116 | - .target_residency = 900, | ||
117 | + .target_residency = 9000, | ||
118 | .enter = &intel_idle, | ||
119 | .enter_freeze = intel_idle_freeze, }, | ||
120 | { | ||
121 | @@ -575,7 +559,7 @@ static struct cpuidle_state bdw_cstates[] = { | ||
122 | .desc = "MWAIT 0x50", | ||
123 | .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
124 | .exit_latency = 600, | ||
125 | - .target_residency = 1800, | ||
126 | + .target_residency = 18000, | ||
127 | .enter = &intel_idle, | ||
128 | .enter_freeze = intel_idle_freeze, }, | ||
129 | { | ||
130 | @@ -583,7 +567,7 @@ static struct cpuidle_state bdw_cstates[] = { | ||
131 | .desc = "MWAIT 0x60", | ||
132 | .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
133 | .exit_latency = 2600, | ||
134 | - .target_residency = 7700, | ||
135 | + .target_residency = 77000, | ||
136 | .enter = &intel_idle, | ||
137 | .enter_freeze = intel_idle_freeze, }, | ||
138 | { | ||
139 | @@ -600,27 +584,11 @@ static struct cpuidle_state skl_cstates[] = { | ||
140 | .enter = &intel_idle, | ||
141 | .enter_freeze = intel_idle_freeze, }, | ||
142 | { | ||
143 | - .name = "C1E-SKL", | ||
144 | - .desc = "MWAIT 0x01", | ||
145 | - .flags = MWAIT2flg(0x01), | ||
146 | - .exit_latency = 10, | ||
147 | - .target_residency = 20, | ||
148 | - .enter = &intel_idle, | ||
149 | - .enter_freeze = intel_idle_freeze, }, | ||
150 | - { | ||
151 | - .name = "C3-SKL", | ||
152 | - .desc = "MWAIT 0x10", | ||
153 | - .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
154 | - .exit_latency = 70, | ||
155 | - .target_residency = 100, | ||
156 | - .enter = &intel_idle, | ||
157 | - .enter_freeze = intel_idle_freeze, }, | ||
158 | - { | ||
159 | .name = "C6-SKL", | ||
160 | .desc = "MWAIT 0x20", | ||
161 | .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
162 | .exit_latency = 85, | ||
163 | - .target_residency = 200, | ||
164 | + .target_residency = 2000, | ||
165 | .enter = &intel_idle, | ||
166 | .enter_freeze = intel_idle_freeze, }, | ||
167 | { | ||
168 | @@ -628,7 +596,7 @@ static struct cpuidle_state skl_cstates[] = { | ||
169 | .desc = "MWAIT 0x33", | ||
170 | .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
171 | .exit_latency = 124, | ||
172 | - .target_residency = 800, | ||
173 | + .target_residency = 8000, | ||
174 | .enter = &intel_idle, | ||
175 | .enter_freeze = intel_idle_freeze, }, | ||
176 | { | ||
177 | @@ -636,7 +604,7 @@ static struct cpuidle_state skl_cstates[] = { | ||
178 | .desc = "MWAIT 0x40", | ||
179 | .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
180 | .exit_latency = 200, | ||
181 | - .target_residency = 800, | ||
182 | + .target_residency = 8000, | ||
183 | .enter = &intel_idle, | ||
184 | .enter_freeze = intel_idle_freeze, }, | ||
185 | { | ||
186 | @@ -644,7 +612,7 @@ static struct cpuidle_state skl_cstates[] = { | ||
187 | .desc = "MWAIT 0x50", | ||
188 | .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
189 | .exit_latency = 480, | ||
190 | - .target_residency = 5000, | ||
191 | + .target_residency = 50000, | ||
192 | .enter = &intel_idle, | ||
193 | .enter_freeze = intel_idle_freeze, }, | ||
194 | { | ||
195 | @@ -652,7 +620,7 @@ static struct cpuidle_state skl_cstates[] = { | ||
196 | .desc = "MWAIT 0x60", | ||
197 | .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
198 | .exit_latency = 890, | ||
199 | - .target_residency = 5000, | ||
200 | + .target_residency = 50000, | ||
201 | .enter = &intel_idle, | ||
202 | .enter_freeze = intel_idle_freeze, }, | ||
203 | { | ||
204 | @@ -669,19 +637,11 @@ static struct cpuidle_state skx_cstates[] = { | ||
205 | .enter = &intel_idle, | ||
206 | .enter_freeze = intel_idle_freeze, }, | ||
207 | { | ||
208 | - .name = "C1E-SKX", | ||
209 | - .desc = "MWAIT 0x01", | ||
210 | - .flags = MWAIT2flg(0x01), | ||
211 | - .exit_latency = 10, | ||
212 | - .target_residency = 20, | ||
213 | - .enter = &intel_idle, | ||
214 | - .enter_freeze = intel_idle_freeze, }, | ||
215 | - { | ||
216 | .name = "C6-SKX", | ||
217 | .desc = "MWAIT 0x20", | ||
218 | .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, | ||
219 | .exit_latency = 133, | ||
220 | - .target_residency = 600, | ||
221 | + .target_residency = 1600, | ||
222 | .enter = &intel_idle, | ||
223 | .enter_freeze = intel_idle_freeze, }, | ||
224 | { | ||
225 | -- | ||
226 | 2.11.1 | ||
227 | |||
diff --git a/patches/boot_time_opt/0109-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch b/patches/boot_time_opt/0109-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch new file mode 100644 index 0000000..70247a0 --- /dev/null +++ b/patches/boot_time_opt/0109-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch | |||
@@ -0,0 +1,56 @@ | |||
1 | From 4170571f7bb0897c90e13b2fcf3ee06990a9e774 Mon Sep 17 00:00:00 2001 | ||
2 | From: Alan Cox <alan@linux.intel.com> | ||
3 | Date: Thu, 10 Mar 2016 15:11:28 +0000 | ||
4 | Subject: [PATCH 109/124] xattr: allow setting user.* attributes on symlinks by | ||
5 | owner | ||
6 | |||
7 | Kvmtool and clear containers support using user attributes to label host | ||
8 | files with the virtual uid/guid of the file in the container. This allows an | ||
9 | end user to manage their files and a complete uid space without all the ugly | ||
10 | namespace stuff. | ||
11 | |||
12 | The one gap in the support is symlinks because an end user can change the | ||
13 | ownership of a symbolic link. We support attributes on these files as you | ||
14 | can already (as root) set security attributes on them. | ||
15 | |||
16 | The current rules seem slightly over-paranoid and as we have a use case this | ||
17 | patch enables updating the attributes on a symbolic link IFF you are the | ||
18 | owner of the symlink (as permissions are not usually meaningful on the link | ||
19 | itself). | ||
20 | |||
21 | Signed-off-by: Alan Cox <alan@linux.intel.com> | ||
22 | --- | ||
23 | fs/xattr.c | 14 ++++++++------ | ||
24 | 1 file changed, 8 insertions(+), 6 deletions(-) | ||
25 | |||
26 | diff --git a/fs/xattr.c b/fs/xattr.c | ||
27 | index 2d13b4e62fae..580a5aeddfd2 100644 | ||
28 | --- a/fs/xattr.c | ||
29 | +++ b/fs/xattr.c | ||
30 | @@ -118,15 +118,17 @@ xattr_permission(struct inode *inode, const char *name, int mask) | ||
31 | } | ||
32 | |||
33 | /* | ||
34 | - * In the user.* namespace, only regular files and directories can have | ||
35 | - * extended attributes. For sticky directories, only the owner and | ||
36 | - * privileged users can write attributes. | ||
37 | + * In the user.* namespace, only regular files, symbolic links, and | ||
38 | + * directories can have extended attributes. For symbolic links and | ||
39 | + * sticky directories, only the owner and privileged users can write | ||
40 | + * attributes. | ||
41 | */ | ||
42 | if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { | ||
43 | - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) | ||
44 | + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) | ||
45 | return (mask & MAY_WRITE) ? -EPERM : -ENODATA; | ||
46 | - if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && | ||
47 | - (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) | ||
48 | + if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX)) | ||
49 | + || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE) | ||
50 | + && !inode_owner_or_capable(inode)) | ||
51 | return -EPERM; | ||
52 | } | ||
53 | |||
54 | -- | ||
55 | 2.11.1 | ||
56 | |||
diff --git a/patches/boot_time_opt/0110-init_task-faster-timerslack.patch b/patches/boot_time_opt/0110-init_task-faster-timerslack.patch new file mode 100644 index 0000000..b0075ff --- /dev/null +++ b/patches/boot_time_opt/0110-init_task-faster-timerslack.patch | |||
@@ -0,0 +1,32 @@ | |||
1 | From 42c2cb32259b76fb1f6713d99c4f0922e97bcc8d Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Wed, 23 Mar 2016 14:52:41 +0000 | ||
4 | Subject: [PATCH 110/124] init_task: faster timerslack | ||
5 | |||
6 | the default tuning is a compromise between client power and server performance; | ||
7 | for a server distro like Clear Linux, we don't need to compromise. | ||
8 | (for non-server usages we have different kernel binaries) | ||
9 | |||
10 | in principle this can be done as a patch to systemd as well, but we have a shared | ||
11 | systemd between usages while we have different kernels, so the logistics | ||
12 | for where the patch goes work out better here | ||
13 | --- | ||
14 | include/linux/init_task.h | 2 +- | ||
15 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
16 | |||
17 | diff --git a/include/linux/init_task.h b/include/linux/init_task.h | ||
18 | index 325f649d77ff..e0eb261e17cb 100644 | ||
19 | --- a/include/linux/init_task.h | ||
20 | +++ b/include/linux/init_task.h | ||
21 | @@ -249,7 +249,7 @@ extern struct task_group root_task_group; | ||
22 | .journal_info = NULL, \ | ||
23 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ | ||
24 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ | ||
25 | - .timer_slack_ns = 50000, /* 50 usec default slack */ \ | ||
26 | + .timer_slack_ns = 1000, /* 1 usec default slack */ \ | ||
27 | .pids = { \ | ||
28 | [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ | ||
29 | [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ | ||
30 | -- | ||
31 | 2.11.1 | ||
32 | |||
diff --git a/patches/boot_time_opt/0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch b/patches/boot_time_opt/0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch new file mode 100644 index 0000000..7d0def8 --- /dev/null +++ b/patches/boot_time_opt/0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch | |||
@@ -0,0 +1,158 @@ | |||
1 | From 3152053ea1ea3aa77bcc7e990d48ef84621ff6c9 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Sat, 9 Apr 2016 22:41:37 +0000 | ||
4 | Subject: [PATCH 112/124] fs: ext4: fsync: optimize double-fsync() a bunch | ||
5 | |||
6 | There are cases where EXT4 is a bit too conservative sending barriers down to the disk; | ||
7 | there are cases where the transaction in progress is not the one that sent the barrier | ||
8 | (in other words: the fsync is for a file for which the IO happened some time ago | ||
9 | and all data was already sent to the disk). For that case, a better-performing tradeoff | ||
10 | can be made on SSD devices (which have the ability to flush their dram caches in a hurry | ||
11 | on a power fail event) where the barrier gets sent to the disk, but we don't need to wait | ||
12 | for the barrier to complete. Any consecutive IO will block on the barrier correctly. | ||
13 | --- | ||
14 | block/bio.c | 20 ++++++++++++++++++++ | ||
15 | block/blk-flush.c | 41 +++++++++++++++++++++++++++++++++++++++++ | ||
16 | fs/ext4/fsync.c | 6 +++++- | ||
17 | include/linux/bio.h | 1 + | ||
18 | include/linux/blkdev.h | 5 +++++ | ||
19 | 5 files changed, 72 insertions(+), 1 deletion(-) | ||
20 | |||
21 | diff --git a/block/bio.c b/block/bio.c | ||
22 | index db85c5753a76..80f5ab6b536a 100644 | ||
23 | --- a/block/bio.c | ||
24 | +++ b/block/bio.c | ||
25 | @@ -882,6 +882,26 @@ int submit_bio_wait(struct bio *bio) | ||
26 | } | ||
27 | EXPORT_SYMBOL(submit_bio_wait); | ||
28 | |||
29 | +static void submit_bio_nowait_endio(struct bio *bio) | ||
30 | +{ | ||
31 | + bio_put(bio); | ||
32 | +} | ||
33 | + | ||
34 | +/** | ||
35 | + * submit_bio_nowait - submit a bio for fire-and-forget | ||
36 | + * @bio: The &struct bio which describes the I/O | ||
37 | + * | ||
38 | + * Simple wrapper around submit_bio() that marks the bio REQ_SYNC and takes | ||
39 | + * care of bio_put() on completion; the caller does not wait for the I/O. | ||
40 | + */ | ||
41 | +void submit_bio_nowait(struct bio *bio) | ||
42 | +{ | ||
43 | + bio->bi_end_io = submit_bio_nowait_endio; | ||
44 | + bio->bi_opf |= REQ_SYNC; | ||
45 | + submit_bio(bio); | ||
46 | +} | ||
47 | +EXPORT_SYMBOL(submit_bio_nowait); | ||
48 | + | ||
49 | /** | ||
50 | * bio_advance - increment/complete a bio by some number of bytes | ||
51 | * @bio: bio to advance | ||
52 | diff --git a/block/blk-flush.c b/block/blk-flush.c | ||
53 | index 3c882cbc7541..b2dfcfe01ed7 100644 | ||
54 | --- a/block/blk-flush.c | ||
55 | +++ b/block/blk-flush.c | ||
56 | @@ -530,6 +530,47 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, | ||
57 | } | ||
58 | EXPORT_SYMBOL(blkdev_issue_flush); | ||
59 | |||
60 | +/** | ||
61 | + * blkdev_issue_flush_nowait - queue a flush | ||
62 | + * @bdev: blockdev to issue flush for | ||
63 | + * @gfp_mask: memory allocation flags (for bio_alloc) | ||
64 | + * | ||
65 | + * Description: | ||
66 | + * Queue a flush for the block device in question without waiting for | ||
67 | + * it to complete. No error is reported to the caller; the flush bio is | ||
68 | + * released by its completion handler. The call returns silently without | ||
69 | + * issuing a flush if the device has no disk, no request queue, or no | ||
70 | + * make_request function. | ||
71 | + */ | ||
72 | +void blkdev_issue_flush_nowait(struct block_device *bdev, gfp_t gfp_mask) | ||
73 | +{ | ||
74 | + struct request_queue *q; | ||
75 | + struct bio *bio; | ||
76 | + | ||
77 | + if (bdev->bd_disk == NULL) | ||
78 | + return; | ||
79 | + | ||
80 | + q = bdev_get_queue(bdev); | ||
81 | + if (!q) | ||
82 | + return; | ||
83 | + | ||
84 | + /* | ||
85 | + * some block devices may not have their queue correctly set up here | ||
86 | + * (e.g. loop device without a backing file) and so issuing a flush | ||
87 | + * here will panic. Ensure there is a request function before issuing | ||
88 | + * the flush. | ||
89 | + */ | ||
90 | + if (!q->make_request_fn) | ||
91 | + return; | ||
92 | + | ||
93 | + bio = bio_alloc(gfp_mask, 0); | ||
94 | + bio->bi_bdev = bdev; | ||
95 | + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); | ||
96 | + | ||
97 | + submit_bio_nowait(bio); | ||
98 | +} | ||
99 | +EXPORT_SYMBOL(blkdev_issue_flush_nowait); | ||
100 | + | ||
101 | struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, | ||
102 | int node, int cmd_size) | ||
103 | { | ||
104 | diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c | ||
105 | index 88effb1053c7..a58966c18172 100644 | ||
106 | --- a/fs/ext4/fsync.c | ||
107 | +++ b/fs/ext4/fsync.c | ||
108 | @@ -150,7 +150,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | ||
109 | ret = jbd2_complete_transaction(journal, commit_tid); | ||
110 | if (needs_barrier) { | ||
111 | issue_flush: | ||
112 | - err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | ||
113 | + err = 0; | ||
114 | + if (!blk_queue_nonrot(bdev_get_queue(inode->i_sb->s_bdev))) | ||
115 | + err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | ||
116 | + else | ||
117 | + blkdev_issue_flush_nowait(inode->i_sb->s_bdev, GFP_KERNEL); | ||
118 | if (!ret) | ||
119 | ret = err; | ||
120 | } | ||
121 | diff --git a/include/linux/bio.h b/include/linux/bio.h | ||
122 | index 97cb48f03dc7..3f055e6541e0 100644 | ||
123 | --- a/include/linux/bio.h | ||
124 | +++ b/include/linux/bio.h | ||
125 | @@ -421,6 +421,7 @@ struct request_queue; | ||
126 | extern int bio_phys_segments(struct request_queue *, struct bio *); | ||
127 | |||
128 | extern int submit_bio_wait(struct bio *bio); | ||
129 | +extern void submit_bio_nowait(struct bio *bio); | ||
130 | extern void bio_advance(struct bio *, unsigned); | ||
131 | |||
132 | extern void bio_init(struct bio *); | ||
133 | diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h | ||
134 | index f6a816129856..727684abf21e 100644 | ||
135 | --- a/include/linux/blkdev.h | ||
136 | +++ b/include/linux/blkdev.h | ||
137 | @@ -1144,6 +1144,7 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | ||
138 | #define BLKDEV_DISCARD_ZERO (1 << 1) /* must reliably zero data */ | ||
139 | |||
140 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); | ||
141 | +extern void blkdev_issue_flush_nowait(struct block_device *, gfp_t); | ||
142 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | ||
143 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | ||
144 | extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, | ||
145 | @@ -1745,6 +1746,10 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, | ||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | +static inline void blkdev_issue_flush_nowait(struct block_device *bdev, gfp_t gfp_mask) | ||
150 | +{ | ||
151 | +} | ||
152 | + | ||
153 | #endif /* CONFIG_BLOCK */ | ||
154 | |||
155 | #endif | ||
156 | -- | ||
157 | 2.11.1 | ||
158 | |||
diff --git a/patches/boot_time_opt/0113-overload-on-wakeup.patch b/patches/boot_time_opt/0113-overload-on-wakeup.patch new file mode 100644 index 0000000..a3a6bce --- /dev/null +++ b/patches/boot_time_opt/0113-overload-on-wakeup.patch | |||
@@ -0,0 +1,43 @@ | |||
1 | From 9f25d18f45a8391488feb9783404f2f79b7090f4 Mon Sep 17 00:00:00 2001 | ||
2 | From: jplozi <jplozi@unice.fr> | ||
3 | Date: Fri, 11 Mar 2016 15:18:06 +0100 | ||
4 | Subject: [PATCH 113/124] overload on wakeup | ||
5 | |||
6 | source https://github.com/jplozi/wastedcores | ||
7 | |||
8 | as an experiment, apply the learnings from the wasted-cores paper | ||
9 | and see how the performance works out. With the data from this we should | ||
10 | be able to work with Peter and the rest of the scheduler folks on | ||
11 | a more permanent/elegant solution. | ||
12 | --- | ||
13 | kernel/sched/fair.c | 14 ++++++++++++++ | ||
14 | 1 file changed, 14 insertions(+) | ||
15 | |||
16 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | ||
17 | index c242944f5cbd..5132c828161e 100644 | ||
18 | --- a/kernel/sched/fair.c | ||
19 | +++ b/kernel/sched/fair.c | ||
20 | @@ -5638,6 +5638,20 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f | ||
21 | } | ||
22 | |||
23 | rcu_read_lock(); | ||
24 | + | ||
25 | + if (cpu_rq(prev_cpu)->nr_running) { | ||
26 | + int _cpu; | ||
27 | + | ||
28 | + for_each_online_cpu(_cpu) { | ||
29 | + if (!cpumask_test_cpu(_cpu, tsk_cpus_allowed(p)) || | ||
30 | + cpu_rq(_cpu)->nr_running) | ||
31 | + continue; | ||
32 | + | ||
33 | + rcu_read_unlock(); | ||
34 | + return _cpu; | ||
35 | + } | ||
36 | + } | ||
37 | + | ||
38 | for_each_domain(cpu, tmp) { | ||
39 | if (!(tmp->flags & SD_LOAD_BALANCE)) | ||
40 | break; | ||
41 | -- | ||
42 | 2.11.1 | ||
43 | |||
diff --git a/patches/boot_time_opt/0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch b/patches/boot_time_opt/0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch new file mode 100644 index 0000000..c6bf036 --- /dev/null +++ b/patches/boot_time_opt/0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch | |||
@@ -0,0 +1,83 @@ | |||
1 | From 3a1512b4ed3922f88936b95731aaff706e7286a9 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Wed, 11 Feb 2015 16:05:23 -0600 | ||
4 | Subject: [PATCH 114/124] bootstats: add printk's to measure boot time in more | ||
5 | detail | ||
6 | |||
7 | A few distro tweaks to add printk's to visualize boot time better | ||
8 | |||
9 | Author: Arjan van de Ven <arjan@linux.intel.com> | ||
10 | |||
11 | Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> | ||
12 | --- | ||
13 | arch/x86/kernel/alternative.c | 4 ++++ | ||
14 | drivers/base/firmware_class.c | 2 ++ | ||
15 | init/main.c | 2 +- | ||
16 | kernel/kmod.c | 2 ++ | ||
17 | 4 files changed, 9 insertions(+), 1 deletion(-) | ||
18 | |||
19 | diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c | ||
20 | index 5cb272a7a5a3..d28fb7aae4ce 100644 | ||
21 | --- a/arch/x86/kernel/alternative.c | ||
22 | +++ b/arch/x86/kernel/alternative.c | ||
23 | @@ -626,7 +626,9 @@ void __init alternative_instructions(void) | ||
24 | * patching. | ||
25 | */ | ||
26 | |||
27 | + printk("clr: Applying alternatives\n"); | ||
28 | apply_alternatives(__alt_instructions, __alt_instructions_end); | ||
29 | + printk("clr: Applying alternatives done\n"); | ||
30 | |||
31 | #ifdef CONFIG_SMP | ||
32 | /* Patch to UP if other cpus not imminent. */ | ||
33 | @@ -637,6 +639,8 @@ void __init alternative_instructions(void) | ||
34 | _text, _etext); | ||
35 | } | ||
36 | |||
37 | + printk("clr: Applying alternatives smp done\n"); | ||
38 | + | ||
39 | if (!uniproc_patched || num_possible_cpus() == 1) | ||
40 | free_init_pages("SMP alternatives", | ||
41 | (unsigned long)__smp_locks, | ||
42 | diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c | ||
43 | index a95e1e572697..b29467031be6 100644 | ||
44 | --- a/drivers/base/firmware_class.c | ||
45 | +++ b/drivers/base/firmware_class.c | ||
46 | @@ -1224,6 +1224,8 @@ request_firmware(const struct firmware **firmware_p, const char *name, | ||
47 | { | ||
48 | int ret; | ||
49 | |||
50 | + printk("clr: request_firmware: %s\n", name); | ||
51 | + | ||
52 | /* Need to pin this module until return */ | ||
53 | __module_get(THIS_MODULE); | ||
54 | ret = _request_firmware(firmware_p, name, device, NULL, 0, | ||
55 | diff --git a/init/main.c b/init/main.c | ||
56 | index 2858be732f6d..f1d8c3fdbf05 100644 | ||
57 | --- a/init/main.c | ||
58 | +++ b/init/main.c | ||
59 | @@ -751,7 +751,7 @@ static int __init_or_module do_one_initcall_debug(initcall_t fn) | ||
60 | unsigned long long duration; | ||
61 | int ret; | ||
62 | |||
63 | - printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); | ||
64 | + printk(KERN_DEBUG "calling %pF @ %i\n", fn, raw_smp_processor_id()); | ||
65 | calltime = ktime_get(); | ||
66 | ret = fn(); | ||
67 | rettime = ktime_get(); | ||
68 | diff --git a/kernel/kmod.c b/kernel/kmod.c | ||
69 | index 0277d1216f80..dc5a6edd3895 100644 | ||
70 | --- a/kernel/kmod.c | ||
71 | +++ b/kernel/kmod.c | ||
72 | @@ -76,6 +76,8 @@ static int call_modprobe(char *module_name, int wait) | ||
73 | NULL | ||
74 | }; | ||
75 | |||
76 | + printk("clr: call_modprobe: %s %i \n", module_name, wait); | ||
77 | + | ||
78 | char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL); | ||
79 | if (!argv) | ||
80 | goto out; | ||
81 | -- | ||
82 | 2.11.1 | ||
83 | |||
diff --git a/patches/boot_time_opt/0115-fix-initcall-timestamps.patch b/patches/boot_time_opt/0115-fix-initcall-timestamps.patch new file mode 100644 index 0000000..cdf2af1 --- /dev/null +++ b/patches/boot_time_opt/0115-fix-initcall-timestamps.patch | |||
@@ -0,0 +1,42 @@ | |||
1 | From 5b5ad2c9b9b555d20aeba1f895d0c9d1c2a77776 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Thu, 2 Jun 2016 23:36:32 -0500 | ||
4 | Subject: [PATCH 115/124] fix initcall timestamps | ||
5 | |||
6 | Print more fine-grained initcall timings | ||
7 | |||
8 | use the tsc instead of the jiffies clock for initcall_debug | ||
9 | --- | ||
10 | init/main.c | 12 ++++++------ | ||
11 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
12 | |||
13 | diff --git a/init/main.c b/init/main.c | ||
14 | index f1d8c3fdbf05..8358cbe6ab13 100644 | ||
15 | --- a/init/main.c | ||
16 | +++ b/init/main.c | ||
17 | @@ -747,16 +747,16 @@ __setup("initcall_blacklist=", initcall_blacklist); | ||
18 | |||
19 | static int __init_or_module do_one_initcall_debug(initcall_t fn) | ||
20 | { | ||
21 | - ktime_t calltime, delta, rettime; | ||
22 | + unsigned long long calltime, delta, rettime; | ||
23 | unsigned long long duration; | ||
24 | int ret; | ||
25 | |||
26 | - printk(KERN_DEBUG "calling %pF @ %i\n", fn, raw_smp_processor_id()); | ||
27 | - calltime = ktime_get(); | ||
28 | + printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); | ||
29 | + calltime = local_clock(); | ||
30 | ret = fn(); | ||
31 | - rettime = ktime_get(); | ||
32 | - delta = ktime_sub(rettime, calltime); | ||
33 | - duration = (unsigned long long) ktime_to_ns(delta) >> 10; | ||
34 | + rettime = local_clock(); | ||
35 | + delta = rettime - calltime; | ||
36 | + duration = delta >> 10; | ||
37 | printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", | ||
38 | fn, ret, duration); | ||
39 | |||
40 | -- | ||
41 | 2.11.1 | ||
42 | |||
diff --git a/patches/boot_time_opt/0116-smpboot-reuse-timer-calibration.patch b/patches/boot_time_opt/0116-smpboot-reuse-timer-calibration.patch new file mode 100644 index 0000000..d1f71b5 --- /dev/null +++ b/patches/boot_time_opt/0116-smpboot-reuse-timer-calibration.patch | |||
@@ -0,0 +1,31 @@ | |||
1 | From 16104411cc5a7b20f310e3ecede85343ee6ce6b9 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Wed, 11 Feb 2015 17:28:14 -0600 | ||
4 | Subject: [PATCH 116/124] smpboot: reuse timer calibration | ||
5 | |||
6 | No point recalibrating for known-constant tsc... saves 200ms+ of boot time. | ||
7 | |||
8 | Author: Arjan van de Ven <arjan@linux.intel.com> | ||
9 | |||
10 | Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> | ||
11 | --- | ||
12 | arch/x86/kernel/tsc.c | 3 +++ | ||
13 | 1 file changed, 3 insertions(+) | ||
14 | |||
15 | diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c | ||
16 | index 46b2f41f8b05..88553c1f21f1 100644 | ||
17 | --- a/arch/x86/kernel/tsc.c | ||
18 | +++ b/arch/x86/kernel/tsc.c | ||
19 | @@ -1384,6 +1384,9 @@ unsigned long calibrate_delay_is_known(void) | ||
20 | if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) | ||
21 | return 0; | ||
22 | |||
23 | + if (cpu != 0) | ||
24 | + return cpu_data(0).loops_per_jiffy; | ||
25 | + | ||
26 | if (!mask) | ||
27 | return 0; | ||
28 | |||
29 | -- | ||
30 | 2.11.1 | ||
31 | |||
diff --git a/patches/boot_time_opt/0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch b/patches/boot_time_opt/0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch new file mode 100644 index 0000000..978e09f --- /dev/null +++ b/patches/boot_time_opt/0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch | |||
@@ -0,0 +1,156 @@ | |||
1 | From fd1f55138c242bd9aeec374ff611064bdc89b359 Mon Sep 17 00:00:00 2001 | ||
2 | From: Jim Kukunas <james.t.kukunas@linux.intel.com> | ||
3 | Date: Fri, 27 May 2016 09:26:51 -0400 | ||
4 | Subject: [PATCH 117/124] raid6: add Kconfig option to skip raid6 benchmarking | ||
5 | |||
6 | Adds CONFIG_RAID6_FORCE_ALGO, which causes the kernel to not benchmark | ||
7 | each raid recovery and syndrome generation algorithm, and instead use | ||
8 | the version selected via Kconfig (CONFIG_RAID6_FORCE_{INT,SSSE3,AVX2}). | ||
9 | In case the selected algorithm is not supported by the processor at | ||
10 | runtime, a fallback is used. | ||
11 | |||
12 | Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com> | ||
13 | --- | ||
14 | lib/Kconfig | 3 +-- | ||
15 | lib/raid6/Kconfig | 38 ++++++++++++++++++++++++++++++++++++ | ||
16 | lib/raid6/algos.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ | ||
17 | 3 files changed, 97 insertions(+), 2 deletions(-) | ||
18 | create mode 100644 lib/raid6/Kconfig | ||
19 | |||
20 | diff --git a/lib/Kconfig b/lib/Kconfig | ||
21 | index 260a80e313b9..b3efd21db2fd 100644 | ||
22 | --- a/lib/Kconfig | ||
23 | +++ b/lib/Kconfig | ||
24 | @@ -7,8 +7,7 @@ config BINARY_PRINTF | ||
25 | |||
26 | menu "Library routines" | ||
27 | |||
28 | -config RAID6_PQ | ||
29 | - tristate | ||
30 | +source "lib/raid6/Kconfig" | ||
31 | |||
32 | config BITREVERSE | ||
33 | tristate | ||
34 | diff --git a/lib/raid6/Kconfig b/lib/raid6/Kconfig | ||
35 | new file mode 100644 | ||
36 | index 000000000000..d881d6be89bb | ||
37 | --- /dev/null | ||
38 | +++ b/lib/raid6/Kconfig | ||
39 | @@ -0,0 +1,38 @@ | ||
40 | +menu "RAID 6" | ||
41 | + | ||
42 | +config RAID6_PQ | ||
43 | + tristate | ||
44 | + | ||
45 | +config RAID6_FORCE_ALGO | ||
46 | + bool "Always use specified recovery algorithm" | ||
47 | + default n | ||
48 | + depends on RAID6_PQ | ||
49 | + help | ||
50 | + If this option is not set, on every boot the kernel will | ||
51 | + benchmark each optimized version of the RAID6 recovery and | ||
52 | + syndrome generation algorithms and will select the one that | ||
53 | + performs best. Microbenchmarking each version negatively | ||
54 | + affects boot time. | ||
55 | + | ||
56 | + Enabling this option skips the benchmark at boot, and | ||
57 | + instead always uses the algorithm selected. The only exception | ||
58 | + is if the selected algorithm relies on a cpu feature not | ||
59 | + supported at runtime. In this case, one of the lower performance | ||
60 | + fallbacks is used. | ||
61 | + | ||
62 | +choice | ||
63 | + prompt "RAID6 Recovery Algorithm" | ||
64 | + default RAID6_FORCE_INT | ||
65 | + depends on RAID6_FORCE_ALGO | ||
66 | + ---help--- | ||
67 | + Select the RAID6 recovery algorithm to unconditionally use | ||
68 | + | ||
69 | + config RAID6_FORCE_INT | ||
70 | + bool "Reference Implementation" | ||
71 | + config RAID6_FORCE_SSSE3 | ||
72 | + bool "SSSE3" | ||
73 | + config RAID6_FORCE_AVX2 | ||
74 | + bool "AVX2" | ||
75 | +endchoice | ||
76 | + | ||
77 | +endmenu | ||
78 | diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c | ||
79 | index 7857049fd7d3..29332d2a04a5 100644 | ||
80 | --- a/lib/raid6/algos.c | ||
81 | +++ b/lib/raid6/algos.c | ||
82 | @@ -125,6 +125,63 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { | ||
83 | #define time_before(x, y) ((x) < (y)) | ||
84 | #endif | ||
85 | |||
86 | +#ifdef CONFIG_RAID6_FORCE_ALGO | ||
87 | +/* TODO don't compile in algos that will never be used */ | ||
88 | +int __init raid6_select_algo(void) | ||
89 | +{ | ||
90 | + const struct raid6_recov_calls *recov_fallback = &raid6_recov_intx1; | ||
91 | + const struct raid6_recov_calls *recov_algo; | ||
92 | + const struct raid6_calls *gen_fallback; | ||
93 | + const struct raid6_calls *gen_algo; | ||
94 | + | ||
95 | +#if defined(__i386__) | ||
96 | + gen_fallback = &raid6_intx32; | ||
97 | +#elif defined(__x86_64__) | ||
98 | + gen_fallback = &raid6_sse2x2; | ||
99 | +#else | ||
100 | +# error "TODO" | ||
101 | +#endif | ||
102 | + | ||
103 | +#if defined(CONFIG_RAID6_FORCE_INT) | ||
104 | + recov_algo = &raid6_recov_intx1; | ||
105 | + gen_algo = &raid6_intx32; | ||
106 | + | ||
107 | +#elif defined(CONFIG_RAID6_FORCE_SSSE3) | ||
108 | + recov_algo = &raid6_recov_ssse3; | ||
109 | +#if defined(__i386__) | ||
110 | + gen_algo = &raid6_sse2x2; | ||
111 | +#else | ||
112 | + gen_algo = &raid6_sse2x4; | ||
113 | +#endif | ||
114 | + | ||
115 | +#elif defined(CONFIG_RAID6_FORCE_AVX2) | ||
116 | + recov_algo = &raid6_recov_avx2; | ||
117 | + | ||
118 | +#if defined(__i386__) | ||
119 | + gen_algo = &raid6_avx2x2; | ||
120 | +#else | ||
121 | + gen_algo = &raid6_avx2x4; | ||
122 | +#endif | ||
123 | + | ||
124 | +#else | ||
125 | +#error "RAID6 Forced Recov Algo: Unsupported selection" | ||
126 | +#endif | ||
127 | + | ||
128 | + if (recov_algo->valid != NULL && recov_algo->valid() == 0) | ||
129 | + recov_algo = recov_fallback; | ||
130 | + | ||
131 | + pr_info("raid6: Forced to use recovery algorithm %s\n", recov_algo->name); | ||
132 | + | ||
133 | + raid6_2data_recov = recov_algo->data2; | ||
134 | + raid6_datap_recov = recov_algo->datap; | ||
135 | + | ||
136 | + pr_info("raid6: Forced gen() algo %s\n", gen_algo->name); | ||
137 | + | ||
138 | + raid6_call = *gen_algo; | ||
139 | + | ||
140 | + return gen_algo && recov_algo ? 0 : -EINVAL; | ||
141 | +} | ||
142 | +#else | ||
143 | static inline const struct raid6_recov_calls *raid6_choose_recov(void) | ||
144 | { | ||
145 | const struct raid6_recov_calls *const *algo; | ||
146 | @@ -256,6 +313,7 @@ int __init raid6_select_algo(void) | ||
147 | |||
148 | return gen_best && rec_best ? 0 : -EINVAL; | ||
149 | } | ||
150 | +#endif | ||
151 | |||
152 | static void raid6_exit(void) | ||
153 | { | ||
154 | -- | ||
155 | 2.11.1 | ||
156 | |||
diff --git a/patches/boot_time_opt/0118-Initialize-ata-before-graphics.patch b/patches/boot_time_opt/0118-Initialize-ata-before-graphics.patch new file mode 100644 index 0000000..70e07c8 --- /dev/null +++ b/patches/boot_time_opt/0118-Initialize-ata-before-graphics.patch | |||
@@ -0,0 +1,47 @@ | |||
1 | From fbc1ab7c18a9c960a0bff293a93620d581658f8d Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Thu, 2 Jun 2016 23:36:32 -0500 | ||
4 | Subject: [PATCH 118/124] Initialize ata before graphics | ||
5 | |||
6 | ATA init is the long pole in the boot process, and it's asynchronous. | ||
7 | move the graphics init after it so that ata and graphics initialize | ||
8 | in parallel | ||
9 | --- | ||
10 | drivers/Makefile | 11 ++++++----- | ||
11 | 1 file changed, 6 insertions(+), 5 deletions(-) | ||
12 | |||
13 | diff --git a/drivers/Makefile b/drivers/Makefile | ||
14 | index 194d20bee7dc..2785e4c6b30f 100644 | ||
15 | --- a/drivers/Makefile | ||
16 | +++ b/drivers/Makefile | ||
17 | @@ -55,14 +55,9 @@ obj-y += char/ | ||
18 | # iommu/ comes before gpu as gpu are using iommu controllers | ||
19 | obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ | ||
20 | |||
21 | -# gpu/ comes after char for AGP vs DRM startup and after iommu | ||
22 | -obj-y += gpu/ | ||
23 | |||
24 | obj-$(CONFIG_CONNECTOR) += connector/ | ||
25 | |||
26 | -# i810fb and intelfb depend on char/agp/ | ||
27 | -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ | ||
28 | -obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ | ||
29 | |||
30 | obj-$(CONFIG_PARPORT) += parport/ | ||
31 | obj-$(CONFIG_NVM) += lightnvm/ | ||
32 | @@ -76,6 +71,12 @@ obj-$(CONFIG_IDE) += ide/ | ||
33 | obj-$(CONFIG_SCSI) += scsi/ | ||
34 | obj-y += nvme/ | ||
35 | obj-$(CONFIG_ATA) += ata/ | ||
36 | + | ||
37 | +# gpu/ comes after char for AGP vs DRM startup and after iommu | ||
38 | +obj-y += gpu/ | ||
39 | +# i810fb and intelfb depend on char/agp/ | ||
40 | +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ | ||
41 | +obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ | ||
42 | obj-$(CONFIG_TARGET_CORE) += target/ | ||
43 | obj-$(CONFIG_MTD) += mtd/ | ||
44 | obj-$(CONFIG_SPI) += spi/ | ||
45 | -- | ||
46 | 2.11.1 | ||
47 | |||
diff --git a/patches/boot_time_opt/0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch b/patches/boot_time_opt/0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch new file mode 100644 index 0000000..a068afb --- /dev/null +++ b/patches/boot_time_opt/0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch | |||
@@ -0,0 +1,311 @@ | |||
1 | From d9390cb702de5cbef64f893efd2344c4f58dae82 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Mon, 25 Jul 2016 06:44:34 -0500 | ||
4 | Subject: [PATCH 119/124] reduce e1000e boot time by tightening sleep ranges | ||
5 | |||
6 | The e1000e driver is a great user of the usleep_range() API, | ||
7 | and has many nice ranges that in principle help power management. | ||
8 | |||
9 | However, the ranges that are used only during system startup are | ||
10 | very long (and can easily add 100 msec to the boot time) while | ||
11 | the power savings of such long ranges are irrelevant due to the | ||
12 | one-off, boot-only nature of these functions. | ||
13 | |||
14 | This patch shrinks some of the longest ranges to be shorter | ||
15 | (while still using a power friendly 1 msec range); this saves | ||
16 | 100msec+ of boot time on my BDW NUCs | ||
17 | |||
18 | Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> | ||
19 | --- | ||
20 | drivers/net/ethernet/intel/e1000e/80003es2lan.c | 2 +- | ||
21 | drivers/net/ethernet/intel/e1000e/82571.c | 2 +- | ||
22 | drivers/net/ethernet/intel/e1000e/ethtool.c | 14 +++++++------- | ||
23 | drivers/net/ethernet/intel/e1000e/ich8lan.c | 20 ++++++++++---------- | ||
24 | drivers/net/ethernet/intel/e1000e/mac.c | 2 +- | ||
25 | drivers/net/ethernet/intel/e1000e/netdev.c | 14 +++++++------- | ||
26 | drivers/net/ethernet/intel/e1000e/nvm.c | 2 +- | ||
27 | 7 files changed, 28 insertions(+), 28 deletions(-) | ||
28 | |||
29 | diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c | ||
30 | index cd391376036c..b5759899eeb8 100644 | ||
31 | --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c | ||
32 | +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c | ||
33 | @@ -698,7 +698,7 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) | ||
34 | ew32(TCTL, E1000_TCTL_PSP); | ||
35 | e1e_flush(); | ||
36 | |||
37 | - usleep_range(10000, 20000); | ||
38 | + usleep_range(10000, 11000); | ||
39 | |||
40 | ctrl = er32(CTRL); | ||
41 | |||
42 | diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c | ||
43 | index 6b03c8553e59..d31145269dd9 100644 | ||
44 | --- a/drivers/net/ethernet/intel/e1000e/82571.c | ||
45 | +++ b/drivers/net/ethernet/intel/e1000e/82571.c | ||
46 | @@ -977,7 +977,7 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) | ||
47 | ew32(TCTL, tctl); | ||
48 | e1e_flush(); | ||
49 | |||
50 | - usleep_range(10000, 20000); | ||
51 | + usleep_range(10000, 11000); | ||
52 | |||
53 | /* Must acquire the MDIO ownership before MAC reset. | ||
54 | * Ownership defaults to firmware after a reset. | ||
55 | diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c | ||
56 | index 7aff68a4a4df..7cb689bd41f8 100644 | ||
57 | --- a/drivers/net/ethernet/intel/e1000e/ethtool.c | ||
58 | +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c | ||
59 | @@ -1023,7 +1023,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) | ||
60 | /* Disable all the interrupts */ | ||
61 | ew32(IMC, 0xFFFFFFFF); | ||
62 | e1e_flush(); | ||
63 | - usleep_range(10000, 20000); | ||
64 | + usleep_range(10000, 11000); | ||
65 | |||
66 | /* Test each interrupt */ | ||
67 | for (i = 0; i < 10; i++) { | ||
68 | @@ -1055,7 +1055,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) | ||
69 | ew32(IMC, mask); | ||
70 | ew32(ICS, mask); | ||
71 | e1e_flush(); | ||
72 | - usleep_range(10000, 20000); | ||
73 | + usleep_range(10000, 11000); | ||
74 | |||
75 | if (adapter->test_icr & mask) { | ||
76 | *data = 3; | ||
77 | @@ -1073,7 +1073,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) | ||
78 | ew32(IMS, mask); | ||
79 | ew32(ICS, mask); | ||
80 | e1e_flush(); | ||
81 | - usleep_range(10000, 20000); | ||
82 | + usleep_range(10000, 11000); | ||
83 | |||
84 | if (!(adapter->test_icr & mask)) { | ||
85 | *data = 4; | ||
86 | @@ -1091,7 +1091,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) | ||
87 | ew32(IMC, ~mask & 0x00007FFF); | ||
88 | ew32(ICS, ~mask & 0x00007FFF); | ||
89 | e1e_flush(); | ||
90 | - usleep_range(10000, 20000); | ||
91 | + usleep_range(10000, 11000); | ||
92 | |||
93 | if (adapter->test_icr) { | ||
94 | *data = 5; | ||
95 | @@ -1103,7 +1103,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) | ||
96 | /* Disable all the interrupts */ | ||
97 | ew32(IMC, 0xFFFFFFFF); | ||
98 | e1e_flush(); | ||
99 | - usleep_range(10000, 20000); | ||
100 | + usleep_range(10000, 11000); | ||
101 | |||
102 | /* Unhook test interrupt handler */ | ||
103 | free_irq(irq, netdev); | ||
104 | @@ -1479,7 +1479,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) | ||
105 | */ | ||
106 | ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK); | ||
107 | e1e_flush(); | ||
108 | - usleep_range(10000, 20000); | ||
109 | + usleep_range(10000, 11000); | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | @@ -1592,7 +1592,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) | ||
114 | hw->phy.media_type == e1000_media_type_internal_serdes) { | ||
115 | ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); | ||
116 | e1e_flush(); | ||
117 | - usleep_range(10000, 20000); | ||
118 | + usleep_range(10000, 11000); | ||
119 | break; | ||
120 | } | ||
121 | /* Fall Through */ | ||
122 | diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c | ||
123 | index f3aaca743ea3..bef75cec259f 100644 | ||
124 | --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c | ||
125 | +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c | ||
126 | @@ -289,7 +289,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) | ||
127 | u16 count = 20; | ||
128 | |||
129 | do { | ||
130 | - usleep_range(5000, 10000); | ||
131 | + usleep_range(5000, 6000); | ||
132 | } while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--); | ||
133 | |||
134 | msleep(30); | ||
135 | @@ -422,7 +422,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) | ||
136 | /* Ungate automatic PHY configuration on non-managed 82579 */ | ||
137 | if ((hw->mac.type == e1000_pch2lan) && | ||
138 | !(fwsm & E1000_ICH_FWSM_FW_VALID)) { | ||
139 | - usleep_range(10000, 20000); | ||
140 | + usleep_range(10000, 11000); | ||
141 | e1000_gate_hw_phy_config_ich8lan(hw, false); | ||
142 | } | ||
143 | |||
144 | @@ -547,7 +547,7 @@ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) | ||
145 | phy->id = 0; | ||
146 | while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) && | ||
147 | (i++ < 100)) { | ||
148 | - usleep_range(1000, 2000); | ||
149 | + usleep_range(1000, 1100); | ||
150 | ret_val = e1000e_get_phy_id(hw); | ||
151 | if (ret_val) | ||
152 | return ret_val; | ||
153 | @@ -1259,7 +1259,7 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) | ||
154 | goto out; | ||
155 | } | ||
156 | |||
157 | - usleep_range(10000, 20000); | ||
158 | + usleep_range(10000, 11000); | ||
159 | } | ||
160 | e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); | ||
161 | |||
162 | @@ -2011,7 +2011,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) | ||
163 | |||
164 | while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && | ||
165 | (i++ < 30)) | ||
166 | - usleep_range(10000, 20000); | ||
167 | + usleep_range(10000, 11000); | ||
168 | return blocked ? E1000_BLK_PHY_RESET : 0; | ||
169 | } | ||
170 | |||
171 | @@ -2827,7 +2827,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) | ||
172 | return 0; | ||
173 | |||
174 | /* Allow time for h/w to get to quiescent state after reset */ | ||
175 | - usleep_range(10000, 20000); | ||
176 | + usleep_range(10000, 11000); | ||
177 | |||
178 | /* Perform any necessary post-reset workarounds */ | ||
179 | switch (hw->mac.type) { | ||
180 | @@ -2863,7 +2863,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) | ||
181 | if (hw->mac.type == e1000_pch2lan) { | ||
182 | /* Ungate automatic PHY configuration on non-managed 82579 */ | ||
183 | if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { | ||
184 | - usleep_range(10000, 20000); | ||
185 | + usleep_range(10000, 11000); | ||
186 | e1000_gate_hw_phy_config_ich8lan(hw, false); | ||
187 | } | ||
188 | |||
189 | @@ -3884,7 +3884,7 @@ static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw) | ||
190 | */ | ||
191 | if (!ret_val) { | ||
192 | nvm->ops.reload(hw); | ||
193 | - usleep_range(10000, 20000); | ||
194 | + usleep_range(10000, 11000); | ||
195 | } | ||
196 | |||
197 | out: | ||
198 | @@ -4035,7 +4035,7 @@ static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) | ||
199 | */ | ||
200 | if (!ret_val) { | ||
201 | nvm->ops.reload(hw); | ||
202 | - usleep_range(10000, 20000); | ||
203 | + usleep_range(10000, 11000); | ||
204 | } | ||
205 | |||
206 | out: | ||
207 | @@ -4658,7 +4658,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) | ||
208 | ew32(TCTL, E1000_TCTL_PSP); | ||
209 | e1e_flush(); | ||
210 | |||
211 | - usleep_range(10000, 20000); | ||
212 | + usleep_range(10000, 11000); | ||
213 | |||
214 | /* Workaround for ICH8 bit corruption issue in FIFO memory */ | ||
215 | if (hw->mac.type == e1000_ich8lan) { | ||
216 | diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c | ||
217 | index b322011ec282..eecbf7a12735 100644 | ||
218 | --- a/drivers/net/ethernet/intel/e1000e/mac.c | ||
219 | +++ b/drivers/net/ethernet/intel/e1000e/mac.c | ||
220 | @@ -815,7 +815,7 @@ static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) | ||
221 | * milliseconds even if the other end is doing it in SW). | ||
222 | */ | ||
223 | for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { | ||
224 | - usleep_range(10000, 20000); | ||
225 | + usleep_range(10000, 11000); | ||
226 | status = er32(STATUS); | ||
227 | if (status & E1000_STATUS_LU) | ||
228 | break; | ||
229 | diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c | ||
230 | index 7017281ba2dc..7d68d694ed9e 100644 | ||
231 | --- a/drivers/net/ethernet/intel/e1000e/netdev.c | ||
232 | +++ b/drivers/net/ethernet/intel/e1000e/netdev.c | ||
233 | @@ -3206,7 +3206,7 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) | ||
234 | if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) | ||
235 | ew32(RCTL, rctl & ~E1000_RCTL_EN); | ||
236 | e1e_flush(); | ||
237 | - usleep_range(10000, 20000); | ||
238 | + usleep_range(10000, 11000); | ||
239 | |||
240 | if (adapter->flags2 & FLAG2_DMA_BURST) { | ||
241 | /* set the writeback threshold (only takes effect if the RDTR | ||
242 | @@ -4258,7 +4258,7 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset) | ||
243 | |||
244 | /* flush both disables and wait for them to finish */ | ||
245 | e1e_flush(); | ||
246 | - usleep_range(10000, 20000); | ||
247 | + usleep_range(10000, 11000); | ||
248 | |||
249 | e1000_irq_disable(adapter); | ||
250 | |||
251 | @@ -4296,7 +4296,7 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter) | ||
252 | { | ||
253 | might_sleep(); | ||
254 | while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) | ||
255 | - usleep_range(1000, 2000); | ||
256 | + usleep_range(1000, 1100); | ||
257 | e1000e_down(adapter, true); | ||
258 | e1000e_up(adapter); | ||
259 | clear_bit(__E1000_RESETTING, &adapter->state); | ||
260 | @@ -4671,7 +4671,7 @@ int e1000e_close(struct net_device *netdev) | ||
261 | int count = E1000_CHECK_RESET_COUNT; | ||
262 | |||
263 | while (test_bit(__E1000_RESETTING, &adapter->state) && count--) | ||
264 | - usleep_range(10000, 20000); | ||
265 | + usleep_range(10000, 11000); | ||
266 | |||
267 | WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); | ||
268 | |||
269 | @@ -5996,7 +5996,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) | ||
270 | } | ||
271 | |||
272 | while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) | ||
273 | - usleep_range(1000, 2000); | ||
274 | + usleep_range(1000, 1100); | ||
275 | /* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */ | ||
276 | adapter->max_frame_size = max_frame; | ||
277 | e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu); | ||
278 | @@ -6276,7 +6276,7 @@ static int e1000e_pm_freeze(struct device *dev) | ||
279 | int count = E1000_CHECK_RESET_COUNT; | ||
280 | |||
281 | while (test_bit(__E1000_RESETTING, &adapter->state) && count--) | ||
282 | - usleep_range(10000, 20000); | ||
283 | + usleep_range(10000, 11000); | ||
284 | |||
285 | WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); | ||
286 | |||
287 | @@ -6687,7 +6687,7 @@ static int e1000e_pm_runtime_suspend(struct device *dev) | ||
288 | int count = E1000_CHECK_RESET_COUNT; | ||
289 | |||
290 | while (test_bit(__E1000_RESETTING, &adapter->state) && count--) | ||
291 | - usleep_range(10000, 20000); | ||
292 | + usleep_range(10000, 11000); | ||
293 | |||
294 | WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); | ||
295 | |||
296 | diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c | ||
297 | index 2efd80dfd88e..38f7c8fb3061 100644 | ||
298 | --- a/drivers/net/ethernet/intel/e1000e/nvm.c | ||
299 | +++ b/drivers/net/ethernet/intel/e1000e/nvm.c | ||
300 | @@ -410,7 +410,7 @@ s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) | ||
301 | break; | ||
302 | } | ||
303 | } | ||
304 | - usleep_range(10000, 20000); | ||
305 | + usleep_range(10000, 11000); | ||
306 | nvm->ops.release(hw); | ||
307 | } | ||
308 | |||
309 | -- | ||
310 | 2.11.1 | ||
311 | |||
diff --git a/patches/boot_time_opt/0120-give-rdrand-some-credit.patch b/patches/boot_time_opt/0120-give-rdrand-some-credit.patch new file mode 100644 index 0000000..4b1669c --- /dev/null +++ b/patches/boot_time_opt/0120-give-rdrand-some-credit.patch | |||
@@ -0,0 +1,30 @@ | |||
1 | From 5cc978db25b2c92707f68b15098ac39901fb5aac Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Fri, 29 Jul 2016 19:10:52 +0000 | ||
4 | Subject: [PATCH 120/124] give rdrand some credit | ||
5 | |||
6 | try to credit rdrand/rdseed with some entropy | ||
7 | |||
8 | In VMs, and even on modern hardware, we're super starved for entropy, and while we can | ||
9 | and do wear a tin foil hat, it's very hard to argue that | ||
10 | rdrand and rdtsc add zero entropy. | ||
11 | --- | ||
12 | drivers/char/random.c | 2 ++ | ||
13 | 1 file changed, 2 insertions(+) | ||
14 | |||
15 | diff --git a/drivers/char/random.c b/drivers/char/random.c | ||
16 | index d6876d506220..fca09af81b2c 100644 | ||
17 | --- a/drivers/char/random.c | ||
18 | +++ b/drivers/char/random.c | ||
19 | @@ -1638,6 +1638,8 @@ static void init_std_data(struct entropy_store *r) | ||
20 | if (!arch_get_random_seed_long(&rv) && | ||
21 | !arch_get_random_long(&rv)) | ||
22 | rv = random_get_entropy(); | ||
23 | + else | ||
24 | + credit_entropy_bits(r, 1); | ||
25 | mix_pool_bytes(r, &rv, sizeof(rv)); | ||
26 | } | ||
27 | mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); | ||
28 | -- | ||
29 | 2.11.1 | ||
30 | |||
diff --git a/patches/boot_time_opt/0121-e1000e-change-default-policy.patch b/patches/boot_time_opt/0121-e1000e-change-default-policy.patch new file mode 100644 index 0000000..bf3e13d --- /dev/null +++ b/patches/boot_time_opt/0121-e1000e-change-default-policy.patch | |||
@@ -0,0 +1,27 @@ | |||
1 | From 5b4707fc2aa8c49aa18a60136880bf05a3e29071 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Sat, 10 Dec 2016 14:29:52 +0000 | ||
4 | Subject: [PATCH 121/124] e1000e: change default policy | ||
5 | |||
6 | change the default irq mitigation policy for e1000e to be | ||
7 | more HPC/cluster friendly | ||
8 | --- | ||
9 | drivers/net/ethernet/intel/e1000e/param.c | 2 +- | ||
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c | ||
13 | index 6d8c39abee16..ef1122ad3b98 100644 | ||
14 | --- a/drivers/net/ethernet/intel/e1000e/param.c | ||
15 | +++ b/drivers/net/ethernet/intel/e1000e/param.c | ||
16 | @@ -92,7 +92,7 @@ E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay"); | ||
17 | * Valid Range: 100-100000 or one of: 0=off, 1=dynamic, 3=dynamic conservative | ||
18 | */ | ||
19 | E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate"); | ||
20 | -#define DEFAULT_ITR 3 | ||
21 | +#define DEFAULT_ITR 1 | ||
22 | #define MAX_ITR 100000 | ||
23 | #define MIN_ITR 100 | ||
24 | |||
25 | -- | ||
26 | 2.11.1 | ||
27 | |||
diff --git a/patches/boot_time_opt/0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch b/patches/boot_time_opt/0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch new file mode 100644 index 0000000..eb44cec --- /dev/null +++ b/patches/boot_time_opt/0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch | |||
@@ -0,0 +1,28 @@ | |||
1 | From 5cf7ba4ba9c9d770aad9e52deaa3730f259df9f1 Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Fri, 6 Jan 2017 15:34:09 +0000 | ||
4 | Subject: [PATCH 122/124] ipv4/tcp: allow the memory tuning for tcp to go a | ||
5 | little bigger than default | ||
6 | |||
7 | --- | ||
8 | net/ipv4/tcp.c | 4 ++-- | ||
9 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
10 | |||
11 | diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c | ||
12 | index 6a90a0e130dc..32e43ce7c60e 100644 | ||
13 | --- a/net/ipv4/tcp.c | ||
14 | +++ b/net/ipv4/tcp.c | ||
15 | @@ -3341,8 +3341,8 @@ void __init tcp_init(void) | ||
16 | tcp_init_mem(); | ||
17 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ | ||
18 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); | ||
19 | - max_wshare = min(4UL*1024*1024, limit); | ||
20 | - max_rshare = min(6UL*1024*1024, limit); | ||
21 | + max_wshare = min(16UL*1024*1024, limit); | ||
22 | + max_rshare = min(16UL*1024*1024, limit); | ||
23 | |||
24 | sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; | ||
25 | sysctl_tcp_wmem[1] = 16*1024; | ||
26 | -- | ||
27 | 2.11.1 | ||
28 | |||
diff --git a/patches/boot_time_opt/0123-igb-no-runtime-pm-to-fix-reboot-oops.patch b/patches/boot_time_opt/0123-igb-no-runtime-pm-to-fix-reboot-oops.patch new file mode 100644 index 0000000..ce4964e --- /dev/null +++ b/patches/boot_time_opt/0123-igb-no-runtime-pm-to-fix-reboot-oops.patch | |||
@@ -0,0 +1,27 @@ | |||
1 | From 10f0c995ce6aaf6b3ffa78377f1a12ad0477057a Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Thu, 12 Jan 2017 18:17:14 +0000 | ||
4 | Subject: [PATCH 123/124] igb: no runtime pm to fix reboot oops | ||
5 | |||
6 | Causes oops on reboot due to a race between runtime resume and shutdown | ||
7 | --- | ||
8 | drivers/net/ethernet/intel/igb/igb_main.c | 3 --- | ||
9 | 1 file changed, 3 deletions(-) | ||
10 | |||
11 | diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c | ||
12 | index 9affd7c198bd..8ade77e75b36 100644 | ||
13 | --- a/drivers/net/ethernet/intel/igb/igb_main.c | ||
14 | +++ b/drivers/net/ethernet/intel/igb/igb_main.c | ||
15 | @@ -238,9 +238,6 @@ static struct pci_driver igb_driver = { | ||
16 | .id_table = igb_pci_tbl, | ||
17 | .probe = igb_probe, | ||
18 | .remove = igb_remove, | ||
19 | -#ifdef CONFIG_PM | ||
20 | - .driver.pm = &igb_pm_ops, | ||
21 | -#endif | ||
22 | .shutdown = igb_shutdown, | ||
23 | .sriov_configure = igb_pci_sriov_configure, | ||
24 | .err_handler = &igb_err_handler | ||
25 | -- | ||
26 | 2.11.1 | ||
27 | |||
diff --git a/patches/boot_time_opt/0124-tweak-perfbias.patch b/patches/boot_time_opt/0124-tweak-perfbias.patch new file mode 100644 index 0000000..56a2865 --- /dev/null +++ b/patches/boot_time_opt/0124-tweak-perfbias.patch | |||
@@ -0,0 +1,32 @@ | |||
1 | From 03e2c414a860264511dae5bbfc6d7e62b8b94f0f Mon Sep 17 00:00:00 2001 | ||
2 | From: Arjan van de Ven <arjan@linux.intel.com> | ||
3 | Date: Sun, 22 Jan 2017 18:51:13 +0000 | ||
4 | Subject: [PATCH 124/124] tweak perfbias | ||
5 | |||
6 | --- | ||
7 | arch/x86/kernel/cpu/intel.c | 6 +++--- | ||
8 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
9 | |||
10 | diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c | ||
11 | index fcd484d2bb03..13ae40f10bd4 100644 | ||
12 | --- a/arch/x86/kernel/cpu/intel.c | ||
13 | +++ b/arch/x86/kernel/cpu/intel.c | ||
14 | @@ -434,12 +434,12 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c) | ||
15 | return; | ||
16 | |||
17 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); | ||
18 | - if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) | ||
19 | + if ((epb & 0xF) >= ENERGY_PERF_BIAS_NORMAL) | ||
20 | return; | ||
21 | |||
22 | - pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); | ||
23 | + pr_warn_once("ENERGY_PERF_BIAS: Set to 'performance', was 'normal'\n"); | ||
24 | pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); | ||
25 | - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; | ||
26 | + epb = (epb & ~0xF) | ENERGY_PERF_BIAS_PERFORMANCE; | ||
27 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); | ||
28 | } | ||
29 | |||
30 | -- | ||
31 | 2.11.1 | ||
32 | |||
diff --git a/patches/boot_time_opt/0125-e1000e-increase-pause-and-refresh-time.patch b/patches/boot_time_opt/0125-e1000e-increase-pause-and-refresh-time.patch new file mode 100644 index 0000000..1c50e74 --- /dev/null +++ b/patches/boot_time_opt/0125-e1000e-increase-pause-and-refresh-time.patch | |||
@@ -0,0 +1,33 @@ | |||
1 | From 6730c1ae12a567d56092d15540d2f971be95b936 Mon Sep 17 00:00:00 2001 | ||
2 | From: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> | ||
3 | Date: Mon, 27 Mar 2017 16:01:56 -0600 | ||
4 | Subject: [PATCH] e1000e: increase pause and refresh time | ||
5 | |||
6 | Suggested-by: Tim Pepper <timothy.c.pepper@linux.intel.com> | ||
7 | Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com> | ||
8 | --- | ||
9 | drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- | ||
10 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
11 | |||
12 | diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c | ||
13 | index 7d68d694ed9e..1db390a52656 100644 | ||
14 | --- a/drivers/net/ethernet/intel/e1000e/netdev.c | ||
15 | +++ b/drivers/net/ethernet/intel/e1000e/netdev.c | ||
16 | @@ -4032,12 +4032,12 @@ void e1000e_reset(struct e1000_adapter *adapter) | ||
17 | case e1000_pch2lan: | ||
18 | case e1000_pch_lpt: | ||
19 | case e1000_pch_spt: | ||
20 | - fc->refresh_time = 0x0400; | ||
21 | + fc->refresh_time = 0xFFFF; | ||
22 | + fc->pause_time = 0xFFFF; | ||
23 | |||
24 | if (adapter->netdev->mtu <= ETH_DATA_LEN) { | ||
25 | fc->high_water = 0x05C20; | ||
26 | fc->low_water = 0x05048; | ||
27 | - fc->pause_time = 0x0650; | ||
28 | break; | ||
29 | } | ||
30 | |||
31 | -- | ||
32 | 2.12.2 | ||
33 | |||
diff --git a/patches/boot_time_opt/0151-mm-Export-do_madvise.patch b/patches/boot_time_opt/0151-mm-Export-do_madvise.patch new file mode 100644 index 0000000..a6dbff7 --- /dev/null +++ b/patches/boot_time_opt/0151-mm-Export-do_madvise.patch | |||
@@ -0,0 +1,84 @@ | |||
1 | From 99b4cdcce43ad0f706120bef26fef8c628c572cf Mon Sep 17 00:00:00 2001 | ||
2 | From: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
3 | Date: Mon, 23 Jan 2017 15:03:52 -0800 | ||
4 | Subject: [PATCH 151/154] mm: Export do_madvise() | ||
5 | |||
6 | Combined with some interesting flags, the madvise() system call | ||
7 | allows memory to be freed more smartly and more efficiently than | ||
8 | we could do with a simple free(). The issue is that it is not | ||
9 | available to kernel modules that could need it. | ||
10 | |||
11 | In order to solve this lack of support, this patch exports | ||
12 | do_madvise() so as to make it available to the entire kernel. | ||
13 | The already existing madvise() system call is unchanged and | ||
14 | now relies on this new do_madvise() function. | ||
15 | |||
16 | Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> | ||
17 | Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
18 | --- | ||
19 | include/linux/mm.h | 2 ++ | ||
20 | mm/madvise.c | 25 +++++++++++++++++++++---- | ||
21 | 2 files changed, 23 insertions(+), 4 deletions(-) | ||
22 | |||
23 | diff --git a/include/linux/mm.h b/include/linux/mm.h | ||
24 | index 0b5b2e4df14e..925ec25f99a8 100644 | ||
25 | --- a/include/linux/mm.h | ||
26 | +++ b/include/linux/mm.h | ||
27 | @@ -2450,5 +2450,7 @@ void __init setup_nr_node_ids(void); | ||
28 | static inline void setup_nr_node_ids(void) {} | ||
29 | #endif | ||
30 | |||
31 | +extern int do_madvise(unsigned long start, size_t len_in, int behavior); | ||
32 | + | ||
33 | #endif /* __KERNEL__ */ | ||
34 | #endif /* _LINUX_MM_H */ | ||
35 | diff --git a/mm/madvise.c b/mm/madvise.c | ||
36 | index 93fb63e88b5e..c8bbf93d4978 100644 | ||
37 | --- a/mm/madvise.c | ||
38 | +++ b/mm/madvise.c | ||
39 | @@ -618,9 +618,7 @@ madvise_behavior_valid(int behavior) | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | - * The madvise(2) system call. | ||
44 | - * | ||
45 | - * Applications can use madvise() to advise the kernel how it should | ||
46 | + * Kernel modules can use do_madvise() to advise the kernel how it should | ||
47 | * handle paging I/O in this VM area. The idea is to help the kernel | ||
48 | * use appropriate read-ahead and caching techniques. The information | ||
49 | * provided is advisory only, and can be safely disregarded by the | ||
50 | @@ -673,7 +671,7 @@ madvise_behavior_valid(int behavior) | ||
51 | * -EBADF - map exists, but area maps something that isn't a file. | ||
52 | * -EAGAIN - a kernel resource was temporarily unavailable. | ||
53 | */ | ||
54 | -SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) | ||
55 | +int do_madvise(unsigned long start, size_t len_in, int behavior) | ||
56 | { | ||
57 | unsigned long end, tmp; | ||
58 | struct vm_area_struct *vma, *prev; | ||
59 | @@ -767,3 +765,22 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) | ||
60 | |||
61 | return error; | ||
62 | } | ||
63 | +EXPORT_SYMBOL_GPL(do_madvise); | ||
64 | + | ||
65 | +/* | ||
66 | + * The madvise(2) system call. | ||
67 | + * | ||
68 | + * Applications can use the madvise() system call to advise the kernel how | ||
69 | + * it should handle paging I/O in this VM area. The idea is to help | ||
70 | + * the kernel use appropriate read-ahead and caching techniques. The | ||
71 | + * information provided is advisory only, and can be safely disregarded | ||
72 | + * by the kernel without affecting the correct operation of the application. | ||
73 | + * | ||
74 | + * behavior values are the same as the ones described for do_madvise() | ||
75 | + * | ||
76 | + * return values are those of do_madvise(), passed back unchanged | ||
77 | + */ | ||
78 | +SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) | ||
79 | +{ | ||
80 | + return do_madvise(start, len_in, behavior); | ||
81 | +} | ||
82 | -- | ||
83 | 2.12.1 | ||
84 | |||
diff --git a/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch b/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch new file mode 100644 index 0000000..5f44930 --- /dev/null +++ b/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch | |||
@@ -0,0 +1,180 @@ | |||
1 | From d28921b5f797829e4e676f7968ae688ef96b7992 Mon Sep 17 00:00:00 2001 | ||
2 | From: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
3 | Date: Mon, 23 Jan 2017 15:08:55 -0800 | ||
4 | Subject: [PATCH 152/154] x86: kvm: Notify host to release pages | ||
5 | |||
6 | In the context of hypervisors managing several virtual machines, we | ||
7 | want those virtual machines to give the memory they used back to | ||
8 | the host when they don't need it anymore. | ||
9 | |||
10 | This patch introduces a new hypercall KVM_HC_RETURN_MEM, allowing | ||
11 | the guest kernel to notify the host kernel when such an event occurs. | ||
12 | And relying on do_madvise() function that we have previously exported, | ||
13 | it issues a call to this function when it receives the new hypercall. | ||
14 | |||
15 | Use of do_madvise() with MADV_DONTNEED flag will allow the guest to | ||
16 | ask for a new page without going through a new hypercall. Instead, | ||
17 | it will be able to start using that memory again as it will get | ||
18 | faulted back in as a fresh new page. That's why do_madvise() is more | ||
19 | efficient than doing vm_unmap() to return some memory to the host. | ||
20 | |||
21 | This patch also introduces a new sysctl, kvm_madv_instant_free, | ||
22 | allowing the user to set MADV_FREE advice instead of MADV_DONTNEED. | ||
23 | Indeed, MADV_FREE performs better than MADV_DONTNEED | ||
24 | because it does not zero the pages in case the memory has not been | ||
25 | freed by the kernel. This can happen when there was no need for the | ||
26 | kernel to get this memory back, meaning it was keeping those pages | ||
27 | in the right state to be re-used by the same application. | ||
28 | MADV_FREE being a very recent advice introduced in kernel 4.5, we | ||
29 | only want to enable it through a sysctl in case the user wants to | ||
30 | use it. | ||
31 | |||
32 | Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> | ||
33 | Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
34 | --- | ||
35 | arch/x86/kvm/x86.c | 17 +++++++++++++++++ | ||
36 | include/linux/mm.h | 5 +++++ | ||
37 | include/uapi/linux/kvm_para.h | 3 +++ | ||
38 | kernel/sysctl.c | 7 +++++++ | ||
39 | mm/Makefile | 2 +- | ||
40 | mm/kvm.c | 25 +++++++++++++++++++++++++ | ||
41 | 6 files changed, 58 insertions(+), 1 deletion(-) | ||
42 | create mode 100644 mm/kvm.c | ||
43 | |||
44 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | ||
45 | index 582c75311f95..683a94dd5f03 100644 | ||
46 | --- a/arch/x86/kvm/x86.c | ||
47 | +++ b/arch/x86/kvm/x86.c | ||
48 | @@ -46,6 +46,7 @@ | ||
49 | #include <linux/user-return-notifier.h> | ||
50 | #include <linux/srcu.h> | ||
51 | #include <linux/slab.h> | ||
52 | +#include <linux/mm.h> | ||
53 | #include <linux/perf_event.h> | ||
54 | #include <linux/uaccess.h> | ||
55 | #include <linux/hash.h> | ||
56 | @@ -6019,6 +6020,19 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | ||
57 | kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); | ||
58 | } | ||
59 | |||
60 | +static int kvm_pv_return_mem_op(struct kvm *kvm, gpa_t gpa, size_t len) | ||
61 | +{ | ||
62 | + unsigned long start = gfn_to_hva(kvm, gpa_to_gfn(gpa)); /* host VA of the guest page */ | ||
63 | + | ||
64 | + if (len > KVM_MAX_RET_MEM_SIZE) /* refuse oversized requests */ | ||
65 | + return -KVM_EPERM; /* hypercall errors are negative, cf. -KVM_ENOSYS */ | ||
66 | + | ||
67 | + if (kvm_is_error_hva(start + len)) | ||
68 | + return -KVM_EFAULT; | ||
69 | + | ||
70 | + return do_madvise(start, len, kvm_ret_mem_advice); /* result goes back to the guest */ | ||
71 | +} | ||
72 | + | ||
73 | void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) | ||
74 | { | ||
75 | vcpu->arch.apicv_active = false; | ||
76 | @@ -6065,6 +6079,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | ||
77 | kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); | ||
78 | ret = 0; | ||
79 | break; | ||
80 | + case KVM_HC_RETURN_MEM: | ||
81 | + ret = kvm_pv_return_mem_op(vcpu->kvm, a0, a1); | ||
82 | + break; | ||
83 | default: | ||
84 | ret = -KVM_ENOSYS; | ||
85 | break; | ||
86 | diff --git a/include/linux/mm.h b/include/linux/mm.h | ||
87 | index 925ec25f99a8..833f23d98baa 100644 | ||
88 | --- a/include/linux/mm.h | ||
89 | +++ b/include/linux/mm.h | ||
90 | @@ -2303,6 +2303,11 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); | ||
91 | extern int sysctl_drop_caches; | ||
92 | int drop_caches_sysctl_handler(struct ctl_table *, int, | ||
93 | void __user *, size_t *, loff_t *); | ||
94 | +extern int sysctl_kvm_madv_instant_free; | ||
95 | +extern int kvm_ret_mem_advice; | ||
96 | +int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, | ||
97 | + void __user *buffer, size_t *length, | ||
98 | + loff_t *ppos); | ||
99 | #endif | ||
100 | |||
101 | void drop_slab(void); | ||
102 | diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h | ||
103 | index bf6cd7d5cac2..7d90f77d87d0 100644 | ||
104 | --- a/include/uapi/linux/kvm_para.h | ||
105 | +++ b/include/uapi/linux/kvm_para.h | ||
106 | @@ -23,6 +23,9 @@ | ||
107 | #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 | ||
108 | #define KVM_HC_MIPS_EXIT_VM 7 | ||
109 | #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 | ||
110 | +#define KVM_HC_RETURN_MEM 10 | ||
111 | + | ||
112 | +#define KVM_MAX_RET_MEM_SIZE (1 << 22) // 4MiB | ||
113 | |||
114 | /* | ||
115 | * hypercalls use architecture specific | ||
116 | diff --git a/kernel/sysctl.c b/kernel/sysctl.c | ||
117 | index c1095cdc0fe2..d8ae774fa042 100644 | ||
118 | --- a/kernel/sysctl.c | ||
119 | +++ b/kernel/sysctl.c | ||
120 | @@ -1398,6 +1398,13 @@ static struct ctl_table vm_table[] = { | ||
121 | .extra1 = &one, | ||
122 | .extra2 = &four, | ||
123 | }, | ||
124 | + { | ||
125 | + .procname = "kvm_madv_instant_free", | ||
126 | + .data = &sysctl_kvm_madv_instant_free, | ||
127 | + .maxlen = sizeof(int), | ||
128 | + .mode = 0644, | ||
129 | + .proc_handler = kvm_madv_instant_free_sysctl_handler, | ||
130 | + }, | ||
131 | #ifdef CONFIG_COMPACTION | ||
132 | { | ||
133 | .procname = "compact_memory", | ||
134 | diff --git a/mm/Makefile b/mm/Makefile | ||
135 | index 295bd7a..6455723 100644 | ||
136 | --- a/mm/Makefile | ||
137 | +++ b/mm/Makefile | ||
138 | @@ -47,6 +47,8 @@ else | ||
139 | obj-y += bootmem.o | ||
140 | endif | ||
141 | |||
142 | +obj-y += kvm.o | ||
143 | + | ||
144 | obj-$(CONFIG_ADVISE_SYSCALLS) += fadvise.o | ||
145 | ifdef CONFIG_MMU | ||
146 | obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o | ||
147 | diff --git a/mm/kvm.c b/mm/kvm.c | ||
148 | new file mode 100644 | ||
149 | index 000000000000..8945f6a311b9 | ||
150 | --- /dev/null | ||
151 | +++ b/mm/kvm.c | ||
152 | @@ -0,0 +1,25 @@ | ||
153 | +#include <linux/mman.h> | ||
154 | + | ||
155 | +int sysctl_kvm_madv_instant_free; | ||
156 | + | ||
157 | +int kvm_ret_mem_advice = MADV_DONTNEED; | ||
158 | +EXPORT_SYMBOL_GPL(kvm_ret_mem_advice); | ||
159 | + | ||
160 | +int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, | ||
161 | + void __user *buffer, size_t *length, loff_t *ppos) | ||
162 | +{ | ||
163 | + int ret; | ||
164 | + | ||
165 | + ret = proc_dointvec(table, write, buffer, length, ppos); | ||
166 | + if (ret) | ||
167 | + return ret; /* propagate a non-zero proc_dointvec() result */ | ||
168 | + | ||
169 | +#ifdef MADV_FREE | ||
170 | + if (sysctl_kvm_madv_instant_free > 0) | ||
171 | + kvm_ret_mem_advice = MADV_FREE; /* sysctl > 0 selects MADV_FREE */ | ||
172 | + else | ||
173 | + kvm_ret_mem_advice = MADV_DONTNEED; /* otherwise back to the initial advice */ | ||
174 | +#endif /* without MADV_FREE the advice is left unchanged */ | ||
175 | + | ||
176 | + return 0; | ||
177 | +} | ||
178 | -- | ||
179 | 2.12.1 | ||
180 | |||
diff --git a/patches/boot_time_opt/0153-x86-Return-memory-from-guest-to-host-kernel.patch b/patches/boot_time_opt/0153-x86-Return-memory-from-guest-to-host-kernel.patch new file mode 100644 index 0000000..cdb876a --- /dev/null +++ b/patches/boot_time_opt/0153-x86-Return-memory-from-guest-to-host-kernel.patch | |||
@@ -0,0 +1,155 @@ | |||
1 | From 855ef164854307839c08c60688eaeac14f9a649e Mon Sep 17 00:00:00 2001 | ||
2 | From: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
3 | Date: Mon, 23 Jan 2017 15:26:13 -0800 | ||
4 | Subject: [PATCH 153/154] x86: Return memory from guest to host kernel | ||
5 | |||
6 | All virtual machines need memory to perform various tasks, but this | ||
7 | memory is not released to the host after it is not used anymore. We | ||
8 | have to wait for the termination of the virtual machine to get this | ||
9 | memory back into the host. | ||
10 | |||
11 | Ballooning mechanism is close but not designed for the same purpose. | ||
12 | In case we hit memory limits of the system, the host predicts how much | ||
13 | memory can be asked back from a guest, and it issues an hypercall to | ||
14 | retrieve this memory. | ||
15 | |||
16 | The solution proposed is different because it does not wait for the | ||
17 | host to need memory before returning it, and it knows precisely how | ||
18 | much memory it can return. | ||
19 | |||
20 | The way to notify the host side about such a return is to rely on | ||
21 | the new hypercall KVM_HC_RETURN_MEM. In order to avoid overloading | ||
22 | the CPU with too many hypercalls, we only return memory blocks of | ||
23 | order 7 (512k blocks) and higher. This value has been found running | ||
24 | memory tests using multiple threads allocating/freeing high amount | ||
25 | of memory. Those tests were run for different order values, and 7 was | ||
26 | the best tradeoff between the number of hypercalls issued and the | ||
27 | amount of memory returned to the host. | ||
28 | |||
29 | In order to limit the performance impact of this code addition, | ||
30 | we check for blocks of order 7 or higher. This means it only costs an | ||
31 | additional function call and a branch to perform this check. | ||
32 | |||
33 | Furthermore, this code has been added to the "merge" codepath of the | ||
34 | buddy allocator, which is not as sensitive as the "free" codepath. | ||
35 | Not all blocks going through the "free" codepath will end up in the | ||
36 | "merge" codepath because some of them won't find their free buddy. | ||
37 | But this is a negligible amount since the kernel does not use many | ||
38 | high order blocks directly. Instead, those bigger blocks are often | ||
39 | broken into smaller chunks used as low order blocks. At the time | ||
40 | those small blocks are released, they go through the merge path. | ||
41 | |||
42 | Benchmarks such as ebizzy and will-it-scale have been run in order | ||
43 | to make sure this patch does not affect kernel performance, and no | ||
44 | significant differences were observed. | ||
45 | |||
46 | Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> | ||
47 | Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
48 | --- | ||
49 | arch/x86/include/asm/kvm_para.h | 22 ++++++++++++++++++++++ | ||
50 | arch/x86/kernel/kvm.c | 10 ++++++++++ | ||
51 | include/linux/mm-arch-hooks.h | 8 ++++++++ | ||
52 | mm/page_alloc.c | 2 ++ | ||
53 | 4 files changed, 42 insertions(+) | ||
54 | |||
55 | diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h | ||
56 | index bc62e7cbf1b1..4a2f6d1adbd2 100644 | ||
57 | --- a/arch/x86/include/asm/kvm_para.h | ||
58 | +++ b/arch/x86/include/asm/kvm_para.h | ||
59 | @@ -92,6 +92,28 @@ void kvm_async_pf_task_wait(u32 token); | ||
60 | void kvm_async_pf_task_wake(u32 token); | ||
61 | u32 kvm_read_and_reset_pf_reason(void); | ||
62 | extern void kvm_disable_steal_time(void); | ||
63 | +void kvm_arch_return_memory(struct page *page, unsigned int order); | ||
64 | + | ||
65 | +/* | ||
66 | + * This order has been found in an empirical way, running memory tests | ||
67 | + * through many iterations to assess the number of hypercalls issued | ||
68 | + * and the amount of memory returned. In case you change this order to | ||
69 | + * 6 or 8, it should not impact your performances significantly. | ||
70 | + * | ||
71 | + * Smaller values lead to less memory waste, but consume more CPU on | ||
72 | + * hypercalls. Larger values use less CPU, but do not as precisely | ||
73 | + * inform the hypervisor of which memory is free. | ||
74 | + */ | ||
75 | +#define RET_MEM_BUDDY_ORDER 7 | ||
76 | + | ||
77 | +static inline void arch_buddy_merge(struct page *page, unsigned int order) | ||
78 | +{ | ||
79 | + if (order < RET_MEM_BUDDY_ORDER) | ||
80 | + return; | ||
81 | + | ||
82 | + kvm_arch_return_memory(page, order); | ||
83 | +} | ||
84 | +#define arch_buddy_merge arch_buddy_merge | ||
85 | |||
86 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
87 | void __init kvm_spinlock_init(void); | ||
88 | diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c | ||
89 | index edbbfc854e39..14167b3f6514 100644 | ||
90 | --- a/arch/x86/kernel/kvm.c | ||
91 | +++ b/arch/x86/kernel/kvm.c | ||
92 | @@ -552,6 +552,16 @@ static __init int activate_jump_labels(void) | ||
93 | } | ||
94 | arch_initcall(activate_jump_labels); | ||
95 | |||
96 | +void kvm_arch_return_memory(struct page *page, unsigned int order) | ||
97 | +{ | ||
98 | + if (!kvm_para_available()) | ||
99 | + return; | ||
100 | + | ||
101 | + kvm_hypercall2(KVM_HC_RETURN_MEM, | ||
102 | + page_to_phys(page), | ||
103 | + PAGE_SIZE << order); | ||
104 | +} | ||
105 | + | ||
106 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
107 | |||
108 | /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ | ||
109 | diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h | ||
110 | index 4efc3f56e6df..26eb3a05a8a3 100644 | ||
111 | --- a/include/linux/mm-arch-hooks.h | ||
112 | +++ b/include/linux/mm-arch-hooks.h | ||
113 | @@ -12,6 +12,7 @@ | ||
114 | #define _LINUX_MM_ARCH_HOOKS_H | ||
115 | |||
116 | #include <asm/mm-arch-hooks.h> | ||
117 | +#include <asm/kvm_para.h> | ||
118 | |||
119 | #ifndef arch_remap | ||
120 | static inline void arch_remap(struct mm_struct *mm, | ||
121 | @@ -22,4 +23,11 @@ static inline void arch_remap(struct mm_struct *mm, | ||
122 | #define arch_remap arch_remap | ||
123 | #endif | ||
124 | |||
125 | +#ifndef arch_buddy_merge | ||
126 | +static inline void arch_buddy_merge(struct page *page, unsigned int order) | ||
127 | +{ | ||
128 | +} | ||
129 | +#define arch_buddy_merge arch_buddy_merge | ||
130 | +#endif | ||
131 | + | ||
132 | #endif /* _LINUX_MM_ARCH_HOOKS_H */ | ||
133 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | ||
134 | index 1460e6ad5e14..5f6e6371bc6f 100644 | ||
135 | --- a/mm/page_alloc.c | ||
136 | +++ b/mm/page_alloc.c | ||
137 | @@ -64,6 +64,7 @@ | ||
138 | #include <linux/page_owner.h> | ||
139 | #include <linux/kthread.h> | ||
140 | #include <linux/memcontrol.h> | ||
141 | +#include <linux/mm-arch-hooks.h> | ||
142 | |||
143 | #include <asm/sections.h> | ||
144 | #include <asm/tlbflush.h> | ||
145 | @@ -855,6 +856,7 @@ static inline void __free_one_page(struct page *page, | ||
146 | } | ||
147 | |||
148 | done_merging: | ||
149 | + arch_buddy_merge(page, order); | ||
150 | set_page_order(page, order); | ||
151 | |||
152 | /* | ||
153 | -- | ||
154 | 2.12.1 | ||
155 | |||
diff --git a/patches/boot_time_opt/0154-sysctl-vm-Fine-grained-cache-shrinking.patch b/patches/boot_time_opt/0154-sysctl-vm-Fine-grained-cache-shrinking.patch new file mode 100644 index 0000000..07d4a83 --- /dev/null +++ b/patches/boot_time_opt/0154-sysctl-vm-Fine-grained-cache-shrinking.patch | |||
@@ -0,0 +1,137 @@ | |||
1 | From 2c145b5233b504f5226a0f4bc44baeef33b444d8 Mon Sep 17 00:00:00 2001 | ||
2 | From: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
3 | Date: Mon, 23 Jan 2017 15:32:39 -0800 | ||
4 | Subject: [PATCH 154/154] sysctl: vm: Fine-grained cache shrinking | ||
5 | |||
6 | Lots of virtual machines are left in an idle state for days until they | ||
7 | are terminated, and they can keep a large amount of memory in their | ||
8 | cache, meaning this memory cannot be used by other processes. | ||
9 | |||
10 | We tried to release this memory using the existing drop_caches sysctl, | ||
11 | but it led to complete cache loss, while the cache could have been used | ||
12 | if the idle process wakes up. Indeed, the process then can't find any | ||
13 | cached data, and rebuilding it from scratch directly affects | ||
14 | performance. | ||
15 | |||
16 | Instead, the solution we want is based on gradually shrinking the system | ||
17 | cache over time. This patch adds a new sysctl shrink_caches_mb so as | ||
18 | to allow userspace applications to tell the kernel it should shrink | ||
19 | the system cache by up to the amount (in MiB) specified. | ||
20 | |||
21 | There is an application called "memshrinker" which uses this new | ||
22 | mechanism. It runs in the background and periodically releases a | ||
23 | specified amount of cache. This amount is based on the remaining | ||
24 | cache on the system, and period is computed to follow a shrinking | ||
25 | model. It results in saving a lot of memory for other processes | ||
26 | running on the system. | ||
27 | |||
28 | Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com> | ||
29 | Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com> | ||
30 | --- | ||
31 | fs/drop_caches.c | 25 +++++++++++++++++++++++++ | ||
32 | include/linux/mm.h | 4 ++++ | ||
33 | kernel/sysctl.c | 8 ++++++++ | ||
34 | mm/vmscan.c | 2 -- | ||
35 | 4 files changed, 37 insertions(+), 2 deletions(-) | ||
36 | |||
37 | diff --git a/fs/drop_caches.c b/fs/drop_caches.c | ||
38 | index d72d52b90433..f564dfcc13a4 100644 | ||
39 | --- a/fs/drop_caches.c | ||
40 | +++ b/fs/drop_caches.c | ||
41 | @@ -8,10 +8,12 @@ | ||
42 | #include <linux/writeback.h> | ||
43 | #include <linux/sysctl.h> | ||
44 | #include <linux/gfp.h> | ||
45 | +#include <linux/swap.h> | ||
46 | #include "internal.h" | ||
47 | |||
48 | /* A global variable is a bit ugly, but it keeps the code simple */ | ||
49 | int sysctl_drop_caches; | ||
50 | +int sysctl_shrink_caches_mb; | ||
51 | |||
52 | static void drop_pagecache_sb(struct super_block *sb, void *unused) | ||
53 | { | ||
54 | @@ -67,3 +69,26 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write, | ||
55 | } | ||
56 | return 0; | ||
57 | } | ||
58 | + | ||
59 | +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, | ||
60 | + void __user *buffer, size_t *length, loff_t *ppos) | ||
61 | +{ | ||
62 | + int ret; | ||
63 | + unsigned long nr_to_reclaim, page_reclaimed; | ||
64 | + | ||
65 | + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); | ||
66 | + if (ret) | ||
67 | + return ret; | ||
68 | + | ||
69 | + nr_to_reclaim = sysctl_shrink_caches_mb * (1 << 20) / PAGE_SIZE; | ||
70 | + if (write) { | ||
71 | + page_reclaimed = shrink_all_memory(nr_to_reclaim); | ||
72 | + if (page_reclaimed > 0) | ||
73 | + lru_add_drain_all(); | ||
74 | + | ||
75 | + if (page_reclaimed != nr_to_reclaim) | ||
76 | + return page_reclaimed; | ||
77 | + } | ||
78 | + | ||
79 | + return 0; | ||
80 | +} | ||
81 | diff --git a/include/linux/mm.h b/include/linux/mm.h | ||
82 | index 833f23d98baa..0bb66c1c31c9 100644 | ||
83 | --- a/include/linux/mm.h | ||
84 | +++ b/include/linux/mm.h | ||
85 | @@ -2308,6 +2308,10 @@ extern int kvm_ret_mem_advice; | ||
86 | int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, | ||
87 | void __user *buffer, size_t *length, | ||
88 | loff_t *ppos); | ||
89 | +extern int sysctl_shrink_caches_mb; | ||
90 | +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, | ||
91 | + void __user *buffer, size_t *length, | ||
92 | + loff_t *ppos); | ||
93 | #endif | ||
94 | |||
95 | void drop_slab(void); | ||
96 | diff --git a/kernel/sysctl.c b/kernel/sysctl.c | ||
97 | index d8ae774fa042..5dc9a46ae212 100644 | ||
98 | --- a/kernel/sysctl.c | ||
99 | +++ b/kernel/sysctl.c | ||
100 | @@ -1405,6 +1405,14 @@ static struct ctl_table vm_table[] = { | ||
101 | .mode = 0644, | ||
102 | .proc_handler = kvm_madv_instant_free_sysctl_handler, | ||
103 | }, | ||
104 | + { | ||
105 | + .procname = "shrink_caches_mb", | ||
106 | + .data = &sysctl_shrink_caches_mb, | ||
107 | + .maxlen = sizeof(int), | ||
108 | + .mode = 0644, | ||
109 | + .proc_handler = shrink_caches_sysctl_handler, | ||
110 | + .extra1 = &one, | ||
111 | + }, | ||
112 | #ifdef CONFIG_COMPACTION | ||
113 | { | ||
114 | .procname = "compact_memory", | ||
115 | diff --git a/mm/vmscan.c b/mm/vmscan.c | ||
116 | index 30a88b945a44..1198e74d1860 100644 | ||
117 | --- a/mm/vmscan.c | ||
118 | +++ b/mm/vmscan.c | ||
119 | @@ -3525,7 +3525,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) | ||
120 | wake_up_interruptible(&pgdat->kswapd_wait); | ||
121 | } | ||
122 | |||
123 | -#ifdef CONFIG_HIBERNATION | ||
124 | /* | ||
125 | * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of | ||
126 | * freed pages. | ||
127 | @@ -3564,7 +3563,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | ||
128 | |||
129 | return nr_reclaimed; | ||
130 | } | ||
131 | -#endif /* CONFIG_HIBERNATION */ | ||
132 | |||
133 | /* It's optimal to keep kswapds on the same CPUs as their memory, but | ||
134 | not required for correctness. So if the last cpu in a node goes | ||
135 | -- | ||
136 | 2.12.1 | ||
137 | |||
diff --git a/patches/boot_time_opt/boot_time_opt.scc b/patches/boot_time_opt/boot_time_opt.scc new file mode 100644 index 0000000..1ffb857 --- /dev/null +++ b/patches/boot_time_opt/boot_time_opt.scc | |||
@@ -0,0 +1,29 @@ | |||
1 | define KFEATURE_DESCRIPTION "Boot time optimization changes ported from ClearLinux, https://github.com/clearlinux-pkgs/linux-lts and https://github.com/clearlinux-pkgs/linux-kvm" | ||
2 | define KFEATURE_COMPATIBILITY all | ||
3 | |||
4 | patch 0101-kvm-silence-kvm-unhandled-rdmsr.patch | ||
5 | patch 0102-i8042-decrease-debug-message-level-to-info.patch | ||
6 | patch 0104-Increase-the-ext4-default-commit-age.patch | ||
7 | patch 0105-silence-rapl.patch | ||
8 | patch 0106-pci-pme-wakeups.patch | ||
9 | patch 0107-ksm-wakeups.patch | ||
10 | patch 0108-intel_idle-tweak-cpuidle-cstates.patch | ||
11 | patch 0110-init_task-faster-timerslack.patch | ||
12 | patch 0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch | ||
13 | patch 0113-overload-on-wakeup.patch | ||
14 | patch 0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch | ||
15 | patch 0115-fix-initcall-timestamps.patch | ||
16 | patch 0116-smpboot-reuse-timer-calibration.patch | ||
17 | patch 0118-Initialize-ata-before-graphics.patch | ||
18 | patch 0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch | ||
19 | patch 0120-give-rdrand-some-credit.patch | ||
20 | patch 0121-e1000e-change-default-policy.patch | ||
21 | patch 0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch | ||
22 | patch 0123-igb-no-runtime-pm-to-fix-reboot-oops.patch | ||
23 | patch 0124-tweak-perfbias.patch | ||
24 | patch 0125-e1000e-increase-pause-and-refresh-time.patch | ||
25 | |||
26 | patch 0151-mm-Export-do_madvise.patch | ||
27 | patch 0152-x86-kvm-Notify-host-to-release-pages.patch | ||
28 | patch 0153-x86-Return-memory-from-guest-to-host-kernel.patch | ||
29 | patch 0154-sysctl-vm-Fine-grained-cache-shrinking.patch | ||
diff --git a/patches/boot_time_opt/raid_alg.cfg b/patches/boot_time_opt/raid_alg.cfg new file mode 100644 index 0000000..6df4a7c --- /dev/null +++ b/patches/boot_time_opt/raid_alg.cfg | |||
@@ -0,0 +1,3 @@ | |||
1 | CONFIG_RAID6_FORCE_ALGO=y | ||
2 | CONFIG_RAID6_FORCE_INT=y | ||
3 | CONFIG_RAID6_FORCE_AVX2=y | ||
diff --git a/patches/boot_time_opt/raid_alg.scc b/patches/boot_time_opt/raid_alg.scc new file mode 100644 index 0000000..98dd713 --- /dev/null +++ b/patches/boot_time_opt/raid_alg.scc | |||
@@ -0,0 +1,5 @@ | |||
1 | define KFEATURE_DESCRIPTION "Use AVX2 for RAID recovery algorithm" | ||
2 | define KFEATURE_COMPATIBILITY all | ||
3 | |||
4 | patch 0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch | ||
5 | kconf non-hardware raid_alg.cfg | ||