summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdrian Calianu <adrian.calianu@enea.com>2017-05-22 08:43:50 +0200
committerAdrian Dudau <adrian.dudau@enea.com>2017-05-23 14:58:35 +0200
commit16b0e3313f53566481c106ace9992e477f8efe9b (patch)
tree5ec03a62db7836d2fb59813cb0c912751556c378
parent28da254d1c5ae012a6e064671aa14850c2a21a25 (diff)
downloadenea-kernel-cache-16b0e3313f53566481c106ace9992e477f8efe9b.tar.gz
patches: Boot time optimizations with ClearLinux patches
Signed-off-by: Adrian Calianu <adrian.calianu@enea.com> Signed-off-by: Adrian Dudau <adrian.dudau@enea.com>
-rw-r--r--patches/boot_time_opt/0011-drm-i915-fbc-sanitize-fbc-GEN-greater-than-9.patch25
-rw-r--r--patches/boot_time_opt/0101-kvm-silence-kvm-unhandled-rdmsr.patch29
-rw-r--r--patches/boot_time_opt/0102-i8042-decrease-debug-message-level-to-info.patch65
-rw-r--r--patches/boot_time_opt/0103-init-do_mounts-recreate-dev-root.patch42
-rw-r--r--patches/boot_time_opt/0104-Increase-the-ext4-default-commit-age.patch35
-rw-r--r--patches/boot_time_opt/0105-silence-rapl.patch25
-rw-r--r--patches/boot_time_opt/0106-pci-pme-wakeups.patch27
-rw-r--r--patches/boot_time_opt/0107-ksm-wakeups.patch34
-rw-r--r--patches/boot_time_opt/0108-intel_idle-tweak-cpuidle-cstates.patch227
-rw-r--r--patches/boot_time_opt/0109-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch56
-rw-r--r--patches/boot_time_opt/0110-init_task-faster-timerslack.patch32
-rw-r--r--patches/boot_time_opt/0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch158
-rw-r--r--patches/boot_time_opt/0113-overload-on-wakeup.patch43
-rw-r--r--patches/boot_time_opt/0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch83
-rw-r--r--patches/boot_time_opt/0115-fix-initcall-timestamps.patch42
-rw-r--r--patches/boot_time_opt/0116-smpboot-reuse-timer-calibration.patch31
-rw-r--r--patches/boot_time_opt/0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch156
-rw-r--r--patches/boot_time_opt/0118-Initialize-ata-before-graphics.patch47
-rw-r--r--patches/boot_time_opt/0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch311
-rw-r--r--patches/boot_time_opt/0120-give-rdrand-some-credit.patch30
-rw-r--r--patches/boot_time_opt/0121-e1000e-change-default-policy.patch27
-rw-r--r--patches/boot_time_opt/0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch28
-rw-r--r--patches/boot_time_opt/0123-igb-no-runtime-pm-to-fix-reboot-oops.patch27
-rw-r--r--patches/boot_time_opt/0124-tweak-perfbias.patch32
-rw-r--r--patches/boot_time_opt/0125-e1000e-increase-pause-and-refresh-time.patch33
-rw-r--r--patches/boot_time_opt/0151-mm-Export-do_madvise.patch84
-rw-r--r--patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch180
-rw-r--r--patches/boot_time_opt/0153-x86-Return-memory-from-guest-to-host-kernel.patch155
-rw-r--r--patches/boot_time_opt/0154-sysctl-vm-Fine-grained-cache-shrinking.patch137
-rw-r--r--patches/boot_time_opt/boot_time_opt.scc29
-rw-r--r--patches/boot_time_opt/raid_alg.cfg3
-rw-r--r--patches/boot_time_opt/raid_alg.scc5
32 files changed, 2238 insertions, 0 deletions
diff --git a/patches/boot_time_opt/0011-drm-i915-fbc-sanitize-fbc-GEN-greater-than-9.patch b/patches/boot_time_opt/0011-drm-i915-fbc-sanitize-fbc-GEN-greater-than-9.patch
new file mode 100644
index 0000000..33debcd
--- /dev/null
+++ b/patches/boot_time_opt/0011-drm-i915-fbc-sanitize-fbc-GEN-greater-than-9.patch
@@ -0,0 +1,25 @@
1From 07639791f247ae7a807444106b9b7611f070d02b Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Fri, 6 Jan 2017 13:28:29 +0000
4Subject: [PATCH] drm/i915/fbc: sanitize fbc GEN greater than 9
5
6---
7 drivers/gpu/drm/i915/intel_fbc.c | 2 +-
8 1 file changed, 1 insertion(+), 1 deletion(-)
9
10diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
11index c43dd9abce79..f5a2560840f3 100644
12--- a/drivers/gpu/drm/i915/intel_fbc.c
13+++ b/drivers/gpu/drm/i915/intel_fbc.c
14@@ -1262,7 +1262,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
15 if (!HAS_FBC(dev_priv))
16 return 0;
17
18- if (IS_BROADWELL(dev_priv))
19+ if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9)
20 return 1;
21
22 return 0;
23--
242.11.1
25
diff --git a/patches/boot_time_opt/0101-kvm-silence-kvm-unhandled-rdmsr.patch b/patches/boot_time_opt/0101-kvm-silence-kvm-unhandled-rdmsr.patch
new file mode 100644
index 0000000..aeb3abf
--- /dev/null
+++ b/patches/boot_time_opt/0101-kvm-silence-kvm-unhandled-rdmsr.patch
@@ -0,0 +1,29 @@
1From f45c353859fc0ceb75fef3a2f4a2c179dfa378d7 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Tue, 23 Jun 2015 01:16:45 -0500
4Subject: [PATCH 101/124] kvm: silence kvm unhandled rdmsr
5
6Author: Arjan van de Ven <arjan@linux.intel.com>
7
8Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
9Signed-off-by: Jose Carlos Venegas Munoz <jos.c.venegas.munoz@intel.com>
10---
11 arch/x86/kvm/x86.c | 2 +-
12 1 file changed, 1 insertion(+), 1 deletion(-)
13
14diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
15index 731044efb195..582c75311f95 100644
16--- a/arch/x86/kvm/x86.c
17+++ b/arch/x86/kvm/x86.c
18@@ -2506,7 +2506,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
19 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
20 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
21 if (!ignore_msrs) {
22- vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
23+// vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
24 return 1;
25 } else {
26 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
27--
282.11.1
29
diff --git a/patches/boot_time_opt/0102-i8042-decrease-debug-message-level-to-info.patch b/patches/boot_time_opt/0102-i8042-decrease-debug-message-level-to-info.patch
new file mode 100644
index 0000000..96fd92b
--- /dev/null
+++ b/patches/boot_time_opt/0102-i8042-decrease-debug-message-level-to-info.patch
@@ -0,0 +1,65 @@
1From 7e847b13b753ec632fef2f1ffa0d8f5b444c967b Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Tue, 23 Jun 2015 01:26:52 -0500
4Subject: [PATCH 102/124] i8042: decrease debug message level to info
5
6Author: Arjan van de Ven <arjan@linux.intel.com>
7
8Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
9Signed-off-by: Jose Carlos Venegas Munoz <jos.c.venegas.munoz@intel.com>
10---
11 drivers/input/serio/i8042.c | 10 +++++-----
12 1 file changed, 5 insertions(+), 5 deletions(-)
13
14diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
15index 89abfdb539ac..5317c41b049e 100644
16--- a/drivers/input/serio/i8042.c
17+++ b/drivers/input/serio/i8042.c
18@@ -593,7 +593,7 @@ static int i8042_enable_kbd_port(void)
19 if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
20 i8042_ctr &= ~I8042_CTR_KBDINT;
21 i8042_ctr |= I8042_CTR_KBDDIS;
22- pr_err("Failed to enable KBD port\n");
23+ pr_info("Failed to enable KBD port\n");
24 return -EIO;
25 }
26
27@@ -612,7 +612,7 @@ static int i8042_enable_aux_port(void)
28 if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
29 i8042_ctr &= ~I8042_CTR_AUXINT;
30 i8042_ctr |= I8042_CTR_AUXDIS;
31- pr_err("Failed to enable AUX port\n");
32+ pr_info("Failed to enable AUX port\n");
33 return -EIO;
34 }
35
36@@ -704,7 +704,7 @@ static int __init i8042_check_mux(void)
37 i8042_ctr &= ~I8042_CTR_AUXINT;
38
39 if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
40- pr_err("Failed to disable AUX port, can't use MUX\n");
41+ pr_info("Failed to disable AUX port, can't use MUX\n");
42 return -EIO;
43 }
44
45@@ -927,7 +927,7 @@ static int i8042_controller_selftest(void)
46 do {
47
48 if (i8042_command(&param, I8042_CMD_CTL_TEST)) {
49- pr_err("i8042 controller selftest timeout\n");
50+ pr_info("i8042 controller selftest timeout\n");
51 return -ENODEV;
52 }
53
54@@ -949,7 +949,7 @@ static int i8042_controller_selftest(void)
55 pr_info("giving up on controller selftest, continuing anyway...\n");
56 return 0;
57 #else
58- pr_err("i8042 controller selftest failed\n");
59+ pr_info("i8042 controller selftest failed\n");
60 return -EIO;
61 #endif
62 }
63--
642.11.1
65
diff --git a/patches/boot_time_opt/0103-init-do_mounts-recreate-dev-root.patch b/patches/boot_time_opt/0103-init-do_mounts-recreate-dev-root.patch
new file mode 100644
index 0000000..bb7bb9f
--- /dev/null
+++ b/patches/boot_time_opt/0103-init-do_mounts-recreate-dev-root.patch
@@ -0,0 +1,42 @@
1From 838abc7e5f43ea40a2cc05ebd6c7321b6d84b057 Mon Sep 17 00:00:00 2001
2From: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
3Date: Fri, 20 Nov 2015 14:01:26 -0600
4Subject: [PATCH 103/124] init: do_mounts: recreate /dev/root
5
6Rootfs is shown as mounted on /dev/root, but this device is not present in
7the /dev directory.
8
9Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
10---
11 init/do_mounts.c | 8 ++++++++
12 1 file changed, 8 insertions(+)
13
14diff --git a/init/do_mounts.c b/init/do_mounts.c
15index dea5de95c2dd..d74a346b2dfa 100644
16--- a/init/do_mounts.c
17+++ b/init/do_mounts.c
18@@ -549,6 +549,7 @@ void __init mount_root(void)
19 void __init prepare_namespace(void)
20 {
21 int is_floppy;
22+ int err;
23
24 if (root_delay) {
25 printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
26@@ -602,6 +603,13 @@ void __init prepare_namespace(void)
27 devtmpfs_mount("dev");
28 sys_mount(".", "/", NULL, MS_MOVE, NULL);
29 sys_chroot(".");
30+#ifdef CONFIG_BLOCK
31+ /* recreate the /dev/root */
32+ err = create_dev("/dev/root", ROOT_DEV);
33+
34+ if (err < 0)
35+ pr_emerg("Failed to create /dev/root: %d\n", err);
36+#endif
37 }
38
39 static bool is_tmpfs;
40--
412.11.1
42
diff --git a/patches/boot_time_opt/0104-Increase-the-ext4-default-commit-age.patch b/patches/boot_time_opt/0104-Increase-the-ext4-default-commit-age.patch
new file mode 100644
index 0000000..fb709b4
--- /dev/null
+++ b/patches/boot_time_opt/0104-Increase-the-ext4-default-commit-age.patch
@@ -0,0 +1,35 @@
1From b6970d43f97325c9acc7bd942dcd192586d8d407 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Mon, 11 Jan 2016 10:01:44 -0600
4Subject: [PATCH 104/124] Increase the ext4 default commit age
5
6Both the VM and EXT4 have a "commit to disk after X seconds" time.
7Currently the EXT4 time is shorter than our VM time, which is a bit
8suboptimal;
9it's better for performance to let the VM do the writeouts in bulk
10rather than something deep in the journalling layer.
11
12(DISTRO TWEAK -- NOT FOR UPSTREAM)
13
14Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
15Signed-off-by: Jose Carlos Venegas Munoz <jose.carlos.venegas.munoz@intel.com>
16---
17 include/linux/jbd2.h | 2 +-
18 1 file changed, 1 insertion(+), 1 deletion(-)
19
20diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
21index dfaa1f4dcb0c..9955fd6c6159 100644
22--- a/include/linux/jbd2.h
23+++ b/include/linux/jbd2.h
24@@ -47,7 +47,7 @@
25 /*
26 * The default maximum commit age, in seconds.
27 */
28-#define JBD2_DEFAULT_MAX_COMMIT_AGE 5
29+#define JBD2_DEFAULT_MAX_COMMIT_AGE 30
30
31 #ifdef CONFIG_JBD2_DEBUG
32 /*
33--
342.11.1
35
diff --git a/patches/boot_time_opt/0105-silence-rapl.patch b/patches/boot_time_opt/0105-silence-rapl.patch
new file mode 100644
index 0000000..4dd78fc
--- /dev/null
+++ b/patches/boot_time_opt/0105-silence-rapl.patch
@@ -0,0 +1,25 @@
1From 558d32869c8d8e302dd3810610d62e1c69a8ebce Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Mon, 14 Mar 2016 11:22:09 -0600
4Subject: [PATCH 105/124] silence rapl
5
6---
7 drivers/powercap/intel_rapl.c | 2 +-
8 1 file changed, 1 insertion(+), 1 deletion(-)
9
10diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
11index 3c71f608b444..450aff027d42 100644
12--- a/drivers/powercap/intel_rapl.c
13+++ b/drivers/powercap/intel_rapl.c
14@@ -1684,7 +1684,7 @@ static int __init rapl_init(void)
15
16 id = x86_match_cpu(rapl_ids);
17 if (!id) {
18- pr_err("driver does not support CPU family %d model %d\n",
19+ pr_info("driver does not support CPU family %d model %d\n",
20 boot_cpu_data.x86, boot_cpu_data.x86_model);
21
22 return -ENODEV;
23--
242.11.1
25
diff --git a/patches/boot_time_opt/0106-pci-pme-wakeups.patch b/patches/boot_time_opt/0106-pci-pme-wakeups.patch
new file mode 100644
index 0000000..f0a4799
--- /dev/null
+++ b/patches/boot_time_opt/0106-pci-pme-wakeups.patch
@@ -0,0 +1,27 @@
1From 1f44219cd74f5c3b97e2c85af87141e1bddf0555 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Mon, 14 Mar 2016 11:10:58 -0600
4Subject: [PATCH 106/124] pci pme wakeups
5
6Reduce wakeups for PME checks, which are a workaround for miswired
7boards (sadly, too many of them) in laptops.
8---
9 drivers/pci/pci.c | 2 +-
10 1 file changed, 1 insertion(+), 1 deletion(-)
11
12diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
13index eda6a7cf0e54..82a623255059 100644
14--- a/drivers/pci/pci.c
15+++ b/drivers/pci/pci.c
16@@ -57,7 +57,7 @@ struct pci_pme_device {
17 struct pci_dev *dev;
18 };
19
20-#define PME_TIMEOUT 1000 /* How long between PME checks */
21+#define PME_TIMEOUT 4000 /* How long between PME checks */
22
23 static void pci_dev_d3_sleep(struct pci_dev *dev)
24 {
25--
262.11.1
27
diff --git a/patches/boot_time_opt/0107-ksm-wakeups.patch b/patches/boot_time_opt/0107-ksm-wakeups.patch
new file mode 100644
index 0000000..2b25625
--- /dev/null
+++ b/patches/boot_time_opt/0107-ksm-wakeups.patch
@@ -0,0 +1,34 @@
1From a5de04044d428bf54472365e7dc07958aa184daf Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Mon, 14 Mar 2016 11:06:46 -0600
4Subject: [PATCH 107/124] ksm-wakeups
5
6reduce wakeups in ksm by adding rounding (aligning) when the sleep times are 1 second or longer
7
8Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
9---
10 mm/ksm.c | 8 ++++++--
11 1 file changed, 6 insertions(+), 2 deletions(-)
12
13diff --git a/mm/ksm.c b/mm/ksm.c
14index 9ae6011a41f8..eecd3ff669e2 100644
15--- a/mm/ksm.c
16+++ b/mm/ksm.c
17@@ -1725,8 +1725,12 @@ static int ksm_scan_thread(void *nothing)
18 try_to_freeze();
19
20 if (ksmd_should_run()) {
21- schedule_timeout_interruptible(
22- msecs_to_jiffies(ksm_thread_sleep_millisecs));
23+ if (ksm_thread_sleep_millisecs >= 1000)
24+ schedule_timeout_interruptible(
25+ msecs_to_jiffies(round_jiffies_relative(ksm_thread_sleep_millisecs)));
26+ else
27+ schedule_timeout_interruptible(
28+ msecs_to_jiffies(ksm_thread_sleep_millisecs));
29 } else {
30 wait_event_freezable(ksm_thread_wait,
31 ksmd_should_run() || kthread_should_stop());
32--
332.11.1
34
diff --git a/patches/boot_time_opt/0108-intel_idle-tweak-cpuidle-cstates.patch b/patches/boot_time_opt/0108-intel_idle-tweak-cpuidle-cstates.patch
new file mode 100644
index 0000000..da5396c
--- /dev/null
+++ b/patches/boot_time_opt/0108-intel_idle-tweak-cpuidle-cstates.patch
@@ -0,0 +1,227 @@
1From bf7e0cebaafe790f62cbc5815648d556847b7d27 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Sat, 19 Mar 2016 21:32:19 -0400
4Subject: [PATCH 108/124] intel_idle: tweak cpuidle cstates
5
6Increase target_residency in cpuidle cstate
7
8Tune intel_idle to be a bit less aggressive;
9Clear Linux is cleaner in hygiene (wakeups) than the average Linux,
10so we can afford changing these in a way that increases
11performance while keeping power efficiency
12---
13 drivers/idle/intel_idle.c | 74 +++++++++++------------------------------------
14 1 file changed, 17 insertions(+), 57 deletions(-)
15
16diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
17index 4466a2f969d7..cbab050b83f0 100644
18--- a/drivers/idle/intel_idle.c
19+++ b/drivers/idle/intel_idle.c
20@@ -475,7 +475,7 @@ static struct cpuidle_state hsw_cstates[] = {
21 .desc = "MWAIT 0x10",
22 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
23 .exit_latency = 33,
24- .target_residency = 100,
25+ .target_residency = 1000,
26 .enter = &intel_idle,
27 .enter_freeze = intel_idle_freeze, },
28 {
29@@ -483,7 +483,7 @@ static struct cpuidle_state hsw_cstates[] = {
30 .desc = "MWAIT 0x20",
31 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
32 .exit_latency = 133,
33- .target_residency = 400,
34+ .target_residency = 4000,
35 .enter = &intel_idle,
36 .enter_freeze = intel_idle_freeze, },
37 {
38@@ -491,7 +491,7 @@ static struct cpuidle_state hsw_cstates[] = {
39 .desc = "MWAIT 0x32",
40 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
41 .exit_latency = 166,
42- .target_residency = 500,
43+ .target_residency = 5000,
44 .enter = &intel_idle,
45 .enter_freeze = intel_idle_freeze, },
46 {
47@@ -499,7 +499,7 @@ static struct cpuidle_state hsw_cstates[] = {
48 .desc = "MWAIT 0x40",
49 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
50 .exit_latency = 300,
51- .target_residency = 900,
52+ .target_residency = 9000,
53 .enter = &intel_idle,
54 .enter_freeze = intel_idle_freeze, },
55 {
56@@ -507,7 +507,7 @@ static struct cpuidle_state hsw_cstates[] = {
57 .desc = "MWAIT 0x50",
58 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
59 .exit_latency = 600,
60- .target_residency = 1800,
61+ .target_residency = 18000,
62 .enter = &intel_idle,
63 .enter_freeze = intel_idle_freeze, },
64 {
65@@ -515,7 +515,7 @@ static struct cpuidle_state hsw_cstates[] = {
66 .desc = "MWAIT 0x60",
67 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
68 .exit_latency = 2600,
69- .target_residency = 7700,
70+ .target_residency = 77000,
71 .enter = &intel_idle,
72 .enter_freeze = intel_idle_freeze, },
73 {
74@@ -531,27 +531,11 @@ static struct cpuidle_state bdw_cstates[] = {
75 .enter = &intel_idle,
76 .enter_freeze = intel_idle_freeze, },
77 {
78- .name = "C1E-BDW",
79- .desc = "MWAIT 0x01",
80- .flags = MWAIT2flg(0x01),
81- .exit_latency = 10,
82- .target_residency = 20,
83- .enter = &intel_idle,
84- .enter_freeze = intel_idle_freeze, },
85- {
86- .name = "C3-BDW",
87- .desc = "MWAIT 0x10",
88- .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
89- .exit_latency = 40,
90- .target_residency = 100,
91- .enter = &intel_idle,
92- .enter_freeze = intel_idle_freeze, },
93- {
94 .name = "C6-BDW",
95 .desc = "MWAIT 0x20",
96 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
97 .exit_latency = 133,
98- .target_residency = 400,
99+ .target_residency = 4000,
100 .enter = &intel_idle,
101 .enter_freeze = intel_idle_freeze, },
102 {
103@@ -559,7 +543,7 @@ static struct cpuidle_state bdw_cstates[] = {
104 .desc = "MWAIT 0x32",
105 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
106 .exit_latency = 166,
107- .target_residency = 500,
108+ .target_residency = 5000,
109 .enter = &intel_idle,
110 .enter_freeze = intel_idle_freeze, },
111 {
112@@ -567,7 +551,7 @@ static struct cpuidle_state bdw_cstates[] = {
113 .desc = "MWAIT 0x40",
114 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
115 .exit_latency = 300,
116- .target_residency = 900,
117+ .target_residency = 9000,
118 .enter = &intel_idle,
119 .enter_freeze = intel_idle_freeze, },
120 {
121@@ -575,7 +559,7 @@ static struct cpuidle_state bdw_cstates[] = {
122 .desc = "MWAIT 0x50",
123 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
124 .exit_latency = 600,
125- .target_residency = 1800,
126+ .target_residency = 18000,
127 .enter = &intel_idle,
128 .enter_freeze = intel_idle_freeze, },
129 {
130@@ -583,7 +567,7 @@ static struct cpuidle_state bdw_cstates[] = {
131 .desc = "MWAIT 0x60",
132 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
133 .exit_latency = 2600,
134- .target_residency = 7700,
135+ .target_residency = 77000,
136 .enter = &intel_idle,
137 .enter_freeze = intel_idle_freeze, },
138 {
139@@ -600,27 +584,11 @@ static struct cpuidle_state skl_cstates[] = {
140 .enter = &intel_idle,
141 .enter_freeze = intel_idle_freeze, },
142 {
143- .name = "C1E-SKL",
144- .desc = "MWAIT 0x01",
145- .flags = MWAIT2flg(0x01),
146- .exit_latency = 10,
147- .target_residency = 20,
148- .enter = &intel_idle,
149- .enter_freeze = intel_idle_freeze, },
150- {
151- .name = "C3-SKL",
152- .desc = "MWAIT 0x10",
153- .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
154- .exit_latency = 70,
155- .target_residency = 100,
156- .enter = &intel_idle,
157- .enter_freeze = intel_idle_freeze, },
158- {
159 .name = "C6-SKL",
160 .desc = "MWAIT 0x20",
161 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
162 .exit_latency = 85,
163- .target_residency = 200,
164+ .target_residency = 2000,
165 .enter = &intel_idle,
166 .enter_freeze = intel_idle_freeze, },
167 {
168@@ -628,7 +596,7 @@ static struct cpuidle_state skl_cstates[] = {
169 .desc = "MWAIT 0x33",
170 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
171 .exit_latency = 124,
172- .target_residency = 800,
173+ .target_residency = 8000,
174 .enter = &intel_idle,
175 .enter_freeze = intel_idle_freeze, },
176 {
177@@ -636,7 +604,7 @@ static struct cpuidle_state skl_cstates[] = {
178 .desc = "MWAIT 0x40",
179 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
180 .exit_latency = 200,
181- .target_residency = 800,
182+ .target_residency = 8000,
183 .enter = &intel_idle,
184 .enter_freeze = intel_idle_freeze, },
185 {
186@@ -644,7 +612,7 @@ static struct cpuidle_state skl_cstates[] = {
187 .desc = "MWAIT 0x50",
188 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
189 .exit_latency = 480,
190- .target_residency = 5000,
191+ .target_residency = 50000,
192 .enter = &intel_idle,
193 .enter_freeze = intel_idle_freeze, },
194 {
195@@ -652,7 +620,7 @@ static struct cpuidle_state skl_cstates[] = {
196 .desc = "MWAIT 0x60",
197 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
198 .exit_latency = 890,
199- .target_residency = 5000,
200+ .target_residency = 50000,
201 .enter = &intel_idle,
202 .enter_freeze = intel_idle_freeze, },
203 {
204@@ -669,19 +637,11 @@ static struct cpuidle_state skx_cstates[] = {
205 .enter = &intel_idle,
206 .enter_freeze = intel_idle_freeze, },
207 {
208- .name = "C1E-SKX",
209- .desc = "MWAIT 0x01",
210- .flags = MWAIT2flg(0x01),
211- .exit_latency = 10,
212- .target_residency = 20,
213- .enter = &intel_idle,
214- .enter_freeze = intel_idle_freeze, },
215- {
216 .name = "C6-SKX",
217 .desc = "MWAIT 0x20",
218 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
219 .exit_latency = 133,
220- .target_residency = 600,
221+ .target_residency = 1600,
222 .enter = &intel_idle,
223 .enter_freeze = intel_idle_freeze, },
224 {
225--
2262.11.1
227
diff --git a/patches/boot_time_opt/0109-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch b/patches/boot_time_opt/0109-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch
new file mode 100644
index 0000000..70247a0
--- /dev/null
+++ b/patches/boot_time_opt/0109-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch
@@ -0,0 +1,56 @@
1From 4170571f7bb0897c90e13b2fcf3ee06990a9e774 Mon Sep 17 00:00:00 2001
2From: Alan Cox <alan@linux.intel.com>
3Date: Thu, 10 Mar 2016 15:11:28 +0000
4Subject: [PATCH 109/124] xattr: allow setting user.* attributes on symlinks by
5 owner
6
7Kvmtool and clear containers support using user attributes to label host
8files with the virtual uid/guid of the file in the container. This allows an
9end user to manage their files and a complete uid space without all the ugly
10namespace stuff.
11
12The one gap in the support is symlinks because an end user can change the
13ownership of a symbolic link. We support attributes on these files as you
14can already (as root) set security attributes on them.
15
16The current rules seem slightly over-paranoid and as we have a use case this
17patch enables updating the attributes on a symbolic link IFF you are the
18owner of the symlink (as permissions are not usually meaningful on the link
19itself).
20
21Signed-off-by: Alan Cox <alan@linux.intel.com>
22---
23 fs/xattr.c | 14 ++++++++------
24 1 file changed, 8 insertions(+), 6 deletions(-)
25
26diff --git a/fs/xattr.c b/fs/xattr.c
27index 2d13b4e62fae..580a5aeddfd2 100644
28--- a/fs/xattr.c
29+++ b/fs/xattr.c
30@@ -118,15 +118,17 @@ xattr_permission(struct inode *inode, const char *name, int mask)
31 }
32
33 /*
34- * In the user.* namespace, only regular files and directories can have
35- * extended attributes. For sticky directories, only the owner and
36- * privileged users can write attributes.
37+ * In the user.* namespace, only regular files, symbolic links, and
38+ * directories can have extended attributes. For symbolic links and
39+ * sticky directories, only the owner and privileged users can write
40+ * attributes.
41 */
42 if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
43- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
44+ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode))
45 return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
46- if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
47- (mask & MAY_WRITE) && !inode_owner_or_capable(inode))
48+ if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX))
49+ || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE)
50+ && !inode_owner_or_capable(inode))
51 return -EPERM;
52 }
53
54--
552.11.1
56
diff --git a/patches/boot_time_opt/0110-init_task-faster-timerslack.patch b/patches/boot_time_opt/0110-init_task-faster-timerslack.patch
new file mode 100644
index 0000000..b0075ff
--- /dev/null
+++ b/patches/boot_time_opt/0110-init_task-faster-timerslack.patch
@@ -0,0 +1,32 @@
1From 42c2cb32259b76fb1f6713d99c4f0922e97bcc8d Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Wed, 23 Mar 2016 14:52:41 +0000
4Subject: [PATCH 110/124] init_task: faster timerslack
5
6the default tuning is a compromise between client power and server performance;
7for a server distro like Clear Linux, we don't need to compromise.
8(for non-server usages we have different kernel binaries)
9
10in principle this can be done as a patch to systemd as well, but we have a shared
11systemd between usages while we have different kernels, so the logistics
12for where the patch goes work out better here
13---
14 include/linux/init_task.h | 2 +-
15 1 file changed, 1 insertion(+), 1 deletion(-)
16
17diff --git a/include/linux/init_task.h b/include/linux/init_task.h
18index 325f649d77ff..e0eb261e17cb 100644
19--- a/include/linux/init_task.h
20+++ b/include/linux/init_task.h
21@@ -249,7 +249,7 @@ extern struct task_group root_task_group;
22 .journal_info = NULL, \
23 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
24 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
25- .timer_slack_ns = 50000, /* 50 usec default slack */ \
26+ .timer_slack_ns = 1000, /* 1 usec default slack */ \
27 .pids = { \
28 [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
29 [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
30--
312.11.1
32
diff --git a/patches/boot_time_opt/0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch b/patches/boot_time_opt/0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch
new file mode 100644
index 0000000..7d0def8
--- /dev/null
+++ b/patches/boot_time_opt/0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch
@@ -0,0 +1,158 @@
1From 3152053ea1ea3aa77bcc7e990d48ef84621ff6c9 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Sat, 9 Apr 2016 22:41:37 +0000
4Subject: [PATCH 112/124] fs: ext4: fsync: optimize double-fsync() a bunch
5
6There are cases where EXT4 is a bit too conservative sending barriers down to the disk;
7there are cases where the transaction in progress is not the one that sent the barrier
8(in other words: the fsync is for a file for which the IO happened some time ago
9and all data was already sent to the disk). For that case, a more performing tradeoff
10can be made on SSD devices (which have the ability to flush their dram caches in a hurry
11on a power fail event) where the barrier gets sent to the disk, but we don't need to wait
12for the barrier to complete. Any consecutive IO will block on the barrier correctly.
13---
14 block/bio.c | 20 ++++++++++++++++++++
15 block/blk-flush.c | 41 +++++++++++++++++++++++++++++++++++++++++
16 fs/ext4/fsync.c | 6 +++++-
17 include/linux/bio.h | 1 +
18 include/linux/blkdev.h | 5 +++++
19 5 files changed, 72 insertions(+), 1 deletion(-)
20
21diff --git a/block/bio.c b/block/bio.c
22index db85c5753a76..80f5ab6b536a 100644
23--- a/block/bio.c
24+++ b/block/bio.c
25@@ -882,6 +882,26 @@ int submit_bio_wait(struct bio *bio)
26 }
27 EXPORT_SYMBOL(submit_bio_wait);
28
29+static void submit_bio_nowait_endio(struct bio *bio)
30+{
31+ bio_put(bio);
32+}
33+
34+/**
35+ * submit_bio_nowait - submit a bio for fire-and-forget
36+ * @bio: The &struct bio which describes the I/O
37+ *
38+ * Simple wrapper around submit_bio() that sets REQ_SYNC and takes care of
39+ * bio_put() on completion; any bi_end_io set by the caller is overwritten.
40+ */
41+void submit_bio_nowait(struct bio *bio)
42+{
43+ bio->bi_end_io = submit_bio_nowait_endio;
44+ bio->bi_opf |= REQ_SYNC;
45+ submit_bio(bio);
46+}
47+EXPORT_SYMBOL(submit_bio_nowait);
48+
49 /**
50 * bio_advance - increment/complete a bio by some number of bytes
51 * @bio: bio to advance
52diff --git a/block/blk-flush.c b/block/blk-flush.c
53index 3c882cbc7541..b2dfcfe01ed7 100644
54--- a/block/blk-flush.c
55+++ b/block/blk-flush.c
56@@ -530,6 +530,47 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
57 }
58 EXPORT_SYMBOL(blkdev_issue_flush);
59
60+/**
61+ * blkdev_issue_flush_nowait - queue a flush
62+ * @bdev: blockdev to issue flush for
63+ * @gfp_mask: memory allocation flags (for bio_alloc)
64+ *
65+ * Description:
66+ * Queue a flush for the block device in question and return without
67+ * waiting for it to complete. The flush bio is released by its
68+ * completion handler, so flush errors are not reported to the caller.
69+ * Nothing is submitted if the device has no disk, no request queue, or
70+ * no make_request_fn.
71+ */
72+void blkdev_issue_flush_nowait(struct block_device *bdev, gfp_t gfp_mask)
73+{
74+ struct request_queue *q;
75+ struct bio *bio;
76+
77+ if (bdev->bd_disk == NULL)
78+ return;
79+
80+ q = bdev_get_queue(bdev);
81+ if (!q)
82+ return;
83+
84+ /*
85+ * some block devices may not have their queue correctly set up here
86+ * (e.g. loop device without a backing file) and so issuing a flush
87+ * here will panic. Ensure there is a request function before issuing
88+ * the flush.
89+ */
90+ if (!q->make_request_fn)
91+ return;
92+
93+ bio = bio_alloc(gfp_mask, 0);
94+ bio->bi_bdev = bdev;
95+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
96+
97+ submit_bio_nowait(bio);
98+}
99+EXPORT_SYMBOL(blkdev_issue_flush_nowait);
100+
101 struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
102 int node, int cmd_size)
103 {
104diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
105index 88effb1053c7..a58966c18172 100644
106--- a/fs/ext4/fsync.c
107+++ b/fs/ext4/fsync.c
108@@ -150,7 +150,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
109 ret = jbd2_complete_transaction(journal, commit_tid);
110 if (needs_barrier) {
111 issue_flush:
112- err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
113+ err = 0;
114+ if (!blk_queue_nonrot(bdev_get_queue(inode->i_sb->s_bdev)))
115+ err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
116+ else
117+ blkdev_issue_flush_nowait(inode->i_sb->s_bdev, GFP_KERNEL);
118 if (!ret)
119 ret = err;
120 }
121diff --git a/include/linux/bio.h b/include/linux/bio.h
122index 97cb48f03dc7..3f055e6541e0 100644
123--- a/include/linux/bio.h
124+++ b/include/linux/bio.h
125@@ -421,6 +421,7 @@ struct request_queue;
126 extern int bio_phys_segments(struct request_queue *, struct bio *);
127
128 extern int submit_bio_wait(struct bio *bio);
129+extern void submit_bio_nowait(struct bio *bio);
130 extern void bio_advance(struct bio *, unsigned);
131
132 extern void bio_init(struct bio *);
133diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
134index f6a816129856..727684abf21e 100644
135--- a/include/linux/blkdev.h
136+++ b/include/linux/blkdev.h
137@@ -1144,6 +1144,7 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
138 #define BLKDEV_DISCARD_ZERO (1 << 1) /* must reliably zero data */
139
140 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
141+extern void blkdev_issue_flush_nowait(struct block_device *, gfp_t);
142 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
143 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
144 extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
145@@ -1745,6 +1746,10 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
146 return 0;
147 }
148
149+static inline void blkdev_issue_flush_nowait(struct block_device *bdev, gfp_t gfp_mask)
150+{
151+}
152+
153 #endif /* CONFIG_BLOCK */
154
155 #endif
156--
1572.11.1
158
diff --git a/patches/boot_time_opt/0113-overload-on-wakeup.patch b/patches/boot_time_opt/0113-overload-on-wakeup.patch
new file mode 100644
index 0000000..a3a6bce
--- /dev/null
+++ b/patches/boot_time_opt/0113-overload-on-wakeup.patch
@@ -0,0 +1,43 @@
1From 9f25d18f45a8391488feb9783404f2f79b7090f4 Mon Sep 17 00:00:00 2001
2From: jplozi <jplozi@unice.fr>
3Date: Fri, 11 Mar 2016 15:18:06 +0100
4Subject: [PATCH 113/124] overload on wakeup
5
6source https://github.com/jplozi/wastedcores
7
8as an experiment, apply the learnings from the wasted-cores paper
9and see how the performance works out. With the data from this we should
10be able to work with Peter and the rest of the scheduler folks on
11a more permanent/elegant solution.
12---
13 kernel/sched/fair.c | 14 ++++++++++++++
14 1 file changed, 14 insertions(+)
15
16diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
17index c242944f5cbd..5132c828161e 100644
18--- a/kernel/sched/fair.c
19+++ b/kernel/sched/fair.c
20@@ -5638,6 +5638,20 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
21 }
22
23 rcu_read_lock();
24+
25+ if (cpu_rq(prev_cpu)->nr_running) {
26+ int _cpu;
27+
28+ for_each_online_cpu(_cpu) {
29+ if (!cpumask_test_cpu(_cpu, tsk_cpus_allowed(p)) ||
30+ cpu_rq(_cpu)->nr_running)
31+ continue;
32+
33+ rcu_read_unlock();
34+ return _cpu;
35+ }
36+ }
37+
38 for_each_domain(cpu, tmp) {
39 if (!(tmp->flags & SD_LOAD_BALANCE))
40 break;
41--
422.11.1
43
diff --git a/patches/boot_time_opt/0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch b/patches/boot_time_opt/0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch
new file mode 100644
index 0000000..c6bf036
--- /dev/null
+++ b/patches/boot_time_opt/0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch
@@ -0,0 +1,83 @@
1From 3a1512b4ed3922f88936b95731aaff706e7286a9 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Wed, 11 Feb 2015 16:05:23 -0600
4Subject: [PATCH 114/124] bootstats: add printk's to measure boot time in more
5 detail
6
7Few distro-tweaks to add printk's to visualize boot time better
8
9Author: Arjan van de Ven <arjan@linux.intel.com>
10
11Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
12---
13 arch/x86/kernel/alternative.c | 4 ++++
14 drivers/base/firmware_class.c | 2 ++
15 init/main.c | 2 +-
16 kernel/kmod.c | 2 ++
17 4 files changed, 9 insertions(+), 1 deletion(-)
18
19diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
20index 5cb272a7a5a3..d28fb7aae4ce 100644
21--- a/arch/x86/kernel/alternative.c
22+++ b/arch/x86/kernel/alternative.c
23@@ -626,7 +626,9 @@ void __init alternative_instructions(void)
24 * patching.
25 */
26
27+ printk("clr: Applying alternatives\n");
28 apply_alternatives(__alt_instructions, __alt_instructions_end);
29+ printk("clr: Applying alternatives done\n");
30
31 #ifdef CONFIG_SMP
32 /* Patch to UP if other cpus not imminent. */
33@@ -637,6 +639,8 @@ void __init alternative_instructions(void)
34 _text, _etext);
35 }
36
37+ printk("clr: Applying alternatives smp done\n");
38+
39 if (!uniproc_patched || num_possible_cpus() == 1)
40 free_init_pages("SMP alternatives",
41 (unsigned long)__smp_locks,
42diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
43index a95e1e572697..b29467031be6 100644
44--- a/drivers/base/firmware_class.c
45+++ b/drivers/base/firmware_class.c
46@@ -1224,6 +1224,8 @@ request_firmware(const struct firmware **firmware_p, const char *name,
47 {
48 int ret;
49
50+ printk("clr: request_firmware: %s\n", name);
51+
52 /* Need to pin this module until return */
53 __module_get(THIS_MODULE);
54 ret = _request_firmware(firmware_p, name, device, NULL, 0,
55diff --git a/init/main.c b/init/main.c
56index 2858be732f6d..f1d8c3fdbf05 100644
57--- a/init/main.c
58+++ b/init/main.c
59@@ -751,7 +751,7 @@ static int __init_or_module do_one_initcall_debug(initcall_t fn)
60 unsigned long long duration;
61 int ret;
62
63- printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
64+ printk(KERN_DEBUG "calling %pF @ %i\n", fn, raw_smp_processor_id());
65 calltime = ktime_get();
66 ret = fn();
67 rettime = ktime_get();
68diff --git a/kernel/kmod.c b/kernel/kmod.c
69index 0277d1216f80..dc5a6edd3895 100644
70--- a/kernel/kmod.c
71+++ b/kernel/kmod.c
72@@ -76,6 +76,8 @@ static int call_modprobe(char *module_name, int wait)
73 NULL
74 };
75
76+ printk("clr: call_modprobe: %s %i \n", module_name, wait);
77+
78 char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
79 if (!argv)
80 goto out;
81--
822.11.1
83
diff --git a/patches/boot_time_opt/0115-fix-initcall-timestamps.patch b/patches/boot_time_opt/0115-fix-initcall-timestamps.patch
new file mode 100644
index 0000000..cdf2af1
--- /dev/null
+++ b/patches/boot_time_opt/0115-fix-initcall-timestamps.patch
@@ -0,0 +1,42 @@
1From 5b5ad2c9b9b555d20aeba1f895d0c9d1c2a77776 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Thu, 2 Jun 2016 23:36:32 -0500
4Subject: [PATCH 115/124] fix initcall timestamps
5
6Print more fine-grained initcall timings
7
8use the tsc instead of the jiffies clock for initcall_debug
9---
10 init/main.c | 12 ++++++------
11 1 file changed, 6 insertions(+), 6 deletions(-)
12
13diff --git a/init/main.c b/init/main.c
14index f1d8c3fdbf05..8358cbe6ab13 100644
15--- a/init/main.c
16+++ b/init/main.c
17@@ -747,16 +747,16 @@ __setup("initcall_blacklist=", initcall_blacklist);
18
19 static int __init_or_module do_one_initcall_debug(initcall_t fn)
20 {
21- ktime_t calltime, delta, rettime;
22+ unsigned long long calltime, delta, rettime;
23 unsigned long long duration;
24 int ret;
25
26- printk(KERN_DEBUG "calling %pF @ %i\n", fn, raw_smp_processor_id());
27- calltime = ktime_get();
28+ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
29+ calltime = local_clock();
30 ret = fn();
31- rettime = ktime_get();
32- delta = ktime_sub(rettime, calltime);
33- duration = (unsigned long long) ktime_to_ns(delta) >> 10;
34+ rettime = local_clock();
35+ delta = rettime - calltime;
36+ duration = delta >> 10;
37 printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
38 fn, ret, duration);
39
40--
412.11.1
42
diff --git a/patches/boot_time_opt/0116-smpboot-reuse-timer-calibration.patch b/patches/boot_time_opt/0116-smpboot-reuse-timer-calibration.patch
new file mode 100644
index 0000000..d1f71b5
--- /dev/null
+++ b/patches/boot_time_opt/0116-smpboot-reuse-timer-calibration.patch
@@ -0,0 +1,31 @@
1From 16104411cc5a7b20f310e3ecede85343ee6ce6b9 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Wed, 11 Feb 2015 17:28:14 -0600
4Subject: [PATCH 116/124] smpboot: reuse timer calibration
5
6No point recalibrating for a known-constant TSC... saves 200ms+ of boot time.
7
8Author: Arjan van de Ven <arjan@linux.intel.com>
9
10Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
11---
12 arch/x86/kernel/tsc.c | 3 +++
13 1 file changed, 3 insertions(+)
14
15diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
16index 46b2f41f8b05..88553c1f21f1 100644
17--- a/arch/x86/kernel/tsc.c
18+++ b/arch/x86/kernel/tsc.c
19@@ -1384,6 +1384,9 @@ unsigned long calibrate_delay_is_known(void)
20 if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
21 return 0;
22
23+ if (cpu != 0)
24+ return cpu_data(0).loops_per_jiffy;
25+
26 if (!mask)
27 return 0;
28
29--
302.11.1
31
diff --git a/patches/boot_time_opt/0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch b/patches/boot_time_opt/0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch
new file mode 100644
index 0000000..978e09f
--- /dev/null
+++ b/patches/boot_time_opt/0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch
@@ -0,0 +1,156 @@
1From fd1f55138c242bd9aeec374ff611064bdc89b359 Mon Sep 17 00:00:00 2001
2From: Jim Kukunas <james.t.kukunas@linux.intel.com>
3Date: Fri, 27 May 2016 09:26:51 -0400
4Subject: [PATCH 117/124] raid6: add Kconfig option to skip raid6 benchmarking
5
6Adds CONFIG_RAID6_FORCE_ALGO, which causes the kernel to not benchmark
7each raid recovery and syndrome generation algorithm, and instead use
8the version selected via Kconfig (CONFIG_RAID6_FORCE_{INT,SSSE3,AVX2}).
9If the selected algorithm is not supported by the processor at
10runtime, a fallback is used.
11
12Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
13---
14 lib/Kconfig | 3 +--
15 lib/raid6/Kconfig | 38 ++++++++++++++++++++++++++++++++++++
16 lib/raid6/algos.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
17 3 files changed, 97 insertions(+), 2 deletions(-)
18 create mode 100644 lib/raid6/Kconfig
19
20diff --git a/lib/Kconfig b/lib/Kconfig
21index 260a80e313b9..b3efd21db2fd 100644
22--- a/lib/Kconfig
23+++ b/lib/Kconfig
24@@ -7,8 +7,7 @@ config BINARY_PRINTF
25
26 menu "Library routines"
27
28-config RAID6_PQ
29- tristate
30+source "lib/raid6/Kconfig"
31
32 config BITREVERSE
33 tristate
34diff --git a/lib/raid6/Kconfig b/lib/raid6/Kconfig
35new file mode 100644
36index 000000000000..d881d6be89bb
37--- /dev/null
38+++ b/lib/raid6/Kconfig
39@@ -0,0 +1,38 @@
40+menu "RAID 6"
41+
42+config RAID6_PQ
43+ tristate
44+
45+config RAID6_FORCE_ALGO
46+ bool "Always use specified recovery algorithm"
47+ default n
48+ depends on RAID6_PQ
49+ help
50+ If this option is not set, on every boot the kernel will
51+ benchmark each optimized version of the RAID6 recovery and
52+ syndrome generation algorithms and will select the one that
53+ performs best. Microbenchmarking each version negatively
54+ affects boot time.
55+
56+ Enabling this option skips the benchmark at boot, and
57+ instead always uses the algorithm selected. The only exception
58+ is if the selected algorithm relies on a cpu feature not
59+ supported at runtime. In this case, one of the lower performance
60+ fallbacks are used.
61+
62+choice
63+ prompt "RAID6 Recovery Algorithm"
64+ default RAID6_FORCE_INT
65+ depends on RAID6_FORCE_ALGO
66+ ---help---
67+ Select the RAID6 recovery algorithm to unconditionally use
68+
69+ config RAID6_FORCE_INT
70+ bool "Reference Implementation"
71+ config RAID6_FORCE_SSSE3
72+ bool "SSSE3"
73+ config RAID6_FORCE_AVX2
74+ bool "AVX2"
75+endchoice
76+
77+endmenu
78diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
79index 7857049fd7d3..29332d2a04a5 100644
80--- a/lib/raid6/algos.c
81+++ b/lib/raid6/algos.c
82@@ -125,6 +125,63 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
83 #define time_before(x, y) ((x) < (y))
84 #endif
85
86+#ifdef CONFIG_RAID6_FORCE_ALGO
87+/* TODO don't compile in algos that will never be used */
88+int __init raid6_select_algo(void)
89+{
90+ const struct raid6_recov_calls *recov_fallback = &raid6_recov_intx1;
91+ const struct raid6_recov_calls *recov_algo;
92+ const struct raid6_calls *gen_fallback;
93+ const struct raid6_calls *gen_algo;
94+
95+#if defined(__i386__)
96+ gen_fallback = &raid6_intx32;
97+#elif defined(__x86_64__)
98+ gen_fallback = &raid6_sse2x2;
99+#else
100+# error "TODO"
101+#endif
102+
103+#if defined(CONFIG_RAID6_FORCE_INT)
104+ recov_algo = &raid6_recov_intx1;
105+ gen_algo = &raid6_intx32;
106+
107+#elif defined(CONFIG_RAID6_FORCE_SSSE3)
108+ recov_algo = &raid6_recov_ssse3;
109+#if defined(__i386__)
110+ gen_algo = &raid6_sse2x2;
111+#else
112+ gen_algo = &raid6_sse2x4;
113+#endif
114+
115+#elif defined(CONFIG_RAID6_FORCE_AVX2)
116+ recov_algo = &raid6_recov_avx2;
117+
118+#if defined(__i386__)
119+ gen_algo = &raid6_avx2x2;
120+#else
121+ gen_algo = &raid6_avx2x4;
122+#endif
123+
124+#else
125+#error "RAID6 Forced Recov Algo: Unsupported selection"
126+#endif
127+
128+ if (recov_algo->valid != NULL && recov_algo->valid() == 0)
129+ recov_algo = recov_fallback;
130+
131+ pr_info("raid6: Forced to use recovery algorithm %s\n", recov_algo->name);
132+
133+ raid6_2data_recov = recov_algo->data2;
134+ raid6_datap_recov = recov_algo->datap;
135+
136+ pr_info("raid6: Forced gen() algo %s\n", gen_algo->name);
137+
138+ raid6_call = *gen_algo;
139+
140+ return gen_algo && recov_algo ? 0 : -EINVAL;
141+}
142+#else
143 static inline const struct raid6_recov_calls *raid6_choose_recov(void)
144 {
145 const struct raid6_recov_calls *const *algo;
146@@ -256,6 +313,7 @@ int __init raid6_select_algo(void)
147
148 return gen_best && rec_best ? 0 : -EINVAL;
149 }
150+#endif
151
152 static void raid6_exit(void)
153 {
154--
1552.11.1
156
diff --git a/patches/boot_time_opt/0118-Initialize-ata-before-graphics.patch b/patches/boot_time_opt/0118-Initialize-ata-before-graphics.patch
new file mode 100644
index 0000000..70e07c8
--- /dev/null
+++ b/patches/boot_time_opt/0118-Initialize-ata-before-graphics.patch
@@ -0,0 +1,47 @@
1From fbc1ab7c18a9c960a0bff293a93620d581658f8d Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Thu, 2 Jun 2016 23:36:32 -0500
4Subject: [PATCH 118/124] Initialize ata before graphics
5
6ATA init is the long pole in the boot process, and it's asynchronous.
7Move the graphics init after it so that ata and graphics initialize
8in parallel.
9---
10 drivers/Makefile | 11 ++++++-----
11 1 file changed, 6 insertions(+), 5 deletions(-)
12
13diff --git a/drivers/Makefile b/drivers/Makefile
14index 194d20bee7dc..2785e4c6b30f 100644
15--- a/drivers/Makefile
16+++ b/drivers/Makefile
17@@ -55,14 +55,9 @@ obj-y += char/
18 # iommu/ comes before gpu as gpu are using iommu controllers
19 obj-$(CONFIG_IOMMU_SUPPORT) += iommu/
20
21-# gpu/ comes after char for AGP vs DRM startup and after iommu
22-obj-y += gpu/
23
24 obj-$(CONFIG_CONNECTOR) += connector/
25
26-# i810fb and intelfb depend on char/agp/
27-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
28-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
29
30 obj-$(CONFIG_PARPORT) += parport/
31 obj-$(CONFIG_NVM) += lightnvm/
32@@ -76,6 +71,12 @@ obj-$(CONFIG_IDE) += ide/
33 obj-$(CONFIG_SCSI) += scsi/
34 obj-y += nvme/
35 obj-$(CONFIG_ATA) += ata/
36+
37+# gpu/ comes after char for AGP vs DRM startup and after iommu
38+obj-y += gpu/
39+# i810fb and intelfb depend on char/agp/
40+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
41+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
42 obj-$(CONFIG_TARGET_CORE) += target/
43 obj-$(CONFIG_MTD) += mtd/
44 obj-$(CONFIG_SPI) += spi/
45--
462.11.1
47
diff --git a/patches/boot_time_opt/0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch b/patches/boot_time_opt/0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch
new file mode 100644
index 0000000..a068afb
--- /dev/null
+++ b/patches/boot_time_opt/0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch
@@ -0,0 +1,311 @@
1From d9390cb702de5cbef64f893efd2344c4f58dae82 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Mon, 25 Jul 2016 06:44:34 -0500
4Subject: [PATCH 119/124] reduce e1000e boot time by tightening sleep ranges
5
6The e1000e driver is a great user of the usleep_range() API,
7and has many nice ranges that in principle help power management.
8
9However the ranges that are used only during system startup are
10very long (and can easily add 100 msec to the boot time) while
11the power savings of such long ranges are irrelevant due to the
12one-off, boot only, nature of these functions.
13
14This patch shrinks some of the longest ranges to be shorter
15(while still using a power friendly 1 msec range); this saves
16100msec+ of boot time on my BDW NUCs
17
18Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
19---
20 drivers/net/ethernet/intel/e1000e/80003es2lan.c | 2 +-
21 drivers/net/ethernet/intel/e1000e/82571.c | 2 +-
22 drivers/net/ethernet/intel/e1000e/ethtool.c | 14 +++++++-------
23 drivers/net/ethernet/intel/e1000e/ich8lan.c | 20 ++++++++++----------
24 drivers/net/ethernet/intel/e1000e/mac.c | 2 +-
25 drivers/net/ethernet/intel/e1000e/netdev.c | 14 +++++++-------
26 drivers/net/ethernet/intel/e1000e/nvm.c | 2 +-
27 7 files changed, 28 insertions(+), 28 deletions(-)
28
29diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
30index cd391376036c..b5759899eeb8 100644
31--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
32+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
33@@ -698,7 +698,7 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw)
34 ew32(TCTL, E1000_TCTL_PSP);
35 e1e_flush();
36
37- usleep_range(10000, 20000);
38+ usleep_range(10000, 11000);
39
40 ctrl = er32(CTRL);
41
42diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
43index 6b03c8553e59..d31145269dd9 100644
44--- a/drivers/net/ethernet/intel/e1000e/82571.c
45+++ b/drivers/net/ethernet/intel/e1000e/82571.c
46@@ -977,7 +977,7 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw)
47 ew32(TCTL, tctl);
48 e1e_flush();
49
50- usleep_range(10000, 20000);
51+ usleep_range(10000, 11000);
52
53 /* Must acquire the MDIO ownership before MAC reset.
54 * Ownership defaults to firmware after a reset.
55diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
56index 7aff68a4a4df..7cb689bd41f8 100644
57--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
58+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
59@@ -1023,7 +1023,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
60 /* Disable all the interrupts */
61 ew32(IMC, 0xFFFFFFFF);
62 e1e_flush();
63- usleep_range(10000, 20000);
64+ usleep_range(10000, 11000);
65
66 /* Test each interrupt */
67 for (i = 0; i < 10; i++) {
68@@ -1055,7 +1055,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
69 ew32(IMC, mask);
70 ew32(ICS, mask);
71 e1e_flush();
72- usleep_range(10000, 20000);
73+ usleep_range(10000, 11000);
74
75 if (adapter->test_icr & mask) {
76 *data = 3;
77@@ -1073,7 +1073,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
78 ew32(IMS, mask);
79 ew32(ICS, mask);
80 e1e_flush();
81- usleep_range(10000, 20000);
82+ usleep_range(10000, 11000);
83
84 if (!(adapter->test_icr & mask)) {
85 *data = 4;
86@@ -1091,7 +1091,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
87 ew32(IMC, ~mask & 0x00007FFF);
88 ew32(ICS, ~mask & 0x00007FFF);
89 e1e_flush();
90- usleep_range(10000, 20000);
91+ usleep_range(10000, 11000);
92
93 if (adapter->test_icr) {
94 *data = 5;
95@@ -1103,7 +1103,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
96 /* Disable all the interrupts */
97 ew32(IMC, 0xFFFFFFFF);
98 e1e_flush();
99- usleep_range(10000, 20000);
100+ usleep_range(10000, 11000);
101
102 /* Unhook test interrupt handler */
103 free_irq(irq, netdev);
104@@ -1479,7 +1479,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter)
105 */
106 ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK);
107 e1e_flush();
108- usleep_range(10000, 20000);
109+ usleep_range(10000, 11000);
110
111 return 0;
112 }
113@@ -1592,7 +1592,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
114 hw->phy.media_type == e1000_media_type_internal_serdes) {
115 ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
116 e1e_flush();
117- usleep_range(10000, 20000);
118+ usleep_range(10000, 11000);
119 break;
120 }
121 /* Fall Through */
122diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
123index f3aaca743ea3..bef75cec259f 100644
124--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
125+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
126@@ -289,7 +289,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw)
127 u16 count = 20;
128
129 do {
130- usleep_range(5000, 10000);
131+ usleep_range(5000, 6000);
132 } while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--);
133
134 msleep(30);
135@@ -422,7 +422,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
136 /* Ungate automatic PHY configuration on non-managed 82579 */
137 if ((hw->mac.type == e1000_pch2lan) &&
138 !(fwsm & E1000_ICH_FWSM_FW_VALID)) {
139- usleep_range(10000, 20000);
140+ usleep_range(10000, 11000);
141 e1000_gate_hw_phy_config_ich8lan(hw, false);
142 }
143
144@@ -547,7 +547,7 @@ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw)
145 phy->id = 0;
146 while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) &&
147 (i++ < 100)) {
148- usleep_range(1000, 2000);
149+ usleep_range(1000, 1100);
150 ret_val = e1000e_get_phy_id(hw);
151 if (ret_val)
152 return ret_val;
153@@ -1259,7 +1259,7 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
154 goto out;
155 }
156
157- usleep_range(10000, 20000);
158+ usleep_range(10000, 11000);
159 }
160 e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10);
161
162@@ -2011,7 +2011,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
163
164 while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
165 (i++ < 30))
166- usleep_range(10000, 20000);
167+ usleep_range(10000, 11000);
168 return blocked ? E1000_BLK_PHY_RESET : 0;
169 }
170
171@@ -2827,7 +2827,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw)
172 return 0;
173
174 /* Allow time for h/w to get to quiescent state after reset */
175- usleep_range(10000, 20000);
176+ usleep_range(10000, 11000);
177
178 /* Perform any necessary post-reset workarounds */
179 switch (hw->mac.type) {
180@@ -2863,7 +2863,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw)
181 if (hw->mac.type == e1000_pch2lan) {
182 /* Ungate automatic PHY configuration on non-managed 82579 */
183 if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
184- usleep_range(10000, 20000);
185+ usleep_range(10000, 11000);
186 e1000_gate_hw_phy_config_ich8lan(hw, false);
187 }
188
189@@ -3884,7 +3884,7 @@ static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw)
190 */
191 if (!ret_val) {
192 nvm->ops.reload(hw);
193- usleep_range(10000, 20000);
194+ usleep_range(10000, 11000);
195 }
196
197 out:
198@@ -4035,7 +4035,7 @@ static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw)
199 */
200 if (!ret_val) {
201 nvm->ops.reload(hw);
202- usleep_range(10000, 20000);
203+ usleep_range(10000, 11000);
204 }
205
206 out:
207@@ -4658,7 +4658,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
208 ew32(TCTL, E1000_TCTL_PSP);
209 e1e_flush();
210
211- usleep_range(10000, 20000);
212+ usleep_range(10000, 11000);
213
214 /* Workaround for ICH8 bit corruption issue in FIFO memory */
215 if (hw->mac.type == e1000_ich8lan) {
216diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
217index b322011ec282..eecbf7a12735 100644
218--- a/drivers/net/ethernet/intel/e1000e/mac.c
219+++ b/drivers/net/ethernet/intel/e1000e/mac.c
220@@ -815,7 +815,7 @@ static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw)
221 * milliseconds even if the other end is doing it in SW).
222 */
223 for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) {
224- usleep_range(10000, 20000);
225+ usleep_range(10000, 11000);
226 status = er32(STATUS);
227 if (status & E1000_STATUS_LU)
228 break;
229diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
230index 7017281ba2dc..7d68d694ed9e 100644
231--- a/drivers/net/ethernet/intel/e1000e/netdev.c
232+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
233@@ -3206,7 +3206,7 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
234 if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
235 ew32(RCTL, rctl & ~E1000_RCTL_EN);
236 e1e_flush();
237- usleep_range(10000, 20000);
238+ usleep_range(10000, 11000);
239
240 if (adapter->flags2 & FLAG2_DMA_BURST) {
241 /* set the writeback threshold (only takes effect if the RDTR
242@@ -4258,7 +4258,7 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset)
243
244 /* flush both disables and wait for them to finish */
245 e1e_flush();
246- usleep_range(10000, 20000);
247+ usleep_range(10000, 11000);
248
249 e1000_irq_disable(adapter);
250
251@@ -4296,7 +4296,7 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter)
252 {
253 might_sleep();
254 while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
255- usleep_range(1000, 2000);
256+ usleep_range(1000, 1100);
257 e1000e_down(adapter, true);
258 e1000e_up(adapter);
259 clear_bit(__E1000_RESETTING, &adapter->state);
260@@ -4671,7 +4671,7 @@ int e1000e_close(struct net_device *netdev)
261 int count = E1000_CHECK_RESET_COUNT;
262
263 while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
264- usleep_range(10000, 20000);
265+ usleep_range(10000, 11000);
266
267 WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
268
269@@ -5996,7 +5996,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
270 }
271
272 while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
273- usleep_range(1000, 2000);
274+ usleep_range(1000, 1100);
275 /* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */
276 adapter->max_frame_size = max_frame;
277 e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu);
278@@ -6276,7 +6276,7 @@ static int e1000e_pm_freeze(struct device *dev)
279 int count = E1000_CHECK_RESET_COUNT;
280
281 while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
282- usleep_range(10000, 20000);
283+ usleep_range(10000, 11000);
284
285 WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
286
287@@ -6687,7 +6687,7 @@ static int e1000e_pm_runtime_suspend(struct device *dev)
288 int count = E1000_CHECK_RESET_COUNT;
289
290 while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
291- usleep_range(10000, 20000);
292+ usleep_range(10000, 11000);
293
294 WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
295
296diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
297index 2efd80dfd88e..38f7c8fb3061 100644
298--- a/drivers/net/ethernet/intel/e1000e/nvm.c
299+++ b/drivers/net/ethernet/intel/e1000e/nvm.c
300@@ -410,7 +410,7 @@ s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
301 break;
302 }
303 }
304- usleep_range(10000, 20000);
305+ usleep_range(10000, 11000);
306 nvm->ops.release(hw);
307 }
308
309--
3102.11.1
311
diff --git a/patches/boot_time_opt/0120-give-rdrand-some-credit.patch b/patches/boot_time_opt/0120-give-rdrand-some-credit.patch
new file mode 100644
index 0000000..4b1669c
--- /dev/null
+++ b/patches/boot_time_opt/0120-give-rdrand-some-credit.patch
@@ -0,0 +1,30 @@
1From 5cc978db25b2c92707f68b15098ac39901fb5aac Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Fri, 29 Jul 2016 19:10:52 +0000
4Subject: [PATCH 120/124] give rdrand some credit
5
6try to credit rdrand/rdseed with some entropy
7
8In VMs, and even on modern hardware, we're super starved for entropy, and while we can
9and do wear a tin foil hat, it's very hard to argue that
10rdrand and rdtsc add zero entropy.
11---
12 drivers/char/random.c | 2 ++
13 1 file changed, 2 insertions(+)
14
15diff --git a/drivers/char/random.c b/drivers/char/random.c
16index d6876d506220..fca09af81b2c 100644
17--- a/drivers/char/random.c
18+++ b/drivers/char/random.c
19@@ -1638,6 +1638,8 @@ static void init_std_data(struct entropy_store *r)
20 if (!arch_get_random_seed_long(&rv) &&
21 !arch_get_random_long(&rv))
22 rv = random_get_entropy();
23+ else
24+ credit_entropy_bits(r, 1);
25 mix_pool_bytes(r, &rv, sizeof(rv));
26 }
27 mix_pool_bytes(r, utsname(), sizeof(*(utsname())));
28--
292.11.1
30
diff --git a/patches/boot_time_opt/0121-e1000e-change-default-policy.patch b/patches/boot_time_opt/0121-e1000e-change-default-policy.patch
new file mode 100644
index 0000000..bf3e13d
--- /dev/null
+++ b/patches/boot_time_opt/0121-e1000e-change-default-policy.patch
@@ -0,0 +1,27 @@
1From 5b4707fc2aa8c49aa18a60136880bf05a3e29071 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Sat, 10 Dec 2016 14:29:52 +0000
4Subject: [PATCH 121/124] e1000e: change default policy
5
6change the default irq mitigation policy for e1000e to be
7more HPC/cluster friendly
8---
9 drivers/net/ethernet/intel/e1000e/param.c | 2 +-
10 1 file changed, 1 insertion(+), 1 deletion(-)
11
12diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
13index 6d8c39abee16..ef1122ad3b98 100644
14--- a/drivers/net/ethernet/intel/e1000e/param.c
15+++ b/drivers/net/ethernet/intel/e1000e/param.c
16@@ -92,7 +92,7 @@ E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
17 * Valid Range: 100-100000 or one of: 0=off, 1=dynamic, 3=dynamic conservative
18 */
19 E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate");
20-#define DEFAULT_ITR 3
21+#define DEFAULT_ITR 1
22 #define MAX_ITR 100000
23 #define MIN_ITR 100
24
25--
262.11.1
27
diff --git a/patches/boot_time_opt/0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch b/patches/boot_time_opt/0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch
new file mode 100644
index 0000000..eb44cec
--- /dev/null
+++ b/patches/boot_time_opt/0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch
@@ -0,0 +1,28 @@
1From 5cf7ba4ba9c9d770aad9e52deaa3730f259df9f1 Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Fri, 6 Jan 2017 15:34:09 +0000
4Subject: [PATCH 122/124] ipv4/tcp: allow the memory tuning for tcp to go a
5 little bigger than default
6
7---
8 net/ipv4/tcp.c | 4 ++--
9 1 file changed, 2 insertions(+), 2 deletions(-)
10
11diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
12index 6a90a0e130dc..32e43ce7c60e 100644
13--- a/net/ipv4/tcp.c
14+++ b/net/ipv4/tcp.c
15@@ -3341,8 +3341,8 @@ void __init tcp_init(void)
16 tcp_init_mem();
17 /* Set per-socket limits to no more than 1/128 the pressure threshold */
18 limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
19- max_wshare = min(4UL*1024*1024, limit);
20- max_rshare = min(6UL*1024*1024, limit);
21+ max_wshare = min(16UL*1024*1024, limit);
22+ max_rshare = min(16UL*1024*1024, limit);
23
24 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
25 sysctl_tcp_wmem[1] = 16*1024;
26--
272.11.1
28
diff --git a/patches/boot_time_opt/0123-igb-no-runtime-pm-to-fix-reboot-oops.patch b/patches/boot_time_opt/0123-igb-no-runtime-pm-to-fix-reboot-oops.patch
new file mode 100644
index 0000000..ce4964e
--- /dev/null
+++ b/patches/boot_time_opt/0123-igb-no-runtime-pm-to-fix-reboot-oops.patch
@@ -0,0 +1,27 @@
1From 10f0c995ce6aaf6b3ffa78377f1a12ad0477057a Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Thu, 12 Jan 2017 18:17:14 +0000
4Subject: [PATCH 123/124] igb: no runtime pm to fix reboot oops
5
6Causes oops on reboot due to a race between runtime resume and shutdown
7---
8 drivers/net/ethernet/intel/igb/igb_main.c | 3 ---
9 1 file changed, 3 deletions(-)
10
11diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
12index 9affd7c198bd..8ade77e75b36 100644
13--- a/drivers/net/ethernet/intel/igb/igb_main.c
14+++ b/drivers/net/ethernet/intel/igb/igb_main.c
15@@ -238,9 +238,6 @@ static struct pci_driver igb_driver = {
16 .id_table = igb_pci_tbl,
17 .probe = igb_probe,
18 .remove = igb_remove,
19-#ifdef CONFIG_PM
20- .driver.pm = &igb_pm_ops,
21-#endif
22 .shutdown = igb_shutdown,
23 .sriov_configure = igb_pci_sriov_configure,
24 .err_handler = &igb_err_handler
25--
262.11.1
27
diff --git a/patches/boot_time_opt/0124-tweak-perfbias.patch b/patches/boot_time_opt/0124-tweak-perfbias.patch
new file mode 100644
index 0000000..56a2865
--- /dev/null
+++ b/patches/boot_time_opt/0124-tweak-perfbias.patch
@@ -0,0 +1,32 @@
1From 03e2c414a860264511dae5bbfc6d7e62b8b94f0f Mon Sep 17 00:00:00 2001
2From: Arjan van de Ven <arjan@linux.intel.com>
3Date: Sun, 22 Jan 2017 18:51:13 +0000
4Subject: [PATCH 124/124] tweak perfbias
5
6---
7 arch/x86/kernel/cpu/intel.c | 6 +++---
8 1 file changed, 3 insertions(+), 3 deletions(-)
9
10diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
11index fcd484d2bb03..13ae40f10bd4 100644
12--- a/arch/x86/kernel/cpu/intel.c
13+++ b/arch/x86/kernel/cpu/intel.c
14@@ -434,12 +434,12 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c)
15 return;
16
17 rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
18- if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
19+ if ((epb & 0xF) >= ENERGY_PERF_BIAS_NORMAL)
20 return;
21
22- pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
23+ pr_warn_once("ENERGY_PERF_BIAS: Set to 'performance', was 'normal'\n");
24 pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
25- epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
26+ epb = (epb & ~0xF) | ENERGY_PERF_BIAS_PERFORMANCE;
27 wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
28 }
29
30--
312.11.1
32
diff --git a/patches/boot_time_opt/0125-e1000e-increase-pause-and-refresh-time.patch b/patches/boot_time_opt/0125-e1000e-increase-pause-and-refresh-time.patch
new file mode 100644
index 0000000..1c50e74
--- /dev/null
+++ b/patches/boot_time_opt/0125-e1000e-increase-pause-and-refresh-time.patch
@@ -0,0 +1,33 @@
1From 6730c1ae12a567d56092d15540d2f971be95b936 Mon Sep 17 00:00:00 2001
2From: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
3Date: Mon, 27 Mar 2017 16:01:56 -0600
4Subject: [PATCH] e1000e: increase pause and refresh time
5
6Suggested-by: Tim Pepper <timothy.c.pepper@linux.intel.com>
7Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
8---
9 drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++--
10 1 file changed, 2 insertions(+), 2 deletions(-)
11
12diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
13index 7d68d694ed9e..1db390a52656 100644
14--- a/drivers/net/ethernet/intel/e1000e/netdev.c
15+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
16@@ -4032,12 +4032,12 @@ void e1000e_reset(struct e1000_adapter *adapter)
17 case e1000_pch2lan:
18 case e1000_pch_lpt:
19 case e1000_pch_spt:
20- fc->refresh_time = 0x0400;
21+ fc->refresh_time = 0xFFFF;
22+ fc->pause_time = 0xFFFF;
23
24 if (adapter->netdev->mtu <= ETH_DATA_LEN) {
25 fc->high_water = 0x05C20;
26 fc->low_water = 0x05048;
27- fc->pause_time = 0x0650;
28 break;
29 }
30
31--
322.12.2
33
diff --git a/patches/boot_time_opt/0151-mm-Export-do_madvise.patch b/patches/boot_time_opt/0151-mm-Export-do_madvise.patch
new file mode 100644
index 0000000..a6dbff7
--- /dev/null
+++ b/patches/boot_time_opt/0151-mm-Export-do_madvise.patch
@@ -0,0 +1,84 @@
1From 99b4cdcce43ad0f706120bef26fef8c628c572cf Mon Sep 17 00:00:00 2001
2From: Sebastien Boeuf <sebastien.boeuf@intel.com>
3Date: Mon, 23 Jan 2017 15:03:52 -0800
4Subject: [PATCH 151/154] mm: Export do_madvise()
5
6Combined with certain flags, the madvise() system call allows
7memory to be freed more smartly and more efficiently than
8we could with a simple free(). The issue is that it is not
9available to kernel modules that might need it.
10
11In order to solve this lack of support, this patch exports
12do_madvise() so as to make it available to the entire kernel.
13The already existing madvise() system call is unchanged and
14now relies on this new do_madvise() function.
15
16Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com>
17Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
18---
19 include/linux/mm.h | 2 ++
20 mm/madvise.c | 25 +++++++++++++++++++++----
21 2 files changed, 23 insertions(+), 4 deletions(-)
22
23diff --git a/include/linux/mm.h b/include/linux/mm.h
24index 0b5b2e4df14e..925ec25f99a8 100644
25--- a/include/linux/mm.h
26+++ b/include/linux/mm.h
27@@ -2450,5 +2450,7 @@ void __init setup_nr_node_ids(void);
28 static inline void setup_nr_node_ids(void) {}
29 #endif
30
31+extern int do_madvise(unsigned long start, size_t len_in, int behavior);
32+
33 #endif /* __KERNEL__ */
34 #endif /* _LINUX_MM_H */
35diff --git a/mm/madvise.c b/mm/madvise.c
36index 93fb63e88b5e..c8bbf93d4978 100644
37--- a/mm/madvise.c
38+++ b/mm/madvise.c
39@@ -618,9 +618,7 @@ madvise_behavior_valid(int behavior)
40 }
41
42 /*
43- * The madvise(2) system call.
44- *
45- * Applications can use madvise() to advise the kernel how it should
46+ * Kernel modules can use do_madvise() to advise the kernel how it should
47 * handle paging I/O in this VM area. The idea is to help the kernel
48 * use appropriate read-ahead and caching techniques. The information
49 * provided is advisory only, and can be safely disregarded by the
50@@ -673,7 +671,7 @@ madvise_behavior_valid(int behavior)
51 * -EBADF - map exists, but area maps something that isn't a file.
52 * -EAGAIN - a kernel resource was temporarily unavailable.
53 */
54-SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
55+int do_madvise(unsigned long start, size_t len_in, int behavior)
56 {
57 unsigned long end, tmp;
58 struct vm_area_struct *vma, *prev;
59@@ -767,3 +765,22 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
60
61 return error;
62 }
63+EXPORT_SYMBOL_GPL(do_madvise);
64+
65+/*
66+ * The madvise(2) system call.
67+ *
68+ * Applications can use madvise() system call to advise the kernel how
69+ * it should handle paging I/O in this VM area. The idea is to help
70+ * the kernel use appropriate read-ahead and caching techniques. The
71+ * information provided is advisory only, and can be safely disregarded
72+ * by the kernel without affecting the correct operation of the application.
73+ *
74+ * behavior values are the same as the ones defined in do_madvise()
75+ *
76+ * return values are the same as the ones defined in do_madvise()
77+ */
78+SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
79+{
80+ return do_madvise(start, len_in, behavior);
81+}
82--
832.12.1
84
diff --git a/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch b/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch
new file mode 100644
index 0000000..5f44930
--- /dev/null
+++ b/patches/boot_time_opt/0152-x86-kvm-Notify-host-to-release-pages.patch
@@ -0,0 +1,180 @@
1From d28921b5f797829e4e676f7968ae688ef96b7992 Mon Sep 17 00:00:00 2001
2From: Sebastien Boeuf <sebastien.boeuf@intel.com>
3Date: Mon, 23 Jan 2017 15:08:55 -0800
4Subject: [PATCH 152/154] x86: kvm: Notify host to release pages
5
6In context of hypervisors managing several virtual machines, we
7want those virtual machines to give the memory they used back to
8the host when they don't need it anymore.
9
10This patch introduces a new hypercall, KVM_HC_RETURN_MEM, allowing
11the guest kernel to notify the host kernel when such an event occurs.
12When the host receives the new hypercall, it calls the do_madvise()
13function that was exported previously.
14
15Use of do_madvise() with MADV_DONTNEED flag will allow the guest to
16ask for a new page without going through a new hypercall. Instead,
17it will be able to start using that memory again as it will get
18faulted back in as a fresh new page. That's why do_madvise() is more
19efficient than doing vm_unmap() to return some memory to the host.
20
21This patch also introduces a new sysctl, kvm_madv_instant_free,
22allowing the user to select MADV_FREE advice instead of MADV_DONTNEED.
23MADV_FREE performs better than MADV_DONTNEED
24because it does not zero the pages when the memory has not been
25freed by the kernel. This can happen when there was no need for the
26kernel to get this memory back, meaning it was keeping those pages
27in the right state to be re-used by the same application.
28Since MADV_FREE is a very recent advice value introduced in kernel 4.5,
29we only enable it through a sysctl, for users who want to
30use it.
31
32Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com>
33Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
34---
35 arch/x86/kvm/x86.c | 17 +++++++++++++++++
36 include/linux/mm.h | 5 +++++
37 include/uapi/linux/kvm_para.h | 3 +++
38 kernel/sysctl.c | 7 +++++++
39 mm/Makefile | 2 +-
40 mm/kvm.c | 25 +++++++++++++++++++++++++
41 6 files changed, 58 insertions(+), 1 deletion(-)
42 create mode 100644 mm/kvm.c
43
44diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
45index 582c75311f95..683a94dd5f03 100644
46--- a/arch/x86/kvm/x86.c
47+++ b/arch/x86/kvm/x86.c
48@@ -46,6 +46,7 @@
49 #include <linux/user-return-notifier.h>
50 #include <linux/srcu.h>
51 #include <linux/slab.h>
52+#include <linux/mm.h>
53 #include <linux/perf_event.h>
54 #include <linux/uaccess.h>
55 #include <linux/hash.h>
56@@ -6019,6 +6020,19 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
57 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
58 }
59
60+static int kvm_pv_return_mem_op(struct kvm *kvm, gpa_t gpa, size_t len)
61+{
62+ unsigned long start = gfn_to_hva(kvm, gpa_to_gfn(gpa));
63+
64+ if (len > KVM_MAX_RET_MEM_SIZE)
65+ return KVM_EPERM;
66+
67+ if (kvm_is_error_hva(start + len))
68+ return KVM_EFAULT;
69+
70+ return do_madvise(start, len, kvm_ret_mem_advice);
71+}
72+
73 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
74 {
75 vcpu->arch.apicv_active = false;
76@@ -6065,6 +6079,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
77 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
78 ret = 0;
79 break;
80+ case KVM_HC_RETURN_MEM:
81+ ret = kvm_pv_return_mem_op(vcpu->kvm, a0, a1);
82+ break;
83 default:
84 ret = -KVM_ENOSYS;
85 break;
86diff --git a/include/linux/mm.h b/include/linux/mm.h
87index 925ec25f99a8..833f23d98baa 100644
88--- a/include/linux/mm.h
89+++ b/include/linux/mm.h
90@@ -2303,6 +2303,11 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
91 extern int sysctl_drop_caches;
92 int drop_caches_sysctl_handler(struct ctl_table *, int,
93 void __user *, size_t *, loff_t *);
94+extern int sysctl_kvm_madv_instant_free;
95+extern int kvm_ret_mem_advice;
96+int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write,
97+ void __user *buffer, size_t *length,
98+ loff_t *ppos);
99 #endif
100
101 void drop_slab(void);
102diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
103index bf6cd7d5cac2..7d90f77d87d0 100644
104--- a/include/uapi/linux/kvm_para.h
105+++ b/include/uapi/linux/kvm_para.h
106@@ -23,6 +23,9 @@
107 #define KVM_HC_MIPS_GET_CLOCK_FREQ 6
108 #define KVM_HC_MIPS_EXIT_VM 7
109 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8
110+#define KVM_HC_RETURN_MEM 10
111+
112+#define KVM_MAX_RET_MEM_SIZE (1 << 22) // 4MiB
113
114 /*
115 * hypercalls use architecture specific
116diff --git a/kernel/sysctl.c b/kernel/sysctl.c
117index c1095cdc0fe2..d8ae774fa042 100644
118--- a/kernel/sysctl.c
119+++ b/kernel/sysctl.c
120@@ -1398,6 +1398,13 @@ static struct ctl_table vm_table[] = {
121 .extra1 = &one,
122 .extra2 = &four,
123 },
124+ {
125+ .procname = "kvm_madv_instant_free",
126+ .data = &sysctl_kvm_madv_instant_free,
127+ .maxlen = sizeof(int),
128+ .mode = 0644,
129+ .proc_handler = kvm_madv_instant_free_sysctl_handler,
130+ },
131 #ifdef CONFIG_COMPACTION
132 {
133 .procname = "compact_memory",
134diff --git a/mm/Makefile b/mm/Makefile
135index 295bd7a..6455723 100644
136--- a/mm/Makefile
137+++ b/mm/Makefile
138@@ -47,6 +47,8 @@ else
139 obj-y += bootmem.o
140 endif
141
142+obj-y += kvm.o
143+
144 obj-$(CONFIG_ADVISE_SYSCALLS) += fadvise.o
145 ifdef CONFIG_MMU
146 obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
147diff --git a/mm/kvm.c b/mm/kvm.c
148new file mode 100644
149index 000000000000..8945f6a311b9
150--- /dev/null
151+++ b/mm/kvm.c
152@@ -0,0 +1,25 @@
153+#include <linux/mman.h>
154+
155+int sysctl_kvm_madv_instant_free;
156+
157+int kvm_ret_mem_advice = MADV_DONTNEED;
158+EXPORT_SYMBOL_GPL(kvm_ret_mem_advice);
159+
160+int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write,
161+ void __user *buffer, size_t *length, loff_t *ppos)
162+{
163+ int ret;
164+
165+ ret = proc_dointvec(table, write, buffer, length, ppos);
166+ if (ret)
167+ return ret;
168+
169+#ifdef MADV_FREE
170+ if (sysctl_kvm_madv_instant_free > 0)
171+ kvm_ret_mem_advice = MADV_FREE;
172+ else
173+ kvm_ret_mem_advice = MADV_DONTNEED;
174+#endif
175+
176+ return 0;
177+}
178--
1792.12.1
180
diff --git a/patches/boot_time_opt/0153-x86-Return-memory-from-guest-to-host-kernel.patch b/patches/boot_time_opt/0153-x86-Return-memory-from-guest-to-host-kernel.patch
new file mode 100644
index 0000000..cdb876a
--- /dev/null
+++ b/patches/boot_time_opt/0153-x86-Return-memory-from-guest-to-host-kernel.patch
@@ -0,0 +1,155 @@
1From 855ef164854307839c08c60688eaeac14f9a649e Mon Sep 17 00:00:00 2001
2From: Sebastien Boeuf <sebastien.boeuf@intel.com>
3Date: Mon, 23 Jan 2017 15:26:13 -0800
4Subject: [PATCH 153/154] x86: Return memory from guest to host kernel
5
6All virtual machines need memory to perform various tasks, but this
7memory is not released to the host after it is not used anymore. We
8have to wait for the termination of the virtual machine to get this
9memory back into the host.
10
11The ballooning mechanism is similar but not designed for the same purpose.
12When we hit the memory limits of the system, the host predicts how much
13memory can be asked back from a guest, and it issues a hypercall to
14retrieve this memory.
15
16The proposed solution is different because it does not wait for the host
17to need memory before returning it, and it knows precisely how much memory
18it can return.
19
20The way to notify the host side about such a return is to rely on
21the new hypercall KVM_HC_RETURN_MEM. To avoid overloading the CPU
22with too many hypercalls, we only return memory blocks of
23order 7 (512k blocks) and higher. This value was found by running
24memory tests using multiple threads allocating/freeing large amounts
25of memory. Those tests were run for different order values, and 7 was
26the best tradeoff between the number of hypercalls issued and the
27amount of memory returned to the host.
28
29To limit the performance impact of this code addition,
30we check for blocks of order 7 or higher. This means it only costs an
31additional function call and a branch to perform this check.
32
33Furthermore, this code has been added to the "merge" codepath of the
34buddy allocator, which is not as sensitive as the "free" codepath.
35Not all blocks going through the "free" codepath will end up in the
36"merge" codepath because some of them won't find their free buddy.
37But this is a negligible amount since the kernel does not use many
38high order blocks directly. Instead, those bigger blocks are often
39broken into smaller chunks used as low order blocks. At the time
40those small blocks are released, they go through the merge path.
41
42Benchmarks such as ebizzy and will-it-scale have been run in order
43to make sure this patch does not affect kernel performance, and no
44significant differences were observed.
45
46Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com>
47Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
48---
49 arch/x86/include/asm/kvm_para.h | 22 ++++++++++++++++++++++
50 arch/x86/kernel/kvm.c | 10 ++++++++++
51 include/linux/mm-arch-hooks.h | 8 ++++++++
52 mm/page_alloc.c | 2 ++
53 4 files changed, 42 insertions(+)
54
55diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
56index bc62e7cbf1b1..4a2f6d1adbd2 100644
57--- a/arch/x86/include/asm/kvm_para.h
58+++ b/arch/x86/include/asm/kvm_para.h
59@@ -92,6 +92,28 @@ void kvm_async_pf_task_wait(u32 token);
60 void kvm_async_pf_task_wake(u32 token);
61 u32 kvm_read_and_reset_pf_reason(void);
62 extern void kvm_disable_steal_time(void);
63+void kvm_arch_return_memory(struct page *page, unsigned int order);
64+
65+/*
66+ * This order has been found in an empirical way, running memory tests
67+ * through many iterations to assess the number of hypercalls issued
68+ * and the amount of memory returned. In case you change this order to
69+ * 6 or 8, it should not impact performance significantly.
70+ *
71+ * Smaller values lead to less memory waste, but consume more CPU on
72+ * hypercalls. Larger values use less CPU, but do not as precisely
73+ * inform the hypervisor of which memory is free.
74+ */
75+#define RET_MEM_BUDDY_ORDER 7
76+
77+static inline void arch_buddy_merge(struct page *page, unsigned int order)
78+{
79+ if (order < RET_MEM_BUDDY_ORDER)
80+ return;
81+
82+ kvm_arch_return_memory(page, order);
83+}
84+#define arch_buddy_merge arch_buddy_merge
85
86 #ifdef CONFIG_PARAVIRT_SPINLOCKS
87 void __init kvm_spinlock_init(void);
88diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
89index edbbfc854e39..14167b3f6514 100644
90--- a/arch/x86/kernel/kvm.c
91+++ b/arch/x86/kernel/kvm.c
92@@ -552,6 +552,16 @@ static __init int activate_jump_labels(void)
93 }
94 arch_initcall(activate_jump_labels);
95
96+void kvm_arch_return_memory(struct page *page, unsigned int order)
97+{
98+ if (!kvm_para_available())
99+ return;
100+
101+ kvm_hypercall2(KVM_HC_RETURN_MEM,
102+ page_to_phys(page),
103+ PAGE_SIZE << order);
104+}
105+
106 #ifdef CONFIG_PARAVIRT_SPINLOCKS
107
108 /* Kick a cpu by its apicid. Used to wake up a halted vcpu */
109diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h
110index 4efc3f56e6df..26eb3a05a8a3 100644
111--- a/include/linux/mm-arch-hooks.h
112+++ b/include/linux/mm-arch-hooks.h
113@@ -12,6 +12,7 @@
114 #define _LINUX_MM_ARCH_HOOKS_H
115
116 #include <asm/mm-arch-hooks.h>
117+#include <asm/kvm_para.h>
118
119 #ifndef arch_remap
120 static inline void arch_remap(struct mm_struct *mm,
121@@ -22,4 +23,11 @@ static inline void arch_remap(struct mm_struct *mm,
122 #define arch_remap arch_remap
123 #endif
124
125+#ifndef arch_buddy_merge
126+static inline void arch_buddy_merge(struct page *page, unsigned int order)
127+{
128+}
129+#define arch_buddy_merge arch_buddy_merge
130+#endif
131+
132 #endif /* _LINUX_MM_ARCH_HOOKS_H */
133diff --git a/mm/page_alloc.c b/mm/page_alloc.c
134index 1460e6ad5e14..5f6e6371bc6f 100644
135--- a/mm/page_alloc.c
136+++ b/mm/page_alloc.c
137@@ -64,6 +64,7 @@
138 #include <linux/page_owner.h>
139 #include <linux/kthread.h>
140 #include <linux/memcontrol.h>
141+#include <linux/mm-arch-hooks.h>
142
143 #include <asm/sections.h>
144 #include <asm/tlbflush.h>
145@@ -855,6 +856,7 @@ static inline void __free_one_page(struct page *page,
146 }
147
148 done_merging:
149+ arch_buddy_merge(page, order);
150 set_page_order(page, order);
151
152 /*
153--
1542.12.1
155
diff --git a/patches/boot_time_opt/0154-sysctl-vm-Fine-grained-cache-shrinking.patch b/patches/boot_time_opt/0154-sysctl-vm-Fine-grained-cache-shrinking.patch
new file mode 100644
index 0000000..07d4a83
--- /dev/null
+++ b/patches/boot_time_opt/0154-sysctl-vm-Fine-grained-cache-shrinking.patch
@@ -0,0 +1,137 @@
1From 2c145b5233b504f5226a0f4bc44baeef33b444d8 Mon Sep 17 00:00:00 2001
2From: Sebastien Boeuf <sebastien.boeuf@intel.com>
3Date: Mon, 23 Jan 2017 15:32:39 -0800
4Subject: [PATCH 154/154] sysctl: vm: Fine-grained cache shrinking
5
6Many virtual machines are left idle for days until they
7are terminated, and they can keep a large amount of memory in their
8cache, meaning this memory cannot be used by other processes.
9
10We tried to release this memory using the existing drop_caches sysctl,
11but it led to complete loss of the cache, which could still have been
12useful if the idle process woke up. The process then cannot find any
13cached data, and rebuilding the cache from scratch directly hurts
14performance.
15
16Instead, the solution we want is based on gradually shrinking the system
17cache over time. This patch adds a new sysctl, shrink_caches_mb, to
18allow userspace applications to tell the kernel that it should shrink the
19system cache by up to the amount (in MiB) specified.
20
21There is an application called "memshrinker" which uses this new
22mechanism. It runs in the background and periodically releases a
23specified amount of cache. This amount is based on the remaining
24cache on the system, and period is computed to follow a shrinking
25model. It results in saving a lot of memory for other processes
26running on the system.
27
28Suggested-by: Arjan van de Ven <arjan.van.de.ven@intel.com>
29Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
30---
31 fs/drop_caches.c | 25 +++++++++++++++++++++++++
32 include/linux/mm.h | 4 ++++
33 kernel/sysctl.c | 8 ++++++++
34 mm/vmscan.c | 2 --
35 4 files changed, 37 insertions(+), 2 deletions(-)
36
37diff --git a/fs/drop_caches.c b/fs/drop_caches.c
38index d72d52b90433..f564dfcc13a4 100644
39--- a/fs/drop_caches.c
40+++ b/fs/drop_caches.c
41@@ -8,10 +8,12 @@
42 #include <linux/writeback.h>
43 #include <linux/sysctl.h>
44 #include <linux/gfp.h>
45+#include <linux/swap.h>
46 #include "internal.h"
47
48 /* A global variable is a bit ugly, but it keeps the code simple */
49 int sysctl_drop_caches;
50+int sysctl_shrink_caches_mb;
51
52 static void drop_pagecache_sb(struct super_block *sb, void *unused)
53 {
54@@ -67,3 +69,26 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write,
55 }
56 return 0;
57 }
58+
59+int shrink_caches_sysctl_handler(struct ctl_table *table, int write,
60+ void __user *buffer, size_t *length, loff_t *ppos)
61+{
62+ int ret;
63+ unsigned long nr_to_reclaim, page_reclaimed;
64+
65+ ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
66+ if (ret)
67+ return ret;
68+
69+ nr_to_reclaim = sysctl_shrink_caches_mb * (1 << 20) / PAGE_SIZE;
70+ if (write) {
71+ page_reclaimed = shrink_all_memory(nr_to_reclaim);
72+ if (page_reclaimed > 0)
73+ lru_add_drain_all();
74+
75+ if (page_reclaimed != nr_to_reclaim)
76+ return page_reclaimed;
77+ }
78+
79+ return 0;
80+}
81diff --git a/include/linux/mm.h b/include/linux/mm.h
82index 833f23d98baa..0bb66c1c31c9 100644
83--- a/include/linux/mm.h
84+++ b/include/linux/mm.h
85@@ -2308,6 +2308,10 @@ extern int kvm_ret_mem_advice;
86 int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write,
87 void __user *buffer, size_t *length,
88 loff_t *ppos);
89+extern int sysctl_shrink_caches_mb;
90+int shrink_caches_sysctl_handler(struct ctl_table *table, int write,
91+ void __user *buffer, size_t *length,
92+ loff_t *ppos);
93 #endif
94
95 void drop_slab(void);
96diff --git a/kernel/sysctl.c b/kernel/sysctl.c
97index d8ae774fa042..5dc9a46ae212 100644
98--- a/kernel/sysctl.c
99+++ b/kernel/sysctl.c
100@@ -1405,6 +1405,14 @@ static struct ctl_table vm_table[] = {
101 .mode = 0644,
102 .proc_handler = kvm_madv_instant_free_sysctl_handler,
103 },
104+ {
105+ .procname = "shrink_caches_mb",
106+ .data = &sysctl_shrink_caches_mb,
107+ .maxlen = sizeof(int),
108+ .mode = 0644,
109+ .proc_handler = shrink_caches_sysctl_handler,
110+ .extra1 = &one,
111+ },
112 #ifdef CONFIG_COMPACTION
113 {
114 .procname = "compact_memory",
115diff --git a/mm/vmscan.c b/mm/vmscan.c
116index 30a88b945a44..1198e74d1860 100644
117--- a/mm/vmscan.c
118+++ b/mm/vmscan.c
119@@ -3525,7 +3525,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
120 wake_up_interruptible(&pgdat->kswapd_wait);
121 }
122
123-#ifdef CONFIG_HIBERNATION
124 /*
125 * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
126 * freed pages.
127@@ -3564,7 +3563,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
128
129 return nr_reclaimed;
130 }
131-#endif /* CONFIG_HIBERNATION */
132
133 /* It's optimal to keep kswapds on the same CPUs as their memory, but
134 not required for correctness. So if the last cpu in a node goes
135--
1362.12.1
137
diff --git a/patches/boot_time_opt/boot_time_opt.scc b/patches/boot_time_opt/boot_time_opt.scc
new file mode 100644
index 0000000..1ffb857
--- /dev/null
+++ b/patches/boot_time_opt/boot_time_opt.scc
@@ -0,0 +1,29 @@
1define KFEATURE_DESCRIPTION "Boot time optimization changes ported from ClearLinux, https://github.com/clearlinux-pkgs/linux-lts and https://github.com/clearlinux-pkgs/linux-kvm"
2define KFEATURE_COMPATIBILITY all
3
4patch 0101-kvm-silence-kvm-unhandled-rdmsr.patch
5patch 0102-i8042-decrease-debug-message-level-to-info.patch
6patch 0104-Increase-the-ext4-default-commit-age.patch
7patch 0105-silence-rapl.patch
8patch 0106-pci-pme-wakeups.patch
9patch 0107-ksm-wakeups.patch
10patch 0108-intel_idle-tweak-cpuidle-cstates.patch
11patch 0110-init_task-faster-timerslack.patch
12patch 0112-fs-ext4-fsync-optimize-double-fsync-a-bunch.patch
13patch 0113-overload-on-wakeup.patch
14patch 0114-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch
15patch 0115-fix-initcall-timestamps.patch
16patch 0116-smpboot-reuse-timer-calibration.patch
17patch 0118-Initialize-ata-before-graphics.patch
18patch 0119-reduce-e1000e-boot-time-by-tightening-sleep-ranges.patch
19patch 0120-give-rdrand-some-credit.patch
20patch 0121-e1000e-change-default-policy.patch
21patch 0122-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch
22patch 0123-igb-no-runtime-pm-to-fix-reboot-oops.patch
23patch 0124-tweak-perfbias.patch
24patch 0125-e1000e-increase-pause-and-refresh-time.patch
25
26patch 0151-mm-Export-do_madvise.patch
27patch 0152-x86-kvm-Notify-host-to-release-pages.patch
28patch 0153-x86-Return-memory-from-guest-to-host-kernel.patch
29patch 0154-sysctl-vm-Fine-grained-cache-shrinking.patch
diff --git a/patches/boot_time_opt/raid_alg.cfg b/patches/boot_time_opt/raid_alg.cfg
new file mode 100644
index 0000000..6df4a7c
--- /dev/null
+++ b/patches/boot_time_opt/raid_alg.cfg
@@ -0,0 +1,3 @@
1CONFIG_RAID6_FORCE_ALGO=y
2CONFIG_RAID6_FORCE_INT=y
3CONFIG_RAID6_FORCE_AVX2=y
diff --git a/patches/boot_time_opt/raid_alg.scc b/patches/boot_time_opt/raid_alg.scc
new file mode 100644
index 0000000..98dd713
--- /dev/null
+++ b/patches/boot_time_opt/raid_alg.scc
@@ -0,0 +1,5 @@
1define KFEATURE_DESCRIPTION "Use AVX2 for RAID recovery algorithm"
2define KFEATURE_COMPATIBILITY all
3
4patch 0117-raid6-add-Kconfig-option-to-skip-raid6-benchmarking.patch
5kconf non-hardware raid_alg.cfg