Diffstat (limited to 'extras/recipes-kernel/linux/linux-omap/base/0010-Miracle-patch.patch')

-rw-r--r--  extras/recipes-kernel/linux/linux-omap/base/0010-Miracle-patch.patch  504

1 file changed, 504 insertions(+), 0 deletions(-)
diff --git a/extras/recipes-kernel/linux/linux-omap/base/0010-Miracle-patch.patch b/extras/recipes-kernel/linux/linux-omap/base/0010-Miracle-patch.patch
new file mode 100644
index 00000000..c5eba83d
--- /dev/null
+++ b/extras/recipes-kernel/linux/linux-omap/base/0010-Miracle-patch.patch
@@ -0,0 +1,504 @@
From ce4f1f734efd638af01f1849ffffdc2746ad4a55 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 19 Nov 2010 12:52:42 +0100
Subject: [PATCH 10/28] Miracle patch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Sun, 2010-11-14 at 16:26 -0800, Linus Torvalds wrote:
> On Sun, Nov 14, 2010 at 4:15 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > THAT is why I think it's so silly to try to be so strict and walk over
> > all processes while holding a couple of spinlocks.
>
> Btw, let me say that I think the patch is great even with that thing
> in. It looks clean, the thing I'm complaining about is not a big deal,
> and it seems to perform very much as advertized. The difference with
> autogroup scheduling is very noticeable with a simple "make -j64"
> kernel compile.
>
> So I really don't think it's a big deal. The sysctl handler isn't even
> complicated. But boy does it hurt my eyes to see a spinlock held
> around a "do_each_thread()". And I do get the feeling that the
> simplest way to fix it would be to just remove the code entirely, and
> just say that "enabling/disabling may be delayed for old processes
> with existing autogroups".

Which is what I just did. If the oddball case isn't a big deal, the
patch shrinks, which is a good thing. I just wanted to cover all bases.

Patchlet with handler whacked:

A recurring complaint from CFS users is that parallel kbuild has a negative
impact on desktop interactivity. This patch implements an idea from Linus,
to automatically create task groups. This patch only implements Linus' per
tty task group suggestion, and only for fair class tasks, but leaves the way
open for enhancement.
Implementation: each task's signal struct contains an inherited pointer to a
refcounted autogroup struct containing a task group pointer, the default for
all tasks pointing to the init_task_group. When a task calls __proc_set_tty(),
the process-wide reference to the default group is dropped, a new task group is
created, and the process is moved into the new task group. Children thereafter
inherit this task group, and increase its refcount. On exit, a reference to the
current task group is dropped when the last reference to each signal struct is
dropped. The task group is destroyed when the last signal struct referencing
it is freed. At runqueue selection time, IFF a task has no cgroup assignment,
its current autogroup is used.

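To make the refcount lifecycle above concrete, here is a minimal userspace
sketch. It is illustrative only: the kernel code below uses a kref and a real
task group pointer, and the names here are hypothetical. Each signal struct
holds one counted reference to a shared group, a fork takes another, and the
last put destroys the group.

	#include <stdio.h>
	#include <stdlib.h>

	struct group {			/* stand-in for struct autogroup */
		int refcount;		/* the kernel uses struct kref */
		const char *name;	/* stand-in for the task_group pointer */
	};

	static struct group *group_get(struct group *g)
	{
		g->refcount++;		/* fork: child inherits and gets a ref */
		return g;
	}

	static void group_put(struct group *g)
	{
		if (--g->refcount == 0) {	/* last signal struct is gone */
			printf("group %s destroyed\n", g->name);
			free(g);
		}
	}

	int main(void)
	{
		struct group *tty_group = malloc(sizeof(*tty_group));
		struct group *child_ref;

		if (!tty_group)
			return 1;
		tty_group->refcount = 1;	/* creation ref, as in __proc_set_tty() */
		tty_group->name = "tty1";

		child_ref = group_get(tty_group);	/* child forked */
		group_put(tty_group);			/* parent exits */
		group_put(child_ref);			/* last exit: group destroyed */
		return 0;
	}
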
The feature is enabled from boot by default if CONFIG_SCHED_AUTOGROUP is
selected, but can be disabled via the boot option noautogroup, and can
also be turned on/off on the fly via..
	echo [01] > /proc/sys/kernel/sched_autogroup_enabled.
..which will automatically move tasks to/from the root task group.

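For example, assuming CONFIG_SCHED_AUTOGROUP=y, the knob can be read and
flipped at runtime like any other sysctl:

	cat /proc/sys/kernel/sched_autogroup_enabled	# 1 = enabled
	echo 0 > /proc/sys/kernel/sched_autogroup_enabled
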
Some numbers.

A 100% hog overhead measurement proggy pinned to the same CPU as a make -j10

About measurement proggy:
	pert/sec = perturbations/sec
	min/max/avg = scheduler service latencies in usecs
	sum/s = time accrued by the competition per sample period (1 sec here)
	overhead = %CPU received by the competition per sample period

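The measured load looks roughly like the following sketch (the CPU number
mirrors the taskset usage further below; the pert binary's exact invocation
is an assumption):

	taskset -c 3 ./pert &		# 100% hog, one sample line per second
	taskset -c 3 make -j10		# the competing parallel kbuild
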
pert/s: 31 >40475.37us: 3 min: 0.37 max:48103.60 avg:29573.74 sum/s:916786us overhead:90.24%
pert/s: 23 >41237.70us: 12 min: 0.36 max:56010.39 avg:40187.01 sum/s:924301us overhead:91.99%
pert/s: 24 >42150.22us: 12 min: 8.86 max:61265.91 avg:39459.91 sum/s:947038us overhead:92.20%
pert/s: 26 >42344.91us: 11 min: 3.83 max:52029.60 avg:36164.70 sum/s:940282us overhead:91.12%
pert/s: 24 >44262.90us: 14 min: 5.05 max:82735.15 avg:40314.33 sum/s:967544us overhead:92.22%

Same load with this patch applied.

pert/s: 229 >5484.43us: 41 min: 0.15 max:12069.42 avg:2193.81 sum/s:502382us overhead:50.24%
pert/s: 222 >5652.28us: 43 min: 0.46 max:12077.31 avg:2248.56 sum/s:499181us overhead:49.92%
pert/s: 211 >5809.38us: 43 min: 0.16 max:12064.78 avg:2381.70 sum/s:502538us overhead:50.25%
pert/s: 223 >6147.92us: 43 min: 0.15 max:16107.46 avg:2282.17 sum/s:508925us overhead:50.49%
pert/s: 218 >6252.64us: 43 min: 0.16 max:12066.13 avg:2324.11 sum/s:506656us overhead:50.27%

Average service latency is an order of magnitude better with autogroup.
(Imagine that pert were Xorg or whatnot instead)

Using Mathieu Desnoyers' wakeup-latency testcase:

With taskset -c 3 make -j 10 running..

taskset -c 3 ./wakeup-latency& sleep 30;killall wakeup-latency

without:
maximum latency: 42963.2 µs
average latency: 9077.0 µs
missed timer events: 0

with:
maximum latency: 4160.7 µs
average latency: 149.4 µs
missed timer events: 0

Signed-off-by: Mike Galbraith <efault@gmx.de>
---
 Documentation/kernel-parameters.txt |    2 +
 drivers/tty/tty_io.c                |    1 +
 include/linux/sched.h               |   19 +++++
 init/Kconfig                        |   12 +++
 kernel/fork.c                       |    5 +-
 kernel/sched.c                      |   25 ++++--
 kernel/sched_autogroup.c            |  140 +++++++++++++++++++++++++++++++++++
 kernel/sched_autogroup.h            |   18 +++++
 kernel/sysctl.c                     |   11 +++
 9 files changed, 224 insertions(+), 9 deletions(-)
 create mode 100644 kernel/sched_autogroup.c
 create mode 100644 kernel/sched_autogroup.h

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 01ece1b..1031923 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1622,6 +1622,8 @@ and is between 256 and 4096 characters. It is defined in the file
 	noapic		[SMP,APIC] Tells the kernel to not make use of any
 			IOAPICs that may be present in the system.

+	noautogroup	Disable scheduler automatic task group creation.
+
 	nobats		[PPC] Do not use BATs for mapping kernel lowmem
 			on "Classic" PPC cores.

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 35480dd..1849f4a 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3169,6 +3169,7 @@ static void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
 	put_pid(tsk->signal->tty_old_pgrp);
 	tsk->signal->tty = tty_kref_get(tty);
 	tsk->signal->tty_old_pgrp = NULL;
+	sched_autogroup_create_attach(tsk);
 }

 static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2238745..3a775e3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -509,6 +509,8 @@ struct thread_group_cputimer {
 	spinlock_t lock;
 };

+struct autogroup;
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -576,6 +578,9 @@ struct signal_struct {

 	struct tty_struct *tty; /* NULL if no tty */

+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
 	/*
 	 * Cumulative resource counters for dead threads in the group,
 	 * and for reaped dead child processes forked by this group.
@@ -1931,6 +1936,20 @@ int sched_rt_handler(struct ctl_table *table, int write,

 extern unsigned int sysctl_sched_compat_yield;

+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+
+extern void sched_autogroup_create_attach(struct task_struct *p);
+extern void sched_autogroup_detach(struct task_struct *p);
+extern void sched_autogroup_fork(struct signal_struct *sig);
+extern void sched_autogroup_exit(struct signal_struct *sig);
+#else
+static inline void sched_autogroup_create_attach(struct task_struct *p) { }
+static inline void sched_autogroup_detach(struct task_struct *p) { }
+static inline void sched_autogroup_fork(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
diff --git a/init/Kconfig b/init/Kconfig
index c972899..a4985d9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -741,6 +741,18 @@ config NET_NS

 endif # NAMESPACES

+config SCHED_AUTOGROUP
+	bool "Automatic process group scheduling"
+	select CGROUPS
+	select CGROUP_SCHED
+	select FAIR_GROUP_SCHED
+	help
+	  This option optimizes the scheduler for common desktop workloads by
+	  automatically creating and populating task groups.  This separation
+	  of workloads isolates aggressive CPU burners (like build jobs) from
+	  desktop applications.  Task group autogeneration is currently based
+	  upon task tty association.
+
 config MM_OWNER
 	bool

diff --git a/kernel/fork.c b/kernel/fork.c
index 5447dc7..70ea75f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig)

 static inline void put_signal_struct(struct signal_struct *sig)
 {
-	if (atomic_dec_and_test(&sig->sigcnt))
+	if (atomic_dec_and_test(&sig->sigcnt)) {
+		sched_autogroup_exit(sig);
 		free_signal_struct(sig);
+	}
 }

 void __put_task_struct(struct task_struct *tsk)
@@ -905,6 +907,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	posix_cpu_timers_init_group(sig);

 	tty_audit_fork(sig);
+	sched_autogroup_fork(sig);

 	sig->oom_adj = current->signal->oom_adj;
 	sig->oom_score_adj = current->signal->oom_score_adj;
diff --git a/kernel/sched.c b/kernel/sched.c
index 297d1a0..53ff9a1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -78,6 +78,7 @@

 #include "sched_cpupri.h"
 #include "workqueue_sched.h"
+#include "sched_autogroup.h"

 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -605,11 +606,14 @@ static inline int cpu_of(struct rq *rq)
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
+	struct task_group *tg;
 	struct cgroup_subsys_state *css;

 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
 			lockdep_is_held(&task_rq(p)->lock));
-	return container_of(css, struct task_group, css);
+	tg = container_of(css, struct task_group, css);
+
+	return autogroup_task_group(p, tg);
 }

 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -2063,6 +2067,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 #include "sched_idletask.c"
 #include "sched_fair.c"
 #include "sched_rt.c"
+#include "sched_autogroup.c"
 #include "sched_stoptask.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
@@ -8164,7 +8169,7 @@ void __init sched_init(void)
 #ifdef CONFIG_CGROUP_SCHED
 	list_add(&init_task_group.list, &task_groups);
 	INIT_LIST_HEAD(&init_task_group.children);
-
+	autogroup_init(&init_task);
 #endif /* CONFIG_CGROUP_SCHED */

 #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
@@ -8694,15 +8699,11 @@ void sched_destroy_group(struct task_group *tg)
 /* change task's runqueue when it moves between groups.
  * The caller of this function should have put the task in its new group
  * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
- * reflect its new group.
+ * reflect its new group. Called with the runqueue lock held.
  */
-void sched_move_task(struct task_struct *tsk)
+void __sched_move_task(struct task_struct *tsk, struct rq *rq)
 {
 	int on_rq, running;
-	unsigned long flags;
-	struct rq *rq;
-
-	rq = task_rq_lock(tsk, &flags);

 	running = task_current(rq, tsk);
 	on_rq = tsk->se.on_rq;
@@ -8723,7 +8724,15 @@ void sched_move_task(struct task_struct *tsk)
 		tsk->sched_class->set_curr_task(rq);
 	if (on_rq)
 		enqueue_task(rq, tsk, 0);
+}

+void sched_move_task(struct task_struct *tsk)
+{
+	struct rq *rq;
+	unsigned long flags;
+
+	rq = task_rq_lock(tsk, &flags);
+	__sched_move_task(tsk, rq);
 	task_rq_unlock(rq, &flags);
 }
 #endif /* CONFIG_CGROUP_SCHED */
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
new file mode 100644
index 0000000..62f1d0e
--- /dev/null
+++ b/kernel/sched_autogroup.c
@@ -0,0 +1,140 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
+
+struct autogroup {
+	struct kref kref;
+	struct task_group *tg;
+};
+
+static struct autogroup autogroup_default;
+
+static void autogroup_init(struct task_struct *init_task)
+{
+	autogroup_default.tg = &init_task_group;
+	kref_init(&autogroup_default.kref);
+	init_task->signal->autogroup = &autogroup_default;
+}
+
+static inline void autogroup_destroy(struct kref *kref)
+{
+	struct autogroup *ag = container_of(kref, struct autogroup, kref);
+	struct task_group *tg = ag->tg;
+
+	kfree(ag);
+	sched_destroy_group(tg);
+}
+
+static inline void autogroup_kref_put(struct autogroup *ag)
+{
+	kref_put(&ag->kref, autogroup_destroy);
+}
+
+static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
+{
+	kref_get(&ag->kref);
+	return ag;
+}
+
+static inline struct autogroup *autogroup_create(void)
+{
+	struct autogroup *ag = kmalloc(sizeof(*ag), GFP_KERNEL);
+
+	if (!ag)
+		goto out_fail;
+
+	ag->tg = sched_create_group(&init_task_group);
+	kref_init(&ag->kref);
+
+	if (!(IS_ERR(ag->tg)))
+		return ag;
+
+out_fail:
+	if (ag) {
+		kfree(ag);
+		WARN_ON(1);
+	} else
+		WARN_ON(1);
+
+	return autogroup_kref_get(&autogroup_default);
+}
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	enabled &= (tg == &root_task_group);
+	enabled &= (p->sched_class == &fair_sched_class);
+	enabled &= (!(p->flags & PF_EXITING));
+
+	if (enabled)
+		return p->signal->autogroup->tg;
+
+	return tg;
+}
+
+static void
+autogroup_move_group(struct task_struct *p, struct autogroup *ag)
+{
+	struct autogroup *prev;
+	struct task_struct *t;
+	struct rq *rq;
+	unsigned long flags;
+
+	rq = task_rq_lock(p, &flags);
+	prev = p->signal->autogroup;
+	if (prev == ag) {
+		task_rq_unlock(rq, &flags);
+		return;
+	}
+
+	p->signal->autogroup = autogroup_kref_get(ag);
+	__sched_move_task(p, rq);
+	task_rq_unlock(rq, &flags);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(t, &p->thread_group, thread_group) {
+		sched_move_task(t);
+	}
+	rcu_read_unlock();
+
+	autogroup_kref_put(prev);
+}
+
+void sched_autogroup_create_attach(struct task_struct *p)
+{
+	struct autogroup *ag = autogroup_create();
+
+	autogroup_move_group(p, ag);
+	/* drop extra reference added by autogroup_create() */
+	autogroup_kref_put(ag);
+}
+EXPORT_SYMBOL(sched_autogroup_create_attach);
+
+/* currently has no users */
+void sched_autogroup_detach(struct task_struct *p)
+{
+	autogroup_move_group(p, &autogroup_default);
+}
+EXPORT_SYMBOL(sched_autogroup_detach);
+
+void sched_autogroup_fork(struct signal_struct *sig)
+{
+	sig->autogroup = autogroup_kref_get(current->signal->autogroup);
+}
+
+void sched_autogroup_exit(struct signal_struct *sig)
+{
+	autogroup_kref_put(sig->autogroup);
+}
+
+static int __init setup_autogroup(char *str)
+{
+	sysctl_sched_autogroup_enabled = 0;
+
+	return 1;
+}
+
+__setup("noautogroup", setup_autogroup);
+#endif
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
new file mode 100644
index 0000000..6048f5d
--- /dev/null
+++ b/kernel/sched_autogroup.h
@@ -0,0 +1,18 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+static void __sched_move_task(struct task_struct *tsk, struct rq *rq);
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg);
+
+#else /* !CONFIG_SCHED_AUTOGROUP */
+
+static inline void autogroup_init(struct task_struct *init_task) { }
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	return tg;
+}
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5abfa15..b162f65 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -382,6 +382,17 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHED_AUTOGROUP
+	{
+		.procname	= "sched_autogroup_enabled",
+		.data		= &sysctl_sched_autogroup_enabled,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",
--
1.6.6.1