diff options
Diffstat (limited to 'extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch')
-rw-r--r-- | extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch | 879 |
1 files changed, 879 insertions, 0 deletions
diff --git a/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch b/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch new file mode 100644 index 00000000..731906cc --- /dev/null +++ b/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch | |||
@@ -0,0 +1,879 @@ | |||
1 | From e4c777d8314d7925e4895f00b3a7ebd64a4d830b Mon Sep 17 00:00:00 2001 | ||
2 | From: Mike Turquette <mturquette@ti.com> | ||
3 | Date: Tue, 17 May 2011 09:43:09 -0500 | ||
4 | Subject: [PATCH 2/2] cpufreq: introduce hotplug governor | ||
5 | |||
6 | The "hotplug" governor scales CPU frequency based on load, similar to | ||
7 | "ondemand". It scales up to the highest frequency when "up_threshold" | ||
8 | is crossed and scales down one frequency at a time when "down_threshold" | ||
9 | is crossed. Unlike those governors, target frequencies are determined | ||
10 | by directly accessing the CPUfreq frequency table, instead of taking | ||
11 | some percentage of maximum available frequency. | ||
12 | |||
13 | The key difference in the "hotplug" governor is that it will disable | ||
14 | auxiliary CPUs when the system is very idle, and enable them again once | ||
15 | the system becomes busy. This is achieved by averaging load over | ||
16 | multiple sampling periods; if CPUs were online or offlined based on a | ||
17 | single sampling period then thrashing will occur. | ||
18 | |||
19 | Sysfs entries exist for "hotplug_in_sampling_periods" and for | ||
20 | "hotplug_out_sampling_periods" which determine how many consecutive | ||
21 | periods get averaged to determine if auxiliary CPUs should be onlined or | ||
22 | offlined. Defaults are 5 periods and 20 periods respectively. | ||
23 | Otherwise the standard sysfs entries you might find for "ondemand" and | ||
24 | "conservative" governors are there. | ||
25 | |||
26 | To use this governor it is assumed that your CPUfreq driver has | ||
27 | populated the CPUfreq table, CONFIG_NO_HZ is enabled and | ||
28 | CONFIG_HOTPLUG_CPU is enabled. | ||
29 | |||
30 | Changes in V2: | ||
31 | Corrected default sampling periods | ||
32 | Optimized load history array resizing | ||
33 | Maintain load history when resizing array | ||
34 | Add locking to dbs_check_cpu | ||
35 | Switch from enable_nonboot_cpus to cpu_up | ||
36 | Switch from disable_nonboot_cpus to down_cpu | ||
37 | Fix some printks | ||
38 | Coding style around for-loops | ||
39 | |||
40 | Signed-off-by: Mike Turquette <mturquette@ti.com> | ||
41 | Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com> | ||
42 | Signed-off-by: Koen Kooi <koen@dominion.thruhere.net> | ||
43 | --- | ||
44 | Documentation/cpu-freq/governors.txt | 28 ++ | ||
45 | drivers/cpufreq/Kconfig | 33 ++ | ||
46 | drivers/cpufreq/Makefile | 1 + | ||
47 | drivers/cpufreq/cpufreq_hotplug.c | 705 ++++++++++++++++++++++++++++++++++ | ||
48 | include/linux/cpufreq.h | 3 + | ||
49 | 5 files changed, 770 insertions(+), 0 deletions(-) | ||
50 | create mode 100644 drivers/cpufreq/cpufreq_hotplug.c | ||
51 | |||
52 | diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt | ||
53 | index e74d0a2..c2e3d3d 100644 | ||
54 | --- a/Documentation/cpu-freq/governors.txt | ||
55 | +++ b/Documentation/cpu-freq/governors.txt | ||
56 | @@ -193,6 +193,34 @@ governor but for the opposite direction. For example when set to its | ||
57 | default value of '20' it means that if the CPU usage needs to be below | ||
58 | 20% between samples to have the frequency decreased. | ||
59 | |||
60 | + | ||
61 | +2.6 Hotplug | ||
62 | +----------- | ||
63 | + | ||
64 | +The CPUfreq governor "hotplug" operates similarly to "ondemand" and | ||
65 | +"conservative". Its decisions are based primarily on CPU load. Like | ||
66 | +"ondemand" the "hotplug" governor will ramp up to the highest frequency | ||
67 | +once the run-time tunable "up_threshold" parameter is crossed. Like | ||
68 | +"conservative", the "hotplug" governor exports a "down_threshold" | ||
69 | +parameter that is also tunable at run-time. When the "down_threshold" | ||
70 | +is crossed the CPU transitions to the next lowest frequency in the | ||
71 | +CPUfreq frequency table instead of decrementing the frequency based on a | ||
72 | +percentage of maximum load. | ||
73 | + | ||
74 | +The main reason "hotplug" governor exists is for architectures requiring | ||
75 | +that only the master CPU be online in order to hit low-power states | ||
76 | +(C-states). OMAP4 is one such example of this. The "hotplug" governor | ||
77 | +is also helpful in reducing thermal output in devices with tight thermal | ||
78 | +constraints. | ||
79 | + | ||
80 | +Auxiliary CPUs are onlined/offlined based on CPU load, but the decision | ||
81 | +to do so is made after averaging several sampling windows. This is to | ||
82 | +reduce CPU hotplug "thrashing", which can be caused by normal system | ||
83 | +entropy and leads to lots of spurious plug-in and plug-out transitions. | ||
84 | +The number of sampling periods averaged together is tunable via the | ||
85 | +"hotplug_in_sampling_periods" and "hotplug_out_sampling_periods" | ||
86 | +run-time tunable parameters. | ||
87 | + | ||
88 | 3. The Governor Interface in the CPUfreq Core | ||
89 | ============================================= | ||
90 | |||
91 | diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig | ||
92 | index ca8ee80..c716a0e 100644 | ||
93 | --- a/drivers/cpufreq/Kconfig | ||
94 | +++ b/drivers/cpufreq/Kconfig | ||
95 | @@ -110,6 +110,19 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE | ||
96 | Be aware that not all cpufreq drivers support the conservative | ||
97 | governor. If unsure have a look at the help section of the | ||
98 | driver. Fallback governor will be the performance governor. | ||
99 | + | ||
100 | +config CPU_FREQ_DEFAULT_GOV_HOTPLUG | ||
101 | + bool "hotplug" | ||
102 | + select CPU_FREQ_GOV_HOTPLUG | ||
103 | + select CPU_FREQ_GOV_PERFORMANCE | ||
104 | + help | ||
105 | + Use the CPUFreq governor 'hotplug' as default. This allows you | ||
106 | + to get a full dynamic frequency capable system with CPU | ||
107 | + hotplug support by simply loading your cpufreq low-level | ||
108 | + hardware driver. Be aware that not all cpufreq drivers | ||
109 | + support the hotplug governor. If unsure have a look at | ||
110 | + the help section of the driver. Fallback governor will be the | ||
111 | + performance governor. | ||
112 | endchoice | ||
113 | |||
114 | config CPU_FREQ_GOV_PERFORMANCE | ||
115 | @@ -190,4 +203,24 @@ config CPU_FREQ_GOV_CONSERVATIVE | ||
116 | |||
117 | If in doubt, say N. | ||
118 | |||
119 | +config CPU_FREQ_GOV_HOTPLUG | ||
120 | + tristate "'hotplug' cpufreq governor" | ||
121 | + depends on CPU_FREQ && NO_HZ && HOTPLUG_CPU | ||
122 | + help | ||
123 | + 'hotplug' - this driver mimics the frequency scaling behavior | ||
124 | + in 'ondemand', but with several key differences. First is | ||
125 | + that frequency transitions use the CPUFreq table directly, | ||
126 | + instead of incrementing in a percentage of the maximum | ||
127 | + available frequency. Second 'hotplug' will offline auxiliary | ||
128 | + CPUs when the system is idle, and online those CPUs once the | ||
129 | + system becomes busy again. This last feature is needed for | ||
130 | + architectures which transition to low power states when only | ||
131 | + the "master" CPU is online, or for thermally constrained | ||
132 | + devices. | ||
133 | + | ||
134 | + If you don't have one of these architectures or devices, use | ||
135 | + 'ondemand' instead. | ||
136 | + | ||
137 | + If in doubt, say N. | ||
138 | + | ||
139 | endif # CPU_FREQ | ||
140 | diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile | ||
141 | index 71fc3b4..05d564c 100644 | ||
142 | --- a/drivers/cpufreq/Makefile | ||
143 | +++ b/drivers/cpufreq/Makefile | ||
144 | @@ -9,6 +9,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o | ||
145 | obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o | ||
146 | obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o | ||
147 | obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o | ||
148 | +obj-$(CONFIG_CPU_FREQ_GOV_HOTPLUG) += cpufreq_hotplug.o | ||
149 | |||
150 | # CPUfreq cross-arch helpers | ||
151 | obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o | ||
152 | diff --git a/drivers/cpufreq/cpufreq_hotplug.c b/drivers/cpufreq/cpufreq_hotplug.c | ||
153 | new file mode 100644 | ||
154 | index 0000000..85aa6d2 | ||
155 | --- /dev/null | ||
156 | +++ b/drivers/cpufreq/cpufreq_hotplug.c | ||
157 | @@ -0,0 +1,705 @@ | ||
158 | +/* | ||
159 | + * CPUFreq hotplug governor | ||
160 | + * | ||
161 | + * Copyright (C) 2010 Texas Instruments, Inc. | ||
162 | + * Mike Turquette <mturquette@ti.com> | ||
163 | + * Santosh Shilimkar <santosh.shilimkar@ti.com> | ||
164 | + * | ||
165 | + * Based on ondemand governor | ||
166 | + * Copyright (C) 2001 Russell King | ||
167 | + * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>, | ||
168 | + * Jun Nakajima <jun.nakajima@intel.com> | ||
169 | + * | ||
170 | + * This program is free software; you can redistribute it and/or modify | ||
171 | + * it under the terms of the GNU General Public License version 2 as | ||
172 | + * published by the Free Software Foundation. | ||
173 | + */ | ||
174 | + | ||
175 | +#include <linux/kernel.h> | ||
176 | +#include <linux/module.h> | ||
177 | +#include <linux/init.h> | ||
178 | +#include <linux/cpufreq.h> | ||
179 | +#include <linux/cpu.h> | ||
180 | +#include <linux/jiffies.h> | ||
181 | +#include <linux/kernel_stat.h> | ||
182 | +#include <linux/mutex.h> | ||
183 | +#include <linux/hrtimer.h> | ||
184 | +#include <linux/tick.h> | ||
185 | +#include <linux/ktime.h> | ||
186 | +#include <linux/sched.h> | ||
187 | +#include <linux/err.h> | ||
188 | +#include <linux/slab.h> | ||
189 | + | ||
190 | +/* greater than 80% avg load across online CPUs increases frequency */ | ||
191 | +#define DEFAULT_UP_FREQ_MIN_LOAD (80) | ||
192 | + | ||
193 | +/* less than 20% avg load across online CPUs decreases frequency */ | ||
194 | +#define DEFAULT_DOWN_FREQ_MAX_LOAD (20) | ||
195 | + | ||
196 | +/* default sampling period (uSec) is bogus; 10x ondemand's default for x86 */ | ||
197 | +#define DEFAULT_SAMPLING_PERIOD (100000) | ||
198 | + | ||
199 | +/* default number of sampling periods to average before hotplug-in decision */ | ||
200 | +#define DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS (5) | ||
201 | + | ||
202 | +/* default number of sampling periods to average before hotplug-out decision */ | ||
203 | +#define DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS (20) | ||
204 | + | ||
205 | +static void do_dbs_timer(struct work_struct *work); | ||
206 | +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | ||
207 | + unsigned int event); | ||
208 | + | ||
209 | +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG | ||
210 | +static | ||
211 | +#endif | ||
212 | +struct cpufreq_governor cpufreq_gov_hotplug = { | ||
213 | + .name = "hotplug", | ||
214 | + .governor = cpufreq_governor_dbs, | ||
215 | + .owner = THIS_MODULE, | ||
216 | +}; | ||
217 | + | ||
218 | +struct cpu_dbs_info_s { | ||
219 | + cputime64_t prev_cpu_idle; | ||
220 | + cputime64_t prev_cpu_wall; | ||
221 | + cputime64_t prev_cpu_nice; | ||
222 | + struct cpufreq_policy *cur_policy; | ||
223 | + struct delayed_work work; | ||
224 | + struct cpufreq_frequency_table *freq_table; | ||
225 | + int cpu; | ||
226 | + /* | ||
227 | + * percpu mutex that serializes governor limit change with | ||
228 | + * do_dbs_timer invocation. We do not want do_dbs_timer to run | ||
229 | + * when user is changing the governor or limits. | ||
230 | + */ | ||
231 | + struct mutex timer_mutex; | ||
232 | +}; | ||
233 | +static DEFINE_PER_CPU(struct cpu_dbs_info_s, hp_cpu_dbs_info); | ||
234 | + | ||
235 | +static unsigned int dbs_enable; /* number of CPUs using this policy */ | ||
236 | + | ||
237 | +/* | ||
238 | + * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on | ||
239 | + * different CPUs. It protects dbs_enable in governor start/stop. | ||
240 | + */ | ||
241 | +static DEFINE_MUTEX(dbs_mutex); | ||
242 | + | ||
243 | +static struct workqueue_struct *khotplug_wq; | ||
244 | + | ||
245 | +static struct dbs_tuners { | ||
246 | + unsigned int sampling_rate; | ||
247 | + unsigned int up_threshold; | ||
248 | + unsigned int down_threshold; | ||
249 | + unsigned int hotplug_in_sampling_periods; | ||
250 | + unsigned int hotplug_out_sampling_periods; | ||
251 | + unsigned int hotplug_load_index; | ||
252 | + unsigned int *hotplug_load_history; | ||
253 | + unsigned int ignore_nice; | ||
254 | + unsigned int io_is_busy; | ||
255 | +} dbs_tuners_ins = { | ||
256 | + .sampling_rate = DEFAULT_SAMPLING_PERIOD, | ||
257 | + .up_threshold = DEFAULT_UP_FREQ_MIN_LOAD, | ||
258 | + .down_threshold = DEFAULT_DOWN_FREQ_MAX_LOAD, | ||
259 | + .hotplug_in_sampling_periods = DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS, | ||
260 | + .hotplug_out_sampling_periods = DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS, | ||
261 | + .hotplug_load_index = 0, | ||
262 | + .ignore_nice = 0, | ||
263 | + .io_is_busy = 0, | ||
264 | +}; | ||
265 | + | ||
266 | +/* | ||
267 | + * A corner case exists when switching io_is_busy at run-time: comparing idle | ||
268 | + * times from a non-io_is_busy period to an io_is_busy period (or vice-versa) | ||
269 | + * will misrepresent the actual change in system idleness. We ignore this | ||
270 | + * corner case: enabling io_is_busy might cause freq increase and disabling | ||
271 | + * might cause freq decrease, which probably matches the original intent. | ||
272 | + */ | ||
273 | +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) | ||
274 | +{ | ||
275 | + u64 idle_time; | ||
276 | + u64 iowait_time; | ||
277 | + | ||
278 | + /* cpufreq-hotplug always assumes CONFIG_NO_HZ */ | ||
279 | + idle_time = get_cpu_idle_time_us(cpu, wall); | ||
280 | + | ||
281 | + /* add time spent doing I/O to idle time */ | ||
282 | + if (dbs_tuners_ins.io_is_busy) { | ||
283 | + iowait_time = get_cpu_iowait_time_us(cpu, wall); | ||
284 | + /* cpufreq-hotplug always assumes CONFIG_NO_HZ */ | ||
285 | + if (iowait_time != -1ULL && idle_time >= iowait_time) | ||
286 | + idle_time -= iowait_time; | ||
287 | + } | ||
288 | + | ||
289 | + return idle_time; | ||
290 | +} | ||
291 | + | ||
292 | +/************************** sysfs interface ************************/ | ||
293 | + | ||
294 | +/* XXX look at global sysfs macros in cpufreq.h, can those be used here? */ | ||
295 | + | ||
296 | +/* cpufreq_hotplug Governor Tunables */ | ||
297 | +#define show_one(file_name, object) \ | ||
298 | +static ssize_t show_##file_name \ | ||
299 | +(struct kobject *kobj, struct attribute *attr, char *buf) \ | ||
300 | +{ \ | ||
301 | + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ | ||
302 | +} | ||
303 | +show_one(sampling_rate, sampling_rate); | ||
304 | +show_one(up_threshold, up_threshold); | ||
305 | +show_one(down_threshold, down_threshold); | ||
306 | +show_one(hotplug_in_sampling_periods, hotplug_in_sampling_periods); | ||
307 | +show_one(hotplug_out_sampling_periods, hotplug_out_sampling_periods); | ||
308 | +show_one(ignore_nice_load, ignore_nice); | ||
309 | +show_one(io_is_busy, io_is_busy); | ||
310 | + | ||
311 | +static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, | ||
312 | + const char *buf, size_t count) | ||
313 | +{ | ||
314 | + unsigned int input; | ||
315 | + int ret; | ||
316 | + ret = sscanf(buf, "%u", &input); | ||
317 | + if (ret != 1) | ||
318 | + return -EINVAL; | ||
319 | + | ||
320 | + mutex_lock(&dbs_mutex); | ||
321 | + dbs_tuners_ins.sampling_rate = input; | ||
322 | + mutex_unlock(&dbs_mutex); | ||
323 | + | ||
324 | + return count; | ||
325 | +} | ||
326 | + | ||
327 | +static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, | ||
328 | + const char *buf, size_t count) | ||
329 | +{ | ||
330 | + unsigned int input; | ||
331 | + int ret; | ||
332 | + ret = sscanf(buf, "%u", &input); | ||
333 | + | ||
334 | + if (ret != 1 || input <= dbs_tuners_ins.down_threshold) { | ||
335 | + return -EINVAL; | ||
336 | + } | ||
337 | + | ||
338 | + mutex_lock(&dbs_mutex); | ||
339 | + dbs_tuners_ins.up_threshold = input; | ||
340 | + mutex_unlock(&dbs_mutex); | ||
341 | + | ||
342 | + return count; | ||
343 | +} | ||
344 | + | ||
345 | +static ssize_t store_down_threshold(struct kobject *a, struct attribute *b, | ||
346 | + const char *buf, size_t count) | ||
347 | +{ | ||
348 | + unsigned int input; | ||
349 | + int ret; | ||
350 | + ret = sscanf(buf, "%u", &input); | ||
351 | + | ||
352 | + if (ret != 1 || input >= dbs_tuners_ins.up_threshold) { | ||
353 | + return -EINVAL; | ||
354 | + } | ||
355 | + | ||
356 | + mutex_lock(&dbs_mutex); | ||
357 | + dbs_tuners_ins.down_threshold = input; | ||
358 | + mutex_unlock(&dbs_mutex); | ||
359 | + | ||
360 | + return count; | ||
361 | +} | ||
362 | + | ||
363 | +static ssize_t store_hotplug_in_sampling_periods(struct kobject *a, | ||
364 | + struct attribute *b, const char *buf, size_t count) | ||
365 | +{ | ||
366 | + unsigned int input; | ||
367 | + unsigned int *temp; | ||
368 | + unsigned int max_windows; | ||
369 | + int ret; | ||
370 | + ret = sscanf(buf, "%u", &input); | ||
371 | + | ||
372 | + if (ret != 1) | ||
373 | + return -EINVAL; | ||
374 | + | ||
375 | + /* already using this value, bail out */ | ||
376 | + if (input == dbs_tuners_ins.hotplug_in_sampling_periods) | ||
377 | + return count; | ||
378 | + | ||
379 | + mutex_lock(&dbs_mutex); | ||
380 | + ret = count; | ||
381 | + max_windows = max(dbs_tuners_ins.hotplug_in_sampling_periods, | ||
382 | + dbs_tuners_ins.hotplug_out_sampling_periods); | ||
383 | + | ||
384 | + /* no need to resize array */ | ||
385 | + if (input <= max_windows) { | ||
386 | + dbs_tuners_ins.hotplug_in_sampling_periods = input; | ||
387 | + goto out; | ||
388 | + } | ||
389 | + | ||
390 | + /* resize array */ | ||
391 | + temp = kmalloc((sizeof(unsigned int) * input), GFP_KERNEL); | ||
392 | + | ||
393 | + if (!temp || IS_ERR(temp)) { | ||
394 | + ret = -ENOMEM; | ||
395 | + goto out; | ||
396 | + } | ||
397 | + | ||
398 | + memcpy(temp, dbs_tuners_ins.hotplug_load_history, | ||
399 | + (max_windows * sizeof(unsigned int))); | ||
400 | + kfree(dbs_tuners_ins.hotplug_load_history); | ||
401 | + | ||
402 | + /* replace old buffer, old number of sampling periods & old index */ | ||
403 | + dbs_tuners_ins.hotplug_load_history = temp; | ||
404 | + dbs_tuners_ins.hotplug_in_sampling_periods = input; | ||
405 | + dbs_tuners_ins.hotplug_load_index = max_windows; | ||
406 | +out: | ||
407 | + mutex_unlock(&dbs_mutex); | ||
408 | + | ||
409 | + return ret; | ||
410 | +} | ||
411 | + | ||
412 | +static ssize_t store_hotplug_out_sampling_periods(struct kobject *a, | ||
413 | + struct attribute *b, const char *buf, size_t count) | ||
414 | +{ | ||
415 | + unsigned int input; | ||
416 | + unsigned int *temp; | ||
417 | + unsigned int max_windows; | ||
418 | + int ret; | ||
419 | + ret = sscanf(buf, "%u", &input); | ||
420 | + | ||
421 | + if (ret != 1) | ||
422 | + return -EINVAL; | ||
423 | + | ||
424 | + /* already using this value, bail out */ | ||
425 | + if (input == dbs_tuners_ins.hotplug_out_sampling_periods) | ||
426 | + return count; | ||
427 | + | ||
428 | + mutex_lock(&dbs_mutex); | ||
429 | + ret = count; | ||
430 | + max_windows = max(dbs_tuners_ins.hotplug_in_sampling_periods, | ||
431 | + dbs_tuners_ins.hotplug_out_sampling_periods); | ||
432 | + | ||
433 | + /* no need to resize array */ | ||
434 | + if (input <= max_windows) { | ||
435 | + dbs_tuners_ins.hotplug_out_sampling_periods = input; | ||
436 | + goto out; | ||
437 | + } | ||
438 | + | ||
439 | + /* resize array */ | ||
440 | + temp = kmalloc((sizeof(unsigned int) * input), GFP_KERNEL); | ||
441 | + | ||
442 | + if (!temp || IS_ERR(temp)) { | ||
443 | + ret = -ENOMEM; | ||
444 | + goto out; | ||
445 | + } | ||
446 | + | ||
447 | + memcpy(temp, dbs_tuners_ins.hotplug_load_history, | ||
448 | + (max_windows * sizeof(unsigned int))); | ||
449 | + kfree(dbs_tuners_ins.hotplug_load_history); | ||
450 | + | ||
451 | + /* replace old buffer, old number of sampling periods & old index */ | ||
452 | + dbs_tuners_ins.hotplug_load_history = temp; | ||
453 | + dbs_tuners_ins.hotplug_out_sampling_periods = input; | ||
454 | + dbs_tuners_ins.hotplug_load_index = max_windows; | ||
455 | +out: | ||
456 | + mutex_unlock(&dbs_mutex); | ||
457 | + | ||
458 | + return ret; | ||
459 | +} | ||
460 | + | ||
461 | +static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, | ||
462 | + const char *buf, size_t count) | ||
463 | +{ | ||
464 | + unsigned int input; | ||
465 | + int ret; | ||
466 | + | ||
467 | + unsigned int j; | ||
468 | + | ||
469 | + ret = sscanf(buf, "%u", &input); | ||
470 | + if (ret != 1) | ||
471 | + return -EINVAL; | ||
472 | + | ||
473 | + if (input > 1) | ||
474 | + input = 1; | ||
475 | + | ||
476 | + mutex_lock(&dbs_mutex); | ||
477 | + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ | ||
478 | + mutex_unlock(&dbs_mutex); | ||
479 | + return count; | ||
480 | + } | ||
481 | + dbs_tuners_ins.ignore_nice = input; | ||
482 | + | ||
483 | + /* we need to re-evaluate prev_cpu_idle */ | ||
484 | + for_each_online_cpu(j) { | ||
485 | + struct cpu_dbs_info_s *dbs_info; | ||
486 | + dbs_info = &per_cpu(hp_cpu_dbs_info, j); | ||
487 | + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, | ||
488 | + &dbs_info->prev_cpu_wall); | ||
489 | + if (dbs_tuners_ins.ignore_nice) | ||
490 | + dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice; | ||
491 | + | ||
492 | + } | ||
493 | + mutex_unlock(&dbs_mutex); | ||
494 | + | ||
495 | + return count; | ||
496 | +} | ||
497 | + | ||
498 | +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, | ||
499 | + const char *buf, size_t count) | ||
500 | +{ | ||
501 | + unsigned int input; | ||
502 | + int ret; | ||
503 | + | ||
504 | + ret = sscanf(buf, "%u", &input); | ||
505 | + if (ret != 1) | ||
506 | + return -EINVAL; | ||
507 | + | ||
508 | + mutex_lock(&dbs_mutex); | ||
509 | + dbs_tuners_ins.io_is_busy = !!input; | ||
510 | + mutex_unlock(&dbs_mutex); | ||
511 | + | ||
512 | + return count; | ||
513 | +} | ||
514 | + | ||
515 | +define_one_global_rw(sampling_rate); | ||
516 | +define_one_global_rw(up_threshold); | ||
517 | +define_one_global_rw(down_threshold); | ||
518 | +define_one_global_rw(hotplug_in_sampling_periods); | ||
519 | +define_one_global_rw(hotplug_out_sampling_periods); | ||
520 | +define_one_global_rw(ignore_nice_load); | ||
521 | +define_one_global_rw(io_is_busy); | ||
522 | + | ||
523 | +static struct attribute *dbs_attributes[] = { | ||
524 | + &sampling_rate.attr, | ||
525 | + &up_threshold.attr, | ||
526 | + &down_threshold.attr, | ||
527 | + &hotplug_in_sampling_periods.attr, | ||
528 | + &hotplug_out_sampling_periods.attr, | ||
529 | + &ignore_nice_load.attr, | ||
530 | + &io_is_busy.attr, | ||
531 | + NULL | ||
532 | +}; | ||
533 | + | ||
534 | +static struct attribute_group dbs_attr_group = { | ||
535 | + .attrs = dbs_attributes, | ||
536 | + .name = "hotplug", | ||
537 | +}; | ||
538 | + | ||
539 | +/************************** sysfs end ************************/ | ||
540 | + | ||
541 | +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) | ||
542 | +{ | ||
543 | + /* combined load of all enabled CPUs */ | ||
544 | + unsigned int total_load = 0; | ||
545 | + /* single largest CPU load */ | ||
546 | + unsigned int max_load = 0; | ||
547 | + /* average load across all enabled CPUs */ | ||
548 | + unsigned int avg_load = 0; | ||
549 | + /* average load across multiple sampling periods for hotplug events */ | ||
550 | + unsigned int hotplug_in_avg_load = 0; | ||
551 | + unsigned int hotplug_out_avg_load = 0; | ||
552 | + /* number of sampling periods averaged for hotplug decisions */ | ||
553 | + unsigned int periods; | ||
554 | + | ||
555 | + struct cpufreq_policy *policy; | ||
556 | + unsigned int index = 0; | ||
557 | + unsigned int i, j; | ||
558 | + | ||
559 | + policy = this_dbs_info->cur_policy; | ||
560 | + | ||
561 | + /* | ||
562 | + * cpu load accounting | ||
563 | + * get highest load, total load and average load across all CPUs | ||
564 | + */ | ||
565 | + for_each_cpu(j, policy->cpus) { | ||
566 | + unsigned int load; | ||
567 | + unsigned int idle_time, wall_time; | ||
568 | + cputime64_t cur_wall_time, cur_idle_time; | ||
569 | + struct cpu_dbs_info_s *j_dbs_info; | ||
570 | + | ||
571 | + j_dbs_info = &per_cpu(hp_cpu_dbs_info, j); | ||
572 | + | ||
573 | + /* update both cur_idle_time and cur_wall_time */ | ||
574 | + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); | ||
575 | + | ||
576 | + /* how much wall time has passed since last iteration? */ | ||
577 | + wall_time = (unsigned int) cputime64_sub(cur_wall_time, | ||
578 | + j_dbs_info->prev_cpu_wall); | ||
579 | + j_dbs_info->prev_cpu_wall = cur_wall_time; | ||
580 | + | ||
581 | + /* how much idle time has passed since last iteration? */ | ||
582 | + idle_time = (unsigned int) cputime64_sub(cur_idle_time, | ||
583 | + j_dbs_info->prev_cpu_idle); | ||
584 | + j_dbs_info->prev_cpu_idle = cur_idle_time; | ||
585 | + | ||
586 | + if (unlikely(!wall_time || wall_time < idle_time)) | ||
587 | + continue; | ||
588 | + | ||
589 | + /* load is the percentage of time not spent in idle */ | ||
590 | + load = 100 * (wall_time - idle_time) / wall_time; | ||
591 | + | ||
592 | + /* keep track of combined load across all CPUs */ | ||
593 | + total_load += load; | ||
594 | + | ||
595 | + /* keep track of highest single load across all CPUs */ | ||
596 | + if (load > max_load) | ||
597 | + max_load = load; | ||
598 | + } | ||
599 | + | ||
600 | + /* calculate the average load across all related CPUs */ | ||
601 | + avg_load = total_load / num_online_cpus(); | ||
602 | + | ||
603 | + | ||
604 | + /* | ||
605 | + * hotplug load accounting | ||
606 | + * average load over multiple sampling periods | ||
607 | + */ | ||
608 | + | ||
609 | + /* how many sampling periods do we use for hotplug decisions? */ | ||
610 | + periods = max(dbs_tuners_ins.hotplug_in_sampling_periods, | ||
611 | + dbs_tuners_ins.hotplug_out_sampling_periods); | ||
612 | + | ||
613 | + /* store avg_load in the circular buffer */ | ||
614 | + dbs_tuners_ins.hotplug_load_history[dbs_tuners_ins.hotplug_load_index] | ||
615 | + = avg_load; | ||
616 | + | ||
617 | + /* compute average load across in & out sampling periods */ | ||
618 | + for (i = 0, j = dbs_tuners_ins.hotplug_load_index; | ||
619 | + i < periods; i++, j--) { | ||
620 | + if (i < dbs_tuners_ins.hotplug_in_sampling_periods) | ||
621 | + hotplug_in_avg_load += | ||
622 | + dbs_tuners_ins.hotplug_load_history[j]; | ||
623 | + if (i < dbs_tuners_ins.hotplug_out_sampling_periods) | ||
624 | + hotplug_out_avg_load += | ||
625 | + dbs_tuners_ins.hotplug_load_history[j]; | ||
626 | + | ||
627 | + if (j == 0) | ||
628 | + j = periods; | ||
629 | + } | ||
630 | + | ||
631 | + hotplug_in_avg_load = hotplug_in_avg_load / | ||
632 | + dbs_tuners_ins.hotplug_in_sampling_periods; | ||
633 | + | ||
634 | + hotplug_out_avg_load = hotplug_out_avg_load / | ||
635 | + dbs_tuners_ins.hotplug_out_sampling_periods; | ||
636 | + | ||
637 | + /* return to first element if we're at the circular buffer's end */ | ||
638 | + if (++dbs_tuners_ins.hotplug_load_index == periods) | ||
639 | + dbs_tuners_ins.hotplug_load_index = 0; | ||
640 | + | ||
641 | + /* check for frequency increase */ | ||
642 | + if (avg_load > dbs_tuners_ins.up_threshold) { | ||
643 | + /* should we enable auxiliary CPUs? */ | ||
644 | + if (num_online_cpus() < 2 && hotplug_in_avg_load > | ||
645 | + dbs_tuners_ins.up_threshold) { | ||
646 | + /* hotplug with cpufreq is nasty | ||
647 | + * a call to cpufreq_governor_dbs may cause a lockup. | ||
648 | + * wq is not running here so it's safe. | ||
649 | + */ | ||
650 | + mutex_unlock(&this_dbs_info->timer_mutex); | ||
651 | + cpu_up(1); | ||
652 | + mutex_lock(&this_dbs_info->timer_mutex); | ||
653 | + goto out; | ||
654 | + } | ||
655 | + | ||
656 | + /* increase to highest frequency supported */ | ||
657 | + if (policy->cur < policy->max) | ||
658 | + __cpufreq_driver_target(policy, policy->max, | ||
659 | + CPUFREQ_RELATION_H); | ||
660 | + | ||
661 | + goto out; | ||
662 | + } | ||
663 | + | ||
664 | + /* check for frequency decrease */ | ||
665 | + if (avg_load < dbs_tuners_ins.down_threshold) { | ||
666 | + /* are we at the minimum frequency already? */ | ||
667 | + if (policy->cur == policy->min) { | ||
668 | + /* should we disable auxiliary CPUs? */ | ||
669 | + if (num_online_cpus() > 1 && hotplug_out_avg_load < | ||
670 | + dbs_tuners_ins.down_threshold) { | ||
671 | + mutex_unlock(&this_dbs_info->timer_mutex); | ||
672 | + cpu_down(1); | ||
673 | + mutex_lock(&this_dbs_info->timer_mutex); | ||
674 | + } | ||
675 | + goto out; | ||
676 | + } | ||
677 | + | ||
678 | + /* bump down to the next lowest frequency in the table */ | ||
679 | + if (cpufreq_frequency_table_next_lowest(policy, | ||
680 | + this_dbs_info->freq_table, &index)) { | ||
681 | + pr_err("%s: failed to get next lowest frequency\n", | ||
682 | + __func__); | ||
683 | + goto out; | ||
684 | + } | ||
685 | + | ||
686 | + __cpufreq_driver_target(policy, | ||
687 | + this_dbs_info->freq_table[index].frequency, | ||
688 | + CPUFREQ_RELATION_L); | ||
689 | + } | ||
690 | +out: | ||
691 | + return; | ||
692 | +} | ||
693 | + | ||
694 | +static void do_dbs_timer(struct work_struct *work) | ||
695 | +{ | ||
696 | + struct cpu_dbs_info_s *dbs_info = | ||
697 | + container_of(work, struct cpu_dbs_info_s, work.work); | ||
698 | + unsigned int cpu = dbs_info->cpu; | ||
699 | + | ||
700 | + /* We want all related CPUs to do sampling nearly on same jiffy */ | ||
701 | + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); | ||
702 | + | ||
703 | + mutex_lock(&dbs_info->timer_mutex); | ||
704 | + dbs_check_cpu(dbs_info); | ||
705 | + queue_delayed_work_on(cpu, khotplug_wq, &dbs_info->work, delay); | ||
706 | + mutex_unlock(&dbs_info->timer_mutex); | ||
707 | +} | ||
708 | + | ||
709 | +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) | ||
710 | +{ | ||
711 | + /* We want all related CPUs to do sampling nearly on same jiffy */ | ||
712 | + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); | ||
713 | + delay -= jiffies % delay; | ||
714 | + | ||
715 | + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); | ||
716 | + queue_delayed_work_on(dbs_info->cpu, khotplug_wq, &dbs_info->work, | ||
717 | + delay); | ||
718 | +} | ||
719 | + | ||
720 | +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) | ||
721 | +{ | ||
722 | + cancel_delayed_work_sync(&dbs_info->work); | ||
723 | +} | ||
724 | + | ||
725 | +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, | ||
726 | + unsigned int event) | ||
727 | +{ | ||
728 | + unsigned int cpu = policy->cpu; | ||
729 | + struct cpu_dbs_info_s *this_dbs_info; | ||
730 | + unsigned int i, j, max_periods; | ||
731 | + int rc; | ||
732 | + | ||
733 | + this_dbs_info = &per_cpu(hp_cpu_dbs_info, cpu); | ||
734 | + | ||
735 | + switch (event) { | ||
736 | + case CPUFREQ_GOV_START: | ||
737 | + if ((!cpu_online(cpu)) || (!policy->cur)) | ||
738 | + return -EINVAL; | ||
739 | + | ||
740 | + mutex_lock(&dbs_mutex); | ||
741 | + dbs_enable++; | ||
742 | + for_each_cpu(j, policy->cpus) { | ||
743 | + struct cpu_dbs_info_s *j_dbs_info; | ||
744 | + j_dbs_info = &per_cpu(hp_cpu_dbs_info, j); | ||
745 | + j_dbs_info->cur_policy = policy; | ||
746 | + | ||
747 | + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, | ||
748 | + &j_dbs_info->prev_cpu_wall); | ||
749 | + if (dbs_tuners_ins.ignore_nice) { | ||
750 | + j_dbs_info->prev_cpu_nice = | ||
751 | + kstat_cpu(j).cpustat.nice; | ||
752 | + } | ||
753 | + | ||
754 | + max_periods = max(DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS, | ||
755 | + DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS); | ||
756 | + dbs_tuners_ins.hotplug_load_history = kmalloc( | ||
757 | + (sizeof(unsigned int) * max_periods), | ||
758 | + GFP_KERNEL); | ||
759 | + if (!dbs_tuners_ins.hotplug_load_history) { | ||
760 | + WARN_ON(1); | ||
761 | + return -ENOMEM; | ||
762 | + } | ||
763 | + for (i = 0; i < max_periods; i++) | ||
764 | + dbs_tuners_ins.hotplug_load_history[i] = 50; | ||
765 | + } | ||
766 | + this_dbs_info->cpu = cpu; | ||
767 | + this_dbs_info->freq_table = cpufreq_frequency_get_table(cpu); | ||
768 | + /* | ||
769 | + * Start the timerschedule work, when this governor | ||
770 | + * is used for first time | ||
771 | + */ | ||
772 | + if (dbs_enable == 1) { | ||
773 | + rc = sysfs_create_group(cpufreq_global_kobject, | ||
774 | + &dbs_attr_group); | ||
775 | + if (rc) { | ||
776 | + mutex_unlock(&dbs_mutex); | ||
777 | + return rc; | ||
778 | + } | ||
779 | + } | ||
780 | + mutex_unlock(&dbs_mutex); | ||
781 | + | ||
782 | + mutex_init(&this_dbs_info->timer_mutex); | ||
783 | + dbs_timer_init(this_dbs_info); | ||
784 | + break; | ||
785 | + | ||
786 | + case CPUFREQ_GOV_STOP: | ||
787 | + dbs_timer_exit(this_dbs_info); | ||
788 | + | ||
789 | + mutex_lock(&dbs_mutex); | ||
790 | + mutex_destroy(&this_dbs_info->timer_mutex); | ||
791 | + dbs_enable--; | ||
792 | + mutex_unlock(&dbs_mutex); | ||
793 | + if (!dbs_enable) | ||
794 | + sysfs_remove_group(cpufreq_global_kobject, | ||
795 | + &dbs_attr_group); | ||
796 | + kfree(dbs_tuners_ins.hotplug_load_history); | ||
797 | + /* | ||
798 | + * XXX BIG CAVEAT: Stopping the governor with CPU1 offline | ||
799 | + * will result in it remaining offline until the user onlines | ||
800 | + * it again. It is up to the user to do this (for now). | ||
801 | + */ | ||
802 | + break; | ||
803 | + | ||
804 | + case CPUFREQ_GOV_LIMITS: | ||
805 | + mutex_lock(&this_dbs_info->timer_mutex); | ||
806 | + if (policy->max < this_dbs_info->cur_policy->cur) | ||
807 | + __cpufreq_driver_target(this_dbs_info->cur_policy, | ||
808 | + policy->max, CPUFREQ_RELATION_H); | ||
809 | + else if (policy->min > this_dbs_info->cur_policy->cur) | ||
810 | + __cpufreq_driver_target(this_dbs_info->cur_policy, | ||
811 | + policy->min, CPUFREQ_RELATION_L); | ||
812 | + mutex_unlock(&this_dbs_info->timer_mutex); | ||
813 | + break; | ||
814 | + } | ||
815 | + return 0; | ||
816 | +} | ||
817 | + | ||
818 | +static int __init cpufreq_gov_dbs_init(void) | ||
819 | +{ | ||
820 | + int err; | ||
821 | + cputime64_t wall; | ||
822 | + u64 idle_time; | ||
823 | + int cpu = get_cpu(); | ||
824 | + | ||
825 | + idle_time = get_cpu_idle_time_us(cpu, &wall); | ||
826 | + put_cpu(); | ||
827 | + if (idle_time != -1ULL) { | ||
828 | + dbs_tuners_ins.up_threshold = DEFAULT_UP_FREQ_MIN_LOAD; | ||
829 | + } else { | ||
830 | + pr_err("cpufreq-hotplug: %s: assumes CONFIG_NO_HZ\n", | ||
831 | + __func__); | ||
832 | + return -EINVAL; | ||
833 | + } | ||
834 | + | ||
835 | + khotplug_wq = create_workqueue("khotplug"); | ||
836 | + if (!khotplug_wq) { | ||
837 | + pr_err("Creation of khotplug failed\n"); | ||
838 | + return -EFAULT; | ||
839 | + } | ||
840 | + err = cpufreq_register_governor(&cpufreq_gov_hotplug); | ||
841 | + if (err) | ||
842 | + destroy_workqueue(khotplug_wq); | ||
843 | + | ||
844 | + return err; | ||
845 | +} | ||
846 | + | ||
847 | +static void __exit cpufreq_gov_dbs_exit(void) | ||
848 | +{ | ||
849 | + cpufreq_unregister_governor(&cpufreq_gov_hotplug); | ||
850 | + destroy_workqueue(khotplug_wq); | ||
851 | +} | ||
852 | + | ||
853 | +MODULE_AUTHOR("Mike Turquette <mturquette@ti.com>"); | ||
854 | +MODULE_DESCRIPTION("'cpufreq_hotplug' - cpufreq governor for dynamic frequency scaling and CPU hotplugging"); | ||
855 | +MODULE_LICENSE("GPL"); | ||
856 | + | ||
857 | +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG | ||
858 | +fs_initcall(cpufreq_gov_dbs_init); | ||
859 | +#else | ||
860 | +module_init(cpufreq_gov_dbs_init); | ||
861 | +#endif | ||
862 | +module_exit(cpufreq_gov_dbs_exit); | ||
863 | diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h | ||
864 | index a38fca8..6cbc3df 100644 | ||
865 | --- a/include/linux/cpufreq.h | ||
866 | +++ b/include/linux/cpufreq.h | ||
867 | @@ -355,6 +355,9 @@ extern struct cpufreq_governor cpufreq_gov_ondemand; | ||
868 | #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) | ||
869 | extern struct cpufreq_governor cpufreq_gov_conservative; | ||
870 | #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) | ||
871 | +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG) | ||
872 | +extern struct cpufreq_governor cpufreq_gov_hotplug; | ||
873 | +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_hotplug) | ||
874 | #endif | ||
875 | |||
876 | |||
877 | -- | ||
878 | 1.6.6.1 | ||
879 | |||