From 58919e83c85c3a3c5fb34025dc0e95ddd998c478 Mon Sep 17 00:00:00 2001 From: Rafael J. Wysocki Date: Tue, 16 Aug 2016 22:14:55 +0200 Subject: cpufreq / sched: Pass flags to cpufreq_update_util() It is useful to know the reason why cpufreq_update_util() has just been called and that can be passed as flags to cpufreq_update_util() and to the ->func() callback in struct update_util_data. However, doing that in addition to passing the util and max arguments they already take would be clumsy, so avoid it. Instead, use the observation that the schedutil governor is part of the scheduler proper, so it can access scheduler data directly. This allows the util and max arguments of cpufreq_update_util() and the ->func() callback in struct update_util_data to be replaced with a flags one, but schedutil has to be modified to follow. Thus make the schedutil governor obtain the CFS utilization information from the scheduler and use the "RT" and "DL" flags instead of the special utilization value of ULONG_MAX to track updates from the RT and DL sched classes. Make it non-modular too to avoid having to export scheduler variables to modules at large. Next, update all of the other users of cpufreq_update_util() and the ->func() callback in struct update_util_data accordingly. Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar --- drivers/cpufreq/Kconfig | 5 +---- drivers/cpufreq/cpufreq_governor.c | 2 +- drivers/cpufreq/intel_pstate.c | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 74919aa81dcb..4dc95250cf4e 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE If in doubt, say N. config CPU_FREQ_GOV_SCHEDUTIL - tristate "'schedutil' cpufreq policy governor" + bool "'schedutil' cpufreq policy governor" depends on CPU_FREQ && SMP select CPU_FREQ_GOV_ATTR_SET select IRQ_WORK @@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL frequency tipping point is at utilization/capacity equal to 80% in both cases. - To compile this driver as a module, choose M here: the module will - be called cpufreq_schedutil. - If in doubt, say N. comment "CPU frequency scaling drivers" diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e415349ab31b..642dd0f183a8 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work *irq_work) } static void dbs_update_util_handler(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be9eade147f2..bdbe9369146b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1329,7 +1329,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) } static void intel_pstate_update_util(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns = time - cpu->sample.time; -- cgit v1.2.3 From 09c448d3c61f31322c097cb4c1484778d50da399 Mon Sep 17 00:00:00 2001 From: Rafael J. Wysocki Date: Wed, 14 Sep 2016 02:28:13 +0200 Subject: cpufreq: intel_pstate: Use IOWAIT flag in Atom algorithm Modify the P-state selection algorithm for Atom processors to use the new SCHED_CPUFREQ_IOWAIT flag instead of the questionable get_cpu_iowait_time_us() function. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 58 ++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 27 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index bdbe9369146b..7c457ccf9153 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -181,6 +181,8 @@ struct _pid { * @cpu: CPU number for this instance data * @update_util: CPUFreq utility callback information * @update_util_set: CPUFreq utility callback is set + * @iowait_boost: iowait-related boost fraction + * @last_update: Time of the last update. * @pstate: Stores P state limits for this CPU * @vid: Stores VID limits for this CPU * @pid: Stores PID parameters for this CPU @@ -206,6 +208,7 @@ struct cpudata { struct vid_data vid; struct _pid pid; + u64 last_update; u64 last_sample_time; u64 prev_aperf; u64 prev_mperf; @@ -216,6 +219,7 @@ struct cpudata { struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; #endif + unsigned int iowait_boost; }; static struct cpudata **all_cpu_data; @@ -229,6 +233,7 @@ static struct cpudata **all_cpu_data; * @p_gain_pct: PID proportional gain * @i_gain_pct: PID integral gain * @d_gain_pct: PID derivative gain + * @boost_iowait: Whether or not to use iowait boosting. * * Stores per CPU model static PID configuration data. */ @@ -240,6 +245,7 @@ struct pstate_adjust_policy { int p_gain_pct; int d_gain_pct; int i_gain_pct; + bool boost_iowait; }; /** @@ -1037,6 +1043,7 @@ static struct cpu_defaults silvermont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1058,6 +1065,7 @@ static struct cpu_defaults airmont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1099,6 +1107,7 @@ static struct cpu_defaults bxt_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = core_get_max_pstate, @@ -1222,36 +1231,18 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu) static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; - u64 cummulative_iowait, delta_iowait_us; - u64 delta_iowait_mperf; - u64 mperf, now; - int32_t cpu_load; + int32_t busy_frac, boost; - cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now); + busy_frac = div_fp(sample->mperf, sample->tsc); - /* - * Convert iowait time into number of IO cycles spent at max_freq. - * IO is considered as busy only for the cpu_load algorithm. For - * performance this is not needed since we always try to reach the - * maximum P-State, so we are already boosting the IOs. - */ - delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait; - delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling * - cpu->pstate.max_pstate, MSEC_PER_SEC); + boost = cpu->iowait_boost; + cpu->iowait_boost >>= 1; - mperf = cpu->sample.mperf + delta_iowait_mperf; - cpu->prev_cummulative_iowait = cummulative_iowait; + if (busy_frac < boost) + busy_frac = boost; - /* - * The load can be estimated as the ratio of the mperf counter - * running at a constant frequency during active periods - * (C0) and the time stamp counter running at the same frequency - * also during C-states. - */ - cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); - cpu->sample.busy_scaled = cpu_load; - - return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load); + sample->busy_scaled = busy_frac * 100; + return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled); } static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) @@ -1332,8 +1323,21 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); - u64 delta_ns = time - cpu->sample.time; + u64 delta_ns; + + if (pid_params.boost_iowait) { + if (flags & SCHED_CPUFREQ_IOWAIT) { + cpu->iowait_boost = int_tofp(1); + } else if (cpu->iowait_boost) { + /* Clear iowait_boost if the CPU may have been idle. */ + delta_ns = time - cpu->last_update; + if (delta_ns > TICK_NSEC) + cpu->iowait_boost = 0; + } + cpu->last_update = time; + } + delta_ns = time - cpu->sample.time; if ((s64)delta_ns >= pid_params.sample_rate_ns) { bool sample_taken = intel_pstate_sample(cpu, time); -- cgit v1.2.3 From 3ba7bcaa3657f5fe32295ebd17fbdaaf16608e2f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 13 Sep 2016 17:41:33 -0700 Subject: cpufreq: intel_pstate: Add io_boost trace Add io_boost percent to current pstate_sample tracepoint. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 ++- include/trace/events/power.h | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7c457ccf9153..86c29af7eb77 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1316,7 +1316,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->mperf, sample->aperf, sample->tsc, - get_avg_frequency(cpu)); + get_avg_frequency(cpu), + fp_toint(cpu->iowait_boost * 100)); } static void intel_pstate_update_util(struct update_util_data *data, u64 time, diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 19e50300ce7d..54e3aad32806 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -69,7 +69,8 @@ TRACE_EVENT(pstate_sample, u64 mperf, u64 aperf, u64 tsc, - u32 freq + u32 freq, + u32 io_boost ), TP_ARGS(core_busy, @@ -79,7 +80,8 @@ TRACE_EVENT(pstate_sample, mperf, aperf, tsc, - freq + freq, + io_boost ), TP_STRUCT__entry( @@ -91,6 +93,7 @@ TRACE_EVENT(pstate_sample, __field(u64, aperf) __field(u64, tsc) __field(u32, freq) + __field(u32, io_boost) ), TP_fast_assign( @@ -102,9 +105,10 @@ TRACE_EVENT(pstate_sample, __entry->aperf = aperf; __entry->tsc = tsc; __entry->freq = freq; + __entry->io_boost = io_boost; ), - TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ", + TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu", (unsigned long)__entry->core_busy, (unsigned long)__entry->scaled_busy, (unsigned long)__entry->from, @@ -112,7 +116,8 @@ TRACE_EVENT(pstate_sample, (unsigned long long)__entry->mperf, (unsigned long long)__entry->aperf, (unsigned long long)__entry->tsc, - (unsigned long)__entry->freq + (unsigned long)__entry->freq, + (unsigned long)__entry->io_boost ) ); -- cgit v1.2.3