
Commit ae0177a

Joonwoo Park authored and pundiramit committed
sched: WALT: account cumulative window demand
Energy cost estimation has been a long-standing challenge for WALT because WALT guides CPU frequency based on the CPU utilization of the previous window. Consequently it's not possible to know a newly waking task's energy cost until the end of WALT's current window.

WALT already tracks the 'Previous Runnable Sum' (prev_runnable_sum) and the 'Cumulative Runnable Average' (cr_avg). They are designed for CPU frequency guidance and task placement, but unfortunately neither is suitable for energy cost estimation. Using prev_runnable_sum for the energy cost calculation would make us account CPU and task energy solely based on activity in the previous window, so, for example, any task that had no activity in the previous window would be accounted as a 'zero energy cost' task. Energy estimation with cr_avg is what energy_diff() relies on at present. However, cr_avg can only represent an instantaneous picture of the energy cost, so, for example, if a CPU was fully occupied for an entire WALT window and became idle just before the window boundary, energy_diff() would account that CPU as a 'zero energy cost' CPU for a subsequent wake-up.

Therefore, introduce a new accounting unit, 'Cumulative Window Demand'. The cumulative window demand tracks the demands of all tasks seen in the current window, which is neither an instantaneous picture nor the actual execution time. Because a task's demand represents its estimated scaled execution time over a full window, the accumulation of all demands represents the predicted CPU load at the end of the window. Thus we can estimate the CPU's frequency at the end of the current WALT window from the cumulative window demand.

The use of prev_runnable_sum for CPU frequency guidance and of cr_avg for task placement has not changed; they continue to serve those purposes, while this patch only adds an additional statistic.

Change-Id: I9908c77ead9973a26dea2b36c001c2baf944d4f5
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
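To make the new statistic concrete, here is a small userspace model (a sketch, not the kernel code) of the accounting described above. fixup_cum_window_demand() and the two rq fields mirror the patch; the roll_window() helper, the demand values, and the demo in main() are illustrative assumptions.

/*
 * Userspace model (a sketch, not the kernel code) of the bookkeeping this
 * patch adds.  fixup_cum_window_demand() and the two rq fields mirror the
 * patch; everything else (window rollover helper, demand values, main)
 * is illustrative.
 */
#include <stdio.h>
#include <stdint.h>

struct rq {
	uint64_t cumulative_runnable_avg;  /* demand of tasks currently runnable */
	uint64_t cum_window_demand;        /* all demand seen in this window */
};

/* Apply a signed delta and clamp at zero, as the patch does. */
static void fixup_cum_window_demand(struct rq *rq, int64_t delta)
{
	rq->cum_window_demand += delta;
	if ((int64_t)rq->cum_window_demand < 0)
		rq->cum_window_demand = 0;
}

/*
 * On a window rollover the patch resets cum_window_demand to
 * cumulative_runnable_avg, so tasks that went to sleep stop contributing.
 */
static void roll_window(struct rq *rq)
{
	rq->cum_window_demand = rq->cumulative_runnable_avg;
}

int main(void)
{
	struct rq rq = { 0, 0 };

	/* Task A (demand 8 ms, in ns) wakes up and stays runnable. */
	rq.cumulative_runnable_avg += 8000000;
	fixup_cum_window_demand(&rq, 8000000);

	/*
	 * Task B (demand 5 ms) wakes, runs, and sleeps within the same window:
	 * it leaves cumulative_runnable_avg but stays in cum_window_demand,
	 * so its contribution to the predicted end-of-window load remains
	 * visible until the window rolls over.
	 */
	rq.cumulative_runnable_avg += 5000000;
	fixup_cum_window_demand(&rq, 5000000);
	rq.cumulative_runnable_avg -= 5000000;

	printf("cra=%llu cum_window_demand=%llu\n",
	       (unsigned long long)rq.cumulative_runnable_avg,
	       (unsigned long long)rq.cum_window_demand);

	roll_window(&rq);
	printf("after rollover: cum_window_demand=%llu\n",
	       (unsigned long long)rq.cum_window_demand);

	return 0;
}

In this model the sleeping task B still counts toward the predicted end-of-window load, which is exactly the case an instantaneous measure such as cr_avg misses.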
1 parent 3e5b295 commit ae0177a

4 files changed

Lines changed: 74 additions & 3 deletions


include/linux/sched.h

Lines changed: 1 addition & 0 deletions
@@ -1568,6 +1568,7 @@ struct task_struct {
 	 * of this task
 	 */
 	u32 init_load_pct;
+	u64 last_sleep_ts;
 #endif
 
 #ifdef CONFIG_CGROUP_SCHED

kernel/sched/core.c

Lines changed: 8 additions & 0 deletions
@@ -2170,6 +2170,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.nr_migrations = 0;
 	p->se.vruntime = 0;
+#ifdef CONFIG_SCHED_WALT
+	p->last_sleep_ts = 0;
+#endif
+
 	INIT_LIST_HEAD(&p->se.group_node);
 	walt_init_new_task_load(p);
 
@@ -3379,6 +3383,10 @@ static void __sched notrace __schedule(bool preempt)
 	rq->clock_skip_update = 0;
 
 	if (likely(prev != next)) {
+#ifdef CONFIG_SCHED_WALT
+		if (!prev->on_rq)
+			prev->last_sleep_ts = wallclock;
+#endif
 		rq->nr_switches++;
 		rq->curr = next;
 		++*switch_count;

kernel/sched/sched.h

Lines changed: 12 additions & 0 deletions
@@ -690,6 +690,7 @@ struct rq {
 	u64 cur_irqload;
 	u64 avg_irqload;
 	u64 irqload_ts;
+	u64 cum_window_demand;
 #endif /* CONFIG_SCHED_WALT */
 
@@ -2101,6 +2102,17 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
+#ifdef CONFIG_SCHED_WALT
+
+static inline bool
+walt_task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
+{
+	return cpu_of(rq) == task_cpu(p) &&
+	       (p->on_rq || p->last_sleep_ts >= rq->window_start);
+}
+
+#endif /* CONFIG_SCHED_WALT */
+
 #ifdef arch_scale_freq_capacity
 #ifndef arch_scale_freq_invariant
 #define arch_scale_freq_invariant() (true)

kernel/sched/walt.c

Lines changed: 53 additions & 3 deletions
@@ -70,11 +70,28 @@ static unsigned int task_load(struct task_struct *p)
 	return p->ravg.demand;
 }
 
+static inline void fixup_cum_window_demand(struct rq *rq, s64 delta)
+{
+	rq->cum_window_demand += delta;
+	if (unlikely((s64)rq->cum_window_demand < 0))
+		rq->cum_window_demand = 0;
+}
+
 void
 walt_inc_cumulative_runnable_avg(struct rq *rq,
				 struct task_struct *p)
 {
 	rq->cumulative_runnable_avg += p->ravg.demand;
+
+	/*
+	 * Add a task's contribution to the cumulative window demand when
+	 *
+	 * (1) task is enqueued with on_rq = 1 i.e migration,
+	 *     prio/cgroup/class change.
+	 * (2) task is waking for the first time in this window.
+	 */
+	if (p->on_rq || (p->last_sleep_ts < rq->window_start))
+		fixup_cum_window_demand(rq, p->ravg.demand);
 }
 
 void
@@ -83,6 +100,14 @@ walt_dec_cumulative_runnable_avg(struct rq *rq,
 {
 	rq->cumulative_runnable_avg -= p->ravg.demand;
 	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
+
+	/*
+	 * on_rq will be 1 for sleeping tasks. So check if the task
+	 * is migrating or dequeuing in RUNNING state to change the
+	 * prio/cgroup/class.
+	 */
+	if (task_on_rq_migrating(p) || p->state == TASK_RUNNING)
+		fixup_cum_window_demand(rq, -(s64)p->ravg.demand);
 }
 
 static void
@@ -95,6 +120,8 @@ fixup_cumulative_runnable_avg(struct rq *rq,
 	if ((s64)rq->cumulative_runnable_avg < 0)
 		panic("cra less than zero: tld: %lld, task_load(p) = %u\n",
			task_load_delta, task_load(p));
+
+	fixup_cum_window_demand(rq, task_load_delta);
 }
 
 u64 walt_ktime_clock(void)
@@ -180,6 +207,8 @@ update_window_start(struct rq *rq, u64 wallclock)
 
 	nr_windows = div64_u64(delta, walt_ravg_window);
 	rq->window_start += (u64)nr_windows * (u64)walt_ravg_window;
+
+	rq->cum_window_demand = rq->cumulative_runnable_avg;
 }
 
 /*
@@ -568,10 +597,20 @@ static void update_history(struct rq *rq, struct task_struct *p,
 	 * A throttled deadline sched class task gets dequeued without
 	 * changing p->on_rq. Since the dequeue decrements hmp stats
 	 * avoid decrementing it here again.
+	 *
+	 * When window is rolled over, the cumulative window demand
+	 * is reset to the cumulative runnable average (contribution from
+	 * the tasks on the runqueue). If the current task is dequeued
+	 * already, it's demand is not included in the cumulative runnable
+	 * average. So add the task demand separately to cumulative window
+	 * demand.
 	 */
-	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
-				!p->dl.dl_throttled))
-		fixup_cumulative_runnable_avg(rq, p, demand);
+	if (!task_has_dl_policy(p) || !p->dl.dl_throttled) {
+		if (task_on_rq_queued(p))
+			fixup_cumulative_runnable_avg(rq, p, demand);
+		else if (rq->curr == p)
+			fixup_cum_window_demand(rq, demand);
+	}
 
 	p->ravg.demand = demand;
 
@@ -792,6 +831,17 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu)
 
 	walt_update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0);
 
+	/*
+	 * When a task is migrating during the wakeup, adjust
+	 * the task's contribution towards cumulative window
+	 * demand.
+	 */
+	if (p->state == TASK_WAKING &&
+	    p->last_sleep_ts >= src_rq->window_start) {
+		fixup_cum_window_demand(src_rq, -(s64)p->ravg.demand);
+		fixup_cum_window_demand(dest_rq, p->ravg.demand);
+	}
+
 	if (p->ravg.curr_window) {
 		src_rq->curr_runnable_sum -= p->ravg.curr_window;
 		dest_rq->curr_runnable_sum += p->ravg.curr_window;

0 commit comments