Skip to content

Commit b8cb776

Browse files
Peter Zijlstra authored and pundiramit committed
UPSTREAM: sched/fair: Fix and optimize the fork() path
The task_fork_fair() callback already calls __set_task_cpu() and takes rq->lock.

If we move the sched_class::task_fork callback in sched_fork() under the existing p->pi_lock, right after its set_task_cpu() call, we can avoid doing two such calls and omit the IRQ disabling on the rq->lock.

Change to __set_task_cpu() to skip the migration bits, this is a new task, not a migration. Similarly, make wake_up_new_task() use __set_task_cpu() for the same reason, the task hasn't actually migrated as it hasn't ever ran.

This cures the problem of calling migrate_task_rq_fair(), which does remove_entity_from_load_avg() on tasks that have never been added to the load avg to begin with.

This bug would result in transiently messed up load_avg values, averaged out after a few dozen milliseconds. This is probably the reason why this bug was not found for such a long time.

Reported-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit e210bffd39d01b649c94b820c28ff112673266dd)
Change-Id: Icbddbaa6e8c1071859673d8685bc3f38955cf144
Signed-off-by: Brendan Jackman <brendan.jackman@arm.com>
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
1 parent ba876f4 commit b8cb776

2 files changed

Lines changed: 17 additions & 26 deletions

File tree

kernel/sched/core.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2300,9 +2300,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
23002300
p->sched_class = &fair_sched_class;
23012301
}
23022302

2303-
if (p->sched_class->task_fork)
2304-
p->sched_class->task_fork(p);
2305-
23062303
/*
23072304
* The child is not yet in the pid-hash so no cgroup attach races,
23082305
* and the cgroup is pinned to this child due to cgroup_fork()
@@ -2311,7 +2308,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
23112308
* Silence PROVE_RCU.
23122309
*/
23132310
raw_spin_lock_irqsave(&p->pi_lock, flags);
2314-
set_task_cpu(p, cpu);
2311+
/*
2312+
* We're setting the cpu for the first time, we don't migrate,
2313+
* so use __set_task_cpu().
2314+
*/
2315+
__set_task_cpu(p, cpu);
2316+
if (p->sched_class->task_fork)
2317+
p->sched_class->task_fork(p);
23152318
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
23162319

23172320
#ifdef CONFIG_SCHED_INFO
@@ -2453,8 +2456,11 @@ void wake_up_new_task(struct task_struct *p)
24532456
* Fork balancing, do it here and not earlier because:
24542457
* - cpus_allowed can change in the fork path
24552458
* - any previously selected cpu might disappear through hotplug
2459+
*
2460+
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
2461+
* as we're not fully set-up yet.
24562462
*/
2457-
set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
2463+
__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
24582464
#endif
24592465
rq = __task_rq_lock(p);
24602466
post_init_entity_util_avg(&p->se);

kernel/sched/fair.c

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4712,7 +4712,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
47124712
*
47134713
* note: in the case of encountering a throttled cfs_rq we will
47144714
* post the final h_nr_running increment below.
4715-
*/
4715+
*/
47164716
if (cfs_rq_throttled(cfs_rq))
47174717
break;
47184718
cfs_rq->h_nr_running++;
@@ -9904,31 +9904,17 @@ static void task_fork_fair(struct task_struct *p)
99049904
{
99059905
struct cfs_rq *cfs_rq;
99069906
struct sched_entity *se = &p->se, *curr;
9907-
int this_cpu = smp_processor_id();
99089907
struct rq *rq = this_rq();
9909-
unsigned long flags;
9910-
9911-
raw_spin_lock_irqsave(&rq->lock, flags);
99129908

9909+
raw_spin_lock(&rq->lock);
99139910
update_rq_clock(rq);
99149911

99159912
cfs_rq = task_cfs_rq(current);
99169913
curr = cfs_rq->curr;
9917-
9918-
/*
9919-
* Not only the cpu but also the task_group of the parent might have
9920-
* been changed after parent->se.parent,cfs_rq were copied to
9921-
* child->se.parent,cfs_rq. So call __set_task_cpu() to make those
9922-
* of child point to valid ones.
9923-
*/
9924-
rcu_read_lock();
9925-
__set_task_cpu(p, this_cpu);
9926-
rcu_read_unlock();
9927-
9928-
update_curr(cfs_rq);
9929-
9930-
if (curr)
9914+
if (curr) {
9915+
update_curr(cfs_rq);
99319916
se->vruntime = curr->vruntime;
9917+
}
99329918
place_entity(cfs_rq, se, 1);
99339919

99349920
if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
@@ -9941,8 +9927,7 @@ static void task_fork_fair(struct task_struct *p)
99419927
}
99429928

99439929
se->vruntime -= cfs_rq->min_vruntime;
9944-
9945-
raw_spin_unlock_irqrestore(&rq->lock, flags);
9930+
raw_spin_unlock(&rq->lock);
99469931
}
99479932

99489933
/*

0 commit comments

Comments (0)