Skip to content

Commit 1fecf39

Browse files
johnstultz-work authored and gregkh
committed
time: Fix clock->read(clock) race around clocksource changes
commit ceea5e3771ed2378668455fa21861bead7504df5 upstream. In tests, which exercise switching of clocksources, a NULL pointer dereference can be observed on ARM64 platforms in the clocksource read() function: u64 clocksource_mmio_readl_down(struct clocksource *c) { return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask; } This is called from the core timekeeping code via: cycle_now = tkr->read(tkr->clock); tkr->read is the cached tkr->clock->read() function pointer. When the clocksource is changed then tkr->clock and tkr->read are updated sequentially. The code above results in a sequential load operation of tkr->read and tkr->clock as well. If the store to tkr->clock hits between the loads of tkr->read and tkr->clock, then the old read() function is called with the new clock pointer. As a consequence the read() function dereferences a different data structure and the resulting 'reg' pointer can point anywhere including NULL. This problem was introduced when the timekeeping code was switched over to use struct tk_read_base. Before that, it was theoretically possible as well when the compiler decided to reload clock in the code sequence: now = tk->clock->read(tk->clock); Add a helper function which avoids the issue by reading tk_read_base->clock once into a local variable clk and then issue the read function via clk->read(clk). This guarantees that the read() function always gets the proper clocksource pointer handed in. Since there is now no use for the tkr.read pointer, this patch also removes it, and to address stopping the fast timekeeper during suspend/resume, it introduces a dummy clocksource to use rather than just a dummy read function. 
Signed-off-by: John Stultz <john.stultz@linaro.org> Acked-by: Ingo Molnar <mingo@kernel.org> Cc: Prarit Bhargava <prarit@redhat.com> Cc: Richard Cochran <richardcochran@gmail.com> Cc: Stephen Boyd <stephen.boyd@linaro.org> Cc: Miroslav Lichvar <mlichvar@redhat.com> Cc: Daniel Mentz <danielmentz@google.com> Link: http://lkml.kernel.org/r/1496965462-20003-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 255ad85 commit 1fecf39

2 files changed

Lines changed: 34 additions & 14 deletions

File tree

include/linux/timekeeper_internal.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
*/
3030
struct tk_read_base {
3131
struct clocksource *clock;
32-
cycle_t (*read)(struct clocksource *cs);
3332
cycle_t mask;
3433
cycle_t cycle_last;
3534
u32 mult;

kernel/time/timekeeping.c

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
116116
tk->offs_boot = ktime_add(tk->offs_boot, delta);
117117
}
118118

119+
/*
120+
* tk_clock_read - atomic clocksource read() helper
121+
*
122+
* This helper is necessary to use in the read paths because, while the
123+
* seqlock ensures we don't return a bad value while structures are updated,
124+
* it doesn't protect from potential crashes. There is the possibility that
125+
* the tkr's clocksource may change between the read reference, and the
126+
* clock reference passed to the read function. This can cause crashes if
127+
* the wrong clocksource is passed to the wrong read function.
128+
* This isn't necessary to use when holding the timekeeper_lock or doing
129+
* a read of the fast-timekeeper tkrs (which is protected by its own locking
130+
* and update logic).
131+
*/
132+
static inline u64 tk_clock_read(struct tk_read_base *tkr)
133+
{
134+
struct clocksource *clock = READ_ONCE(tkr->clock);
135+
136+
return clock->read(clock);
137+
}
138+
119139
#ifdef CONFIG_DEBUG_TIMEKEEPING
120140
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
121141

@@ -173,7 +193,7 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
173193
*/
174194
do {
175195
seq = read_seqcount_begin(&tk_core.seq);
176-
now = tkr->read(tkr->clock);
196+
now = tk_clock_read(tkr);
177197
last = tkr->cycle_last;
178198
mask = tkr->mask;
179199
max = tkr->clock->max_cycles;
@@ -207,7 +227,7 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
207227
cycle_t cycle_now, delta;
208228

209229
/* read clocksource */
210-
cycle_now = tkr->read(tkr->clock);
230+
cycle_now = tk_clock_read(tkr);
211231

212232
/* calculate the delta since the last update_wall_time */
213233
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -235,12 +255,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
235255

236256
old_clock = tk->tkr_mono.clock;
237257
tk->tkr_mono.clock = clock;
238-
tk->tkr_mono.read = clock->read;
239258
tk->tkr_mono.mask = clock->mask;
240-
tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
259+
tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
241260

242261
tk->tkr_raw.clock = clock;
243-
tk->tkr_raw.read = clock->read;
244262
tk->tkr_raw.mask = clock->mask;
245263
tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
246264

@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
404422

405423
now += timekeeping_delta_to_ns(tkr,
406424
clocksource_delta(
407-
tkr->read(tkr->clock),
425+
tk_clock_read(tkr),
408426
tkr->cycle_last,
409427
tkr->mask));
410428
} while (read_seqcount_retry(&tkf->seq, seq));
@@ -432,6 +450,10 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
432450
return cycles_at_suspend;
433451
}
434452

453+
static struct clocksource dummy_clock = {
454+
.read = dummy_clock_read,
455+
};
456+
435457
/**
436458
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
437459
* @tk: Timekeeper to snapshot.
@@ -448,13 +470,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
448470
struct tk_read_base *tkr = &tk->tkr_mono;
449471

450472
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
451-
cycles_at_suspend = tkr->read(tkr->clock);
452-
tkr_dummy.read = dummy_clock_read;
473+
cycles_at_suspend = tk_clock_read(tkr);
474+
tkr_dummy.clock = &dummy_clock;
453475
update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
454476

455477
tkr = &tk->tkr_raw;
456478
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
457-
tkr_dummy.read = dummy_clock_read;
479+
tkr_dummy.clock = &dummy_clock;
458480
update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
459481
}
460482

@@ -618,11 +640,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
618640
*/
619641
static void timekeeping_forward_now(struct timekeeper *tk)
620642
{
621-
struct clocksource *clock = tk->tkr_mono.clock;
622643
cycle_t cycle_now, delta;
623644
s64 nsec;
624645

625-
cycle_now = tk->tkr_mono.read(clock);
646+
cycle_now = tk_clock_read(&tk->tkr_mono);
626647
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
627648
tk->tkr_mono.cycle_last = cycle_now;
628649
tk->tkr_raw.cycle_last = cycle_now;
@@ -1405,7 +1426,7 @@ void timekeeping_resume(void)
14051426
* The less preferred source will only be tried if there is no better
14061427
* usable source. The rtc part is handled separately in rtc core code.
14071428
*/
1408-
cycle_now = tk->tkr_mono.read(clock);
1429+
cycle_now = tk_clock_read(&tk->tkr_mono);
14091430
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
14101431
cycle_now > tk->tkr_mono.cycle_last) {
14111432
u64 num, max = ULLONG_MAX;
@@ -1800,7 +1821,7 @@ void update_wall_time(void)
18001821
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
18011822
offset = real_tk->cycle_interval;
18021823
#else
1803-
offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
1824+
offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
18041825
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
18051826
#endif
18061827

0 commit comments

Comments
 (0)