Skip to content

Commit c5538d0

Browse files
mrutland-arm authored and Thomas Gleixner committed
entry: Split kernel mode logic from irqentry_{enter,exit}()
The generic irqentry code has entry/exit functions specifically for exceptions taken from user mode, but doesn't have entry/exit functions specifically for exceptions taken from kernel mode.

It would be helpful to have separate entry/exit functions specifically for exceptions taken from kernel mode. This would make the structure of the entry code more consistent, and would make it easier for architectures to manage logic specific to exceptions taken from kernel mode.

Move the logic specific to kernel mode out of irqentry_enter() and irqentry_exit() into new irqentry_enter_from_kernel_mode() and irqentry_exit_to_kernel_mode() functions. These are marked __always_inline and placed in irq-entry-common.h, as with irqentry_enter_from_user_mode() and irqentry_exit_to_user_mode(), so that they can be inlined into architecture-specific wrappers. The existing out-of-line irqentry_enter() and irqentry_exit() functions are retained as callers of the new functions.

The lockdep assertion from irqentry_exit() is moved into irqentry_exit_to_user_mode() and irqentry_exit_to_kernel_mode(). This was previously missing from irqentry_exit_to_user_mode() when called directly, and any new lockdep assertion failure resulting from this change exposes a latent bug.

Aside from the lockdep change noted above, there should be no functional change as a result of this change.

[ tglx: Updated kernel doc ]

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Jinjie Ruan <ruanjinjie@huawei.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260407131650.3813777-5-mark.rutland@arm.com
1 parent eb1b51a commit c5538d0

2 files changed

Lines changed: 142 additions & 95 deletions

File tree

include/linux/irq-entry-common.h

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,8 @@ static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
304304
*/
305305
static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
306306
{
307+
lockdep_assert_irqs_disabled();
308+
307309
instrumentation_begin();
308310
irqentry_exit_to_user_mode_prepare(regs);
309311
instrumentation_end();
@@ -356,6 +358,138 @@ void dynamic_irqentry_exit_cond_resched(void);
356358
#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched()
357359
#endif /* CONFIG_PREEMPT_DYNAMIC */
358360

361+
/**
362+
* irqentry_enter_from_kernel_mode - Establish state before invoking the irq handler
363+
* @regs: Pointer to current's pt_regs
364+
*
365+
* Invoked from architecture specific entry code with interrupts disabled.
366+
* Can only be called when the interrupt entry came from kernel mode. The
367+
* calling code must be non-instrumentable. When the function returns all
368+
* state is correct and the subsequent functions can be instrumented.
369+
*
370+
* The function establishes state (lockdep, RCU (context tracking), tracing) and
371+
* is provided for architectures which require a strict split between entry from
372+
* kernel and user mode and therefore cannot use irqentry_enter() which handles
373+
* both entry modes.
374+
*
375+
* Returns: An opaque object that must be passed to irqentry_exit_to_kernel_mode().
376+
*/
377+
static __always_inline irqentry_state_t irqentry_enter_from_kernel_mode(struct pt_regs *regs)
378+
{
379+
irqentry_state_t ret = {
380+
.exit_rcu = false,
381+
};
382+
383+
/*
384+
* If this entry hit the idle task invoke ct_irq_enter() whether
385+
* RCU is watching or not.
386+
*
387+
* Interrupts can nest when the first interrupt invokes softirq
388+
* processing on return which enables interrupts.
389+
*
390+
* Scheduler ticks in the idle task can mark quiescent state and
391+
* terminate a grace period, if and only if the timer interrupt is
392+
* not nested into another interrupt.
393+
*
394+
* Checking for rcu_is_watching() here would prevent the nesting
395+
* interrupt to invoke ct_irq_enter(). If that nested interrupt is
396+
* the tick then rcu_flavor_sched_clock_irq() would wrongfully
397+
* assume that it is the first interrupt and eventually claim
398+
* quiescent state and end grace periods prematurely.
399+
*
400+
* Unconditionally invoke ct_irq_enter() so RCU state stays
401+
* consistent.
402+
*
403+
* TINY_RCU does not support EQS, so let the compiler eliminate
404+
* this part when enabled.
405+
*/
406+
if (!IS_ENABLED(CONFIG_TINY_RCU) &&
407+
(is_idle_task(current) || arch_in_rcu_eqs())) {
408+
/*
409+
* If RCU is not watching then the same careful
410+
* sequence vs. lockdep and tracing is required
411+
* as in irqentry_enter_from_user_mode().
412+
*/
413+
lockdep_hardirqs_off(CALLER_ADDR0);
414+
ct_irq_enter();
415+
instrumentation_begin();
416+
kmsan_unpoison_entry_regs(regs);
417+
trace_hardirqs_off_finish();
418+
instrumentation_end();
419+
420+
ret.exit_rcu = true;
421+
return ret;
422+
}
423+
424+
/*
425+
* If RCU is watching then RCU only wants to check whether it needs
426+
* to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
427+
* already contains a warning when RCU is not watching, so no point
428+
* in having another one here.
429+
*/
430+
lockdep_hardirqs_off(CALLER_ADDR0);
431+
instrumentation_begin();
432+
kmsan_unpoison_entry_regs(regs);
433+
rcu_irq_enter_check_tick();
434+
trace_hardirqs_off_finish();
435+
instrumentation_end();
436+
437+
return ret;
438+
}
439+
440+
/**
441+
* irqentry_exit_to_kernel_mode - Run preempt checks and establish state after
442+
* invoking the interrupt handler
443+
* @regs: Pointer to current's pt_regs
444+
* @state: Return value from matching call to irqentry_enter_from_kernel_mode()
445+
*
446+
* This is the counterpart of irqentry_enter_from_kernel_mode() and runs the
447+
* necessary preemption check if possible and required. It returns to the caller
448+
* with interrupts disabled and the correct state vs. tracing, lockdep and RCU
449+
* required to return to the interrupted context.
450+
*
451+
* It is the last action before returning to the low level ASM code which just
452+
* needs to return.
453+
*/
454+
static __always_inline void irqentry_exit_to_kernel_mode(struct pt_regs *regs,
455+
irqentry_state_t state)
456+
{
457+
lockdep_assert_irqs_disabled();
458+
459+
if (!regs_irqs_disabled(regs)) {
460+
/*
461+
* If RCU was not watching on entry this needs to be done
462+
* carefully and needs the same ordering of lockdep/tracing
463+
* and RCU as the return to user mode path.
464+
*/
465+
if (state.exit_rcu) {
466+
instrumentation_begin();
467+
/* Tell the tracer that IRET will enable interrupts */
468+
trace_hardirqs_on_prepare();
469+
lockdep_hardirqs_on_prepare();
470+
instrumentation_end();
471+
ct_irq_exit();
472+
lockdep_hardirqs_on(CALLER_ADDR0);
473+
return;
474+
}
475+
476+
instrumentation_begin();
477+
if (IS_ENABLED(CONFIG_PREEMPTION))
478+
irqentry_exit_cond_resched();
479+
480+
/* Covers both tracing and lockdep */
481+
trace_hardirqs_on();
482+
instrumentation_end();
483+
} else {
484+
/*
485+
* IRQ flags state is correct already. Just tell RCU if it
486+
* was not watching on entry.
487+
*/
488+
if (state.exit_rcu)
489+
ct_irq_exit();
490+
}
491+
}
492+
359493
/**
360494
* irqentry_enter - Handle state tracking on ordinary interrupt entries
361495
* @regs: Pointer to pt_regs of interrupted context

kernel/entry/common.c

Lines changed: 8 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -105,70 +105,16 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
105105

106106
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
107107
{
108-
irqentry_state_t ret = {
109-
.exit_rcu = false,
110-
};
111-
112108
if (user_mode(regs)) {
113-
irqentry_enter_from_user_mode(regs);
114-
return ret;
115-
}
109+
irqentry_state_t ret = {
110+
.exit_rcu = false,
111+
};
116112

117-
/*
118-
* If this entry hit the idle task invoke ct_irq_enter() whether
119-
* RCU is watching or not.
120-
*
121-
* Interrupts can nest when the first interrupt invokes softirq
122-
* processing on return which enables interrupts.
123-
*
124-
* Scheduler ticks in the idle task can mark quiescent state and
125-
* terminate a grace period, if and only if the timer interrupt is
126-
* not nested into another interrupt.
127-
*
128-
* Checking for rcu_is_watching() here would prevent the nesting
129-
* interrupt to invoke ct_irq_enter(). If that nested interrupt is
130-
* the tick then rcu_flavor_sched_clock_irq() would wrongfully
131-
* assume that it is the first interrupt and eventually claim
132-
* quiescent state and end grace periods prematurely.
133-
*
134-
* Unconditionally invoke ct_irq_enter() so RCU state stays
135-
* consistent.
136-
*
137-
* TINY_RCU does not support EQS, so let the compiler eliminate
138-
* this part when enabled.
139-
*/
140-
if (!IS_ENABLED(CONFIG_TINY_RCU) &&
141-
(is_idle_task(current) || arch_in_rcu_eqs())) {
142-
/*
143-
* If RCU is not watching then the same careful
144-
* sequence vs. lockdep and tracing is required
145-
* as in irqentry_enter_from_user_mode().
146-
*/
147-
lockdep_hardirqs_off(CALLER_ADDR0);
148-
ct_irq_enter();
149-
instrumentation_begin();
150-
kmsan_unpoison_entry_regs(regs);
151-
trace_hardirqs_off_finish();
152-
instrumentation_end();
153-
154-
ret.exit_rcu = true;
113+
irqentry_enter_from_user_mode(regs);
155114
return ret;
156115
}
157116

158-
/*
159-
* If RCU is watching then RCU only wants to check whether it needs
160-
* to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
161-
* already contains a warning when RCU is not watching, so no point
162-
* in having another one here.
163-
*/
164-
lockdep_hardirqs_off(CALLER_ADDR0);
165-
instrumentation_begin();
166-
kmsan_unpoison_entry_regs(regs);
167-
rcu_irq_enter_check_tick();
168-
trace_hardirqs_off_finish();
169-
instrumentation_end();
170-
171-
return ret;
117+
return irqentry_enter_from_kernel_mode(regs);
172118
}
173119

174120
/**
@@ -212,43 +158,10 @@ void dynamic_irqentry_exit_cond_resched(void)
212158

213159
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
214160
{
215-
lockdep_assert_irqs_disabled();
216-
217-
/* Check whether this returns to user mode */
218-
if (user_mode(regs)) {
161+
if (user_mode(regs))
219162
irqentry_exit_to_user_mode(regs);
220-
} else if (!regs_irqs_disabled(regs)) {
221-
/*
222-
* If RCU was not watching on entry this needs to be done
223-
* carefully and needs the same ordering of lockdep/tracing
224-
* and RCU as the return to user mode path.
225-
*/
226-
if (state.exit_rcu) {
227-
instrumentation_begin();
228-
/* Tell the tracer that IRET will enable interrupts */
229-
trace_hardirqs_on_prepare();
230-
lockdep_hardirqs_on_prepare();
231-
instrumentation_end();
232-
ct_irq_exit();
233-
lockdep_hardirqs_on(CALLER_ADDR0);
234-
return;
235-
}
236-
237-
instrumentation_begin();
238-
if (IS_ENABLED(CONFIG_PREEMPTION))
239-
irqentry_exit_cond_resched();
240-
241-
/* Covers both tracing and lockdep */
242-
trace_hardirqs_on();
243-
instrumentation_end();
244-
} else {
245-
/*
246-
* IRQ flags state is correct already. Just tell RCU if it
247-
* was not watching on entry.
248-
*/
249-
if (state.exit_rcu)
250-
ct_irq_exit();
251-
}
163+
else
164+
irqentry_exit_to_kernel_mode(regs, state);
252165
}
253166

254167
irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)

0 commit comments

Comments
 (0)