Commit 27fa6e5

Ard Biesheuvel authored and Alex Shi committed
arm64: lse: deal with clobbered IP registers after branch via PLT
The LSE atomics implementation uses runtime patching to patch in calls
to out of line non-LSE atomics implementations on cores that lack
hardware support for LSE. To avoid paying the overhead cost of a
function call even if no call ends up being made, the bl instruction is
kept invisible to the compiler, and the out of line implementations
preserve all registers, not just the ones that they are required to
preserve as per the AAPCS64.

However, commit fd045f6cd98e ("arm64: add support for module PLTs")
added support for routing branch instructions via veneers if the branch
target offset exceeds the range of the ordinary relative branch
instructions. Since this deals with jump and call instructions that are
exposed to ELF relocations, the PLT code uses x16 to hold the address of
the branch target when it performs an indirect branch-to-register,
something which is explicitly allowed by the AAPCS64 (and ordinary
compiler generated code does not expect register x16 or x17 to retain
their values across a bl instruction).

Since the lse runtime patched bl instructions don't adhere to the
AAPCS64, they don't deal with this clobbering of registers x16 and x17.
So add them to the clobber list of the asm() statements that perform the
call instructions, and drop x16 and x17 from the list of registers that
are callee saved in the out of line non-LSE implementations.

In addition, since we have given these functions two scratch registers,
they no longer need to stack/unstack temp registers.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: factored clobber list into #define, updated Makefile comment]
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 5be8b70af1ca78cefb8b756d157532360a5fd663)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
1 parent 3d3fe7c commit 27fa6e5
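To make the failure mode concrete, here is a minimal sketch of the pattern being fixed (illustrative only: example_atomic_add and out_of_line_atomic_add are hypothetical stand-ins for the real __LL_SC_PREFIX()-generated names). Because the bl is hidden inside the asm() body, the compiler never sees a call, so every register the callee, or a PLT veneer on the way there, may overwrite must be named in the clobber list by hand; the veneer performs its indirect branch through x16, which is why x16 and x17 join x30 below.

#include <linux/atomic.h>	/* atomic_t; assumes kernel context */

static inline void example_atomic_add(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	asm volatile(
	/* runtime-patched: a bl to the out-of-line LL/SC helper, which may
	 * be routed via a module PLT veneer if it is out of branch range */
	"	bl	out_of_line_atomic_add\n"
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: "x16", "x17", "x30");	/* x30: the bl; x16/x17: the veneer */
}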

3 files changed: 27 additions & 25 deletions

arch/arm64/include/asm/atomic_lse.h

Lines changed: 19 additions & 19 deletions
@@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v)
 	"	stclr	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_or(int i, atomic_t *v)
@@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v)
 	"	stset	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_xor(int i, atomic_t *v)
@@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v)
 	"	steor	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_add(int i, atomic_t *v)
@@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v)
 	"	stadd	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)				\
@@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v)	\
 	"	add	%w[i], %w[i], w30")				\
 	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return w0;							\
 }
@@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v)
 	"	stclr	%w[i], %[v]")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_sub(int i, atomic_t *v)
@@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	"	stadd	%w[i], %[v]")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC_OP_SUB_RETURN(name, mb, cl...)				\
@@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v)	\
 	"	add	%w[i], %w[i], w30")				\
 	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS , ##cl);					\
 									\
 	return w0;							\
 }
@@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v)
 	"	stclr	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_or(long i, atomic64_t *v)
@@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v)
 	"	stset	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_xor(long i, atomic64_t *v)
@@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v)
 	"	steor	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_add(long i, atomic64_t *v)
@@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v)
 	"	stadd	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)				\
@@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v)	\
 	"	add	%[i], %[i], x30")				\
 	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
 	"	stclr	%[i], %[v]")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_sub(long i, atomic64_t *v)
@@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
 	"	stadd	%[i], %[v]")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)				\
@@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v)	\
 	"	add	%[i], %[i], x30")				\
 	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	"2:")
 	: [ret] "+&r" (x0), [v] "+Q" (v->counter)
 	:
-	: "x30", "cc", "memory");
+	: __LL_SC_CLOBBERS, "cc", "memory");
 
 	return x0;
 }
@@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
 	"	mov	%" #w "[ret], " #w "30")			\
 	: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)		\
 	: [old] "r" (x1), [new] "r" (x2)				\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1,		\
 	  [v] "+Q" (*(unsigned long *)ptr)				\
 	: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),		\
 	  [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)		\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }

arch/arm64/include/asm/lse.h

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ __asm__(".arch_extension	lse");
 
 /* Macro for constructing calls to out-of-line ll/sc atomics */
 #define __LL_SC_CALL(op)	"bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+#define __LL_SC_CLOBBERS	"x16", "x17", "x30"
 
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
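Taken together with the call macro above it, a call site in atomic_lse.h now reads roughly as follows; this is a reconstruction for illustration from the hunks shown on this page plus the pre-existing surrounding code, not itself part of the diff (__LL_SC_ATOMIC() is the file's thin wrapper around __LL_SC_CALL()):

static inline void atomic_andnot(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;
	register atomic_t *x1 asm ("x1") = v;

	/* ARM64_LSE_ATOMIC_INSN() emits the bl-based LL/SC variant and
	 * patches in the LSE instruction at runtime on capable CPUs */
	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
	"	stclr	%w[i], %[v]\n")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: __LL_SC_CLOBBERS);	/* "x16", "x17", "x30" */
}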

arch/arm64/lib/Makefile

Lines changed: 7 additions & 6 deletions
@@ -4,15 +4,16 @@ lib-y		:= bitops.o clear_user.o delay.o copy_from_user.o	\
 		   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o	\
 		   strchr.o strrchr.o
 
-# Tell the compiler to treat all general purpose registers as
-# callee-saved, which allows for efficient runtime patching of the bl
-# instruction in the caller with an atomic instruction when supported by
-# the CPU. Result and argument registers are handled correctly, based on
-# the function prototype.
+# Tell the compiler to treat all general purpose registers (with the
+# exception of the IP registers, which are already handled by the caller
+# in case of a PLT) as callee-saved, which allows for efficient runtime
+# patching of the bl instruction in the caller with an atomic instruction
+# when supported by the CPU. Result and argument registers are handled
+# correctly, based on the function prototype.
 lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
 CFLAGS_atomic_ll_sc.o	:= -fcall-used-x0 -ffixed-x1 -ffixed-x2		\
 		   -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6		\
 		   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9		\
 		   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12	\
 		   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15	\
-		   -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18
+		   -fcall-saved-x18
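-ffixed-reg, -fcall-saved-reg and -fcall-used-reg are generic GCC code-generation options, not kernel inventions. The register contract they establish for atomic_ll_sc.o after this change can be summarised as follows (an illustrative summary, not a comment from the tree):

/*
 * Caller's view of the out-of-line LL/SC helpers after this patch:
 *
 *   x0           argument/result, clobbered      (-fcall-used-x0)
 *   x1-x7        arguments, never modified       (-ffixed-x1 .. -ffixed-x7)
 *   x8-x15, x18  saved/restored by the helper    (-fcall-saved-*)
 *   x16, x17     scratch: a PLT veneer between caller and helper may
 *                clobber them anyway, so the caller lists them in
 *                __LL_SC_CLOBBERS instead of having the helper save them
 *   x19-x29, sp  callee-saved under the normal AAPCS64 rules
 *   x30          clobbered by the bl itself
 */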
