Skip to content

Commit cbc2078

Browse files
authored
AOT call stack optimizations (#3773)
- Implement TINY / STANDARD frame modes - TINY mode is only able to keep track of the IP and the function index, whereas STANDARD mode provides more capabilities (parameters, stack pointer etc.). - Implement FRAME_PER_FUNCTION / FRAME_PER_CALL modes - frame-per-function mode adds code at the beginning and at the end of each function for allocating / deallocating the stack frame, whereas in frame-per-call mode the frame is allocated before each call. The exception is a call to an imported function, where frame-per-function mode also allocates the stack frame before the `call` instruction (as it can't instrument the imported function). At the moment TINY + FRAME_PER_FUNCTION is automatically enabled in case GC and perf profiling are disabled and the `values` call stack feature is not requested. In all other cases STANDARD + FRAME_PER_CALL is used. STANDARD + FRAME_PER_FUNCTION and TINY + FRAME_PER_CALL are currently not implemented but possible, and might be enabled in the future. ps. #3758
1 parent 0599351 commit cbc2078

17 files changed

Lines changed: 591 additions & 86 deletions

core/iwasm/aot/aot_loader.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,10 @@ load_target_info_section(const uint8 *buf, const uint8 *buf_end,
597597
return false;
598598
}
599599

600+
/* Keep a copy of the feature flags recorded in the AOT file so the
   runtime can later tell which stack frame layout the compiled code
   uses. The guard must be WASM_ENABLE_AOT_STACK_FRAME: that is the
   macro under which AOTModule.feature_flags is declared and under
   which the frame helpers in aot_runtime.c read it. Guarding with a
   different macro either leaves the flags unset or fails to compile
   when the two macros disagree. */
#if WASM_ENABLE_AOT_STACK_FRAME != 0
    module->feature_flags = target_info.feature_flags;
#endif
603+
600604
/* Finally, check feature flags */
601605
return check_feature_flags(error_buf, error_buf_size,
602606
target_info.feature_flags);

core/iwasm/aot/aot_runtime.c

Lines changed: 161 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55

66
#include "aot_runtime.h"
7+
#include "../compilation/aot_stack_frame.h"
78
#include "bh_log.h"
89
#include "mem_alloc.h"
910
#include "../common/wasm_runtime_common.h"
@@ -72,6 +73,10 @@ bh_static_assert(offsetof(AOTFrame, sp) == sizeof(uintptr_t) * 5);
7273
bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6);
7374
bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7);
7475

76+
bh_static_assert(offsetof(AOTTinyFrame, func_index) == sizeof(uint32) * 0);
77+
bh_static_assert(offsetof(AOTTinyFrame, ip_offset) == sizeof(uint32) * 1);
78+
bh_static_assert(sizeof(AOTTinyFrame) == sizeof(uint32) * 2);
79+
7580
static void
7681
set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
7782
{
@@ -110,6 +115,55 @@ runtime_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
110115
return mem;
111116
}
112117

118+
#if WASM_ENABLE_AOT_STACK_FRAME != 0
119+
static bool
120+
is_tiny_frame(WASMExecEnv *exec_env)
121+
{
122+
AOTModule *module =
123+
(AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module;
124+
125+
return module->feature_flags & WASM_FEATURE_TINY_STACK_FRAME;
126+
}
127+
128+
static bool
129+
is_frame_per_function(WASMExecEnv *exec_env)
130+
{
131+
AOTModule *module =
132+
(AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module;
133+
134+
return module->feature_flags & WASM_FEATURE_FRAME_PER_FUNCTION;
135+
}
136+
137+
static void *
138+
get_top_frame(WASMExecEnv *exec_env)
139+
{
140+
if (is_tiny_frame(exec_env)) {
141+
return exec_env->wasm_stack.top > exec_env->wasm_stack.bottom
142+
? exec_env->wasm_stack.top - sizeof(AOTTinyFrame)
143+
: NULL;
144+
}
145+
else {
146+
return exec_env->cur_frame;
147+
}
148+
}
149+
150+
static void *
151+
get_prev_frame(WASMExecEnv *exec_env, void *cur_frame)
152+
{
153+
bh_assert(cur_frame);
154+
155+
if (is_tiny_frame(exec_env)) {
156+
if ((uint8 *)cur_frame == exec_env->wasm_stack.bottom) {
157+
return NULL;
158+
}
159+
return ((AOTTinyFrame *)cur_frame) - 1;
160+
}
161+
else {
162+
return ((AOTFrame *)cur_frame)->prev_frame;
163+
}
164+
}
165+
#endif
166+
113167
static bool
114168
check_global_init_expr(const AOTModule *module, uint32 global_index,
115169
char *error_buf, uint32 error_buf_size)
@@ -2265,7 +2319,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
22652319
uint32 ext_ret_cell = wasm_get_cell_num(ext_ret_types, ext_ret_count);
22662320
uint64 size;
22672321
#if WASM_ENABLE_AOT_STACK_FRAME != 0
2268-
struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
2322+
void *prev_frame = get_top_frame(exec_env);
22692323
#endif
22702324

22712325
/* Allocate memory all arguments */
@@ -2296,7 +2350,8 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
22962350
}
22972351

22982352
#if WASM_ENABLE_AOT_STACK_FRAME != 0
2299-
if (!aot_alloc_frame(exec_env, function->func_index)) {
2353+
if (!is_frame_per_function(exec_env)
2354+
&& !aot_alloc_frame(exec_env, function->func_index)) {
23002355
if (argv1 != argv1_buf)
23012356
wasm_runtime_free(argv1);
23022357
return false;
@@ -2324,7 +2379,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
23242379
/* Free all frames allocated, note that some frames
23252380
may be allocated in AOT code and haven't been
23262381
freed if exception occurred */
2327-
while (exec_env->cur_frame != prev_frame)
2382+
while (get_top_frame(exec_env) != prev_frame)
23282383
aot_free_frame(exec_env);
23292384
#endif
23302385
if (!ret) {
@@ -2367,9 +2422,12 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
23672422
}
23682423
else {
23692424
#if WASM_ENABLE_AOT_STACK_FRAME != 0
2370-
struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
2371-
2372-
if (!aot_alloc_frame(exec_env, function->func_index)) {
2425+
void *prev_frame = get_top_frame(exec_env);
2426+
/* Only allocate frame for frame-per-call mode; in the
2427+
frame-per-function mode the frame is allocated at the
2428+
beginning of the function. */
2429+
if (!is_frame_per_function(exec_env)
2430+
&& !aot_alloc_frame(exec_env, function->func_index)) {
23732431
return false;
23742432
}
23752433
#endif
@@ -2394,7 +2452,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
23942452
/* Free all frames allocated, note that some frames
23952453
may be allocated in AOT code and haven't been
23962454
freed if exception occurred */
2397-
while (exec_env->cur_frame != prev_frame)
2455+
while (get_top_frame(exec_env) != prev_frame)
23982456
aot_free_frame(exec_env);
23992457
#endif
24002458

@@ -2880,7 +2938,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
28802938
goto fail;
28812939
}
28822940
#if WASM_ENABLE_AOT_STACK_FRAME != 0
2883-
struct WASMInterpFrame *prev_frame = exec_env->cur_frame;
2941+
void *prev_frame = get_top_frame(exec_env);
28842942

28852943
if (!aot_alloc_frame(exec_env, func_idx)) {
28862944
goto fail;
@@ -2894,7 +2952,7 @@ aot_invoke_native(WASMExecEnv *exec_env, uint32 func_idx, uint32 argc,
28942952
/* Free all frames allocated, note that some frames
28952953
may be allocated in AOT code and haven't been
28962954
freed if exception occurred */
2897-
while (exec_env->cur_frame != prev_frame)
2955+
while (get_top_frame(exec_env) != prev_frame)
28982956
aot_free_frame(exec_env);
28992957
#endif
29002958
}
@@ -3622,8 +3680,8 @@ get_func_name_from_index(const AOTModuleInstance *module_inst,
36223680
WASM_ENABLE_PERF_PROFILING != 0 */
36233681

36243682
#if WASM_ENABLE_GC == 0
3625-
bool
3626-
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
3683+
static bool
3684+
aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index)
36273685
{
36283686
AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
36293687
#if WASM_ENABLE_PERF_PROFILING != 0
@@ -3670,8 +3728,8 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
36703728

36713729
#else /* else of WASM_ENABLE_GC == 0 */
36723730

3673-
bool
3674-
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
3731+
static bool
3732+
aot_alloc_standard_frame(WASMExecEnv *exec_env, uint32 func_index)
36753733
{
36763734
AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
36773735
AOTModule *module = (AOTModule *)module_inst->module;
@@ -3727,11 +3785,48 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
37273785
}
37283786
#endif /* end of WASM_ENABLE_GC == 0 */
37293787

3788+
static bool
3789+
aot_alloc_tiny_frame(WASMExecEnv *exec_env, uint32 func_index)
3790+
{
3791+
AOTTinyFrame *new_frame = (AOTTinyFrame *)exec_env->wasm_stack.top;
3792+
3793+
if ((uint8 *)new_frame > exec_env->wasm_stack.top_boundary) {
3794+
aot_set_exception((WASMModuleInstance *)exec_env->module_inst,
3795+
"wasm operand stack overflow");
3796+
return false;
3797+
}
3798+
3799+
new_frame->func_index = func_index;
3800+
exec_env->wasm_stack.top += sizeof(AOTTinyFrame);
3801+
return true;
3802+
}
3803+
3804+
bool
3805+
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
3806+
{
3807+
AOTModule *module =
3808+
(AOTModule *)((AOTModuleInstance *)exec_env->module_inst)->module;
3809+
3810+
if (is_frame_per_function(exec_env)
3811+
&& func_index >= module->import_func_count) {
3812+
/* in frame per function mode the frame is allocated at
3813+
the beginning of each frame, so we only need to allocate
3814+
the frame for imported functions */
3815+
return true;
3816+
}
3817+
if (is_tiny_frame(exec_env)) {
3818+
return aot_alloc_tiny_frame(exec_env, func_index);
3819+
}
3820+
else {
3821+
return aot_alloc_standard_frame(exec_env, func_index);
3822+
}
3823+
}
3824+
37303825
static inline void
3731-
aot_free_frame_internal(WASMExecEnv *exec_env)
3826+
aot_free_standard_frame(WASMExecEnv *exec_env)
37323827
{
37333828
AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame;
3734-
AOTFrame *prev_frame = cur_frame->prev_frame;
3829+
AOTFrame *prev_frame = (AOTFrame *)cur_frame->prev_frame;
37353830

37363831
#if WASM_ENABLE_PERF_PROFILING != 0
37373832
uint64 time_elapsed =
@@ -3751,13 +3846,24 @@ aot_free_frame_internal(WASMExecEnv *exec_env)
37513846
exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame;
37523847
}
37533848

3849+
static inline void
3850+
aot_free_tiny_frame(WASMExecEnv *exec_env)
3851+
{
3852+
exec_env->wasm_stack.top =
3853+
get_prev_frame(exec_env, exec_env->wasm_stack.top);
3854+
}
3855+
37543856
void
37553857
aot_free_frame(WASMExecEnv *exec_env)
37563858
{
3757-
aot_free_frame_internal(exec_env);
3859+
if (is_tiny_frame(exec_env)) {
3860+
aot_free_tiny_frame(exec_env);
3861+
}
3862+
else {
3863+
aot_free_standard_frame(exec_env);
3864+
}
37583865
}
37593866

3760-
37613867
void
37623868
aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame)
37633869
{
@@ -3806,14 +3912,13 @@ aot_frame_update_profile_info(WASMExecEnv *exec_env, bool alloc_frame)
38063912
bool
38073913
aot_create_call_stack(struct WASMExecEnv *exec_env)
38083914
{
3809-
AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame,
3810-
*first_frame = cur_frame;
38113915
AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
38123916
AOTModule *module = (AOTModule *)module_inst->module;
38133917
uint32 n = 0;
38143918

3815-
while (cur_frame) {
3816-
cur_frame = cur_frame->prev_frame;
3919+
void *top_frame = get_top_frame(exec_env);
3920+
while (top_frame) {
3921+
top_frame = get_prev_frame(exec_env, top_frame);
38173922
n++;
38183923
}
38193924

@@ -3823,28 +3928,46 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
38233928
return false;
38243929
}
38253930

3826-
cur_frame = first_frame;
3827-
while (cur_frame) {
3931+
top_frame = get_top_frame(exec_env);
3932+
while (n-- > 0) {
3933+
uint32 func_index, ip_offset;
3934+
uint32 *lp = NULL;
3935+
#if WASM_ENABLE_GC != 0
3936+
uint32 *sp = NULL;
3937+
uint8 *frame_ref = NULL;
3938+
#endif
3939+
if (is_tiny_frame(exec_env)) {
3940+
AOTTinyFrame *frame = (AOTTinyFrame *)top_frame;
3941+
func_index = (uint32)frame->func_index;
3942+
ip_offset = (uint32)frame->ip_offset;
3943+
}
3944+
else {
3945+
AOTFrame *frame = (AOTFrame *)top_frame;
3946+
func_index = (uint32)frame->func_index;
3947+
ip_offset = (uint32)frame->ip_offset;
3948+
lp = frame->lp;
3949+
#if WASM_ENABLE_GC != 0
3950+
sp = frame->sp;
3951+
frame_ref = frame->frame_ref;
3952+
#endif
3953+
}
38283954
WASMCApiFrame frame = { 0 };
38293955
uint32 max_local_cell_num, max_stack_cell_num;
38303956
uint32 all_cell_num, lp_size;
38313957

38323958
frame.instance = module_inst;
38333959
frame.module_offset = 0;
3834-
frame.func_index = (uint32)cur_frame->func_index;
3835-
frame.func_offset = (uint32)cur_frame->ip_offset;
3836-
frame.func_name_wp = get_func_name_from_index(
3837-
module_inst, (uint32)cur_frame->func_index);
3838-
3839-
if (cur_frame->func_index >= module->import_func_count) {
3840-
uint32 aot_func_idx =
3841-
(uint32)(cur_frame->func_index - module->import_func_count);
3960+
frame.func_index = func_index;
3961+
frame.func_offset = ip_offset;
3962+
frame.func_name_wp = get_func_name_from_index(module_inst, func_index);
3963+
3964+
if (func_index >= module->import_func_count) {
3965+
uint32 aot_func_idx = func_index - module->import_func_count;
38423966
max_local_cell_num = module->max_local_cell_nums[aot_func_idx];
38433967
max_stack_cell_num = module->max_stack_cell_nums[aot_func_idx];
38443968
}
38453969
else {
3846-
AOTFuncType *func_type =
3847-
module->import_funcs[cur_frame->func_index].func_type;
3970+
AOTFuncType *func_type = module->import_funcs[func_index].func_type;
38483971
max_local_cell_num =
38493972
func_type->param_cell_num > 2 ? func_type->param_cell_num : 2;
38503973
max_stack_cell_num = 0;
@@ -3856,22 +3979,21 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
38563979
#else
38573980
lp_size = align_uint(all_cell_num * 5, 4);
38583981
#endif
3859-
if (lp_size > 0) {
3982+
if (lp_size > 0 && !is_tiny_frame(exec_env)) {
38603983
if (!(frame.lp = wasm_runtime_malloc(lp_size))) {
38613984
destroy_c_api_frames(module_inst->frames);
38623985
return false;
38633986
}
3864-
bh_memcpy_s(frame.lp, lp_size, cur_frame->lp, lp_size);
3987+
bh_memcpy_s(frame.lp, lp_size, lp, lp_size);
38653988

38663989
#if WASM_ENABLE_GC != 0
38673990
uint32 local_ref_flags_cell_num =
38683991
module->func_local_ref_flags[frame.func_index]
38693992
.local_ref_flag_cell_num;
38703993
uint8 *local_ref_flags =
38713994
module->func_local_ref_flags[frame.func_index].local_ref_flags;
3872-
frame.sp = frame.lp + (cur_frame->sp - cur_frame->lp);
3873-
frame.frame_ref = (uint8 *)frame.lp
3874-
+ (cur_frame->frame_ref - (uint8 *)cur_frame->lp);
3995+
frame.sp = frame.lp + (sp - lp);
3996+
frame.frame_ref = (uint8 *)frame.lp + (frame_ref - (uint8 *)lp);
38753997
/* copy local ref flags from AOT module */
38763998
bh_memcpy_s(frame.frame_ref, local_ref_flags_cell_num,
38773999
local_ref_flags, lp_size);
@@ -3885,7 +4007,7 @@ aot_create_call_stack(struct WASMExecEnv *exec_env)
38854007
return false;
38864008
}
38874009

3888-
cur_frame = cur_frame->prev_frame;
4010+
top_frame = get_prev_frame(exec_env, top_frame);
38894011
}
38904012

38914013
return true;

core/iwasm/aot/aot_runtime.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,15 @@ extern "C" {
2525
#define WASM_FEATURE_REF_TYPES (1 << 3)
2626
#define WASM_FEATURE_GARBAGE_COLLECTION (1 << 4)
2727
#define WASM_FEATURE_EXCEPTION_HANDLING (1 << 5)
28-
#define WASM_FEATURE_MEMORY64 (1 << 6)
28+
#define WASM_FEATURE_TINY_STACK_FRAME (1 << 6)
2929
#define WASM_FEATURE_MULTI_MEMORY (1 << 7)
3030
#define WASM_FEATURE_DYNAMIC_LINKING (1 << 8)
3131
#define WASM_FEATURE_COMPONENT_MODEL (1 << 9)
3232
#define WASM_FEATURE_RELAXED_SIMD (1 << 10)
3333
#define WASM_FEATURE_FLEXIBLE_VECTORS (1 << 11)
34+
/* Stack frame is created at the beginning of the function,
35+
* and not at the beginning of each function call */
36+
#define WASM_FEATURE_FRAME_PER_FUNCTION (1 << 12)
3437

3538
typedef enum AOTSectionType {
3639
AOT_SECTION_TYPE_TARGET_INFO = 0,
@@ -326,6 +329,10 @@ typedef struct AOTModule {
326329
/* `.data` and `.text` sections merged into one large mmaped section */
327330
uint8 *merged_data_text_sections;
328331
uint32 merged_data_text_sections_size;
332+
333+
#if WASM_ENABLE_AOT_STACK_FRAME != 0
334+
uint32 feature_flags;
335+
#endif
329336
} AOTModule;
330337

331338
#define AOTMemoryInstance WASMMemoryInstance

0 commit comments

Comments
 (0)