Skip to content

Commit 55cb9c5

Browse files
authored
Enable merged os_mmap for aot data sections (#3681)
Also enable merged os_mmap for AOT data and text sections, except on the NuttX and ESP-IDF platforms. Fixes the issue where aarch64 AOT modules fail to load on Android: #2274
1 parent 1362a30 commit 55cb9c5

2 files changed

Lines changed: 150 additions & 15 deletions

File tree

core/iwasm/aot/aot_loader.c

Lines changed: 143 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2378,7 +2378,6 @@ destroy_object_data_sections(AOTObjectDataSection *data_sections,
23782378
}
23792379
}
23802380
#endif
2381-
os_munmap(data_section->data, data_section->size);
23822381
}
23832382
wasm_runtime_free(data_sections);
23842383
}
@@ -2392,6 +2391,9 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
23922391
AOTObjectDataSection *data_sections;
23932392
uint64 size;
23942393
uint32 i;
2394+
uint64 total_size = 0;
2395+
uint32 page_size = os_getpagesize();
2396+
uint8 *merged_sections = NULL;
23952397

23962398
/* Allocate memory */
23972399
size = sizeof(AOTObjectDataSection) * (uint64)module->data_section_count;
@@ -2400,8 +2402,22 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
24002402
return false;
24012403
}
24022404

2403-
/* Create each data section */
2405+
/* First iteration: read data from buf, and calculate total memory needed */
24042406
for (i = 0; i < module->data_section_count; i++) {
2407+
read_string(buf, buf_end, data_sections[i].name);
2408+
read_uint32(buf, buf_end, data_sections[i].size);
2409+
CHECK_BUF(buf, buf_end, data_sections[i].size);
2410+
/* temporarily record the data ptr for merging; it will be replaced after mmap */
2411+
if (data_sections[i].size > 0)
2412+
data_sections[i].data = (uint8 *)buf;
2413+
buf += data_sections[i].size;
2414+
total_size += align_uint64((uint64)data_sections[i].size, page_size);
2415+
}
2416+
if (total_size > UINT32_MAX) {
2417+
set_error_buf(error_buf, error_buf_size, "data sections too large");
2418+
return false;
2419+
}
2420+
if (total_size > 0) {
24052421
int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
24062422
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
24072423
|| defined(BUILD_TARGET_RISCV64_LP64D) \
@@ -2412,29 +2428,33 @@ load_object_data_sections(const uint8 **p_buf, const uint8 *buf_end,
24122428
#else
24132429
int map_flags = MMAP_MAP_NONE;
24142430
#endif
2415-
2416-
read_string(buf, buf_end, data_sections[i].name);
2417-
read_uint32(buf, buf_end, data_sections[i].size);
2418-
24192431
/* Allocate memory for data */
2420-
if (data_sections[i].size > 0
2421-
&& !(data_sections[i].data =
2422-
os_mmap(NULL, data_sections[i].size, map_prot, map_flags,
2423-
os_get_invalid_handle()))) {
2432+
merged_sections = module->merged_data_sections =
2433+
os_mmap(NULL, (uint32)total_size, map_prot, map_flags,
2434+
os_get_invalid_handle());
2435+
if (!merged_sections) {
24242436
set_error_buf(error_buf, error_buf_size, "allocate memory failed");
24252437
return false;
24262438
}
2439+
module->merged_data_sections_size = (uint32)total_size;
24272440
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
24282441
#if !defined(BH_PLATFORM_LINUX_SGX) && !defined(BH_PLATFORM_WINDOWS) \
24292442
&& !defined(BH_PLATFORM_DARWIN)
24302443
/* address must be in the first 2 Gigabytes of
24312444
the process address space */
2432-
bh_assert((uintptr_t)data_sections[i].data < INT32_MAX);
2445+
bh_assert((uintptr_t)merged_sections < INT32_MAX);
24332446
#endif
24342447
#endif
2448+
}
24352449

2436-
read_byte_array(buf, buf_end, data_sections[i].data,
2437-
data_sections[i].size);
2450+
/* Second iteration: Create each data section */
2451+
for (i = 0; i < module->data_section_count; i++) {
2452+
if (data_sections[i].size > 0) {
2453+
bh_memcpy_s(merged_sections, data_sections[i].size,
2454+
data_sections[i].data, data_sections[i].size);
2455+
data_sections[i].data = merged_sections;
2456+
merged_sections += align_uint(data_sections[i].size, page_size);
2457+
}
24382458
}
24392459

24402460
*p_buf = buf;
@@ -2532,6 +2552,90 @@ load_init_data_section(const uint8 *buf, const uint8 *buf_end,
25322552
return false;
25332553
}
25342554

2555+
#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
2556+
static bool
2557+
try_merge_data_and_text(const uint8 **buf, const uint8 **buf_end,
2558+
AOTModule *module, char *error_buf,
2559+
uint32 error_buf_size)
2560+
{
2561+
uint8 *old_buf = (uint8 *)*buf;
2562+
uint8 *old_end = (uint8 *)*buf_end;
2563+
size_t code_size = (size_t)(old_end - old_buf);
2564+
uint32 page_size = os_getpagesize();
2565+
uint64 total_size = 0;
2566+
uint32 i;
2567+
uint8 *sections;
2568+
2569+
if (code_size == 0) {
2570+
return true;
2571+
}
2572+
2573+
/* calc total memory needed */
2574+
total_size += align_uint64((uint64)code_size, page_size);
2575+
for (i = 0; i < module->data_section_count; ++i) {
2576+
total_size +=
2577+
align_uint64((uint64)module->data_sections[i].size, page_size);
2578+
}
2579+
/* distance between .data and .text should not be greater than 4GB for some
2580+
* targets (eg. arm64 reloc need < 4G distance) */
2581+
if (total_size > UINT32_MAX) {
2582+
return false;
2583+
}
2584+
2585+
if (total_size != 0) {
2586+
int map_prot = MMAP_PROT_READ | MMAP_PROT_WRITE;
2587+
2588+
#if defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64) \
2589+
|| defined(BUILD_TARGET_RISCV64_LP64D) \
2590+
|| defined(BUILD_TARGET_RISCV64_LP64)
2591+
/* aot code and data in x86_64 must be in range 0 to 2G due
2592+
to relocation for R_X86_64_32/32S/PC32 */
2593+
int map_flags = MMAP_MAP_32BIT;
2594+
#else
2595+
int map_flags = MMAP_MAP_NONE;
2596+
#endif
2597+
2598+
sections = os_mmap(NULL, (uint32)total_size, map_prot, map_flags,
2599+
os_get_invalid_handle());
2600+
if (!sections) {
2601+
/* merge failed but maybe not critical for some targets */
2602+
return false;
2603+
}
2604+
if (os_mprotect(sections, code_size, map_prot | MMAP_PROT_EXEC) != 0) {
2605+
os_munmap(sections, (uint32)total_size);
2606+
return false;
2607+
}
2608+
2609+
module->merged_data_text_sections = sections;
2610+
module->merged_data_text_sections_size = (uint32)total_size;
2611+
2612+
/* order not essential just as compilers do: .text section first */
2613+
*buf = sections;
2614+
*buf_end = sections + code_size;
2615+
bh_memcpy_s(sections, code_size, old_buf, code_size);
2616+
os_munmap(old_buf, code_size);
2617+
sections += align_uint((uint32)code_size, page_size);
2618+
2619+
/* then .data sections */
2620+
for (i = 0; i < module->data_section_count; ++i) {
2621+
AOTObjectDataSection *data_section = module->data_sections + i;
2622+
uint8 *old_data = data_section->data;
2623+
data_section->data = sections;
2624+
bh_memcpy_s(data_section->data, data_section->size, old_data,
2625+
data_section->size);
2626+
sections += align_uint(data_section->size, page_size);
2627+
}
2628+
if (module->merged_data_sections) {
2629+
os_munmap(module->merged_data_sections,
2630+
module->merged_data_sections_size);
2631+
module->merged_data_sections = NULL;
2632+
module->merged_data_sections_size = 0;
2633+
}
2634+
}
2635+
return true;
2636+
}
2637+
#endif //! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
2638+
25352639
static bool
25362640
load_text_section(const uint8 *buf, const uint8 *buf_end, AOTModule *module,
25372641
char *error_buf, uint32 error_buf_size)
@@ -3749,6 +3853,17 @@ load_from_sections(AOTModule *module, AOTSection *sections,
37493853
return false;
37503854
break;
37513855
case AOT_SECTION_TYPE_TEXT:
3856+
#if !defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
3857+
/* try to merge .data and .text, with exceptions:
3858+
* 1. XIP mode
3859+
* 2. pre-mmapped module load from aot_load_from_sections()
3860+
* 3. nuttx & esp-idf: have separate region for MMAP_PROT_EXEC
3861+
*/
3862+
if (!module->is_indirect_mode && is_load_from_file_buf)
3863+
if (!try_merge_data_and_text(&buf, &buf_end, module,
3864+
error_buf, error_buf_size))
3865+
LOG_WARNING("merge .data and .text sections failed");
3866+
#endif //! defined(BH_PLATFORM_NUTTX) && !defined(BH_PLATFORM_ESP_IDF)
37523867
if (!load_text_section(buf, buf_end, module, error_buf,
37533868
error_buf_size))
37543869
return false;
@@ -4179,7 +4294,11 @@ load(const uint8 *buf, uint32 size, AOTModule *module,
41794294
if (!ret) {
41804295
/* If load_from_sections() fails, then aot text is destroyed
41814296
in destroy_sections() */
4182-
destroy_sections(section_list, module->is_indirect_mode ? false : true);
4297+
destroy_sections(section_list,
4298+
module->is_indirect_mode
4299+
|| module->merged_data_text_sections
4300+
? false
4301+
: true);
41834302
/* aot_unload() won't destroy aot text again */
41844303
module->code = NULL;
41854304
}
@@ -4329,7 +4448,8 @@ aot_unload(AOTModule *module)
43294448
}
43304449
#endif
43314450

4332-
if (module->code && !module->is_indirect_mode) {
4451+
if (module->code && !module->is_indirect_mode
4452+
&& !module->merged_data_text_sections) {
43334453
/* The layout is: literal size + literal + code (with plt table) */
43344454
uint8 *mmap_addr = module->literal - sizeof(uint32);
43354455
uint32 total_size =
@@ -4364,6 +4484,14 @@ aot_unload(AOTModule *module)
43644484
destroy_object_data_sections(module->data_sections,
43654485
module->data_section_count);
43664486

4487+
if (module->merged_data_sections)
4488+
os_munmap(module->merged_data_sections,
4489+
module->merged_data_sections_size);
4490+
4491+
if (module->merged_data_text_sections)
4492+
os_munmap(module->merged_data_text_sections,
4493+
module->merged_data_text_sections_size);
4494+
43674495
#if WASM_ENABLE_DEBUG_AOT != 0
43684496
jit_code_entry_destroy(module->elf_hdr);
43694497
#endif

core/iwasm/aot/aot_runtime.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,13 @@ typedef struct AOTModule {
315315

316316
/* Whether the underlying wasm binary buffer can be freed */
317317
bool is_binary_freeable;
318+
319+
/* `.data` sections merged into one mmapped region to reduce TLB misses */
320+
uint8 *merged_data_sections;
321+
uint32 merged_data_sections_size;
322+
/* `.data` and `.text` sections merged into one large mmaped section */
323+
uint8 *merged_data_text_sections;
324+
uint32 merged_data_text_sections_size;
318325
} AOTModule;
319326

320327
#define AOTMemoryInstance WASMMemoryInstance

0 commit comments

Comments
 (0)