@@ -9125,6 +9125,9 @@ preserve_referenced_local(WASMLoaderContext *loader_ctx, uint8 opcode,
91259125
91269126 if (is_32bit_type(cur_type))
91279127 i++;
9128+ else if (cur_type == VALUE_TYPE_V128) {
9129+ i += 4;
9130+ }
91289131 else
91299132 i += 2;
91309133 }
@@ -9155,7 +9158,10 @@ preserve_local_for_block(WASMLoaderContext *loader_ctx, uint8 opcode,
91559158 return false;
91569159 }
91579160
9158- if (is_32bit_type(cur_type)) {
9161+ if (cur_type == VALUE_TYPE_V128) {
9162+ i += 4;
9163+ }
9164+ else if (is_32bit_type(cur_type)) {
91599165 i++;
91609166 }
91619167 else {
@@ -9498,6 +9504,8 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
94989504 || (type == VALUE_TYPE_EXTERNREF
94999505 && *(int32 *)value == c->value.i32)
95009506#endif
9507+ || (type == VALUE_TYPE_V128
9508+ && (0 == memcmp(value, &(c->value.v128), sizeof(V128))))
95019509 || (type == VALUE_TYPE_F64
95029510 && (0 == memcmp(value, &(c->value.f64), sizeof(float64))))
95039511 || (type == VALUE_TYPE_F32
@@ -9508,6 +9516,9 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
95089516 }
95099517 if (is_32bit_type(c->value_type))
95109518 operand_offset += 1;
9519+ else if (c->value_type == VALUE_TYPE_V128) {
9520+ operand_offset += 4;
9521+ }
95119522 else
95129523 operand_offset += 2;
95139524 }
@@ -9559,6 +9570,10 @@ wasm_loader_get_const_offset(WASMLoaderContext *ctx, uint8 type, void *value,
95599570 c->value.i32 = *(int32 *)value;
95609571 ctx->const_cell_num++;
95619572 break;
9573+ case VALUE_TYPE_V128:
9574+ bh_memcpy_s(&(c->value.v128), sizeof(WASMValue), value,
9575+ sizeof(V128));
9576+ ctx->const_cell_num++;
95629577#if WASM_ENABLE_REF_TYPES != 0 && WASM_ENABLE_GC == 0
95639578 case VALUE_TYPE_EXTERNREF:
95649579 case VALUE_TYPE_FUNCREF:
@@ -9760,17 +9775,22 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
97609775 block_type, &return_types, &reftype_maps, &reftype_map_count);
97619776#endif
97629777
9763- /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64 instead
9764- * of EXT_OP_COPY_STACK_VALUES for interpreter performance. */
9778+ /* If there is only one return value, use EXT_OP_COPY_STACK_TOP/_I64/_V128
9779+ * instead of EXT_OP_COPY_STACK_VALUES for interpreter performance. */
97659780 if (return_count == 1) {
97669781 uint8 cell = (uint8)wasm_value_type_cell_num(return_types[0]);
9767- if (cell <= 2 /* V128 isn't supported whose cell num is 4 */
9768- && block->dynamic_offset != *(loader_ctx->frame_offset - cell)) {
9782+ if (block->dynamic_offset != *(loader_ctx->frame_offset - cell)) {
97699783 /* insert op_copy before else opcode */
97709784 if (opcode == WASM_OP_ELSE)
97719785 skip_label();
9772- emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP
9773- : EXT_OP_COPY_STACK_TOP_I64);
9786+
9787+ if (cell == 4) {
9788+ emit_label(EXT_OP_COPY_STACK_TOP_V128);
9789+ }
9790+ else {
9791+ emit_label(cell == 1 ? EXT_OP_COPY_STACK_TOP
9792+ : EXT_OP_COPY_STACK_TOP_I64);
9793+ }
97749794 emit_operand(loader_ctx, *(loader_ctx->frame_offset - cell));
97759795 emit_operand(loader_ctx, block->dynamic_offset);
97769796
@@ -9805,11 +9825,37 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
98059825 for (i = (int32)return_count - 1; i >= 0; i--) {
98069826 uint8 cells = (uint8)wasm_value_type_cell_num(return_types[i]);
98079827
9808- frame_offset -= cells;
9809- dynamic_offset -= cells;
9810- if (dynamic_offset != *frame_offset) {
9811- value_count++;
9812- total_cel_num += cells;
9828+ if (frame_offset - cells < loader_ctx->frame_offset_bottom) {
9829+ set_error_buf(error_buf, error_buf_size, "frame offset underflow");
9830+ goto fail;
9831+ }
9832+
9833+ if (cells == 4) {
9834+ bool needs_copy = false;
9835+ int16 v128_dynamic = dynamic_offset - cells;
9836+
9837+ for (int j = 0; j < 4; j++) {
9838+ if (*(frame_offset - j - 1) != (v128_dynamic + j)) {
9839+ needs_copy = true;
9840+ break;
9841+ }
9842+ }
9843+
9844+ if (needs_copy) {
9845+ value_count++;
9846+ total_cel_num += cells;
9847+ }
9848+
9849+ frame_offset -= cells;
9850+ dynamic_offset = v128_dynamic;
9851+ }
9852+ else {
9853+ frame_offset -= cells;
9854+ dynamic_offset -= cells;
9855+ if (dynamic_offset != *frame_offset) {
9856+ value_count++;
9857+ total_cel_num += cells;
9858+ }
98139859 }
98149860 }
98159861
@@ -9845,19 +9891,50 @@ reserve_block_ret(WASMLoaderContext *loader_ctx, uint8 opcode,
98459891 dynamic_offset = dynamic_offset_org;
98469892 for (i = (int32)return_count - 1, j = 0; i >= 0; i--) {
98479893 uint8 cell = (uint8)wasm_value_type_cell_num(return_types[i]);
9848- frame_offset -= cell;
9849- dynamic_offset -= cell;
9850- if (dynamic_offset != *frame_offset) {
9851- /* cell num */
9852- cells[j] = cell;
9853- /* src offset */
9854- src_offsets[j] = *frame_offset;
9855- /* dst offset */
9856- dst_offsets[j] = dynamic_offset;
9857- j++;
9894+
9895+ if (cell == 4) {
9896+ bool needs_copy = false;
9897+ int16 v128_dynamic = dynamic_offset - cell;
9898+
9899+ for (int k = 0; k < 4; k++) {
9900+ if (*(frame_offset - k - 1) != (v128_dynamic + k)) {
9901+ needs_copy = true;
9902+ break;
9903+ }
9904+ }
9905+
9906+ if (needs_copy) {
9907+ cells[j] = cell;
9908+ src_offsets[j] = *(frame_offset - cell);
9909+ dst_offsets[j] = v128_dynamic;
9910+ j++;
9911+ }
9912+
9913+ frame_offset -= cell;
9914+ dynamic_offset = v128_dynamic;
9915+ }
9916+ else {
9917+ frame_offset -= cell;
9918+ dynamic_offset -= cell;
9919+ if (dynamic_offset != *frame_offset) {
9920+ cells[j] = cell;
9921+ /* src offset */
9922+ src_offsets[j] = *frame_offset;
9923+ /* dst offset */
9924+ dst_offsets[j] = dynamic_offset;
9925+ j++;
9926+ }
98589927 }
9928+
98599929 if (opcode == WASM_OP_ELSE) {
9860- *frame_offset = dynamic_offset;
9930+ if (cell == 4) {
9931+ for (int k = 0; k < cell; k++) {
9932+ *(frame_offset + k) = dynamic_offset + k;
9933+ }
9934+ }
9935+ else {
9936+ *frame_offset = dynamic_offset;
9937+ }
98619938 }
98629939 else {
98639940 loader_ctx->frame_offset = frame_offset;
@@ -13031,6 +13108,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
1303113108 emit_label(EXT_OP_TEE_LOCAL_FAST);
1303213109 emit_byte(loader_ctx, (uint8)local_offset);
1303313110 }
13111+ else if (local_type == VALUE_TYPE_V128) {
13112+ emit_label(EXT_OP_TEE_LOCAL_FAST_V128);
13113+ emit_byte(loader_ctx, (uint8)local_offset);
13114+ }
1303413115 else {
1303513116 emit_label(EXT_OP_TEE_LOCAL_FAST_I64);
1303613117 emit_byte(loader_ctx, (uint8)local_offset);
0 commit comments