Skip to content

Commit 9595240

Browse files
authored
Refactor how string encodings are specified to wit-component (#401)
* Refactor how string encodings are specified to `wit-component` This commit is motivated by the recent addition of utf-16 support into the C generator. Currently that works by "passing a flag" to the `wit-component` invocation effectively, but the level of abstraction of this flag isn't quite right: * The utf16 flag must be specified in two separate locations that need to be kept in sync. * Interfaces aren't necessarily all generated in one location, meaning that it's not always correct to specify utf16 for everything all at once. Instead it would be better for the encoding to be specified at a per-unit-generation granularity. The contents of this commit aim to address this issue by changing the `component-type` custom section in wasm binaries generated by guest-generators to include the string encoding used. The encoding of strings wasn't moved into the `Interface` structure since it's an implementation-level detail rather than an interface-level detail. The `component-type` custom section is now encoded as a leading version byte, a string encoding byte, followed by the component as before. The `wit-component` tooling internally has been refactored to take all of this into account. Notably the imports/exports/default lists were replaced with a new `BindgenMetadata` structure which handles all the merging between different custom sections and CLI flags. Additionally new maps are plumbed around which keep track of core wasm imports/exports and their registered encoding to ensure that the liftings/lowerings use the right encoding. The main limitation of this commit is that the same interface cannot be imported with two different encodings into the same core wasm module. Implementing this will require unique names of the import into the module (e.g. some form of name mangling). Otherwise for now it returns an error. It's expected that user-level interactions with `wit-component` are largely unchanged after this commit. 
The main change is that if the `wit-component` tool is run manually then the `--encoding` flag is largely not necessary unless the binary doesn't already have embedded `component-type` sections. * Fix demo build
1 parent 9ed3613 commit 9595240

19 files changed

Lines changed: 539 additions & 400 deletions

File tree

crates/gen-guest-c/src/component_type_object.rs

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
1-
use anyhow::{Context, Result};
1+
use anyhow::Result;
22
use heck::ToSnakeCase;
33
use wasm_encoder::{
44
CodeSection, CustomSection, Encode, Function, FunctionSection, Module, TypeSection,
55
};
6-
use wit_component::{ComponentEncoder, ComponentInterfaces};
6+
use wit_component::{ComponentInterfaces, StringEncoding};
77

88
pub fn linking_symbol(name: &str) -> String {
99
let snake = name.to_snake_case();
1010
format!("__component_type_object_force_link_{snake}")
1111
}
1212

13-
pub fn object(name: &str, interfaces: &ComponentInterfaces) -> Result<Vec<u8>> {
13+
pub fn object(
14+
name: &str,
15+
interfaces: &ComponentInterfaces,
16+
encoding: StringEncoding,
17+
) -> Result<Vec<u8>> {
1418
let mut module = Module::new();
1519

1620
// Build a module with one function that's a "dummy function"
@@ -24,18 +28,7 @@ pub fn object(name: &str, interfaces: &ComponentInterfaces) -> Result<Vec<u8>> {
2428
code.function(&Function::new([]));
2529
module.section(&code);
2630

27-
let mut encoder = ComponentEncoder::default()
28-
.imports(interfaces.imports.values().cloned())?
29-
.exports(interfaces.exports.values().cloned())?;
30-
31-
if let Some(default) = &interfaces.default {
32-
encoder = encoder.interface(default.clone())?;
33-
}
34-
35-
let data = encoder
36-
.types_only(true)
37-
.encode()
38-
.with_context(|| format!("translating {name} to component type"))?;
31+
let data = wit_component::metadata::encode(interfaces, encoding);
3932

4033
// The custom section name here must start with "component-type" but
4134
// otherwise is attempted to be unique here to ensure that this doesn't get

crates/gen-guest-c/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ impl WorldGenerator for C {
271271
files.push(&format!("{snake}.h"), h_str.as_bytes());
272272
files.push(
273273
&format!("{snake}_component_type.o",),
274-
component_type_object::object(name, interfaces)
274+
component_type_object::object(name, interfaces, self.opts.string_encoding)
275275
.unwrap()
276276
.as_slice(),
277277
);

crates/gen-guest-rust/src/lib.rs

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -178,18 +178,8 @@ impl WorldGenerator for RustWasm {
178178
self.src
179179
.push_str(&format!("#[link_section = \"component-type:{name}\"]\n"));
180180

181-
let mut encoder = wit_component::ComponentEncoder::default()
182-
.imports(interfaces.imports.values().cloned())
183-
.unwrap()
184-
.exports(interfaces.exports.values().cloned())
185-
.unwrap();
186-
if let Some(default) = &interfaces.default {
187-
encoder = encoder.interface(default.clone()).unwrap();
188-
}
189-
let component_type = encoder
190-
.types_only(true)
191-
.encode()
192-
.expect("encoding a component type");
181+
let component_type =
182+
wit_component::metadata::encode(interfaces, wit_component::StringEncoding::UTF8);
193183
self.src.push_str(&format!(
194184
"pub static __WIT_BINDGEN_COMPONENT_TYPE: [u8; {}] = ",
195185
component_type.len()

crates/test-helpers/macros/build.rs

Lines changed: 25 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::env;
22
use std::fs;
3-
use std::path::PathBuf;
3+
use std::path::{Path, PathBuf};
44
use std::process::Command;
55
use wit_bindgen_core::{wit_parser::Interface, Direction, Generator};
66
use wit_component::{ComponentEncoder, ComponentInterfaces, StringEncoding};
@@ -18,19 +18,8 @@ fn guest_c(
1818
if !c_impl.exists() {
1919
continue;
2020
}
21-
let imports = test_dir.join("imports.wit");
22-
let exports = test_dir.join("exports.wit");
23-
println!("cargo:rerun-if-changed={}", imports.display());
24-
println!("cargo:rerun-if-changed={}", exports.display());
2521
println!("cargo:rerun-if-changed={}", c_impl.display());
26-
27-
let import = Interface::parse_file(&test_dir.join("imports.wit")).unwrap();
28-
let export = Interface::parse_file(&test_dir.join("exports.wit")).unwrap();
29-
let interfaces = ComponentInterfaces {
30-
imports: [(import.name.clone(), import)].into_iter().collect(),
31-
exports: Default::default(),
32-
default: Some(export),
33-
};
22+
let interfaces = read_interfaces(&test_dir);
3423
let name = test_dir.file_name().unwrap().to_str().unwrap();
3524
let snake = name.replace("-", "_");
3625
let mut files = Default::default();
@@ -89,11 +78,7 @@ fn guest_c(
8978

9079
// Translate the canonical ABI module into a component.
9180
let module = fs::read(&out_wasm).expect("failed to read wasm file");
92-
let mut encoder = ComponentEncoder::default();
93-
if utf_16 {
94-
encoder = encoder.encoding(StringEncoding::UTF16);
95-
}
96-
let component = encoder
81+
let component = ComponentEncoder::default()
9782
.module(module.as_slice())
9883
.expect("pull custom sections from module")
9984
.validate(true)
@@ -218,13 +203,13 @@ fn main() {
218203
drop(fs::remove_dir_all(&out_dir));
219204

220205
let java_dir = out_dir.join("src/main/java");
206+
let interfaces = read_interfaces(&test_dir);
221207

222208
for (wit, direction) in [
223209
("imports.wit", Direction::Import),
224210
("exports.wit", Direction::Export),
225211
] {
226212
let path = test_dir.join(wit);
227-
println!("cargo:rerun-if-changed={}", path.display());
228213
let iface = Interface::parse_file(&path).unwrap();
229214
let package_dir = java_dir.join(&format!("wit_{}", iface.name));
230215
fs::create_dir_all(&package_dir).unwrap();
@@ -281,18 +266,13 @@ fn main() {
281266

282267
let out_wasm = out_dir.join("target/generated/wasm/teavm-wasm/classes.wasm");
283268

284-
let imports = [Interface::parse_file(test_dir.join("imports.wit")).unwrap()];
285-
let interface = Interface::parse_file(test_dir.join("exports.wit")).unwrap();
286-
287269
// Translate the canonical ABI module into a component.
288270
// The wit interfaces are explicitly passed to ComponentEncoder,
289271
because the TeaVM guest doesn't yet support putting component
290272
// types into custom sections.
291273
let module = fs::read(&out_wasm).expect("failed to read wasm file");
292274
let component = ComponentEncoder::default()
293-
.imports(imports)
294-
.unwrap()
295-
.interface(interface)
275+
.interfaces(interfaces, StringEncoding::UTF8)
296276
.unwrap()
297277
.module(module.as_slice())
298278
.expect("pull custom sections from module")
@@ -321,16 +301,29 @@ fn main() {
321301
std::fs::write(out_dir.join("wasms.rs"), src).unwrap();
322302
}
323303

324-
#[cfg(unix)]
325-
fn mvn() -> Command {
326-
Command::new("mvn")
304+
fn read_interfaces(dir: &Path) -> ComponentInterfaces {
305+
let imports = dir.join("imports.wit");
306+
let exports = dir.join("exports.wit");
307+
println!("cargo:rerun-if-changed={}", imports.display());
308+
println!("cargo:rerun-if-changed={}", exports.display());
309+
310+
let import = Interface::parse_file(&imports).unwrap();
311+
let export = Interface::parse_file(&exports).unwrap();
312+
ComponentInterfaces {
313+
imports: [(import.name.clone(), import)].into_iter().collect(),
314+
exports: Default::default(),
315+
default: Some(export),
316+
}
327317
}
328318

329-
#[cfg(windows)]
330319
fn mvn() -> Command {
331-
let mut cmd = Command::new("cmd");
332-
cmd.args(&["/c", "mvn"]);
333-
cmd
320+
if cfg!(windows) {
321+
let mut cmd = Command::new("cmd");
322+
cmd.args(&["/c", "mvn"]);
323+
cmd
324+
} else {
325+
Command::new("mvn")
326+
}
334327
}
335328

336329
fn pom_xml(classes_to_preserve: &[&str]) -> Vec<u8> {

crates/test-helpers/src/lib.rs

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -145,24 +145,25 @@ pub fn run_component_codegen_test(
145145
generate: fn(&str, &[u8], &mut Files),
146146
verify: fn(&Path, &str),
147147
) {
148-
let mut encoder = wit_component::ComponentEncoder::default();
149148
let iface = Interface::parse_file(wit_path).unwrap();
149+
let mut interfaces = ComponentInterfaces::default();
150150

151-
let wasm = match dir {
151+
match dir {
152152
Direction::Import => {
153-
encoder = encoder.imports([iface.clone()]).unwrap();
154-
dummy_module(&[iface], &[], None)
153+
interfaces.imports.insert(iface.name.clone(), iface);
155154
}
156155
Direction::Export => {
157-
encoder = encoder.interface(iface.clone()).unwrap();
158-
dummy_module(&[], &[], Some(&iface))
156+
interfaces.default = Some(iface);
159157
}
160-
};
158+
}
161159

162-
let component = encoder
160+
let wasm = dummy_module(&interfaces);
161+
let component = wit_component::ComponentEncoder::default()
163162
.module(&wasm)
164163
.unwrap()
165164
.validate(true)
165+
.interfaces(interfaces, wit_component::StringEncoding::UTF8)
166+
.unwrap()
166167
.encode()
167168
.unwrap();
168169

@@ -189,14 +190,10 @@ pub fn run_component_codegen_test(
189190
verify(&dir, name);
190191
}
191192

192-
pub fn dummy_module(
193-
imports: &[Interface],
194-
exports: &[Interface],
195-
default: Option<&Interface>,
196-
) -> Vec<u8> {
193+
pub fn dummy_module(interfaces: &ComponentInterfaces) -> Vec<u8> {
197194
let mut wat = String::new();
198195
wat.push_str("(module\n");
199-
for import in imports {
196+
for (_, import) in interfaces.imports.iter() {
200197
for func in import.functions.iter() {
201198
let sig = import.wasm_signature(AbiVariant::GuestImport, func);
202199

@@ -210,14 +207,14 @@ pub fn dummy_module(
210207
}
211208
}
212209

213-
for export in exports {
210+
for (_, export) in interfaces.exports.iter() {
214211
for func in export.functions.iter() {
215212
let name = format!("{}#{}", export.name, func.name);
216213
push_func(&mut wat, &name, export, func);
217214
}
218215
}
219216

220-
if let Some(default) = default {
217+
if let Some(default) = &interfaces.default {
221218
for func in default.functions.iter() {
222219
push_func(&mut wat, &func.name, default, func);
223220
}

crates/wit-bindgen-demo/src/lib.rs

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,19 @@ fn init() {
5454

5555
fn render(lang: demo::Lang, wit: &str, files: &mut Files, options: &demo::Options) -> Result<()> {
5656
let iface = Interface::parse("input", &wit)?;
57+
let interfaces = ComponentInterfaces {
58+
imports: if options.import {
59+
[(iface.name.clone(), iface.clone())].into_iter().collect()
60+
} else {
61+
Default::default()
62+
},
63+
exports: Default::default(),
64+
default: if !options.import {
65+
Some(iface.clone())
66+
} else {
67+
None
68+
},
69+
};
5770

5871
let gen_world_legacy = |mut gen: Box<dyn Generator>, files: &mut Files| {
5972
let (imports, exports) = if options.import {
@@ -65,16 +78,6 @@ fn render(lang: demo::Lang, wit: &str, files: &mut Files, options: &demo::Option
6578
};
6679

6780
let gen_world = |mut gen: Box<dyn WorldGenerator>, files: &mut Files| {
68-
let (imports, default) = if options.import {
69-
(vec![iface.clone()], None)
70-
} else {
71-
(vec![], Some(iface.clone()))
72-
};
73-
let interfaces = ComponentInterfaces {
74-
imports: imports.into_iter().map(|i| (i.name.clone(), i)).collect(),
75-
exports: Default::default(),
76-
default,
77-
};
7881
gen.generate("demo", &interfaces, files);
7982
};
8083

@@ -86,19 +89,11 @@ fn render(lang: demo::Lang, wit: &str, files: &mut Files, options: &demo::Option
8689
// interface and dummy module. Finally this component is fed into the host
8790
// generator which gives us the files we want.
8891
let gen_component = |mut gen: Box<dyn ComponentGenerator>, files: &mut Files| {
89-
let (imports, interface) = if options.import {
90-
(vec![iface.clone()], None)
91-
} else {
92-
(Vec::new(), Some(iface.clone()))
93-
};
94-
let dummy = test_helpers::dummy_module(&imports, &[], interface.as_ref());
95-
let mut encoder = wit_component::ComponentEncoder::default()
92+
let dummy = test_helpers::dummy_module(&interfaces);
93+
let wasm = wit_component::ComponentEncoder::default()
9694
.module(&dummy)?
97-
.imports(imports)?;
98-
if let Some(iface) = interface {
99-
encoder = encoder.interface(iface)?;
100-
}
101-
let wasm = encoder.encode()?;
95+
.interfaces(interfaces.clone(), wit_component::StringEncoding::UTF8)?
96+
.encode()?;
10297
wit_bindgen_core::component::generate(&mut *gen, "input", &wasm, files)
10398
};
10499

crates/wit-component/src/cli.rs

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
//! The WebAssembly component tool command line interface.
22
3-
use crate::{decode_component_interfaces, ComponentEncoder, InterfacePrinter, StringEncoding};
3+
use crate::{
4+
decode_component_interfaces, ComponentEncoder, ComponentInterfaces, InterfacePrinter,
5+
StringEncoding,
6+
};
47
use anyhow::{anyhow, bail, Context, Result};
58
use clap::Parser;
69
use std::path::{Path, PathBuf};
@@ -119,24 +122,34 @@ impl WitComponentApp {
119122
let module = wat::parse_file(&self.module)
120123
.with_context(|| format!("failed to parse module `{}`", self.module.display()))?;
121124

125+
let mut interfaces = ComponentInterfaces::default();
126+
for import in self.imports {
127+
let name = import.name.clone();
128+
let prev = interfaces.imports.insert(name.clone(), import);
129+
if prev.is_some() {
130+
bail!("duplicate import interface specified for `{name}`");
131+
}
132+
}
133+
for export in self.exports {
134+
let name = export.name.clone();
135+
let prev = interfaces.exports.insert(name.clone(), export);
136+
if prev.is_some() {
137+
bail!("duplicate export interface specified for `{name}`");
138+
}
139+
}
140+
interfaces.default = self.interface;
141+
142+
let encoding = self.encoding.unwrap_or(StringEncoding::UTF8);
143+
122144
let mut encoder = ComponentEncoder::default()
123145
.module(&module)?
124-
.imports(self.imports)?
125-
.exports(self.exports)?
146+
.interfaces(interfaces, encoding)?
126147
.validate(!self.skip_validation);
127148

128149
for (name, wasm) in self.adapters.iter() {
129150
encoder = encoder.adapter(name, wasm)?;
130151
}
131152

132-
if let Some(interface) = self.interface {
133-
encoder = encoder.interface(interface)?;
134-
}
135-
136-
if let Some(encoding) = &self.encoding {
137-
encoder = encoder.encoding(*encoding);
138-
}
139-
140153
let bytes = encoder.encode().with_context(|| {
141154
format!(
142155
"failed to encode a component from module `{}`",

crates/wit-component/src/decoding.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ struct InterfaceDecoder<'a> {
8888
///
8989
/// This is more-or-less a "world" and will likely be replaced one day with a
9090
/// `wit-parser` representation of a world.
91-
#[derive(Default)]
91+
#[derive(Clone, Default)]
9292
pub struct ComponentInterfaces {
9393
/// The "default export" which is the interface directly exported from the
9494
/// component at the top level.

0 commit comments

Comments
 (0)