Skip to content

Commit bee5a7c

Browse files
authored
Implement DWARF support in text-to-binary (#1632)
* Implement DWARF support in text-to-binary

This commit implements support to emit DWARF debugging information when the WebAssembly text format is converted to binary. Currently there is otherwise no means of translating a binary offset in a WebAssembly file back to the text format that it came from. This support is implemented with a few API knobs here and there as well as two new CLI flags for all the commands that support the text format as input: `-g` and `--generate-dwarf lines|full`.

The original motivation of this commit is that I was curious to learn more about DWARF and it seemed like a neat little feature that wouldn't be too hard to maintain. The inspiration was bytecodealliance/wasmtime#8658 which this does not implement as-is (e.g. not custom DWARF, just wasm-text-DWARF). Otherwise though I've realized that this can be useful for development in a few situations:

  * Wasmtime's backtraces can now mention filenames/line numbers of the WebAssembly text format.
  * Validation errors can use `addr2line` to print a filename/line number of a WebAssembly text file.
  * Wasmtime doesn't have a native debugger nor does LLDB/GDB know about WebAssembly. Through Wasmtime's DWARF transformation this enables debugging WebAssembly text files.

This commit implements knobs for "full" or "line" support in DWARF. The "line" support should be as complete as it can be (DWARF can only provide information for the code section). The "full" support is somewhat basic still but my hope is that it can be expanded in the future as needed.

* Fix a compile warning
1 parent 02d291a commit bee5a7c

31 files changed

Lines changed: 1425 additions & 235 deletions

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ hashbrown = { version = "0.14.3", default-features = false, features = ['ahash']
8585
ahash = { version = "0.8.11", default-features = false }
8686
termcolor = "1.2.0"
8787
indoc = "2.0.5"
88+
gimli = "0.29.0"
8889

8990
wasm-compose = { version = "0.211.1", path = "crates/wasm-compose" }
9091
wasm-encoder = { version = "0.211.1", path = "crates/wasm-encoder" }
@@ -108,7 +109,7 @@ log = { workspace = true }
108109
clap = { workspace = true }
109110
clap_complete = { workspace = true, optional = true }
110111
tempfile = "3.2.0"
111-
wat = { workspace = true }
112+
wat = { workspace = true, features = ['dwarf'] }
112113
termcolor = { workspace = true }
113114

114115
# Dependencies of `validate`
@@ -156,7 +157,7 @@ wit-smith = { workspace = true, features = ["clap"], optional = true }
156157

157158
# Dependencies of `addr2line`
158159
addr2line = { version = "0.22.0", optional = true }
159-
gimli = { version = "0.29.0", optional = true }
160+
gimli = { workspace = true, optional = true }
160161

161162
[target.'cfg(not(target_family = "wasm"))'.dependencies]
162163
is_executable = { version = "1.0.1", optional = true }
@@ -200,7 +201,7 @@ default = [
200201
]
201202

202203
# Each subcommand is gated behind a feature and lists the dependencies it needs
203-
validate = ['dep:wasmparser', 'rayon']
204+
validate = ['dep:wasmparser', 'rayon', 'dep:addr2line', 'dep:gimli']
204205
print = []
205206
parse = []
206207
smith = ['wasm-smith', 'arbitrary', 'dep:serde', 'dep:serde_derive', 'dep:serde_json']

crates/wast/Cargo.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ Customizable Rust parsers for the WebAssembly Text formats WAT and WAST
1313
"""
1414
rust-version.workspace = true
1515

16+
[package.metadata.docs.rs]
17+
all-features = true
18+
1619
[lints]
1720
workspace = true
1821

@@ -22,12 +25,14 @@ unicode-width = "0.1.9"
2225
memchr = "2.4.1"
2326
wasm-encoder = { workspace = true }
2427
bumpalo = "3.14.0"
28+
gimli = { workspace = true, optional = true }
2529

2630
[dev-dependencies]
2731
anyhow = { workspace = true }
2832
libtest-mimic = { workspace = true }
2933
wasmparser = { path = "../wasmparser" }
3034
wat = { path = "../wat" }
35+
rand = { workspace = true }
3136

3237
[features]
3338
default = ['wasm-module']
@@ -40,6 +45,11 @@ default = ['wasm-module']
4045
# This feature is turned on by default.
4146
wasm-module = []
4247

48+
# Off-by-default feature to support emitting DWARF debugging information in
49+
# parsed binaries pointing back to source locations in the original `*.wat`
50+
# source.
51+
dwarf = ["dep:gimli"]
52+
4353
[[test]]
4454
name = "parse-fail"
4555
harness = false

crates/wast/src/component.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Types and support for parsing the component model text format.
22
33
mod alias;
4-
mod binary;
4+
pub(crate) mod binary;
55
mod component;
66
mod custom;
77
mod expand;

crates/wast/src/component/binary.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::component::*;
22
use crate::core;
3+
use crate::core::EncodeOptions;
34
use crate::token::{Id, Index, NameAnnotation, Span};
45
use wasm_encoder::{
56
CanonicalFunctionSection, ComponentAliasSection, ComponentDefinedTypeEncoder,
@@ -9,10 +10,10 @@ use wasm_encoder::{
910
NestedComponentSection, RawSection, SectionId,
1011
};
1112

12-
pub fn encode(component: &Component<'_>) -> Vec<u8> {
13+
pub fn encode(component: &Component<'_>, options: &EncodeOptions) -> Vec<u8> {
1314
match &component.kind {
1415
ComponentKind::Text(fields) => {
15-
encode_fields(&component.id, &component.name, fields).finish()
16+
encode_fields(&component.id, &component.name, fields, options).finish()
1617
}
1718
ComponentKind::Binary(bytes) => bytes.iter().flat_map(|b| b.iter().copied()).collect(),
1819
}
@@ -23,15 +24,16 @@ fn encode_fields(
2324
component_id: &Option<Id<'_>>,
2425
component_name: &Option<NameAnnotation<'_>>,
2526
fields: &[ComponentField<'_>],
27+
options: &EncodeOptions,
2628
) -> wasm_encoder::Component {
2729
let mut e = Encoder::default();
2830

2931
for field in fields {
3032
match field {
31-
ComponentField::CoreModule(m) => e.encode_core_module(m),
33+
ComponentField::CoreModule(m) => e.encode_core_module(m, options),
3234
ComponentField::CoreInstance(i) => e.encode_core_instance(i),
3335
ComponentField::CoreType(t) => e.encode_core_type(t),
34-
ComponentField::Component(c) => e.encode_component(c),
36+
ComponentField::Component(c) => e.encode_component(c, options),
3537
ComponentField::Instance(i) => e.encode_instance(i),
3638
ComponentField::Alias(a) => e.encode_alias(a),
3739
ComponentField::Type(t) => e.encode_type(t),
@@ -191,7 +193,7 @@ impl<'a> Encoder<'a> {
191193
})
192194
}
193195

194-
fn encode_core_module(&mut self, module: &CoreModule<'a>) {
196+
fn encode_core_module(&mut self, module: &CoreModule<'a>, options: &EncodeOptions) {
195197
// Flush any in-progress section before encoding the module
196198
self.flush(None);
197199

@@ -202,7 +204,7 @@ impl<'a> Encoder<'a> {
202204
CoreModuleKind::Import { .. } => unreachable!("should be expanded already"),
203205
CoreModuleKind::Inline { fields } => {
204206
// TODO: replace this with a wasm-encoder based encoding (should return `wasm_encoder::Module`)
205-
let data = crate::core::binary::encode(&module.id, &module.name, fields);
207+
let data = crate::core::binary::encode(&module.id, &module.name, fields, options);
206208
self.component.section(&RawSection {
207209
id: ComponentSectionId::CoreModule.into(),
208210
data: &data,
@@ -238,7 +240,7 @@ impl<'a> Encoder<'a> {
238240
self.flush(Some(self.core_types.id()));
239241
}
240242

241-
fn encode_component(&mut self, component: &NestedComponent<'a>) {
243+
fn encode_component(&mut self, component: &NestedComponent<'a>, options: &EncodeOptions) {
242244
self.component_names
243245
.push(get_name(&component.id, &component.name));
244246
// Flush any in-progress section before encoding the component
@@ -252,6 +254,7 @@ impl<'a> Encoder<'a> {
252254
&component.id,
253255
&component.name,
254256
fields,
257+
options,
255258
)));
256259
}
257260
}

crates/wast/src/component/component.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,7 @@ impl<'a> Component<'a> {
8686
/// This function can return an error for name resolution errors and other
8787
/// expansion-related errors.
8888
pub fn encode(&mut self) -> std::result::Result<Vec<u8>, crate::Error> {
89-
self.resolve()?;
90-
Ok(crate::component::binary::encode(self))
89+
crate::core::EncodeOptions::default().encode_component(self)
9190
}
9291

9392
pub(crate) fn validate(&self, parser: Parser<'_>) -> Result<()> {

crates/wast/src/core.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod table;
1212
mod tag;
1313
mod types;
1414
mod wast;
15+
pub use self::binary::{EncodeOptions, GenerateDwarf};
1516
pub use self::custom::*;
1617
pub use self::export::*;
1718
pub use self::expr::*;

0 commit comments

Comments
 (0)