Skip to content

Commit ff2b40f

Browse files
authored
wasmparser: Add validator identifiers and a reset method (#1506)
* wasmparser: Add validator IDs and a reset method

  Lets users reuse validators (and their typing contexts) across different Wasm modules. The IDs help users assert that they are pairing their `CoreTypeId`s (and similar identifiers) with the correct validation context.

* Remove from impl
1 parent 371aff5 commit ff2b40f

2 files changed

Lines changed: 154 additions & 8 deletions

File tree

crates/wasmparser/src/validator.rs

Lines changed: 122 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use crate::{
2020
use bitflags::bitflags;
2121
use std::mem;
2222
use std::ops::Range;
23+
use std::sync::atomic::{AtomicUsize, Ordering};
2324
use std::sync::Arc;
2425

2526
/// Test whether the given buffer contains a valid WebAssembly module or component,
@@ -86,6 +87,23 @@ fn combine_type_sizes(a: u32, b: u32, offset: usize) -> Result<u32> {
8687
}
8788
}
8889

90+
/// A unique identifier for a particular `Validator`.
///
/// Allows you to save the `ValidatorId` of the [`Validator`][crate::Validator]
/// you get identifiers out of (e.g. [`CoreTypeId`][crate::types::CoreTypeId])
/// and then later assert that you are pairing those identifiers with the same
/// `Validator` instance when accessing the identifier's associated data.
///
/// A fresh identifier is obtained through [`Default::default`]; every call
/// yields a value that has not been handed out before in this process.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)]
pub struct ValidatorId(usize);

impl Default for ValidatorId {
    /// Allocates the next unused identifier from a process-wide counter.
    ///
    /// # Panics
    ///
    /// Panics if the identifier space (`usize`) has been exhausted. Handing
    /// out a wrapped-around value would silently break the uniqueness
    /// guarantee, so exhaustion is treated as a hard error instead.
    #[inline]
    fn default() -> Self {
        static ID_COUNTER: AtomicUsize = AtomicUsize::new(0);

        // `fetch_add` wraps on overflow, which would start re-issuing
        // identifiers that are already in use. `fetch_update` with
        // `checked_add` leaves the counter untouched once it saturates, so a
        // duplicate can never be observed, even by other threads.
        let id = ID_COUNTER
            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |current| {
                current.checked_add(1)
            })
            .expect("failed to allocate a unique `ValidatorId`: identifier space exhausted");

        ValidatorId(id)
    }
}
106+
89107
/// Validator for a WebAssembly binary module or component.
90108
///
91109
/// This structure encapsulates state necessary to validate a WebAssembly
@@ -113,6 +131,8 @@ fn combine_type_sizes(a: u32, b: u32, offset: usize) -> Result<u32> {
113131
/// [core]: https://webassembly.github.io/spec/core/valid/index.html
114132
#[derive(Default)]
115133
pub struct Validator {
134+
id: ValidatorId,
135+
116136
/// The current state of the validator.
117137
state: State,
118138

@@ -484,6 +504,100 @@ impl Validator {
484504
&self.features
485505
}
486506

507+
/// Reset this validator's state such that it is ready to validate a new
508+
/// Wasm module or component.
509+
///
510+
/// This does *not* clear or reset the internal state keeping track of
511+
/// validated (and deduplicated and canonicalized) types, allowing you to
512+
/// use the same type identifiers (such as
513+
/// [`CoreTypeId`][crate::types::CoreTypeId]) for the same types that are
514+
/// defined multiple times across different modules and components.
515+
///
516+
/// ```
517+
/// fn foo() -> anyhow::Result<()> {
518+
/// use wasmparser::Validator;
519+
///
520+
/// let mut validator = Validator::default();
521+
///
522+
/// // Two wasm modules, both of which define the same type, but at
523+
/// // different indices in their respective types index spaces.
524+
/// let wasm1 = wat::parse_str("
525+
/// (module
526+
/// (type $same_type (func (param i32) (result f64)))
527+
/// )
528+
/// ")?;
529+
/// let wasm2 = wat::parse_str("
530+
/// (module
531+
/// (type $different_type (func))
532+
/// (type $same_type (func (param i32) (result f64)))
533+
/// )
534+
/// ")?;
535+
///
536+
/// // Validate the first Wasm module and get the ID of its type.
537+
/// let types = validator.validate_all(&wasm1)?;
538+
/// let id1 = types.core_type_at(0);
539+
///
540+
/// // Reset the validator so we can parse the second wasm module inside
541+
/// // this validator's same context.
542+
/// validator.reset();
543+
///
544+
/// // Validate the second Wasm module and get the ID of its second type,
545+
/// // which is the same type as the first Wasm module's only type.
546+
/// let types = validator.validate_all(&wasm2)?;
547+
/// let id2 = types.core_type_at(1);
548+
///
549+
/// // Because both modules were processed in the same `Validator`, they
550+
/// // share the same types context and therefore the same type defined
551+
/// // multiple times across different modules will be deduplicated and
552+
/// // assigned the same identifier!
553+
/// assert_eq!(id1, id2);
554+
/// assert_eq!(types[id1.unwrap_sub()], types[id2.unwrap_sub()]);
555+
/// # Ok(())
556+
/// # }
557+
/// # foo().unwrap()
558+
/// ```
559+
pub fn reset(&mut self) {
560+
let Validator {
561+
// Not changing the identifier; users should be able to observe that
562+
// they are using the same validation context, even after resetting.
563+
id: _,
564+
565+
// Don't mess with `types`, we specifically want to reuse canonicalizations.
566+
types: _,
567+
568+
// Also leave features as they are. While this is perhaps not
569+
// strictly necessary, it helps us avoid weird bugs where we have
570+
// different views of what is or is not a valid type at different
571+
// times, despite using the same `TypeList` and hash consing
572+
// context, and therefore there could be moments in time where we
573+
// have "invalid" types inside our current types list.
574+
features: _,
575+
576+
state,
577+
module,
578+
components,
579+
} = self;
580+
581+
assert!(
582+
matches!(state, State::End),
583+
"cannot reset a validator that did not successfully complete validation"
584+
);
585+
assert!(module.is_none());
586+
assert!(components.is_empty());
587+
588+
*state = State::default();
589+
}
590+
591+
/// Get this validator's unique identifier.
592+
///
593+
/// Allows you to assert that you are always working with the same
594+
/// `Validator` instance, when you can't otherwise statically ensure that
595+
/// property by e.g. storing a reference to the validator inside your
596+
/// structure.
597+
pub fn id(&self) -> ValidatorId {
598+
self.id
599+
}
600+
487601
/// Validates an entire in-memory module or component with this validator.
488602
///
489603
/// This function will internally create a [`Parser`] to parse the `bytes`
@@ -530,7 +644,7 @@ impl Validator {
530644
pub fn types(&self, mut level: usize) -> Option<TypesRef> {
531645
if let Some(module) = &self.module {
532646
if level == 0 {
533-
return Some(TypesRef::from_module(&self.types, &module.module));
647+
return Some(TypesRef::from_module(self.id, &self.types, &module.module));
534648
} else {
535649
level -= 1;
536650
}
@@ -539,7 +653,7 @@ impl Validator {
539653
self.components
540654
.iter()
541655
.nth_back(level)
542-
.map(|component| TypesRef::from_component(&self.types, component))
656+
.map(|component| TypesRef::from_component(self.id, &self.types, component))
543657
}
544658

545659
/// Convenience function to validate a single [`Payload`].
@@ -1372,6 +1486,7 @@ impl Validator {
13721486
}
13731487

13741488
Ok(Types::from_module(
1489+
self.id,
13751490
self.types.commit(),
13761491
state.module.arc().clone(),
13771492
))
@@ -1396,7 +1511,11 @@ impl Validator {
13961511
self.state = State::Component;
13971512
}
13981513

1399-
Ok(Types::from_component(self.types.commit(), component))
1514+
Ok(Types::from_component(
1515+
self.id,
1516+
self.types.commit(),
1517+
component,
1518+
))
14001519
}
14011520
}
14021521
}

crates/wasmparser/src/validator/types.rs

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use super::{
44
component::{ComponentState, ExternKind},
55
core::Module,
66
};
7-
use crate::{validator::names::KebabString, HeapType};
7+
use crate::{validator::names::KebabString, HeapType, ValidatorId};
88
use crate::{
99
BinaryReaderError, CompositeType, Export, ExternalKind, FuncType, GlobalType, Import, Matches,
1010
MemoryType, PackedIndex, PrimitiveValType, RecGroup, RefType, Result, SubType, TableType,
@@ -1474,6 +1474,7 @@ enum TypesKind {
14741474
///
14751475
/// The type information is returned via the [`crate::Validator::end`] method.
14761476
pub struct Types {
1477+
id: ValidatorId,
14771478
list: TypeList,
14781479
kind: TypesKind,
14791480
}
@@ -1489,25 +1490,38 @@ enum TypesRefKind<'a> {
14891490
/// Retrieved via the [`crate::Validator::types`] method.
14901491
#[derive(Clone, Copy)]
14911492
pub struct TypesRef<'a> {
1493+
id: ValidatorId,
14921494
list: &'a TypeList,
14931495
kind: TypesRefKind<'a>,
14941496
}
14951497

14961498
impl<'a> TypesRef<'a> {
1497-
pub(crate) fn from_module(types: &'a TypeList, module: &'a Module) -> Self {
1499+
pub(crate) fn from_module(id: ValidatorId, types: &'a TypeList, module: &'a Module) -> Self {
14981500
Self {
1501+
id,
14991502
list: types,
15001503
kind: TypesRefKind::Module(module),
15011504
}
15021505
}
15031506

1504-
pub(crate) fn from_component(types: &'a TypeList, component: &'a ComponentState) -> Self {
1507+
pub(crate) fn from_component(
1508+
id: ValidatorId,
1509+
types: &'a TypeList,
1510+
component: &'a ComponentState,
1511+
) -> Self {
15051512
Self {
1513+
id,
15061514
list: types,
15071515
kind: TypesRefKind::Component(component),
15081516
}
15091517
}
15101518

1519+
/// Get the id of the validator that these types are associated with.
1520+
#[inline]
1521+
pub fn id(&self) -> ValidatorId {
1522+
self.id
1523+
}
1524+
15111525
/// Gets a type based on its type id.
15121526
///
15131527
/// Returns `None` if the type id is unknown.
@@ -1930,23 +1944,36 @@ where
19301944
}
19311945

19321946
impl Types {
1933-
pub(crate) fn from_module(types: TypeList, module: Arc<Module>) -> Self {
1947+
pub(crate) fn from_module(id: ValidatorId, types: TypeList, module: Arc<Module>) -> Self {
19341948
Self {
1949+
id,
19351950
list: types,
19361951
kind: TypesKind::Module(module),
19371952
}
19381953
}
19391954

1940-
pub(crate) fn from_component(types: TypeList, component: ComponentState) -> Self {
1955+
pub(crate) fn from_component(
1956+
id: ValidatorId,
1957+
types: TypeList,
1958+
component: ComponentState,
1959+
) -> Self {
19411960
Self {
1961+
id,
19421962
list: types,
19431963
kind: TypesKind::Component(component),
19441964
}
19451965
}
19461966

1967+
/// Get the id of the validator that these types are associated with.
1968+
#[inline]
1969+
pub fn id(&self) -> ValidatorId {
1970+
self.id
1971+
}
1972+
19471973
/// Gets a reference to this validation type information.
19481974
pub fn as_ref(&self) -> TypesRef {
19491975
TypesRef {
1976+
id: self.id,
19501977
list: &self.list,
19511978
kind: match &self.kind {
19521979
TypesKind::Module(module) => TypesRefKind::Module(module),

0 commit comments

Comments
 (0)