parser.rs - mozsearch

use crate::binary_reader::WASM_MAGIC_NUMBER;

use crate::prelude::*;

#[cfg(feature = "features")]

use crate::WasmFeatures;

#[cfg(feature = "component-model")]

use crate::{

    limits::MAX_WASM_MODULE_SIZE, ComponentCanonicalSectionReader, ComponentExportSectionReader,

    ComponentImportSectionReader, ComponentInstanceSectionReader, ComponentStartFunction,

    ComponentTypeSectionReader, CoreTypeSectionReader, InstanceSectionReader, SectionLimited,

};

use crate::{

    BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader,

    ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader,

    ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader,

    TypeSectionReader,

};

use core::fmt;

use core::iter;

use core::ops::Range;

// Version number of a core WebAssembly module. In the 8-byte header it is
// stored as a little-endian `u16` right after the 4-byte magic number.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// Version number of a WebAssembly component, stored in the same header slot
// as `WASM_MODULE_VERSION`.
//
// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

// "Kind" field of the header: a little-endian `u16` that follows the version
// and tells a core module apart from a component.
const KIND_MODULE: u16 = 0x00;
const KIND_COMPONENT: u16 = 0x01;

/// The supported encoding formats for the parser.
///
/// A new [`Parser`] assumes [`Encoding::Module`] until it knows otherwise; the
/// encoding being parsed is reported through [`Payload::Version`].
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Encoding {
    /// The encoding format is a WebAssembly module.
    Module,
    /// The encoding format is a WebAssembly component.
    Component,

/// An incremental parser of a binary WebAssembly module or component.
///
/// This type is intended to be used to incrementally parse a WebAssembly module
/// or component as bytes become available for the module. This can also be used
/// to parse modules or components that are already entirely resident within memory.
///
/// The primary function for a parser is the [`Parser::parse`] function which
/// will incrementally consume input. You can also use the [`Parser::parse_all`]
/// function to parse a module or component that is entirely resident in memory.
#[derive(Debug, Clone)]
pub struct Parser {
    // Current position in the parsing state machine (see `State`).
    state: State,
    // Logical offset of the next unparsed byte. Starts at the value given to
    // `Parser::new` and grows by the number of bytes consumed on every
    // successful `Parser::parse` call.
    offset: u64,
    // Upper bound on how many more bytes this parser will look at. `parse`
    // truncates its input to this length and shrinks the bound as bytes are
    // consumed.
    max_size: u64,
    // Encoding being parsed; assumed to be a module until known otherwise.
    encoding: Encoding,
    // Feature set handed to the `BinaryReader` that `parse` creates.
    #[cfg(feature = "features")]
    features: WasmFeatures,

// Internal state machine of the parser; `parse_reader` dispatches on the
// current value each time it is called.
#[derive(Debug, Clone)]
enum State {
    // Initial state of a parser created by `Parser::new`.
    Header,
    // NOTE(review): presumably "positioned at the start of the next section";
    // the code that enters and handles this state is not visible in this chunk.
    SectionStart,
    // NOTE(review): presumably tracks progress through a code section
    // (`remaining` function bodies, `len` bytes left) -- confirm against
    // `parse_reader`.
    FunctionBody { remaining: u32, len: u32 },

/// A successful return payload from [`Parser::parse`].
///
/// On success one of two possible values can be returned, either that more data
/// is needed to continue parsing or a chunk of the input was parsed, indicating
/// how much of it was parsed.
#[derive(Debug)]
pub enum Chunk<'a> {
    /// This can be returned at any time and indicates that more data is needed
    /// to proceed with parsing. Zero bytes were consumed from the input to
    /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
    /// bytes are needed to continue parsing.
    ///
    /// [`Parser::parse`] never returns this variant when it was called with
    /// `eof` set to `true`; the underlying error is returned instead.
    NeedMoreData(u64),
    /// A chunk was successfully parsed.
    Parsed {
        /// This many bytes of the `data` input to [`Parser::parse`] were
        /// consumed to produce `payload`.
        consumed: usize,
        /// The value that we actually parsed.
        payload: Payload<'a>,
},

/// Values that can be parsed from a WebAssembly module or component.
///
/// This enumeration is all possible chunks of pieces that can be parsed by a
/// [`Parser`] from a binary WebAssembly module or component. Note that for many
/// sections the entire section is parsed all at once, whereas other sections,
/// like the code section, are parsed incrementally. This is a distinction where some
/// sections, like the type section, are required to be fully resident in memory
/// (fully downloaded) before proceeding. Other sections, like the code section,
/// can be processed in a streaming fashion where each function is extracted
/// individually so it can possibly be shipped to another thread while you wait
/// for more functions to get downloaded.
///
/// Note that payloads, when returned, do not indicate that the module or component
/// is valid. For example when you receive a `Payload::TypeSection` the type
/// section itself has not yet actually been parsed. The reader returned will be
/// able to parse it, but you'll have to actually iterate the reader to do the
/// full parse. Each payload returned is intended to be a *window* into the
/// original `data` passed to [`Parser::parse`] which can be further processed
/// if necessary.
#[non_exhaustive]
pub enum Payload<'a> {
    /// Indicates the header of a WebAssembly module or component.
    Version {
        /// The version number found in the header.
        num: u16,
        /// The encoding format being parsed.
        encoding: Encoding,
        /// The range of bytes that were parsed to consume the header of the
        /// module or component. Note that this range is relative to the start
        /// of the byte stream.
        range: Range<usize>,
},
    /// A module type section was received and the provided reader can be
    /// used to parse the contents of the type section.
    TypeSection(TypeSectionReader<'a>),
    /// A module import section was received and the provided reader can be
    /// used to parse the contents of the import section.
    ImportSection(ImportSectionReader<'a>),
    /// A module function section was received and the provided reader can be
    /// used to parse the contents of the function section.
    FunctionSection(FunctionSectionReader<'a>),
    /// A module table section was received and the provided reader can be
    /// used to parse the contents of the table section.
    TableSection(TableSectionReader<'a>),
    /// A module memory section was received and the provided reader can be
    /// used to parse the contents of the memory section.
    MemorySection(MemorySectionReader<'a>),
    /// A module tag section was received, and the provided reader can be
    /// used to parse the contents of the tag section.
    TagSection(TagSectionReader<'a>),
    /// A module global section was received and the provided reader can be
    /// used to parse the contents of the global section.
    GlobalSection(GlobalSectionReader<'a>),
    /// A module export section was received, and the provided reader can be
    /// used to parse the contents of the export section.
    ExportSection(ExportSectionReader<'a>),
    /// A module start section was received.
    StartSection {
        /// The start function index
        func: u32,
        /// The range of bytes that specify the `func` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
},
    /// A module element section was received and the provided reader can be
    /// used to parse the contents of the element section.
    ElementSection(ElementSectionReader<'a>),
    /// A module data count section was received.
    DataCountSection {
        /// The number of data segments.
        count: u32,
        /// The range of bytes that specify the `count` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
},
    /// A module data section was received and the provided reader can be
    /// used to parse the contents of the data section.
    DataSection(DataSectionReader<'a>),
    /// Indicator of the start of the code section of a WebAssembly module.
    ///
    /// This entry is returned whenever the code section starts. The `count`
    /// field indicates how many entries are in this code section. After
    /// receiving this start marker you're guaranteed that the next `count`
    /// items will be either `CodeSectionEntry` or an error will be returned.
    ///
    /// This, unlike other sections, is intended to be used for streaming the
    /// contents of the code section. The code section is not required to be
    /// fully resident in memory when we parse it. Instead a [`Parser`] is
    /// capable of parsing piece-by-piece of a code section.
    CodeSectionStart {
        /// The number of functions in this section.
        count: u32,
        /// The range of bytes that represent this section, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
        /// The size, in bytes, of the remaining contents of this section.
        ///
        /// This can be used in combination with [`Parser::skip_section`]
        /// where the caller will know how many bytes to skip before feeding
        /// bytes into `Parser` again.
        size: u32,
},
    /// An entry of the code section, a function, was parsed from a WebAssembly
    /// module.
    ///
    /// This entry indicates that a function was successfully received from the
    /// code section, and the payload here is the window into the original input
    /// where the function resides. Note that the function itself has not been
    /// parsed, it's only been outlined. You'll need to process the
    /// `FunctionBody` provided to test whether it parses and/or is valid.
    CodeSectionEntry(FunctionBody<'a>),
    /// A core module section was received and the provided parser can be
    /// used to parse the nested module.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ModuleSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested module into the parent [`Parser`].
    #[cfg(feature = "component-model")]
    ModuleSection {
        /// The parser for the nested module.
        parser: Parser,
        /// The range of bytes that represent the nested module in the
        /// original byte stream.
        ///
        /// Note that, to better support streaming parsing and validation, the
        /// validator does *not* check that this range is in bounds.
        unchecked_range: Range<usize>,
},
    /// A core instance section was received and the provided parser can be
    /// used to parse the contents of the core instance section.
    ///
    /// Currently this section is only parsed in a component.
    #[cfg(feature = "component-model")]
    InstanceSection(InstanceSectionReader<'a>),
    /// A core type section was received and the provided parser can be
    /// used to parse the contents of the core type section.
    ///
    /// Currently this section is only parsed in a component.
    #[cfg(feature = "component-model")]
    CoreTypeSection(CoreTypeSectionReader<'a>),
    /// A component section from a WebAssembly component was received and the
    /// provided parser can be used to parse the nested component.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ComponentSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested component into the parent [`Parser`].
    #[cfg(feature = "component-model")]
    ComponentSection {
        /// The parser for the nested component.
        parser: Parser,
        /// The range of bytes that represent the nested component in the
        /// original byte stream.
        ///
        /// Note that, to better support streaming parsing and validation, the
        /// validator does *not* check that this range is in bounds.
        unchecked_range: Range<usize>,
},
    /// A component instance section was received and the provided reader can be
    /// used to parse the contents of the component instance section.
    #[cfg(feature = "component-model")]
    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
    /// A component alias section was received and the provided reader can be
    /// used to parse the contents of the component alias section.
    #[cfg(feature = "component-model")]
    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
    /// A component type section was received and the provided reader can be
    /// used to parse the contents of the component type section.
    #[cfg(feature = "component-model")]
    ComponentTypeSection(ComponentTypeSectionReader<'a>),
    /// A component canonical section was received and the provided reader can be
    /// used to parse the contents of the component canonical section.
    #[cfg(feature = "component-model")]
    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
    /// A component start section was received.
    #[cfg(feature = "component-model")]
    ComponentStartSection {
        /// The start function description.
        start: ComponentStartFunction,
        /// The range of bytes that specify the `start` field.
        range: Range<usize>,
},
    /// A component import section was received and the provided reader can be
    /// used to parse the contents of the component import section.
    #[cfg(feature = "component-model")]
    ComponentImportSection(ComponentImportSectionReader<'a>),
    /// A component export section was received, and the provided reader can be
    /// used to parse the contents of the component export section.
    #[cfg(feature = "component-model")]
    ComponentExportSection(ComponentExportSectionReader<'a>),
    /// A module or component custom section was received.
    CustomSection(CustomSectionReader<'a>),
    /// An unknown section was found.
    ///
    /// This variant is returned for all unknown sections encountered. This
    /// likely wants to be interpreted as an error by consumers of the parser,
    /// but this can also be used to parse sections currently unsupported by
    /// the parser.
    UnknownSection {
        /// The 8-bit identifier for this section.
        id: u8,
        /// The contents of this section.
        contents: &'a [u8],
        /// The range of bytes, relative to the start of the original data
        /// stream, that the contents of this section reside in.
        range: Range<usize>,
},
    /// The end of the WebAssembly module or component was reached.
    ///
    /// The value is the offset in the input byte stream where the end
    /// was reached.
    End(usize),

// One-byte section identifiers of a core WebAssembly module.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// One-byte section identifiers of a WebAssembly component.
//
// These reuse the numeric values of the core identifiers above (for example
// `1` is both `TYPE_SECTION` and `COMPONENT_MODULE_SECTION`).
// NOTE(review): an id byte presumably has to be interpreted together with the
// parser's current `Encoding` -- the dispatch code is not visible in this chunk.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 1;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 4;
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 5;
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 6;
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 7;
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 8;
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 9;
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 10;
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 11;

impl Parser {

    /// Creates a new parser.
    ///
    /// Reports errors and ranges relative to `offset` provided, where `offset`
    /// is some logical offset within the input stream that we're parsing.
    ///
    /// The returned parser starts in the header state with `max_size` set to
    /// `u64::MAX` and, when the `features` cargo feature is enabled, with
    /// `WasmFeatures::all()` as its feature set.
    pub fn new(offset: u64) -> Parser {
        Parser {
            state: State::Header,
            offset,
            // `parse` only truncates input longer than `max_size`, so
            // `u64::MAX` means no input is ever truncated.
            max_size: u64::MAX,
            // Assume the encoding is a module until we know otherwise
            encoding: Encoding::Module,
            #[cfg(feature = "features")]
            features: WasmFeatures::all(),

    /// Tests whether `bytes` looks like a core WebAssembly module.
    ///
    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
    /// starts with the standard core WebAssembly header. Inputs shorter than
    /// 8 bytes never match.
    pub fn is_core_wasm(bytes: &[u8]) -> bool {
        // Header layout: the 4-byte magic number, then the module version and
        // the module kind, each stored as a little-endian `u16`.
        const HEADER: [u8; 8] = [
            WASM_MAGIC_NUMBER[0],
            WASM_MAGIC_NUMBER[1],
            WASM_MAGIC_NUMBER[2],
            WASM_MAGIC_NUMBER[3],
            WASM_MODULE_VERSION.to_le_bytes()[0],
            WASM_MODULE_VERSION.to_le_bytes()[1],
            KIND_MODULE.to_le_bytes()[0],
            KIND_MODULE.to_le_bytes()[1],
];
        bytes.starts_with(&HEADER)

    /// Tests whether `bytes` looks like a WebAssembly component.
    ///
    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
    /// starts with the standard WebAssembly component header. Inputs shorter
    /// than 8 bytes never match, and only the exact version number
    /// `WASM_COMPONENT_VERSION` is accepted.
    pub fn is_component(bytes: &[u8]) -> bool {
        // Header layout: the 4-byte magic number, then the component version
        // and the component kind, each stored as a little-endian `u16`.
        const HEADER: [u8; 8] = [
            WASM_MAGIC_NUMBER[0],
            WASM_MAGIC_NUMBER[1],
            WASM_MAGIC_NUMBER[2],
            WASM_MAGIC_NUMBER[3],
            WASM_COMPONENT_VERSION.to_le_bytes()[0],
            WASM_COMPONENT_VERSION.to_le_bytes()[1],
            KIND_COMPONENT.to_le_bytes()[0],
            KIND_COMPONENT.to_le_bytes()[1],
];
        bytes.starts_with(&HEADER)

    /// Returns the currently active set of wasm features that this parser is
    /// using while parsing.
    ///
    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
    ///
    /// For more information see [`BinaryReader::new`].
    ///
    /// Only available when the `features` cargo feature is enabled.
    #[cfg(feature = "features")]
    pub fn features(&self) -> WasmFeatures {
        self.features

    /// Sets the wasm features active while parsing to the `features` specified.
    ///
    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
    ///
    /// For more information see [`BinaryReader::new`].
    ///
    /// The new set is applied to the reader created by each subsequent call
    /// to [`Parser::parse`]. Only available when the `features` cargo feature
    /// is enabled.
    #[cfg(feature = "features")]
    pub fn set_features(&mut self, features: WasmFeatures) {
        self.features = features;

    /// Returns the original offset that this parser is currently at.
    ///
    /// This starts at the `offset` given to [`Parser::new`] and advances by
    /// the number of bytes consumed each time [`Parser::parse`] successfully
    /// returns a parsed chunk.
    pub fn offset(&self) -> u64 {
        self.offset

    /// Attempts to parse a chunk of data.
    ///
    /// This method will attempt to parse the next incremental portion of a
    /// WebAssembly binary. Data available for the module or component is
    /// provided as `data`, and the data can be incomplete if more data has yet
    /// to arrive. The `eof` flag indicates whether more data will ever be received.
    ///
    /// There are two ways parsing can succeed with this method:
    ///
    /// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
    ///   in `data` to parse a payload. The caller needs to wait for more data to
    ///   be available in this situation before calling this method again. It is
    ///   guaranteed that this is only returned if `eof` is `false`.
    ///
    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
    ///   successfully parsed. The payload is available in this variant of what
    ///   was parsed, and this also indicates how many bytes of `data` were
    ///   consumed. It's expected that the caller will not provide these bytes
    ///   back to the [`Parser`] again.
    ///
    /// Note that all `Chunk` return values are connected, with a lifetime, to
    /// the input buffer. Each parsed chunk borrows the input buffer and is a
    /// view into it for successfully parsed chunks.
    ///
    /// It is expected that you'll call this method until `Payload::End` is
    /// reached, at which point you're guaranteed that the parse has completed.
    /// Note that complete parsing, for the top-level module or component,
    /// implies that `data` is empty and `eof` is `true`.
    ///
    /// # Errors
    ///
    /// Parse errors are returned as an `Err`. Errors can happen when the
    /// structure of the data is unexpected or if sections are too large for
    /// example. Note that errors are not returned for malformed *contents* of
    /// sections here. Sections are generally not individually parsed and each
    /// returned [`Payload`] needs to be iterated over further to detect all
    /// errors.
    ///
    /// # Examples
    ///
    /// An example of reading a wasm file from a stream (`std::io::Read`) and
    /// incrementally parsing it.
    ///
    /// ```
    /// use std::io::Read;
    /// use anyhow::Result;
    /// use wasmparser::{Parser, Chunk, Payload::*};
    ///
    /// fn parse(mut reader: impl Read) -> Result<()> {
    ///     let mut buf = Vec::new();
    ///     let mut cur = Parser::new(0);
    ///     let mut eof = false;
    ///     let mut stack = Vec::new();
    ///
    ///     loop {
    ///         let (payload, consumed) = match cur.parse(&buf, eof)? {
    ///             Chunk::NeedMoreData(hint) => {
    ///                 assert!(!eof); // otherwise an error would be returned
    ///
    ///                 // Use the hint to preallocate more space, then read
    ///                 // some more data into our buffer.
    ///                 //
    ///                 // Note that the buffer management here is not ideal,
    ///                 // but it's compact enough to fit in an example!
    ///                 let len = buf.len();
    ///                 buf.extend((0..hint).map(|_| 0u8));
    ///                 let n = reader.read(&mut buf[len..])?;
    ///                 buf.truncate(len + n);
    ///                 eof = n == 0;
    ///                 continue;
    ///             }
    ///
    ///             Chunk::Parsed { consumed, payload } => (payload, consumed),
    ///         };
    ///
    ///         match payload {
    ///             // Sections for WebAssembly modules
    ///             Version { .. } => { /* ... */ }
    ///             TypeSection(_) => { /* ... */ }
    ///             ImportSection(_) => { /* ... */ }
    ///             FunctionSection(_) => { /* ... */ }
    ///             TableSection(_) => { /* ... */ }
    ///             MemorySection(_) => { /* ... */ }
    ///             TagSection(_) => { /* ... */ }
    ///             GlobalSection(_) => { /* ... */ }
    ///             ExportSection(_) => { /* ... */ }
    ///             StartSection { .. } => { /* ... */ }
    ///             ElementSection(_) => { /* ... */ }
    ///             DataCountSection { .. } => { /* ... */ }
    ///             DataSection(_) => { /* ... */ }
    ///
    ///             // Here we know how many functions we'll be receiving as
    ///             // `CodeSectionEntry`, so we can prepare for that, and
    ///             // afterwards we can parse and handle each function
    ///             // individually.
    ///             CodeSectionStart { .. } => { /* ... */ }
    ///             CodeSectionEntry(body) => {
    ///                 // here we can iterate over `body` to parse the function
    ///                 // and its locals
    ///             }
    ///
    ///             // Sections for WebAssembly components
    ///             InstanceSection(_) => { /* ... */ }
    ///             CoreTypeSection(_) => { /* ... */ }
    ///             ComponentInstanceSection(_) => { /* ... */ }
    ///             ComponentAliasSection(_) => { /* ... */ }
    ///             ComponentTypeSection(_) => { /* ... */ }
    ///             ComponentCanonicalSection(_) => { /* ... */ }
    ///             ComponentStartSection { .. } => { /* ... */ }
    ///             ComponentImportSection(_) => { /* ... */ }
    ///             ComponentExportSection(_) => { /* ... */ }
    ///
    ///             ModuleSection { parser, .. }
    ///             | ComponentSection { parser, .. } => {
    ///                 stack.push(cur.clone());
    ///                 cur = parser.clone();
    ///             }
    ///
    ///             CustomSection(_) => { /* ... */ }
    ///
    ///             // Once we've reached the end of a parser we either resume
    ///             // at the parent parser or we break out of the loop because
    ///             // we're done.
    ///             End(_) => {
    ///                 if let Some(parent_parser) = stack.pop() {
    ///                     cur = parent_parser;
    ///                 } else {
    ///                     break;
    ///                 }
    ///             }
    ///
    ///             // most likely you'd return an error here
    ///             _ => { /* ... */ }
    ///         }
    ///
    ///         // once we're done processing the payload we can forget the
    ///         // original.
    ///         buf.drain(..consumed);
    ///     }
    ///
    ///     Ok(())
    /// }
    ///
    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
    /// ```
    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
        // Never look past `max_size` bytes: when the caller hands us more than
        // that, truncate the input and treat the truncation point as the end
        // of input. The `as usize` cast cannot truncate in this branch because
        // `max_size` is smaller than `data.len()`, which is itself a `usize`.
        let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
            (&data[..(self.max_size as usize)], true)
        } else {
            (data, eof)
};
        // TODO: thread through `offset: u64` to `BinaryReader`, remove
        // the cast here.
        let starting_offset = self.offset as usize;
        let mut reader = BinaryReader::new(data, starting_offset);
        // Hand the parser's configured feature set to the fresh reader.
        #[cfg(feature = "features")]
            reader.set_features(self.features);
        match self.parse_reader(&mut reader, eof) {
            Ok(payload) => {
                // Be sure to update our offset with how far we got in the
                // reader
                let consumed = reader.original_position() - starting_offset;
                self.offset += usize_to_u64(consumed);
                // The consumed bytes also count against the remaining budget.
                self.max_size -= usize_to_u64(consumed);
                Ok(Chunk::Parsed {
                    consumed: consumed,
                    payload,
})
            Err(e) => {
                // If we're at EOF then there's no way we can recover from any
                // error, so continue to propagate it.
                if eof {
                    return Err(e);
                // If our error doesn't look like it can be resolved with more
                // data being pulled down, then propagate it, otherwise switch
                // the error to "feed me please"
                match e.inner.needed_hint {
                    Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
                    None => Err(e),

    fn parse_reader<'a>(

        &mut self,

        reader: &mut BinaryReader<'a>,

        eof: bool,

    ) -> Result<Payload<'a>> {

        use Payload::*;

        match self.state {

            State::Header => {

                let start = reader.original_position();

                let header_version = reader.read_header_version()?;

                self.encoding = match (header_version >> 16) as u16 {

                    KIND_MODULE => Encoding::Module,

                    KIND_COMPONENT => Encoding::Component,

                    _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),

};

                let num = header_version as u16;

                self.state = State::SectionStart;

                Ok(Version {

                    num,

                    encoding: self.encoding,

                    range: start..reader.original_position(),

})

            State::SectionStart => {

                // If we're at eof and there are no bytes in our buffer, then

                // that means we reached the end of the data since it's

                // just a bunch of sections concatenated after the header.

                if eof && reader.bytes_remaining() == 0 {

                    return Ok(Payload::End(reader.original_position()));

                let id_pos = reader.original_position();

                let id = reader.read_u8()?;

                if id & 0x80 != 0 {

                    return Err(BinaryReaderError::new("malformed section id", id_pos));

                let len_pos = reader.original_position();

                let mut len = reader.read_var_u32()?;

                // Test to make sure that this section actually fits within

                // `Parser::max_size`. This doesn't matter for top-level modules

                // but it is required for nested modules/components to correctly ensure

                // that all sections live entirely within their section of the

                // file.

                let consumed = reader.original_position() - id_pos;

                let section_overflow = self

                    .max_size

                    .checked_sub(usize_to_u64(consumed))

                    .and_then(|s| s.checked_sub(len.into()))

                    .is_none();

                if section_overflow {

                    return Err(BinaryReaderError::new("section too large", len_pos));

                match (self.encoding, id) {

                    // Sections for both modules and components.

                    (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),

                    // Module sections

                    (Encoding::Module, TYPE_SECTION) => {

                        section(reader, len, TypeSectionReader::new, TypeSection)

                    (Encoding::Module, IMPORT_SECTION) => {

                        section(reader, len, ImportSectionReader::new, ImportSection)

                    (Encoding::Module, FUNCTION_SECTION) => {

                        section(reader, len, FunctionSectionReader::new, FunctionSection)

                    (Encoding::Module, TABLE_SECTION) => {

                        section(reader, len, TableSectionReader::new, TableSection)

                    (Encoding::Module, MEMORY_SECTION) => {

                        section(reader, len, MemorySectionReader::new, MemorySection)

                    (Encoding::Module, GLOBAL_SECTION) => {

                        section(reader, len, GlobalSectionReader::new, GlobalSection)

                    (Encoding::Module, EXPORT_SECTION) => {

                        section(reader, len, ExportSectionReader::new, ExportSection)

                    (Encoding::Module, START_SECTION) => {

                        let (func, range) = single_item(reader, len, "start")?;

                        Ok(StartSection { func, range })

                    (Encoding::Module, ELEMENT_SECTION) => {

                        section(reader, len, ElementSectionReader::new, ElementSection)

                    (Encoding::Module, CODE_SECTION) => {

                        let start = reader.original_position();

                        let count = delimited(reader, &mut len, |r| r.read_var_u32())?;

                        let range = start..reader.original_position() + len as usize;

                        self.state = State::FunctionBody {

                            remaining: count,

                            len,

};

                        Ok(CodeSectionStart {

                            count,

                            range,

                            size: len,

})

                    (Encoding::Module, DATA_SECTION) => {

                        section(reader, len, DataSectionReader::new, DataSection)

                    (Encoding::Module, DATA_COUNT_SECTION) => {

                        let (count, range) = single_item(reader, len, "data count")?;

                        Ok(DataCountSection { count, range })

                    (Encoding::Module, TAG_SECTION) => {

                        section(reader, len, TagSectionReader::new, TagSection)

                    // Component sections

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_MODULE_SECTION)

                    | (Encoding::Component, COMPONENT_SECTION) => {

                        if len as usize > MAX_WASM_MODULE_SIZE {

                            bail!(

                                len_pos,

                                "{} section is too large",

                                if id == 1 { "module" } else { "component " }

);

                        let range = reader.original_position()

                            ..reader.original_position() + usize::try_from(len).unwrap();

                        self.max_size -= u64::from(len);

                        self.offset += u64::from(len);

                        let mut parser = Parser::new(usize_to_u64(reader.original_position()));

                        #[cfg(feature = "features")]

                            parser.features = self.features;

                        parser.max_size = u64::from(len);

                        Ok(match id {

                            1 => ModuleSection {

                                parser,

                                unchecked_range: range,

},

                            4 => ComponentSection {

                                parser,

                                unchecked_range: range,

},

                            _ => unreachable!(),

})

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {

                        section(reader, len, InstanceSectionReader::new, InstanceSection)

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {

                        section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(

                        reader,

                        len,

                        ComponentInstanceSectionReader::new,

                        ComponentInstanceSection,

),

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_ALIAS_SECTION) => {

                        section(reader, len, SectionLimited::new, ComponentAliasSection)

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_TYPE_SECTION) => section(

                        reader,

                        len,

                        ComponentTypeSectionReader::new,

                        ComponentTypeSection,

),

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(

                        reader,

                        len,

                        ComponentCanonicalSectionReader::new,

                        ComponentCanonicalSection,

),

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_START_SECTION) => {

                        let (start, range) = single_item(reader, len, "component start")?;

                        Ok(ComponentStartSection { start, range })

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(

                        reader,

                        len,

                        ComponentImportSectionReader::new,

                        ComponentImportSection,

),

                    #[cfg(feature = "component-model")]

                    (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(

                        reader,

                        len,

                        ComponentExportSectionReader::new,

                        ComponentExportSection,

),

                    (_, id) => {

                        let offset = reader.original_position();

                        let contents = reader.read_bytes(len as usize)?;

                        let range = offset..offset + len as usize;

                        Ok(UnknownSection {

id,

                            contents,

                            range,

})

            // Once we hit 0 remaining incrementally parsed items, with 0

            // remaining bytes in each section, we're done and can switch back

            // to parsing sections.

            State::FunctionBody {

                remaining: 0,

                len: 0,

            } => {

                self.state = State::SectionStart;

                self.parse_reader(reader, eof)

            // ... otherwise trailing bytes with no remaining entries in these

            // sections indicates an error.

            State::FunctionBody { remaining: 0, len } => {

                debug_assert!(len > 0);

                let offset = reader.original_position();

                Err(BinaryReaderError::new(

                    "trailing bytes at end of section",

                    offset,

))

            // Functions are relatively easy to parse when we know there's at

            // least one remaining and at least one byte available to read

            // things.

//

            // We use the remaining length try to read a u32 size of the

            // function, and using that size we require the entire function be

            // resident in memory. This means that we're reading whole chunks of

            // functions at a time.

//

            // Limiting via `Parser::max_size` (nested parsing) happens above in

            // `fn parse`, and limiting by our section size happens via

            // `delimited`. Actual parsing of the function body is delegated to

            // the caller to iterate over the `FunctionBody` structure.

            State::FunctionBody { remaining, mut len } => {

                let body = delimited(reader, &mut len, |r| {

                    Ok(FunctionBody::new(r.read_reader()?))

                })?;

                self.state = State::FunctionBody {

                    remaining: remaining - 1,

                    len,

};

                Ok(CodeSectionEntry(body))

    /// Convenience function that can be used to parse a module or component

    /// that is entirely resident in memory.

///

    /// This function will parse the `data` provided as a WebAssembly module

    /// or component.

///

    /// Note that when this function yields sections that provide parsers,

    /// no further action is required for those sections as payloads from

    /// those parsers will be automatically returned.

///

    /// # Examples

///

    /// An example of reading a wasm file from a stream (`std::io::Read`) into

    /// a buffer and then parsing it.

///

    /// ```

    /// use std::io::Read;

    /// use anyhow::Result;

    /// use wasmparser::{Parser, Chunk, Payload::*};

///

    /// fn parse(mut reader: impl Read) -> Result<()> {

    ///     let mut buf = Vec::new();

    ///     reader.read_to_end(&mut buf)?;

    ///     let parser = Parser::new(0);

///

    ///     for payload in parser.parse_all(&buf) {

    ///         match payload? {

    ///             // Sections for WebAssembly modules

    ///             Version { .. } => { /* ... */ }

    ///             TypeSection(_) => { /* ... */ }

    ///             ImportSection(_) => { /* ... */ }

    ///             FunctionSection(_) => { /* ... */ }

    ///             TableSection(_) => { /* ... */ }

    ///             MemorySection(_) => { /* ... */ }

    ///             TagSection(_) => { /* ... */ }

    ///             GlobalSection(_) => { /* ... */ }

    ///             ExportSection(_) => { /* ... */ }

    ///             StartSection { .. } => { /* ... */ }

    ///             ElementSection(_) => { /* ... */ }

    ///             DataCountSection { .. } => { /* ... */ }

    ///             DataSection(_) => { /* ... */ }

///

    ///             // Here we know how many functions we'll be receiving as

    ///             // `CodeSectionEntry`, so we can prepare for that, and

    ///             // afterwards we can parse and handle each function

    ///             // individually.

    ///             CodeSectionStart { .. } => { /* ... */ }

    ///             CodeSectionEntry(body) => {

    ///                 // here we can iterate over `body` to parse the function

    ///                 // and its locals

    ///             }

///

    ///             // Sections for WebAssembly components

    ///             ModuleSection { .. } => { /* ... */ }

    ///             InstanceSection(_) => { /* ... */ }

    ///             CoreTypeSection(_) => { /* ... */ }

    ///             ComponentSection { .. } => { /* ... */ }

    ///             ComponentInstanceSection(_) => { /* ... */ }

    ///             ComponentAliasSection(_) => { /* ... */ }

    ///             ComponentTypeSection(_) => { /* ... */ }

    ///             ComponentCanonicalSection(_) => { /* ... */ }

    ///             ComponentStartSection { .. } => { /* ... */ }

    ///             ComponentImportSection(_) => { /* ... */ }

    ///             ComponentExportSection(_) => { /* ... */ }

///

    ///             CustomSection(_) => { /* ... */ }

///

    ///             // Once we've reached the end of a parser we either resume

    ///             // at the parent parser or the payload iterator is at its

    ///             // end and we're done.

    ///             End(_) => {}

///

    ///             // most likely you'd return an error here, but if you want

    ///             // you can also inspect the raw contents of unknown sections

    ///             other => {

    ///                 match other.as_section() {

    ///                     Some((id, range)) => { /* ... */ }

    ///                     None => { /* ... */ }

    ///                 }

    ///             }

    ///         }

    ///     }

///

    ///     Ok(())

    /// }

///

    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();

    /// ```

    pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {

        let mut stack = Vec::new();

        let mut cur = self;

        let mut done = false;

        iter::from_fn(move || {

            if done {

                return None;

            let payload = match cur.parse(data, true) {

                // Propagate all errors

                Err(e) => {

                    done = true;

                    return Some(Err(e));

                // This isn't possible because `eof` is always true.

                Ok(Chunk::NeedMoreData(_)) => unreachable!(),

                Ok(Chunk::Parsed { payload, consumed }) => {

                    data = &data[consumed..];

                    payload

};

            match &payload {

                #[cfg(feature = "component-model")]

                Payload::ModuleSection { parser, .. }

                | Payload::ComponentSection { parser, .. } => {

                    stack.push(cur.clone());

                    cur = parser.clone();

                Payload::End(_) => match stack.pop() {

                    Some(p) => cur = p,

                    None => done = true,

},

                _ => {}

            Some(Ok(payload))

})

    /// Skip parsing the code section entirely.

///

    /// This function can be used to indicate, after receiving

    /// `CodeSectionStart`, that the section will not be parsed.

///

    /// The caller will be responsible for skipping `size` bytes (found in the

    /// `CodeSectionStart` payload). Bytes should only be fed into `parse`

    /// after the `size` bytes have been skipped.

///

    /// # Panics

///

    /// This function will panic if the parser is not in a state where it's

    /// parsing the code section.

///

    /// # Examples

///

    /// ```

    /// use wasmparser::{Result, Parser, Chunk, Payload::*};

    /// use core::ops::Range;

///

    /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {

    ///     let mut parser = Parser::new(0);

    ///     loop {

    ///         let payload = match parser.parse(wasm, true)? {

    ///             Chunk::Parsed { consumed, payload } => {

    ///                 wasm = &wasm[consumed..];

    ///                 payload

    ///             }

    ///             // this state isn't possible with `eof = true`

    ///             Chunk::NeedMoreData(_) => unreachable!(),

    ///         };

    ///         match payload {

    ///             TypeSection(s) => print_range("type section", &s.range()),

    ///             ImportSection(s) => print_range("import section", &s.range()),

    ///             // .. other sections

///

    ///             // Print the range of the code section we see, but don't

    ///             // actually iterate over each individual function.

    ///             CodeSectionStart { range, size, .. } => {

    ///                 print_range("code section", &range);

    ///                 parser.skip_section();

    ///                 wasm = &wasm[size as usize..];

    ///             }

    ///             End(_) => break,

    ///             _ => {}

    ///         }

    ///     }

    ///     Ok(())

    /// }

///

    /// fn print_range(section: &str, range: &Range<usize>) {

    ///     println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);

    /// }

    /// ```

    pub fn skip_section(&mut self) {

        let skip = match self.state {

            State::FunctionBody { remaining: _, len } => len,

            _ => panic!("wrong state to call `skip_section`"),

};

        self.offset += u64::from(skip);

        self.max_size -= u64::from(skip);

        self.state = State::SectionStart;

/// Converts a `usize` to a `u64`.
///
/// # Panics
///
/// Panics if `a` does not fit in a `u64`.
fn usize_to_u64(a: usize) -> u64 {
    a.try_into().expect("usize value does not fit in u64")
}

/// Parses an entire section resident in memory into a `Payload`.

///

/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`

/// to construct the section to return.

fn section<'a, T>(

    reader: &mut BinaryReader<'a>,

    len: u32,

    ctor: fn(BinaryReader<'a>) -> Result<T>,

    variant: fn(T) -> Payload<'a>,

) -> Result<Payload<'a>> {

    let reader = reader.skip(|r| {

        r.read_bytes(len as usize)?;

        Ok(())

    })?;

    // clear the hint for "need this many more bytes" here because we already

    // read all the bytes, so it's not possible to read more bytes if this

    // fails.

    let reader = ctor(reader).map_err(clear_hint)?;

    Ok(variant(reader))

/// Reads a section that is represented by a single uleb-encoded `u32`.

fn single_item<'a, T>(

    reader: &mut BinaryReader<'a>,

    len: u32,

    desc: &str,

) -> Result<(T, Range<usize>)>

where

    T: FromReader<'a>,

    let range = reader.original_position()..reader.original_position() + len as usize;

    let mut content = reader.skip(|r| {

        r.read_bytes(len as usize)?;

        Ok(())

    })?;

    // We can't recover from "unexpected eof" here because our entire section is

    // already resident in memory, so clear the hint for how many more bytes are

    // expected.

    let ret = content.read().map_err(clear_hint)?;

    if !content.eof() {

        bail!(

            content.original_position(),

            "unexpected content in the {desc} section",

);

    Ok((ret, range))

/// Attempts to parse using `f`.

///

/// This will update `*len` with the number of bytes consumed, and it will cause

/// a failure to be returned instead of the number of bytes consumed exceeds

/// what `*len` currently is.

fn delimited<'a, T>(

    reader: &mut BinaryReader<'a>,

    len: &mut u32,

    f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,

) -> Result<T> {

    let start = reader.original_position();

    let ret = f(reader)?;

    *len = match (reader.original_position() - start)

        .try_into()

        .ok()

        .and_then(|i| len.checked_sub(i))

        Some(i) => i,

        None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),

};

    Ok(ret)

impl Default for Parser {

    fn default() -> Parser {

        Parser::new(0)

impl Payload<'_> {

    /// If this `Payload` represents a section in the original wasm module then

    /// the section's id and range within the original wasm binary are returned.

///

    /// Not all payloads refer to entire sections, such as the `Version` and

    /// `CodeSectionEntry` variants. These variants will return `None` from this

    /// function.

///

    /// Otherwise this function will return `Some` where the first element is

    /// the byte identifier for the section and the second element is the range

    /// of the contents of the section within the original wasm binary.

///

    /// The purpose of this method is to enable tools to easily iterate over

    /// entire sections if necessary and handle sections uniformly, for example

    /// dropping custom sections while preserving all other sections.

    pub fn as_section(&self) -> Option<(u8, Range<usize>)> {

        use Payload::*;

        match self {

            Version { .. } => None,

            TypeSection(s) => Some((TYPE_SECTION, s.range())),

            ImportSection(s) => Some((IMPORT_SECTION, s.range())),

            FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),

            TableSection(s) => Some((TABLE_SECTION, s.range())),

            MemorySection(s) => Some((MEMORY_SECTION, s.range())),

            TagSection(s) => Some((TAG_SECTION, s.range())),

            GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),

            ExportSection(s) => Some((EXPORT_SECTION, s.range())),

            ElementSection(s) => Some((ELEMENT_SECTION, s.range())),

            DataSection(s) => Some((DATA_SECTION, s.range())),

            StartSection { range, .. } => Some((START_SECTION, range.clone())),

            DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),

            CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),

            CodeSectionEntry(_) => None,

            #[cfg(feature = "component-model")]

            ModuleSection {

                unchecked_range: range,

..

            } => Some((COMPONENT_MODULE_SECTION, range.clone())),

            #[cfg(feature = "component-model")]

            InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),

            #[cfg(feature = "component-model")]

            CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),

            #[cfg(feature = "component-model")]

            ComponentSection {

                unchecked_range: range,

..

            } => Some((COMPONENT_SECTION, range.clone())),

            #[cfg(feature = "component-model")]

            ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),

            #[cfg(feature = "component-model")]

            ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),

            #[cfg(feature = "component-model")]

            ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),

            #[cfg(feature = "component-model")]

            ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),

            #[cfg(feature = "component-model")]

            ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),

            #[cfg(feature = "component-model")]

            ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),

            #[cfg(feature = "component-model")]

            ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),

            CustomSection(c) => Some((CUSTOM_SECTION, c.range())),

            UnknownSection { id, range, .. } => Some((*id, range.clone())),

            End(_) => None,

impl fmt::Debug for Payload<'_> {

    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

        use Payload::*;

        match self {

            Version {

                num,

                encoding,

                range,

            } => f

                .debug_struct("Version")

                .field("num", num)

                .field("encoding", encoding)

                .field("range", range)

                .finish(),

            // Module sections

            TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),

            ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),

            FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),

            TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),

            MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),

            TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),

            GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),

            ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),

            ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),

            DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),

            StartSection { func, range } => f

                .debug_struct("StartSection")

                .field("func", func)

                .field("range", range)

                .finish(),

            DataCountSection { count, range } => f

                .debug_struct("DataCountSection")

                .field("count", count)

                .field("range", range)

                .finish(),

            CodeSectionStart { count, range, size } => f

                .debug_struct("CodeSectionStart")

                .field("count", count)

                .field("range", range)

                .field("size", size)

                .finish(),

            CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),

            // Component sections

            #[cfg(feature = "component-model")]

            ModuleSection {

                parser: _,

                unchecked_range: range,

            } => f

                .debug_struct("ModuleSection")

                .field("range", range)

                .finish(),

            #[cfg(feature = "component-model")]

            InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),

            #[cfg(feature = "component-model")]

            CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),

            #[cfg(feature = "component-model")]

            ComponentSection {

                parser: _,

                unchecked_range: range,

            } => f

                .debug_struct("ComponentSection")

                .field("range", range)

                .finish(),

            #[cfg(feature = "component-model")]

            ComponentInstanceSection(_) => f

                .debug_tuple("ComponentInstanceSection")

                .field(&"...")

                .finish(),

            #[cfg(feature = "component-model")]

            ComponentAliasSection(_) => f

                .debug_tuple("ComponentAliasSection")

                .field(&"...")

                .finish(),

            #[cfg(feature = "component-model")]

            ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),

            #[cfg(feature = "component-model")]

            ComponentCanonicalSection(_) => f

                .debug_tuple("ComponentCanonicalSection")

                .field(&"...")

                .finish(),

            #[cfg(feature = "component-model")]

            ComponentStartSection { .. } => f

                .debug_tuple("ComponentStartSection")

                .field(&"...")

                .finish(),

            #[cfg(feature = "component-model")]

            ComponentImportSection(_) => f

                .debug_tuple("ComponentImportSection")

                .field(&"...")

                .finish(),

            #[cfg(feature = "component-model")]

            ComponentExportSection(_) => f

                .debug_tuple("ComponentExportSection")

                .field(&"...")

                .finish(),

            CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),

            UnknownSection { id, range, .. } => f

                .debug_struct("UnknownSection")

                .field("id", id)

                .field("range", range)

                .finish(),

            End(offset) => f.debug_tuple("End").field(offset).finish(),

fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {

    err.inner.needed_hint = None;

err

#[cfg(test)]

mod tests {

    use super::*;

    // Asserts that the value of expression `$a` matches pattern `$b`.
    //
    // On a mismatch the macro panics, printing the `Debug` form of the value
    // and the stringified pattern. A trailing comma after the pattern is
    // accepted.
    macro_rules! assert_matches {
        ($a:expr, $b:pat $(,)?) => {
            match $a {
                $b => {}
                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
            }
        };
    }

    /// Checks how a fresh parser reacts to empty, partial, and complete
    /// 8-byte headers.
    #[test]
    fn header() {
        // No bytes at all with `eof = true` is an error.
        assert!(Parser::default().parse(&[], true).is_err());

        // Without `eof`, incomplete input asks for more data instead.
        assert_matches!(
            Parser::default().parse(&[], false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(b"\0", false),
            Ok(Chunk::NeedMoreData(3)),
        );
        assert_matches!(
            Parser::default().parse(b"\0asm", false),
            Ok(Chunk::NeedMoreData(4)),
        );

        // A full header is consumed in one step and reports version 1.
        assert_matches!(
            Parser::default().parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );
    }

    /// Drives `parse_all` over empty and truncated headers, and over a header
    /// whose last four bytes are all `0x01`.
    ///
    /// Every yielded item is discarded, so this only checks that iteration
    /// runs to completion without panicking.
    #[test]
    fn header_iter() {
        for _ in Parser::default().parse_all(&[]) {}
        for _ in Parser::default().parse_all(b"\0") {}
        for _ in Parser::default().parse_all(b"\0asm") {}
        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
    }

    fn parser_after_header() -> Parser {

        let mut p = Parser::default();

        assert_matches!(

            p.parse(b"\0asm\x01\0\0\0", false),

            Ok(Chunk::Parsed {

                consumed: 8,

                payload: Payload::Version {

                    num: WASM_MODULE_VERSION,

                    encoding: Encoding::Module,

..

},

}),

);

    fn parser_after_component_header() -> Parser {

        let mut p = Parser::default();

        assert_matches!(

            p.parse(b"\0asm\x0d\0\x01\0", false),

            Ok(Chunk::Parsed {

                consumed: 8,

                payload: Payload::Version {

                    num: WASM_COMPONENT_VERSION,

                    encoding: Encoding::Component,

..

},

}),

);

    /// Exercises section id 8 right after a module header: truncated input,
    /// a complete section, and malformed section sizes.
    #[test]
    fn start_section() {
        assert_matches!(
            parser_after_header().parse(&[], false),
            Ok(Chunk::NeedMoreData(1)),
        );

        // Truncated sections are errors once `eof` is signalled...
        assert!(parser_after_header().parse(&[8], true).is_err());
        assert!(parser_after_header().parse(&[8, 1], true).is_err());
        assert!(parser_after_header().parse(&[8, 2], true).is_err());

        // ...but merely request more data while input may still arrive.
        assert_matches!(
            parser_after_header().parse(&[8], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 2], false),
            Ok(Chunk::NeedMoreData(2)),
        );

        // A complete one-byte section yields the start function index.
        assert_matches!(
            parser_after_header().parse(&[8, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::StartSection { func: 1, .. },
            }),
        );

        // Declared sizes of 2 and 0 are both rejected.
        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
        assert!(parser_after_header().parse(&[8, 0], false).is_err());
    }

    /// With no bytes after the header and `eof = true`, the parser reports
    /// `Payload::End(8)` and consumes nothing.
    #[test]
    fn end_works() {
        assert_matches!(
            parser_after_header().parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(8),
            }),
        );
    }

    /// Exercises section id 1 right after a module header: truncated input,
    /// an empty section, and a complete section with and without trailing
    /// bytes in the buffer.
    #[test]
    fn type_section() {
        assert!(parser_after_header().parse(&[1], true).is_err());
        assert!(parser_after_header().parse(&[1, 0], false).is_err());
        // NOTE(review): this uses section id 8, the id exercised by
        // `start_section`, rather than id 1 -- looks like a copy/paste
        // leftover; `&[1, 2]` was presumably intended. Confirm before changing.
        assert!(parser_after_header().parse(&[8, 2], true).is_err());

        assert_matches!(
            parser_after_header().parse(&[1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );

        assert_matches!(
            parser_after_header().parse(&[1, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
        // Bytes past the declared section size are not consumed.
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
    }

    /// Exercises section id 0 right after a module header: malformed and
    /// truncated sections, then three well-formed ones checked field by field
    /// through `assert_custom`.
    #[test]
    fn custom_section() {
        assert!(parser_after_header().parse(&[0], true).is_err());
        assert!(parser_after_header().parse(&[0, 0], false).is_err());
        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );

        // Empty name, empty data.
        assert_custom(
            parser_after_header().parse(&[0, 1, 0], false).unwrap(),
            3,
            "",
            11,
            b"",
            Range { start: 10, end: 11 },
        );
        // Name "a", empty data.
        assert_custom(
            parser_after_header()
                .parse(&[0, 2, 1, b'a'], false)
                .unwrap(),
            4,
            "a",
            12,
            b"",
            Range { start: 10, end: 12 },
        );
        // Empty name, data "a".
        assert_custom(
            parser_after_header()
                .parse(&[0, 2, 0, b'a'], false)
                .unwrap(),
            4,
            "",
            11,
            b"a",
            Range { start: 10, end: 12 },
        );
    }

    fn assert_custom(

        chunk: Chunk<'_>,

        expected_consumed: usize,

        expected_name: &str,

        expected_data_offset: usize,

        expected_data: &[u8],

        expected_range: Range<usize>,

) {

        let (consumed, s) = match chunk {

            Chunk::Parsed {

                consumed,

                payload: Payload::CustomSection(s),

            } => (consumed, s),

            _ => panic!("not a custom section payload"),

};

        assert_eq!(consumed, expected_consumed);

        assert_eq!(s.name(), expected_name);

        assert_eq!(s.data_offset(), expected_data_offset);

        assert_eq!(s.data(), expected_data);

        assert_eq!(s.range(), expected_range);

    /// Exercises section id 10 right after a module header.
    ///
    /// Despite the test's name, the payloads asserted here are
    /// `CodeSectionStart` / `CodeSectionEntry`. Covers truncated headers,
    /// empty and single-entry sections, sections too small for their declared
    /// entries, and trailing bytes after the last entry.
    #[test]
    fn function_section() {
        assert!(parser_after_header().parse(&[10], true).is_err());
        assert!(parser_after_header().parse(&[10, 0], true).is_err());
        assert!(parser_after_header().parse(&[10, 1], true).is_err());
        assert_matches!(
            parser_after_header().parse(&[10], false),
            Ok(Chunk::NeedMoreData(1))
        );
        assert_matches!(
            parser_after_header().parse(&[10, 1], false),
            Ok(Chunk::NeedMoreData(1))
        );

        // section declaring zero entries, followed by end of input
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 0, .. },
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(11),
            }),
        );

        // section declaring one entry; the entry is parsed by a later call
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 1, 0], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(12),
            }),
        );

        // 1 byte section with 1 function can't read the function body because
        // the section is too small
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file"
        );

        // section with 2 functions but section is cut off
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 2, 2], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 2, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "unexpected end-of-file",
        );

        // trailing data is bad
        let mut p = parser_after_header();
        assert_matches!(
            p.parse(&[10, 3, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        assert_matches!(
            p.parse(&[0], false),
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        assert_eq!(
            p.parse(&[0], false).unwrap_err().message(),
            "trailing bytes at end of section",
        );
    }

    /// Parses a component containing one nested 8-byte module, checking that
    /// the sub-parser handed out with `Payload::ModuleSection` stops at the
    /// module boundary and that the parent parser then reaches its own end.
    #[test]
    fn single_module() {
        let mut p = parser_after_component_header();
        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

        // A module that's 8 bytes in length
        let mut sub = match p.parse(&[1, 8], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // Parse the header of the submodule with the sub-parser.
        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: 1,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );

        // The sub-parser should be byte-limited so the next byte shouldn't get
        // consumed, it's intended for the parent parser.
        assert_matches!(
            sub.parse(&[10], false),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );

        // The parent parser should now be back to resuming, and we simulate it
        // being done with bytes to ensure that it's safely at the end,
        // completing the module code section.
        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(
            p.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );
    }

    /// Checks that a section inside a nested module is rejected with
    /// "section too large" when its declared size exceeds the bytes remaining
    /// in the enclosing module section.
    #[test]
    fn nested_section_too_big() {
        let mut p = parser_after_component_header();

        // A module that's 10 bytes in length
        let mut sub = match p.parse(&[1, 10], false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        };

        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
        // module.
        assert_matches!(
            sub.parse(b"\0asm\x01\0\0\0", false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );

        // We can't parse a section which declares it's bigger than the outer
        // module. This is a custom section, one byte big, with one content byte. The
        // content byte, however, lives outside of the parent's module code
        // section.
        assert_eq!(
            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
            "section too large",
        );
    }

Revision control

Copy as Markdown

Other Tools