token.rs - mozsearch

comm-central/third_party/rust/wast/src/token.rs

Enable keyboard shortcuts

Revision control

Copy as Markdown

Other Tools

HG Web

//! Common tokens that implement the [`Parse`] trait which are otherwise not

//! associated specifically with the wasm text format per se (useful in other

//! contexts too perhaps).

use crate::annotation;

use crate::lexer::Float;

use crate::parser::{Cursor, Parse, Parser, Peek, Result};

use std::fmt;

use std::hash::{Hash, Hasher};

use std::str;

/// A position in the original source stream, used to render errors.
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct Span {
    /// Byte offset of this position from the start of the source text.
    pub(crate) offset: usize,
}

impl Span {
    /// Construct a `Span` from a byte offset in the source file.
    pub fn from_offset(offset: usize) -> Self {
        Span { offset }
    }

    /// Returns the line/column information of this span within `text`.
    ///
    /// Line and column numbers are 0-indexed. User presentation is typically
    /// 1-indexed, but 0-indexing is appropriate for internal use with
    /// iterators and slices.
    ///
    /// The column is a byte distance from the start of the line. If the
    /// offset lies beyond every line of `text`, the result is
    /// `(number_of_lines, 0)`.
    pub fn linecol_in(&self, text: &str) -> (usize, usize) {
        let mut cur = 0;
        // Use split_terminator instead of lines so that if there is a `\r`,
        // it is included in the offset calculation. The `+1` values below
        // account for the `\n`.
        for (i, line) in text.split_terminator('\n').enumerate() {
            if cur + line.len() + 1 > self.offset {
                return (i, self.offset - cur);
            }
            cur += line.len() + 1;
        }
        (text.lines().count(), 0)
    }

    /// Returns the byte offset of this span.
    pub fn offset(&self) -> usize {
        self.offset
    }
}

/// An identifier in a WebAssembly module, prefixed by `$` in the textual

/// format.

///

/// An identifier is used to symbolically refer to items in a a wasm module,

/// typically via the [`Index`] type.

#[derive(Copy, Clone)]

pub struct Id<'a> {

    name: &'a str,

    gen: u32,

    span: Span,

impl<'a> Id<'a> {

    /// Construct a new identifier from given string.

///

    /// Note that `name` can be any arbitrary string according to the

    /// WebAssembly/annotations proposal.

    pub fn new(name: &'a str, span: Span) -> Id<'a> {

        Id { name, gen: 0, span }

    #[cfg(feature = "wasm-module")]

    pub(crate) fn gensym(span: Span, gen: u32) -> Id<'a> {

        Id {

            name: "gensym",

            gen,

            span,

    /// Returns the underlying name of this identifier.

///

    /// The name returned does not contain the leading `$`.

    pub fn name(&self) -> &'a str {

        self.name

    /// Returns span of this identifier in the original source

    pub fn span(&self) -> Span {

        self.span

    #[cfg(feature = "wasm-module")]

    pub(crate) fn is_gensym(&self) -> bool {

        self.gen != 0

impl<'a> Hash for Id<'a> {

    fn hash<H: Hasher>(&self, hasher: &mut H) {

        self.name.hash(hasher);

        self.gen.hash(hasher);

impl<'a> PartialEq for Id<'a> {

    fn eq(&self, other: &Id<'a>) -> bool {

        self.name == other.name && self.gen == other.gen

impl<'a> Eq for Id<'a> {}

impl<'a> Parse<'a> for Id<'a> {

    fn parse(parser: Parser<'a>) -> Result<Self> {

        parser.step(|c| {

            if let Some((name, rest)) = c.id()? {

                return Ok((

                    Id {

                        name,

                        gen: 0,

                        span: c.cur_span(),

},

                    rest,

));

            Err(c.error("expected an identifier"))

})

impl fmt::Debug for Id<'_> {

    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

        if self.gen != 0 {

            f.debug_struct("Id").field("gen", &self.gen).finish()

        } else {

            self.name.fmt(f)

impl Peek for Id<'_> {

    fn peek(cursor: Cursor<'_>) -> Result<bool> {

        cursor.peek_id()

    fn display() -> &'static str {

        "an identifier"

/// A reference to another item in a wasm module.

///

/// This type is used for items referring to other items (such as `call $foo`

/// referencing function `$foo`). References can be either an index (u32) or an

/// [`Id`] in the textual format.

///

/// The emission phase of a module will ensure that `Index::Id` is never used

/// and switch them all to `Index::Num`.

#[derive(Copy, Clone, Debug)]

pub enum Index<'a> {

    /// A numerical index that this references. The index space this is

    /// referencing is implicit based on where this [`Index`] is stored.

    Num(u32, Span),

    /// A human-readable identifier this references. Like `Num`, the namespace

    /// this references is based on where this is stored.

    Id(Id<'a>),

impl Index<'_> {

    /// Returns the source location where this `Index` was defined.

    pub fn span(&self) -> Span {

        match self {

            Index::Num(_, span) => *span,

            Index::Id(id) => id.span(),

    #[cfg(feature = "wasm-module")]

    pub(crate) fn is_resolved(&self) -> bool {

        matches!(self, Index::Num(..))

impl<'a> Parse<'a> for Index<'a> {

    fn parse(parser: Parser<'a>) -> Result<Self> {

        if parser.peek::<Id>()? {

            Ok(Index::Id(parser.parse()?))

        } else if parser.peek::<u32>()? {

            let (val, span) = parser.parse()?;

            Ok(Index::Num(val, span))

        } else {

            Err(parser.error(format!(

                "unexpected token, expected an index or an identifier"

)))

impl Peek for Index<'_> {

    fn peek(cursor: Cursor<'_>) -> Result<bool> {

        Ok(u32::peek(cursor)? || Id::peek(cursor)?)

    fn display() -> &'static str {

        "an index"

impl<'a> From<Id<'a>> for Index<'a> {

    fn from(id: Id<'a>) -> Index<'a> {

        Index::Id(id)

impl PartialEq for Index<'_> {

    fn eq(&self, other: &Index<'_>) -> bool {

        match (self, other) {

            (Index::Num(a, _), Index::Num(b, _)) => a == b,

            (Index::Id(a), Index::Id(b)) => a == b,

            _ => false,

impl Eq for Index<'_> {}

impl Hash for Index<'_> {

    fn hash<H: Hasher>(&self, hasher: &mut H) {

        match self {

            Index::Num(a, _) => {

                0u8.hash(hasher);

                a.hash(hasher);

            Index::Id(a) => {

                1u8.hash(hasher);

                a.hash(hasher);

/// Parses `(func $foo)`

#[derive(Clone, Debug)]

#[allow(missing_docs)]

pub struct ItemRef<'a, K> {

    pub kind: K,

    pub idx: Index<'a>,

impl<'a, K: Parse<'a>> Parse<'a> for ItemRef<'a, K> {

    fn parse(parser: Parser<'a>) -> Result<Self> {

        parser.parens(|parser| {

            let kind = parser.parse::<K>()?;

            let idx = parser.parse()?;

            Ok(ItemRef { kind, idx })

})

impl<'a, K: Peek> Peek for ItemRef<'a, K> {

    fn peek(cursor: Cursor<'_>) -> Result<bool> {

        match cursor.lparen()? {

            Some(remaining) => K::peek(remaining),

            None => Ok(false),

    fn display() -> &'static str {

        "an item reference"

/// An `@name` annotation in source, currently of the form `@name "foo"`

#[derive(Copy, Clone, PartialEq, Eq, Debug)]

pub struct NameAnnotation<'a> {

    /// The name specified for the item

    pub name: &'a str,

impl<'a> Parse<'a> for NameAnnotation<'a> {

    fn parse(parser: Parser<'a>) -> Result<Self> {

        parser.parse::<annotation::name>()?;

        let name = parser.parse()?;

        Ok(NameAnnotation { name })

impl<'a> Parse<'a> for Option<NameAnnotation<'a>> {

    fn parse(parser: Parser<'a>) -> Result<Self> {

        Ok(if parser.peek2::<annotation::name>()? {

            Some(parser.parens(|p| p.parse())?)

        } else {

            None

})

macro_rules! integers {

    ($($i:ident($u:ident))*) => ($(

        impl<'a> Parse<'a> for $i {

            fn parse(parser: Parser<'a>) -> Result<Self> {

                Ok(parser.parse::<($i, Span)>()?.0)

        impl<'a> Parse<'a> for ($i, Span) {

            fn parse(parser: Parser<'a>) -> Result<Self> {

                parser.step(|c| {

                    if let Some((i, rest)) = c.integer()? {

                        let (s, base) = i.val();

                        let val = $i::from_str_radix(s, base)

                            .or_else(|_| {

                                $u::from_str_radix(s, base).map(|i| i as $i)

});

                        return match val {

                            Ok(n) => Ok(((n, c.cur_span()), rest)),

                            Err(_) => Err(c.error(concat!(

                                "invalid ",

                                stringify!($i),

                                " number: constant out of range",

                            ))),

};

                    Err(c.error(concat!("expected a ", stringify!($i))))

})

        impl Peek for $i {

            fn peek(cursor: Cursor<'_>) -> Result<bool> {

                cursor.peek_integer()

            fn display() -> &'static str {

                stringify!($i)

)*)

integers! {

    u8(u8) u16(u16) u32(u32) u64(u64)

    i8(u8) i16(u16) i32(u32) i64(u64)

impl<'a> Parse<'a> for &'a [u8] {

    fn parse(parser: Parser<'a>) -> Result<Self> {

        parser.step(|c| {

            if let Some((i, rest)) = c.string()? {

                return Ok((i, rest));

            Err(c.error("expected a string"))

})

impl Peek for &'_ [u8] {

    fn peek(cursor: Cursor<'_>) -> Result<bool> {

        cursor.peek_string()

    fn display() -> &'static str {

        "string"

impl<'a> Parse<'a> for &'a str {

    fn parse(parser: Parser<'a>) -> Result<Self> {

        str::from_utf8(parser.parse()?)

            .map_err(|_| parser.error_at(parser.prev_span(), "malformed UTF-8 encoding"))

impl Parse<'_> for String {

    fn parse(parser: Parser<'_>) -> Result<Self> {

        Ok(<&str>::parse(parser)?.to_string())

impl Peek for &'_ str {

    fn peek(cursor: Cursor<'_>) -> Result<bool> {

        <&[u8]>::peek(cursor)

    fn display() -> &'static str {

        <&[u8]>::display()

macro_rules! float {

    ($($name:ident => {

        bits: $int:ident,

        float: $float:ident,

        exponent_bits: $exp_bits:tt,

        name: $parse:ident,

    })*) => ($(

        /// A parsed floating-point type

        #[derive(Debug, Copy, Clone)]

        pub struct $name {

            /// The raw bits that this floating point number represents.

            pub bits: $int,

        impl<'a> Parse<'a> for $name {

            fn parse(parser: Parser<'a>) -> Result<Self> {

                parser.step(|c| {

                    let (val, rest) = if let Some((f, rest)) = c.float()? {

                        ($parse(&f), rest)

                    } else if let Some((i, rest)) = c.integer()? {

                        let (s, base) = i.val();

                            $parse(&Float::Val {

                                hex: base == 16,

                                integral: s.into(),

                                decimal: None,

                                exponent: None,

}),

                            rest,

                    } else {

                        return Err(c.error("expected a float"));

};

                    match val {

                        Some(bits) => Ok(($name { bits }, rest)),

                        None => Err(c.error("invalid float value: constant out of range")),

})

        fn $parse(val: &Float<'_>) -> Option<$int> {

            // Compute a few well-known constants about the float representation

            // given the parameters to the macro here.

            let width = std::mem::size_of::<$int>() * 8;

            let neg_offset = width - 1;

            let exp_offset = neg_offset - $exp_bits;

            let signif_bits = width - 1 - $exp_bits;

            let signif_mask = (1 << exp_offset) - 1;

            let bias = (1 << ($exp_bits - 1)) - 1;

            let (hex, integral, decimal, exponent_str) = match val {

                // Infinity is when the exponent bits are all set and

                // the significand is zero.

                Float::Inf { negative } => {

                    let exp_bits = (1 << $exp_bits) - 1;

                    let neg_bit = *negative as $int;

                    return Some(

                        (neg_bit << neg_offset) |

                        (exp_bits << exp_offset)

);

                // NaN is when the exponent bits are all set and

                // the significand is nonzero. The default of NaN is

                // when only the highest bit of the significand is set.

                Float::Nan { negative, val } => {

                    let exp_bits = (1 << $exp_bits) - 1;

                    let neg_bit = *negative as $int;

                    let signif = match val {

                        Some(val) => $int::from_str_radix(val,16).ok()?,

                        None => 1 << (signif_bits - 1),

};

                    // If the significand is zero then this is actually infinity

                    // so we fail to parse it.

                    if signif & signif_mask == 0 {

                        return None;

                    return Some(

                        (neg_bit << neg_offset) |

                        (exp_bits << exp_offset) |

                        (signif & signif_mask)

);

                // This is trickier, handle this below

                Float::Val { hex, integral, decimal, exponent } => {

                    (hex, integral, decimal, exponent)

};

            // Rely on Rust's standard library to parse base 10 floats

            // correctly.

            if !*hex {

                let mut s = integral.to_string();

                if let Some(decimal) = decimal {

                    s.push_str(".");

                    s.push_str(&decimal);

                if let Some(exponent) = exponent_str {

                    s.push_str("e");

                    s.push_str(&exponent);

                let float = s.parse::<$float>().ok()?;

                // looks like the `*.wat` format considers infinite overflow to

                // be invalid.

                if float.is_infinite() {

                    return None;

                return Some(float.to_bits());

            // Parsing hex floats is... hard! I don't really know what most of

            // this below does. It was copied from Gecko's implementation in

            // `WasmTextToBinary.cpp`. Would love comments on this if you have

            // them!

            let decimal = decimal.as_ref().map(|s| &**s).unwrap_or("");

            let negative = integral.starts_with('-');

            let integral = integral.trim_start_matches('-').trim_start_matches('0');

            // Do a bunch of work up front to locate the first non-zero digit

            // to determine the initial exponent. There's a number of

            // adjustments depending on where the digit was found, but the

            // general idea here is that I'm not really sure why things are

            // calculated the way they are but it should match Gecko.

            let decimal_no_leading = decimal.trim_start_matches('0');

            let decimal_iter = if integral.is_empty() {

                decimal_no_leading.chars()

            } else {

                decimal.chars()

};

            let mut digits = integral.chars()

                .map(|c| (to_hex(c) as $int, false))

                .chain(decimal_iter.map(|c| (to_hex(c) as $int, true)));

            let lead_nonzero_digit = match digits.next() {

                Some((c, _)) => c,

                // No digits? Must be `+0` or `-0`, being careful to handle the

                // sign encoding here.

                None if negative => return Some(1 << (width - 1)),

                None => return Some(0),

};

            let mut significand = 0 as $int;

            let mut exponent = if !integral.is_empty() {

            } else {

                -((decimal.len() - decimal_no_leading.len() + 1) as i32) + 1

};

            let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4;

            exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?;

            let mut significand_pos = (width - (4 - (lz as usize))) as isize;

            assert!(significand_pos >= 0);

            significand |= lead_nonzero_digit << significand_pos;

            // Now that we've got an anchor in the string we parse the remaining

            // digits. Again, not entirely sure why everything is the way it is

            // here! This is copied frmo gecko.

            let mut discarded_extra_nonzero = false;

            for (digit, decimal) in digits {

                if !decimal {

                    exponent += 4;

                if significand_pos > -4 {

                    significand_pos -= 4;

                if significand_pos >= 0 {

                    significand |= digit << significand_pos;

                } else if significand_pos > -4 {

                    significand |= digit >> (4 - significand_pos);

                    discarded_extra_nonzero = (digit & !((!0) >> (4 - significand_pos))) != 0;

                } else if digit != 0 {

                    discarded_extra_nonzero = true;

            exponent = exponent.checked_add(match exponent_str {

                Some(s) => s.parse::<i32>().ok()?,

                None => 0,

            })?;

            debug_assert!(significand != 0);

            let (encoded_exponent, encoded_significand, discarded_significand) =

                if exponent <= -bias {

                    // Underflow to subnormal or zero.

                    let shift = exp_offset as i32 + exponent + bias;

                    if shift == 0 {

                        (0, 0, significand)

                    } else if shift < 0 || shift >= width as i32 {

                        (0, 0, 0)

                    } else {

0,

                            significand >> (width as i32 - shift),

                            significand << shift,

                } else if exponent <= bias {

                    // Normal (non-zero). The significand's leading 1 is encoded

                    // implicitly.

                        ((exponent + bias) as $int) << exp_offset,

                        (significand >> (width - exp_offset - 1)) & signif_mask,

                        significand << (exp_offset + 1),

                } else {

                    // Overflow to infinity.

                        ((1 << $exp_bits) - 1) << exp_offset,

0,

0,

};

            let bits = encoded_exponent | encoded_significand;

            // Apply rounding. If this overflows the significand, it carries

            // into the exponent bit according to the magic of the IEEE 754

            // encoding.

//

            // Or rather, the comment above is what Gecko says so it's copied

            // here too.

            let msb = 1 << (width - 1);

            let bits = bits

                + (((discarded_significand & msb != 0)

                    && ((discarded_significand & !msb != 0) ||

                         discarded_extra_nonzero ||

                         // ties to even

                         (encoded_significand & 1 != 0))) as $int);

            // Just before we return the bits be sure to handle the sign bit we

            // found at the beginning.

            let bits = if negative {

                bits | (1 << (width - 1))

            } else {

                bits

};

            // looks like the `*.wat` format considers infinite overflow to

            // be invalid.

            if $float::from_bits(bits).is_infinite() {

                return None;

            Some(bits)

)*)

float! {

    F32 => {

        bits: u32,

        float: f32,

        exponent_bits: 8,

        name: strtof,

    F64 => {

        bits: u64,

        float: f64,

        exponent_bits: 11,

        name: strtod,

/// Returns the numeric value of the hexadecimal digit `c`.
///
/// Both lowercase and uppercase letters are accepted. Every other character
/// is treated as a decimal digit, so callers must only pass valid hex
/// digits.
fn to_hex(c: char) -> u8 {
    match c {
        'a'..='f' => c as u8 - b'a' + 10,
        'A'..='F' => c as u8 - b'A' + 10,
        _ => c as u8 - b'0',
    }
}

/// A convenience type to use with [`Parser::peek`](crate::parser::Parser::peek)

/// to see if the next token is an s-expression.

pub struct LParen {

    _priv: (),

impl Peek for LParen {

    fn peek(cursor: Cursor<'_>) -> Result<bool> {

        cursor.peek_lparen()

    fn display() -> &'static str {

        "left paren"

/// A convenience type to use with [`Parser::peek`](crate::parser::Parser::peek)

/// to see if the next token is the end of an s-expression.

pub struct RParen {

    _priv: (),

impl Peek for RParen {

    fn peek(cursor: Cursor<'_>) -> Result<bool> {

        cursor.peek_rparen()

    fn display() -> &'static str {

        "right paren"

#[cfg(test)]

mod tests {

    #[test]

    fn hex_strtof() {

        macro_rules! f {

            ($a:tt) => (f!(@mk $a, None, None));

            ($a:tt p $e:tt) => (f!(@mk $a, None, Some($e.into())));

            ($a:tt . $b:tt) => (f!(@mk $a, Some($b.into()), None));

            ($a:tt . $b:tt p $e:tt) => (f!(@mk $a, Some($b.into()), Some($e.into())));

            (@mk $a:tt, $b:expr, $e:expr) => (crate::lexer::Float::Val {

                hex: true,

                integral: $a.into(),

                decimal: $b,

                exponent: $e

});

        assert_eq!(super::strtof(&f!("0")), Some(0));

        assert_eq!(super::strtof(&f!("0" . "0")), Some(0));

        assert_eq!(super::strtof(&f!("0" . "0" p "2354")), Some(0));

        assert_eq!(super::strtof(&f!("-0")), Some(1 << 31));

        assert_eq!(super::strtof(&f!("f32")), Some(0x45732000));

        assert_eq!(super::strtof(&f!("0" . "f32")), Some(0x3f732000));

        assert_eq!(super::strtof(&f!("1" . "2")), Some(0x3f900000));

        assert_eq!(

            super::strtof(&f!("0" . "00000100000000000" p "-126")),

            Some(0)

);

        assert_eq!(

            super::strtof(&f!("1" . "fffff4" p "-106")),

            Some(0x0afffffa)

);

        assert_eq!(super::strtof(&f!("fffff98" p "-133")), Some(0x0afffffa));

        assert_eq!(super::strtof(&f!("0" . "081" p "023")), Some(0x48810000));

        assert_eq!(

            super::strtof(&f!("1" . "00000100000000000" p "-50")),

            Some(0x26800000)

);