Source code

Revision control

Copy as Markdown

Other Tools

//! Common tokens that implement the [`Parse`] trait which are otherwise not
//! associated specifically with the wasm text format per se (useful in other
//! contexts too perhaps).
use crate::lexer::{Float, Lexer, TokenKind};
use crate::parser::{Cursor, Parse, Parser, Peek, Result};
use crate::{annotation, Error};
use std::fmt;
use std::hash::{Hash, Hasher};
use std::str;
/// A position in the original source stream, used to render errors.
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct Span {
pub(crate) offset: usize,
}
impl Span {
/// Construct a `Span` from a byte offset in the source file.
pub fn from_offset(offset: usize) -> Self {
Span { offset }
}
/// Returns the line/column information of this span within `text`.
/// Line and column numbers are 0-indexed. User presentation is typically
/// 1-indexed, but 0-indexing is appropriate for internal use with
/// iterators and slices.
pub fn linecol_in(&self, text: &str) -> (usize, usize) {
let mut cur = 0;
// Use split_terminator instead of lines so that if there is a `\r`,
// it is included in the offset calculation. The `+1` values below
// account for the `\n`.
for (i, line) in text.split_terminator('\n').enumerate() {
if cur + line.len() + 1 > self.offset {
return (i, self.offset - cur);
}
cur += line.len() + 1;
}
(text.lines().count(), 0)
}
/// Returns the byte offset of this span.
pub fn offset(&self) -> usize {
self.offset
}
}
/// An identifier in a WebAssembly module, prefixed by `$` in the textual
/// format.
///
/// An identifier is used to symbolically refer to items in a a wasm module,
/// typically via the [`Index`] type.
#[derive(Copy, Clone)]
pub struct Id<'a> {
name: &'a str,
gen: u32,
span: Span,
}
impl<'a> Id<'a> {
/// Construct a new identifier from given string.
///
/// Returns an error if the string does not contain a leading `$`, or is not a
/// valid WASM text format identifier.
pub fn new(name: &'a str, span: Span) -> Result<Id<'a>> {
let mut _pos: usize = 0;
let tok = Lexer::new(name).parse(&mut _pos)?;
match tok {
Some(tok) if tok.kind == TokenKind::Id => Ok(Id {
name: tok.id(name),
gen: 0,
span,
}),
_ => Err(Error::parse(span, name, "expected an identifier".into())),
}
}
pub(crate) fn gensym(span: Span, gen: u32) -> Id<'a> {
Id {
name: "gensym",
gen,
span,
}
}
/// Returns the underlying name of this identifier.
///
/// The name returned does not contain the leading `$`.
pub fn name(&self) -> &'a str {
self.name
}
/// Returns span of this identifier in the original source
pub fn span(&self) -> Span {
self.span
}
pub(crate) fn is_gensym(&self) -> bool {
self.gen != 0
}
}
impl<'a> Hash for Id<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
self.name.hash(hasher);
self.gen.hash(hasher);
}
}
impl<'a> PartialEq for Id<'a> {
fn eq(&self, other: &Id<'a>) -> bool {
self.name == other.name && self.gen == other.gen
}
}
impl<'a> Eq for Id<'a> {}
impl<'a> Parse<'a> for Id<'a> {
fn parse(parser: Parser<'a>) -> Result<Self> {
parser.step(|c| {
if let Some((name, rest)) = c.id()? {
return Ok((
Id {
name,
gen: 0,
span: c.cur_span(),
},
rest,
));
}
Err(c.error("expected an identifier"))
})
}
}
impl fmt::Debug for Id<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.gen != 0 {
f.debug_struct("Id").field("gen", &self.gen).finish()
} else {
self.name.fmt(f)
}
}
}
impl Peek for Id<'_> {
fn peek(cursor: Cursor<'_>) -> Result<bool> {
cursor.peek_id()
}
fn display() -> &'static str {
"an identifier"
}
}
/// A reference to another item in a wasm module.
///
/// This type is used for items referring to other items (such as `call $foo`
/// referencing function `$foo`). References can be either an index (u32) or an
/// [`Id`] in the textual format.
///
/// The emission phase of a module will ensure that `Index::Id` is never used
/// and switch them all to `Index::Num`.
#[derive(Copy, Clone, Debug)]
pub enum Index<'a> {
/// A numerical index that this references. The index space this is
/// referencing is implicit based on where this [`Index`] is stored.
Num(u32, Span),
/// A human-readable identifier this references. Like `Num`, the namespace
/// this references is based on where this is stored.
Id(Id<'a>),
}
impl Index<'_> {
/// Returns the source location where this `Index` was defined.
pub fn span(&self) -> Span {
match self {
Index::Num(_, span) => *span,
Index::Id(id) => id.span(),
}
}
pub(crate) fn is_resolved(&self) -> bool {
matches!(self, Index::Num(..))
}
}
impl<'a> Parse<'a> for Index<'a> {
fn parse(parser: Parser<'a>) -> Result<Self> {
if parser.peek::<Id>()? {
Ok(Index::Id(parser.parse()?))
} else if parser.peek::<u32>()? {
let (val, span) = parser.parse()?;
Ok(Index::Num(val, span))
} else {
Err(parser.error(format!(
"unexpected token, expected an index or an identifier"
)))
}
}
}
impl Peek for Index<'_> {
fn peek(cursor: Cursor<'_>) -> Result<bool> {
Ok(u32::peek(cursor)? || Id::peek(cursor)?)
}
fn display() -> &'static str {
"an index"
}
}
impl<'a> From<Id<'a>> for Index<'a> {
fn from(id: Id<'a>) -> Index<'a> {
Index::Id(id)
}
}
impl PartialEq for Index<'_> {
fn eq(&self, other: &Index<'_>) -> bool {
match (self, other) {
(Index::Num(a, _), Index::Num(b, _)) => a == b,
(Index::Id(a), Index::Id(b)) => a == b,
_ => false,
}
}
}
impl Eq for Index<'_> {}
impl Hash for Index<'_> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
match self {
Index::Num(a, _) => {
0u8.hash(hasher);
a.hash(hasher);
}
Index::Id(a) => {
1u8.hash(hasher);
a.hash(hasher);
}
}
}
}
/// Parses `(func $foo)`
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub struct ItemRef<'a, K> {
pub kind: K,
pub idx: Index<'a>,
}
impl<'a, K: Parse<'a>> Parse<'a> for ItemRef<'a, K> {
fn parse(parser: Parser<'a>) -> Result<Self> {
parser.parens(|parser| {
let kind = parser.parse::<K>()?;
let idx = parser.parse()?;
Ok(ItemRef { kind, idx })
})
}
}
impl<'a, K: Peek> Peek for ItemRef<'a, K> {
fn peek(cursor: Cursor<'_>) -> Result<bool> {
match cursor.lparen()? {
Some(remaining) => K::peek(remaining),
None => Ok(false),
}
}
fn display() -> &'static str {
"an item reference"
}
}
/// An `@name` annotation in source, currently of the form `@name "foo"`
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct NameAnnotation<'a> {
/// The name specified for the item
pub name: &'a str,
}
impl<'a> Parse<'a> for NameAnnotation<'a> {
fn parse(parser: Parser<'a>) -> Result<Self> {
parser.parse::<annotation::name>()?;
let name = parser.parse()?;
Ok(NameAnnotation { name })
}
}
impl<'a> Parse<'a> for Option<NameAnnotation<'a>> {
fn parse(parser: Parser<'a>) -> Result<Self> {
Ok(if parser.peek2::<annotation::name>()? {
Some(parser.parens(|p| p.parse())?)
} else {
None
})
}
}
macro_rules! integers {
($($i:ident($u:ident))*) => ($(
impl<'a> Parse<'a> for $i {
fn parse(parser: Parser<'a>) -> Result<Self> {
Ok(parser.parse::<($i, Span)>()?.0)
}
}
impl<'a> Parse<'a> for ($i, Span) {
fn parse(parser: Parser<'a>) -> Result<Self> {
parser.step(|c| {
if let Some((i, rest)) = c.integer()? {
let (s, base) = i.val();
let val = $i::from_str_radix(s, base)
.or_else(|_| {
$u::from_str_radix(s, base).map(|i| i as $i)
});
return match val {
Ok(n) => Ok(((n, c.cur_span()), rest)),
Err(_) => Err(c.error(concat!(
"invalid ",
stringify!($i),
" number: constant out of range",
))),
};
}
Err(c.error(concat!("expected a ", stringify!($i))))
})
}
}
impl Peek for $i {
fn peek(cursor: Cursor<'_>) -> Result<bool> {
cursor.peek_integer()
}
fn display() -> &'static str {
stringify!($i)
}
}
)*)
}
integers! {
u8(u8) u16(u16) u32(u32) u64(u64)
i8(u8) i16(u16) i32(u32) i64(u64)
}
impl<'a> Parse<'a> for &'a [u8] {
fn parse(parser: Parser<'a>) -> Result<Self> {
parser.step(|c| {
if let Some((i, rest)) = c.string()? {
return Ok((i, rest));
}
Err(c.error("expected a string"))
})
}
}
impl Peek for &'_ [u8] {
fn peek(cursor: Cursor<'_>) -> Result<bool> {
cursor.peek_string()
}
fn display() -> &'static str {
"string"
}
}
impl<'a> Parse<'a> for &'a str {
fn parse(parser: Parser<'a>) -> Result<Self> {
str::from_utf8(parser.parse()?)
.map_err(|_| parser.error_at(parser.prev_span(), "malformed UTF-8 encoding"))
}
}
impl Parse<'_> for String {
fn parse(parser: Parser<'_>) -> Result<Self> {
Ok(<&str>::parse(parser)?.to_string())
}
}
impl Peek for &'_ str {
fn peek(cursor: Cursor<'_>) -> Result<bool> {
<&[u8]>::peek(cursor)
}
fn display() -> &'static str {
<&[u8]>::display()
}
}
macro_rules! float {
($($name:ident => {
bits: $int:ident,
float: $float:ident,
exponent_bits: $exp_bits:tt,
name: $parse:ident,
})*) => ($(
/// A parsed floating-point type
#[derive(Debug, Copy, Clone)]
pub struct $name {
/// The raw bits that this floating point number represents.
pub bits: $int,
}
impl<'a> Parse<'a> for $name {
fn parse(parser: Parser<'a>) -> Result<Self> {
parser.step(|c| {
let (val, rest) = if let Some((f, rest)) = c.float()? {
($parse(&f), rest)
} else if let Some((i, rest)) = c.integer()? {
let (s, base) = i.val();
(
$parse(&Float::Val {
hex: base == 16,
integral: s.into(),
decimal: None,
exponent: None,
}),
rest,
)
} else {
return Err(c.error("expected a float"));
};
match val {
Some(bits) => Ok(($name { bits }, rest)),
None => Err(c.error("invalid float value: constant out of range")),
}
})
}
}
fn $parse(val: &Float<'_>) -> Option<$int> {
// Compute a few well-known constants about the float representation
// given the parameters to the macro here.
let width = std::mem::size_of::<$int>() * 8;
let neg_offset = width - 1;
let exp_offset = neg_offset - $exp_bits;
let signif_bits = width - 1 - $exp_bits;
let signif_mask = (1 << exp_offset) - 1;
let bias = (1 << ($exp_bits - 1)) - 1;
let (hex, integral, decimal, exponent_str) = match val {
// Infinity is when the exponent bits are all set and
// the significand is zero.
Float::Inf { negative } => {
let exp_bits = (1 << $exp_bits) - 1;
let neg_bit = *negative as $int;
return Some(
(neg_bit << neg_offset) |
(exp_bits << exp_offset)
);
}
// NaN is when the exponent bits are all set and
// the significand is nonzero. The default of NaN is
// when only the highest bit of the significand is set.
Float::Nan { negative, val } => {
let exp_bits = (1 << $exp_bits) - 1;
let neg_bit = *negative as $int;
let signif = match val {
Some(val) => $int::from_str_radix(val,16).ok()?,
None => 1 << (signif_bits - 1),
};
// If the significand is zero then this is actually infinity
// so we fail to parse it.
if signif & signif_mask == 0 {
return None;
}
return Some(
(neg_bit << neg_offset) |
(exp_bits << exp_offset) |
(signif & signif_mask)
);
}
// This is trickier, handle this below
Float::Val { hex, integral, decimal, exponent } => {
(hex, integral, decimal, exponent)
}
};
// Rely on Rust's standard library to parse base 10 floats
// correctly.
if !*hex {
let mut s = integral.to_string();
if let Some(decimal) = decimal {
s.push_str(".");
s.push_str(&decimal);
}
if let Some(exponent) = exponent_str {
s.push_str("e");
s.push_str(&exponent);
}
let float = s.parse::<$float>().ok()?;
// looks like the `*.wat` format considers infinite overflow to
// be invalid.
if float.is_infinite() {
return None;
}
return Some(float.to_bits());
}
// Parsing hex floats is... hard! I don't really know what most of
// this below does. It was copied from Gecko's implementation in
// `WasmTextToBinary.cpp`. Would love comments on this if you have
// them!
let decimal = decimal.as_ref().map(|s| &**s).unwrap_or("");
let negative = integral.starts_with('-');
let integral = integral.trim_start_matches('-').trim_start_matches('0');
// Do a bunch of work up front to locate the first non-zero digit
// to determine the initial exponent. There's a number of
// adjustments depending on where the digit was found, but the
// general idea here is that I'm not really sure why things are
// calculated the way they are but it should match Gecko.
let decimal_no_leading = decimal.trim_start_matches('0');
let decimal_iter = if integral.is_empty() {
decimal_no_leading.chars()
} else {
decimal.chars()
};
let mut digits = integral.chars()
.map(|c| (to_hex(c) as $int, false))
.chain(decimal_iter.map(|c| (to_hex(c) as $int, true)));
let lead_nonzero_digit = match digits.next() {
Some((c, _)) => c,
// No digits? Must be `+0` or `-0`, being careful to handle the
// sign encoding here.
None if negative => return Some(1 << (width - 1)),
None => return Some(0),
};
let mut significand = 0 as $int;
let mut exponent = if !integral.is_empty() {
1
} else {
-((decimal.len() - decimal_no_leading.len() + 1) as i32) + 1
};
let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4;
exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?;
let mut significand_pos = (width - (4 - (lz as usize))) as isize;
assert!(significand_pos >= 0);
significand |= lead_nonzero_digit << significand_pos;
// Now that we've got an anchor in the string we parse the remaining
// digits. Again, not entirely sure why everything is the way it is
// here! This is copied frmo gecko.
let mut discarded_extra_nonzero = false;
for (digit, decimal) in digits {
if !decimal {
exponent += 4;
}
if significand_pos > -4 {
significand_pos -= 4;
}
if significand_pos >= 0 {
significand |= digit << significand_pos;
} else if significand_pos > -4 {
significand |= digit >> (4 - significand_pos);
discarded_extra_nonzero = (digit & !((!0) >> (4 - significand_pos))) != 0;
} else if digit != 0 {
discarded_extra_nonzero = true;
}
}
exponent = exponent.checked_add(match exponent_str {
Some(s) => s.parse::<i32>().ok()?,
None => 0,
})?;
debug_assert!(significand != 0);
let (encoded_exponent, encoded_significand, discarded_significand) =
if exponent <= -bias {
// Underflow to subnormal or zero.
let shift = exp_offset as i32 + exponent + bias;
if shift == 0 {
(0, 0, significand)
} else if shift < 0 || shift >= width as i32 {
(0, 0, 0)
} else {
(
0,
significand >> (width as i32 - shift),
significand << shift,
)
}
} else if exponent <= bias {
// Normal (non-zero). The significand's leading 1 is encoded
// implicitly.
(
((exponent + bias) as $int) << exp_offset,
(significand >> (width - exp_offset - 1)) & signif_mask,
significand << (exp_offset + 1),
)
} else {
// Overflow to infinity.
(
((1 << $exp_bits) - 1) << exp_offset,
0,
0,
)
};
let bits = encoded_exponent | encoded_significand;
// Apply rounding. If this overflows the significand, it carries
// into the exponent bit according to the magic of the IEEE 754
// encoding.
//
// Or rather, the comment above is what Gecko says so it's copied
// here too.
let msb = 1 << (width - 1);
let bits = bits
+ (((discarded_significand & msb != 0)
&& ((discarded_significand & !msb != 0) ||
discarded_extra_nonzero ||
// ties to even
(encoded_significand & 1 != 0))) as $int);
// Just before we return the bits be sure to handle the sign bit we
// found at the beginning.
let bits = if negative {
bits | (1 << (width - 1))
} else {
bits
};
// looks like the `*.wat` format considers infinite overflow to
// be invalid.
if $float::from_bits(bits).is_infinite() {
return None;
}
Some(bits)
}
)*)
}
float! {
F32 => {
bits: u32,
float: f32,
exponent_bits: 8,
name: strtof,
}
F64 => {
bits: u64,
float: f64,
exponent_bits: 11,
name: strtod,
}
}
fn to_hex(c: char) -> u8 {
match c {
'a'..='f' => c as u8 - b'a' + 10,
'A'..='F' => c as u8 - b'A' + 10,
_ => c as u8 - b'0',
}
}
/// A convenience type to use with [`Parser::peek`](crate::parser::Parser::peek)
/// to see if the next token is an s-expression.
pub struct LParen {
_priv: (),
}
impl Peek for LParen {
fn peek(cursor: Cursor<'_>) -> Result<bool> {
cursor.peek_lparen()
}
fn display() -> &'static str {
"left paren"
}
}
/// A convenience type to use with [`Parser::peek`](crate::parser::Parser::peek)
/// to see if the next token is the end of an s-expression.
pub struct RParen {
_priv: (),
}
impl Peek for RParen {
fn peek(cursor: Cursor<'_>) -> Result<bool> {
cursor.peek_rparen()
}
fn display() -> &'static str {
"right paren"
}
}
#[cfg(test)]
mod tests {
#[test]
fn hex_strtof() {
macro_rules! f {
($a:tt) => (f!(@mk $a, None, None));
($a:tt p $e:tt) => (f!(@mk $a, None, Some($e.into())));
($a:tt . $b:tt) => (f!(@mk $a, Some($b.into()), None));
($a:tt . $b:tt p $e:tt) => (f!(@mk $a, Some($b.into()), Some($e.into())));
(@mk $a:tt, $b:expr, $e:expr) => (crate::lexer::Float::Val {
hex: true,
integral: $a.into(),
decimal: $b,
exponent: $e
});
}
assert_eq!(super::strtof(&f!("0")), Some(0));
assert_eq!(super::strtof(&f!("0" . "0")), Some(0));
assert_eq!(super::strtof(&f!("0" . "0" p "2354")), Some(0));
assert_eq!(super::strtof(&f!("-0")), Some(1 << 31));
assert_eq!(super::strtof(&f!("f32")), Some(0x45732000));
assert_eq!(super::strtof(&f!("0" . "f32")), Some(0x3f732000));
assert_eq!(super::strtof(&f!("1" . "2")), Some(0x3f900000));
assert_eq!(
super::strtof(&f!("0" . "00000100000000000" p "-126")),
Some(0)
);
assert_eq!(
super::strtof(&f!("1" . "fffff4" p "-106")),
Some(0x0afffffa)
);
assert_eq!(super::strtof(&f!("fffff98" p "-133")), Some(0x0afffffa));
assert_eq!(super::strtof(&f!("0" . "081" p "023")), Some(0x48810000));
assert_eq!(
super::strtof(&f!("1" . "00000100000000000" p "-50")),
Some(0x26800000)
);
}
}