Source code
Revision control
Copy as Markdown
Other Tools
use winnow::stream::ContainsToken as _;
use winnow::stream::FindSlice as _;
use winnow::stream::Offset as _;
use winnow::stream::Stream as _;
use crate::decoder::StringBuilder;
use crate::ErrorSink;
use crate::Expected;
use crate::ParseError;
use crate::Raw;
use crate::Span;
/// Error description reported when the output `StringBuilder` refuses a `push_str`.
const ALLOCATION_ERROR: &str = "could not allocate for string";
/// The kind of value a scalar was decoded as.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum ScalarKind {
    /// A string value.
    String,
    /// A boolean, carrying the value it stands for.
    Boolean(bool),
    /// A date-time value.
    DateTime,
    /// A floating point number.
    Float,
    /// An integer, carrying the radix it is written in.
    Integer(IntegerRadix),
}

impl ScalarKind {
    /// Short, human-readable name for this kind of scalar.
    ///
    /// Integers defer to [`IntegerRadix::description`].
    pub fn description(&self) -> &'static str {
        match *self {
            ScalarKind::Integer(radix) => radix.description(),
            ScalarKind::Float => "float",
            ScalarKind::DateTime => "date-time",
            ScalarKind::Boolean(_) => "boolean",
            ScalarKind::String => "string",
        }
    }

    /// Error description used when a value of this kind is malformed.
    ///
    /// Integers defer to [`IntegerRadix::invalid_description`].
    pub fn invalid_description(&self) -> &'static str {
        match *self {
            ScalarKind::Integer(radix) => radix.invalid_description(),
            ScalarKind::Float => "invalid float",
            ScalarKind::DateTime => "invalid date-time",
            ScalarKind::Boolean(_) => "invalid boolean",
            ScalarKind::String => "invalid string",
        }
    }
}
/// The base an integer literal is written in.
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum IntegerRadix {
    /// Base 10 (the default).
    #[default]
    Dec,
    /// Base 16.
    Hex,
    /// Base 8.
    Oct,
    /// Base 2.
    Bin,
}

impl IntegerRadix {
    /// Short, human-readable name for integers of this radix.
    pub fn description(&self) -> &'static str {
        match *self {
            IntegerRadix::Bin => "binary",
            IntegerRadix::Oct => "octal",
            IntegerRadix::Hex => "hexadecimal",
            IntegerRadix::Dec => "integer",
        }
    }

    /// The numeric base (2, 8, 10 or 16).
    pub fn value(&self) -> u32 {
        match *self {
            IntegerRadix::Bin => 2,
            IntegerRadix::Oct => 8,
            IntegerRadix::Dec => 10,
            IntegerRadix::Hex => 16,
        }
    }

    /// Error description used when an integer of this radix is malformed.
    pub fn invalid_description(&self) -> &'static str {
        match *self {
            IntegerRadix::Bin => "invalid binary number",
            IntegerRadix::Oct => "invalid octal number",
            IntegerRadix::Hex => "invalid hexadecimal number",
            IntegerRadix::Dec => "invalid integer number",
        }
    }

    /// Predicate deciding whether a character is a digit in this radix.
    ///
    /// Underscore separators are not digits; callers handle them separately.
    fn validator(&self) -> fn(char) -> bool {
        fn is_bin(c: char) -> bool {
            matches!(c, '0'..='1')
        }
        fn is_oct(c: char) -> bool {
            matches!(c, '0'..='7')
        }
        fn is_dec(c: char) -> bool {
            c.is_ascii_digit()
        }
        fn is_hex(c: char) -> bool {
            c.is_ascii_hexdigit()
        }

        match *self {
            IntegerRadix::Bin => is_bin,
            IntegerRadix::Oct => is_oct,
            IntegerRadix::Dec => is_dec,
            IntegerRadix::Hex => is_hex,
        }
    }
}
pub(crate) fn decode_unquoted_scalar<'i>(
raw: Raw<'i>,
output: &mut dyn StringBuilder<'i>,
error: &mut dyn ErrorSink,
) -> ScalarKind {
let s = raw.as_str();
let Some(first) = s.as_bytes().first() else {
return decode_invalid(raw, output, error);
};
match first {
// number starts
b'+' | b'-' => {
let value = &raw.as_str()[1..];
decode_sign_prefix(raw, value, output, error)
}
// Report as if they were numbers because its most likely a typo
b'_' => decode_datetime_or_float_or_integer(raw.as_str(), raw, output, error),
// Date/number starts
b'0' => decode_zero_prefix(raw.as_str(), false, raw, output, error),
b'1'..=b'9' => decode_datetime_or_float_or_integer(raw.as_str(), raw, output, error),
// Report as if they were numbers because its most likely a typo
b'.' => {
let kind = ScalarKind::Float;
let stream = raw.as_str();
ensure_float(stream, raw, error);
decode_float_or_integer(stream, raw, kind, output, error)
}
b't' | b'T' => {
const SYMBOL: &str = "true";
let kind = ScalarKind::Boolean(true);
let expected = &[Expected::Literal(SYMBOL)];
decode_symbol(raw, SYMBOL, kind, expected, output, error)
}
b'f' | b'F' => {
const SYMBOL: &str = "false";
let kind = ScalarKind::Boolean(false);
let expected = &[Expected::Literal(SYMBOL)];
decode_symbol(raw, SYMBOL, kind, expected, output, error)
}
b'i' | b'I' => {
const SYMBOL: &str = "inf";
let kind = ScalarKind::Float;
let expected = &[Expected::Literal(SYMBOL)];
decode_symbol(raw, SYMBOL, kind, expected, output, error)
}
b'n' | b'N' => {
const SYMBOL: &str = "nan";
let kind = ScalarKind::Float;
let expected = &[Expected::Literal(SYMBOL)];
decode_symbol(raw, SYMBOL, kind, expected, output, error)
}
_ => decode_invalid(raw, output, error),
}
}
pub(crate) fn decode_sign_prefix<'i>(
raw: Raw<'i>,
value: &'i str,
output: &mut dyn StringBuilder<'i>,
error: &mut dyn ErrorSink,
) -> ScalarKind {
let Some(first) = value.as_bytes().first() else {
return decode_invalid(raw, output, error);
};
match first {
// number starts
b'+' | b'-' => {
let start = value.offset_from(&raw.as_str());
let end = start + 1;
error.report_error(
ParseError::new("redundant numeric sign")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[])
.with_unexpected(Span::new_unchecked(start, end)),
);
let value = &value[1..];
decode_sign_prefix(raw, value, output, error)
}
// Report as if they were numbers because its most likely a typo
b'_' => decode_datetime_or_float_or_integer(value, raw, output, error),
// Date/number starts
b'0' => decode_zero_prefix(value, true, raw, output, error),
b'1'..=b'9' => decode_datetime_or_float_or_integer(value, raw, output, error),
// Report as if they were numbers because its most likely a typo
b'.' => {
let kind = ScalarKind::Float;
let stream = raw.as_str();
ensure_float(stream, raw, error);
decode_float_or_integer(stream, raw, kind, output, error)
}
b'i' | b'I' => {
const SYMBOL: &str = "inf";
let kind = ScalarKind::Float;
if value != SYMBOL {
let expected = &[Expected::Literal(SYMBOL)];
let start = value.offset_from(&raw.as_str());
let end = start + value.len();
error.report_error(
ParseError::new(kind.invalid_description())
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(expected)
.with_unexpected(Span::new_unchecked(start, end)),
);
decode_as(raw, SYMBOL, kind, output, error)
} else {
decode_as_is(raw, kind, output, error)
}
}
b'n' | b'N' => {
const SYMBOL: &str = "nan";
let kind = ScalarKind::Float;
if value != SYMBOL {
let expected = &[Expected::Literal(SYMBOL)];
let start = value.offset_from(&raw.as_str());
let end = start + value.len();
error.report_error(
ParseError::new(kind.invalid_description())
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(expected)
.with_unexpected(Span::new_unchecked(start, end)),
);
decode_as(raw, SYMBOL, kind, output, error)
} else {
decode_as_is(raw, kind, output, error)
}
}
_ => decode_invalid(raw, output, error),
}
}
pub(crate) fn decode_zero_prefix<'i>(
value: &'i str,
signed: bool,
raw: Raw<'i>,
output: &mut dyn StringBuilder<'i>,
error: &mut dyn ErrorSink,
) -> ScalarKind {
debug_assert_eq!(value.as_bytes()[0], b'0');
if value.len() == 1 {
let kind = ScalarKind::Integer(IntegerRadix::Dec);
// No extra validation needed
decode_float_or_integer(raw.as_str(), raw, kind, output, error)
} else {
let radix = value.as_bytes()[1];
match radix {
b'x' | b'X' => {
if signed {
error.report_error(
ParseError::new("integers with a radix cannot be signed")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[])
.with_unexpected(Span::new_unchecked(0, 1)),
);
}
if radix == b'X' {
let start = value.offset_from(&raw.as_str());
let end = start + 2;
error.report_error(
ParseError::new("radix must be lowercase")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[Expected::Literal("0x")])
.with_unexpected(Span::new_unchecked(start, end)),
);
}
let radix = IntegerRadix::Hex;
let kind = ScalarKind::Integer(radix);
let stream = &value[2..];
ensure_radixed_value(stream, raw, radix, error);
decode_float_or_integer(stream, raw, kind, output, error)
}
b'o' | b'O' => {
if signed {
error.report_error(
ParseError::new("integers with a radix cannot be signed")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[])
.with_unexpected(Span::new_unchecked(0, 1)),
);
}
if radix == b'O' {
let start = value.offset_from(&raw.as_str());
let end = start + 2;
error.report_error(
ParseError::new("radix must be lowercase")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[Expected::Literal("0o")])
.with_unexpected(Span::new_unchecked(start, end)),
);
}
let radix = IntegerRadix::Oct;
let kind = ScalarKind::Integer(radix);
let stream = &value[2..];
ensure_radixed_value(stream, raw, radix, error);
decode_float_or_integer(stream, raw, kind, output, error)
}
b'b' | b'B' => {
if signed {
error.report_error(
ParseError::new("integers with a radix cannot be signed")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[])
.with_unexpected(Span::new_unchecked(0, 1)),
);
}
if radix == b'B' {
let start = value.offset_from(&raw.as_str());
let end = start + 2;
error.report_error(
ParseError::new("radix must be lowercase")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[Expected::Literal("0b")])
.with_unexpected(Span::new_unchecked(start, end)),
);
}
let radix = IntegerRadix::Bin;
let kind = ScalarKind::Integer(radix);
let stream = &value[2..];
ensure_radixed_value(stream, raw, radix, error);
decode_float_or_integer(stream, raw, kind, output, error)
}
b'd' | b'D' => {
if signed {
error.report_error(
ParseError::new("integers with a radix cannot be signed")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[])
.with_unexpected(Span::new_unchecked(0, 1)),
);
}
let radix = IntegerRadix::Dec;
let kind = ScalarKind::Integer(radix);
let stream = &value[2..];
error.report_error(
ParseError::new("redundant integer number prefix")
.with_context(Span::new_unchecked(0, raw.len()))
.with_expected(&[])
.with_unexpected(Span::new_unchecked(0, 2)),
);
ensure_radixed_value(stream, raw, radix, error);
decode_float_or_integer(stream, raw, kind, output, error)
}
_ => decode_datetime_or_float_or_integer(value, raw, output, error),
}
}
}
pub(crate) fn decode_datetime_or_float_or_integer<'i>(
value: &'i str,
raw: Raw<'i>,
output: &mut dyn StringBuilder<'i>,
error: &mut dyn ErrorSink,
) -> ScalarKind {
let Some(digit_end) = value
.as_bytes()
.offset_for(|b| !(b'0'..=b'9').contains_token(b))
else {
let kind = ScalarKind::Integer(IntegerRadix::Dec);
let stream = raw.as_str();
ensure_no_leading_zero(value, raw, error);
return decode_float_or_integer(stream, raw, kind, output, error);
};
#[cfg(feature = "unsafe")] // SAFETY: ascii digits ensures UTF-8 boundary
let rest = unsafe { &value.get_unchecked(digit_end..) };
#[cfg(not(feature = "unsafe"))]
let rest = &value[digit_end..];
if rest.starts_with("-") || rest.starts_with(":") {
decode_as_is(raw, ScalarKind::DateTime, output, error)
} else if rest.contains(" ") {
decode_invalid(raw, output, error)
} else if is_float(rest) {
let kind = ScalarKind::Float;
let stream = raw.as_str();
ensure_float(value, raw, error);
decode_float_or_integer(stream, raw, kind, output, error)
} else if rest.starts_with("_") {
let kind = ScalarKind::Integer(IntegerRadix::Dec);
let stream = raw.as_str();
ensure_no_leading_zero(value, raw, error);
decode_float_or_integer(stream, raw, kind, output, error)
} else {
decode_invalid(raw, output, error)
}
}
/// Report every way `value` deviates from the float grammar below.
///
/// `value` must be a sub-slice of `raw`; spans are reported relative to `raw`.
///
/// ```abnf
/// float = float-int-part ( exp / frac [ exp ] )
///
/// float-int-part = dec-int
/// frac = decimal-point zero-prefixable-int
/// decimal-point = %x2E ; .
/// zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT )
///
/// exp = "e" float-exp-part
/// float-exp-part = [ minus / plus ] zero-prefixable-int
/// ```
pub(crate) fn ensure_float<'i>(mut value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
    // Integer part: leading zeros are not allowed here
    ensure_dec_uint(&mut value, raw, false, "invalid mantissa", error);

    // Optional fraction
    let has_fraction = value.starts_with('.');
    if has_fraction {
        let _ = value.next_token();
        ensure_dec_uint(&mut value, raw, true, "invalid fraction", error);
    }

    // Optional exponent with an optional sign
    let has_exponent = value.starts_with(['e', 'E']);
    if has_exponent {
        let _ = value.next_token();
        let has_sign = value.starts_with(['+', '-']);
        if has_sign {
            let _ = value.next_token();
        }
        ensure_dec_uint(&mut value, raw, true, "invalid exponent", error);
    }

    if value.is_empty() {
        return;
    }

    // Whatever is left over is not part of a float
    let leftover_start = value.offset_from(&raw.as_str());
    let leftover_end = raw.len();
    error.report_error(
        ParseError::new(ScalarKind::Float.invalid_description())
            .with_context(Span::new_unchecked(0, raw.len()))
            .with_expected(&[])
            .with_unexpected(Span::new_unchecked(leftover_start, leftover_end)),
    );
}
/// Consume a run of ASCII digits and underscores from the front of `value` and validate it.
///
/// On return `value` points just past the run.  Errors are reported (relative to `raw`) when:
/// - the run contains no digit at all (using `invalid_description`), or
/// - the run has more than one digit, starts with `0`, and `zero_prefix` is `false`.
///
/// Underscore placement is not checked here.
pub(crate) fn ensure_dec_uint<'i>(
    value: &mut &'i str,
    raw: Raw<'i>,
    zero_prefix: bool,
    invalid_description: &'static str,
    error: &mut dyn ErrorSink,
) {
    let initial = *value;
    let mut digit_count = 0;
    loop {
        match value.chars().next() {
            Some(c) if c.is_ascii_digit() => digit_count += 1,
            // Separators are skipped without counting
            Some('_') => {}
            _ => break,
        }
        let _ = value.next_token();
    }

    if digit_count == 0 {
        // Zero-width span at the position where digits were expected
        let at = initial.offset_from(&raw.as_str());
        error.report_error(
            ParseError::new(invalid_description)
                .with_context(Span::new_unchecked(0, raw.len()))
                .with_expected(&[Expected::Description("digits")])
                .with_unexpected(Span::new_unchecked(at, at)),
        );
    } else if digit_count > 1 && initial.starts_with("0") && !zero_prefix {
        let zero_start = initial.offset_from(&raw.as_str());
        let zero_end = zero_start + 1;
        error.report_error(
            ParseError::new("unexpected leading zero")
                .with_context(Span::new_unchecked(0, raw.len()))
                .with_expected(&[])
                .with_unexpected(Span::new_unchecked(zero_start, zero_end)),
        );
    }
}
/// Report an "unexpected leading zero" error if `value` begins with `0`.
///
/// `value` must be a sub-slice of `raw`; the reported span covers just the zero.
pub(crate) fn ensure_no_leading_zero<'i>(value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
    if !value.starts_with("0") {
        return;
    }

    let zero_start = value.offset_from(&raw.as_str());
    let zero = Span::new_unchecked(zero_start, zero_start + 1);
    error.report_error(
        ParseError::new("unexpected leading zero")
            .with_context(Span::new_unchecked(0, raw.len()))
            .with_expected(&[])
            .with_unexpected(zero),
    );
}
/// Validate the digits of a radix-prefixed integer.
///
/// `value` is the text after the radix prefix and must be a sub-slice of `raw`.  A sign
/// directly after the prefix is reported as "unexpected sign" and skipped; every remaining
/// character that is neither a digit of `radix` nor `_` is reported with a zero-width span at
/// its position in `raw`.
pub(crate) fn ensure_radixed_value(
    value: &str,
    raw: Raw<'_>,
    radix: IntegerRadix,
    error: &mut dyn ErrorSink,
) {
    let digits = match value.strip_prefix(['+', '-']) {
        Some(unsigned) => {
            // Locate the sign through `value` itself: searching `raw` for the first sign
            // would point at a sign *before* the radix prefix when there is one.
            let pos = value.offset_from(&raw.as_str());
            error.report_error(
                ParseError::new("unexpected sign")
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[])
                    .with_unexpected(Span::new_unchecked(pos, pos + 1)),
            );
            unsigned
        }
        None => value,
    };

    let valid = radix.validator();
    for (index, c) in digits.char_indices() {
        if !valid(c) && c != '_' {
            let pos = digits.offset_from(&raw.as_str()) + index;
            error.report_error(
                ParseError::new(radix.invalid_description())
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_unexpected(Span::new_unchecked(pos, pos)),
            );
        }
    }
}
/// Copy a number into `output` with its `_` separators removed, validating their placement.
///
/// `stream` is the text to decode and must be a sub-slice of `raw` (either all of it or the
/// part after a radix prefix).  Every `_` that is not between two digits is reported as
/// "`_` may only go between digits", with spans relative to `raw`.  What counts as a digit
/// depends on `kind` (see `is_any_digit`).  `kind` is returned unchanged.
pub(crate) fn decode_float_or_integer<'i>(
    stream: &'i str,
    raw: Raw<'i>,
    kind: ScalarKind,
    output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    output.clear();

    let underscore = "_";

    if has_underscore(stream) {
        // All positions below are relative to `raw`, but "first part" / "last part" must be
        // judged relative to `stream`, which may start after a radix prefix.
        let stream_start = stream.offset_from(&raw.as_str());
        let stream_end = stream_start + stream.len();

        if stream.starts_with(underscore) {
            error.report_error(
                ParseError::new("`_` may only go between digits")
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[])
                    .with_unexpected(Span::new_unchecked(
                        stream_start,
                        stream_start + underscore.len(),
                    )),
            );
        }
        if 1 < stream.len() && stream.ends_with(underscore) {
            error.report_error(
                ParseError::new("`_` may only go between digits")
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[])
                    .with_unexpected(Span::new_unchecked(
                        stream_end - underscore.len(),
                        stream_end,
                    )),
            );
        }

        for part in stream.split(underscore) {
            let part_start = part.offset_from(&raw.as_str());
            let part_end = part_start + part.len();

            // Not the first part: an underscore immediately precedes it
            if stream_start < part_start {
                let first = part.as_bytes().first().copied().unwrap_or(b'0');
                if !is_any_digit(first, kind) {
                    let start = part_start - 1;
                    let end = part_start;
                    debug_assert_eq!(&raw.as_str()[start..end], underscore);
                    error.report_error(
                        ParseError::new("`_` may only go between digits")
                            .with_context(Span::new_unchecked(0, raw.len()))
                            .with_unexpected(Span::new_unchecked(start, end)),
                    );
                }
            }
            // Not the last part: an underscore immediately follows it
            if 1 < part.len() && part_end < stream_end {
                let last = part.as_bytes().last().copied().unwrap_or(b'0');
                if !is_any_digit(last, kind) {
                    let start = part_end;
                    let end = start + underscore.len();
                    debug_assert_eq!(&raw.as_str()[start..end], underscore);
                    error.report_error(
                        ParseError::new("`_` may only go between digits")
                            .with_context(Span::new_unchecked(0, raw.len()))
                            .with_unexpected(Span::new_unchecked(start, end)),
                    );
                }
            }
            // An empty part in the middle means two underscores in a row; the leading and
            // trailing cases were already reported above.
            if part.is_empty() && part_start != stream_start && part_end != stream_end {
                let start = part_start;
                let end = start + 1;
                error.report_error(
                    ParseError::new("`_` may only go between digits")
                        .with_context(Span::new_unchecked(0, raw.len()))
                        .with_unexpected(Span::new_unchecked(start, end)),
                );
            }

            if !part.is_empty() && !output.push_str(part) {
                error.report_error(
                    ParseError::new(ALLOCATION_ERROR)
                        .with_unexpected(Span::new_unchecked(part_start, part_end)),
                );
            }
        }
    } else if !output.push_str(stream) {
        error.report_error(
            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
        );
    }

    kind
}
/// Whether `b` may sit next to a `_` separator in a number of the given `kind`.
///
/// Floats accept decimal digits only; every other kind accepts the hexadecimal digit set.
fn is_any_digit(b: u8, kind: ScalarKind) -> bool {
    match kind {
        ScalarKind::Float => is_dec_integer_digit(b),
        _ => is_any_integer_digit(b),
    }
}
/// Whether `b` is an ASCII digit in any supported radix (`0-9`, `a-f`, `A-F`).
fn is_any_integer_digit(b: u8) -> bool {
    b.is_ascii_hexdigit()
}
/// Whether `b` is an ASCII decimal digit (`0-9`).
fn is_dec_integer_digit(b: u8) -> bool {
    b.is_ascii_digit()
}
/// Whether `raw` contains at least one `_` separator.
fn has_underscore(raw: &str) -> bool {
    let bytes = raw.as_bytes();
    matches!(bytes.find_slice(b'_'), Some(_))
}
/// Whether `raw` contains a float marker: a decimal point or an exponent letter.
fn is_float(raw: &str) -> bool {
    let markers = (b'.', b'e', b'E');
    let bytes = raw.as_bytes();
    matches!(bytes.find_slice(markers), Some(_))
}
pub(crate) fn decode_as_is<'i>(
raw: Raw<'i>,
kind: ScalarKind,
output: &mut dyn StringBuilder<'i>,
error: &mut dyn ErrorSink,
) -> ScalarKind {
let kind = decode_as(raw, raw.as_str(), kind, output, error);
kind
}
pub(crate) fn decode_as<'i>(
raw: Raw<'i>,
symbol: &'i str,
kind: ScalarKind,
output: &mut dyn StringBuilder<'i>,
error: &mut dyn ErrorSink,
) -> ScalarKind {
output.clear();
if !output.push_str(symbol) {
error.report_error(
ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
);
}
kind
}
/// Decode a keyword-like scalar that is expected to be exactly `symbol`.
///
/// - An exact match emits `symbol` with no error.
/// - A mismatch containing a space is treated as an unquoted string via [`decode_invalid`].
/// - Any other mismatch is reported as an invalid `kind` (suggesting `expected`) and `symbol`
///   is emitted in its place.
pub(crate) fn decode_symbol<'i>(
    raw: Raw<'i>,
    symbol: &'static str,
    kind: ScalarKind,
    expected: &'static [Expected],
    output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    let text = raw.as_str();
    let is_exact = text == symbol;
    if !is_exact {
        if text.contains(" ") {
            return decode_invalid(raw, output, error);
        }
        let whole = Span::new_unchecked(0, raw.len());
        error.report_error(
            ParseError::new(kind.invalid_description())
                .with_context(Span::new_unchecked(0, raw.len()))
                .with_expected(expected)
                .with_unexpected(whole),
        );
    }

    decode_as(raw, symbol, kind, output, error)
}
/// Report `raw` as an invalid unquoted value and emit it as a string.
///
/// If the text ends with a quote delimiter, the error says the opening quote is missing and
/// points at the start of the value; otherwise it says string values must be quoted.  The
/// raw text is then written to `output` and [`ScalarKind::String`] is returned.
pub(crate) fn decode_invalid<'i>(
    raw: Raw<'i>,
    output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    let text = raw.as_str();

    // Longer delimiters are tested first so `'''` is not mistaken for `'`
    let problem = if text.ends_with("'''") {
        ParseError::new("missing opening quote")
            .with_context(Span::new_unchecked(0, raw.len()))
            .with_expected(&[Expected::Literal(r#"'''"#)])
            .with_unexpected(Span::new_unchecked(0, 0))
    } else if text.ends_with(r#"""""#) {
        // NOTE(review): `with_expected` is called twice here, unlike the sibling branches;
        // presumably the second call replaces the first -- confirm against `ParseError`.
        ParseError::new("missing opening quote")
            .with_context(Span::new_unchecked(0, raw.len()))
            .with_expected(&[Expected::Description("multi-line basic string")])
            .with_expected(&[Expected::Literal(r#"""""#)])
            .with_unexpected(Span::new_unchecked(0, 0))
    } else if text.ends_with("'") {
        ParseError::new("missing opening quote")
            .with_context(Span::new_unchecked(0, raw.len()))
            .with_expected(&[Expected::Literal(r#"'"#)])
            .with_unexpected(Span::new_unchecked(0, 0))
    } else if text.ends_with(r#"""#) {
        ParseError::new("missing opening quote")
            .with_context(Span::new_unchecked(0, raw.len()))
            .with_expected(&[Expected::Literal(r#"""#)])
            .with_unexpected(Span::new_unchecked(0, 0))
    } else {
        ParseError::new("string values must be quoted")
            .with_context(Span::new_unchecked(0, raw.len()))
            .with_expected(&[Expected::Description("literal string")])
            .with_unexpected(Span::new_unchecked(0, raw.len()))
    };
    error.report_error(problem);

    output.clear();
    let stored = output.push_str(text);
    if !stored {
        error.report_error(
            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
        );
    }
    ScalarKind::String
}