Source code
Revision control
Copy as Markdown
Other Tools
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
//! Types for walking stepwise through a trie.
//!
//! For examples, see the `.cursor()` functions
//! and the `Cursor` types in this module.
use crate::reader;
use crate::ZeroAsciiIgnoreCaseTrie;
use crate::ZeroTrieSimpleAscii;
use core::fmt;
impl<Store> ZeroTrieSimpleAscii<Store>
where
Store: AsRef<[u8]> + ?Sized,
{
/// Gets a cursor into the current trie.
///
/// Useful to query a trie with data that is not a slice.
///
/// This is currently supported only on [`ZeroTrieSimpleAscii`]
/// and [`ZeroAsciiIgnoreCaseTrie`].
///
/// # Examples
///
/// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
///
/// ```
/// use core::fmt::Write;
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// // A trie with two values: "abc" and "abcdef"
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
///
/// // Get out the value for "abc"
/// let mut cursor = trie.cursor();
/// write!(&mut cursor, "abc");
/// assert_eq!(cursor.take_value(), Some(0));
/// ```
///
/// Find the longest prefix match:
///
/// ```
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// // A trie with two values: "abc" and "abcdef"
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
///
/// // Find the longest prefix of the string "abcdxy":
/// let query = b"abcdxy";
/// let mut longest_prefix = 0;
/// let mut cursor = trie.cursor();
/// for (i, b) in query.iter().enumerate() {
/// // Checking is_empty() is not required, but it is
/// // good for efficiency
/// if cursor.is_empty() {
/// break;
/// }
/// if cursor.take_value().is_some() {
/// longest_prefix = i;
/// }
/// cursor.step(*b);
/// }
///
/// // The longest prefix is "abc" which is length 3:
/// assert_eq!(longest_prefix, 3);
/// ```
#[inline]
pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
ZeroTrieSimpleAsciiCursor {
trie: self.as_borrowed_slice(),
}
}
}
impl<Store> ZeroAsciiIgnoreCaseTrie<Store>
where
Store: AsRef<[u8]> + ?Sized,
{
/// Gets a cursor into the current trie.
///
/// Useful to query a trie with data that is not a slice.
///
/// This is currently supported only on [`ZeroTrieSimpleAscii`]
/// and [`ZeroAsciiIgnoreCaseTrie`].
///
/// # Examples
///
/// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
///
/// ```
/// use core::fmt::Write;
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
///
/// // A trie with two values: "aBc" and "aBcdEf"
/// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
///
/// // Get out the value for "abc" (case-insensitive!)
/// let mut cursor = trie.cursor();
/// write!(&mut cursor, "abc");
/// assert_eq!(cursor.take_value(), Some(0));
/// ```
///
/// For more examples, see [`ZeroTrieSimpleAscii::cursor`].
#[inline]
pub fn cursor(&self) -> ZeroAsciiIgnoreCaseTrieCursor {
ZeroAsciiIgnoreCaseTrieCursor {
trie: self.as_borrowed_slice(),
}
}
}
impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
/// Same as [`ZeroTrieSimpleAscii::cursor()`] but moves self to avoid
/// having to doubly anchor the trie to the stack.
#[inline]
pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
ZeroTrieSimpleAsciiCursor { trie: self }
}
}
impl<'a> ZeroAsciiIgnoreCaseTrie<&'a [u8]> {
/// Same as [`ZeroAsciiIgnoreCaseTrie::cursor()`] but moves self to avoid
/// having to doubly anchor the trie to the stack.
#[inline]
pub fn into_cursor(self) -> ZeroAsciiIgnoreCaseTrieCursor<'a> {
ZeroAsciiIgnoreCaseTrieCursor { trie: self }
}
}
/// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
///
/// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
#[derive(Debug, Clone)]
pub struct ZeroTrieSimpleAsciiCursor<'a> {
trie: ZeroTrieSimpleAscii<&'a [u8]>,
}
/// A cursor into a [`ZeroAsciiIgnoreCaseTrie`], useful for stepwise lookup.
///
/// For examples, see [`ZeroAsciiIgnoreCaseTrie::cursor()`].
#[derive(Debug, Clone)]
pub struct ZeroAsciiIgnoreCaseTrieCursor<'a> {
trie: ZeroAsciiIgnoreCaseTrie<&'a [u8]>,
}
/// Information about a probed edge.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive] // no need to destructure or construct this in userland
pub struct AsciiProbeResult {
/// The character's byte value between this node and its parent.
pub byte: u8,
/// The number of siblings of this node, _including itself_.
pub total_siblings: u8,
}
impl ZeroTrieSimpleAsciiCursor<'_> {
/// Steps the cursor one character into the trie based on the character's byte value.
///
/// # Examples
///
/// Unrolled loop checking for string presence at every step:
///
/// ```
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// // A trie with two values: "abc" and "abcdef"
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
///
/// // Search the trie for the string "abcdxy"
/// let mut cursor = trie.cursor();
/// assert_eq!(cursor.take_value(), None); // ""
/// cursor.step(b'a');
/// assert_eq!(cursor.take_value(), None); // "a"
/// cursor.step(b'b');
/// assert_eq!(cursor.take_value(), None); // "ab"
/// cursor.step(b'c');
/// assert_eq!(cursor.take_value(), Some(0)); // "abc"
/// cursor.step(b'd');
/// assert_eq!(cursor.take_value(), None); // "abcd"
/// assert!(!cursor.is_empty());
/// cursor.step(b'x'); // no strings have the prefix "abcdx"
/// assert!(cursor.is_empty());
/// assert_eq!(cursor.take_value(), None); // "abcdx"
/// cursor.step(b'y');
/// assert_eq!(cursor.take_value(), None); // "abcdxy"
/// ```
///
/// If the byte is not ASCII, the cursor will become empty:
///
/// ```
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// // A trie with two values: "abc" and "abcdef"
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
///
/// let mut cursor = trie.cursor();
/// assert_eq!(cursor.take_value(), None); // ""
/// cursor.step(b'a');
/// assert_eq!(cursor.take_value(), None); // "a"
/// cursor.step(b'b');
/// assert_eq!(cursor.take_value(), None); // "ab"
/// cursor.step(b'\xFF');
/// assert!(cursor.is_empty());
/// assert_eq!(cursor.take_value(), None);
/// ```
#[inline]
pub fn step(&mut self, byte: u8) {
reader::step_parameterized::<ZeroTrieSimpleAscii<[u8]>>(&mut self.trie.store, byte);
}
/// Takes the value at the current position.
///
/// Calling this function on a new cursor is equivalent to calling `.get()`
/// with the empty string (except that it can only be called once).
///
/// # Examples
///
/// ```
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// // A trie with two values: "" and "abc"
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
///
/// assert_eq!(Some(0), trie.get(""));
/// let mut cursor = trie.cursor();
/// assert_eq!(Some(0), cursor.take_value());
/// assert_eq!(None, cursor.take_value());
/// ```
#[inline]
pub fn take_value(&mut self) -> Option<usize> {
reader::take_value(&mut self.trie.store)
}
/// Steps the cursor one character into the trie based on an edge index,
/// returning the corresponding character as a byte.
///
/// This function is similar to [`Self::step()`], but it takes an index instead of a char.
/// This enables stepwise iteration over the contents of the trie.
///
/// If there are multiple possibilities for the next byte, the `index` argument allows
/// visiting them in order. Since this function steps the cursor, the cursor must be
/// cloned (a cheap operation) in order to visit multiple children.
///
/// # Examples
///
/// Continually query index 0 to extract the first item from a trie:
///
/// ```
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// let data: &[(String, usize)] = &[
/// ("ab".to_string(), 111),
/// ("abcxyz".to_string(), 22),
/// ("abde".to_string(), 333),
/// ("afg".to_string(), 44),
/// ];
///
/// let trie: ZeroTrieSimpleAscii<Vec<u8>> =
/// data.iter().map(|(s, v)| (s.as_str(), *v)).collect();
///
/// let mut cursor = trie.cursor();
/// let mut key = String::new();
/// let value = loop {
/// if let Some(value) = cursor.take_value() {
/// break value;
/// }
/// let probe_result = cursor.probe(0).unwrap();
/// key.push(char::from(probe_result.byte));
/// };
///
/// assert_eq!(key, "ab");
/// assert_eq!(value, 111);
/// ```
///
/// Stepwise iterate over all entries in the trie:
///
/// ```
/// # use zerotrie::ZeroTrieSimpleAscii;
/// # let data: &[(String, usize)] = &[
/// # ("ab".to_string(), 111),
/// # ("abcxyz".to_string(), 22),
/// # ("abde".to_string(), 333),
/// # ("afg".to_string(), 44)
/// # ];
/// # let trie: ZeroTrieSimpleAscii<Vec<u8>> = data
/// # .iter()
/// # .map(|(s, v)| (s.as_str(), *v))
/// # .collect();
/// // (trie built as in previous example)
///
/// // Initialize the iteration at the first child of the trie.
/// let mut stack = Vec::from([(trie.cursor(), 0, 0)]);
/// let mut key = Vec::new();
/// let mut results = Vec::new();
/// loop {
/// let Some((mut cursor, index, suffix_len)) = stack.pop() else {
/// // Nothing left in the trie.
/// break;
/// };
/// // Check to see if there is a value at the current node.
/// if let Some(value) = cursor.take_value() {
/// results.push((String::from_utf8(key.clone()).unwrap(), value));
/// }
/// // Now check for children of the current node.
/// let mut sub_cursor = cursor.clone();
/// if let Some(probe_result) = sub_cursor.probe(index) {
/// // Found a child. Add the current byte edge to the key.
/// key.push(probe_result.byte);
/// // Add the child to the stack, and also add back the current
/// // node if there are more siblings to visit.
/// if index + 1 < probe_result.total_siblings as usize {
/// stack.push((cursor, index + 1, suffix_len));
/// stack.push((sub_cursor, 0, 1));
/// } else {
/// stack.push((sub_cursor, 0, suffix_len + 1));
/// }
/// } else {
/// // No more children. Pop this node's bytes from the key.
/// for _ in 0..suffix_len {
/// key.pop();
/// }
/// }
/// }
///
/// assert_eq!(&results, data);
/// ```
pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
reader::probe_parameterized::<ZeroTrieSimpleAscii<[u8]>>(&mut self.trie.store, index)
}
/// Checks whether the cursor points to an empty trie.
///
/// Use this to determine when to stop iterating.
#[inline]
pub fn is_empty(&self) -> bool {
self.trie.is_empty()
}
}
impl ZeroAsciiIgnoreCaseTrieCursor<'_> {
/// Steps the cursor one byte into the trie.
///
/// Returns the byte if matched, which may be a different case than the input byte.
/// If this function returns `None`, any lookup loops can be terminated.
///
/// # Examples
///
/// Normalize the case of a value by stepping through an ignore-case trie:
///
/// ```
/// use std::borrow::Cow;
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
///
/// // A trie with two values: "aBc" and "aBcdEf"
/// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
///
/// // Get out the value for "abc" and normalize the key string
/// let mut cursor = trie.cursor();
/// let mut key_str = Cow::Borrowed("abc".as_bytes());
/// let mut i = 0;
/// let value = loop {
/// let Some(&input_byte) = key_str.get(i) else {
/// break cursor.take_value();
/// };
/// let Some(matched_byte) = cursor.step(input_byte) else {
/// break None;
/// };
/// if matched_byte != input_byte {
/// key_str.to_mut()[i] = matched_byte;
/// }
/// i += 1;
/// };
///
/// assert_eq!(value, Some(0));
/// assert_eq!(&*key_str, "aBc".as_bytes());
/// ```
///
/// For more examples, see [`ZeroTrieSimpleAsciiCursor::step`].
#[inline]
pub fn step(&mut self, byte: u8) -> Option<u8> {
reader::step_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(&mut self.trie.store, byte)
}
/// Takes the value at the current position.
///
/// For more details, see [`ZeroTrieSimpleAsciiCursor::take_value`].
#[inline]
pub fn take_value(&mut self) -> Option<usize> {
reader::take_value(&mut self.trie.store)
}
/// Probes the next byte in the cursor.
///
/// For more details, see [`ZeroTrieSimpleAsciiCursor::probe`].
pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
reader::probe_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(&mut self.trie.store, index)
}
/// Checks whether the cursor points to an empty trie.
///
/// For more details, see [`ZeroTrieSimpleAsciiCursor::is_empty`].
#[inline]
pub fn is_empty(&self) -> bool {
self.trie.is_empty()
}
}
impl fmt::Write for ZeroTrieSimpleAsciiCursor<'_> {
/// Steps the cursor through each ASCII byte of the string.
///
/// If the string contains non-ASCII chars, an error is returned.
///
/// # Examples
///
/// ```
/// use core::fmt::Write;
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// // A trie with two values: "abc" and "abcdef"
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
///
/// let mut cursor = trie.cursor();
/// cursor.write_str("abcdxy").expect("all ASCII");
/// cursor.write_str("🚂").expect_err("non-ASCII");
/// ```
fn write_str(&mut self, s: &str) -> fmt::Result {
for b in s.bytes() {
if !b.is_ascii() {
return Err(fmt::Error);
}
self.step(b);
}
Ok(())
}
/// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
/// an error if the char is non-ASCII.
///
/// # Examples
///
/// ```
/// use core::fmt::Write;
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// // A trie with two values: "abc" and "abcdef"
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
///
/// let mut cursor = trie.cursor();
/// cursor.write_char('a').expect("ASCII");
/// cursor.write_char('x').expect("ASCII");
/// cursor.write_char('🚂').expect_err("non-ASCII");
/// ```
fn write_char(&mut self, c: char) -> fmt::Result {
if !c.is_ascii() {
return Err(fmt::Error);
}
self.step(c as u8);
Ok(())
}
}
impl fmt::Write for ZeroAsciiIgnoreCaseTrieCursor<'_> {
/// Steps the cursor through each ASCII byte of the string.
///
/// If the string contains non-ASCII chars, an error is returned.
fn write_str(&mut self, s: &str) -> fmt::Result {
for b in s.bytes() {
if !b.is_ascii() {
return Err(fmt::Error);
}
self.step(b);
}
Ok(())
}
/// Equivalent to [`ZeroAsciiIgnoreCaseTrieCursor::step()`], except returns
/// an error if the char is non-ASCII.
fn write_char(&mut self, c: char) -> fmt::Result {
if !c.is_ascii() {
return Err(fmt::Error);
}
self.step(c as u8);
Ok(())
}
}