Source code

Revision control

Copy as Markdown

Other Tools

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
use crate::reader;
use core::borrow::Borrow;
#[cfg(feature = "alloc")]
use crate::{
builder::bytestr::ByteStr, builder::nonconst::ZeroTrieBuilder, error::ZeroTrieBuildError,
};
#[cfg(feature = "alloc")]
use alloc::{boxed::Box, collections::BTreeMap, collections::VecDeque, string::String, vec::Vec};
#[cfg(feature = "litemap")]
use litemap::LiteMap;
/// A data structure that compactly maps from byte sequences to integers.
///
/// There are several variants of `ZeroTrie` which are very similar but are optimized
/// for different use cases:
///
/// - [`ZeroTrieSimpleAscii`] is the most compact structure. Very fast for small data.
/// Only stores ASCII-encoded strings. Can be const-constructed!
/// - [`ZeroTriePerfectHash`] is also compact, but it also supports arbitrary binary
/// strings. It also scales better to large data. Cannot be const-constructed.
/// - [`ZeroTrieExtendedCapacity`] can be used if more than 2^32 bytes are required.
///
/// You can create a `ZeroTrie` directly, in which case the most appropriate
/// backing implementation will be chosen.
///
/// # Backing Store
///
/// The data structure has a flexible backing data store. The only requirement for most
/// functionality is that it implement `AsRef<[u8]>`. All of the following are valid
/// ZeroTrie types:
///
/// - `ZeroTrie<[u8]>` (dynamically sized type: must be stored in a reference or Box)
/// - `ZeroTrie<&[u8]>` (borrows its data from a u8 buffer)
/// - `ZeroTrie<Vec<u8>>` (fully owned data)
/// - `ZeroTrie<ZeroVec<u8>>` (the recommended borrowed-or-owned signature)
/// - `Cow<ZeroTrie<[u8]>>` (another borrowed-or-owned signature)
/// - `ZeroTrie<Cow<[u8]>>` (another borrowed-or-owned signature)
///
/// # Examples
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ZeroTrie;
///
/// let mut map = LiteMap::<&[u8], usize>::new_vec();
/// map.insert("foo".as_bytes(), 1);
/// map.insert("bar".as_bytes(), 2);
/// map.insert("bazzoo".as_bytes(), 3);
///
/// let trie = ZeroTrie::try_from(&map)?;
///
/// assert_eq!(trie.get("foo"), Some(1));
/// assert_eq!(trie.get("bar"), Some(2));
/// assert_eq!(trie.get("bazzoo"), Some(3));
/// assert_eq!(trie.get("unknown"), None);
///
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Note: The absence of the following derive does not cause any test failures in this crate
#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
pub struct ZeroTrie<Store>(pub(crate) ZeroTrieFlavor<Store>);
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ZeroTrieFlavor<Store> {
SimpleAscii(ZeroTrieSimpleAscii<Store>),
PerfectHash(ZeroTriePerfectHash<Store>),
ExtendedCapacity(ZeroTrieExtendedCapacity<Store>),
}
/// A data structure that compactly maps from ASCII strings to integers.
///
/// For more information, see [`ZeroTrie`].
///
/// # Examples
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// let mut map = LiteMap::new_vec();
/// map.insert(&b"foo"[..], 1);
/// map.insert(b"bar", 2);
/// map.insert(b"bazzoo", 3);
///
/// let trie = ZeroTrieSimpleAscii::try_from(&map)?;
///
/// assert_eq!(trie.get(b"foo"), Some(1));
/// assert_eq!(trie.get(b"bar"), Some(2));
/// assert_eq!(trie.get(b"bazzoo"), Some(3));
/// assert_eq!(trie.get(b"unknown"), None);
///
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
/// ```
///
/// The trie can only store ASCII bytes; a string with non-ASCII always returns None:
///
/// ```
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// // A trie with two values: "abc" and "abcdef"
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
///
/// assert!(matches!(trie.get(b"ab\xFF"), None));
/// ```
#[repr(transparent)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "databake", derive(databake::Bake))]
#[cfg_attr(feature = "databake", databake(path = zerotrie))]
#[allow(clippy::exhaustive_structs)] // databake hidden fields
pub struct ZeroTrieSimpleAscii<Store: ?Sized> {
#[doc(hidden)] // for databake, but there are no invariants
pub store: Store,
}
impl<Store: ?Sized> ZeroTrieSimpleAscii<Store> {
fn transparent_ref_from_store(s: &Store) -> &Self {
unsafe {
// Safety: Self is transparent over Store
core::mem::transmute(s)
}
}
}
impl<Store> ZeroTrieSimpleAscii<Store> {
/// Wrap this specific ZeroTrie variant into a ZeroTrie.
#[inline]
pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
ZeroTrie(ZeroTrieFlavor::SimpleAscii(self))
}
}
/// A data structure that compactly maps from ASCII strings to integers
/// in a case-insensitive way.
///
/// # Examples
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
///
/// let mut map = LiteMap::new_vec();
/// map.insert(&b"foo"[..], 1);
/// map.insert(b"Bar", 2);
/// map.insert(b"Bazzoo", 3);
///
/// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
///
/// assert_eq!(trie.get(b"foo"), Some(1));
/// assert_eq!(trie.get(b"bar"), Some(2));
/// assert_eq!(trie.get(b"BAR"), Some(2));
/// assert_eq!(trie.get(b"bazzoo"), Some(3));
/// assert_eq!(trie.get(b"unknown"), None);
///
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
/// ```
///
/// Strings with different cases of the same character at the same offset are not allowed:
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
///
/// let mut map = LiteMap::new_vec();
/// map.insert(&b"bar"[..], 1);
/// // OK: 'r' and 'Z' are different letters
/// map.insert(b"baZ", 2);
/// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position
/// map.insert(b"baR", 2);
///
/// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!");
/// ```
#[repr(transparent)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "databake", derive(databake::Bake))]
#[cfg_attr(feature = "databake", databake(path = zerotrie))]
#[allow(clippy::exhaustive_structs)] // databake hidden fields
pub struct ZeroAsciiIgnoreCaseTrie<Store: ?Sized> {
#[doc(hidden)] // for databake, but there are no invariants
pub store: Store,
}
impl<Store: ?Sized> ZeroAsciiIgnoreCaseTrie<Store> {
fn transparent_ref_from_store(s: &Store) -> &Self {
unsafe {
// Safety: Self is transparent over Store
core::mem::transmute(s)
}
}
}
// Note: ZeroAsciiIgnoreCaseTrie is not a variant of ZeroTrie so there is no `into_zerotrie`
/// A data structure that compactly maps from byte strings to integers.
///
/// For more information, see [`ZeroTrie`].
///
/// # Examples
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ZeroTriePerfectHash;
///
/// let mut map = LiteMap::<&[u8], usize>::new_vec();
/// map.insert("foo".as_bytes(), 1);
/// map.insert("bår".as_bytes(), 2);
/// map.insert("båzzøø".as_bytes(), 3);
///
/// let trie = ZeroTriePerfectHash::try_from(&map)?;
///
/// assert_eq!(trie.get("foo".as_bytes()), Some(1));
/// assert_eq!(trie.get("bår".as_bytes()), Some(2));
/// assert_eq!(trie.get("båzzøø".as_bytes()), Some(3));
/// assert_eq!(trie.get("bazzoo".as_bytes()), None);
///
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
/// ```
#[repr(transparent)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "databake", derive(databake::Bake))]
#[cfg_attr(feature = "databake", databake(path = zerotrie))]
#[allow(clippy::exhaustive_structs)] // databake hidden fields
pub struct ZeroTriePerfectHash<Store: ?Sized> {
#[doc(hidden)] // for databake, but there are no invariants
pub store: Store,
}
impl<Store: ?Sized> ZeroTriePerfectHash<Store> {
fn transparent_ref_from_store(s: &Store) -> &Self {
unsafe {
// Safety: Self is transparent over Store
core::mem::transmute(s)
}
}
}
impl<Store> ZeroTriePerfectHash<Store> {
/// Wrap this specific ZeroTrie variant into a ZeroTrie.
#[inline]
pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
ZeroTrie(ZeroTrieFlavor::PerfectHash(self))
}
}
/// A data structure that maps from a large number of byte strings to integers.
///
/// For more information, see [`ZeroTrie`].
#[repr(transparent)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "databake", derive(databake::Bake))]
#[cfg_attr(feature = "databake", databake(path = zerotrie))]
#[allow(clippy::exhaustive_structs)] // databake hidden fields
pub struct ZeroTrieExtendedCapacity<Store: ?Sized> {
#[doc(hidden)] // for databake, but there are no invariants
pub store: Store,
}
impl<Store: ?Sized> ZeroTrieExtendedCapacity<Store> {
fn transparent_ref_from_store(s: &Store) -> &Self {
unsafe {
// Safety: Self is transparent over Store
core::mem::transmute(s)
}
}
}
impl<Store> ZeroTrieExtendedCapacity<Store> {
/// Wrap this specific ZeroTrie variant into a ZeroTrie.
#[inline]
pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
ZeroTrie(ZeroTrieFlavor::ExtendedCapacity(self))
}
}
macro_rules! impl_zerotrie_subtype {
($name:ident, $iter_element:ty, $iter_fn:path, $iter_ty:ty, $cnv_fn:path) => {
impl<Store> $name<Store> {
/// Create a trie directly from a store.
///
/// If the store does not contain valid bytes, unexpected behavior may occur.
#[inline]
pub const fn from_store(store: Store) -> Self {
Self { store }
}
/// Takes the byte store from this trie.
#[inline]
pub fn into_store(self) -> Store {
self.store
}
/// Converts this trie's store to a different store implementing the `From` trait.
///
#[doc = concat!("For example, use this to change `", stringify!($name), "<Vec<u8>>` to `", stringify!($name), "<Cow<[u8]>>`.")]
///
/// # Examples
///
/// ```
/// use std::borrow::Cow;
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
///
#[doc = concat!("let trie: ", stringify!($name), "<Vec<u8>> = ", stringify!($name), "::from_bytes(b\"abc\\x85\").to_owned();")]
#[doc = concat!("let cow: ", stringify!($name), "<Cow<[u8]>> = trie.convert_store();")]
///
/// assert_eq!(cow.get(b"abc"), Some(5));
/// ```
pub fn convert_store<X: From<Store>>(self) -> $name<X> {
$name::<X>::from_store(X::from(self.store))
}
}
impl<Store> $name<Store>
where
Store: AsRef<[u8]> + ?Sized,
{
/// Queries the trie for a string.
pub fn get<K>(&self, key: K) -> Option<usize> where K: AsRef<[u8]> {
// TODO: Should this be AsRef or Borrow?
reader::get_parameterized::<Self>(self.store.as_ref(), key.as_ref())
}
/// Returns `true` if the trie is empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.store.as_ref().is_empty()
}
/// Returns the size of the trie in number of bytes.
///
/// To get the number of keys in the trie, use `.iter().count()`:
///
/// ```
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
///
/// // A trie with two values: "abc" and "abcdef"
#[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
///
/// assert_eq!(8, trie.byte_len());
/// assert_eq!(2, trie.iter().count());
/// ```
#[inline]
pub fn byte_len(&self) -> usize {
self.store.as_ref().len()
}
/// Returns the bytes contained in the underlying store.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
self.store.as_ref()
}
/// Returns this trie as a reference transparent over a byte slice.
#[inline]
pub fn as_borrowed(&self) -> &$name<[u8]> {
$name::from_bytes(self.store.as_ref())
}
/// Returns a trie with a store borrowing from this trie.
#[inline]
pub fn as_borrowed_slice(&self) -> $name<&[u8]> {
$name::from_store(self.store.as_ref())
}
}
impl<Store> AsRef<$name<[u8]>> for $name<Store>
where
Store: AsRef<[u8]> + ?Sized,
{
#[inline]
fn as_ref(&self) -> &$name<[u8]> {
self.as_borrowed()
}
}
#[cfg(feature = "alloc")]
impl<Store> $name<Store>
where
Store: AsRef<[u8]> + ?Sized,
{
/// Converts a possibly-borrowed $name to an owned one.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
///
#[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x85\");")]
#[doc = concat!("let owned: ", stringify!($name), "<Vec<u8>> = trie.to_owned();")]
///
/// assert_eq!(trie.get(b"abc"), Some(5));
/// assert_eq!(owned.get(b"abc"), Some(5));
/// ```
#[inline]
pub fn to_owned(&self) -> $name<Vec<u8>> {
$name::from_store(
Vec::from(self.store.as_ref()),
)
}
/// Returns an iterator over the key/value pairs in this trie.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
///
/// // A trie with two values: "abc" and "abcdef"
#[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
///
/// let mut it = trie.iter();
/// assert_eq!(it.next(), Some(("abc".into(), 0)));
/// assert_eq!(it.next(), Some(("abcdef".into(), 1)));
/// assert_eq!(it.next(), None);
/// ```
#[inline]
#[allow(clippy::type_complexity)]
pub fn iter(&self) -> $iter_ty {
$iter_fn(self.as_bytes())
}
}
impl $name<[u8]> {
/// Casts from a byte slice to a reference to a trie with the same lifetime.
///
/// If the bytes are not a valid trie, unexpected behavior may occur.
#[inline]
pub fn from_bytes(trie: &[u8]) -> &Self {
Self::transparent_ref_from_store(trie)
}
}
#[cfg(feature = "alloc")]
impl $name<Vec<u8>> {
pub(crate) fn try_from_tuple_slice(items: &[(&ByteStr, usize)]) -> Result<Self, ZeroTrieBuildError> {
use crate::options::ZeroTrieWithOptions;
ZeroTrieBuilder::<VecDeque<u8>>::from_sorted_tuple_slice(
items,
Self::OPTIONS,
)
.map(|s| Self {
store: s.to_bytes(),
})
}
}
#[cfg(feature = "alloc")]
impl<'a, K> FromIterator<(K, usize)> for $name<Vec<u8>>
where
K: AsRef<[u8]>
{
fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
use crate::options::ZeroTrieWithOptions;
use crate::builder::nonconst::ZeroTrieBuilder;
ZeroTrieBuilder::<VecDeque<u8>>::from_bytes_iter(
iter,
Self::OPTIONS
)
.map(|s| Self {
store: s.to_bytes(),
})
.unwrap()
}
}
#[cfg(feature = "alloc")]
impl<'a, K> TryFrom<&'a BTreeMap<K, usize>> for $name<Vec<u8>>
where
K: Borrow<[u8]>
{
type Error = crate::error::ZeroTrieBuildError;
fn try_from(map: &'a BTreeMap<K, usize>) -> Result<Self, Self::Error> {
let tuples: Vec<(&[u8], usize)> = map
.iter()
.map(|(k, v)| (k.borrow(), *v))
.collect();
let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
Self::try_from_tuple_slice(byte_str_slice)
}
}
#[cfg(feature = "alloc")]
impl<Store> $name<Store>
where
Store: AsRef<[u8]> + ?Sized
{
/// Exports the data from this ZeroTrie type into a BTreeMap.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
/// use std::collections::BTreeMap;
///
#[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
/// let items = trie.to_btreemap();
///
/// assert_eq!(items.len(), 2);
///
#[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
/// .into_iter()
/// .collect();
/// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
/// ```
pub fn to_btreemap(&self) -> BTreeMap<$iter_element, usize> {
self.iter().collect()
}
#[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
pub(crate) fn to_btreemap_bytes(&self) -> BTreeMap<Box<[u8]>, usize> {
self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
}
}
#[cfg(feature = "alloc")]
impl<Store> From<&$name<Store>> for BTreeMap<$iter_element, usize>
where
Store: AsRef<[u8]> + ?Sized,
{
#[inline]
fn from(other: &$name<Store>) -> Self {
other.to_btreemap()
}
}
#[cfg(feature = "litemap")]
impl<'a, K, S> TryFrom<&'a LiteMap<K, usize, S>> for $name<Vec<u8>>
where
K: Borrow<[u8]>,
S: litemap::store::StoreIterable<'a, K, usize>,
{
type Error = crate::error::ZeroTrieBuildError;
fn try_from(map: &'a LiteMap<K, usize, S>) -> Result<Self, Self::Error> {
let tuples: Vec<(&[u8], usize)> = map
.iter()
.map(|(k, v)| (k.borrow(), *v))
.collect();
let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
Self::try_from_tuple_slice(byte_str_slice)
}
}
#[cfg(feature = "litemap")]
impl<Store> $name<Store>
where
Store: AsRef<[u8]> + ?Sized,
{
/// Exports the data from this ZeroTrie type into a LiteMap.
///
/// ✨ *Enabled with the `litemap` Cargo feature.*
///
/// # Examples
///
/// ```
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
/// use litemap::LiteMap;
///
#[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
///
/// let items = trie.to_litemap();
/// assert_eq!(items.len(), 2);
///
#[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
/// .iter()
/// .map(|(k, v)| (k, *v))
/// .collect();
/// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
/// ```
pub fn to_litemap(&self) -> LiteMap<$iter_element, usize> {
self.iter().collect()
}
#[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
pub(crate) fn to_litemap_bytes(&self) -> LiteMap<Box<[u8]>, usize> {
self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
}
}
#[cfg(feature = "litemap")]
impl<Store> From<&$name<Store>> for LiteMap<$iter_element, usize>
where
Store: AsRef<[u8]> + ?Sized,
{
#[inline]
fn from(other: &$name<Store>) -> Self {
other.to_litemap()
}
}
#[cfg(feature = "litemap")]
impl $name<Vec<u8>>
{
#[cfg(feature = "serde")]
pub(crate) fn try_from_serde_litemap(items: &LiteMap<Box<ByteStr>, usize>) -> Result<Self, ZeroTrieBuildError> {
let lm_borrowed: LiteMap<&ByteStr, usize> = items.to_borrowed_keys();
Self::try_from_tuple_slice(lm_borrowed.as_slice())
}
}
// Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
impl Borrow<$name<[u8]>> for $name<&[u8]> {
#[inline]
fn borrow(&self) -> &$name<[u8]> {
self.as_borrowed()
}
}
// Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
#[cfg(feature = "alloc")]
impl Borrow<$name<[u8]>> for $name<Box<[u8]>> {
#[inline]
fn borrow(&self) -> &$name<[u8]> {
self.as_borrowed()
}
}
// Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
#[cfg(feature = "alloc")]
impl Borrow<$name<[u8]>> for $name<Vec<u8>> {
#[inline]
fn borrow(&self) -> &$name<[u8]> {
self.as_borrowed()
}
}
#[cfg(feature = "alloc")]
impl alloc::borrow::ToOwned for $name<[u8]> {
type Owned = $name<Box<[u8]>>;
#[doc = concat!("This impl allows [`", stringify!($name), "`] to be used inside of a [`Cow`](alloc::borrow::Cow).")]
///
#[doc = concat!("Note that it is also possible to use `", stringify!($name), "<ZeroVec<u8>>` for a similar result.")]
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Examples
///
/// ```
/// use std::borrow::Cow;
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
///
#[doc = concat!("let trie: Cow<", stringify!($name), "<[u8]>> = Cow::Borrowed(", stringify!($name), "::from_bytes(b\"abc\\x85\"));")]
/// assert_eq!(trie.get(b"abc"), Some(5));
/// ```
fn to_owned(&self) -> Self::Owned {
let bytes: &[u8] = self.store.as_ref();
$name::from_store(
Vec::from(bytes).into_boxed_slice(),
)
}
}
// TODO(#2778): Auto-derive these impls based on the repr(transparent).
//
// Safety (based on the safety checklist on the VarULE trait):
// 1. `$name` does not include any uninitialized or padding bytes as it is `repr(transparent)`
// over a `VarULE` type, `Store`, as evidenced by the existence of `transparent_ref_from_store()`
// 2. `$name` is aligned to 1 byte for the same reason
// 3. The impl of `validate_bytes()` returns an error if any byte is not valid (passed down to `VarULE` impl of `Store`)
// 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (passed down to `VarULE` impl of `Store`)
// 5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
// 6. `parse_bytes()` is left to its default impl
// 7. byte equality is semantic equality
#[cfg(feature = "zerovec")]
unsafe impl<Store> zerovec::ule::VarULE for $name<Store>
where
Store: zerovec::ule::VarULE,
{
#[inline]
fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
Store::validate_bytes(bytes)
}
#[inline]
unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
// Safety: we can pass down the validity invariant to Store
Self::transparent_ref_from_store(Store::from_bytes_unchecked(bytes))
}
}
#[cfg(feature = "zerofrom")]
impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, $name<Store1>> for $name<Store2>
where
Store2: zerofrom::ZeroFrom<'zf, Store1>,
{
#[inline]
fn zero_from(other: &'zf $name<Store1>) -> Self {
$name::from_store(zerofrom::ZeroFrom::zero_from(&other.store))
}
}
};
}
#[cfg(feature = "alloc")]
fn string_to_box_u8(input: String) -> Box<[u8]> {
input.into_boxed_str().into_boxed_bytes()
}
#[doc(hidden)] // subject to change
#[cfg(feature = "alloc")]
pub type ZeroTrieStringIterator<'a> =
core::iter::Map<reader::ZeroTrieIterator<'a>, fn((Vec<u8>, usize)) -> (String, usize)>;
impl_zerotrie_subtype!(
ZeroTrieSimpleAscii,
String,
reader::get_iter_ascii_or_panic,
ZeroTrieStringIterator,
string_to_box_u8
);
impl_zerotrie_subtype!(
ZeroAsciiIgnoreCaseTrie,
String,
reader::get_iter_ascii_or_panic,
ZeroTrieStringIterator,
string_to_box_u8
);
impl_zerotrie_subtype!(
ZeroTriePerfectHash,
Vec<u8>,
reader::get_iter_phf,
reader::ZeroTrieIterator<'_>,
Vec::into_boxed_slice
);
impl_zerotrie_subtype!(
ZeroTrieExtendedCapacity,
Vec<u8>,
reader::get_iter_phf,
reader::ZeroTrieIterator<'_>,
Vec::into_boxed_slice
);
macro_rules! impl_dispatch {
($self:ident, $inner_fn:ident()) => {
match $self.0 {
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
}
};
($self:ident, $inner_fn:ident().into_zerotrie()) => {
match $self.0 {
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn().into_zerotrie(),
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn().into_zerotrie(),
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn().into_zerotrie(),
}
};
(&$self:ident, $inner_fn:ident()) => {
match &$self.0 {
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
}
};
($self:ident, $inner_fn:ident($arg:ident)) => {
match $self.0 {
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
}
};
(&$self:ident, $inner_fn:ident($arg:ident)) => {
match &$self.0 {
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
}
};
(&$self:ident, $trait:ident::$inner_fn:ident()) => {
match &$self.0 {
ZeroTrieFlavor::SimpleAscii(subtype) => {
ZeroTrie(ZeroTrieFlavor::SimpleAscii($trait::$inner_fn(subtype)))
}
ZeroTrieFlavor::PerfectHash(subtype) => {
ZeroTrie(ZeroTrieFlavor::PerfectHash($trait::$inner_fn(subtype)))
}
ZeroTrieFlavor::ExtendedCapacity(subtype) => {
ZeroTrie(ZeroTrieFlavor::ExtendedCapacity($trait::$inner_fn(subtype)))
}
}
};
}
impl<Store> ZeroTrie<Store> {
/// Takes the byte store from this trie.
pub fn into_store(self) -> Store {
impl_dispatch!(self, into_store())
}
/// Converts this trie's store to a different store implementing the `From` trait.
///
/// For example, use this to change `ZeroTrie<Vec<u8>>` to `ZeroTrie<Cow<[u8]>>`.
pub fn convert_store<NewStore>(self) -> ZeroTrie<NewStore>
where
NewStore: From<Store>,
{
impl_dispatch!(self, convert_store().into_zerotrie())
}
}
impl<Store> ZeroTrie<Store>
where
Store: AsRef<[u8]>,
{
/// Queries the trie for a string.
pub fn get<K>(&self, key: K) -> Option<usize>
where
K: AsRef<[u8]>,
{
impl_dispatch!(&self, get(key))
}
/// Returns `true` if the trie is empty.
pub fn is_empty(&self) -> bool {
impl_dispatch!(&self, is_empty())
}
/// Returns the size of the trie in number of bytes.
///
/// To get the number of keys in the trie, use `.iter().count()`.
pub fn byte_len(&self) -> usize {
impl_dispatch!(&self, byte_len())
}
}
#[cfg(feature = "alloc")]
impl<Store> ZeroTrie<Store>
where
Store: AsRef<[u8]>,
{
/// Exports the data from this ZeroTrie into a BTreeMap.
pub fn to_btreemap(&self) -> BTreeMap<Box<[u8]>, usize> {
impl_dispatch!(&self, to_btreemap_bytes())
}
}
#[cfg(feature = "litemap")]
impl<Store> ZeroTrie<Store>
where
Store: AsRef<[u8]>,
{
/// Exports the data from this ZeroTrie into a LiteMap.
pub fn to_litemap(&self) -> LiteMap<Box<[u8]>, usize> {
impl_dispatch!(&self, to_litemap_bytes())
}
}
#[cfg(feature = "alloc")]
impl ZeroTrie<Vec<u8>> {
pub(crate) fn try_from_tuple_slice(
items: &[(&ByteStr, usize)],
) -> Result<Self, ZeroTrieBuildError> {
let is_all_ascii = items.iter().all(|(s, _)| s.is_all_ascii());
if is_all_ascii && items.len() < 512 {
ZeroTrieSimpleAscii::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
} else {
ZeroTriePerfectHash::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
}
}
}
#[cfg(feature = "alloc")]
impl<K> FromIterator<(K, usize)> for ZeroTrie<Vec<u8>>
where
K: AsRef<[u8]>,
{
fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
// We need two Vecs because the first one anchors the `K`s that the second one borrows.
let items = Vec::from_iter(iter);
let mut items: Vec<(&[u8], usize)> = items.iter().map(|(k, v)| (k.as_ref(), *v)).collect();
items.sort();
let byte_str_slice = ByteStr::from_byte_slice_with_value(&items);
#[allow(clippy::unwrap_used)] // FromIterator is panicky
Self::try_from_tuple_slice(byte_str_slice).unwrap()
}
}
#[cfg(feature = "databake")]
impl<Store> databake::Bake for ZeroTrie<Store>
where
Store: databake::Bake,
{
fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
use databake::*;
let inner = impl_dispatch!(&self, bake(env));
quote! { #inner.into_zerotrie() }
}
}
#[cfg(feature = "databake")]
impl<Store> databake::BakeSize for ZeroTrie<Store>
where
Store: databake::BakeSize,
{
fn borrows_size(&self) -> usize {
impl_dispatch!(&self, borrows_size())
}
}
#[cfg(feature = "zerofrom")]
impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, ZeroTrie<Store1>> for ZeroTrie<Store2>
where
Store2: zerofrom::ZeroFrom<'zf, Store1>,
{
fn zero_from(other: &'zf ZeroTrie<Store1>) -> Self {
use zerofrom::ZeroFrom;
impl_dispatch!(&other, ZeroFrom::zero_from())
}
}