utils.rs - mozsearch

mozilla-central/third_party/rust/arraystring/src/utils.rs

Enable keyboard shortcuts

Source code

File a bug in Firefox Build System :: General

Revision control

Copy as Markdown

Other Tools

//! Misc functions to improve readability

use crate::{generic::ArraySlice, prelude::*};

use core::ptr::copy;

#[cfg(feature = "logs")]

use log::{debug, trace};

/// Conversion into `T` that is allowed to lose information
///
/// The implementations in this file narrow integers with an `as` cast, silently discarding
/// the bits that do not fit in the target type.
pub(crate) trait IntoLossy<T>: Sized {
    /// Converts `self` into `T`, dropping whatever does not fit
    fn into_lossy(self) -> T;
}

/// Marks branch as impossible, UB if taken in prod, panics in debug
///
/// This function should never be used lightly, it will cause UB if used wrong
///
/// # Safety
///
/// The caller must guarantee that this call can never be reached. When `debug_assertions`
/// are disabled, reaching it is undefined behavior (`core::hint::unreachable_unchecked`);
/// when they are enabled it panics with `s` as the message.
#[inline]
// `s` is only read when `debug_assertions` are enabled
#[allow(unused_variables)]
pub(crate) unsafe fn never(s: &str) -> ! {
    #[cfg(debug_assertions)]
    panic!("{}", s);

    #[cfg(not(debug_assertions))]
    core::hint::unreachable_unchecked()
}

/// Encodes `char` into `ArrayString` at specified position, heavily unsafe

///

/// We reimplement the `core` function to avoid panicking (UB instead, be careful)

///

/// Reimplemented from:

///

/// `https://github.com/rust-lang/rust/blob/7843e2792dce0f20d23b3c1cca51652013bef0ea/src/libcore/char/methods.rs#L447`

/// # Safety

///

/// - It's UB if index is outside of buffer's boundaries (buffer needs at most 4 bytes)

/// - It's UB if index is inside a character (like a index 3 for "a🤔")

#[inline]

pub(crate) unsafe fn encode_char_utf8_unchecked<S: Capacity>(

    s: &mut ArrayString<S>,

    ch: char,

    index: u8,

) {

    // UTF-8 ranges and tags for encoding characters

    #[allow(clippy::missing_docs_in_private_items)]

    const TAG_CONT: u8 = 0b1000_0000;

    #[allow(clippy::missing_docs_in_private_items)]

    const TAG_TWO_B: u8 = 0b1100_0000;

    #[allow(clippy::missing_docs_in_private_items)]

    const TAG_THREE_B: u8 = 0b1110_0000;

    #[allow(clippy::missing_docs_in_private_items)]

    const TAG_FOUR_B: u8 = 0b1111_0000;

    #[allow(clippy::missing_docs_in_private_items)]

    const MAX_ONE_B: u32 = 0x80;

    #[allow(clippy::missing_docs_in_private_items)]

    const MAX_TWO_B: u32 = 0x800;

    #[allow(clippy::missing_docs_in_private_items)]

    const MAX_THREE_B: u32 = 0x10000;

    trace!("Encode char: {} to {}", ch, index);

    debug_assert!(ch.len_utf8().saturating_add(index.into()) <= S::to_usize());

    debug_assert!(ch.len_utf8().saturating_add(s.len().into()) <= S::to_usize());

    let dst = s.array.as_mut_slice().get_unchecked_mut(index.into()..);

    let code = ch as u32;

    if code < MAX_ONE_B {

        debug_assert!(!dst.is_empty());

        *dst.get_unchecked_mut(0) = code.into_lossy();

    } else if code < MAX_TWO_B {

        debug_assert!(dst.len() >= 2);

        *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F).into_lossy() | TAG_TWO_B;

        *dst.get_unchecked_mut(1) = (code & 0x3F).into_lossy() | TAG_CONT;

    } else if code < MAX_THREE_B {

        debug_assert!(dst.len() >= 3);

        *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F).into_lossy() | TAG_THREE_B;

        *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F).into_lossy() | TAG_CONT;

        *dst.get_unchecked_mut(2) = (code & 0x3F).into_lossy() | TAG_CONT;

    } else {

        debug_assert!(dst.len() >= 4);

        *dst.get_unchecked_mut(0) = (code >> 18 & 0x07).into_lossy() | TAG_FOUR_B;

        *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F).into_lossy() | TAG_CONT;

        *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F).into_lossy() | TAG_CONT;

        *dst.get_unchecked_mut(3) = (code & 0x3F).into_lossy() | TAG_CONT;

/// Copies part of slice to another part (`mem::copy`, basically `memmove`)

#[inline]

unsafe fn shift_unchecked(s: &mut [u8], from: usize, to: usize, len: usize) {

    debug!(

        "Shift {:?} {}-{}",

        &s.get(from..).map(|s| s.get(..len)),

        from,

to

);

    debug_assert!(to.saturating_add(len) <= s.len() && from.saturating_add(len) <= s.len());

    let (f, t) = (s.as_ptr().add(from), s.as_mut_ptr().add(to));

    copy(f, t, len);

/// Shifts string right

///

/// # Safety

///

/// It's UB if `to + (s.len() - from)` is bigger than [`S::to_u8()`]

///

/// [`<S as Unsigned>::to_u8()`]: ../struct.ArrayString.html#CAPACITY

#[inline]

pub(crate) unsafe fn shift_right_unchecked<S, F, T>(s: &mut ArrayString<S>, from: F, to: T)

where

    S: Capacity,

    F: Into<usize> + Copy,

    T: Into<usize> + Copy,

    let len = (s.len() as usize).saturating_sub(from.into());

    debug_assert!(from.into() <= to.into() && to.into().saturating_add(len) <= S::to_usize());

    debug_assert!(s.as_str().is_char_boundary(from.into()));

    shift_unchecked(s.array.as_mut_slice(), from.into(), to.into(), len);

/// Shifts string left

#[inline]

pub(crate) unsafe fn shift_left_unchecked<S, F, T>(s: &mut ArrayString<S>, from: F, to: T)

where

    S: Capacity,

    F: Into<usize> + Copy,

    T: Into<usize> + Copy,

    debug_assert!(to.into() <= from.into() && from.into() <= s.len().into());

    debug_assert!(s.as_str().is_char_boundary(from.into()));

    let len = (s.len() as usize).saturating_sub(to.into());

    shift_unchecked(s.array.as_mut_slice(), from.into(), to.into(), len);

/// Returns error if size is outside of specified boundary

#[inline]

pub fn is_inside_boundary<S, L>(size: S, limit: L) -> Result<(), OutOfBounds>

where

    S: Into<usize>,

    L: Into<usize>,

    let (s, l) = (size.into(), limit.into());

    trace!("Out of bounds: ensures {} < {}", s, l);

    Some(()).filter(|_| s <= l).ok_or(OutOfBounds)

/// Returns error if index is not at a valid utf-8 char boundary

#[inline]

pub fn is_char_boundary<S: Capacity>(s: &ArrayString<S>, idx: u8) -> Result<(), Utf8> {

    trace!("Is char boundary: {} at {}", s.as_str(), idx);

    if s.as_str().is_char_boundary(idx.into()) {

        return Ok(());

    Err(Utf8)

/// Truncates string to specified size (ignoring last bytes if they form a partial `char`)

#[inline]

pub(crate) fn truncate_str(slice: &str, size: u8) -> &str {

    trace!("Truncate str: {} at {}", slice, size);

    if slice.is_char_boundary(size.into()) {

        unsafe { slice.get_unchecked(..size.into()) }

    } else if (size as usize) < slice.len() {

        let mut index = size.saturating_sub(1) as usize;

        while !slice.is_char_boundary(index) {

            index = index.saturating_sub(1);

        unsafe { slice.get_unchecked(..index) }

    } else {

        slice

impl IntoLossy<u8> for usize {

    #[allow(clippy::cast_possible_truncation)]

    #[inline]

    fn into_lossy(self) -> u8 {

        self as u8

impl IntoLossy<u8> for u32 {

    #[allow(clippy::cast_possible_truncation)]

    #[inline]

    fn into_lossy(self) -> u8 {

        self as u8

#[cfg(test)]
mod tests {
    use super::*;
    use core::str::from_utf8;

    #[test]
    fn truncate() {
        assert_eq!(truncate_str("i", 10), "i");
        assert_eq!(truncate_str("iiiiii", 3), "iii");
        assert_eq!(truncate_str("🤔🤔🤔", 5), "🤔");

        // Cutting exactly at a char boundary keeps everything before it
        assert_eq!(truncate_str("🤔🤔🤔", 4), "🤔");
        // A size inside the first char leaves nothing
        assert_eq!(truncate_str("🤔", 3), "");
        assert_eq!(truncate_str("🤔", 0), "");
    }

    #[test]
    fn shift_right() {
        let _ = env_logger::try_init();
        let mut ls = SmallString::try_from_str("abcdefg").unwrap();
        unsafe { shift_right_unchecked(&mut ls, 0u8, 4u8) };
        // The shift does not update the length, account for the 4 new leading bytes
        ls.size += 4;
        assert_eq!(ls.as_str(), "abcdabcdefg");
    }

    #[test]
    fn shift_left() {
        let _ = env_logger::try_init();
        let mut ls = SmallString::try_from_str("abcdefg").unwrap();
        unsafe { shift_left_unchecked(&mut ls, 1u8, 0u8) };
        // The shift does not update the length, drop the byte that was overwritten
        ls.size -= 1;
        assert_eq!(ls.as_str(), "bcdefg");
    }

    #[test]
    fn shift_nop() {
        let _ = env_logger::try_init();
        let mut ls = SmallString::try_from_str("abcdefg").unwrap();
        unsafe { shift_right_unchecked(&mut ls, 0u8, 0u8) };
        assert_eq!(ls.as_str(), "abcdefg");
        unsafe { shift_left_unchecked(&mut ls, 0u8, 0u8) };
        assert_eq!(ls.as_str(), "abcdefg");
    }

    #[test]
    fn encode_char_utf8() {
        let _ = env_logger::try_init();
        let mut string = SmallString::default();
        unsafe {
            encode_char_utf8_unchecked(&mut string, 'a', 0);
            // The length is not updated by the encoder, read the raw bytes instead
            assert_eq!(from_utf8(&string.array.as_mut_slice()[..1]).unwrap(), "a");

            let mut string = SmallString::try_from_str("a").unwrap();

            encode_char_utf8_unchecked(&mut string, '🤔', 1);
            assert_eq!(
                from_utf8(&string.array.as_mut_slice()[..5]).unwrap(),
                "a🤔"
            );
        }
    }
}