string.rs - mozsearch

firefox-main/third_party/rust/objc2-core-foundation/src/string.rs

Enable keyboard shortcuts

Source code

File a bug in Firefox Build System :: General

Revision control

Copy as Markdown

Other Tools

use core::cmp::Ordering;

use core::ffi::c_char;

use core::fmt::Write;

use core::ptr::NonNull;

use core::{fmt, slice, str};

use crate::{

    kCFAllocatorNull, CFIndex, CFRange, CFRetained, CFString, CFStringBuiltInEncodings,

    CFStringCompareFlags,

};

#[track_caller]

unsafe fn debug_checked_utf8_unchecked(bytes: &[u8]) -> &str {

    if cfg!(debug_assertions) {

        match str::from_utf8(bytes) {

            Ok(s) => s,

            Err(err) => panic!(

                "unsafe precondition violated: CF function did not return valid UTF-8: {err}"

),

    } else {

        // SAFETY: Checked by caller

        unsafe { str::from_utf8_unchecked(bytes) }

impl CFString {

    /// Creates a new `CFString` from a [`str`][prim@str].

    #[inline]

    #[doc(alias = "CFStringCreateWithBytes")]

    #[allow(clippy::should_implement_trait)] // Not really sure of a better name

    pub fn from_str(string: &str) -> CFRetained<Self> {

        // Can never happen, allocations in Rust cannot be this large.

        debug_assert!(string.len() < CFIndex::MAX as usize);

        let len = string.len() as CFIndex;

        let s = unsafe {

            Self::with_bytes(

                None,

                string.as_ptr(),

                len,

                CFStringBuiltInEncodings::EncodingUTF8.0,

                false,

};

        // Should only fail if the string is not UTF-8 (which we know it is)

        // or perhaps on allocation error.

        s.expect("failed creating CFString")

    /// Alias for easier transition from the `core-foundation` crate.

    #[inline]

    #[deprecated = "renamed to CFString::from_str"]

    pub fn new(string: &str) -> CFRetained<Self> {

        Self::from_str(string)

    /// Creates a new `CFString` from a `'static` [`str`][prim@str].

///

    /// This may be slightly more efficient than [`CFString::from_str`], as it

    /// may be able to re-use the existing buffer (since we know it won't be

    /// deallocated).

    #[inline]

    #[doc(alias = "CFStringCreateWithBytesNoCopy")]

    pub fn from_static_str(string: &'static str) -> CFRetained<Self> {

        debug_assert!(string.len() < CFIndex::MAX as usize);

        let len = string.len() as CFIndex;

        // SAFETY: The string is used as a backing store, and thus must

        // potentially live forever, since we don't know how long the returned

        // CFString will be alive for. This is ensured by the `'static`

        // requirement.

        let s = unsafe {

            Self::with_bytes_no_copy(

                None,

                string.as_ptr(),

                len,

                CFStringBuiltInEncodings::EncodingUTF8.0,

                false,

                kCFAllocatorNull,

};

        s.expect("failed creating CFString")

    /// Get the [`str`](`prim@str`) representation of this string if it can be

    /// done efficiently.

///

    /// Returns [`None`] if the internal storage does not allow this to be

    /// done efficiently. Use `CFString::to_string` if performance is not an

    /// issue.

///

    /// # Safety

///

    /// The `CFString` must not be mutated for the lifetime of the returned

    /// string.

///

    /// Warning: This is very difficult to ensure in generic contexts, e.g. it

    /// cannot even be used inside `Debug::fmt`, since `Formatter` uses `dyn`

    /// internally, and could thus mutate the string inside there.

    #[doc(alias = "CFStringGetCStringPtr")]

    pub unsafe fn as_str_unchecked(&self) -> Option<&str> {

        // NOTE: The encoding is an 8-bit encoding.

        let bytes = self.c_string_ptr(CFStringBuiltInEncodings::EncodingASCII.0);

        NonNull::new(bytes as *mut c_char).map(|bytes| {

            // NOTE: The returned string may contain interior NUL bytes:

            // https://github.com/swiftlang/swift-corelibs-foundation/issues/5200

//

            // So we have to check the length of the string too. We do that

            // using `CFStringGetLength`; Since `CFStringGetCStringPtr`

            // returned a pointer, and we picked the encoding to be ASCII

            // (which has 1 codepoint per byte), this means that the number of

            // codepoints is the same as the number of bytes in the string.

//

            // This is also what Swift does:

            // https://github.com/swiftlang/swift-corelibs-foundation/commit/8422c1a5e63913613a93523b3b398cb982df6205

            let len = self.length() as usize;

            // SAFETY: The pointer is valid for as long as the CFString is not

            // mutated (which the caller ensures it isn't for the lifetime of

            // the reference), and the length is correct (see above).

            let bytes = unsafe { slice::from_raw_parts(bytes.as_ptr().cast(), len) };

            // SAFETY: `CFStringGetCStringPtr` is (very likely) implemented

            // correctly, and we picked the encoding to be ASCII (which is a

            // subset of UTF-8).

            unsafe { debug_checked_utf8_unchecked(bytes) }

})

impl fmt::Display for CFString {

    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {

        // Copy UTF-8 bytes from the CFString to the formatter in a loop, to

        // avoid allocating.

//

        // We have to do this instead of using `CFStringGetCStringPtr`, as

        // that will be invalidated if the string is mutated while in use, and

        // `fmt::Formatter` contains `dyn Write` which may very theoretically

        // do exactly that.

        // Somewhat reasonably sized stack buffer.

        // TODO: Do performance testing, and tweak this value.

//

        // Should be at least 4 (as that's the minimum size of `char`).

        let mut buf = [0u8; 32];

        let mut location_utf16 = 0;

        loop {

            let len_utf16 = self.length();

            let mut read_utf8 = 0;

            let read_utf16 = unsafe {

                self.bytes(

                    CFRange {

                        location: location_utf16,

                        length: len_utf16 - location_utf16,

},

                    CFStringBuiltInEncodings::EncodingUTF8.0,

                    0, // No conversion character

                    false,

                    buf.as_mut_ptr(),

                    buf.len() as _,

                    &mut read_utf8,

};

            if read_utf16 <= 0 {

                if location_utf16 < len_utf16 {

                    // We're not done reading the entire string yet; emit

                    // replacement character, advance one character, and try again.

                    f.write_char(char::REPLACEMENT_CHARACTER)?;

                    location_utf16 += 1;

                    continue;

                break;

            location_utf16 += read_utf16;

            // SAFETY: `CFStringGetBytes` is (very likely) implemented

            // correctly, and won't return non-UTF8 strings.

//

            // Even if a string contains an UTF-8 char on a boundary, it won't

            // split it up when returning UTF-8.

            let s = unsafe { debug_checked_utf8_unchecked(&buf[0..read_utf8 as usize]) };

            // NOTE: May unwind, and may invalidate the string contents.

            f.write_str(s)?;

        Ok(())

impl PartialOrd for CFString {

    #[inline]

    #[doc(alias = "CFStringCompare")]

    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {

        Some(self.cmp(other))

impl Ord for CFString {

    #[inline]

    #[doc(alias = "CFStringCompare")]

    fn cmp(&self, other: &Self) -> Ordering {

        // Request standard lexiographical ordering.

        let flags = CFStringCompareFlags::empty();

        self.compare(Some(other), flags).into()

#[cfg(test)]

mod tests {

    use alloc::string::ToString;

    use core::ffi::CStr;

    use super::*;

    #[test]

    fn basic_conversion() {

        let s = CFString::from_str("abc");

        assert_eq!(s.to_string(), "abc");

        let s = CFString::from_str("a♥😀");

        assert_eq!(s.to_string(), "a♥😀");

    #[test]

    fn cstr_conversion() {

        let table = [

                b"abc\xf8xyz\0" as &[u8],

                CFStringBuiltInEncodings::EncodingISOLatin1,

                "abcøxyz",

),

                b"\x26\x65\0",

                CFStringBuiltInEncodings::EncodingUTF16BE,

                "♥",

),

                b"\x65\x26\0",

                CFStringBuiltInEncodings::EncodingUTF16LE,

                "♥",

),

];

        for (cstr, encoding, expected) in table {

            let cstr = CStr::from_bytes_with_nul(cstr).unwrap();

            let s = unsafe { CFString::with_c_string(None, cstr.as_ptr(), encoding.0) }.unwrap();

            assert_eq!(s.to_string(), expected);

    #[test]

    fn from_incomplete() {

        let s = unsafe {

            CFString::with_bytes(

                None,

                b"\xd8\x3d\xde".as_ptr(),

3,

                CFStringBuiltInEncodings::EncodingUTF16BE.0,

                false,

            .unwrap()

};

        assert_eq!(s.to_string(), "�"); // Replacement character

        assert_eq!(s.length(), 1);

    #[test]

    fn internal_nul_byte() {

        let s = CFString::from_str("a\0b\0c\0d");

        // Works with `CFStringGetBytes`.

        assert_eq!(s.to_string(), "a\0b\0c\0d");

        // `CFStringGetCStringPtr` does not seem to work here on very short

        // strings (probably those that are stored inline?).

        if cfg!(target_pointer_width = "64") {

            assert_eq!(unsafe { s.as_str_unchecked() }, None);

        } else {

            assert_eq!(unsafe { s.as_str_unchecked() }, Some("a\0b\0c\0d"));

        // Test `CFStringGetCString`.

        let mut buf = [0u8; 10];

        assert!(unsafe {

            s.c_string(

                buf.as_mut_ptr().cast(),

                buf.len() as _,

                CFStringBuiltInEncodings::EncodingUTF8.0,

});

        // All the data is copied to the buffer.

        assert_eq!(&buf[0..10], b"a\0b\0c\0d\0\0\0");

        // But subsequent usage of that as a CStr fails, since it contains

        // interior NUL bytes.

        let cstr = CStr::from_bytes_until_nul(&buf).unwrap();

        assert_eq!(cstr.to_bytes(), b"a");

        // Test with a bit longer string, to ensure the same holds for heap-

        // allocated CFStrings

        let s = CFString::from_str("a\0aaaaaaaaaaaaaaa");

        // Works with `CFStringGetBytes`.

        assert_eq!(s.to_string(), "a\0aaaaaaaaaaaaaaa");

        // `CFStringGetCStringPtr` also allows these without truncation.

        assert_eq!(unsafe { s.as_str_unchecked() }, Some("a\0aaaaaaaaaaaaaaa"));

    #[test]

    fn as_str_correct_on_unicode() {

        let s = CFString::from_static_str("😀");

        assert_eq!(unsafe { s.as_str_unchecked() }, None);

        let s = CFString::from_static_str("♥");

        assert_eq!(unsafe { s.as_str_unchecked() }, None);

    #[test]

    fn utf8_on_boundary() {

        // Make the emoji lie across the 32 byte buffer size in Display::fmt.

        let s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀"; // 29 'a's

        assert_eq!(CFString::from_str(s).to_string(), s);

        let s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀"; // 30 'a's

        assert_eq!(CFString::from_str(s).to_string(), s);

        let s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀"; // 31 'a's

        assert_eq!(CFString::from_str(s).to_string(), s);

    #[test]

    fn create_with_cstring_broken_on_non_8_bit() {

        // A CFString that is supposed to contain a "♥" (the UTF-8 encoding of

        // that is the vastly different b"\xE2\x99\xA5").

//

        // This line is wrong, because `CFStringCreateWithCString` expects an

        // 8-bit encoding.

//

        // See also:

        // https://github.com/swiftlang/swift-corelibs-foundation/issues/5164

        let s = unsafe {

            CFString::with_c_string(

                None,

                b"\x65\x26\0".as_ptr().cast(),

                CFStringBuiltInEncodings::EncodingUnicode.0,

        .unwrap();

        // `CFStringGetBytes` used in `fmt::Display` converts to UTF-8.

        assert_eq!(s.to_string(), "♥");

        // So does `CFStringGetCString`.

        let mut buf = [0u8; 20];

        assert!(unsafe {

            s.c_string(

                buf.as_mut_ptr().cast(),

                buf.len() as _,

                CFStringBuiltInEncodings::EncodingUTF8.0,

});

        let cstr = CStr::from_bytes_until_nul(&buf).unwrap();

        assert_eq!(cstr.to_bytes(), "♥".as_bytes());

        // `CFStringGetCStringPtr` completely ignores the requested UTF-8 conversion.

        assert_eq!(unsafe { s.as_str_unchecked() }, Some("e"));

        assert_eq!(

            unsafe { CStr::from_ptr(s.c_string_ptr(CFStringBuiltInEncodings::EncodingUTF8.0,)) },

            CStr::from_bytes_with_nul(b"e&\0").unwrap()

);

    #[test]

    fn test_static() {

        let cf = CFString::from_static_str("xyz");

        assert_eq!(cf.to_string(), "xyz");

    #[test]

    fn eq() {

        assert_eq!(CFString::from_str("abc"), CFString::from_str("abc"));

        assert_ne!(CFString::from_str("abc"), CFString::from_str("xyz"));

        // Cross-type comparison

        assert_ne!(

            **CFString::from_str("abc"),

            **unsafe { kCFAllocatorNull }.unwrap()

);

    // TODO: Test mutation while formatting.