Source code

Revision control

Copy as Markdown

Other Tools

use core::cmp::Ordering;
use core::ffi::c_char;
use core::fmt::Write;
use core::ptr::NonNull;
use core::{fmt, slice, str};
use crate::{
kCFAllocatorNull, CFIndex, CFRange, CFRetained, CFString, CFStringBuiltInEncodings,
CFStringCompareFlags,
};
#[track_caller]
unsafe fn debug_checked_utf8_unchecked(bytes: &[u8]) -> &str {
if cfg!(debug_assertions) {
match str::from_utf8(bytes) {
Ok(s) => s,
Err(err) => panic!(
"unsafe precondition violated: CF function did not return valid UTF-8: {err}"
),
}
} else {
// SAFETY: Checked by caller
unsafe { str::from_utf8_unchecked(bytes) }
}
}
impl CFString {
/// Creates a new `CFString` from a [`str`][prim@str].
#[inline]
#[doc(alias = "CFStringCreateWithBytes")]
#[allow(clippy::should_implement_trait)] // Not really sure of a better name
pub fn from_str(string: &str) -> CFRetained<Self> {
// Can never happen, allocations in Rust cannot be this large.
debug_assert!(string.len() < CFIndex::MAX as usize);
let len = string.len() as CFIndex;
let s = unsafe {
Self::with_bytes(
None,
string.as_ptr(),
len,
CFStringBuiltInEncodings::EncodingUTF8.0,
false,
)
};
// Should only fail if the string is not UTF-8 (which we know it is)
// or perhaps on allocation error.
s.expect("failed creating CFString")
}
/// Alias for easier transition from the `core-foundation` crate.
#[inline]
#[deprecated = "renamed to CFString::from_str"]
pub fn new(string: &str) -> CFRetained<Self> {
Self::from_str(string)
}
/// Creates a new `CFString` from a `'static` [`str`][prim@str].
///
/// This may be slightly more efficient than [`CFString::from_str`], as it
/// may be able to re-use the existing buffer (since we know it won't be
/// deallocated).
#[inline]
#[doc(alias = "CFStringCreateWithBytesNoCopy")]
pub fn from_static_str(string: &'static str) -> CFRetained<Self> {
debug_assert!(string.len() < CFIndex::MAX as usize);
let len = string.len() as CFIndex;
// SAFETY: The string is used as a backing store, and thus must
// potentially live forever, since we don't know how long the returned
// CFString will be alive for. This is ensured by the `'static`
// requirement.
let s = unsafe {
Self::with_bytes_no_copy(
None,
string.as_ptr(),
len,
CFStringBuiltInEncodings::EncodingUTF8.0,
false,
kCFAllocatorNull,
)
};
s.expect("failed creating CFString")
}
/// Get the [`str`](`prim@str`) representation of this string if it can be
/// done efficiently.
///
/// Returns [`None`] if the internal storage does not allow this to be
/// done efficiently. Use `CFString::to_string` if performance is not an
/// issue.
///
/// # Safety
///
/// The `CFString` must not be mutated for the lifetime of the returned
/// string.
///
/// Warning: This is very difficult to ensure in generic contexts, e.g. it
/// cannot even be used inside `Debug::fmt`, since `Formatter` uses `dyn`
/// internally, and could thus mutate the string inside there.
#[doc(alias = "CFStringGetCStringPtr")]
pub unsafe fn as_str_unchecked(&self) -> Option<&str> {
// NOTE: The encoding is an 8-bit encoding.
let bytes = self.c_string_ptr(CFStringBuiltInEncodings::EncodingASCII.0);
NonNull::new(bytes as *mut c_char).map(|bytes| {
// NOTE: The returned string may contain interior NUL bytes:
//
// So we have to check the length of the string too. We do that
// using `CFStringGetLength`; Since `CFStringGetCStringPtr`
// returned a pointer, and we picked the encoding to be ASCII
// (which has 1 codepoint per byte), this means that the number of
// codepoints is the same as the number of bytes in the string.
//
// This is also what Swift does:
let len = self.length() as usize;
// SAFETY: The pointer is valid for as long as the CFString is not
// mutated (which the caller ensures it isn't for the lifetime of
// the reference), and the length is correct (see above).
let bytes = unsafe { slice::from_raw_parts(bytes.as_ptr().cast(), len) };
// SAFETY: `CFStringGetCStringPtr` is (very likely) implemented
// correctly, and we picked the encoding to be ASCII (which is a
// subset of UTF-8).
unsafe { debug_checked_utf8_unchecked(bytes) }
})
}
}
impl fmt::Display for CFString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Copy UTF-8 bytes from the CFString to the formatter in a loop, to
// avoid allocating.
//
// We have to do this instead of using `CFStringGetCStringPtr`, as
// that will be invalidated if the string is mutated while in use, and
// `fmt::Formatter` contains `dyn Write` which may very theoretically
// do exactly that.
// Somewhat reasonably sized stack buffer.
// TODO: Do performance testing, and tweak this value.
//
// Should be at least 4 (as that's the minimum size of `char`).
let mut buf = [0u8; 32];
let mut location_utf16 = 0;
loop {
let len_utf16 = self.length();
let mut read_utf8 = 0;
let read_utf16 = unsafe {
self.bytes(
CFRange {
location: location_utf16,
length: len_utf16 - location_utf16,
},
CFStringBuiltInEncodings::EncodingUTF8.0,
0, // No conversion character
false,
buf.as_mut_ptr(),
buf.len() as _,
&mut read_utf8,
)
};
if read_utf16 <= 0 {
if location_utf16 < len_utf16 {
// We're not done reading the entire string yet; emit
// replacement character, advance one character, and try again.
f.write_char(char::REPLACEMENT_CHARACTER)?;
location_utf16 += 1;
continue;
}
break;
}
location_utf16 += read_utf16;
// SAFETY: `CFStringGetBytes` is (very likely) implemented
// correctly, and won't return non-UTF8 strings.
//
// Even if a string contains an UTF-8 char on a boundary, it won't
// split it up when returning UTF-8.
let s = unsafe { debug_checked_utf8_unchecked(&buf[0..read_utf8 as usize]) };
// NOTE: May unwind, and may invalidate the string contents.
f.write_str(s)?;
}
Ok(())
}
}
impl PartialOrd for CFString {
#[inline]
#[doc(alias = "CFStringCompare")]
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for CFString {
#[inline]
#[doc(alias = "CFStringCompare")]
fn cmp(&self, other: &Self) -> Ordering {
// Request standard lexiographical ordering.
let flags = CFStringCompareFlags::empty();
self.compare(Some(other), flags).into()
}
}
#[cfg(test)]
mod tests {
use alloc::string::ToString;
use core::ffi::CStr;
use super::*;
#[test]
fn basic_conversion() {
let s = CFString::from_str("abc");
assert_eq!(s.to_string(), "abc");
let s = CFString::from_str("a♥😀");
assert_eq!(s.to_string(), "a♥😀");
}
#[test]
fn cstr_conversion() {
let table = [
(
b"abc\xf8xyz\0" as &[u8],
CFStringBuiltInEncodings::EncodingISOLatin1,
"abcøxyz",
),
(
b"\x26\x65\0",
CFStringBuiltInEncodings::EncodingUTF16BE,
"♥",
),
(
b"\x65\x26\0",
CFStringBuiltInEncodings::EncodingUTF16LE,
"♥",
),
];
for (cstr, encoding, expected) in table {
let cstr = CStr::from_bytes_with_nul(cstr).unwrap();
let s = unsafe { CFString::with_c_string(None, cstr.as_ptr(), encoding.0) }.unwrap();
assert_eq!(s.to_string(), expected);
}
}
#[test]
fn from_incomplete() {
let s = unsafe {
CFString::with_bytes(
None,
b"\xd8\x3d\xde".as_ptr(),
3,
CFStringBuiltInEncodings::EncodingUTF16BE.0,
false,
)
.unwrap()
};
assert_eq!(s.to_string(), "�"); // Replacement character
assert_eq!(s.length(), 1);
}
#[test]
fn internal_nul_byte() {
let s = CFString::from_str("a\0b\0c\0d");
// Works with `CFStringGetBytes`.
assert_eq!(s.to_string(), "a\0b\0c\0d");
// `CFStringGetCStringPtr` does not seem to work here on very short
// strings (probably those that are stored inline?).
if cfg!(target_pointer_width = "64") {
assert_eq!(unsafe { s.as_str_unchecked() }, None);
} else {
assert_eq!(unsafe { s.as_str_unchecked() }, Some("a\0b\0c\0d"));
}
// Test `CFStringGetCString`.
let mut buf = [0u8; 10];
assert!(unsafe {
s.c_string(
buf.as_mut_ptr().cast(),
buf.len() as _,
CFStringBuiltInEncodings::EncodingUTF8.0,
)
});
// All the data is copied to the buffer.
assert_eq!(&buf[0..10], b"a\0b\0c\0d\0\0\0");
// But subsequent usage of that as a CStr fails, since it contains
// interior NUL bytes.
let cstr = CStr::from_bytes_until_nul(&buf).unwrap();
assert_eq!(cstr.to_bytes(), b"a");
// Test with a bit longer string, to ensure the same holds for heap-
// allocated CFStrings
let s = CFString::from_str("a\0aaaaaaaaaaaaaaa");
// Works with `CFStringGetBytes`.
assert_eq!(s.to_string(), "a\0aaaaaaaaaaaaaaa");
// `CFStringGetCStringPtr` also allows these without truncation.
assert_eq!(unsafe { s.as_str_unchecked() }, Some("a\0aaaaaaaaaaaaaaa"));
}
#[test]
fn as_str_correct_on_unicode() {
let s = CFString::from_static_str("😀");
assert_eq!(unsafe { s.as_str_unchecked() }, None);
let s = CFString::from_static_str("♥");
assert_eq!(unsafe { s.as_str_unchecked() }, None);
}
#[test]
fn utf8_on_boundary() {
// Make the emoji lie across the 32 byte buffer size in Display::fmt.
let s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀"; // 29 'a's
assert_eq!(CFString::from_str(s).to_string(), s);
let s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀"; // 30 'a's
assert_eq!(CFString::from_str(s).to_string(), s);
let s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa😀"; // 31 'a's
assert_eq!(CFString::from_str(s).to_string(), s);
}
#[test]
fn create_with_cstring_broken_on_non_8_bit() {
// A CFString that is supposed to contain a "♥" (the UTF-8 encoding of
// that is the vastly different b"\xE2\x99\xA5").
//
// This line is wrong, because `CFStringCreateWithCString` expects an
// 8-bit encoding.
//
// See also:
let s = unsafe {
CFString::with_c_string(
None,
b"\x65\x26\0".as_ptr().cast(),
CFStringBuiltInEncodings::EncodingUnicode.0,
)
}
.unwrap();
// `CFStringGetBytes` used in `fmt::Display` converts to UTF-8.
assert_eq!(s.to_string(), "♥");
// So does `CFStringGetCString`.
let mut buf = [0u8; 20];
assert!(unsafe {
s.c_string(
buf.as_mut_ptr().cast(),
buf.len() as _,
CFStringBuiltInEncodings::EncodingUTF8.0,
)
});
let cstr = CStr::from_bytes_until_nul(&buf).unwrap();
assert_eq!(cstr.to_bytes(), "♥".as_bytes());
// `CFStringGetCStringPtr` completely ignores the requested UTF-8 conversion.
assert_eq!(unsafe { s.as_str_unchecked() }, Some("e"));
assert_eq!(
unsafe { CStr::from_ptr(s.c_string_ptr(CFStringBuiltInEncodings::EncodingUTF8.0,)) },
CStr::from_bytes_with_nul(b"e&\0").unwrap()
);
}
#[test]
fn test_static() {
let cf = CFString::from_static_str("xyz");
assert_eq!(cf.to_string(), "xyz");
}
#[test]
fn eq() {
assert_eq!(CFString::from_str("abc"), CFString::from_str("abc"));
assert_ne!(CFString::from_str("abc"), CFString::from_str("xyz"));
// Cross-type comparison
assert_ne!(
**CFString::from_str("abc"),
**unsafe { kCFAllocatorNull }.unwrap()
);
}
// TODO: Test mutation while formatting.
}