Source code

Revision control

Copy as Markdown

Other Tools

use core::ffi::c_char;
use core::slice;
use core::str;
use crate::ffi::NSUInteger;
use crate::msg_send;
use crate::rc::AutoreleasePool;
use crate::runtime::NSObject;
// Note: While this is not public, it is still a breaking change to modify,
// since `objc2-foundation` relies on it.
#[cfg(not(feature = "gnustep-1-7"))]
pub const UTF8_ENCODING: usize = 4;
#[cfg(feature = "gnustep-1-7")]
pub const UTF8_ENCODING: i32 = 4;
/// The number of UTF-8 code units in the given string.
///
/// # Safety
///
/// The object must be an instance of `NSString`.
//
// Note: While this is not public, it is still a breaking change to modify,
// since `objc2-foundation` relies on it.
#[inline]
pub unsafe fn nsstring_len(obj: &NSObject) -> NSUInteger {
unsafe { msg_send![obj, lengthOfBytesUsingEncoding: UTF8_ENCODING] }
}
/// Extract a [`str`](`prim@str`) representation out of the given NSString.
///
/// Uses [`UTF8String`] under the hood.
///
///
///
/// # Safety
///
/// - The object must be an instance of `NSString`.
/// - The returned string must not be moved outside the autorelease pool into
/// which it (may) have been released.
///
/// Furthermore, the object must not, as is always the case for strings, be
/// mutated in parallel.
//
// Note: While this is not public, it is still a breaking change to modify,
// since `objc2-foundation` relies on it.
pub unsafe fn nsstring_to_str<'r, 's: 'r, 'p: 'r>(
obj: &'s NSObject,
pool: AutoreleasePool<'p>,
) -> &'r str {
// This is necessary until `auto` types stabilizes.
pool.__verify_is_inner();
// The documentation on `UTF8String` is quite sparse, but with educated
// guesses, testing, reading the code for `CFString` and a bit of
// reverse-engineering, we've determined that `NSString` stores a pointer
// to the string data, sometimes with an UTF-8 encoding (usual for ascii
// data), sometimes in other encodings (often UTF-16).
//
// `UTF8String` then checks the internal encoding:
// - If the data is UTF-8 encoded, and (since macOS 10.6) if the string is
// immutable, it returns the internal pointer using
// `CFStringGetCStringPtr`.
// - Otherwise, if the data is in another encoding or is mutable, it
// creates a new allocation, writes the UTF-8 representation of the
// string into it, autoreleases the allocation, and returns a pointer to
// it (similar to `CFStringGetCString`).
//
// If the string is a tagged pointer, or a custom subclass, then another
// code-path is taken that always creates a new allocation and copies the
// string into that using (effectively) `length` and `characterAtIndex:`.
//
// As a result, the lifetime of the returned pointer is either the same as
// the passed-in `NSString` OR the lifetime of the current / innermost
// `@autoreleasepool`.
//
// Furthermore, we can allow creating a `&str` from `&obj`, even if the
// string is originally a `NSMutableString` which may be mutated later on,
// since in that case the lifetime will be tied to the pool and not the
// string.
let bytes: *const c_char = unsafe { msg_send![obj, UTF8String] };
let bytes: *const u8 = bytes.cast();
// SAFETY: Caller ensures that the object is an instance of `NSString`.
let len = unsafe { nsstring_len(obj) };
// SAFETY:
// The held AutoreleasePool is the innermost, and the reference is
// constrained both by the pool and the NSString.
//
// `len` is the length of the string in the UTF-8 encoding.
//
// `bytes` is a null-terminated C string (with length = len + 1), so
// it is never a NULL pointer.
let bytes: &'r [u8] = unsafe { slice::from_raw_parts(bytes, len) };
// SAFETY: The bytes are valid UTF-8.
#[cfg(not(debug_assertions))]
unsafe {
str::from_utf8_unchecked(bytes)
}
#[cfg(debug_assertions)]
{
str::from_utf8(bytes).expect("invalid UTF-8 in NSString")
}
}