Source code

Revision control

Copy as Markdown

Other Tools

//! A zero-copy parser for the contents of the `__unwind_info` section of a
//! mach-O binary.
//!
//! Quickly look up the unwinding opcode for an address. Then parse the opcode to find
//! out how to recover the return address and the caller frame's register values.
//!
//! This crate is intended to be fast enough to be used in a sampling profiler.
//! Re-parsing from scratch is cheap and can be done on every sample.
//!
//! For the full unwinding experience, both `__unwind_info` and `__eh_frame` may need
//! to be consulted. The two sections are complementary: `__unwind_info` handles the
//! easy cases, and refers to an `__eh_frame` FDE for the hard cases. Conversely,
//! `__eh_frame` only includes FDEs for functions whose unwinding info cannot be
//! represented in `__unwind_info`.
//!
//! On x86 and x86_64, `__unwind_info` can represent most functions regardless of
//! whether they were compiled with framepointers or without.
//!
//! On arm64, compiling without framepointers is strongly discouraged, and
//! `__unwind_info` can only represent functions which have framepointers or
//! which don't need to restore any registers. As a result, if you have an arm64
//! binary without framepointers (rare!), then the `__unwind_info` basically just
//! acts as an index for `__eh_frame`, similarly to `.eh_frame_hdr` for ELF.
//!
//! In clang's default configuration for arm64, non-leaf functions have framepointers
//! and leaf functions without stored registers on the stack don't have framepointers.
//! For leaf functions, the return address is kept in the `lr` register for the entire
//! duration of the function. And the unwind info lets you discern between these two
//! types of functions ("frame-based" and "frameless").
//!
//! # Example
//!
//! ```rust
//! use macho_unwind_info::UnwindInfo;
//! use macho_unwind_info::opcodes::OpcodeX86_64;
//!
//! # fn example(data: &[u8]) -> Result<(), macho_unwind_info::Error> {
//! let unwind_info = UnwindInfo::parse(data)?;
//!
//! if let Some(function) = unwind_info.lookup(0x1234)? {
//! println!("Found function entry covering the address 0x1234:");
//! let opcode = OpcodeX86_64::parse(function.opcode);
//! println!("0x{:08x}..0x{:08x}: {}", function.start_address, function.end_address, opcode);
//! }
//! # Ok(())
//! # }
//! ```
mod error;
mod num_display;
/// Provides architecture-specific opcode parsing.
pub mod opcodes;
/// Lower-level structs for interpreting the format data. Can be used if the convenience APIs are too limiting.
pub mod raw;
mod reader;
pub use error::*;
use raw::*;
/// A parsed representation of the unwind info.
///
/// The UnwindInfo contains a list of pages, each of which contain a list of
/// function entries.
pub struct UnwindInfo<'a> {
/// The full __unwind_info section data.
data: &'a [u8],
/// The list of global opcodes.
global_opcodes: &'a [Opcode],
/// The list of page entries in this UnwindInfo.
pages: &'a [PageEntry],
}
/// The information about a single function in the UnwindInfo.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Function {
/// The address where this function starts.
pub start_address: u32,
/// The address where this function ends. Includes the padding at the end of
/// the function. In reality, this is the address of the *next* function
/// entry, or for the last function this is the address of the sentinel page
/// entry.
pub end_address: u32,
/// The opcode which describes the unwinding information for this function.
/// This opcode needs to be parsed in an architecture-specific manner.
/// See the [opcodes] module for the facilities to do so.
pub opcode: u32,
}
impl<'a> UnwindInfo<'a> {
/// Create an [UnwindInfo] instance which wraps the raw bytes of a mach-O binary's
/// `__unwind_info` section. The data can have arbitrary alignment. The parsing done
/// in this function is minimal; it's basically just three bounds checks.
pub fn parse(data: &'a [u8]) -> Result<Self, Error> {
let header = CompactUnwindInfoHeader::parse(data)?;
let global_opcodes = header.global_opcodes(data)?;
let pages = header.pages(data)?;
Ok(Self {
data,
global_opcodes,
pages,
})
}
/// Returns an iterator over all the functions in this UnwindInfo.
pub fn functions(&self) -> FunctionIter<'a> {
FunctionIter {
data: self.data,
global_opcodes: self.global_opcodes,
pages: self.pages,
cur_page: None,
}
}
/// Returns the range of addresses covered by unwind information.
pub fn address_range(&self) -> core::ops::Range<u32> {
if self.pages.is_empty() {
return 0..0;
}
let first_page = self.pages.first().unwrap();
let last_page = self.pages.last().unwrap();
first_page.first_address()..last_page.first_address()
}
/// Looks up the unwind information for the function that covers the given address.
/// Returns `Ok(Some(function))` if a function was found.
/// Returns `Ok(None)` if the address was outside of the range of addresses covered
/// by the unwind info.
/// Returns `Err(error)` if there was a problem with the format of the `__unwind_info`
/// data.
///
/// This lookup is architecture agnostic. The opcode is returned as a u32.
/// To actually perform unwinding, the opcode needs to be parsed in an
/// architecture-specific manner.
///
/// The design of the compact unwinding format makes this lookup extremely cheap.
/// It's just two binary searches: First to find the right page, end then to find
/// the right function within a page. The search happens inside the wrapped data,
/// with no extra copies.
pub fn lookup(&self, pc: u32) -> Result<Option<Function>, Error> {
let Self {
pages,
data,
global_opcodes,
} = self;
let page_index = match pages.binary_search_by_key(&pc, PageEntry::first_address) {
Ok(i) => i,
Err(insertion_index) => {
if insertion_index == 0 {
return Ok(None);
}
insertion_index - 1
}
};
if page_index == pages.len() - 1 {
// We found the sentinel last page, which just marks the end of the range.
// So the looked up address is at or after the end address, i.e. outside the
// range of addresses covered by this UnwindInfo.
return Ok(None);
}
let page_entry = &pages[page_index];
let next_page_entry = &pages[page_index + 1];
let page_offset = page_entry.page_offset();
match page_entry.page_kind(data)? {
consts::PAGE_KIND_REGULAR => {
let page = RegularPage::parse(data, page_offset.into())?;
let functions = page.functions(data, page_offset)?;
let function_index =
match functions.binary_search_by_key(&pc, RegularFunctionEntry::address) {
Ok(i) => i,
Err(insertion_index) => {
if insertion_index == 0 {
return Err(Error::InvalidPageEntryFirstAddress);
}
insertion_index - 1
}
};
let entry = &functions[function_index];
let fun_address = entry.address();
let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) {
next_entry.address()
} else {
next_page_entry.first_address()
};
Ok(Some(Function {
start_address: fun_address,
end_address: next_fun_address,
opcode: entry.opcode(),
}))
}
consts::PAGE_KIND_COMPRESSED => {
let page = CompressedPage::parse(data, page_offset.into())?;
let functions = page.functions(data, page_offset)?;
let page_address = page_entry.first_address();
let rel_pc = pc - page_address;
let function_index = match functions.binary_search_by_key(&rel_pc, |&entry| {
CompressedFunctionEntry::new(entry.into()).relative_address()
}) {
Ok(i) => i,
Err(insertion_index) => {
if insertion_index == 0 {
return Err(Error::InvalidPageEntryFirstAddress);
}
insertion_index - 1
}
};
let entry = CompressedFunctionEntry::new(functions[function_index].into());
let fun_address = page_address + entry.relative_address();
let next_fun_address = if let Some(next_entry) = functions.get(function_index + 1) {
let next_entry = CompressedFunctionEntry::new((*next_entry).into());
page_address + next_entry.relative_address()
} else {
next_page_entry.first_address()
};
let opcode_index: usize = entry.opcode_index().into();
let opcode = if opcode_index < global_opcodes.len() {
global_opcodes[opcode_index].opcode()
} else {
let local_opcodes = page.local_opcodes(data, page_offset)?;
let local_index = opcode_index - global_opcodes.len();
local_opcodes[local_index].opcode()
};
Ok(Some(Function {
start_address: fun_address,
end_address: next_fun_address,
opcode,
}))
}
consts::PAGE_KIND_SENTINEL => {
// Only the last page should be a sentinel page, and we've already checked earlier
// that we're not in the last page.
Err(Error::UnexpectedSentinelPage)
}
_ => Err(Error::InvalidPageKind),
}
}
}
/// An iterator over the functions in an UnwindInfo page.
pub struct FunctionIter<'a> {
/// The full __unwind_info section data.
data: &'a [u8],
/// The list of global opcodes.
global_opcodes: &'a [Opcode],
/// The slice of the remaining to-be-iterated-over pages.
pages: &'a [PageEntry],
/// The page whose functions we're iterating over at the moment.
cur_page: Option<PageWithPartialFunctions<'a>>,
}
/// The current page of the function iterator.
/// The functions field is the slice of the remaining to-be-iterated-over functions.
#[derive(Clone, Copy)]
enum PageWithPartialFunctions<'a> {
Regular {
next_page_address: u32,
functions: &'a [RegularFunctionEntry],
},
Compressed {
page_address: u32,
next_page_address: u32,
local_opcodes: &'a [Opcode],
functions: &'a [U32],
},
}
impl<'a> FunctionIter<'a> {
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<Option<Function>, Error> {
loop {
let cur_page = if let Some(cur_page) = self.cur_page.as_mut() {
cur_page
} else {
let cur_page = match self.next_page()? {
Some(page) => page,
None => return Ok(None),
};
self.cur_page.insert(cur_page)
};
match cur_page {
PageWithPartialFunctions::Regular {
next_page_address,
functions,
} => {
if let Some((entry, remainder)) = functions.split_first() {
*functions = remainder;
let start_address = entry.address();
let end_address = remainder
.first()
.map(RegularFunctionEntry::address)
.unwrap_or(*next_page_address);
return Ok(Some(Function {
start_address,
end_address,
opcode: entry.opcode(),
}));
}
}
PageWithPartialFunctions::Compressed {
page_address,
functions,
next_page_address,
local_opcodes,
} => {
if let Some((entry, remainder)) = functions.split_first() {
*functions = remainder;
let entry = CompressedFunctionEntry::new((*entry).into());
let start_address = *page_address + entry.relative_address();
let end_address = match remainder.first() {
Some(next_entry) => {
let next_entry = CompressedFunctionEntry::new((*next_entry).into());
*page_address + next_entry.relative_address()
}
None => *next_page_address,
};
let opcode_index: usize = entry.opcode_index().into();
let opcode = if opcode_index < self.global_opcodes.len() {
self.global_opcodes[opcode_index].opcode()
} else {
let local_index = opcode_index - self.global_opcodes.len();
local_opcodes[local_index].opcode()
};
return Ok(Some(Function {
start_address,
end_address,
opcode,
}));
}
}
}
self.cur_page = None;
}
}
fn next_page(&mut self) -> Result<Option<PageWithPartialFunctions<'a>>, Error> {
let (page_entry, remainder) = match self.pages.split_first() {
Some(split) => split,
None => return Ok(None),
};
self.pages = remainder;
let next_page_entry = match remainder.first() {
Some(entry) => entry,
None => return Ok(None),
};
let page_offset = page_entry.page_offset();
let page_address = page_entry.first_address();
let next_page_address = next_page_entry.first_address();
let data = self.data;
let cur_page = match page_entry.page_kind(data)? {
consts::PAGE_KIND_REGULAR => {
let page = RegularPage::parse(data, page_offset.into())?;
PageWithPartialFunctions::Regular {
functions: page.functions(data, page_offset)?,
next_page_address,
}
}
consts::PAGE_KIND_COMPRESSED => {
let page = CompressedPage::parse(data, page_offset.into())?;
PageWithPartialFunctions::Compressed {
page_address,
next_page_address,
functions: page.functions(data, page_offset)?,
local_opcodes: page.local_opcodes(data, page_offset)?,
}
}
consts::PAGE_KIND_SENTINEL => return Err(Error::UnexpectedSentinelPage),
_ => return Err(Error::InvalidPageKind),
};
Ok(Some(cur_page))
}
}