Source code

Revision control

Other Tools

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "LulMain.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h> // write(), only for testing LUL
#include <algorithm> // std::sort
#include <string>
#include <utility>
#include "mozilla/Assertions.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/MemoryChecking.h"
#include "mozilla/Sprintf.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Unused.h"
#include "BaseProfiler.h"
#include "LulCommonExt.h"
#include "LulElfExt.h"
#include "LulMainInt.h"
using mozilla::baseprofiler::profiler_current_process_id;
using mozilla::baseprofiler::profiler_current_thread_id;
// Set this to 1 for verbose logging
#define DEBUG_MAIN 0
namespace lul {
using mozilla::CheckedInt;
using mozilla::DebugOnly;
using mozilla::MallocSizeOf;
using mozilla::Unused;
using std::pair;
using std::string;
using std::vector;
// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
//
// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
// Any such function -- and, hence, the transitive closure of those
// reachable from it -- must not do any dynamic memory allocation.
// Doing so risks deadlock. There is exactly one root function for
// the transitive closure: Lul::Unwind.
//
// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
////////////////////////////////////////////////////////////////
// RuleSet //
////////////////////////////////////////////////////////////////
// Maps a DWARF register number to a short static name for debug
// printing.  Only the registers tracked for the architecture being
// compiled (plus the CFA pseudo-register) are recognised; anything
// else is shown as "???".
static const char* NameOf_DW_REG(int16_t aReg) {
  const char* name = "???";
  switch (aReg) {
    case DW_REG_CFA:
      name = "cfa";
      break;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      name = "xbp";
      break;
    case DW_REG_INTEL_XSP:
      name = "xsp";
      break;
    case DW_REG_INTEL_XIP:
      name = "xip";
      break;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      name = "r7";
      break;
    case DW_REG_ARM_R11:
      name = "r11";
      break;
    case DW_REG_ARM_R12:
      name = "r12";
      break;
    case DW_REG_ARM_R13:
      name = "r13";
      break;
    case DW_REG_ARM_R14:
      name = "r14";
      break;
    case DW_REG_ARM_R15:
      name = "r15";
      break;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      name = "x29";
      break;
    case DW_REG_AARCH64_X30:
      name = "x30";
      break;
    case DW_REG_AARCH64_SP:
      name = "sp";
      break;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      name = "sp";
      break;
    case DW_REG_MIPS_FP:
      name = "fp";
      break;
    case DW_REG_MIPS_PC:
      name = "pc";
      break;
#else
#  error "Unsupported arch"
#endif
    default:
      // Keep the "???" placeholder.
      break;
  }
  return name;
}
// Produces "<aNewReg>=<recovery rule>" as text, for debug printing.
// The rule part depends on |mHow|: a register plus offset, a
// dereference of register plus offset, a reference to a prefix
// expression by index, or "Unknown".
string LExpr::ShowRule(const char* aNewReg) const {
  string text(aNewReg);
  text += "=";

  char piece[64];
  if (mHow == UNKNOWN) {
    text += "Unknown";
  } else if (mHow == NODEREF) {
    SprintfLiteral(piece, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
    text += piece;
  } else if (mHow == DEREF) {
    SprintfLiteral(piece, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
    text += piece;
  } else if (mHow == PFXEXPR) {
    // For PFXEXPR, |mOffset| is printed as the expression's position.
    SprintfLiteral(piece, "PfxExpr-at-%d", (int)mOffset);
    text += piece;
  } else {
    text += "???";
  }
  return text;
}
void RuleSet::Print(void (*aLog)(const char*)) const {
char buf[96];
SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)mAddr,
(unsigned long long int)(mAddr + mLen - 1));
string res = string(buf);
res += mCfaExpr.ShowRule("cfa");
res += " in";
// For each reg we care about, print the recovery expression.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
res += mXipExpr.ShowRule(" RA");
res += mXspExpr.ShowRule(" SP");
res += mXbpExpr.ShowRule(" BP");
#elif defined(GP_ARCH_arm)
res += mR15expr.ShowRule(" R15");
res += mR7expr.ShowRule(" R7");
res += mR11expr.ShowRule(" R11");
res += mR12expr.ShowRule(" R12");
res += mR13expr.ShowRule(" R13");
res += mR14expr.ShowRule(" R14");
#elif defined(GP_ARCH_arm64)
res += mX29expr.ShowRule(" X29");
res += mX30expr.ShowRule(" X30");
res += mSPexpr.ShowRule(" SP");
#elif defined(GP_ARCH_mips64)
res += mPCexpr.ShowRule(" PC");
res += mSPexpr.ShowRule(" SP");
res += mFPexpr.ShowRule(" FP");
#else
# error "Unsupported arch"
#endif
aLog(res.c_str());
}
// Returns a pointer to the member expression that recovers register
// |aRegno|, or nullptr if this build does not track that register.
LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
  LExpr* expr = nullptr;
  switch (aRegno) {
    case DW_REG_CFA:
      expr = &mCfaExpr;
      break;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XIP:
      expr = &mXipExpr;
      break;
    case DW_REG_INTEL_XSP:
      expr = &mXspExpr;
      break;
    case DW_REG_INTEL_XBP:
      expr = &mXbpExpr;
      break;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R15:
      expr = &mR15expr;
      break;
    case DW_REG_ARM_R14:
      expr = &mR14expr;
      break;
    case DW_REG_ARM_R13:
      expr = &mR13expr;
      break;
    case DW_REG_ARM_R12:
      expr = &mR12expr;
      break;
    case DW_REG_ARM_R11:
      expr = &mR11expr;
      break;
    case DW_REG_ARM_R7:
      expr = &mR7expr;
      break;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      expr = &mX29expr;
      break;
    case DW_REG_AARCH64_X30:
      expr = &mX30expr;
      break;
    case DW_REG_AARCH64_SP:
      expr = &mSPexpr;
      break;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      expr = &mSPexpr;
      break;
    case DW_REG_MIPS_FP:
      expr = &mFPexpr;
      break;
    case DW_REG_MIPS_PC:
      expr = &mPCexpr;
      break;
#else
#  error "Unknown arch"
#endif
    default:
      // Untracked register: leave |expr| null.
      break;
  }
  return expr;
}
// Creates an empty RuleSet covering no addresses.  The remaining
// fields are all of type LExpr and are set up by LExpr::LExpr().
RuleSet::RuleSet() : mAddr(0), mLen(0) {}
////////////////////////////////////////////////////////////////
// SecMap //
////////////////////////////////////////////////////////////////
// See header file LulMainInt.h for comments about invariants.
SecMap::SecMap(void (*aLog)(const char*))
: mSummaryMinAddr(1), mSummaryMaxAddr(0), mUsable(true), mLog(aLog) {}
SecMap::~SecMap() {
  // Drop all stored RuleSets.
  mRuleSets.clear();
}
// RUNS IN NO-MALLOC CONTEXT
// Binary-searches mRuleSets for the entry whose address range contains
// |ia|.  Returns a pointer to it, or nullptr if there is none.  The
// map must already have been sorted and preened (mUsable is set).
RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
  MOZ_ASSERT(mUsable);

  // The bounds are signed so that |last| can go to -1, both for an
  // empty vector and when the search walks off the low end.
  long int first = 0;
  long int last = (long int)mRuleSets.size() - 1;

  // Invariant: the unsearched space is [first, last], inclusive.
  while (first <= last) {
    long int probe = first + ((last - first) / 2);
    RuleSet* candidate = &mRuleSets[probe];
    uintptr_t candMin = candidate->mAddr;
    uintptr_t candMax = candMin + candidate->mLen - 1;
    if (ia < candMin) {
      last = probe - 1;
    } else if (ia > candMax) {
      first = probe + 1;
    } else {
      MOZ_ASSERT(candMin <= ia && ia <= candMax);
      return candidate;
    }
  }

  // Search space exhausted: not found.
  return nullptr;
}
// Add a RuleSet to the collection. The rule is copied in. Calling
// this makes the map non-searchable.
void SecMap::AddRuleSet(const RuleSet* rs) {
mUsable = false;
mRuleSets.push_back(*rs);
}
// Appends |pfxi| to the vector of prefix instructions and returns the
// index at which it was stored.  The map is marked non-searchable.
uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
  mUsable = false;
  mPfxInstrs.push_back(pfxi);
  // The new element is the last one.
  const size_t lastIndex = mPfxInstrs.size() - 1;
  return lastIndex;
}
// Ordering predicate for std::sort: true iff |rs1| starts at a
// strictly lower address than |rs2|.
static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
  const bool startsEarlier = rs1.mAddr < rs2.mAddr;
  return startsEarlier;
}
// Prepare the map for searching. Completely remove any which don't
// fall inside the specified range [start, +len).
void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) {
if (mRuleSets.empty()) {
return;
}
MOZ_ASSERT(aLen > 0);
if (aLen == 0) {
// This should never happen.
mRuleSets.clear();
return;
}
// Sort by start addresses.
std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
// Detect any entry not completely contained within [start, +len).
// Set its length to zero, so that the next pass will remove it.
for (size_t i = 0; i < mRuleSets.size(); ++i) {
RuleSet* rs = &mRuleSets[i];
if (rs->mLen > 0 &&
(rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
rs->mLen = 0;
}
}
// Iteratively truncate any overlaps and remove any zero length
// entries that might result, or that may have been present
// initially. Unless the input is seriously screwy, this is
// expected to iterate only once.
while (true) {
size_t i;
size_t n = mRuleSets.size();
size_t nZeroLen = 0;
if (n == 0) {
break;
}
for (i = 1; i < n; ++i) {
RuleSet* prev = &mRuleSets[i - 1];
RuleSet* here = &mRuleSets[i];
MOZ_ASSERT(prev->mAddr <= here->mAddr);
if (prev->mAddr + prev->mLen > here->mAddr) {
prev->mLen = here->mAddr - prev->mAddr;
}
if (prev->mLen == 0) nZeroLen++;
}
if (mRuleSets[n - 1].mLen == 0) {
nZeroLen++;
}
// At this point, the entries are in-order and non-overlapping.
// If none of them are zero-length, we are done.
if (nZeroLen == 0) {
break;
}
// Slide back the entries to remove the zero length ones.
size_t j = 0; // The write-point.
for (i = 0; i < n; ++i) {
if (mRuleSets[i].mLen == 0) {
continue;
}
if (j != i) mRuleSets[j] = mRuleSets[i];
++j;
}
MOZ_ASSERT(i == n);
MOZ_ASSERT(nZeroLen <= n);
MOZ_ASSERT(j == n - nZeroLen);
while (nZeroLen > 0) {
mRuleSets.pop_back();
nZeroLen--;
}
MOZ_ASSERT(mRuleSets.size() == j);
}
size_t n = mRuleSets.size();
#ifdef DEBUG
// Do a final check on the rules: their address ranges must be
// ascending, non overlapping, non zero sized.
if (n > 0) {
MOZ_ASSERT(mRuleSets[0].mLen > 0);
for (size_t i = 1; i < n; ++i) {
RuleSet* prev = &mRuleSets[i - 1];
RuleSet* here = &mRuleSets[i];
MOZ_ASSERT(prev->mAddr < here->mAddr);
MOZ_ASSERT(here->mLen > 0);
MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
}
}
#endif
// Set the summary min and max address values.
if (n == 0) {
// Use the values defined in comments in the class declaration.
mSummaryMinAddr = 1;
mSummaryMaxAddr = 0;
} else {
mSummaryMinAddr = mRuleSets[0].mAddr;
mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1;
}
char buf[150];
SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
(int)n, (unsigned long long int)mSummaryMinAddr,
(unsigned long long int)mSummaryMaxAddr);
buf[sizeof(buf) - 1] = 0;
mLog(buf);
// Is now usable for binary search.
mUsable = true;
#if 0
mLog("\nRulesets after preening\n");
for (size_t i = 0; i < mRuleSets.size(); ++i) {
mRuleSets[i].Print(mLog);
mLog("\n");
}
mLog("\n");
#endif
}
// True iff the map currently holds no RuleSets.
bool SecMap::IsEmpty() {
  return mRuleSets.size() == 0;
}
// Memory reporting: the size of this object plus the backing stores of
// its two vectors, each as measured by |aMallocSizeOf|.
size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t total = aMallocSizeOf(this);

  // Handing vector::data() to the measurer could conceivably be unsafe
  // with some std::vector implementations, but it seems to be working
  // for now...
  total += aMallocSizeOf(mRuleSets.data());
  total += aMallocSizeOf(mPfxInstrs.data());

  return total;
}
////////////////////////////////////////////////////////////////
// SegArray //
////////////////////////////////////////////////////////////////
// A SegArray holds a set of address ranges that together exactly
// cover an address range, with no overlaps or holes.  Each range has
// an associated value, which in this case has been specialised to be
// a simple boolean.  The representation is kept to minimal canonical
// form in which adjacent ranges with the same associated value are
// merged together.  Each range is represented by a |struct Seg|.
//
// SegArrays are used to keep track of which parts of the address
// space are known to contain instructions.
//
// Invariant: |mSegs| is never empty, is sorted by address, and covers
// [0, UINTPTR_MAX] exactly.  Hence find() succeeds for every address.
class SegArray {
 public:
  // Sets the value of every address in [lo, hi] (inclusive) to |val|.
  // Does nothing if |lo > hi|.
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    // Make sure segment boundaries exist exactly at |lo| and just
    // after |hi|, so that whole segments can be re-valued.
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi + 1);
    }
    const std::vector<Seg>::size_type iLo = find(lo);
    const std::vector<Seg>::size_type iHi = find(hi);
    for (std::vector<Seg>::size_type i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // RUNS IN NO-MALLOC CONTEXT
  // If |addr| lies in a segment whose value is true, writes that
  // segment's inclusive bounds to |*rx_min| and |*rx_max| and returns
  // true.  Otherwise returns false and leaves the outputs untouched.
  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
    const std::vector<Seg>::size_type i = find(addr);
    // find() cannot fail while the coverage invariant holds, but never
    // index with its "not found" value.
    if (i >= mSegs.size() || !mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  // Starts with one segment covering the whole address space, with
  // value false.
  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  // One address range [lo, hi] (inclusive) and its associated value.
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Restores canonical form by merging each run of adjacent segments
  // that carry the same value.
  void preen() {
    std::vector<Seg>::size_type i = 0;
    while (i + 1 < mSegs.size()) {
      if (mSegs[i].val != mSegs[i + 1].val) {
        ++i;
        continue;
      }
      // Absorb the next segment into this one, and stay at |i| so as
      // not to miss an opportunity to merge with the entry after it.
      mSegs[i].hi = mSegs[i + 1].hi;
      mSegs.erase(mSegs.begin() + i + 1);
    }
  }

  // RUNS IN NO-MALLOC CONTEXT
  // Binary-searches for the segment containing |a| and returns its
  // index, or size_type(-1) if there is none (which cannot happen
  // while the coverage invariant holds).
  std::vector<Seg>::size_type find(uintptr_t a) {
    // Signed, so that |hi| can drop below |lo| to end the search.
    long int lo = 0;
    long int hi = (long int)mSegs.size() - 1;
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) {
        hi = mid - 1;
        continue;
      }
      if (a > mid_hi) {
        lo = mid + 1;
        continue;
      }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  // Ensures that some segment starts exactly at |a|, splitting the
  // segment that contains |a| in two if necessary.
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    // Copy first: insert() may reallocate, and its argument must not
    // refer to an element of the vector being grown.
    const Seg copy = mSegs[i];
    mSegs.insert(mSegs.begin() + i + 1, copy);
    mSegs[i].hi = a - 1;
    mSegs[i + 1].lo = a;
  }

  // Debug aid: dumps all segments to stdout.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
         ++iter) {
      printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  std::vector<Seg> mSegs;
};
////////////////////////////////////////////////////////////////
// PriMap //
////////////////////////////////////////////////////////////////
class PriMap {
public:
explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}
// RUNS IN NO-MALLOC CONTEXT
pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
SecMap* sm = FindSecMap(ia);
return pair<const RuleSet*, const vector<PfxInstr>*>(
sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
}
// Add a secondary map. No overlaps allowed w.r.t. existing
// secondary maps.
void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
// We can't add an empty SecMap to the PriMap. But that's OK
// since we'd never be able to find anything in it anyway.
if (aSecMap->IsEmpty()) {
return;
}
// Iterate through the SecMaps and find the right place for this
// one. At the same time, ensure that the in-order
// non-overlapping invariant is preserved (and, generally, holds).
// FIXME: this gives a cost that is O(N^2) in the total number of
// shared objects in the system. ToDo: better.
MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
size_t num_secMaps = mSecMaps.size();
uintptr_t i;
for (i = 0; i < num_secMaps; ++i) {
mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
// |aSecMap| needs to be inserted immediately before mSecMaps[i].
break;
}
}
MOZ_ASSERT(i <= num_secMaps);
if (i == num_secMaps) {
// It goes at the end.
mSecMaps.push_back(std::move(aSecMap));
} else {
std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
mSecMaps.begin() + i;
mSecMaps.insert(iter, std::move(aSecMap));
}
char buf[100];
SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
(int)mSecMaps.size());
buf[sizeof(buf) - 1] = 0;
mLog(buf);
}
// Remove and delete any SecMaps in the mapping, that intersect
// with the specified address range.
void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
MOZ_ASSERT(avma_min <= avma_max);
size_t num_secMaps = mSecMaps.size();
if (num_secMaps > 0) {
intptr_t i;
// Iterate from end to start over the vector, so as to ensure
// that the special case where |avma_min| and |avma_max| denote
// the entire address space, can be completed in time proportional
// to the number of elements in the map.
for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
if (sm_i->mSummaryMaxAddr < avma_min ||
avma_max < sm_i->mSummaryMinAddr) {
// There's no overlap. Move on.
continue;
}
// We need to remove mSecMaps[i] and slide all those above it
// downwards to cover the hole.
mSecMaps.erase(mSecMaps.begin() + i);
}
}
}
// Return the number of currently contained SecMaps.
size_t CountSecMaps() { return mSecMaps.size(); }
size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
size_t n = aMallocSizeOf(this);
// It's conceivable that this call would be unsafe with some
// implementations of std::vector, but it seems to be working for now...
n += aMallocSizeOf(mSecMaps.data());
for (size_t i = 0; i < mSecMaps.size(); i++) {
n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
}
return n;
}
private:
// RUNS IN NO-MALLOC CONTEXT
SecMap* FindSecMap(uintptr_t ia) {
// Binary search mSecMaps to find one that brackets |ia|.
// lo and hi need to be signed, else the loop termination tests
// don't work properly.
long int lo = 0;
long int hi = (long int)mSecMaps.size() - 1;
while (true) {
// current unsearched space is from lo to hi, inclusive.
if (lo > hi) {
// not found
return nullptr;
}
long int mid = lo + ((hi - lo) / 2);
mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
if (ia < mid_minAddr) {
hi = mid - 1;
continue;
}
if (ia > mid_maxAddr) {
lo = mid + 1;
continue;
}
MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
return mid_secMap.get();
}
// NOTREACHED
}
private:
// sorted array of per-object ranges, non overlapping, non empty
std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;
// a logging sink, for debugging.
void (*mLog)(const char*);
};
////////////////////////////////////////////////////////////////
// LUL //
////////////////////////////////////////////////////////////////
// Logging helper.  Formats a line consisting of the current process id,
// the current thread id, the address of the object the macro is used
// in, and finally |_str|, into a 200-byte local buffer, and passes it
// to |mLog|.  Since the expansion refers to |this| and |mLog|, it can
// only be used inside member functions of a class that has an |mLog|
// member.  |_str| is printed through "%s".
#define LUL_LOG(_str) \
do { \
char buf[200]; \
SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \
uint64_t(profiler_current_process_id().ToNumber()), \
uint64_t(profiler_current_thread_id().ToNumber()), this, \
(_str)); \
buf[sizeof(buf) - 1] = 0; \
mLog(buf); \
} while (0)
LUL::LUL(void (*aLog)(const char*))
: mLog(aLog),
mAdminMode(true),
mAdminThreadId(profiler_current_thread_id()),
mPriMap(new PriMap(aLog)),
mSegArray(new SegArray()),
mUSU(new UniqueStringUniverse()) {
LUL_LOG("LUL::LUL: Created object");
}
// Logs the destruction, then deletes the PriMap, SegArray and
// UniqueStringUniverse objects held through raw pointers, and clears
// the logging sink.
LUL::~LUL() {
// Must come first: LUL_LOG needs |mLog| to still be set.
LUL_LOG("LUL::~LUL: Destroyed object");
delete mPriMap;
delete mSegArray;
mLog = nullptr;
delete mUSU;
}
// Logs frame statistics once at least 5000 new frames have accumulated
// since the previous report, then takes the current counters as the
// new baseline.
void LUL::MaybeShowStats() {
  // This is racy: if another thread updates mStats between computing
  // the total and computing the per-category deltas, the categories
  // need not add up to the total.  It's only stats printing, so that
  // is tolerated.
  uint32_t n_new = mStats - mStatsPrevious;
  if (n_new < 5000) {
    return;
  }

  uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
  uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
  uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
  mStatsPrevious = mStats;

  char line[200];
  SprintfLiteral(line,
                 "LUL frame stats: TOTAL %5u"
                 " CTX %4u CFI %4u FP %4u",
                 n_new, n_new_Context, n_new_CFI, n_new_FP);
  line[sizeof(line) - 1] = 0;
  mLog(line);
}
// Memory reporting: the size of this object plus that of the PriMap.
size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  const size_t ownSize = aMallocSizeOf(this);
  const size_t priMapSize = mPriMap->SizeOfIncludingThis(aMallocSizeOf);

  // Not measured at present; may be added later if DMD finds it is
  // worthwhile:
  // - mSegArray
  // - mUSU
  return ownSize + priMapSize;
}
// Leaves Admin mode by clearing |mAdminMode|.  Must be called on the
// thread recorded as the admin thread; this is release-asserted.
void LUL::EnableUnwinding() {
LUL_LOG("LUL::EnableUnwinding");
// Don't assert for Admin mode here. That is, tolerate a call here
// if we are already in Unwinding mode.
MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
mAdminMode = false;
}
void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
const void* aMappedImage) {
MOZ_RELEASE_ASSERT(mAdminMode);
MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
mLog(":\n");
char buf[200];
SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
(unsigned long long int)aRXavma, (unsigned long long int)aSize,
aFileName);
buf[sizeof(buf) - 1] = 0;
mLog(buf);
// Ignore obviously-stupid notifications.
if (aSize > 0) {
// Here's a new mapping, for this object.
mozilla::UniquePtr<SecMap> smap = mozilla::MakeUnique<SecMap>(mLog);
// Read CFI or EXIDX unwind data into |smap|.
if (!aMappedImage) {
(void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
} else {
(void)lul::ReadSymbolDataInternal(
(const uint8_t*)aMappedImage, string(aFileName),
std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
}
mLog("NotifyMap .. preparing entries\n");
smap->PrepareRuleSets(aRXavma, aSize);
SprintfLiteral(buf, "NotifyMap got %lld entries\n",
(long long int)smap->Size());
buf[sizeof(buf) - 1] = 0;
mLog(buf);
// Add it to the primary map (the top level set of mapped objects).
mPriMap->AddSecMap(std::move(smap));
// Tell the segment array about the mapping, so that the stack
// scan and __kernel_syscall mechanisms know where valid code is.
mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
}
}
// Tells LUL that [aRXavma, +aSize) contains executable code, without
// reading any unwind data for it.  Only the segment array is updated.
// Zero-sized notifications are logged and otherwise ignored.  Admin
// mode and admin thread only (release-asserted).
void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char msg[200];
  SprintfLiteral(msg, "NotifyExecutableArea %llx %llu\n",
                 (unsigned long long int)aRXavma,
                 (unsigned long long int)aSize);
  msg[sizeof(msg) - 1] = 0;
  mLog(msg);

  // Ignore obviously-stupid notifications.
  if (aSize == 0) {
    return;
  }

  // Tell the segment array about the mapping, so that the stack
  // scan and __kernel_syscall mechanisms know where valid code is.
  const uintptr_t lastByte = aRXavma + aSize - 1;
  mSegArray->add(aRXavma, lastByte, true);
}
// Tells LUL that the inclusive range [aRXavmaMin, aRXavmaMax] is about
// to be unmapped.  Every SecMap intersecting the range is removed from
// the primary map, and the range is marked as non-code in the segment
// array.  Admin mode and admin thread only (release-asserted).
void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char msg[100];
  SprintfLiteral(msg, "NotifyUnmap %016llx-%016llx\n",
                 (unsigned long long int)aRXavmaMin,
                 (unsigned long long int)aRXavmaMax);
  msg[sizeof(msg) - 1] = 0;
  mLog(msg);

  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);

  // Drop, and thereby delete, every secondary map that intersects the
  // address range.
  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);

  // The address range no longer contains valid code.
  mSegArray->add(aRXavmaMin, aRXavmaMax, false);

  const int remaining = (int)mPriMap->CountSecMaps();
  SprintfLiteral(msg, "NotifyUnmap: now have %d SecMaps\n", remaining);
  msg[sizeof(msg) - 1] = 0;
  mLog(msg);
}
// Returns the number of SecMaps currently held by the primary map.
// Admin mode and admin thread only; both are release-asserted.
size_t LUL::CountMappings() {
MOZ_RELEASE_ASSERT(mAdminMode);
MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
return mPriMap->CountSecMaps();
}
// RUNS IN NO-MALLOC CONTEXT
// Reads one machine word from the stack image at address |aAddr|.
// Returns an invalid TaggedUWord if |aAddr| is itself invalid, or if
// the word does not lie entirely within the image, i.e. within
// [mStartAvma, mStartAvma + mLen).
static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
  if (!aAddr.Valid()) {
    return TaggedUWord();
  }

  // Lower limit check.  |aAddr.Value()| is the lowest requested address
  // and |aStackImg->mStartAvma| is the lowest address we actually have,
  // so the comparison is straightforward.
  if (aAddr.Value() < aStackImg->mStartAvma) {
    return TaggedUWord();
  }

  // Upper limit check.  We must compute the highest requested address
  // and the highest address we actually have, but being careful to
  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
  // 3/7 values below that, then we will get overflow.  See bug #1245477.
  typedef CheckedInt<uintptr_t> CheckedUWord;
  CheckedUWord highest_requested_plus_one =
      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
  CheckedUWord highest_available_plus_one =
      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
  if (!highest_requested_plus_one.isValid()     // overflow?
      || !highest_available_plus_one.isValid()  // overflow?
      || (highest_requested_plus_one.value() >
          highest_available_plus_one.value())) {  // in range?
    return TaggedUWord();
  }

  // Copy the word out with memcpy instead of dereferencing a cast
  // pointer: nothing above guarantees that the requested address is
  // word-aligned, and memcpy is well-defined for any alignment.  It
  // does not allocate.
  uintptr_t word;
  memcpy(&word, &aStackImg->mContents[aAddr.Value() - aStackImg->mStartAvma],
         sizeof(word));
  return TaggedUWord(word);
}
// RUNS IN NO-MALLOC CONTEXT
// Returns the value of register |aReg|: |aCFA| for DW_REG_CFA,
// otherwise the corresponding field of |aOldRegs|.  A register number
// not tracked in this build asserts in debug builds and yields a
// default-constructed TaggedUWord.
static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
                               TaggedUWord aCFA) {
  TaggedUWord value;
  switch (aReg) {
    case DW_REG_CFA:
      value = aCFA;
      break;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      value = aOldRegs->xbp;
      break;
    case DW_REG_INTEL_XSP:
      value = aOldRegs->xsp;
      break;
    case DW_REG_INTEL_XIP:
      value = aOldRegs->xip;
      break;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      value = aOldRegs->r7;
      break;
    case DW_REG_ARM_R11:
      value = aOldRegs->r11;
      break;
    case DW_REG_ARM_R12:
      value = aOldRegs->r12;
      break;
    case DW_REG_ARM_R13:
      value = aOldRegs->r13;
      break;
    case DW_REG_ARM_R14:
      value = aOldRegs->r14;
      break;
    case DW_REG_ARM_R15:
      value = aOldRegs->r15;
      break;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      value = aOldRegs->x29;
      break;
    case DW_REG_AARCH64_X30:
      value = aOldRegs->x30;
      break;
    case DW_REG_AARCH64_SP:
      value = aOldRegs->sp;
      break;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      value = aOldRegs->sp;
      break;
    case DW_REG_MIPS_FP:
      value = aOldRegs->fp;
      break;
    case DW_REG_MIPS_PC:
      value = aOldRegs->pc;
      break;
#else
#  error "Unsupported arch"
#endif
    default:
      // Unknown register: |value| stays default-constructed.
      MOZ_ASSERT(0);
      break;
  }
  return value;
}
// RUNS IN NO-MALLOC CONTEXT
// See prototype for comment.
//
// Evaluates the prefix-expression instruction sequence in |aPfxInstrs|
// that begins at index |start|, using a small fixed-size value stack.
// The sequence must begin with PX_Start and is executed up to the
// first PX_End.  The result is the value on top of the stack at that
// point.  A default-constructed TaggedUWord is returned on any
// failure: |start| out of range, a missing PX_Start, a stray PX_Start,
// an unknown opcode, running off the end of the vector, stack overflow
// or underflow, or an empty stack at PX_End.
TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
                            TaggedUWord aCFA, const StackImage* aStackImg,
                            const vector<PfxInstr>& aPfxInstrs) {
  // A small evaluation stack, and a stack pointer, which points to
  // the highest numbered in-use element.
  const int N_STACK = 10;
  TaggedUWord stack[N_STACK];
  int stackPointer = -1;
  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

#define PUSH(_tuw)                                             \
  do {                                                         \
    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
    stack[++stackPointer] = (_tuw);                            \
  } while (0)

#define POP(_lval)                                   \
  do {                                               \
    if (stackPointer < 0) goto fail; /* underflow */ \
    _lval = stack[stackPointer--];                   \
  } while (0)

  // Cursor in the instruction sequence.  This has to be declared ahead
  // of the first |goto fail|, and so before |start| has been checked.
  // The increment is therefore done in size_t, where it cannot
  // overflow the way a signed |start + 1| could for INT32_MAX.
  size_t curr = (size_t)start + 1;

  // Check the start point is sane.
  size_t nInstrs = aPfxInstrs.size();
  if (start < 0 || (size_t)start >= nInstrs) goto fail;

  {
    // The instruction sequence must start with PX_Start.  If not,
    // something is seriously wrong.
    PfxInstr first = aPfxInstrs[start];
    if (first.mOpcode != PX_Start) goto fail;

    // Push the CFA on the stack to start with (or not), as required by
    // the original DW_OP_*expression* CFI.
    if (first.mOperand != 0) PUSH(aCFA);
  }

  while (true) {
    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];
    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {
      case PX_Start:
        // This should appear only at the start of the sequence.
        goto fail;
      case PX_End:
        // We just took care of that, so we shouldn't see it again.
        MOZ_ASSERT(0);
        goto fail;
      case PX_SImm32:
        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
        break;
      case PX_DwReg: {
        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
        MOZ_ASSERT(reg != DW_REG_CFA);
        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
        break;
      }
      case PX_Deref: {
        TaggedUWord addr;
        POP(addr);
        PUSH(DerefTUW(addr, aStackImg));
        break;
      }
      // For the binary operators below, |x| is the value popped first
      // (the top of the stack) and |y| the one beneath it; the result
      // pushed is "y op x".
      case PX_Add: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y + x);
        break;
      }
      case PX_Sub: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y - x);
        break;
      }
      case PX_And: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y & x);
        break;
      }
      case PX_Or: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y | x);
        break;
      }
      case PX_CmpGES: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y.CmpGEs(x));
        break;
      }
      case PX_Shl: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y << x);
        break;
      }
      default:
        MOZ_ASSERT(0);
        goto fail;
    }
  }  // while (true)

  // Evaluation finished.  The top value on the stack is the result.
  if (stackPointer >= 0) {
    return stack[stackPointer];
  }
  // Else fall through

fail:
  return TaggedUWord();

#undef PUSH
#undef POP
}
// RUNS IN NO-MALLOC CONTEXT
// Computes the value this expression denotes, given the register
// values |aOldRegs|, the CFA |aCFA| and the stack image |aStackImg|.
// |aPfxInstrs| is consulted only for PFXEXPR expressions.  A
// default-constructed TaggedUWord is returned for UNKNOWN, for a
// PFXEXPR without an instruction vector, and for an unrecognised
// |mHow|.
TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
                                const StackImage* aStackImg,
                                const vector<PfxInstr>* aPfxInstrs) const {
  switch (mHow) {
    case UNKNOWN:
      break;

    case NODEREF:
    case DEREF: {
      // Both forms start from "register + offset" ...
      TaggedUWord sum = EvaluateReg(mReg, aOldRegs, aCFA);
      sum = sum + TaggedUWord((intptr_t)mOffset);
      // ... and DEREF additionally loads the word stored there.
      if (mHow == DEREF) {
        return DerefTUW(sum, aStackImg);
      }
      return sum;
    }

    case PFXEXPR:
      MOZ_ASSERT(aPfxInstrs);
      if (aPfxInstrs) {
        // Here |mOffset| is the index at which the expression starts.
        return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg,
                               *aPfxInstrs);
      }
      break;

    default:
      MOZ_ASSERT(0);
      break;
  }
  return TaggedUWord();
}
// RUNS IN NO-MALLOC CONTEXT
// Applies the recovery rules in |aRS| to |*aRegs|, replacing its
// values with those computed from the old ones.  Registers for which
// no value could be computed are left holding a default-constructed
// TaggedUWord, so the caller can see which ones were recovered.
static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
  // Snapshot the incoming values: all the rules are evaluated against
  // these, not against partially updated state.
  UnwindRegs old_regs = *aRegs;

  // This is generally useful.
  const TaggedUWord inval = TaggedUWord();

  // Start by marking every register as invalid.  If not even a new PC
  // value can be computed, the caller will have to abandon the unwind.
  // FIXME: Create and use instead: aRegs->SetAllInvalid();
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp = inval;
  aRegs->xsp = inval;
  aRegs->xip = inval;
#elif defined(GP_ARCH_arm)
  aRegs->r7 = inval;
  aRegs->r11 = inval;
  aRegs->r12 = inval;
  aRegs->r13 = inval;
  aRegs->r14 = inval;
  aRegs->r15 = inval;
#elif defined(GP_ARCH_arm64)
  aRegs->x29 = inval;
  aRegs->x30 = inval;
  aRegs->sp = inval;
  aRegs->pc = inval;
#elif defined(GP_ARCH_mips64)
  aRegs->sp = inval;
  aRegs->fp = inval;
  aRegs->pc = inval;
#else
#  error "Unsupported arch"
#endif

  // First, compute the CFA.  There is no old CFA to feed in.
  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
                                               aStackImg, aPfxInstrs);

  // Evaluates one register rule against the old registers and the CFA.
  auto recover = [&](const LExpr& aExpr) -> TaggedUWord {
    return aExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  };

  // A failed CFA computation is bad, but carry on regardless: that is
  // still OK provided none of the register rules mention the CFA.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp = recover(aRS->mXbpExpr);
  aRegs->xsp = recover(aRS->mXspExpr);
  aRegs->xip = recover(aRS->mXipExpr);
#elif defined(GP_ARCH_arm)
  aRegs->r7 = recover(aRS->mR7expr);
  aRegs->r11 = recover(aRS->mR11expr);
  aRegs->r12 = recover(aRS->mR12expr);
  aRegs->r13 = recover(aRS->mR13expr);
  aRegs->r14 = recover(aRS->mR14expr);
  aRegs->r15 = recover(aRS->mR15expr);
#elif defined(GP_ARCH_arm64)
  // Note: no new value is computed for |pc| here.
  aRegs->x29 = recover(aRS->mX29expr);
  aRegs->x30 = recover(aRS->mX30expr);
  aRegs->sp = recover(aRS->mSPexpr);
#elif defined(GP_ARCH_mips64)
  aRegs->sp = recover(aRS->mSPexpr);
  aRegs->fp = recover(aRS->mFPexpr);
  aRegs->pc = recover(aRS->mPCexpr);
#else
#  error "Unsupported arch"
#endif

  // Done.  Whatever could not be computed is now marked invalid.
}
// RUNS IN NO-MALLOC CONTEXT
//
// Unwinds a stack, starting from the register state in |*aStartRegs| and
// reading stack memory through |aStackImg|.
//
// Outputs:
//   aFramePCs[i], aFrameSPs[i]  -- for each recovered frame i, the frame's
//       instruction address and stack pointer.  For every frame except the
//       innermost, the recorded address is the return address minus one.
//       An SP that is not valid is recorded as 0.
//   *aFramesUsed  -- reset to zero on entry; on return, the number of
//       entries written to the two arrays.  Never exceeds |aFramesAvail|.
//   *aFramePointerFramesAcquired  -- incremented once for every frame that
//       was recovered by frame-pointer chasing.  It is only ever incremented
//       here, never reset, so the caller is responsible for its initial
//       value.
//
// |*aStartRegs| is only read; all updates are made to a local copy.
//
// Per iteration, the address is looked up in |mPriMap|.  If no RuleSet is
// found then, on 32-bit x86 linux/android and for the innermost frame only,
// a __kernel_vsyscall special case is tried.  Otherwise a found RuleSet is
// applied via UseRuleSet.  Failing both, frame-pointer chasing is attempted
// on the platforms that have it compiled in.  The loop ends when the output
// arrays are full, the instruction address is not valid, the SP moves to a
// lower address than previously seen, or none of the above recovers a frame.
//
// Release-asserts that this object is not in admin mode.
void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
/*OUT*/ uintptr_t* aFrameSPs,
/*OUT*/ size_t* aFramesUsed,
/*OUT*/ size_t* aFramePointerFramesAcquired,
size_t aFramesAvail, UnwindRegs* aStartRegs,
StackImage* aStackImg) {
MOZ_RELEASE_ASSERT(!mAdminMode);
/////////////////////////////////////////////////////////
// BEGIN UNWIND
*aFramesUsed = 0;
// Work on a private copy of the registers; it is updated in place as each
// caller frame is recovered.
UnwindRegs regs = *aStartRegs;
// The most recent valid SP seen.  A default-constructed TaggedUWord is the
// same value used further down to mark a register as invalid.
TaggedUWord last_valid_sp = TaggedUWord();
while (true) {
// Optional trace of the register state at the top of each iteration.
// DEBUG_MAIN is a compile-time constant defined at the top of this file.
if (DEBUG_MAIN) {
char buf[300];
mLog("\n");
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
SprintfLiteral(
buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
(int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
(int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
(int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
buf[sizeof(buf) - 1] = 0;
mLog(buf);
#elif defined(GP_ARCH_arm)
SprintfLiteral(
buf,
"LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
" r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
(int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
(int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
(int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
(int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
(int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
(int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
buf[sizeof(buf) - 1] = 0;
mLog(buf);
#elif defined(GP_ARCH_arm64)
SprintfLiteral(
buf,
"LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx"
" sp %d/%llx\n",
(int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
(int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
(int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
(int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
buf[sizeof(buf) - 1] = 0;
mLog(buf);
#elif defined(GP_ARCH_mips64)
SprintfLiteral(
buf, "LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n",
(int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
(int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
(int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
buf[sizeof(buf) - 1] = 0;
mLog(buf);
#else
# error "Unsupported arch"
#endif
}
// Pick out the instruction address (|ia|) and stack pointer (|sp|) for the
// frame about to be recorded.  On arm and arm64 the innermost frame takes
// its address from r15 / pc, whereas every outer frame takes it from
// r14 / x30.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
TaggedUWord ia = regs.xip;
TaggedUWord sp = regs.xsp;
#elif defined(GP_ARCH_arm)
TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
TaggedUWord sp = regs.r13;
#elif defined(GP_ARCH_arm64)
TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
TaggedUWord sp = regs.sp;
#elif defined(GP_ARCH_mips64)
TaggedUWord ia = regs.pc;
TaggedUWord sp = regs.sp;
#else
# error "Unsupported arch"
#endif
// The output arrays are full; stop.
if (*aFramesUsed >= aFramesAvail) {
break;
}
// If we don't have a valid value for the PC, give up.
if (!ia.Valid()) {
break;
}
// If this is the innermost frame, record the SP value, which
// presumably is valid. If this isn't the innermost frame, and we
// have a valid SP value, check that its SP value isn't less than
// the one we've seen so far, so as to catch potential SP value
// cycles.
if (*aFramesUsed == 0) {
last_valid_sp = sp;
} else {
// NOTE(review): the innermost SP is stored above without a validity
// check, so this assertion relies on the starting SP being valid.
MOZ_ASSERT(last_valid_sp.Valid());
if (sp.Valid()) {
if (sp.Value() < last_valid_sp.Value()) {
// Hmm, SP going in the wrong direction. Let's stop.
break;
}
// Remember where we got to.
last_valid_sp = sp;
}
}
// For the innermost frame, the IA value is what we need. For all
// other frames, it's actually the return address, so back up one
// byte so as to get it into the calling instruction.
aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
// From here to the end of the iteration, |*aFramesUsed| counts the frame
// just recorded: 1 means "innermost frame", > 1 means "an outer frame".
(*aFramesUsed)++;
// Find the RuleSet for the current IA, if any. This will also
// query the backing (secondary) maps if it isn't found in the
// thread-local cache.
// If this isn't the innermost frame, back up into the calling insn.
// Adding (uintptr_t)(-1) is the means used here to subtract one.
if (*aFramesUsed > 1) {
ia = ia + TaggedUWord((uintptr_t)(-1));
}
pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
mPriMap->Lookup(ia.Value());
// |ruleset| is tested against null below to decide whether unwind
// information was found; |pfxinstrs| is simply handed on to UseRuleSet.
const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;
if (DEBUG_MAIN) {
char buf[100];
SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
(unsigned long long int)ia.Value(), ruleset);
buf[sizeof(buf) - 1] = 0;
mLog(buf);
}
#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
/////////////////////////////////////////////
////
// On 32 bit x86-linux, syscalls are often done via the VDSO
// function __kernel_vsyscall, which doesn't have a corresponding
// object that we can read debuginfo from. That effectively kills
// off all stack traces for threads blocked in syscalls. Hence
// special-case by looking at the code surrounding the program
// counter.
//
// 0xf7757420 <__kernel_vsyscall+0>: push %ecx
// 0xf7757421 <__kernel_vsyscall+1>: push %edx
// 0xf7757422 <__kernel_vsyscall+2>: push %ebp
// 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp
// 0xf7757425 <__kernel_vsyscall+5>: sysenter
// 0xf7757427 <__kernel_vsyscall+7>: nop
// 0xf7757428 <__kernel_vsyscall+8>: nop
// 0xf7757429 <__kernel_vsyscall+9>: nop
// 0xf775742a <__kernel_vsyscall+10>: nop
// 0xf775742b <__kernel_vsyscall+11>: nop
// 0xf775742c <__kernel_vsyscall+12>: nop
// 0xf775742d <__kernel_vsyscall+13>: nop
// 0xf775742e <__kernel_vsyscall+14>: int $0x80
// 0xf7757430 <__kernel_vsyscall+16>: pop %ebp
// 0xf7757431 <__kernel_vsyscall+17>: pop %edx
// 0xf7757432 <__kernel_vsyscall+18>: pop %ecx
// 0xf7757433 <__kernel_vsyscall+19>: ret
//
// In cases where the sampled thread is blocked in a syscall, its
// program counter will point at "pop %ebp". Hence we look for
// the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
// the corresponding register-recovery actions are:
// new_ebp = *(old_esp + 0)
// new eip = *(old_esp + 12)
// new_esp = old_esp + 16
//
// It may also be the case that the program counter points two
// nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
// the case where the syscall has been restarted but the thread
// hasn't been rescheduled. The code below doesn't handle that;
// it could easily be made to.
//
// Only attempted for the innermost frame (|*aFramesUsed| has already been
// incremented, hence the comparison with 1) and only when the lookup above
// produced no RuleSet.
if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
uintptr_t insns_min, insns_max;
uintptr_t eip = ia.Value();
bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
// The bytes eip[-2] .. eip[3] are read directly from memory below, so
// first check that range against the bounds reported for the segment.
if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
uint8_t* eipC = (uint8_t*)eip;
// CD 80 = int $0x80; 5D = pop %ebp; 5A = pop %edx; 59 = pop %ecx;
// C3 = ret.
if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
TaggedUWord sp_plus_0 = sp;
TaggedUWord sp_plus_12 = sp;
TaggedUWord sp_plus_16 = sp;
sp_plus_12 = sp_plus_12 + TaggedUWord(12);
sp_plus_16 = sp_plus_16 + TaggedUWord(16);
TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
TaggedUWord new_esp = sp_plus_16;
// Commit only if all three recovered values are valid; otherwise fall
// through with |regs| untouched.
if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
regs.xbp = new_ebp;
regs.xip = new_eip;
regs.xsp = new_esp;
continue;
}
}
}
}
////
/////////////////////////////////////////////
#endif // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
// So, do we have a ruleset for this address? If so, use it now.
if (ruleset) {
if (DEBUG_MAIN) {
ruleset->Print(mLog);
mLog("\n");
}
// Use the RuleSet to compute the registers for the previous
// frame. |regs| is modified in-place.
UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
continue;
}
#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
defined(GP_PLAT_amd64_freebsd)
// There's no RuleSet for the specified address. On amd64/x86_linux, see if
// it's possible to recover the caller's frame by using the frame pointer.
// We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
// and assume the following layout:
//
// <--- new_SP
// +----------+
// | new_IP | (return address)
// +----------+
// | new_BP | <--- old_BP
// +----------+
// | .... |
// | .... |
// | .... |
// +----------+ <---- old_SP (arbitrary, but must be <= old_BP)
const size_t wordSzB = sizeof(uintptr_t);
TaggedUWord old_xsp = regs.xsp;
// points at new_BP ?
TaggedUWord old_xbp = regs.xbp;
// points at new_IP ?
TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
// is the new_SP ?
TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);
if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
// We don't need to do any range, alignment or validity checks for
// addresses passed to DerefTUW, since that performs them itself, and
// returns an invalid value on failure. Any such value will poison
// subsequent uses, and we do a final check for validity before putting
// the computed values into |regs|.
TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
// The recovered frame pointer must lie at a strictly higher address than
// the current one, so a chain of such steps cannot revisit a frame.
if (new_xbp.Valid() && new_xbp.IsAligned() &&
old_xbp.Value() < new_xbp.Value()) {
TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
TaggedUWord new_xsp = old_xbp_plus2;
if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
regs.xbp = new_xbp;
regs.xip = new_xip;
regs.xsp = new_xsp;
// Tell the caller that this frame came from frame-pointer chasing.
(*aFramePointerFramesAcquired)++;
continue;
}
}
}
#elif defined(GP_ARCH_arm64)
// Here is an example of generated code for prologue and epilogue..
//
// stp x29, x30, [sp, #-16]!
// mov x29, sp
// ...
// ldp x29, x30, [sp], #16
// ret
//
// Next is another example of generated code.
//
// stp x20, x19, [sp, #-32]!
// stp x29, x30, [sp, #16]
// add x29, sp, #0x10
// ...
// ldp x29, x30, [sp, #16]
// ldp x20, x19, [sp], #32
// ret
//
// Previous x29 and x30 register are stored in the address of x29 register.
// But since sp register value depends on local variables, we cannot compute
// previous sp register from current sp/fp/lr register and there is no
// regular rule for sp register in prologue. But since return address is lr
// register, if x29 is valid, we will get return address without sp
// register.
//
// So we assume the following layout that if no rule set. x29 is frame
// pointer, so we will be able to compute x29 and x30 .
//
// +----------+ <--- new_sp (cannot compute)
// | .... |
// +----------+
// | new_lr | (return address)
// +----------+
// | new_fp | <--- old_fp
// +----------+
// | .... |
// | .... |
// +----------+ <---- old_sp (arbitrary, but unused)
TaggedUWord old_fp = regs.x29;
// |regs.sp| is invalidated after every frame recovered this way (see
// below), so the lower bound for the frame pointer is taken from
// |last_valid_sp| rather than from the current SP.
if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
last_valid_sp.Value() <= old_fp.Value()) {
TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
// As on x86/amd64, the recovered frame pointer must lie at a strictly
// higher address than the current one.
if (new_fp.Valid() && new_fp.IsAligned() &&
old_fp.Value() < new_fp.Value()) {
// new_lr sits 8 bytes above new_fp, per the layout shown above.
TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
if (new_lr.Valid()) {
regs.x29 = new_fp;
regs.x30 = new_lr;
// When using frame pointer to walk stack, we cannot compute sp
// register since we cannot compute sp register from fp/lr/sp
// register, and there is no regular rule to compute previous sp
// register. So mark as invalid.
regs.sp = TaggedUWord();
// Tell the caller that this frame came from frame-pointer chasing.
(*aFramePointerFramesAcquired)++;
continue;
}
}
}
#endif // frame-pointer chasing: amd64/x86 linux and android, amd64 freebsd,
// or arm64
// We failed to recover a frame either using CFI or FP chasing, and we
// have no other ways to recover the frame. So we have to give up.
break;
} // top level unwind loop
// END UNWIND
/////////////////////////////////////////////////////////
}
////////////////////////////////////////////////////////////////
// LUL Unit Testing //
////////////////////////////////////////////////////////////////
// Number of bytes of the innermost part of the stack that the unit tests
// present to LUL (32 KiB).
static const int LUL_UNIT_TEST_STACK_SIZE = 32 * 1024;
#if defined(GP_ARCH_mips64)
// Returns the contents of register $31 as observed inside this function.
// $31 is the return-address register in the MIPS calling convention, so the
// value is an address in the caller, just after the call to this function.
// NOTE(review): presumably the function is marked noinline so that a real
// call is made and $31 actually holds such a return address -- confirm.
static __attribute__((noinline)) unsigned long __getpc(void) {
unsigned long rtaddr;
// Copy $31 into |rtaddr|.
__asm__ volatile("move %0, $31" : "=r"(rtaddr));
return rtaddr;
}
#endif
// This function is innermost in the test call sequence. It uses LUL
// to unwind, and compares the result with the sequence specified in
// the director string. These need to agree in order for the test to
// pass. In order not to screw up the results, this function needs
// to have a not-very big stack frame, since we're only presenting
// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
// that chunk unavoidably includes the frame for this function.
//
// This function must not be inlined into its callers. Doing so will
// cause the expected-vs-actual backtrace consistency checking to
// fail. Prints summary results to |aLUL|'s logging sink and also
// returns a boolean indicating whether or not the test passed.
static __attribute__((noinline)) bool GetAndCheckStackTrace(
LUL* aLUL, const char* dstring) {
// Get hold of the current unwind-start registers.
UnwindRegs startRegs;
memset(&startRegs, 0, sizeof(startRegs));
#if defined(GP_ARCH_amd64)
volatile uintptr_t block[3];
MOZ_ASSERT(sizeof(block) == 24);
__asm__ __volatile__(
"leaq 0(%%rip), %%r15"
"\n\t"
"movq %%r15, 0(%0)"
"\n\t"
"movq %%rsp, 8(%0)"
"\n\t"
"movq %%rbp, 16(%0)"
"\n"
:
: "r"(&block[0])
: "memory", "r15");
startRegs.xip = TaggedUWord(block[0]);
startRegs.xsp = TaggedUWord(block[1]);
startRegs.xbp = TaggedUWord(block[2]);
const uintptr_t REDZONE_SIZE = 128;
uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
volatile uintptr_t block[3];
MOZ_ASSERT(sizeof(block) == 12);
__asm__ __volatile__(
".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
"\n\t"
"popl %%edi"
"\n\t"
"movl %%edi, 0(%0)"
"\n\t"
"movl %%esp, 4(%0)"
"\n\t"
"movl %%ebp, 8(%0)"
"\n"
:
: "r"(&block[0])
: "memory", "edi");
startRegs.xip = TaggedUWord(block[0]);
startRegs.xsp = TaggedUWord(block[1]);
startRegs.xbp = TaggedUWord(block[2]);
const uintptr_t REDZONE_SIZE = 0;
uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
volatile uintptr_t block[6];
MOZ_ASSERT(sizeof(block) == 24);
__asm__ __volatile__(
"mov r0, r15"
"\n\t"
"str r0, [%0, #0]"
"\n\t"
"str r14, [%0, #4]"
"\n\t"
"str r13, [%0, #8]"
"\n\t"
"str r12, [%0, #12]"
"\n\t"
"str r11, [%0, #16]"
"\n\t"
"str r7, [%0, #20]"
"\n"
:
: "r"(&block[0])
: "memory", "r0");
startRegs.r15 = TaggedUWord(block[0]);
startRegs.r14 = TaggedUWord(block[1]);