LulMain.cpp - mozsearch

mozilla-central/tools/profiler/lul/LulMain.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim: set ts=8 sts=2 et sw=2 tw=80: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "LulMain.h"

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <unistd.h>  // write(), only for testing LUL

#include <algorithm>  // std::sort

#include <string>

#include <utility>

#include "GeckoProfiler.h"  // for profiler_current_thread_id()

#include "LulCommonExt.h"

#include "LulElfExt.h"

#include "LulMainInt.h"

#include "mozilla/ArrayUtils.h"

#include "mozilla/Assertions.h"

#include "mozilla/CheckedInt.h"

#include "mozilla/DebugOnly.h"

#include "mozilla/MemoryChecking.h"

#include "mozilla/Sprintf.h"

#include "mozilla/UniquePtr.h"

#include "mozilla/Unused.h"

// Set this to 1 for verbose logging

#define DEBUG_MAIN 0

namespace lul {

using mozilla::CheckedInt;

using mozilla::DebugOnly;

using mozilla::MallocSizeOf;

using mozilla::Unused;

using std::pair;

using std::string;

using std::vector;

// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING

//

// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.

// Any such function -- and, hence, the transitive closure of those

// reachable from it -- must not do any dynamic memory allocation.

// Doing so risks deadlock.  There is exactly one root function for

// the transitive closure: Lul::Unwind.

//

// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING

////////////////////////////////////////////////////////////////

// RuleSet                                                    //

////////////////////////////////////////////////////////////////

static const char* NameOf_DW_REG(int16_t aReg) {

  switch (aReg) {

    case DW_REG_CFA:

      return "cfa";

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)

    case DW_REG_INTEL_XBP:

      return "xbp";

    case DW_REG_INTEL_XSP:

      return "xsp";

    case DW_REG_INTEL_XIP:

      return "xip";

#elif defined(GP_ARCH_arm)

    case DW_REG_ARM_R7:

      return "r7";

    case DW_REG_ARM_R11:

      return "r11";

    case DW_REG_ARM_R12:

      return "r12";

    case DW_REG_ARM_R13:

      return "r13";

    case DW_REG_ARM_R14:

      return "r14";

    case DW_REG_ARM_R15:

      return "r15";

#elif defined(GP_ARCH_arm64)

    case DW_REG_AARCH64_X29:

      return "x29";

    case DW_REG_AARCH64_X30:

      return "x30";

    case DW_REG_AARCH64_SP:

      return "sp";

#elif defined(GP_ARCH_mips64)

    case DW_REG_MIPS_SP:

      return "sp";

    case DW_REG_MIPS_FP:

      return "fp";

    case DW_REG_MIPS_PC:

      return "pc";

#else

#  error "Unsupported arch"

#endif

    default:

      return "???";

string LExpr::ShowRule(const char* aNewReg) const {

  char buf[64];

  string res = string(aNewReg) + "=";

  switch (mHow) {

    case UNKNOWN:

      res += "Unknown";

      break;

    case NODEREF:

      SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);

      res += buf;

      break;

    case DEREF:

      SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);

      res += buf;

      break;

    case PFXEXPR:

      SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);

      res += buf;

      break;

    default:

      res += "???";

      break;

  return res;

void RuleSet::Print(uintptr_t avma, uintptr_t len,

                    void (*aLog)(const char*)) const {

  char buf[96];

  SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)avma,

                 (unsigned long long int)(avma + len - 1));

  string res = string(buf);

  res += mCfaExpr.ShowRule("cfa");

  res += " in";

  // For each reg we care about, print the recovery expression.

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)

  res += mXipExpr.ShowRule(" RA");

  res += mXspExpr.ShowRule(" SP");

  res += mXbpExpr.ShowRule(" BP");

#elif defined(GP_ARCH_arm)

  res += mR15expr.ShowRule(" R15");

  res += mR7expr.ShowRule(" R7");

  res += mR11expr.ShowRule(" R11");

  res += mR12expr.ShowRule(" R12");

  res += mR13expr.ShowRule(" R13");

  res += mR14expr.ShowRule(" R14");

#elif defined(GP_ARCH_arm64)

  res += mX29expr.ShowRule(" X29");

  res += mX30expr.ShowRule(" X30");

  res += mSPexpr.ShowRule(" SP");

#elif defined(GP_ARCH_mips64)

  res += mPCexpr.ShowRule(" PC");

  res += mSPexpr.ShowRule(" SP");

  res += mFPexpr.ShowRule(" FP");

#else

#  error "Unsupported arch"

#endif

  aLog(res.c_str());

LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {

  switch (aRegno) {

    case DW_REG_CFA:

      return &mCfaExpr;

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)

    case DW_REG_INTEL_XIP:

      return &mXipExpr;

    case DW_REG_INTEL_XSP:

      return &mXspExpr;

    case DW_REG_INTEL_XBP:

      return &mXbpExpr;

#elif defined(GP_ARCH_arm)

    case DW_REG_ARM_R15:

      return &mR15expr;

    case DW_REG_ARM_R14:

      return &mR14expr;

    case DW_REG_ARM_R13:

      return &mR13expr;

    case DW_REG_ARM_R12:

      return &mR12expr;

    case DW_REG_ARM_R11:

      return &mR11expr;

    case DW_REG_ARM_R7:

      return &mR7expr;

#elif defined(GP_ARCH_arm64)

    case DW_REG_AARCH64_X29:

      return &mX29expr;

    case DW_REG_AARCH64_X30:

      return &mX30expr;

    case DW_REG_AARCH64_SP:

      return &mSPexpr;

#elif defined(GP_ARCH_mips64)

    case DW_REG_MIPS_SP:

      return &mSPexpr;

    case DW_REG_MIPS_FP:

      return &mFPexpr;

    case DW_REG_MIPS_PC:

      return &mPCexpr;

#else

#  error "Unknown arch"

#endif

    default:

      return nullptr;

RuleSet::RuleSet() {

  // All fields are of type LExpr and so are initialised by LExpr::LExpr().

////////////////////////////////////////////////////////////////

// SecMap                                                     //

////////////////////////////////////////////////////////////////

// See header file LulMainInt.h for comments about invariants.

SecMap::SecMap(uintptr_t mapStartAVMA, uint32_t mapLen,

               void (*aLog)(const char*))

    : mUsable(false),

      mUniqifier(new mozilla::HashMap<RuleSet, uint32_t, RuleSet,

                                      InfallibleAllocPolicy>),

      mLog(aLog) {

  if (mapLen == 0) {

    // Degenerate case.

    mMapMinAVMA = 1;

    mMapMaxAVMA = 0;

  } else {

    mMapMinAVMA = mapStartAVMA;

    mMapMaxAVMA = mapStartAVMA + (uintptr_t)mapLen - 1;

SecMap::~SecMap() {

  mExtents.clear();

  mDictionary.clear();

  if (mUniqifier) {

    mUniqifier->clear();

    mUniqifier = nullptr;

// RUNS IN NO-MALLOC CONTEXT

RuleSet* SecMap::FindRuleSet(uintptr_t ia) {

  // Binary search mExtents to find one that brackets |ia|.

  // lo and hi need to be signed, else the loop termination tests

  // don't work properly.  Note that this works correctly even when

  // mExtents.size() == 0.

  // Can't do this until the array has been sorted and preened.

  MOZ_ASSERT(mUsable);

  long int lo = 0;

  long int hi = (long int)mExtents.size() - 1;

  while (true) {

    // current unsearched space is from lo to hi, inclusive.

    if (lo > hi) {

      // not found

      return nullptr;

    long int mid = lo + ((hi - lo) / 2);

    Extent* mid_extent = &mExtents[mid];

    uintptr_t mid_offset = mid_extent->offset();

    uintptr_t mid_len = mid_extent->len();

    uintptr_t mid_minAddr = mMapMinAVMA + mid_offset;

    uintptr_t mid_maxAddr = mid_minAddr + mid_len - 1;

    if (ia < mid_minAddr) {

      hi = mid - 1;

      continue;

    if (ia > mid_maxAddr) {

      lo = mid + 1;

      continue;

    MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);

    uint32_t mid_extent_dictIx = mid_extent->dictIx();

    MOZ_RELEASE_ASSERT(mid_extent_dictIx < mExtents.size());

    return &mDictionary[mid_extent_dictIx];

  // NOTREACHED

// Add a RuleSet to the collection.  The rule is copied in.  Calling

// this makes the map non-searchable.

void SecMap::AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len) {

  mUsable = false;

  // Zero length RuleSet?  Meaningless, but ignore it anyway.

  if (len == 0) {

    return;

  // Ignore attempts to add RuleSets whose address range doesn't fall within

  // the declared address range for the SecMap.  Maybe we should print some

  // kind of error message rather than silently ignoring them.

  if (!(avma >= mMapMinAVMA && avma + len - 1 <= mMapMaxAVMA)) {

    return;

  // Because `mMapStartAVMA` .. `mMapEndAVMA` can specify at most a 2^32-1 byte

  // chunk of address space, the following must now hold.

  MOZ_RELEASE_ASSERT(len <= (uintptr_t)0xFFFFFFFF);

  // See if `mUniqifier` already has `rs`.  If so set `dictIx` to the assigned

  // dictionary index; if not, add `rs` to `mUniqifier` and assign a new

  // dictionary index.  This is the core of the RuleSet-de-duplication process.

  uint32_t dictIx = 0;

  mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>::AddPtr

      p = mUniqifier->lookupForAdd(*rs);

  if (!p) {

    dictIx = mUniqifier->count();

    // If this ever fails, Extents::dictIx will need to be changed to be a

    // type wider than the current uint16_t.

    MOZ_RELEASE_ASSERT(dictIx < (1 << 16));

    // This returns `false` on OOM.  We ignore the return value since we asked

    // for it to use the InfallibleAllocPolicy.

    DebugOnly<bool> addedOK = mUniqifier->add(p, *rs, dictIx);

    MOZ_ASSERT(addedOK);

  } else {

    dictIx = p->value();

  uint32_t offset = (uint32_t)(avma - mMapMinAVMA);

  while (len > 0) {

    // Because Extents::len is a uint16_t, we have to add multiple `mExtents`

    // entries to cover the case where `len` is equal to or greater than 2^16.

    // This happens only exceedingly rarely.  In order to get more test

    // coverage on what would otherwise be a very low probability (less than

    // 0.0002%) corner case, we do this in steps of 4095.  On libxul.so as of

    // Sept 2020, this increases the number of `mExtents` entries by about

    // 0.05%, hence has no meaningful effect on space use, but increases the

    // use of this corner case, and hence its test coverage, by a factor of 250.

    uint32_t this_step_len = (len > 4095) ? 4095 : len;

    mExtents.emplace_back(offset, this_step_len, dictIx);

    offset += this_step_len;

    len -= this_step_len;

// Add a PfxInstr to the vector of such instrs, and return the index

// in the vector.  Calling this makes the map non-searchable.

uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {

  mUsable = false;

  mPfxInstrs.push_back(pfxi);

  return mPfxInstrs.size() - 1;

// Prepare the map for searching, by sorting it, de-overlapping entries and

// removing any resulting zero-length entries.  At the start of this routine,

// all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and not have zero

// length, as a result of the checks in AddRuleSet().

void SecMap::PrepareRuleSets() {

  // At this point, the de-duped RuleSets are in `mUniqifier`, and

  // `mDictionary` is empty.  This method will, amongst other things, copy

  // them into `mDictionary` in order of their assigned dictionary-index

  // values, as established by `SecMap::AddRuleSet`, and free `mUniqifier`;

  // after this method, it has no further use.

  MOZ_RELEASE_ASSERT(mUniqifier);

  MOZ_RELEASE_ASSERT(mDictionary.empty());

  if (mExtents.empty()) {

    mUniqifier->clear();

    mUniqifier = nullptr;

    return;

  if (mMapMinAVMA == 1 && mMapMaxAVMA == 0) {

    // The map is empty.  This should never happen.

    mExtents.clear();

    mUniqifier->clear();

    mUniqifier = nullptr;

    return;

  MOZ_RELEASE_ASSERT(mMapMinAVMA <= mMapMaxAVMA);

  // We must have at least one Extent, and as a consequence there must be at

  // least one entry in the uniqifier.

  MOZ_RELEASE_ASSERT(!mExtents.empty() && !mUniqifier->empty());

#ifdef DEBUG

  // Check invariants on incoming Extents.

  for (size_t i = 0; i < mExtents.size(); ++i) {

    Extent* ext = &mExtents[i];

    uint32_t len = ext->len();

    MOZ_ASSERT(len > 0);

    MOZ_ASSERT(len <= 4095 /* per '4095' in AddRuleSet() */);

    uint32_t offset = ext->offset();

    uintptr_t avma = mMapMinAVMA + (uintptr_t)offset;

    // Upper bounds test.  There's no lower bounds test because `offset` is a

    // positive displacement from `mMapMinAVMA`, so a small underrun will

    // manifest as `len` being close to 2^32.

    MOZ_ASSERT(avma + (uintptr_t)len - 1 <= mMapMaxAVMA);

#endif

  // Sort by start addresses.

  std::sort(mExtents.begin(), mExtents.end(),

            [](const Extent& ext1, const Extent& ext2) {

              return ext1.offset() < ext2.offset();

});

  // Iteratively truncate any overlaps and remove any zero length

  // entries that might result, or that may have been present

  // initially.  Unless the input is seriously screwy, this is

  // expected to iterate only once.

  while (true) {

    size_t i;

    size_t n = mExtents.size();

    size_t nZeroLen = 0;

    if (n == 0) {

      break;

    for (i = 1; i < n; ++i) {

      Extent* prev = &mExtents[i - 1];

      Extent* here = &mExtents[i];

      MOZ_ASSERT(prev->offset() <= here->offset());

      if (prev->offset() + prev->len() > here->offset()) {

        prev->setLen(here->offset() - prev->offset());

      if (prev->len() == 0) {

        nZeroLen++;

    if (mExtents[n - 1].len() == 0) {

      nZeroLen++;

    // At this point, the entries are in-order and non-overlapping.

    // If none of them are zero-length, we are done.

    if (nZeroLen == 0) {

      break;

    // Slide back the entries to remove the zero length ones.

    size_t j = 0;  // The write-point.

    for (i = 0; i < n; ++i) {

      if (mExtents[i].len() == 0) {

        continue;

      if (j != i) {

        mExtents[j] = mExtents[i];

      ++j;

    MOZ_ASSERT(i == n);

    MOZ_ASSERT(nZeroLen <= n);

    MOZ_ASSERT(j == n - nZeroLen);

    while (nZeroLen > 0) {

      mExtents.pop_back();

      nZeroLen--;

    MOZ_ASSERT(mExtents.size() == j);

  size_t nExtents = mExtents.size();

#ifdef DEBUG

  // Do a final check on the extents: their address ranges must be

  // ascending, non overlapping, non zero sized.

  if (nExtents > 0) {

    MOZ_ASSERT(mExtents[0].len() > 0);

    for (size_t i = 1; i < nExtents; ++i) {

      const Extent* prev = &mExtents[i - 1];

      const Extent* here = &mExtents[i];

      MOZ_ASSERT(prev->offset() < here->offset());

      MOZ_ASSERT(here->len() > 0);

      MOZ_ASSERT(prev->offset() + prev->len() <= here->offset());

#endif

  // Create the final dictionary by enumerating the uniqifier.

  size_t nUniques = mUniqifier->count();

  RuleSet dummy;

  mozilla::PodZero(&dummy);

  mDictionary.reserve(nUniques);

  for (size_t i = 0; i < nUniques; i++) {

    mDictionary.push_back(dummy);

  for (auto iter = mUniqifier->iter(); !iter.done(); iter.next()) {

    MOZ_RELEASE_ASSERT(iter.get().value() < nUniques);

    mDictionary[iter.get().value()] = iter.get().key();

  mUniqifier = nullptr;

  char buf[150];

  SprintfLiteral(

      buf,

      "PrepareRuleSets: %lu extents, %lu rulesets, "

      "avma min/max 0x%llx, 0x%llx\n",

      (unsigned long int)nExtents, (unsigned long int)mDictionary.size(),

      (unsigned long long int)mMapMinAVMA, (unsigned long long int)mMapMaxAVMA);

  buf[sizeof(buf) - 1] = 0;

  mLog(buf);

  // Is now usable for binary search.

  mUsable = true;

#if 0

  mLog("\nRulesets after preening\n");

  for (size_t i = 0; i < nExtents; ++i) {

    const Extent* extent = &mExtents[i];

    uintptr_t avma = mMapMinAVMA + (uintptr_t)extent->offset();

    mDictionary[extent->dictIx()].Print(avma, extent->len(), mLog);

    mLog("\n");

  mLog("\n");

#endif

bool SecMap::IsEmpty() { return mExtents.empty(); }

size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {

  size_t n = aMallocSizeOf(this);

  // It's conceivable that these calls would be unsafe with some

  // implementations of std::vector, but it seems to be working for now...

  n += aMallocSizeOf(mPfxInstrs.data());

  if (mUniqifier) {

    n += mUniqifier->shallowSizeOfIncludingThis(aMallocSizeOf);

  n += aMallocSizeOf(mDictionary.data());

  n += aMallocSizeOf(mExtents.data());

  return n;

////////////////////////////////////////////////////////////////

// SegArray                                                   //

////////////////////////////////////////////////////////////////

// A SegArray holds a set of address ranges that together exactly

// cover an address range, with no overlaps or holes.  Each range has

// an associated value, which in this case has been specialised to be

// a simple boolean.  The representation is kept to minimal canonical

// form in which adjacent ranges with the same associated value are

// merged together.  Each range is represented by a |struct Seg|.

//

// SegArrays are used to keep track of which parts of the address

// space are known to contain instructions.

class SegArray {

 public:

  void add(uintptr_t lo, uintptr_t hi, bool val) {

    if (lo > hi) {

      return;

    split_at(lo);

    if (hi < UINTPTR_MAX) {

      split_at(hi + 1);

    std::vector<Seg>::size_type iLo, iHi, i;

    iLo = find(lo);

    iHi = find(hi);

    for (i = iLo; i <= iHi; ++i) {

      mSegs[i].val = val;

    preen();

  // RUNS IN NO-MALLOC CONTEXT

  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,

                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {

    std::vector<Seg>::size_type i = find(addr);

    if (!mSegs[i].val) {

      return false;

    *rx_min = mSegs[i].lo;

    *rx_max = mSegs[i].hi;

    return true;

  SegArray() {

    Seg s(0, UINTPTR_MAX, false);

    mSegs.push_back(s);

 private:

  struct Seg {

    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}

    uintptr_t lo;

    uintptr_t hi;

    bool val;

};

  void preen() {

    for (std::vector<Seg>::iterator iter = mSegs.begin();

         iter < mSegs.end() - 1; ++iter) {

      if (iter[0].val != iter[1].val) {

        continue;

      iter[0].hi = iter[1].hi;

      mSegs.erase(iter + 1);

      // Back up one, so as not to miss an opportunity to merge

      // with the entry after this one.

      --iter;

  // RUNS IN NO-MALLOC CONTEXT

  std::vector<Seg>::size_type find(uintptr_t a) {

    long int lo = 0;

    long int hi = (long int)mSegs.size();

    while (true) {

      // The unsearched space is lo .. hi inclusive.

      if (lo > hi) {

        // Not found.  This can't happen.

        return (std::vector<Seg>::size_type)(-1);

      long int mid = lo + ((hi - lo) / 2);

      uintptr_t mid_lo = mSegs[mid].lo;

      uintptr_t mid_hi = mSegs[mid].hi;

      if (a < mid_lo) {

        hi = mid - 1;

        continue;

      if (a > mid_hi) {

        lo = mid + 1;

        continue;

      return (std::vector<Seg>::size_type)mid;

  void split_at(uintptr_t a) {

    std::vector<Seg>::size_type i = find(a);

    if (mSegs[i].lo == a) {

      return;

    mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);

    mSegs[i].hi = a - 1;

    mSegs[i + 1].lo = a;

  void show() {

    printf("<< %d entries:\n", (int)mSegs.size());

    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();

         ++iter) {

      printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,

             (unsigned long long int)(*iter).hi,

             (*iter).val ? "true" : "false");

    printf(">>\n");

  std::vector<Seg> mSegs;

};

////////////////////////////////////////////////////////////////

// PriMap                                                     //

////////////////////////////////////////////////////////////////

class PriMap {

 public:

  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}

  // RUNS IN NO-MALLOC CONTEXT

  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {

    SecMap* sm = FindSecMap(ia);

    return pair<const RuleSet*, const vector<PfxInstr>*>(

        sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);

  // Add a secondary map.  No overlaps allowed w.r.t. existing

  // secondary maps.

  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {

    // We can't add an empty SecMap to the PriMap.  But that's OK

    // since we'd never be able to find anything in it anyway.

    if (aSecMap->IsEmpty()) {

      return;

    // Iterate through the SecMaps and find the right place for this

    // one.  At the same time, ensure that the in-order

    // non-overlapping invariant is preserved (and, generally, holds).

    // FIXME: this gives a cost that is O(N^2) in the total number of

    // shared objects in the system.  ToDo: better.

    MOZ_ASSERT(aSecMap->mMapMinAVMA <= aSecMap->mMapMaxAVMA);

    size_t num_secMaps = mSecMaps.size();

    uintptr_t i;

    for (i = 0; i < num_secMaps; ++i) {

      mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];

      MOZ_ASSERT(sm_i->mMapMinAVMA <= sm_i->mMapMaxAVMA);

      if (aSecMap->mMapMinAVMA < sm_i->mMapMaxAVMA) {

        // |aSecMap| needs to be inserted immediately before mSecMaps[i].

        break;

    MOZ_ASSERT(i <= num_secMaps);

    if (i == num_secMaps) {

      // It goes at the end.

      mSecMaps.push_back(std::move(aSecMap));

    } else {

      std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =

          mSecMaps.begin() + i;

      mSecMaps.insert(iter, std::move(aSecMap));

    char buf[100];

    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",

                   (int)mSecMaps.size());

    buf[sizeof(buf) - 1] = 0;

    mLog(buf);

  // Remove and delete any SecMaps in the mapping, that intersect

  // with the specified address range.

  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {

    MOZ_ASSERT(avma_min <= avma_max);

    size_t num_secMaps = mSecMaps.size();

    if (num_secMaps > 0) {

      intptr_t i;

      // Iterate from end to start over the vector, so as to ensure

      // that the special case where |avma_min| and |avma_max| denote

      // the entire address space, can be completed in time proportional

      // to the number of elements in the map.

      for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {

        mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];

        if (sm_i->mMapMaxAVMA < avma_min || avma_max < sm_i->mMapMinAVMA) {

          // There's no overlap.  Move on.

          continue;

        // We need to remove mSecMaps[i] and slide all those above it

        // downwards to cover the hole.

        mSecMaps.erase(mSecMaps.begin() + i);

  // Return the number of currently contained SecMaps.

  size_t CountSecMaps() { return mSecMaps.size(); }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {

    size_t n = aMallocSizeOf(this);

    // It's conceivable that this call would be unsafe with some

    // implementations of std::vector, but it seems to be working for now...

    n += aMallocSizeOf(mSecMaps.data());

    for (size_t i = 0; i < mSecMaps.size(); i++) {

      n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);

    return n;

 private:

  // RUNS IN NO-MALLOC CONTEXT

  SecMap* FindSecMap(uintptr_t ia) {

    // Binary search mSecMaps to find one that brackets |ia|.

    // lo and hi need to be signed, else the loop termination tests

    // don't work properly.

    long int lo = 0;

    long int hi = (long int)mSecMaps.size() - 1;

    while (true) {

      // current unsearched space is from lo to hi, inclusive.

      if (lo > hi) {

        // not found

        return nullptr;

      long int mid = lo + ((hi - lo) / 2);

      mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];

      uintptr_t mid_minAddr = mid_secMap->mMapMinAVMA;

      uintptr_t mid_maxAddr = mid_secMap->mMapMaxAVMA;

      if (ia < mid_minAddr) {

        hi = mid - 1;

        continue;

      if (ia > mid_maxAddr) {

        lo = mid + 1;

        continue;

      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);

      return mid_secMap.get();

    // NOTREACHED

 private:

  // sorted array of per-object ranges, non overlapping, non empty

  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;

  // a logging sink, for debugging.

  void (*mLog)(const char*);

};

////////////////////////////////////////////////////////////////

// LUL                                                        //

////////////////////////////////////////////////////////////////

#define LUL_LOG(_str)                                                          \

  do {                                                                         \

    char buf[200];                                                             \

    SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \

                   uint64_t(profiler_current_process_id().ToNumber()),         \

                   uint64_t(profiler_current_thread_id().ToNumber()), this,    \

                   (_str));                                                    \

    buf[sizeof(buf) - 1] = 0;                                                  \

    mLog(buf);                                                                 \

  } while (0)

LUL::LUL(void (*aLog)(const char*))

    : mLog(aLog),

      mAdminMode(true),

      mAdminThreadId(profiler_current_thread_id()),

      mPriMap(new PriMap(aLog)),

      mSegArray(new SegArray()),

      mUSU(new UniqueStringUniverse()) {

  LUL_LOG("LUL::LUL: Created object");

LUL::~LUL() {

  LUL_LOG("LUL::~LUL: Destroyed object");

  delete mPriMap;

  delete mSegArray;

  mLog = nullptr;

  delete mUSU;

void LUL::MaybeShowStats() {

  // This is racey in the sense that it can't guarantee that

  //   n_new == n_new_Context + n_new_CFI + n_new_Scanned

  // if it should happen that mStats is updated by some other thread

  // in between computation of n_new and n_new_{Context,CFI,FP}.

  // But it's just stats printing, so we don't really care.

  uint32_t n_new = mStats - mStatsPrevious;

  if (n_new >= 5000) {

    uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;

    uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;

    uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;

    mStatsPrevious = mStats;

    char buf[200];

    SprintfLiteral(buf,

                   "LUL frame stats: TOTAL %5u"

                   "    CTX %4u    CFI %4u    FP %4u",

                   n_new, n_new_Context, n_new_CFI, n_new_FP);

    buf[sizeof(buf) - 1] = 0;

    mLog(buf);

size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {

  size_t n = aMallocSizeOf(this);

  n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);

  // Measurement of the following members may be added later if DMD finds it

  // is worthwhile:

  // - mSegArray

  // - mUSU

  return n;

void LUL::EnableUnwinding() {

  LUL_LOG("LUL::EnableUnwinding");

  // Don't assert for Admin mode here.  That is, tolerate a call here

  // if we are already in Unwinding mode.

  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mAdminMode = false;

void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,

                         const void* aMappedImage) {

  MOZ_RELEASE_ASSERT(mAdminMode);

  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");

  char buf[200];

  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",

                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,

                 aFileName);

  buf[sizeof(buf) - 1] = 0;

  mLog(buf);

  // We can't have a SecMap covering more than 2^32-1 bytes of address space.

  // See the definition of SecMap for why.  Rather than crash the system, just

  // limit the SecMap's size accordingly.  This case is never actually

  // expected to happen.

  if (((unsigned long long int)aSize) > 0xFFFFFFFFULL) {

    aSize = (uintptr_t)0xFFFFFFFF;

  MOZ_RELEASE_ASSERT(aSize <= 0xFFFFFFFF);

  // Ignore obviously-stupid notifications.

  if (aSize > 0) {

    // Here's a new mapping, for this object.

    mozilla::UniquePtr<SecMap> smap =

        mozilla::MakeUnique<SecMap>(aRXavma, (uint32_t)aSize, mLog);

    // Read CFI or EXIDX unwind data into |smap|.

    if (!aMappedImage) {

      (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),

                                smap.get(), (void*)aRXavma, aSize, mUSU, mLog);

    } else {

      (void)lul::ReadSymbolDataInternal(

          (const uint8_t*)aMappedImage, string(aFileName),

          std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);

    mLog("NotifyMap .. preparing entries\n");

    smap->PrepareRuleSets();

    SprintfLiteral(buf, "NotifyMap got %lld entries\n",

                   (long long int)smap->Size());

    buf[sizeof(buf) - 1] = 0;

    mLog(buf);

    // Add it to the primary map (the top level set of mapped objects).

    mPriMap->AddSecMap(std::move(smap));

    // Tell the segment array about the mapping, so that the stack

    // scan and __kernel_syscall mechanisms know where valid code is.

    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);

void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {

  MOZ_RELEASE_ASSERT(mAdminMode);

  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");

  char buf[200];

  SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",

                 (unsigned long long int)aRXavma,

                 (unsigned long long int)aSize);

  buf[sizeof(buf) - 1] = 0;

  mLog(buf);

  // Ignore obviously-stupid notifications.

  if (aSize > 0) {

    // Tell the segment array about the mapping, so that the stack

    // scan and __kernel_syscall mechanisms know where valid code is.

    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);

void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {

  MOZ_RELEASE_ASSERT(mAdminMode);

  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");

  char buf[100];

  SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",

                 (unsigned long long int)aRXavmaMin,

                 (unsigned long long int)aRXavmaMax);

  buf[sizeof(buf) - 1] = 0;

  mLog(buf);

  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);

  // Remove from the primary map, any secondary maps that intersect

  // with the address range.  Also delete the secondary maps.

  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);

  // Tell the segment array that the address range no longer

  // contains valid code.

  mSegArray->add(aRXavmaMin, aRXavmaMax, false);

  SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",

                 (int)mPriMap->CountSecMaps());

  buf[sizeof(buf) - 1] = 0;

  mLog(buf);

size_t LUL::CountMappings() {

  MOZ_RELEASE_ASSERT(mAdminMode);

  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  return mPriMap->CountSecMaps();

// RUNS IN NO-MALLOC CONTEXT

static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {

  if (!aAddr.Valid()) {

    return TaggedUWord();

  // Lower limit check.  |aAddr.Value()| is the lowest requested address

  // and |aStackImg->mStartAvma| is the lowest address we actually have,

  // so the comparison is straightforward.

  if (aAddr.Value() < aStackImg->mStartAvma) {

    return TaggedUWord();

  // Upper limit check.  We must compute the highest requested address

  // and the highest address we actually have, but being careful to

  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the

  // 3/7 values below that, then we will get overflow.  See bug #1245477.

  typedef CheckedInt<uintptr_t> CheckedUWord;

  CheckedUWord highest_requested_plus_one =

      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));

  CheckedUWord highest_available_plus_one =

      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);

  if (!highest_requested_plus_one.isValid()     // overflow?

      || !highest_available_plus_one.isValid()  // overflow?

      || (highest_requested_plus_one.value() >

          highest_available_plus_one.value())) {  // in range?

    return TaggedUWord();

  return TaggedUWord(

      *(uintptr_t*)(&aStackImg

                         ->mContents[aAddr.Value() - aStackImg->mStartAvma]));

// RUNS IN NO-MALLOC CONTEXT

static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,

                               TaggedUWord aCFA) {

  switch (aReg) {

    case DW_REG_CFA:

      return aCFA;

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)

    case DW_REG_INTEL_XBP:

      return aOldRegs->xbp;

    case DW_REG_INTEL_XSP:

      return aOldRegs->xsp;

    case DW_REG_INTEL_XIP:

      return aOldRegs->xip;

#elif defined(GP_ARCH_arm)

    case DW_REG_ARM_R7:

      return aOldRegs->r7;

    case DW_REG_ARM_R11:

      return aOldRegs->r11;

    case DW_REG_ARM_R12:

      return aOldRegs->r12;

    case DW_REG_ARM_R13:

      return aOldRegs->r13;

    case DW_REG_ARM_R14:

      return aOldRegs->r14;

    case DW_REG_ARM_R15:

      return aOldRegs->r15;

#elif defined(GP_ARCH_arm64)

    case DW_REG_AARCH64_X29:

      return aOldRegs->x29;

    case DW_REG_AARCH64_X30:

      return aOldRegs->x30;

    case DW_REG_AARCH64_SP:

      return aOldRegs->sp;

#elif defined(GP_ARCH_mips64)

    case DW_REG_MIPS_SP:

      return aOldRegs->sp;

    case DW_REG_MIPS_FP:

      return aOldRegs->fp;

    case DW_REG_MIPS_PC:

      return aOldRegs->pc;

#else

#  error "Unsupported arch"

#endif

    default:

      MOZ_ASSERT(0);

      return TaggedUWord();

// RUNS IN NO-MALLOC CONTEXT

// See prototype for comment.

TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,

                            TaggedUWord aCFA, const StackImage* aStackImg,

                            const vector<PfxInstr>& aPfxInstrs) {

  // A small evaluation stack, and a stack pointer, which points to

  // the highest numbered in-use element.

  const int N_STACK = 10;

  TaggedUWord stack[N_STACK];

  int stackPointer = -1;

  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

#define PUSH(_tuw)                                             \

  do {                                                         \

    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \

    stack[++stackPointer] = (_tuw);                            \

  } while (0)

#define POP(_lval)                                   \

  do {                                               \

    if (stackPointer < 0) goto fail; /* underflow */ \

    _lval = stack[stackPointer--];                   \

  } while (0)

  // Cursor in the instruction sequence.

  size_t curr = start + 1;

  // Check the start point is sane.

  size_t nInstrs = aPfxInstrs.size();

  if (start < 0 || (size_t)start >= nInstrs) goto fail;

    // The instruction sequence must start with PX_Start.  If not,

    // something is seriously wrong.

    PfxInstr first = aPfxInstrs[start];

    if (first.mOpcode != PX_Start) goto fail;

    // Push the CFA on the stack to start with (or not), as required by

    // the original DW_OP_*expression* CFI.

    if (first.mOperand != 0) PUSH(aCFA);

  while (true) {

    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];

    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {

      case PX_Start:

        // This should appear only at the start of the sequence.

        goto fail;

      case PX_End:

        // We just took care of that, so we shouldn't see it again.

        MOZ_ASSERT(0);

        goto fail;

      case PX_SImm32:

        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));

        break;

      case PX_DwReg: {

        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;

        MOZ_ASSERT(reg != DW_REG_CFA);

        PUSH(EvaluateReg(reg, aOldRegs, aCFA));

        break;

      case PX_Deref: {

        TaggedUWord addr;

        POP(addr);

        PUSH(DerefTUW(addr, aStackImg));

        break;

      case PX_Add: {

        TaggedUWord x, y;

        POP(x);

        POP(y);

        PUSH(y + x);

        break;

      case PX_Sub: {

        TaggedUWord x, y;

        POP(x);

        POP(y);

        PUSH(y - x);

        break;

      case PX_And: {

        TaggedUWord x, y;

        POP(x);

        POP(y);

        PUSH(y & x);

        break;

      case PX_Or: {

        TaggedUWord x, y;

        POP(x);

        POP(y);

        PUSH(y | x);

        break;

      case PX_CmpGES: {

        TaggedUWord x, y;

        POP(x);

        POP(y);

        PUSH(y.CmpGEs(x));

        break;

      case PX_Shl: {

        TaggedUWord x, y;

        POP(x);

        POP(y);

        PUSH(y << x);

        break;

      default:

        MOZ_ASSERT(0);

        goto fail;

  }  // while (true)

  // Evaluation finished.  The top value on the stack is the result.

  if (stackPointer >= 0) {

    return stack[stackPointer];

  // Else fall through

fail:

  return TaggedUWord();

#undef PUSH

#undef POP

// RUNS IN NO-MALLOC CONTEXT

TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,

                                const StackImage* aStackImg,

                                const vector<PfxInstr>* aPfxInstrs) const {

  switch (mHow) {

    case UNKNOWN:

      return TaggedUWord();

    case NODEREF: {

      TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);

      tuw = tuw + TaggedUWord((intptr_t)mOffset);

      return tuw;

    case DEREF: {

      TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);

      tuw = tuw + TaggedUWord((intptr_t)mOffset);

      return DerefTUW(tuw, aStackImg);

    case PFXEXPR: {

      MOZ_ASSERT(aPfxInstrs);

      if (!aPfxInstrs) {

        return TaggedUWord();

      return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);

    default:

      MOZ_ASSERT(0);

      return TaggedUWord();

// RUNS IN NO-MALLOC CONTEXT

static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,

                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {

  // Take a copy of regs, since we'll need to refer to the old values

  // whilst computing the new ones.

  UnwindRegs old_regs = *aRegs;

  // Mark all the current register values as invalid, so that the

  // caller can see, on our return, which ones have been computed

  // anew.  If we don't even manage to compute a new PC value, then

  // the caller will have to abandon the unwind.

  // FIXME: Create and use instead: aRegs->SetAllInvalid();

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)

  aRegs->xbp = TaggedUWord();

  aRegs->xsp = TaggedUWord();

  aRegs->xip = TaggedUWord();

#elif defined(GP_ARCH_arm)

  aRegs->r7 = TaggedUWord();

  aRegs->r11 = TaggedUWord();

  aRegs->r12 = TaggedUWord();

  aRegs->r13 = TaggedUWord();

  aRegs->r14 = TaggedUWord();

  aRegs->r15 = TaggedUWord();

#elif defined(GP_ARCH_arm64)

  aRegs->x29 = TaggedUWord();

  aRegs->x30 = TaggedUWord();

  aRegs->sp = TaggedUWord();

  aRegs->pc = TaggedUWord();

#elif defined(GP_ARCH_mips64)

  aRegs->sp = TaggedUWord();

  aRegs->fp = TaggedUWord();

  aRegs->pc = TaggedUWord();

#else

#  error "Unsupported arch"

#endif

  // This is generally useful.

  const TaggedUWord inval = TaggedUWord();

  // First, compute the CFA.

  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,

                                               aStackImg, aPfxInstrs);

  // If we didn't manage to compute the CFA, well .. that's ungood,

  // but keep going anyway.  It'll be OK provided none of the register

  // value rules mention the CFA.  In any case, compute the new values

  // for each register that we're tracking.

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)

  aRegs->xbp =

      aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->xsp =

      aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->xip =

      aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

#elif defined(GP_ARCH_arm)

  aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->r11 =

      aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->r12 =

      aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->r13 =

      aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->r14 =

      aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->r15 =

      aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

#elif defined(GP_ARCH_arm64)

  aRegs->x29 =

      aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->x30 =

      aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

#elif defined(GP_ARCH_mips64)

  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

  aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);

#else

#  error "Unsupported arch"

#endif

  // We're done.  Any regs for which we didn't manage to compute a

  // new value will now be marked as invalid.

// RUNS IN NO-MALLOC CONTEXT

void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,

                 /*OUT*/ uintptr_t* aFrameSPs,

                 /*OUT*/ size_t* aFramesUsed,

                 /*OUT*/ size_t* aFramePointerFramesAcquired,

                 size_t aFramesAvail, UnwindRegs* aStartRegs,

                 StackImage* aStackImg) {

  MOZ_RELEASE_ASSERT(!mAdminMode);

  /////////////////////////////////////////////////////////

  // BEGIN UNWIND

  *aFramesUsed = 0;

  UnwindRegs regs = *aStartRegs;

  TaggedUWord last_valid_sp = TaggedUWord();

  while (true) {

    if (DEBUG_MAIN) {

      char buf[300];

      mLog("\n");

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)

      SprintfLiteral(

          buf, "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",

          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),

          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),

          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());

      buf[sizeof(buf) - 1] = 0;

      mLog(buf);

#elif defined(GP_ARCH_arm)

      SprintfLiteral(

          buf,

          "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"

          "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",

          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),

          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),

          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),

          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),

          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),

          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());

      buf[sizeof(buf) - 1] = 0;

      mLog(buf);

#elif defined(GP_ARCH_arm64)

      SprintfLiteral(

          buf,

          "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"

          "  sp %d/%llx\n",

          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),

          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),

          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),

          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());

      buf[sizeof(buf) - 1] = 0;

      mLog(buf);

#elif defined(GP_ARCH_mips64)

      SprintfLiteral(

          buf, "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",

          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),

          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),

          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());

      buf[sizeof(buf) - 1] = 0;

      mLog(buf);

#else

#  error "Unsupported arch"

#endif

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)

    TaggedUWord ia = regs.xip;

    TaggedUWord sp = regs.xsp;

#elif defined(GP_ARCH_arm)

    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);

    TaggedUWord sp = regs.r13;

#elif defined(GP_ARCH_arm64)

    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);

    TaggedUWord sp = regs.sp;

#elif defined(GP_ARCH_mips64)

    TaggedUWord ia = regs.pc;

    TaggedUWord sp = regs.sp;

#else

#  error "Unsupported arch"

#endif

    if (*aFramesUsed >= aFramesAvail) {

      break;

    // If we don't have a valid value for the PC, give up.

    if (!ia.Valid()) {

      break;

    // If this is the innermost frame, record the SP value, which

    // presumably is valid.  If this isn't the innermost frame, and we

    // have a valid SP value, check that its SP value isn't less that

    // the one we've seen so far, so as to catch potential SP value

    // cycles.

    if (*aFramesUsed == 0) {

      last_valid_sp = sp;

    } else {

      MOZ_ASSERT(last_valid_sp.Valid());

      if (sp.Valid()) {

        if (sp.Value() < last_valid_sp.Value()) {

          // Hmm, SP going in the wrong direction.  Let's stop.

          break;

        // Remember where we got to.

        last_valid_sp = sp;

    aFramePCs[*aFramesUsed] = ia.Value();

    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;

    (*aFramesUsed)++;

    // Find the RuleSet for the current IA, if any.  This will also

    // query the backing (secondary) maps if it isn't found in the

    // thread-local cache.

    // If this isn't the innermost frame, back up into the calling insn.

    if (*aFramesUsed > 1) {

      ia = ia + TaggedUWord((uintptr_t)(-1));

    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =

        mPriMap->Lookup(ia.Value());

    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;

    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;

    if (DEBUG_MAIN) {

      char buf[100];

      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",

                     (unsigned long long int)ia.Value(), ruleset);

      buf[sizeof(buf) - 1] = 0;

      mLog(buf);

#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)

    /////////////////////////////////////////////

    ////

    // On 32 bit x86-linux, syscalls are often done via the VDSO

    // function __kernel_vsyscall, which doesn't have a corresponding

    // object that we can read debuginfo from.  That effectively kills

    // off all stack traces for threads blocked in syscalls.  Hence

    // special-case by looking at the code surrounding the program

    // counter.

//

    // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx

    // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx

    // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp

    // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp

    // 0xf7757425 <__kernel_vsyscall+5>:	sysenter

    // 0xf7757427 <__kernel_vsyscall+7>:	nop

    // 0xf7757428 <__kernel_vsyscall+8>:	nop

    // 0xf7757429 <__kernel_vsyscall+9>:	nop

    // 0xf775742a <__kernel_vsyscall+10>:	nop

    // 0xf775742b <__kernel_vsyscall+11>:	nop

    // 0xf775742c <__kernel_vsyscall+12>:	nop

    // 0xf775742d <__kernel_vsyscall+13>:	nop

    // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80

    // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp

    // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx

    // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx

    // 0xf7757433 <__kernel_vsyscall+19>:	ret

//

    // In cases where the sampled thread is blocked in a syscall, its

    // program counter will point at "pop %ebp".  Hence we look for

    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and

    // the corresponding register-recovery actions are:

    //    new_ebp = *(old_esp + 0)

    //    new eip = *(old_esp + 12)

    //    new_esp = old_esp + 16

//

    // It may also be the case that the program counter points two

    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in

    // the case where the syscall has been restarted but the thread

    // hasn't been rescheduled.  The code below doesn't handle that;

    // it could easily be made to.

//

    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {

      uintptr_t insns_min, insns_max;

      uintptr_t eip = ia.Value();

      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);

      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {

        uint8_t* eipC = (uint8_t*)eip;

        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&

            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {

          TaggedUWord sp_plus_0 = sp;

          TaggedUWord sp_plus_12 = sp;

          TaggedUWord sp_plus_16 = sp;

          sp_plus_12 = sp_plus_12 + TaggedUWord(12);

          sp_plus_16 = sp_plus_16 + TaggedUWord(16);

          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);

          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);

          TaggedUWord new_esp = sp_plus_16;

          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {

            regs.xbp = new_ebp;

            regs.xip = new_eip;

            regs.xsp = new_esp;

            continue;

    ////

    /////////////////////////////////////////////

#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)

    // So, do we have a ruleset for this address?  If so, use it now.

    if (ruleset) {

      if (DEBUG_MAIN) {

        ruleset->Print(ia.Value(), 1 /*bogus, but doesn't matter*/, mLog);

        mLog("\n");

      // Use the RuleSet to compute the registers for the previous

      // frame.  |regs| is modified in-place.

      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);

      continue;

#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \

    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \

    defined(GP_PLAT_amd64_freebsd)

    // There's no RuleSet for the specified address.  On amd64/x86_linux, see if

    // it's possible to recover the caller's frame by using the frame pointer.

    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),

    // and assume the following layout:

//

    //                 <--- new_SP

    //   +----------+

    //   |  new_IP  |  (return address)

    //   +----------+

    //   |  new_BP  |  <--- old_BP

    //   +----------+

    //   |   ....   |

    //   |   ....   |

    //   |   ....   |

    //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)

    const size_t wordSzB = sizeof(uintptr_t);

    TaggedUWord old_xsp = regs.xsp;

    // points at new_BP ?

    TaggedUWord old_xbp = regs.xbp;

    // points at new_IP ?

    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);

    // is the new_SP ?

    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);

    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&

        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {

      // We don't need to do any range, alignment or validity checks for

      // addresses passed to DerefTUW, since that performs them itself, and

      // returns an invalid value on failure.  Any such value will poison

      // subsequent uses, and we do a final check for validity before putting

      // the computed values into |regs|.

      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);

      if (new_xbp.Valid() && new_xbp.IsAligned() &&

          old_xbp.Value() < new_xbp.Value()) {

        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);

        TaggedUWord new_xsp = old_xbp_plus2;

        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {

          regs.xbp = new_xbp;

          regs.xip = new_xip;

          regs.xsp = new_xsp;

          (*aFramePointerFramesAcquired)++;

          continue;

#elif defined(GP_ARCH_arm64)

    // Here is an example of generated code for prologue and epilogue..

//

    // stp     x29, x30, [sp, #-16]!

    // mov     x29, sp

    // ...

    // ldp     x29, x30, [sp], #16

    // ret

//

    // Next is another example of generated code.

//

    // stp     x20, x19, [sp, #-32]!

    // stp     x29, x30, [sp, #16]

    // add     x29, sp, #0x10

    // ...

    // ldp     x29, x30, [sp, #16]

    // ldp     x20, x19, [sp], #32

    // ret

//

    // Previous x29 and x30 register are stored in the address of x29 register.

    // But since sp register value depends on local variables, we cannot compute

    // previous sp register from current sp/fp/lr register and there is no

    // regular rule for sp register in prologue. But since return address is lr

    // register, if x29 is valid, we will get return address without sp

    // register.

//

    // So we assume the following layout that if no rule set. x29 is frame

    // pointer, so we will be able to compute x29 and x30 .

//

    //   +----------+  <--- new_sp (cannot compute)

    //   |   ....   |

    //   +----------+

    //   |  new_lr  |  (return address)

    //   +----------+

    //   |  new_fp  |  <--- old_fp

    //   +----------+

    //   |   ....   |

    //   |   ....   |

    //   +----------+  <---- old_sp (arbitrary, but unused)

    TaggedUWord old_fp = regs.x29;

    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&

        last_valid_sp.Value() <= old_fp.Value()) {

      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);

      if (new_fp.Valid() && new_fp.IsAligned() &&

          old_fp.Value() < new_fp.Value()) {

        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);

        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);

        if (new_lr.Valid()) {

          regs.x29 = new_fp;

          regs.x30 = new_lr;

          // When using frame pointer to walk stack, we cannot compute sp

          // register since we cannot compute sp register from fp/lr/sp

          // register, and there is no regular rule to compute previous sp

          // register. So mark as invalid.

          regs.sp = TaggedUWord();

          (*aFramePointerFramesAcquired)++;

          continue;

#endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||

        // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||

        // defined(GP_PLAT_amd64_freebsd)

    // We failed to recover a frame either using CFI or FP chasing, and we

    // have no other ways to recover the frame.  So we have to give up.

    break;

  }  // top level unwind loop

  // END UNWIND

  /////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////

// LUL Unit Testing                                           //

////////////////////////////////////////////////////////////////

static const int LUL_UNIT_TEST_STACK_SIZE = 32768;

#if defined(GP_ARCH_mips64)

static __attribute__((noinline)) unsigned long __getpc(void) {

  unsigned long rtaddr;

  __asm__ volatile("move %0, $31" : "=r"(rtaddr));

  return rtaddr;

#endif

// This function is innermost in the test call sequence.  It uses LUL

// to unwind, and compares the result with the sequence specified in

// the director string.  These need to agree in order for the test to

// pass.  In order not to screw up the results, this function needs

// to have a not-very big stack frame, since we're only presenting

// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and

// that chunk unavoidably includes the frame for this function.

//

// This function must not be inlined into its callers.  Doing so will

// cause the expected-vs-actual backtrace consistency checking to

// fail.  Prints summary results to |aLUL|'s logging sink and also

// returns a boolean indicating whether or not the test failed.

static __attribute__((noinline)) bool GetAndCheckStackTrace(

    LUL* aLUL, const char* dstring) {

  // Get hold of the current unwind-start registers.

  UnwindRegs startRegs;

  memset(&startRegs, 0, sizeof(startRegs));

#if defined(GP_ARCH_amd64)

  volatile uintptr_t block[3];

  MOZ_ASSERT(sizeof(block) == 24);

  __asm__ __volatile__(

      "leaq 0(%%rip), %%r15"

      "\n\t"

      "movq %%r15, 0(%0)"

      "\n\t"

      "movq %%rsp, 8(%0)"

      "\n\t"

      "movq %%rbp, 16(%0)"

      "\n"

      : "r"(&block[0])

      : "memory", "r15");

  startRegs.xip = TaggedUWord(block[0]);

  startRegs.xsp = TaggedUWord(block[1]);

  startRegs.xbp = TaggedUWord(block[2]);

  const uintptr_t REDZONE_SIZE = 128;

  uintptr_t start = block[1] - REDZONE_SIZE;

#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)

  volatile uintptr_t block[3];

  MOZ_ASSERT(sizeof(block) == 12);

  __asm__ __volatile__(

      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/

      "\n\t"

      "popl %%edi"

      "\n\t"

      "movl %%edi, 0(%0)"

      "\n\t"

      "movl %%esp, 4(%0)"

      "\n\t"

      "movl %%ebp, 8(%0)"

      "\n"

      : "r"(&block[0])

      : "memory", "edi");

  startRegs.xip = TaggedUWord(block[0]);

  startRegs.xsp = TaggedUWord(block[1]);

  startRegs.xbp = TaggedUWord(block[2]);

  const uintptr_t REDZONE_SIZE = 0;

  uintptr_t start = block[1] - REDZONE_SIZE;

#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)

  volatile uintptr_t block[6];

  MOZ_ASSERT(sizeof(block) == 24);

  __asm__ __volatile__(

      "mov r0, r15"

      "\n\t"

      "str r0,  [%0, #0]"

      "\n\t"

      "str r14, [%0, #4]"

      "\n\t"

      "str r13, [%0, #8]"

      "\n\t"

      "str r12, [%0, #12]"

      "\n\t"

      "str r11, [%0, #16]"

      "\n\t"

      "str r7,  [%0, #20]"

      "\n"

      : "r"(&block[0])

      : "memory", "r0");

  startRegs.r15 = TaggedUWord(block[0]);

  startRegs.r14 = TaggedUWord(block[1]);

  startRegs.r13 = TaggedUWord(block[2]);

  startRegs.r12 = TaggedUWord(block[3]);

  startRegs.r11 = TaggedUWord(block[4]);

  startRegs.r7 = TaggedUWord(block[5]);

  const uintptr_t REDZONE_SIZE = 0;

  uintptr_t start = block[1] - REDZONE_SIZE;

#elif defined(GP_ARCH_arm64)

  volatile uintptr_t block[4];

  MOZ_ASSERT(sizeof(block) == 32);

  __asm__ __volatile__(

      "adr x0, . \n\t"

      "str x0, [%0, #0] \n\t"

      "str x29, [%0, #8] \n\t"

      "str x30, [%0, #16] \n\t"

      "mov x0, sp \n\t"

      "str x0, [%0, #24] \n\t"

      : "r"(&block[0])

      : "memory", "x0");

  startRegs.pc = TaggedUWord(block[0]);

  startRegs.x29 = TaggedUWord(block[1]);

  startRegs.x30 = TaggedUWord(block[2]);

  startRegs.sp = TaggedUWord(block[3]);

  const uintptr_t REDZONE_SIZE = 0;

  uintptr_t start = block[1] - REDZONE_SIZE;

#elif defined(GP_ARCH_mips64)

  volatile uintptr_t block[3];

  MOZ_ASSERT(sizeof(block) == 24);

  __asm__ __volatile__(

      "sd $29, 8(%0)     \n"

      "sd $30, 16(%0)    \n"

      : "r"(block)

      : "memory");

  block[0] = __getpc();

  startRegs.pc = TaggedUWord(block[0]);

  startRegs.sp = TaggedUWord(block[1]);

  startRegs.fp = TaggedUWord(block[2]);

  const uintptr_t REDZONE_SIZE = 0;

  uintptr_t start = block[1] - REDZONE_SIZE;

#else

#  error "Unsupported platform"

#endif

  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the

  // stack.

  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;

  uintptr_t ws = sizeof(void*);

  start &= ~(ws - 1);

  end &= ~(ws - 1);

  uintptr_t nToCopy = end - start;

  if (nToCopy > lul::N_STACK_BYTES) {

    nToCopy = lul::N_STACK_BYTES;

  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);

  StackImage* stackImg = new StackImage();

  stackImg->mLen = nToCopy;

  stackImg->mStartAvma = start;

  if (nToCopy > 0) {

    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);

    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);

  // Unwind it.

  const int MAX_TEST_FRAMES = 64;

  uintptr_t framePCs[MAX_TEST_FRAMES];

  uintptr_t frameSPs[MAX_TEST_FRAMES];

  size_t framesAvail = std::size(framePCs);

  size_t framesUsed = 0;

  size_t framePointerFramesAcquired = 0;

  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,

               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);

  delete stackImg;

  // if (0) {

  //  // Show what we have.

  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);

  //  for (size_t i = 0; i < framesUsed; i++) {

  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",

  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);

  //  }

  //  fprintf(stderr, "\n");

//}

  // Check to see if there's a consistent binding between digits in

  // the director string ('1' .. '8') and the PC values acquired by

  // the unwind.  If there isn't, the unwinding has failed somehow.

  uintptr_t binding[8];  // binding for '1' .. binding for '8'

  memset((void*)binding, 0, sizeof(binding));

  // The general plan is to work backwards along the director string

  // and forwards along the framePCs array.  Doing so corresponds to

  // working outwards from the innermost frame of the recursive test set.

  const char* cursor = dstring;

  // Find the end.  This leaves |cursor| two bytes past the first

  // character we want to look at -- see comment below.

  while (*cursor) cursor++;

  // Counts the number of consistent frames.

  size_t nConsistent = 0;

  // Iterate back to the start of the director string.  The starting

  // points are a bit complex.  We can't use framePCs[0] because that

  // contains the PC in this frame (above).  We can't use framePCs[1]

  // because that will contain the PC at return point in the recursive

  // test group (TestFn[1-8]) for their call "out" to this function,

  // GetAndCheckStackTrace.  Although LUL will compute a correct

  // return address, that will not be the same return address as for a

  // recursive call out of the the function to another function in the

  // group.  Hence we can only start consistency checking at

  // framePCs[2].

//

  // To be consistent, then, we must ignore the last element in the

  // director string as that corresponds to framePCs[1].  Hence the

  // start points are: framePCs[2] and the director string 2 bytes

  // before the terminating zero.

//

  // Also as a result of this, the number of consistent frames counted

  // will always be one less than the length of the director string

  // (not including its terminating zero).

  size_t frameIx;

  for (cursor = cursor - 2, frameIx = 2;

       cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {

    char c = *cursor;

    uintptr_t pc = framePCs[frameIx];

    // If this doesn't hold, the director string is ill-formed.

    MOZ_ASSERT(c >= '1' && c <= '8');

    int n = ((int)c) - ((int)'1');

    if (binding[n] == 0) {

      // There's no binding for |c| yet, so install |pc| and carry on.

      binding[n] = pc;

      nConsistent++;

      continue;

    // There's a pre-existing binding for |c|.  Check it's consistent.

    if (binding[n] != pc) {

      // Not consistent.  Give up now.

      break;

    // Consistent.  Keep going.

    nConsistent++;

  // So, did we succeed?

  bool passed = nConsistent + 1 == strlen(dstring);

  // Show the results.

  char buf[200];

  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);

  buf[sizeof(buf) - 1] = 0;

  aLUL->mLog(buf);

  SprintfLiteral(buf, "LULUnitTest:     %d consistent, %d in dstring: %s\n",

                 (int)nConsistent, (int)strlen(dstring),

                 passed ? "PASS" : "FAIL");

  buf[sizeof(buf) - 1] = 0;

  aLUL->mLog(buf);

  return !passed;

// Macro magic to create a set of 8 mutually recursive functions with

// varying frame sizes.  These will recurse amongst themselves as

// specified by |strP|, the directory string, and call

// GetAndCheckStackTrace when the string becomes empty, passing it the

// original value of the string.  This checks the result, printing

// results on |aLUL|'s logging sink, and also returns a boolean

// indicating whether or not the results are acceptable (correct).

#define DECL_TEST_FN(NAME) \

  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);

#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \

  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \

    /* Create a frame of size (at least) FRAMESIZE, so that the */            \

    /* 8 functions created by this macro offer some variation in frame */     \

    /* sizes.  This isn't as simple as it might seem, since a clever */       \

    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \

    /* and removes it.  We try to defeat this by passing it to a function */  \

    /* in a different compilation unit, and hoping that clang does not */     \

    /* notice that the call is a no-op. */                                    \

    char space[FRAMESIZE];                                                    \

    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \

    if (*strP == '\0') {                                                      \

      /* We've come to the end of the director string. */                     \

      /* Take a stack snapshot. */                                            \

      /* We purposefully use a negation to avoid tail-call optimization */    \

      return !GetAndCheckStackTrace(aLUL, strPorig);                          \

    } else {                                                                  \

      /* Recurse onwards.  This is a bit subtle.  The obvious */              \

      /* thing to do here is call onwards directly, from within the */        \

      /* arms of the case statement.  That gives a problem in that */         \

      /* there will be multiple return points inside each function when */    \

      /* unwinding, so it will be difficult to check for consistency */       \

      /* against the director string.  Instead, we make an indirect */        \

      /* call, so as to guarantee that there is only one call site */         \

      /* within each function.  This does assume that the compiler */         \

      /* won't transform it back to the simple direct-call form. */           \

      /* To discourage it from doing so, the call is bracketed with */        \

      /* __asm__ __volatile__ sections so as to make it not-movable. */       \

      bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \

      switch (*strP) {                                                        \

        case '1':                                                             \

          nextFn = TestFn1;                                                   \

          break;                                                              \

        case '2':                                                             \

          nextFn = TestFn2;                                                   \

          break;                                                              \

        case '3':                                                             \

          nextFn = TestFn3;                                                   \

          break;                                                              \

        case '4':                                                             \

          nextFn = TestFn4;                                                   \

          break;                                                              \

        case '5':                                                             \

          nextFn = TestFn5;                                                   \

          break;                                                              \

        case '6':                                                             \

          nextFn = TestFn6;                                                   \

          break;                                                              \

        case '7':                                                             \

          nextFn = TestFn7;                                                   \

          break;                                                              \

        case '8':                                                             \

          nextFn = TestFn8;                                                   \

          break;                                                              \

        default:                                                              \

          nextFn = TestFn8;                                                   \

          break;                                                              \

      }                                                                       \

      /* "use" |space| immediately after the recursive call, */               \

      /* so as to dissuade clang from deallocating the space while */         \

      /* the call is active, or otherwise messing with the stack frame. */    \

      __asm__ __volatile__("" ::: "cc", "memory");                            \

      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \

      Unused << write(1, space, 0);                                           \

      __asm__ __volatile__("" ::: "cc", "memory");                            \

      return passed;                                                          \

    }                                                                         \

// The test functions are mutually recursive, so it is necessary to

// declare them before defining them.

DECL_TEST_FN(TestFn1)

DECL_TEST_FN(TestFn2)

DECL_TEST_FN(TestFn3)

DECL_TEST_FN(TestFn4)

DECL_TEST_FN(TestFn5)

DECL_TEST_FN(TestFn6)

DECL_TEST_FN(TestFn7)

DECL_TEST_FN(TestFn8)

GEN_TEST_FN(TestFn1, 123)

GEN_TEST_FN(TestFn2, 456)

GEN_TEST_FN(TestFn3, 789)

GEN_TEST_FN(TestFn4, 23)

GEN_TEST_FN(TestFn5, 47)

GEN_TEST_FN(TestFn6, 117)

GEN_TEST_FN(TestFn7, 1)

GEN_TEST_FN(TestFn8, 99)

// This starts the test sequence going.  Call here to generate a

// sequence of calls as directed by the string |dstring|.  The call

// sequence will, from its innermost frame, finish by calling

// GetAndCheckStackTrace() and passing it |dstring|.

// GetAndCheckStackTrace() will unwind the stack, check consistency

// of those results against |dstring|, and print a pass/fail message

// to aLUL's logging sink.  It also updates the counters in *aNTests

// and aNTestsPassed.

__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,

                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,

                                       const char* dstring) {

  // Ensure that the stack has at least this much space on it.  This

  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes

  // and hand it to LUL.  Safe in the sense that no segfault can

  // happen because the stack is at least this big.  This is all

  // somewhat dubious in the sense that a sufficiently clever compiler

  // (clang, for one) can figure out that space[] is unused and delete

  // it from the frame.  Hence the somewhat elaborate hoop jumping to

  // fill it up before the call and to at least appear to use the

  // value afterwards.

  int i;

  volatile char space[LUL_UNIT_TEST_STACK_SIZE];

  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {

    space[i] = (char)(i & 0x7F);

  // Really run the test.

  bool passed = TestFn1(aLUL, dstring, dstring);

  // Appear to use space[], by visiting the value to compute some kind

  // of checksum, and then (apparently) using the checksum.

  int sum = 0;

  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {

    // If this doesn't fool LLVM, I don't know what will.

    sum += space[i] - 3 * i;

  __asm__ __volatile__("" : : "r"(sum));

  // Update the counters.

  (*aNTests)++;

  if (passed) {

    (*aNTestsPassed)++;

void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,

                     LUL* aLUL) {

  aLUL->mLog(":\n");

  aLUL->mLog("LULUnitTest: BEGIN\n");

  *aNTests = *aNTestsPassed = 0;

  TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");

  TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");

  TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");

  TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");

  TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");

  TestUnw(aNTests, aNTestsPassed, aLUL,

          "123456781122334455667788777777777777777777777");

  aLUL->mLog("LULUnitTest: END\n");

  aLUL->mLog(":\n");

}  // namespace lul