/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#define MOZ_MEMORY_IMPL
#include "mozmemory_wrap.h"
#ifdef _WIN32
# include <windows.h>
# include <io.h>
typedef intptr_t ssize_t;
#else
# include <sys/mman.h>
# include <unistd.h>
#endif
#ifdef XP_LINUX
# include <fcntl.h>
# include <stdlib.h>
#endif
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include "mozilla/Assertions.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"
#include "FdPrintf.h"
using namespace mozilla;
static void die(const char* message) {
/* Here, it doesn't matter that fprintf may allocate memory. */
fprintf(stderr, "%s\n", message);
exit(1);
}
#ifdef XP_LINUX
static size_t sPageSize = []() { return sysconf(_SC_PAGESIZE); }();
#endif
/* We don't want to be using malloc() to allocate our internal tracking
* data, because that would change the parameters of what is being measured,
* so we want to use data types that directly use mmap/VirtualAlloc. */
template <typename T, size_t Len>
class MappedArray {
public:
MappedArray() : mPtr(nullptr) {
#ifdef XP_LINUX
MOZ_RELEASE_ASSERT(!((sizeof(T) * Len) & (sPageSize - 1)),
"MappedArray size must be a multiple of the page size");
#endif
}
~MappedArray() {
if (mPtr) {
#ifdef _WIN32
// MEM_RELEASE requires the size argument to be 0; passing a non-zero size
// makes VirtualFree fail and leaks the mapping.
VirtualFree(mPtr, 0, MEM_RELEASE);
#elif defined(XP_LINUX)
munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(mPtr) -
sPageSize),
sizeof(T) * Len + sPageSize * 2);
#else
munmap(mPtr, sizeof(T) * Len);
#endif
}
}
T& operator[](size_t aIndex) const {
if (mPtr) {
return mPtr[aIndex];
}
#ifdef _WIN32
mPtr = reinterpret_cast<T*>(VirtualAlloc(
nullptr, sizeof(T) * Len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
if (mPtr == nullptr) {
die("VirtualAlloc error");
}
#else
size_t data_size = sizeof(T) * Len;
size_t size = data_size;
# ifdef XP_LINUX
// See below
size += sPageSize * 2;
# endif
mPtr = reinterpret_cast<T*>(mmap(nullptr, size, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_PRIVATE, -1, 0));
if (mPtr == MAP_FAILED) {
die("Mmap error");
}
# ifdef XP_LINUX
// On Linux we request a page on either side of the allocation and
// mprotect them. This prevents mappings in /proc/self/smaps from being
// merged and allows us to parse this file to calculate the allocator's RSS.
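// The resulting layout, with mPtr as adjusted below, is:
//   [ guard page ][ data_size usable bytes ][ guard page ]
//                 ^-- mPtr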
// Note: MOZ_RELEASE_ASSERT is used so that the mprotect() calls are not
// compiled away in release builds, where MOZ_ASSERT is a no-op.
MOZ_RELEASE_ASSERT(0 == mprotect(mPtr, sPageSize, 0));
MOZ_RELEASE_ASSERT(0 == mprotect(reinterpret_cast<void*>(
reinterpret_cast<uintptr_t>(mPtr) + data_size +
sPageSize),
sPageSize, 0));
mPtr = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(mPtr) + sPageSize);
# endif
#endif
return mPtr[aIndex];
}
bool ownsMapping(uintptr_t addr) const { return addr == (uintptr_t)mPtr; }
bool allocated() const { return !!mPtr; }
private:
mutable T* mPtr;
};
/* Type for records of allocations. */
struct MemSlot {
void* mPtr;
// mRequest is only valid if mPtr is non-null. It doesn't need to be cleared
// when memory is freed or realloc()ed.
size_t mRequest;
};
/* An almost infinite list of slots.
* In essence, this is a linked list of arrays of groups of slots.
* Each group is 1MB. On 64-bit, one group can store 64k allocations.
* Each MemSlotList instance can store 1023 such groups, which means more
* than 67M allocations. If more are needed, we chain to another
* MemSlotList, and so on.
* Using 1023 groups makes the MemSlotList itself one page big on 32-bit
* and two pages big on 64-bit.
*/
class MemSlotList {
static constexpr size_t kGroups = 1024 - 1;
static constexpr size_t kGroupSize = (1024 * 1024) / sizeof(MemSlot);
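// On 64-bit, sizeof(MemSlot) == 16, so kGroupSize == 65536 slots per group,
// and one MemSlotList covers 1023 * 65536 == 67,043,328 slots before
// chaining to the next list.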
MappedArray<MemSlot, kGroupSize> mSlots[kGroups];
MappedArray<MemSlotList, 1> mNext;
public:
MemSlot& operator[](size_t aIndex) const {
if (aIndex < kGroupSize * kGroups) {
return mSlots[aIndex / kGroupSize][aIndex % kGroupSize];
}
aIndex -= kGroupSize * kGroups;
return mNext[0][aIndex];
}
// Returns whether any of the memory-mapped buffers starts at the given
// address.
bool ownsMapping(uintptr_t aStart) const {
for (const auto& slot : mSlots) {
if (slot.allocated() && slot.ownsMapping(aStart)) {
return true;
}
}
return mNext.ownsMapping(aStart) ||
(mNext.allocated() && mNext[0].ownsMapping(aStart));
}
};
/* Helper class for memory buffers */
class Buffer {
public:
Buffer() : mBuf(nullptr), mLength(0) {}
Buffer(const void* aBuf, size_t aLength)
: mBuf(reinterpret_cast<const char*>(aBuf)), mLength(aLength) {}
/* Constructor for string literals. */
template <size_t Size>
explicit Buffer(const char (&aStr)[Size]) : mBuf(aStr), mLength(Size - 1) {}
/* Returns a sub-buffer up to but not including the given aNeedle character.
* The "parent" buffer itself is altered to begin after the aNeedle
* character.
* If the aNeedle character is not found, return the entire buffer, and empty
* the "parent" buffer. */
Buffer SplitChar(char aNeedle) {
char* buf = const_cast<char*>(mBuf);
char* c = reinterpret_cast<char*>(memchr(buf, aNeedle, mLength));
if (!c) {
return Split(mLength);
}
Buffer result = Split(c - buf);
// Remove the aNeedle character itself.
Split(1);
return result;
}
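/* SplitChar example: if the buffer holds "100,42", SplitChar(',') returns a
* buffer holding "100" and leaves this buffer holding "42". */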
// Advance to the position after aNeedle. This is like SplitChar but does not
// return the skipped portion.
void Skip(char aNeedle, unsigned nTimes = 1) {
for (unsigned i = 0; i < nTimes; i++) {
SplitChar(aNeedle);
}
}
void SkipWhitespace() {
while (mLength > 0) {
if (!IsSpace(mBuf[0])) {
break;
}
mBuf++;
mLength--;
}
}
static bool IsSpace(char c) {
switch (c) {
case ' ':
case '\t':
case '\n':
case '\v':
case '\f':
case '\r':
return true;
}
return false;
}
/* Returns a sub-buffer of at most aLength characters. The "parent" buffer is
* amputated of those aLength characters. If the "parent" buffer is smaller
* than aLength, then its length is used instead. */
Buffer Split(size_t aLength) {
Buffer result(mBuf, std::min(aLength, mLength));
mLength -= result.mLength;
mBuf += result.mLength;
return result;
}
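/* Split example: if the buffer holds "abcdef", Split(3) returns a buffer
* holding "abc" and leaves this buffer holding "def". */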
/* Move the buffer (including its content) to the memory address of the aOther
* buffer. */
void Slide(Buffer aOther) {
memmove(const_cast<char*>(aOther.mBuf), mBuf, mLength);
mBuf = aOther.mBuf;
}
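/* Slide is used by FdReader to move leftover data back to the front of its
* fixed read buffer before refilling it. */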
/* Returns whether the two involved buffers have the same content. */
bool operator==(Buffer aOther) {
return mLength == aOther.mLength &&
(mBuf == aOther.mBuf || !strncmp(mBuf, aOther.mBuf, mLength));
}
bool operator!=(Buffer aOther) { return !(*this == aOther); }
/* Returns true if the buffer is not empty. */
explicit operator bool() { return mLength; }
char operator[](size_t n) const { return mBuf[n]; }
/* Returns the memory location of the buffer. */
const char* get() { return mBuf; }
/* Returns the memory location of the end of the buffer (technically, the
* first byte after the buffer). */
const char* GetEnd() { return mBuf + mLength; }
/* Extend the buffer over the content of the other buffer, assuming it is
* adjacent. */
void Extend(Buffer aOther) {
MOZ_ASSERT(aOther.mBuf == GetEnd());
mLength += aOther.mLength;
}
size_t Length() const { return mLength; }
private:
const char* mBuf;
size_t mLength;
};
/* Helper class to read from a file descriptor line by line. */
class FdReader {
public:
explicit FdReader(int aFd, bool aNeedClose = false)
: mFd(aFd),
mNeedClose(aNeedClose),
mData(&mRawBuf, 0),
mBuf(&mRawBuf, sizeof(mRawBuf)) {}
FdReader(FdReader&& aOther) noexcept
: mFd(aOther.mFd),
mNeedClose(aOther.mNeedClose),
mData(&mRawBuf, 0),
mBuf(&mRawBuf, sizeof(mRawBuf)) {
memcpy(mRawBuf, aOther.mRawBuf, sizeof(mRawBuf));
aOther.mFd = -1;
aOther.mNeedClose = false;
aOther.mData = Buffer();
aOther.mBuf = Buffer();
}
FdReader& operator=(const FdReader&) = delete;
FdReader(const FdReader&) = delete;
~FdReader() {
if (mNeedClose) {
close(mFd);
}
}
/* Reads a line from the file descriptor and returns it as a Buffer instance. */
Buffer ReadLine() {
while (true) {
Buffer result = mData.SplitChar('\n');
/* There are essentially three different cases here:
* - '\n' was found "early". In this case, the end of the result buffer
* is before the beginning of the mData buffer (since SplitChar
* amputated it).
* - '\n' was found as the last character of mData. In this case, mData
* is empty, but still points at the end of mBuf. result points to what
* used to be in mData, without the last character.
* - '\n' was not found. In this case too, mData is empty and points at
* the end of mBuf. But result points to the entire buffer that used to
* be pointed by mData.
* Only in the latter case do both result and mData's end match, and it's
* the only case where we need to refill the buffer.
*/
if (result.GetEnd() != mData.GetEnd()) {
return result;
}
/* Since SplitChar emptied mData, make it point to what it had before. */
mData = result;
/* And move it to the beginning of the read buffer. */
mData.Slide(mBuf);
FillBuffer();
if (!mData) {
return Buffer();
}
}
}
private:
/* Fill the read buffer. */
void FillBuffer() {
size_t size = mBuf.GetEnd() - mData.GetEnd();
Buffer remainder(mData.GetEnd(), size);
ssize_t len = 1;
while (remainder && len > 0) {
len = ::read(mFd, const_cast<char*>(remainder.get()), size);
if (len < 0) {
die("Read error");
}
size -= len;
mData.Extend(remainder.Split(len));
}
}
/* File descriptor to read from. */
int mFd;
bool mNeedClose;
/* Part of data that was read from the file descriptor but not returned with
* ReadLine yet. */
Buffer mData;
/* Buffer representation of mRawBuf */
Buffer mBuf;
/* read() buffer */
char mRawBuf[4096];
};
MOZ_BEGIN_EXTERN_C
/* Function declarations for all the replace_malloc _impl functions.
* See memory/build/replace_malloc.c */
#define MALLOC_DECL(name, return_type, ...) \
return_type name##_impl(__VA_ARGS__);
#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
#include "malloc_decls.h"
#define MALLOC_DECL(name, return_type, ...) return_type name(__VA_ARGS__);
#define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC
#include "malloc_decls.h"
#ifdef ANDROID
/* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have.
* While gecko has one in libmozglue, the replay program can't use that.
* Since we're not going to fork anyway, make it a dummy function. */
int pthread_atfork(void (*aPrepare)(void), void (*aParent)(void),
void (*aChild)(void)) {
return 0;
}
#endif
MOZ_END_EXTERN_C
template <unsigned Base = 10>
size_t parseNumber(Buffer aBuf) {
if (!aBuf) {
die("Malformed input");
}
size_t result = 0;
for (const char *c = aBuf.get(), *end = aBuf.GetEnd(); c < end; c++) {
result *= Base;
if ((*c >= '0' && *c <= '9')) {
result += *c - '0';
} else if (Base == 16 && *c >= 'a' && *c <= 'f') {
result += *c - 'a' + 10;
} else if (Base == 16 && *c >= 'A' && *c <= 'F') {
result += *c - 'A' + 10;
} else {
die("Malformed input");
}
}
return result;
}
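/* parseNumber examples: parseNumber(Buffer("42")) == 42, and
* parseNumber<16>(Buffer("7f")) == 127. Any other character (or an empty
* buffer) dies with "Malformed input". */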
static size_t percent(size_t a, size_t b) {
if (!b) {
return 0;
}
return size_t(round(double(a) / double(b) * 100.0));
}
class Distribution {
public:
// Default constructor used for array initialisation.
Distribution()
: mMaxSize(0),
mNextSmallest(0),
mShift(0),
mArrayOffset(0),
mArraySlots(0),
mTotalRequests(0),
mRequests{0} {}
Distribution(size_t max_size, size_t next_smallest, size_t bucket_size)
: mMaxSize(max_size),
mNextSmallest(next_smallest),
mShift(CeilingLog2(bucket_size)),
mArrayOffset(1 + next_smallest),
mArraySlots((max_size - next_smallest) >> mShift),
mTotalRequests(0),
mRequests{
0,
} {
MOZ_ASSERT(mMaxSize);
MOZ_RELEASE_ASSERT(mArraySlots <= MAX_NUM_BUCKETS);
}
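// Worked example (hypothetical values): Distribution(32, 16, 4) gives
// mShift == 2, mArrayOffset == 17 and mArraySlots == 4. A request for 20
// bytes then lands in slot (20 - 17) >> 2 == 0, which covers requests of
// 17 to 20 bytes.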
Distribution& operator=(const Distribution& aOther) = default;
void addRequest(size_t request) {
MOZ_ASSERT(mMaxSize);
mRequests[(request - mArrayOffset) >> mShift]++;
mTotalRequests++;
}
void printDist(platform_handle_t std_err) {
MOZ_ASSERT(mMaxSize);
// The translation to turn a slot index into a memory request size.
const size_t array_offset_add = (1 << mShift) + mNextSmallest;
FdPrintf(std_err, "\n%zu-bin Distribution:\n", mMaxSize);
FdPrintf(std_err, " request : count percent\n");
size_t range_start = mNextSmallest + 1;
for (size_t j = 0; j < mArraySlots; j++) {
size_t range_end = (j << mShift) + array_offset_add;
FdPrintf(std_err, "%5zu - %5zu: %6zu %6zu%%\n", range_start, range_end,
mRequests[j], percent(mRequests[j], mTotalRequests));
range_start = range_end + 1;
}
}
size_t maxSize() const { return mMaxSize; }
private:
static constexpr size_t MAX_NUM_BUCKETS = 16;
// If size is zero this distribution is uninitialised.
size_t mMaxSize;
size_t mNextSmallest;
// Parameters to convert a size into a slot number.
unsigned mShift;
unsigned mArrayOffset;
// The number of slots.
unsigned mArraySlots;
size_t mTotalRequests;
size_t mRequests[MAX_NUM_BUCKETS];
};
#ifdef XP_LINUX
struct MemoryMap {
uintptr_t mStart;
uintptr_t mEnd;
bool mReadable;
bool mPrivate;
bool mAnon;
bool mIsStack;
bool mIsSpecial;
size_t mRSS;
bool IsCandidate() const {
// Candidate mappings are:
//  * readable,
//  * private (not shared),
//  * anonymous or "[heap]" (not another area such as the stack),
//  * not a special mapping such as [vdso], [vvar] or [vsyscall].
//
// The only mappings we're falsely including are the .bss segments of
// shared libraries.
return mReadable && mPrivate && mAnon && !mIsStack && !mIsSpecial;
}
};
class SMapsReader : private FdReader {
private:
explicit SMapsReader(FdReader&& reader) : FdReader(std::move(reader)) {}
public:
static Maybe<SMapsReader> open() {
int fd = ::open(FILENAME, O_RDONLY);
if (fd < 0) {
perror(FILENAME);
return mozilla::Nothing();
}
return Some(SMapsReader(FdReader(fd, true)));
}
Maybe<MemoryMap> readMap(platform_handle_t aStdErr) {
// This is not very tolerant of format changes because things like
// parseNumber will crash if they get a bad value. TODO: make this
// soft-fail.
Buffer line = ReadLine();
if (!line) {
return Nothing();
}
// We're going to be at the start of an entry, start tokenising the first
// line.
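// A typical first line looks like this (hypothetical values):
//   55e8a9a88000-55e8a9aa9000 rw-p 00000000 00:00 0    [heap]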
// Range
Buffer range = line.SplitChar(' ');
uintptr_t range_start = parseNumber<16>(range.SplitChar('-'));
uintptr_t range_end = parseNumber<16>(range);
// Mode.
Buffer mode = line.SplitChar(' ');
if (mode.Length() != 4) {
FdPrintf(aStdErr, "Couldn't parse SMAPS file\n");
return Nothing();
}
bool readable = mode[0] == 'r';
bool private_ = mode[3] == 'p';
// Offset, device and inode.
line.SkipWhitespace();
bool zero_offset = !parseNumber<16>(line.SplitChar(' '));
line.SkipWhitespace();
bool no_device = line.SplitChar(' ') == Buffer("00:00");
line.SkipWhitespace();
bool zero_inode = !parseNumber(line.SplitChar(' '));
bool is_anon = zero_offset && no_device && zero_inode;
// Filename, or empty for anon mappings.
line.SkipWhitespace();
Buffer filename = line.SplitChar(' ');
bool is_stack;
bool is_special;
if (filename && filename[0] == '[') {
is_stack = filename == Buffer("[stack]");
is_special = filename == Buffer("[vdso]") ||
filename == Buffer("[vvar]") ||
filename == Buffer("[vsyscall]");
} else {
is_stack = false;
is_special = false;
}
size_t rss = 0;
while ((line = ReadLine())) {
Buffer field = line.SplitChar(':');
if (field == Buffer("VmFlags")) {
// This is the last field, at least in the current format. Break this
// loop to read the next mapping.
break;
}
if (field == Buffer("Rss")) {
line.SkipWhitespace();
Buffer value = line.SplitChar(' ');
rss = parseNumber(value) * 1024;
}
}
return Some(MemoryMap({range_start, range_end, readable, private_, is_anon,
is_stack, is_special, rss}));
}
static constexpr char FILENAME[] = "/proc/self/smaps";
};
#endif // XP_LINUX
/* Class to handle dispatching the replay function calls to replace-malloc. */
class Replay {
public:
Replay() {
#ifdef _WIN32
// See comment in FdPrintf.h as to why native win32 handles are used.
mStdErr = GetStdHandle(STD_ERROR_HANDLE);
#else
mStdErr = fileno(stderr);
#endif
#ifdef XP_LINUX
BuildInitialMapInfo();
#endif
}
void enableSlopCalculation() { mCalculateSlop = true; }
void enableMemset() { mDoMemset = true; }
MemSlot& operator[](size_t index) const { return mSlots[index]; }
void malloc(Buffer& aArgs, Buffer& aResult) {
MemSlot& aSlot = SlotForResult(aResult);
mOps++;
size_t size = parseNumber(aArgs);
aSlot.mPtr = ::malloc_impl(size);
if (aSlot.mPtr) {
aSlot.mRequest = size;
MaybeCommit(aSlot);
if (mCalculateSlop) {
mTotalRequestedSize += size;
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
}
}
}
void posix_memalign(Buffer& aArgs, Buffer& aResult) {
MemSlot& aSlot = SlotForResult(aResult);
mOps++;
size_t alignment = parseNumber(aArgs.SplitChar(','));
size_t size = parseNumber(aArgs);
void* ptr;
if (::posix_memalign_impl(&ptr, alignment, size) == 0) {
aSlot.mPtr = ptr;
aSlot.mRequest = size;
MaybeCommit(aSlot);
if (mCalculateSlop) {
mTotalRequestedSize += size;
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
}
} else {
aSlot.mPtr = nullptr;
}
}
void aligned_alloc(Buffer& aArgs, Buffer& aResult) {
MemSlot& aSlot = SlotForResult(aResult);
mOps++;
size_t alignment = parseNumber(aArgs.SplitChar(','));
size_t size = parseNumber(aArgs);
aSlot.mPtr = ::aligned_alloc_impl(alignment, size);
if (aSlot.mPtr) {
aSlot.mRequest = size;
MaybeCommit(aSlot);
if (mCalculateSlop) {
mTotalRequestedSize += size;
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
}
}
}
void calloc(Buffer& aArgs, Buffer& aResult) {
MemSlot& aSlot = SlotForResult(aResult);
mOps++;
size_t num = parseNumber(aArgs.SplitChar(','));
size_t size = parseNumber(aArgs);
aSlot.mPtr = ::calloc_impl(num, size);
if (aSlot.mPtr) {
aSlot.mRequest = num * size;
MaybeCommit(aSlot);
if (mCalculateSlop) {
mTotalRequestedSize += num * size;
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
}
}
}
void realloc(Buffer& aArgs, Buffer& aResult) {
MemSlot& aSlot = SlotForResult(aResult);
mOps++;
Buffer dummy = aArgs.SplitChar('#');
if (dummy) {
die("Malformed input");
}
size_t slot_id = parseNumber(aArgs.SplitChar(','));
size_t size = parseNumber(aArgs);
MemSlot& old_slot = (*this)[slot_id];
void* old_ptr = old_slot.mPtr;
old_slot.mPtr = nullptr;
aSlot.mPtr = ::realloc_impl(old_ptr, size);
if (aSlot.mPtr) {
aSlot.mRequest = size;
MaybeCommit(aSlot);
if (mCalculateSlop) {
mTotalRequestedSize += size;
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
}
}
}
void free(Buffer& aArgs, Buffer& aResult) {
if (aResult) {
die("Malformed input");
}
mOps++;
Buffer dummy = aArgs.SplitChar('#');
if (dummy) {
die("Malformed input");
}
size_t slot_id = parseNumber(aArgs);
MemSlot& slot = (*this)[slot_id];
::free_impl(slot.mPtr);
slot.mPtr = nullptr;
}
void memalign(Buffer& aArgs, Buffer& aResult) {
MemSlot& aSlot = SlotForResult(aResult);
mOps++;
size_t alignment = parseNumber(aArgs.SplitChar(','));
size_t size = parseNumber(aArgs);
aSlot.mPtr = ::memalign_impl(alignment, size);
if (aSlot.mPtr) {
aSlot.mRequest = size;
MaybeCommit(aSlot);
if (mCalculateSlop) {
mTotalRequestedSize += size;
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
}
}
}
void valloc(Buffer& aArgs, Buffer& aResult) {
MemSlot& aSlot = SlotForResult(aResult);
mOps++;
size_t size = parseNumber(aArgs);
aSlot.mPtr = ::valloc_impl(size);
if (aSlot.mPtr) {
aSlot.mRequest = size;
MaybeCommit(aSlot);
if (mCalculateSlop) {
mTotalRequestedSize += size;
mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
}
}
}
void jemalloc_stats(Buffer& aArgs, Buffer& aResult) {
if (aArgs || aResult) {
die("Malformed input");
}
mOps++;
jemalloc_stats_t stats;
// This fixed-size array is placed on the stack so that filling it doesn't
// alter the jemalloc state we're about to measure. If the allocator reports
// more bins than MAX_NUM_BINS, we give up below.
const size_t num_bins = ::jemalloc_stats_num_bins();
const size_t MAX_NUM_BINS = 100;
if (num_bins > MAX_NUM_BINS) {
die("Exceeded maximum number of jemalloc stats bins");
}
jemalloc_bin_stats_t bin_stats[MAX_NUM_BINS] = {{0}};
::jemalloc_stats_internal(&stats, bin_stats);
#ifdef XP_LINUX
size_t rss = get_rss();
#endif
size_t num_objects = 0;
size_t num_sloppy_objects = 0;
size_t total_allocated = 0;
size_t total_slop = 0;
size_t large_slop = 0;
size_t large_used = 0;
size_t huge_slop = 0;
size_t huge_used = 0;
size_t bin_slop[MAX_NUM_BINS] = {0};
for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) {
MemSlot& slot = mSlots[slot_id];
if (slot.mPtr) {
size_t used = ::malloc_usable_size_impl(slot.mPtr);
size_t slop = used - slot.mRequest;
total_allocated += used;
total_slop += slop;
num_objects++;
if (slop) {
num_sloppy_objects++;
}
if (used <=
(stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max)) {
// We know that this is an inefficient linear search, but there's a
// small number of bins and this is simple.
for (unsigned i = 0; i < num_bins; i++) {
auto& bin = bin_stats[i];
if (used == bin.size) {
bin_slop[i] += slop;
break;
}
}
} else if (used <= stats.large_max) {
large_slop += slop;
large_used += used;
} else {
huge_slop += slop;
huge_used += used;
}
}
}
// This recovers "committed" by inverting how jemalloc_stats() computes
// "waste" from "committed" and the other parameters.
size_t committed = stats.allocated + stats.waste + stats.pages_dirty +
stats.bookkeeping + stats.bin_unused;
FdPrintf(mStdErr, "\n");
FdPrintf(mStdErr, "Objects: %9zu\n", num_objects);
FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots);
FdPrintf(mStdErr, "Ops: %9zu\n", mOps);
FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped);
FdPrintf(mStdErr, "committed: %9zu\n", committed);
#ifdef XP_LINUX
if (rss) {
FdPrintf(mStdErr, "rss: %9zu\n", rss);
}
#endif
FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated);
FdPrintf(mStdErr, "waste: %9zu\n", stats.waste);
FdPrintf(mStdErr, "dirty: %9zu\n", stats.pages_dirty);
FdPrintf(mStdErr, "fresh: %9zu\n", stats.pages_fresh);
FdPrintf(mStdErr, "madvised: %9zu\n", stats.pages_madvised);
FdPrintf(mStdErr, "bookkeep: %9zu\n", stats.bookkeeping);
FdPrintf(mStdErr, "bin-unused: %9zu\n", stats.bin_unused);
FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max);
FdPrintf(mStdErr, "quantum-wide-max: %9zu\n", stats.quantum_wide_max);
FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.subpage_max);
FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max);
if (mCalculateSlop) {
size_t slop = mTotalAllocatedSize - mTotalRequestedSize;
FdPrintf(mStdErr,
"Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n",
slop / 1024, mTotalAllocatedSize / 1024,
percent(slop, mTotalAllocatedSize));
}
FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n",
num_sloppy_objects, num_objects,
percent(num_sloppy_objects, num_objects));
FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n",
total_slop / 1024, total_allocated / 1024,
percent(total_slop, total_allocated));
FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size",
"unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)",
"used (r)");
for (unsigned i = 0; i < num_bins; i++) {
auto& bin = bin_stats[i];
MOZ_ASSERT(bin.size);
FdPrintf(mStdErr, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n",
bin.size, bin.bytes_unused / 1024, bin.bytes_total / 1024,
percent(bin.bytes_total - bin.bytes_unused, bin.bytes_total),
bin.num_non_full_runs, bin.num_runs,
percent(bin.num_runs - bin.num_non_full_runs, bin.num_runs));
}
FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent");
for (unsigned i = 0; i < num_bins; i++) {
auto& bin = bin_stats[i];
size_t used = bin.bytes_total - bin.bytes_unused;
FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i], used,
percent(bin_slop[i], used));
}
FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used,
percent(large_slop, large_used));
FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used,
percent(huge_slop, huge_used));
print_distributions(stats, bin_stats);
}
private:
/*
* Create and print frequency distributions of memory requests.
*/
void print_distributions(jemalloc_stats_t& stats,
jemalloc_bin_stats_t* bin_stats) {
const size_t num_bins = ::jemalloc_stats_num_bins();
// We compute distributions for all of the bins for small allocations
// (num_bins) plus two more distributions for larger allocations.
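// Note: dists is a variable-length array (a GCC & Clang extension), used so
// that this storage lives on the stack and doesn't disturb the heap state
// being measured.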
Distribution dists[num_bins + 2];
unsigned last_size = 0;
unsigned num_dists = 0;
for (unsigned i = 0; i < num_bins; i++) {
auto& bin = bin_stats[i];
auto& dist = dists[num_dists++];
MOZ_ASSERT(bin.size);
if (bin.size <= 16) {
// 1 byte buckets.
dist = Distribution(bin.size, last_size, 1);
} else if (bin.size <= stats.quantum_max) {
// 4 buckets (4 bytes per bucket with a 16 byte quantum).
dist = Distribution(bin.size, last_size, stats.quantum / 4);
} else if (bin.size <= stats.quantum_wide_max) {
// 8 buckets (32 bytes per bucket with a 256 byte quantum-wide).
dist = Distribution(bin.size, last_size, stats.quantum_wide / 8);
} else {
// 16 buckets.
dist = Distribution(bin.size, last_size, (bin.size - last_size) / 16);
}
last_size = bin.size;
}
// 16 buckets.
dists[num_dists] = Distribution(stats.page_size, last_size,
(stats.page_size - last_size) / 16);
num_dists++;
// Buckets are 1/4 of the page size (12 buckets).
dists[num_dists] =
Distribution(stats.page_size * 4, stats.page_size, stats.page_size / 4);
num_dists++;
MOZ_RELEASE_ASSERT(num_dists <= num_bins + 2);
for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) {
MemSlot& slot = mSlots[slot_id];
if (slot.mPtr) {
for (size_t i = 0; i < num_dists; i++) {
if (slot.mRequest <= dists[i].maxSize()) {
dists[i].addRequest(slot.mRequest);
break;
}
}
}
}
for (unsigned i = 0; i < num_dists; i++) {
dists[i].printDist(mStdErr);
}
}
#ifdef XP_LINUX
size_t get_rss() {
if (mGetRSSFailed) {
return 0;
}
// On Linux we can determine the RSS of the heap area by examining the
// smaps file.
mozilla::Maybe<SMapsReader> reader = SMapsReader::open();
if (!reader) {
mGetRSSFailed = true;
return 0;
}
size_t rss = 0;
while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) {
if (map->IsCandidate() && !mSlots.ownsMapping(map->mStart) &&
!InitialMapsContains(map->mStart)) {
rss += map->mRSS;
}
}
return rss;
}
bool InitialMapsContains(uintptr_t aRangeStart) {
for (unsigned i = 0; i < mNumInitialMaps; i++) {
MOZ_ASSERT(i < MAX_INITIAL_MAPS);
if (mInitialMaps[i] == aRangeStart) {
return true;
}
}
return false;
}
public:
void BuildInitialMapInfo() {
if (mGetRSSFailed) {
return;
}
Maybe<SMapsReader> reader = SMapsReader::open();
if (!reader) {
mGetRSSFailed = true;
return;
}
while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) {
if (map->IsCandidate()) {
if (mNumInitialMaps >= MAX_INITIAL_MAPS) {
FdPrintf(mStdErr, "Too many initial mappings, can't compute RSS\n");
// Disable RSS calculation from now on.
mGetRSSFailed = true;
return;
}
mInitialMaps[mNumInitialMaps++] = map->mStart;
}
}
}
#endif
private:
MemSlot& SlotForResult(Buffer& aResult) {
/* Parse result value and get the corresponding slot. */
Buffer dummy = aResult.SplitChar('=');
Buffer dummy2 = aResult.SplitChar('#');
if (dummy || dummy2) {
die("Malformed input");
}
size_t slot_id = parseNumber(aResult);
mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1);
return mSlots[slot_id];
}
void MaybeCommit(MemSlot& aSlot) {
if (mDoMemset) {
// Write any byte, 0x55 isn't significant.
memset(aSlot.mPtr, 0x55, aSlot.mRequest);
}
}
platform_handle_t mStdErr;
size_t mOps = 0;
// The number of slots that have been used. It is used to iterate over slots
// without accessing those we haven't initialised.
size_t mNumUsedSlots = 0;
MemSlotList mSlots;
size_t mTotalRequestedSize = 0;
size_t mTotalAllocatedSize = 0;
// Whether to calculate slop for all allocations over the runtime of a
// process.
bool mCalculateSlop = false;
bool mDoMemset = false;
#ifdef XP_LINUX
// If we have a failure reading smaps info then this is used to disable that
// feature.
bool mGetRSSFailed = false;
// The initial memory mappings are recorded here at start up. We exclude
// memory in these mappings when computing RSS. We assume they do not grow
// and that no regions are allocated near them; this holds because they only
// record the .bss and .data segments from our binary and shared objects,
// or regions that logalloc-replay has created for MappedArrays.
//
// 64 should be enough for anybody.
static constexpr unsigned MAX_INITIAL_MAPS = 64;
uintptr_t mInitialMaps[MAX_INITIAL_MAPS];
unsigned mNumInitialMaps = 0;
#endif // XP_LINUX
};
static Replay replay;
int main(int argc, const char* argv[]) {
size_t first_pid = 0;
FdReader reader(0);
for (int i = 1; i < argc; i++) {
const char* option = argv[i];
if (strcmp(option, "-s") == 0) {
// Do accounting to calculate allocation slop.
replay.enableSlopCalculation();
} else if (strcmp(option, "-c") == 0) {
// Touch memory as we allocate it.
replay.enableMemset();
} else {
fprintf(stderr, "Unknown command line option: %s\n", option);
return EXIT_FAILURE;
}
}
/* Read log from stdin and dispatch function calls to the Replay instance.
* The log format is essentially:
* <pid> <tid> <function>([<args>])[=<result>]
* <args> is a comma separated list of arguments.
*
* The logs are expected to be preprocessed so that allocations are
* attributed a tracking slot. The input is trusted not to have crazy
* values for these slot numbers.
*
* <result>, as well as some of the args to some of the function calls are
* such slot numbers.
*/
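/* For illustration, preprocessed lines look like this (hypothetical
* values), where #N denotes tracking slot N:
*   1234 1234 malloc(42)=#1
*   1234 1234 realloc(#1,100)=#2
*   1234 1234 free(#2)
*   1234 1234 jemalloc_stats()
*/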
while (true) {
Buffer line = reader.ReadLine();
if (!line) {
break;
}
size_t pid = parseNumber(line.SplitChar(' '));
if (!first_pid) {
first_pid = pid;
}
/* The log may contain data for several processes, only entries for the
* very first that appears are treated. */
if (first_pid != pid) {
continue;
}
/* The log contains thread ids for manual analysis, but we just ignore them
* for now. */
parseNumber(line.SplitChar(' '));
Buffer func = line.SplitChar('(');
Buffer args = line.SplitChar(')');
if (func == Buffer("jemalloc_stats")) {
replay.jemalloc_stats(args, line);
} else if (func == Buffer("free")) {
replay.free(args, line);
} else if (func == Buffer("malloc")) {
replay.malloc(args, line);
} else if (func == Buffer("posix_memalign")) {
replay.posix_memalign(args, line);
} else if (func == Buffer("aligned_alloc")) {
replay.aligned_alloc(args, line);
} else if (func == Buffer("calloc")) {
replay.calloc(args, line);
} else if (func == Buffer("realloc")) {
replay.realloc(args, line);
} else if (func == Buffer("memalign")) {
replay.memalign(args, line);
} else if (func == Buffer("valloc")) {
replay.valloc(args, line);
} else {
die("Malformed input");
}
}
return 0;
}