/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// There are three kinds of samples done by the profiler.
//
// - A "periodic" sample is the most complex kind. It is done in response to a
// timer while the profiler is active. It involves writing a stack trace plus
// a variety of other values (memory measurements, responsiveness
// measurements, markers, etc.) into the main ProfileBuffer. The sampling is
// done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
// get the register values.
//
// - A "synchronous" sample is a simpler kind. It is done in response to an API
// call (profiler_get_backtrace()). It involves writing a stack trace and
// little else into a temporary ProfileBuffer, and wrapping that up in a
// ProfilerBacktrace that can be subsequently used in a marker. The sampling
// is done on-thread, and so Registers::SyncPopulate() is used to get the
// register values.
//
// - A "backtrace" sample is the simplest kind. It is done in response to an
// API call (profiler_suspend_and_sample_thread()). It involves getting a
// stack trace via a ProfilerStackCollector; it does not write to a
// ProfileBuffer. The sampling is done from off-thread, and so uses
// SuspendAndSampleAndResumeThread() to get the register values.
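//
// As a rough usage sketch (see GeckoProfiler.h and ProfilerMarkers.h for the
// exact APIs), a synchronous sample can be captured and later attached to a
// marker:
//
//   UniquePtr<ProfilerBacktrace> backtrace = profiler_get_backtrace();
//   // ... later, hand the backtrace to a marker, e.g. through a MarkerStack
//   // such as MarkerStack::TakeBacktrace().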
#include "platform.h"
#include "GeckoProfiler.h"
#include "GeckoProfilerReporter.h"
#include "PageInformation.h"
#include "ProfileBuffer.h"
#include "ProfiledThreadData.h"
#include "ProfilerBacktrace.h"
#include "ProfilerChild.h"
#include "ProfilerCodeAddressService.h"
#include "ProfilerIOInterposeObserver.h"
#include "ProfilerParent.h"
#include "RegisteredThread.h"
#include "shared-libraries.h"
#include "ThreadInfo.h"
#include "VTuneProfiler.h"
#include "js/TraceLoggerAPI.h"
#include "js/ProfilingFrameIterator.h"
#include "memory_hooks.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Atomics.h"
#include "mozilla/AutoProfilerLabel.h"
#include "mozilla/ExtensionPolicyService.h"
#include "mozilla/extensions/WebExtensionPolicy.h"
#include "mozilla/Preferences.h"
#include "mozilla/Printf.h"
#include "mozilla/ProfileBufferChunkManagerSingle.h"
#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
#include "mozilla/ProfileChunkedBuffer.h"
#include "mozilla/SchedulerGroup.h"
#include "mozilla/Services.h"
#include "mozilla/StackWalk.h"
#ifdef XP_WIN
# include "mozilla/StackWalkThread.h"
#endif
#include "mozilla/StaticPtr.h"
#include "mozilla/ThreadLocal.h"
#include "mozilla/TimeStamp.h"
#include "mozilla/Tuple.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Vector.h"
#include "BaseProfiler.h"
#include "nsDirectoryServiceDefs.h"
#include "nsDirectoryServiceUtils.h"
#include "nsIDocShell.h"
#include "nsIHttpProtocolHandler.h"
#include "nsIObserverService.h"
#include "nsIPropertyBag2.h"
#include "nsIXULAppInfo.h"
#include "nsIXULRuntime.h"
#include "nsJSPrincipals.h"
#include "nsMemoryReporterManager.h"
#include "nsPIDOMWindow.h"
#include "nsProfilerStartParams.h"
#include "nsScriptSecurityManager.h"
#include "nsSystemInfo.h"
#include "nsThreadUtils.h"
#include "nsXULAppAPI.h"
#include "Tracing.h"
#include "prdtoa.h"
#include "prtime.h"
#include <algorithm>
#include <errno.h>
#include <fstream>
#include <ostream>
#include <set>
#include <sstream>
#include <type_traits>
#if defined(GP_OS_android)
# include "mozilla/java/GeckoJavaSamplerNatives.h"
#endif
// Win32 builds always have frame pointers, so FramePointerStackWalk() always
// works.
#if defined(GP_PLAT_x86_windows)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif
// Win64 builds always omit frame pointers, so we use the slower
// MozStackWalk(), which works in that case.
#if defined(GP_PLAT_amd64_windows)
# define HAVE_NATIVE_UNWIND
# define USE_MOZ_STACK_WALK
#endif
// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
// MozStackWalk().
#if defined(GP_PLAT_arm64_windows)
# define HAVE_NATIVE_UNWIND
# define USE_MOZ_STACK_WALK
#endif
// Mac builds only have frame pointers when MOZ_PROFILING is specified, so
// FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
// on Mac.
#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
# define HAVE_NATIVE_UNWIND
# define USE_FRAME_POINTER_STACK_WALK
#endif
// Android builds use the ARM Exception Handling ABI to unwind.
#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
# define HAVE_NATIVE_UNWIND
# define USE_EHABI_STACKWALK
# include "EHABIStackWalk.h"
#endif
// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \
defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
defined(GP_PLAT_arm64_freebsd)
# define HAVE_NATIVE_UNWIND
# define USE_LUL_STACKWALK
# include "lul/LulMain.h"
# include "lul/platform-linux-lul.h"
// On Linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace samples.)
//
// FramePointerStackWalk can produce incomplete stacks when the current entry
// is in a shared library without frame pointers; however, LUL can take a long
// time to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread() like the Background Hang Reporter, so
// they accept the less complete stacks.
# if defined(MOZ_PROFILING)
# define USE_FRAME_POINTER_STACK_WALK
# endif
#endif
// We can only stackwalk without expensive initialization on platforms that
// support FramePointerStackWalk or MozStackWalk. LUL stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
# define HAVE_FASTINIT_NATIVE_UNWIND
#endif
#ifdef MOZ_VALGRIND
# include <valgrind/memcheck.h>
#else
# define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
#endif
#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
# include <ucontext.h>
#endif
using namespace mozilla;
using mozilla::profiler::detail::RacyFeatures;
LazyLogModule gProfilerLog("prof");
namespace mozilla::profiler::detail {
// Statically initialized to 0, then set once from profiler_init(), which should
// be called from the main thread before any other use of the profiler.
ProfilerThreadId scProfilerMainThreadId;
} // namespace mozilla::profiler::detail
#if defined(GP_OS_android)
class GeckoJavaSampler
: public java::GeckoJavaSampler::Natives<GeckoJavaSampler> {
private:
GeckoJavaSampler();
public:
static double GetProfilerTime() {
if (!profiler_is_active()) {
return 0.0;
}
return profiler_time();
}
};
#endif
constexpr static bool ValidateFeatures() {
int expectedFeatureNumber = 0;
// Feature numbers should start at 0 and increase by 1 for each feature.
#define CHECK_FEATURE(n_, str_, Name_, desc_) \
if ((n_) != expectedFeatureNumber) { \
return false; \
} \
++expectedFeatureNumber;
PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)
#undef CHECK_FEATURE
return true;
}
static_assert(ValidateFeatures(), "Feature list is invalid");
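// For example, if PROFILER_FOR_EACH_FEATURE numbered its entries 0, 1, 3
// (skipping 2), CHECK_FEATURE would return false on the third entry and the
// static_assert above would fail at compile time.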
// Return all features that are available on this platform.
static uint32_t AvailableFeatures() {
uint32_t features = 0;
#define ADD_FEATURE(n_, str_, Name_, desc_) \
ProfilerFeature::Set##Name_(features);
// Add all the possible features.
PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
#undef ADD_FEATURE
// Now remove features not supported on this platform/configuration.
#if !defined(GP_OS_android)
ProfilerFeature::ClearJava(features);
#endif
#if !defined(HAVE_NATIVE_UNWIND)
ProfilerFeature::ClearStackWalk(features);
#endif
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
if (getenv("XPCOM_MEM_BLOAT_LOG")) {
NS_WARNING("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations.");
// The memory hooks are available, but the bloat log is enabled, which is
// not compatible with the native allocations tracking. See the comment in
// enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for
// more information.
ProfilerFeature::ClearNativeAllocations(features);
}
#else
// The memory hooks are not available.
ProfilerFeature::ClearNativeAllocations(features);
#endif
if (!JS::TraceLoggerSupported()) {
ProfilerFeature::ClearJSTracer(features);
}
#if !defined(GP_OS_windows)
ProfilerFeature::ClearNoTimerResolutionChange(features);
#endif
return features;
}
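// For example, on a Linux desktop build without memory hooks
// (MOZ_REPLACE_MALLOC && MOZ_PROFILER_MEMORY), the returned mask has Java,
// NativeAllocations and NoTimerResolutionChange cleared, but keeps StackWalk
// because HAVE_NATIVE_UNWIND is defined via USE_LUL_STACKWALK above.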
// Default features common to all contexts (even if not available).
static uint32_t DefaultFeatures() {
return ProfilerFeature::Java | ProfilerFeature::JS | ProfilerFeature::Leaf |
ProfilerFeature::StackWalk | ProfilerFeature::Threads |
ProfilerFeature::CPUUtilization | ProfilerFeature::Screenshots;
}
// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
// available).
static uint32_t StartupExtraDefaultFeatures() {
// Enable file I/Os by default for startup profiles as startup is heavy on
// I/O operations.
return ProfilerFeature::FileIOAll;
}
// RAII class to lock the profiler mutex.
// It also provides a way to determine whether the mutex is held on the
// current thread, so that memory hooks can avoid re-entering the profiler
// while it is locked.
class MOZ_RAII PSAutoLock {
public:
PSAutoLock() : mLock(gPSMutex) {}
PSAutoLock(const PSAutoLock&) = delete;
void operator=(const PSAutoLock&) = delete;
static bool IsLockedOnCurrentThread() {
return gPSMutex.IsLockedOnCurrentThread();
}
private:
static mozilla::baseprofiler::detail::BaseProfilerMutex gPSMutex;
mozilla::baseprofiler::detail::BaseProfilerAutoLock mLock;
};
/* static */ mozilla::baseprofiler::detail::BaseProfilerMutex
PSAutoLock::gPSMutex{"Gecko Profiler mutex"};
// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
// fields.
typedef const PSAutoLock& PSLockRef;
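// Typical usage, as a sketch (the function name is illustrative):
//
//   void profiler_do_something() {
//     PSAutoLock lock;  // Acquires gPSMutex for the scope of `lock`.
//     if (!ActivePS::Exists(lock)) {
//       return;
//     }
//     // ... read/write CorePS and ActivePS fields, passing `lock` along as
//     // proof that gPSMutex is held ...
//   }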
#define PS_GET(type_, name_) \
static type_ name_(PSLockRef) { \
MOZ_ASSERT(sInstance); \
return sInstance->m##name_; \
}
#define PS_GET_LOCKLESS(type_, name_) \
static type_ name_() { \
MOZ_ASSERT(sInstance); \
return sInstance->m##name_; \
}
#define PS_GET_AND_SET(type_, name_) \
PS_GET(type_, name_) \
static void Set##name_(PSLockRef, type_ a##name_) { \
MOZ_ASSERT(sInstance); \
sInstance->m##name_ = a##name_; \
}
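// For example, `PS_GET(uint32_t, Features)` expands to (roughly):
//
//   static uint32_t Features(PSLockRef) {
//     MOZ_ASSERT(sInstance);
//     return sInstance->mFeatures;
//   }
//
// so a caller must already hold gPSMutex (and thus have a PSAutoLock to pass
// as PSLockRef) before reading the field.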
static const size_t MAX_JS_FRAMES = 1024;
using JsFrameBuffer = JS::ProfilingFrameIterator::Frame[MAX_JS_FRAMES];
// All functions in this file can run on multiple threads unless they have an
// NS_IsMainThread() assertion.
// This class contains the profiler's core global state, i.e. that which is
// valid even when the profiler is not active. Most profile operations can't do
// anything useful when this class is not instantiated, so we release-assert
// its non-nullness in all such operations.
//
// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
// PSAutoLock reference as an argument as proof that the gPSMutex is currently
// locked. This makes it clear when gPSMutex is locked and helps avoid
// accidental unlocked accesses to global state. There are ways to circumvent
// this mechanism, but please don't do so without *very* good reason and a
// detailed explanation.
//
// The exceptions to this rule:
//
// - mProcessStartTime, because it's immutable;
//
// - each thread's RacyRegisteredThread object is accessible without locking via
// TLSRegisteredThread::RacyRegisteredThread().
class CorePS {
private:
CorePS()
: mProcessStartTime(TimeStamp::ProcessCreation()),
// This needs its own mutex, because it is used concurrently from
// functions guarded by gPSMutex as well as others without safety (e.g.,
// profiler_add_marker). It is *not* used inside the critical section of
// the sampler, because mutexes cannot be used there.
mCoreBuffer(ProfileChunkedBuffer::ThreadSafety::WithMutex)
#ifdef USE_LUL_STACKWALK
,
mLul(nullptr)
#endif
{
MOZ_ASSERT(NS_IsMainThread(),
"CorePS must be created from the main thread");
}
~CorePS() {}
public:
static void Create(PSLockRef aLock) {
MOZ_ASSERT(!sInstance);
sInstance = new CorePS();
}
static void Destroy(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
delete sInstance;
sInstance = nullptr;
}
// Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
// being locked. This is because CorePS is instantiated so early on the main
// thread that we don't have to worry about it being racy.
static bool Exists() { return !!sInstance; }
static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
size_t& aProfSize, size_t& aLulSize) {
MOZ_ASSERT(sInstance);
aProfSize += aMallocSizeOf(sInstance);
for (auto& registeredThread : sInstance->mRegisteredThreads) {
aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
}
for (auto& registeredPage : sInstance->mRegisteredPages) {
aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
}
// Measurement of the following things may be added later if DMD finds it
// is worthwhile:
// - CorePS::mRegisteredThreads itself (its elements' children are
// measured above)
// - CorePS::mRegisteredPages itself (its elements' children are
// measured above)
// - CorePS::mInterposeObserver
#if defined(USE_LUL_STACKWALK)
if (sInstance->mLul) {
aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
}
#endif
}
// No PSLockRef is needed for this field because it's immutable.
PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)
// No PSLockRef is needed for this field because it's thread-safe.
PS_GET_LOCKLESS(ProfileChunkedBuffer&, CoreBuffer)
PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)
PS_GET(JsFrameBuffer&, JsFrames)
static void AppendRegisteredThread(
PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
MOZ_ASSERT(sInstance);
MOZ_RELEASE_ASSERT(
sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
}
static void RemoveRegisteredThread(PSLockRef,
RegisteredThread* aRegisteredThread) {
MOZ_ASSERT(sInstance);
// Remove aRegisteredThread from mRegisteredThreads.
for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
if (rt.get() == aRegisteredThread) {
sInstance->mRegisteredThreads.erase(&rt);
return;
}
}
}
PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
static void AppendRegisteredPage(PSLockRef,
RefPtr<PageInformation>&& aRegisteredPage) {
MOZ_ASSERT(sInstance);
struct RegisteredPageComparator {
PageInformation* aA;
bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
};
auto foundPageIter = std::find_if(
sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
RegisteredPageComparator{aRegisteredPage.get()});
if (foundPageIter != sInstance->mRegisteredPages.end()) {
if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) {
// When a BrowsingContext is loaded, the first URL loaded in it will be
// about:blank, and if the principal matches, the first document loaded
// in it will share an inner window. That's why we should delete the
// interim about:blank page if it shares the inner window.
sInstance->mRegisteredPages.erase(foundPageIter);
} else {
// Do not register the same page again.
return;
}
}
MOZ_RELEASE_ASSERT(
sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
}
static void RemoveRegisteredPage(PSLockRef,
uint64_t aRegisteredInnerWindowID) {
MOZ_ASSERT(sInstance);
// Remove RegisteredPage from mRegisteredPages by given inner window ID.
sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
return rd->InnerWindowID() == aRegisteredInnerWindowID;
});
}
static void ClearRegisteredPages(PSLockRef) {
MOZ_ASSERT(sInstance);
sInstance->mRegisteredPages.clear();
}
PS_GET(const Vector<BaseProfilerCount*>&, Counters)
static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
MOZ_ASSERT(sInstance);
// We don't own the counters; they may be stored in static objects.
MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
}
static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
// We may be called to remove a counter after the profiler is stopped, or
// late in shutdown.
if (sInstance) {
auto* counter = std::find(sInstance->mCounters.begin(),
sInstance->mCounters.end(), aCounter);
MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
sInstance->mCounters.erase(counter);
}
}
#ifdef USE_LUL_STACKWALK
static lul::LUL* Lul(PSLockRef) {
MOZ_ASSERT(sInstance);
return sInstance->mLul.get();
}
static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
MOZ_ASSERT(sInstance);
sInstance->mLul = std::move(aLul);
}
#endif
PS_GET_AND_SET(const nsACString&, ProcessName)
PS_GET_AND_SET(const nsACString&, ETLDplus1)
private:
// The singleton instance
static CorePS* sInstance;
// The time that the process started.
const TimeStamp mProcessStartTime;
// The thread-safe blocks-oriented buffer into which all profiling data is
// recorded.
// ActivePS controls the lifetime of the underlying contents buffer: when
// ActivePS does not exist, mCoreBuffer is empty and rejects all reads and
// writes; see ActivePS for further details.
// Note: This needs to live here, outside of ActivePS, because some producers
// are indirectly controlled (e.g., by atomic flags) and therefore may still
// attempt to write some data shortly after ActivePS has shut down and deleted
// the underlying buffer in memory.
ProfileChunkedBuffer mCoreBuffer;
// Info on all the registered threads.
// ThreadIds in mRegisteredThreads are unique.
Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;
// Info on all the registered pages.
// InnerWindowIDs in mRegisteredPages are unique.
Vector<RefPtr<PageInformation>> mRegisteredPages;
// Non-owning pointers to all active counters
Vector<BaseProfilerCount*> mCounters;
#ifdef USE_LUL_STACKWALK
// LUL's state. Null prior to the first activation, non-null thereafter.
UniquePtr<lul::LUL> mLul;
#endif
// Process name, provided by child process initialization code.
nsAutoCString mProcessName;
// Private name, provided by child process initialization code (eTLD+1 in
// Fission).
nsAutoCString mETLDplus1;
// This memory buffer is used by the MergeStacks mechanism. Previously it was
// stack allocated, but this led to a stack overflow, as it was too much
// memory. Here the buffer can be pre-allocated, and shared with the
// MergeStacks feature as needed. MergeStacks is only run while holding the
// lock, so it is safe to have only one instance allocated for all of the
// threads.
JsFrameBuffer mJsFrames;
};
CorePS* CorePS::sInstance = nullptr;
ProfileChunkedBuffer& profiler_get_core_buffer() {
MOZ_ASSERT(CorePS::Exists());
return CorePS::CoreBuffer();
}
class SamplerThread;
static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
double aInterval, bool aStackWalkEnabled,
bool aNoTimerResolutionChange);
struct LiveProfiledThreadData {
RegisteredThread* mRegisteredThread;
UniquePtr<ProfiledThreadData> mProfiledThreadData;
};
// The buffer size is provided as a number of "entries"; this is their size
// in bytes.
constexpr static uint32_t scBytesPerEntry = 8;
// This class contains the profiler's global state that is valid only when the
// profiler is active. When not instantiated, the profiler is inactive.
//
// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
// CorePS.
//
class ActivePS {
private:
// We need to decide how many chunks of what size we want to fit in the given
// total maximum capacity for this process, in the (likely) context of
// multiple processes making the same choice, with an inter-process mechanism
// to control the overall memory limit.
// Minimum chunk size allowed, enough for at least one stack.
constexpr static uint32_t scMinimumChunkSize =
2 * ProfileBufferChunkManager::scExpectedMaximumStackSize;
// Ideally we want at least 2 unreleased chunks to work with (1 current and 1
// next), and 2 released chunks (so that one can be recycled when old, leaving
// one with some data).
constexpr static uint32_t scMinimumNumberOfChunks = 4;
// And we want to limit chunks to a maximum size, which is a compromise
// between:
// - A big size, which helps with reducing the rate of allocations and IPCs.
// - A small size, which helps with equalizing the duration of recorded data
// (as the inter-process controller will discard the oldest chunks in all
// Firefox processes).
constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;
public:
// We should be able to store at least the minimum number of the smallest-
// possible chunks.
constexpr static uint32_t scMinimumBufferSize =
scMinimumNumberOfChunks * scMinimumChunkSize;
// Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler:
constexpr static uint32_t scMinimumBufferEntries =
scMinimumBufferSize / scBytesPerEntry;
// Limit to 2GiB.
constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
constexpr static uint32_t scMaximumBufferEntries =
scMaximumBufferSize / scBytesPerEntry;
constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
if (aEntries <= scMinimumBufferEntries) {
return scMinimumBufferEntries;
}
if (aEntries >= scMaximumBufferEntries) {
return scMaximumBufferEntries;
}
return aEntries;
}
private:
constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
scBytesPerEntry / scMinimumNumberOfChunks,
size_t(scMaximumChunkSize)));
}
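// For example, with scBytesPerEntry = 8 and scMinimumNumberOfChunks = 4, a
// capacity of 4 * 1024 * 1024 entries corresponds to 32MiB, i.e. 8MiB per
// chunk, which ChunkSizeForEntries() then caps at scMaximumChunkSize (1MiB).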
static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
// Filter out any features unavailable in this platform/configuration.
aFeatures &= AvailableFeatures();
// Always enable ProfilerFeature::Threads if we have a filter, because
// users sometimes ask to filter by a list of threads but forget to
// explicitly specify ProfilerFeature::Threads.
if (aFilterCount > 0) {
aFeatures |= ProfilerFeature::Threads;
}
// Some features imply others.
if (aFeatures & ProfilerFeature::FileIOAll) {
aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
} else if (aFeatures & ProfilerFeature::FileIO) {
aFeatures |= ProfilerFeature::MainThreadIO;
}
return aFeatures;
}
ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
uint64_t aActiveTabID, const Maybe<double>& aDuration)
: mGeneration(sNextGeneration++),
mCapacity(aCapacity),
mDuration(aDuration),
mInterval(aInterval),
mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
mActiveTabID(aActiveTabID),
mProfileBufferChunkManager(
size_t(ClampToAllowedEntries(aCapacity.Value())) * scBytesPerEntry,
ChunkSizeForEntries(aCapacity.Value())),
mProfileBuffer([this]() -> ProfileChunkedBuffer& {
CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager);
return CorePS::CoreBuffer();
}()),
// The new sampler thread doesn't start sampling immediately because the
// main loop within Run() is blocked until this function's caller
// unlocks gPSMutex.
mSamplerThread(NewSamplerThread(
aLock, mGeneration, aInterval,
ProfilerFeature::HasStackWalk(aFeatures),
ProfilerFeature::HasNoTimerResolutionChange(aFeatures))),
mInterposeObserver((ProfilerFeature::HasMainThreadIO(aFeatures) ||
ProfilerFeature::HasFileIO(aFeatures) ||
ProfilerFeature::HasFileIOAll(aFeatures))
? new ProfilerIOInterposeObserver()
: nullptr),
mIsPaused(false),
mIsSamplingPaused(false)
#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
,
mWasSamplingPaused(false)
#endif
{
// Deep copy aFilters.
MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
for (uint32_t i = 0; i < aFilterCount; ++i) {
mFilters[i] = aFilters[i];
}
#if !defined(RELEASE_OR_BETA)
if (mInterposeObserver) {
// We need to register the observer on the main thread, because we want
// to observe IO that happens on the main thread.
// IOInterposer needs to be initialized before calling
// IOInterposer::Register or our observer will be silently dropped.
if (NS_IsMainThread()) {
IOInterposer::Init();
IOInterposer::Register(IOInterposeObserver::OpAll, mInterposeObserver);
} else {
RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
NS_DispatchToMainThread(
NS_NewRunnableFunction("ActivePS::ActivePS", [=]() {
IOInterposer::Init();
IOInterposer::Register(IOInterposeObserver::OpAll, observer);
}));
}
}
#endif
}
~ActivePS() {
#if !defined(RELEASE_OR_BETA)
if (mInterposeObserver) {
// We need to unregister the observer on the main thread, because that's
// where we've registered it.
if (NS_IsMainThread()) {
IOInterposer::Unregister(IOInterposeObserver::OpAll,
mInterposeObserver);
} else {
RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
NS_DispatchToMainThread(
NS_NewRunnableFunction("ActivePS::~ActivePS", [=]() {
IOInterposer::Unregister(IOInterposeObserver::OpAll, observer);
}));
}
}
#endif
CorePS::CoreBuffer().ResetChunkManager();
}
bool ThreadSelected(const char* aThreadName) {
if (mFilters.empty()) {
return true;
}
std::string name = aThreadName;
std::transform(name.begin(), name.end(), name.begin(), ::tolower);
for (uint32_t i = 0; i < mFilters.length(); ++i) {
std::string filter = mFilters[i];
if (filter == "*") {
return true;
}
std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);
// Crude, non-UTF-8-compatible, case-insensitive substring search.
if (name.find(filter) != std::string::npos) {
return true;
}
// If the filter starts with "pid:", check for a PID match.
if (filter.find("pid:") == 0) {
std::string mypid =
std::to_string(profiler_current_process_id().ToNumber());
if (filter.compare(4, std::string::npos, mypid) == 0) {
return true;
}
}
}
return false;
}
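// For example, with filters {"GeckoMain", "pid:1234"}: a thread named
// "GeckoMain12" is selected by the case-insensitive substring search; in the
// process whose pid is 1234, every thread is selected by the "pid:" filter;
// and an empty filter list or a "*" filter selects all threads.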
public:
static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
uint32_t aFeatures, const char** aFilters,
uint32_t aFilterCount, uint64_t aActiveTabID,
const Maybe<double>& aDuration) {
MOZ_ASSERT(!sInstance);
sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
aFilterCount, aActiveTabID, aDuration);
}
[[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
auto samplerThread = sInstance->mSamplerThread;
delete sInstance;
sInstance = nullptr;
return samplerThread;
}
static bool Exists(PSLockRef) { return !!sInstance; }
static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
const Maybe<double>& aDuration, double aInterval,
uint32_t aFeatures, const char** aFilters,
uint32_t aFilterCount, uint64_t aActiveTabID) {
MOZ_ASSERT(sInstance);
if (sInstance->mCapacity != aCapacity ||
sInstance->mDuration != aDuration ||
sInstance->mInterval != aInterval ||
sInstance->mFeatures != aFeatures ||
sInstance->mFilters.length() != aFilterCount ||
sInstance->mActiveTabID != aActiveTabID) {
return false;
}
for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
return false;
}
}
return true;
}
static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
MOZ_ASSERT(sInstance);
size_t n = aMallocSizeOf(sInstance);
n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
// Measurement of the following members may be added later if DMD finds it
// is worthwhile:
// - mLiveProfiledThreads (both the array itself, and the contents)
// - mDeadProfiledThreads (both the array itself, and the contents)
//
return n;
}
static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
MOZ_ASSERT(sInstance);
return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
sInstance->ThreadSelected(aInfo->Name()));
}
[[nodiscard]] static bool AppendPostSamplingCallback(
PSLockRef, PostSamplingCallback&& aCallback);
// Writes out the current active configuration of the profiler.
static void WriteActiveConfiguration(
PSLockRef aLock, JSONWriter& aWriter,
const Span<const char>& aPropertyName = MakeStringSpan("")) {
if (!sInstance) {
if (!aPropertyName.empty()) {
aWriter.NullProperty(aPropertyName);
} else {
aWriter.NullElement();
}
return;
}
if (!aPropertyName.empty()) {
aWriter.StartObjectProperty(aPropertyName);
} else {
aWriter.StartObjectElement();
}
{
aWriter.StartArrayProperty("features", aWriter.SingleLineStyle);
#define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_) \
if (profiler_feature_active(ProfilerFeature::Name_)) { \
aWriter.StringElement(str_); \
}
PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES)
#undef WRITE_ACTIVE_FEATURES
aWriter.EndArray();
}
{
aWriter.StartArrayProperty("threads", aWriter.SingleLineStyle);
for (const auto& filter : sInstance->mFilters) {
aWriter.StringElement(filter);
}
aWriter.EndArray();
}
{
// Now write all the simple values.
// The interval is also available on profile.meta.interval
aWriter.DoubleProperty("interval", sInstance->mInterval);
aWriter.IntProperty("capacity", sInstance->mCapacity.Value());
if (sInstance->mDuration) {
aWriter.DoubleProperty("duration", sInstance->mDuration.value());
}
// Here we convert a uint64_t to a double. Tab IDs are
// created using `nsContentUtils::GenerateProcessSpecificId`, which is
// specifically designed to use only 53 of the 64 bits, so the value is
// lossless when passed into and out of JS as a double.
aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID);
}
aWriter.EndObject();
}
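// The JSON written above has roughly this shape (values are illustrative,
// and "duration" is only emitted when a maximum duration was set):
//
//   {
//     "features": ["js", "stackwalk"],
//     "threads": ["GeckoMain", "Renderer"],
//     "interval": 1.0,
//     "capacity": 16777216,
//     "activeTabID": 0
//   }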
PS_GET(uint32_t, Generation)
PS_GET(PowerOfTwo32, Capacity)
PS_GET(Maybe<double>, Duration)
PS_GET(double, Interval)
PS_GET(uint32_t, Features)
PS_GET(uint64_t, ActiveTabID)
#define PS_GET_FEATURE(n_, str_, Name_, desc_) \
static bool Feature##Name_(PSLockRef) { \
MOZ_ASSERT(sInstance); \
return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
}
PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)
#undef PS_GET_FEATURE
static uint32_t JSFlags(PSLockRef aLock) {
uint32_t Flags = 0;
Flags |=
FeatureJS(aLock) ? uint32_t(JSInstrumentationFlags::StackSampling) : 0;
Flags |= FeatureJSTracer(aLock)
? uint32_t(JSInstrumentationFlags::TraceLogging)
: 0;
Flags |= FeatureJSAllocations(aLock)
? uint32_t(JSInstrumentationFlags::Allocations)
: 0;
return Flags;
}
PS_GET(const Vector<std::string>&, Filters)
// Not using PS_GET, because only the "Controlled" interface of
// `mProfileBufferChunkManager` should be exposed here.
static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager(
PSLockRef) {
MOZ_ASSERT(sInstance);
return sInstance->mProfileBufferChunkManager;
}
static void FulfillChunkRequests(PSLockRef) {
MOZ_ASSERT(sInstance);
sInstance->mProfileBufferChunkManager.FulfillChunkRequests();
}
static ProfileBuffer& Buffer(PSLockRef) {
MOZ_ASSERT(sInstance);
return sInstance->mProfileBuffer;
}
static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
MOZ_ASSERT(sInstance);
return sInstance->mLiveProfiledThreads;
}
// Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
// for all threads that should be included in a profile, both for threads
// that are still registered, and for threads that have been unregistered but
// still have data in the buffer.
// For threads that have already been unregistered, the RegisteredThread
// pointer will be null.
// The returned array is sorted by thread register time.
// Do not hold on to the return value across thread registration or profiler
// restarts.
static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
ProfiledThreads(PSLockRef) {
MOZ_ASSERT(sInstance);
Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array;
MOZ_RELEASE_ASSERT(
array.initCapacity(sInstance->mLiveProfiledThreads.length() +
sInstance->mDeadProfiledThreads.length()));
for (auto& t : sInstance->mLiveProfiledThreads) {
MOZ_RELEASE_ASSERT(array.append(
std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get())));
}
for (auto& t : sInstance->mDeadProfiledThreads) {
MOZ_RELEASE_ASSERT(
array.append(std::make_pair((RegisteredThread*)nullptr, t.get())));
}
std::sort(array.begin(), array.end(),
[](const std::pair<RegisteredThread*, ProfiledThreadData*>& a,
const std::pair<RegisteredThread*, ProfiledThreadData*>& b) {
return a.second->Info()->RegisterTime() <
b.second->Info()->RegisterTime();
});
return array;
}
static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
Vector<RefPtr<PageInformation>> array;
for (auto& d : CorePS::RegisteredPages(aLock)) {
MOZ_RELEASE_ASSERT(array.append(d));
}
for (auto& d : sInstance->mDeadProfiledPages) {
MOZ_RELEASE_ASSERT(array.append(d));
}
// We don't need to sort the pages like the threads, since we won't show them
// as a list.
return array;
}
// Do a linear search through mLiveProfiledThreads to find the
// ProfiledThreadData object for a RegisteredThread.
static ProfiledThreadData* GetProfiledThreadData(
PSLockRef, RegisteredThread* aRegisteredThread) {
MOZ_ASSERT(sInstance);
for (const LiveProfiledThreadData& thread :
sInstance->mLiveProfiledThreads) {
if (thread.mRegisteredThread == aRegisteredThread) {
return thread.mProfiledThreadData.get();
}
}
return nullptr;
}
static ProfiledThreadData* AddLiveProfiledThread(
PSLockRef, RegisteredThread* aRegisteredThread,
UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
MOZ_ASSERT(sInstance);
MOZ_RELEASE_ASSERT(
sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{
aRegisteredThread, std::move(aProfiledThreadData)}));
// Return a weak pointer to the ProfiledThreadData object.
return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
}
static void UnregisterThread(PSLockRef aLockRef,
RegisteredThread* aRegisteredThread) {
MOZ_ASSERT(sInstance);
DiscardExpiredDeadProfiledThreads(aLockRef);
// Find the right entry in the mLiveProfiledThreads array and remove the
// element, moving the ProfiledThreadData object for the thread into the
// mDeadProfiledThreads array.
// The thread's RegisteredThread object gets destroyed here.
for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
if (thread.mRegisteredThread == aRegisteredThread) {
thread.mProfiledThreadData->NotifyUnregistered(
sInstance->mProfileBuffer.BufferRangeEnd());
MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
std::move(thread.mProfiledThreadData)));
sInstance->mLiveProfiledThreads.erase(
&sInstance->mLiveProfiledThreads[i]);
return;
}
}
}
PS_GET_AND_SET(bool, IsPaused)
// True if sampling is paused (through the generic `SetIsPaused()` or the
// specific `SetIsSamplingPaused()`).
static bool IsSamplingPaused(PSLockRef lock) {
MOZ_ASSERT(sInstance);
return IsPaused(lock) || sInstance->mIsSamplingPaused;
}
static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
MOZ_ASSERT(sInstance);
sInstance->mIsSamplingPaused = aIsSamplingPaused;
}
#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
PS_GET_AND_SET(bool, WasSamplingPaused)
#endif
static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
MOZ_ASSERT(sInstance);
uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
// Discard any dead threads that were unregistered before bufferRangeStart.
sInstance->mDeadProfiledThreads.eraseIf(
[bufferRangeStart](
const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
Maybe<uint64_t> bufferPosition =
aProfiledThreadData->BufferPositionWhenUnregistered();
MOZ_RELEASE_ASSERT(bufferPosition,
"should have unregistered this thread");
return *bufferPosition < bufferRangeStart;
});
}
static void UnregisterPage(PSLockRef aLock,
uint64_t aRegisteredInnerWindowID) {
MOZ_ASSERT(sInstance);
auto& registeredPages = CorePS::RegisteredPages(aLock);
for (size_t i = 0; i < registeredPages.length(); i++) {
RefPtr<PageInformation>& page = registeredPages[i];
if (page->InnerWindowID() == aRegisteredInnerWindowID) {
page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
MOZ_RELEASE_ASSERT(
sInstance->mDeadProfiledPages.append(std::move(page)));
registeredPages.erase(&registeredPages[i--]);
}
}
}
static void DiscardExpiredPages(PSLockRef) {
MOZ_ASSERT(sInstance);
uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
// Discard any dead pages that were unregistered before
// bufferRangeStart.
sInstance->mDeadProfiledPages.eraseIf(
[bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
Maybe<uint64_t> bufferPosition =
aProfiledPage->BufferPositionWhenUnregistered();
MOZ_RELEASE_ASSERT(bufferPosition,
"should have unregistered this page");
return *bufferPosition < bufferRangeStart;
});
}
static void ClearUnregisteredPages(PSLockRef) {
MOZ_ASSERT(sInstance);
sInstance->mDeadProfiledPages.clear();
}
static void ClearExpiredExitProfiles(PSLockRef) {
MOZ_ASSERT(sInstance);
uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
// Discard exit profiles that were gathered before our buffer RangeStart.
// If we have started to overwrite our data from when the Base profile was
// added, we should get rid of that Base profile because it's now older than
// our oldest Gecko profile data.
//
// When adding: (In practice the starting buffer should be empty)
// v Start == End
// | <-- Buffer range, initially empty.
// ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
//
// Later, still in range:
// v Start v End
// |=========| <-- Buffer range growing.
// ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
//
// Even later, now out of range:
// v Start v End
// |============| <-- Buffer range full and sliding.
// ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! -> Discard it
if (sInstance->mBaseProfileThreads &&
sInstance->mGeckoIndexWhenBaseProfileAdded
.ConvertToProfileBufferIndex() <
CorePS::CoreBuffer().GetState().mRangeStart) {
DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
sInstance->mBaseProfileThreads.get());
sInstance->mBaseProfileThreads.reset();
}
sInstance->mExitProfiles.eraseIf(
[bufferRangeStart](const ExitProfile& aExitProfile) {
return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
});
}
static void AddBaseProfileThreads(PSLockRef aLock,
UniquePtr<char[]> aBaseProfileThreads) {
MOZ_ASSERT(sInstance);
DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get());
sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads);
sInstance->mGeckoIndexWhenBaseProfileAdded =
ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
CorePS::CoreBuffer().GetState().mRangeEnd);
}
static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
ClearExpiredExitProfiles(aLock);
DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p",
sInstance->mBaseProfileThreads.get());
return std::move(sInstance->mBaseProfileThreads);
}
static void AddExitProfile(PSLockRef aLock, const nsCString& aExitProfile) {
MOZ_ASSERT(sInstance);
ClearExpiredExitProfiles(aLock);
MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()}));
}
static Vector<nsCString> MoveExitProfiles(PSLockRef aLock) {
MOZ_ASSERT(sInstance);
ClearExpiredExitProfiles(aLock);
Vector<nsCString> profiles;
MOZ_RELEASE_ASSERT(
profiles.initCapacity(sInstance->mExitProfiles.length()));
for (auto& profile : sInstance->mExitProfiles) {
MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
}
sInstance->mExitProfiles.clear();
return profiles;
}
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
static void SetMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
MOZ_ASSERT(sInstance);
sInstance->mMemoryCounter = aMemoryCounter;
}
static bool IsMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
MOZ_ASSERT(sInstance);
return sInstance->mMemoryCounter == aMemoryCounter;
}
#endif
private:
// The singleton instance.
static ActivePS* sInstance;
// We need to track activity generations. If we didn't, we could have the
// following scenario.
//
// - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
// gPSMutex, deletes the SamplerThread (which does a join).
//
// - profiler_start() runs on a different thread, locks gPSMutex,
// re-instantiates ActivePS, unlocks gPSMutex -- all before the join
// completes.
//
// - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
// and continues as if the start/stop pair didn't occur. Also
// profiler_stop() is stuck, unable to finish.
//
// By checking ActivePS *and* the generation, we can avoid this scenario.
// sNextGeneration is used to track the next generation number; it is static
// because it must persist across different ActivePS instantiations.
const uint32_t mGeneration;
static uint32_t sNextGeneration;
// The maximum number of entries in mProfileBuffer.
const PowerOfTwo32 mCapacity;
// The maximum duration of entries in mProfileBuffer, in seconds.
const Maybe<double> mDuration;
// The interval between samples, measured in milliseconds.
const double mInterval;
// The profile features that are enabled.
const uint32_t mFeatures;
// Substrings of names of threads we want to profile.
Vector<std::string> mFilters;
// ID of the active browser screen's active tab.
// It is used to determine the profiled tab. It is 0 if we failed to get
// the ID.
const uint64_t mActiveTabID;
// The chunk manager used by `mProfileBuffer` below.
ProfileBufferChunkManagerWithLocalLimit mProfileBufferChunkManager;
// The buffer into which all samples are recorded.
ProfileBuffer mProfileBuffer;
// ProfiledThreadData objects for any threads that were profiled at any point
// during this run of the profiler:
// - mLiveProfiledThreads contains all threads that are still registered, and
// - mDeadProfiledThreads contains all threads that have already been
// unregistered but for which there is still data in the profile buffer.
Vector<LiveProfiledThreadData> mLiveProfiledThreads;
Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
// Info on all the dead pages.
// Registered pages are moved to this array after unregistration.
// We keep them in case they are needed in the profile data, and remove them
// once we are sure they won't be needed anymore.
Vector<RefPtr<PageInformation>> mDeadProfiledPages;
// The current sampler thread. This class is not responsible for destroying
// the SamplerThread object; the Destroy() method returns it so the caller
// can destroy it.
SamplerThread* const mSamplerThread;
// The interposer that records main thread I/O.
RefPtr<ProfilerIOInterposeObserver> mInterposeObserver;
// Is the profiler fully paused?
bool mIsPaused;
// Is the profiler periodic sampling paused?
bool mIsSamplingPaused;
#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
// Used to record whether the sampler was paused just before forking. False
// at all times except just before/after forking.
bool mWasSamplingPaused;
#endif
// Optional startup profile thread array from BaseProfiler.
UniquePtr<char[]> mBaseProfileThreads;
ProfileBufferBlockIndex mGeckoIndexWhenBaseProfileAdded;
struct ExitProfile {
nsCString mJSON;
uint64_t mBufferPositionAtGatherTime;
};
Vector<ExitProfile> mExitProfiles;
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
Atomic<const BaseProfilerCount*> mMemoryCounter;
#endif
};
ActivePS* ActivePS::sInstance = nullptr;
uint32_t ActivePS::sNextGeneration = 0;
#undef PS_GET
#undef PS_GET_LOCKLESS
#undef PS_GET_AND_SET
using ProfilerStateChangeMutex =
mozilla::baseprofiler::detail::BaseProfilerMutex;
using ProfilerStateChangeLock =
mozilla::baseprofiler::detail::BaseProfilerAutoLock;
static ProfilerStateChangeMutex gProfilerStateChangeMutex;
struct IdentifiedProfilingStateChangeCallback {
ProfilingStateSet mProfilingStateSet;
ProfilingStateChangeCallback mProfilingStateChangeCallback;
uintptr_t mUniqueIdentifier;
explicit IdentifiedProfilingStateChangeCallback(
ProfilingStateSet aProfilingStateSet,
ProfilingStateChangeCallback&& aProfilingStateChangeCallback,
uintptr_t aUniqueIdentifier)
: mProfilingStateSet(aProfilingStateSet),
mProfilingStateChangeCallback(aProfilingStateChangeCallback),
mUniqueIdentifier(aUniqueIdentifier) {}
};
using IdentifiedProfilingStateChangeCallbackUPtr =
UniquePtr<IdentifiedProfilingStateChangeCallback>;
static Vector<IdentifiedProfilingStateChangeCallbackUPtr>
mIdentifiedProfilingStateChangeCallbacks;
void profiler_add_state_change_callback(
ProfilingStateSet aProfilingStateSet,
ProfilingStateChangeCallback&& aCallback,
uintptr_t aUniqueIdentifier /* = 0 */) {
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
#ifdef DEBUG
// Check that a non-zero id is not already in use. But forgive it in non-DEBUG
// builds; in the worst case a duplicate callback may get removed too early.
if (aUniqueIdentifier != 0) {
for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
mIdentifiedProfilingStateChangeCallbacks) {
MOZ_ASSERT(idedCallback->mUniqueIdentifier != aUniqueIdentifier);
}
}
#endif // DEBUG
if (aProfilingStateSet.contains(ProfilingState::AlreadyActive) &&
profiler_is_active()) {
aCallback(ProfilingState::AlreadyActive);
}
(void)mIdentifiedProfilingStateChangeCallbacks.append(
MakeUnique<IdentifiedProfilingStateChangeCallback>(
aProfilingStateSet, std::move(aCallback), aUniqueIdentifier));
}
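// As a sketch (the callback and identifier are illustrative), a consumer
// interested in the profiler's state changes might register itself like this:
//
//   static void OnProfilingStateChange(ProfilingState aState) {
//     // React to the state change, e.g. flush pending data.
//   }
//
//   ProfilingStateSet states;
//   states += ProfilingState::AlreadyActive;
//   states += ProfilingState::RemovingCallback;
//   profiler_add_state_change_callback(
//       states, OnProfilingStateChange,
//       reinterpret_cast<uintptr_t>(&gSomeOwner));
//
// Passing the same identifier to profiler_remove_state_change_callback()
// later removes this entry, invoking the callback with RemovingCallback
// first if that state was requested (as the code below does).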
// Remove the callback with the given identifier.
void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) {
MOZ_ASSERT(aUniqueIdentifier != 0);
if (aUniqueIdentifier == 0) {
// Forgive zero in non-DEBUG builds.
return;
}
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
mIdentifiedProfilingStateChangeCallbacks.eraseIf(
[aUniqueIdentifier](
const IdentifiedProfilingStateChangeCallbackUPtr& aIdedCallback) {
if (aIdedCallback->mUniqueIdentifier != aUniqueIdentifier) {
return false;
}
if (aIdedCallback->mProfilingStateSet.contains(
ProfilingState::RemovingCallback)) {
aIdedCallback->mProfilingStateChangeCallback(
ProfilingState::RemovingCallback);
}
return true;
});
}
static void invoke_profiler_state_change_callbacks(
ProfilingState aProfilingState) {
MOZ_ASSERT(!PSAutoLock::IsLockedOnCurrentThread());
ProfilerStateChangeLock lock(gProfilerStateChangeMutex);
for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
mIdentifiedProfilingStateChangeCallbacks) {
if (idedCallback->mProfilingStateSet.contains(aProfilingState)) {
idedCallback->mProfilingStateChangeCallback(aProfilingState);
}
}
}
Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);
// Each live thread has a RegisteredThread, and we store a reference to it in
// TLS. This class encapsulates that TLS, and also handles the associated
// profiling stack used by AutoProfilerLabel.
class TLSRegisteredThread {
public:
// This should only be called once before any other access.
// In this case it's called from `profiler_init()` on the main thread, before
// the main thread registers itself.
static void Init() {
MOZ_ASSERT(sState == State::Uninitialized, "Already initialized");
AutoProfilerLabel::ProfilingStackOwnerTLS::Init();
MOZ_ASSERT(
AutoProfilerLabel::ProfilingStackOwnerTLS::sState !=
AutoProfilerLabel::ProfilingStackOwnerTLS::State::Uninitialized,
"Unexpected ProfilingStackOwnerTLS::sState after "
"ProfilingStackOwnerTLS::Init()");
sState =
(AutoProfilerLabel::ProfilingStackOwnerTLS::sState ==
AutoProfilerLabel::ProfilingStackOwnerTLS::State::Initialized &&
sRegisteredThread.init())
? State::Initialized
: State::Unavailable;
}
static bool IsTLSInited() {
MOZ_ASSERT(sState != State::Uninitialized,
"TLSRegisteredThread should only be accessed after Init()");
return sState == State::Initialized;
}
// Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
static class RegisteredThread* RegisteredThread(PSLockRef) {
if (!IsTLSInited()) {
return nullptr;
}
return sRegisteredThread.get();
}
// Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
static class RacyRegisteredThread* RacyRegisteredThread() {
if (!IsTLSInited()) {
return nullptr;
}
class RegisteredThread* registeredThread = sRegisteredThread.get();
return registeredThread ? &registeredThread->RacyRegisteredThread()
: nullptr;
}
// Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
// RacyRegisteredThread() can also be used to get the ProfilingStack, but that
// is marginally slower because it requires an extra pointer indirection.
static ProfilingStack* Stack() {
if (!IsTLSInited()) {
return nullptr;
}
ProfilingStackOwner* profilingStackOwner =
AutoProfilerLabel::ProfilingStackOwnerTLS::Get();
if (!profilingStackOwner) {
return nullptr;
}
return &profilingStackOwner->ProfilingStack();
}
static void SetRegisteredThreadAndAutoProfilerLabelProfilingStack(
PSLockRef, class RegisteredThread* aRegisteredThread) {
if (!IsTLSInited()) {
return;
}
MOZ_RELEASE_ASSERT(
aRegisteredThread,
"Use ResetRegisteredThread() instead of SetRegisteredThread(nullptr)");
sRegisteredThread.set(aRegisteredThread);
ProfilingStackOwner& profilingStackOwner =
aRegisteredThread->RacyRegisteredThread().ProfilingStackOwner();
profilingStackOwner.AddRef();
AutoProfilerLabel::ProfilingStackOwnerTLS::Set(&profilingStackOwner);
}
// Only reset the registered thread. The AutoProfilerLabel's ProfilingStack
// is kept, because the thread may not have unregistered itself yet, so it may
// still push/pop labels even after the profiler has shut down.
static void ResetRegisteredThread(PSLockRef) {
if (!IsTLSInited()) {
return;
}
sRegisteredThread.set(nullptr);
}
// Reset the AutoProfilerLabel's ProfilingStack, because the thread is
// unregistering itself.
static void ResetAutoProfilerLabelProfilingStack(PSLockRef) {
if (!IsTLSInited()) {
return;
}
MOZ_RELEASE_ASSERT(
AutoProfilerLabel::ProfilingStackOwnerTLS::Get(),
"ResetAutoProfilerLabelProfilingStack should only be called once");
AutoProfilerLabel::ProfilingStackOwnerTLS::Get()->Release();
AutoProfilerLabel::ProfilingStackOwnerTLS::Set(nullptr);
}
private:
// Only written once from `profiler_init` calling
// `TLSRegisteredThread::Init()`; all reads should only happen after `Init()`,
// so there is no need to make it atomic.
enum class State { Uninitialized = 0, Initialized, Unavailable };
static State sState;
// This is a non-owning reference to the RegisteredThread;
// CorePS::mRegisteredThreads is the owning reference. On thread
// deregistration, this reference is cleared and the RegisteredThread is
// destroyed.
static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
};
// Zero-initialized to State::Uninitialized.
/* static */
TLSRegisteredThread::State TLSRegisteredThread::sState;
/* static */
MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;
// Only written once from `profiler_init` (through `TLSRegisteredThread::Init()`
// and `AutoProfilerLabel::ProfilingStackOwnerTLS::Init()`); all reads should
// only happen after `Init()`, so there is no need to make it atomic.
// Zero-initialized to State::Uninitialized.
/* static */
AutoProfilerLabel::ProfilingStackOwnerTLS::State
AutoProfilerLabel::ProfilingStackOwnerTLS::sState;
// Although you can access a thread's ProfilingStack via
// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
// directly to the ProfilingStack. Here's why.
//
// - We need to be able to push to and pop from the ProfilingStack in
// AutoProfilerLabel.
//
// - The class functions are hot and must be defined in GeckoProfiler.h so they
// can be inlined.
//
// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
// GeckoProfiler.h.
//
// This second pointer isn't ideal, but does provide a way to satisfy those
// constraints. TLSRegisteredThread is responsible for updating it.
//
// The (Racy)RegisteredThread and AutoProfilerLabel::ProfilingStackOwnerTLS
// co-own the thread's ProfilingStack, so whichever is reset second is
// responsible for destroying the ProfilingStack. Because MOZ_THREAD_LOCAL
// doesn't support RefPtr, AddRef and Release are done explicitly in
// TLSRegisteredThread.
/* static */
MOZ_THREAD_LOCAL(ProfilingStackOwner*)