/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// There are three kinds of samples done by the profiler.
//
// - A "periodic" sample is the most complex kind. It is done in response to a
//   timer while the profiler is active. It involves writing a stack trace plus
//   a variety of other values (memory measurements, responsiveness
//   measurements, markers, etc.) into the main ProfileBuffer. The sampling is
//   done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
//   get the register values.
//
// - A "synchronous" sample is a simpler kind. It is done in response to an API
//   call (profiler_get_backtrace()). It involves writing a stack trace and
//   little else into a temporary ProfileBuffer, and wrapping that up in a
//   ProfilerBacktrace that can be subsequently used in a marker. The sampling
//   is done on-thread, and so Registers::SyncPopulate() is used to get the
//   register values.
//
// - A "backtrace" sample is the simplest kind. It is done in response to an
//   API call (profiler_suspend_and_sample_thread()). It involves getting a
//   stack trace via a ProfilerStackCollector; it does not write to a
//   ProfileBuffer. The sampling is done from off-thread, and so uses
//   SuspendAndSampleAndResumeThread() to get the register values.
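
// For orientation, a synchronous sample is typically captured and later
// attached to a marker along these lines (illustrative sketch only; the exact
// marker API lives in GeckoProfiler.h):
//
//   UniquePtr<ProfilerBacktrace> backtrace = profiler_get_backtrace();
//   // ... keep the backtrace alive and reference it from a marker payload ...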
#include "platform.h"
30
31
#include "GeckoProfiler.h"
32
#include "GeckoProfilerReporter.h"
33
#include "PageInformation.h"
34
#include "ProfiledThreadData.h"
35
#include "ProfilerBacktrace.h"
36
#include "ProfileBuffer.h"
37
#include "ProfilerIOInterposeObserver.h"
38
#include "ProfilerMarkerPayload.h"
39
#include "ProfilerParent.h"
40
#include "RegisteredThread.h"
41
#include "shared-libraries.h"
42
#include "ThreadInfo.h"
43
#include "VTuneProfiler.h"
44
45
#include "js/TraceLoggerAPI.h"
46
#include "js/ProfilingFrameIterator.h"
47
#include "memory_hooks.h"
48
#include "mozilla/ArrayUtils.h"
49
#include "mozilla/Atomics.h"
50
#include "mozilla/AutoProfilerLabel.h"
51
#include "mozilla/ExtensionPolicyService.h"
52
#include "mozilla/extensions/WebExtensionPolicy.h"
53
#include "mozilla/Printf.h"
54
#include "mozilla/Services.h"
55
#include "mozilla/StackWalk.h"
56
#include "mozilla/StaticPtr.h"
57
#include "mozilla/SystemGroup.h"
58
#include "mozilla/ThreadLocal.h"
59
#include "mozilla/TimeStamp.h"
60
#include "mozilla/TypeTraits.h"
61
#include "mozilla/Tuple.h"
62
#include "mozilla/UniquePtr.h"
63
#include "mozilla/Vector.h"
64
#include "BaseProfiler.h"
65
#include "nsDirectoryServiceDefs.h"
66
#include "nsDirectoryServiceUtils.h"
67
#include "nsIDocShell.h"
68
#include "nsIHttpProtocolHandler.h"
69
#include "nsIObserverService.h"
70
#include "nsIPropertyBag2.h"
71
#include "nsIXULAppInfo.h"
72
#include "nsIXULRuntime.h"
73
#include "nsJSPrincipals.h"
74
#include "nsMemoryReporterManager.h"
75
#include "nsProfilerStartParams.h"
76
#include "nsScriptSecurityManager.h"
77
#include "nsThreadUtils.h"
78
#include "nsXULAppAPI.h"
79
#include "prdtoa.h"
80
#include "prtime.h"
81
82
#include <algorithm>
83
#include <errno.h>
84
#include <fstream>
85
#include <ostream>
86
#include <sstream>
87
88
#ifdef MOZ_TASK_TRACER
89
# include "GeckoTaskTracer.h"
90
#endif
91
92
#if defined(GP_OS_android)
93
# include "GeneratedJNINatives.h"
94
# include "GeneratedJNIWrappers.h"
95
#endif
96
97
// Win32 builds always have frame pointers, so FramePointerStackWalk() always
// works.
#if defined(GP_PLAT_x86_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// Win64 builds always omit frame pointers, so we use the slower
// MozStackWalk(), which works in that case.
#if defined(GP_PLAT_amd64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
// MozStackWalk().
#if defined(GP_PLAT_arm64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// Mac builds only have frame pointers when MOZ_PROFILING is specified, so
// FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
// on Mac.
#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// Android builds use the ARM Exception Handling ABI to unwind.
#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
#  define HAVE_NATIVE_UNWIND
#  define USE_EHABI_STACKWALK
#  include "EHABIStackWalk.h"
#endif

// Linux builds use LUL, which uses DWARF info to unwind stacks.
#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||  \
    defined(GP_PLAT_arm64_android)
#  define HAVE_NATIVE_UNWIND
#  define USE_LUL_STACKWALK
#  include "lul/LulMain.h"
#  include "lul/platform-linux-lul.h"

// On Linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace.)
//
// FramePointerStackWalk can produce incomplete stacks when the current entry
// is in a shared library without frame pointers; however, LUL can take a long
// time to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread like the Background Hang Reporter.
#  if defined(MOZ_PROFILING)
#    define USE_FRAME_POINTER_STACK_WALK
#  endif
#endif

// We can only stackwalk without expensive initialization on platforms which
// support FramePointerStackWalk or MozStackWalk. LUL stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
#  define HAVE_FASTINIT_NATIVE_UNWIND
#endif

#ifdef MOZ_VALGRIND
#  include <valgrind/memcheck.h>
#else
#  define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
#endif

#if defined(GP_OS_linux) || defined(GP_OS_android)
#  include <ucontext.h>
#endif

using namespace mozilla;
using mozilla::profiler::detail::RacyFeatures;

LazyLogModule gProfilerLog("prof");

#if defined(GP_OS_android)
class GeckoJavaSampler
    : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> {
 private:
  GeckoJavaSampler();

 public:
  static double GetProfilerTime() {
    if (!profiler_is_active()) {
      return 0.0;
    }
    return profiler_time();
  };
};
#endif

// Return all features that are available on this platform.
static uint32_t AvailableFeatures() {
  uint32_t features = 0;

#define ADD_FEATURE(n_, str_, Name_, desc_) \
  ProfilerFeature::Set##Name_(features);

  // Add all the possible features.
  PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)

#undef ADD_FEATURE

  // Now remove features not supported on this platform/configuration.
#if !defined(GP_OS_android)
  ProfilerFeature::ClearJava(features);
#endif
#if !defined(HAVE_NATIVE_UNWIND)
  ProfilerFeature::ClearStackWalk(features);
#endif
#if !defined(MOZ_TASK_TRACER)
  ProfilerFeature::ClearTaskTracer(features);
#endif
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  if (getenv("XPCOM_MEM_BLOAT_LOG")) {
    // The memory hooks are available, but the bloat log is enabled, which is
    // not compatible with the native allocations tracking. See the comment in
    // enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for
    // more information.
    ProfilerFeature::ClearNativeAllocations(features);
  }
#else
  // The memory hooks are not available.
  ProfilerFeature::ClearNativeAllocations(features);
#endif
  if (!JS::TraceLoggerSupported()) {
    ProfilerFeature::ClearJSTracer(features);
  }

  return features;
}
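
// For example (illustrative summary of the function above): on a desktop
// Linux build without MOZ_TASK_TRACER, the returned mask has
// ProfilerFeature::Java and ProfilerFeature::TaskTracer cleared, while
// ProfilerFeature::StackWalk stays set because HAVE_NATIVE_UNWIND is defined
// above for that platform.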

// Default features common to all contexts (even if not available).
static uint32_t DefaultFeatures() {
  return ProfilerFeature::Java | ProfilerFeature::JS | ProfilerFeature::Leaf |
         ProfilerFeature::StackWalk | ProfilerFeature::Threads;
}

// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
// available).
static uint32_t StartupExtraDefaultFeatures() {
  // Enable mainthreadio by default for startup profiles as startup is heavy on
  // I/O operations, and main thread I/O is really important to see there.
  return ProfilerFeature::MainThreadIO;
}

// This class is a thin shell around mozglue PlatformMutex. It does not
// preserve behavior in JS record/replay. It provides a mechanism to determine
// whether it is currently locked, so that memory hooks can avoid re-entering
// the profiler while it already holds the lock.
class PSMutex : private ::mozilla::detail::MutexImpl {
 public:
  PSMutex()
      : ::mozilla::detail::MutexImpl(
            ::mozilla::recordreplay::Behavior::DontPreserve) {}

  void Lock() {
    const int tid = profiler_current_thread_id();
    MOZ_ASSERT(tid != 0);

    // This is only designed to catch recursive locking:
    // - If the current thread doesn't own the mutex, `mOwningThreadId` must be
    //   zero or a different thread id written by another thread; it may change
    //   again at any time, but never to the current thread's id.
    // - If the current thread owns the mutex, `mOwningThreadId` must be its id.
    MOZ_ASSERT(mOwningThreadId != tid);

    ::mozilla::detail::MutexImpl::lock();

    // We now hold the mutex; it should have been in the unlocked state before.
    MOZ_ASSERT(mOwningThreadId == 0);
    // And we can write our own thread id.
    mOwningThreadId = tid;
  }

  void Unlock() {
    // This should never trigger! But check just in case something has gone
    // very wrong (e.g., memory corruption).
    AssertCurrentThreadOwns();

    // We're still holding the mutex here, so it's safe to just reset
    // `mOwningThreadId`.
    mOwningThreadId = 0;

    ::mozilla::detail::MutexImpl::unlock();
  }

  // Does the current thread own this mutex?
  // False positives or false negatives are not possible:
  // - If `true`, the current thread owns the mutex; it has written its own
  //   `mOwningThreadId` when taking the lock, and no-one else can modify it
  //   until the current thread itself unlocks the mutex.
  // - If `false`, the current thread does not own the mutex, therefore either
  //   `mOwningThreadId` is zero (unlocked), or it is a different thread id
  //   written by another thread, but it can never be the current thread's id
  //   until the current thread itself locks the mutex.
  bool IsLockedOnCurrentThread() const {
    return mOwningThreadId == profiler_current_thread_id();
  }

  void AssertCurrentThreadOwns() const {
    MOZ_ASSERT(IsLockedOnCurrentThread());
  }

  void AssertCurrentThreadDoesNotOwn() const {
    MOZ_ASSERT(!IsLockedOnCurrentThread());
  }

 private:
  // Zero when unlocked, or the thread id of the owning thread.
  // This should only be used to compare with the current thread id; any other
  // number (0 or another id) could change at any time because the current
  // thread wouldn't own the lock.
  Atomic<int, MemoryOrdering::SequentiallyConsistent,
         recordreplay::Behavior::DontPreserve>
      mOwningThreadId{0};
};

// RAII class to lock the profiler mutex.
class MOZ_RAII PSAutoLock {
 public:
  explicit PSAutoLock(PSMutex& aMutex) : mMutex(aMutex) { mMutex.Lock(); }
  ~PSAutoLock() { mMutex.Unlock(); }

 private:
  PSMutex& mMutex;
};

// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
// fields.
typedef const PSAutoLock& PSLockRef;
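
// Illustrative sketch (not a function in this file): code paths that touch
// CorePS/ActivePS take the lock once at their entry point and pass the proof
// of locking down as a PSLockRef, e.g.:
//
//   void SomeProfilerEntryPoint() {  // hypothetical name
//     PSAutoLock lock(gPSMutex);
//     if (ActivePS::Exists(lock)) {
//       /* ... use ActivePS::Buffer(lock) ... */
//     }
//   }
//
// Memory hooks, which may be reached while the profiler already holds the
// lock, can use gPSMutex.IsLockedOnCurrentThread() to bail out instead of
// re-entering.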

#define PS_GET(type_, name_)      \
  static type_ name_(PSLockRef) { \
    MOZ_ASSERT(sInstance);        \
    return sInstance->m##name_;   \
  }

#define PS_GET_LOCKLESS(type_, name_) \
  static type_ name_() {              \
    MOZ_ASSERT(sInstance);            \
    return sInstance->m##name_;       \
  }

#define PS_GET_AND_SET(type_, name_)                  \
  PS_GET(type_, name_)                                \
  static void Set##name_(PSLockRef, type_ a##name_) { \
    MOZ_ASSERT(sInstance);                            \
    sInstance->m##name_ = a##name_;                   \
  }
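
// For example, `PS_GET(uint32_t, Features)` inside a class expands to:
//
//   static uint32_t Features(PSLockRef) {
//     MOZ_ASSERT(sInstance);
//     return sInstance->mFeatures;
//   }
//
// so callers must already hold gPSMutex (proved by the PSLockRef argument).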

static const size_t MAX_JS_FRAMES = 1024;
using JsFrameBuffer = JS::ProfilingFrameIterator::Frame[MAX_JS_FRAMES];

// All functions in this file can run on multiple threads unless they have an
// NS_IsMainThread() assertion.

// This class contains the profiler's core global state, i.e. that which is
// valid even when the profiler is not active. Most profile operations can't do
// anything useful when this class is not instantiated, so we release-assert
// its non-nullness in all such operations.
//
// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
// PSAutoLock reference as an argument as proof that the gPSMutex is currently
// locked. This makes it clear when gPSMutex is locked and helps avoid
// accidental unlocked accesses to global state. There are ways to circumvent
// this mechanism, but please don't do so without *very* good reason and a
// detailed explanation.
//
// The exceptions to this rule:
//
// - mProcessStartTime, because it's immutable;
//
// - each thread's RacyRegisteredThread object is accessible without locking via
//   TLSRegisteredThread::RacyRegisteredThread().
class CorePS {
 private:
  CorePS()
      : mProcessStartTime(TimeStamp::ProcessCreation()),
        // This needs its own mutex, because it is used concurrently from
        // functions guarded by gPSMutex as well as others without safety (e.g.,
        // profiler_add_marker). It is *not* used inside the critical section of
        // the sampler, because mutexes cannot be used there.
        mCoreBlocksRingBuffer(BlocksRingBuffer::ThreadSafety::WithMutex)
#ifdef USE_LUL_STACKWALK
        ,
        mLul(nullptr)
#endif
  {
  }

  ~CorePS() {}

 public:
  static void Create(PSLockRef aLock) {
    MOZ_ASSERT(!sInstance);
    sInstance = new CorePS();
  }

  static void Destroy(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    delete sInstance;
    sInstance = nullptr;
  }

  // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
  // being locked. This is because CorePS is instantiated so early on the main
  // thread that we don't have to worry about it being racy.
  static bool Exists() { return !!sInstance; }

  static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
                        size_t& aProfSize, size_t& aLulSize) {
    MOZ_ASSERT(sInstance);

    aProfSize += aMallocSizeOf(sInstance);

    for (auto& registeredThread : sInstance->mRegisteredThreads) {
      aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
    }

    for (auto& registeredPage : sInstance->mRegisteredPages) {
      aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
    }

    // Measurement of the following things may be added later if DMD finds it
    // is worthwhile:
    // - CorePS::mRegisteredThreads itself (its elements' children are
    //   measured above)
    // - CorePS::mRegisteredPages itself (its elements' children are
    //   measured above)
    // - CorePS::mInterposeObserver

#if defined(USE_LUL_STACKWALK)
    if (sInstance->mLul) {
      aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
    }
#endif
  }

  // No PSLockRef is needed for this field because it's immutable.
  PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)

  // No PSLockRef is needed for this field because it's thread-safe.
  PS_GET_LOCKLESS(BlocksRingBuffer&, CoreBlocksRingBuffer)

  PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)

  PS_GET(JsFrameBuffer&, JsFrames)

  static void AppendRegisteredThread(
      PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
    MOZ_ASSERT(sInstance);
    MOZ_RELEASE_ASSERT(
        sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
  }

  static void RemoveRegisteredThread(PSLockRef,
                                     RegisteredThread* aRegisteredThread) {
    MOZ_ASSERT(sInstance);
    // Remove aRegisteredThread from mRegisteredThreads.
    for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
      if (rt.get() == aRegisteredThread) {
        sInstance->mRegisteredThreads.erase(&rt);
        return;
      }
    }
  }

  PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)

  static void AppendRegisteredPage(PSLockRef,
                                   RefPtr<PageInformation>&& aRegisteredPage) {
    MOZ_ASSERT(sInstance);
    struct RegisteredPageComparator {
      PageInformation* aA;
      bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
    };

    auto foundPageIter = std::find_if(
        sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
        RegisteredPageComparator{aRegisteredPage.get()});

    if (foundPageIter != sInstance->mRegisteredPages.end()) {
      if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) {
        // When a BrowsingContext is loaded, the first url loaded in it will be
        // about:blank, and if the principal matches, the first document loaded
        // in it will share an inner window. That's why we should remove the
        // transient about:blank page when they share the inner window.
        sInstance->mRegisteredPages.erase(foundPageIter);
      } else {
        // Do not register the same page again.
        return;
      }
    }

    MOZ_RELEASE_ASSERT(
        sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
  }

  static void RemoveRegisteredPage(PSLockRef,
                                   uint64_t aRegisteredInnerWindowID) {
    MOZ_ASSERT(sInstance);
    // Remove RegisteredPage from mRegisteredPages by given inner window ID.
    sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
      return rd->InnerWindowID() == aRegisteredInnerWindowID;
    });
  }

  static void ClearRegisteredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mRegisteredPages.clear();
  }

  PS_GET(const Vector<BaseProfilerCount*>&, Counters)

  static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
    MOZ_ASSERT(sInstance);
    // We don't own the counters; they may be stored in static objects.
    MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
  }

  static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
    // We may be called to remove a counter after the profiler is stopped or
    // late in shutdown.
    if (sInstance) {
      auto* counter = std::find(sInstance->mCounters.begin(),
                                sInstance->mCounters.end(), aCounter);
      MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
      sInstance->mCounters.erase(counter);
    }
  }

#ifdef USE_LUL_STACKWALK
  static lul::LUL* Lul(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mLul.get();
  }
  static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
    MOZ_ASSERT(sInstance);
    sInstance->mLul = std::move(aLul);
  }
#endif

  PS_GET_AND_SET(const nsACString&, ProcessName)

 private:
  // The singleton instance.
  static CorePS* sInstance;

  // The time that the process started.
  const TimeStamp mProcessStartTime;

  // The thread-safe blocks-oriented ring buffer into which all profiling data
  // is recorded.
  // ActivePS controls the lifetime of the underlying contents buffer: When
  // ActivePS does not exist, mCoreBlocksRingBuffer is empty and rejects all
  // reads & writes; see ActivePS for further details.
  // Note: This needs to live here, outside of ActivePS, because some producers
  // are indirectly controlled (e.g., by atomic flags) and therefore may still
  // attempt to write some data shortly after ActivePS has shut down and
  // deleted the underlying buffer in memory.
  BlocksRingBuffer mCoreBlocksRingBuffer;

  // Info on all the registered threads.
  // ThreadIds in mRegisteredThreads are unique.
  Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;

  // Info on all the registered pages.
  // InnerWindowIDs in mRegisteredPages are unique.
  Vector<RefPtr<PageInformation>> mRegisteredPages;

  // Non-owning pointers to all active counters.
  Vector<BaseProfilerCount*> mCounters;

#ifdef USE_LUL_STACKWALK
  // LUL's state. Null prior to the first activation, non-null thereafter.
  UniquePtr<lul::LUL> mLul;
#endif

  // Process name, provided by child process initialization code.
  nsAutoCString mProcessName;

  // This memory buffer is used by the MergeStacks mechanism. Previously it was
  // stack allocated, but this led to a stack overflow, as it was too much
  // memory. Here the buffer can be pre-allocated, and shared with the
  // MergeStacks feature as needed. MergeStacks is only run while holding the
  // lock, so it is safe to have only one instance allocated for all of the
  // threads.
  JsFrameBuffer mJsFrames;
};

CorePS* CorePS::sInstance = nullptr;
597
598
class SamplerThread;
599
600
static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
601
double aInterval);
602
603
struct LiveProfiledThreadData {
604
RegisteredThread* mRegisteredThread;
605
UniquePtr<ProfiledThreadData> mProfiledThreadData;
606
};
607
608
// This class contains the profiler's global state that is valid only when the
609
// profiler is active. When not instantiated, the profiler is inactive.
610
//
611
// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
612
// CorePS.
613
//
614
class ActivePS {
615
private:
616
static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
617
// Filter out any features unavailable in this platform/configuration.
618
aFeatures &= AvailableFeatures();
619
620
// Always enable ProfilerFeature::Threads if we have a filter, because
621
// users sometimes ask to filter by a list of threads but forget to
622
// explicitly specify ProfilerFeature::Threads.
623
if (aFilterCount > 0) {
624
aFeatures |= ProfilerFeature::Threads;
625
}
626
627
return aFeatures;
628
}
629
630
ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
631
uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
632
uint64_t aActiveBrowsingContextID, const Maybe<double>& aDuration)
633
: mGeneration(sNextGeneration++),
634
mCapacity(aCapacity),
635
mDuration(aDuration),
636
mInterval(aInterval),
637
mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
638
mActiveBrowsingContextID(aActiveBrowsingContextID),
639
// 8 bytes per entry.
640
mProfileBuffer(CorePS::CoreBlocksRingBuffer(),
641
PowerOfTwo32(aCapacity.Value() * 8)),
642
// The new sampler thread doesn't start sampling immediately because the
643
// main loop within Run() is blocked until this function's caller
644
// unlocks gPSMutex.
645
mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval)),
646
mInterposeObserver(ProfilerFeature::HasMainThreadIO(aFeatures)
647
? new ProfilerIOInterposeObserver()
648
: nullptr),
649
mIsPaused(false)
650
#if defined(GP_OS_linux)
651
,
652
mWasPaused(false)
653
#endif
654
{
655
// Deep copy aFilters.
656
MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
657
for (uint32_t i = 0; i < aFilterCount; ++i) {
658
mFilters[i] = aFilters[i];
659
}
660
661
#if !defined(RELEASE_OR_BETA)
662
if (mInterposeObserver) {
663
// We need to register the observer on the main thread, because we want
664
// to observe IO that happens on the main thread.
665
// IOInterposer needs to be initialized before calling
666
// IOInterposer::Register or our observer will be silently dropped.
667
if (NS_IsMainThread()) {
668
IOInterposer::Init();
669
IOInterposer::Register(IOInterposeObserver::OpAll, mInterposeObserver);
670
} else {
671
RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
672
NS_DispatchToMainThread(
673
NS_NewRunnableFunction("ActivePS::ActivePS", [=]() {
674
IOInterposer::Init();
675
IOInterposer::Register(IOInterposeObserver::OpAll, observer);
676
}));
677
}
678
}
679
#endif
680
}
681
682
~ActivePS() {
683
#if !defined(RELEASE_OR_BETA)
684
if (mInterposeObserver) {
685
// We need to unregister the observer on the main thread, because that's
686
// where we've registered it.
687
if (NS_IsMainThread()) {
688
IOInterposer::Unregister(IOInterposeObserver::OpAll,
689
mInterposeObserver);
690
} else {
691
RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
692
NS_DispatchToMainThread(
693
NS_NewRunnableFunction("ActivePS::~ActivePS", [=]() {
694
IOInterposer::Unregister(IOInterposeObserver::OpAll, observer);
695
}));
696
}
697
}
698
#endif
699
}
700
701
bool ThreadSelected(const char* aThreadName) {
702
if (mFilters.empty()) {
703
return true;
704
}
705
706
std::string name = aThreadName;
707
std::transform(name.begin(), name.end(), name.begin(), ::tolower);
708
709
for (uint32_t i = 0; i < mFilters.length(); ++i) {
710
std::string filter = mFilters[i];
711
712
if (filter == "*") {
713
return true;
714
}
715
716
std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);
717
718
// Crude, non UTF-8 compatible, case insensitive substring search
719
if (name.find(filter) != std::string::npos) {
720
return true;
721
}
722
723
// If the filter starts with pid:, check for a pid match
724
if (filter.find("pid:") == 0) {
725
std::string mypid = std::to_string(profiler_current_process_id());
726
if (filter.compare(4, std::string::npos, mypid) == 0) {
727
return true;
728
}
729
}
730
}
731
732
return false;
733
}
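
  // For example (illustrative): with filters {"GeckoMain", "pid:1234"}, a
  // thread named "GeckoMain" matches via the substring search, any thread
  // matches when the current process id is 1234 via the "pid:" prefix, and a
  // filter of "*" would match every registered thread.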

 public:
  static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount, uint64_t aActiveBrowsingContextID,
                     const Maybe<double>& aDuration) {
    MOZ_ASSERT(!sInstance);
    sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
                             aFilterCount, aActiveBrowsingContextID, aDuration);
  }

  static MOZ_MUST_USE SamplerThread* Destroy(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    auto samplerThread = sInstance->mSamplerThread;
    delete sInstance;
    sInstance = nullptr;

    return samplerThread;
  }

  static bool Exists(PSLockRef) { return !!sInstance; }

  static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
                     const Maybe<double>& aDuration, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount, uint64_t aActiveBrowsingContextID) {
    MOZ_ASSERT(sInstance);
    if (sInstance->mCapacity != aCapacity ||
        sInstance->mDuration != aDuration ||
        sInstance->mInterval != aInterval ||
        sInstance->mFeatures != aFeatures ||
        sInstance->mFilters.length() != aFilterCount ||
        sInstance->mActiveBrowsingContextID != aActiveBrowsingContextID) {
      return false;
    }

    for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
      if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
        return false;
      }
    }
    return true;
  }

  static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
    MOZ_ASSERT(sInstance);

    size_t n = aMallocSizeOf(sInstance);

    n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);

    // Measurement of the following members may be added later if DMD finds it
    // is worthwhile:
    // - mLiveProfiledThreads (both the array itself, and the contents)
    // - mDeadProfiledThreads (both the array itself, and the contents)
    //

    return n;
  }

  static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
    MOZ_ASSERT(sInstance);
    return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
            sInstance->ThreadSelected(aInfo->Name()));
  }

  static MOZ_MUST_USE bool AppendPostSamplingCallback(
      PSLockRef, PostSamplingCallback&& aCallback);

  // Writes out the current active configuration of the profile.
  static void WriteActiveConfiguration(PSLockRef aLock, JSONWriter& aWriter,
                                       const char* aPropertyName = nullptr) {
    if (!sInstance) {
      if (aPropertyName) {
        aWriter.NullProperty(aPropertyName);
      } else {
        aWriter.NullElement();
      }
      return;
    };

    if (aPropertyName) {
      aWriter.StartObjectProperty(aPropertyName);
    } else {
      aWriter.StartObjectElement();
    }

    {
      aWriter.StartArrayProperty("features", aWriter.SingleLineStyle);
#define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_)    \
  if (profiler_feature_active(ProfilerFeature::Name_)) { \
    aWriter.StringElement(str_);                         \
  }

      PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES)
#undef WRITE_ACTIVE_FEATURES
      aWriter.EndArray();
    }
    {
      aWriter.StartArrayProperty("threads", aWriter.SingleLineStyle);
      for (const auto& filter : sInstance->mFilters) {
        aWriter.StringElement(filter.c_str());
      }
      aWriter.EndArray();
    }
    {
      // Now write all the simple values.

      // The interval is also available on profile.meta.interval
      aWriter.DoubleProperty("interval", sInstance->mInterval);
      aWriter.IntProperty("capacity", sInstance->mCapacity.Value());
      if (sInstance->mDuration) {
        aWriter.DoubleProperty("duration", sInstance->mDuration.value());
      }
      // Here, we are converting uint64_t to double. Browsing Context IDs are
      // being created using `nsContentUtils::GenerateProcessSpecificId`, which
      // is specifically designed to only use 53 of the 64 bits to be lossless
      // when passed into and out of JS as a double.
      aWriter.DoubleProperty("activeBrowsingContextID",
                             sInstance->mActiveBrowsingContextID);
    }
    aWriter.EndObject();
  }
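
  // The written object looks roughly like this (illustrative only; the exact
  // fields depend on the active features and the values written above):
  //
  //   {
  //     "features": ["js", "leaf", "stackwalk"],
  //     "threads": ["GeckoMain", "Compositor"],
  //     "interval": 1.0,
  //     "capacity": 131072,
  //     "activeBrowsingContextID": 5
  //   }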

  PS_GET(uint32_t, Generation)

  PS_GET(PowerOfTwo32, Capacity)

  PS_GET(Maybe<double>, Duration)

  PS_GET(double, Interval)

  PS_GET(uint32_t, Features)

  PS_GET(uint64_t, ActiveBrowsingContextID)

#define PS_GET_FEATURE(n_, str_, Name_, desc_)                \
  static bool Feature##Name_(PSLockRef) {                     \
    MOZ_ASSERT(sInstance);                                    \
    return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
  }

  PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)

#undef PS_GET_FEATURE

  static uint32_t JSFlags(PSLockRef aLock) {
    uint32_t Flags = 0;
    Flags |=
        FeatureJS(aLock) ? uint32_t(JSInstrumentationFlags::StackSampling) : 0;
    Flags |= FeatureTrackOptimizations(aLock)
                 ? uint32_t(JSInstrumentationFlags::TrackOptimizations)
                 : 0;
    Flags |= FeatureJSTracer(aLock)
                 ? uint32_t(JSInstrumentationFlags::TraceLogging)
                 : 0;
    Flags |= FeatureJSAllocations(aLock)
                 ? uint32_t(JSInstrumentationFlags::Allocations)
                 : 0;
    return Flags;
  }

  PS_GET(const Vector<std::string>&, Filters)

  static ProfileBuffer& Buffer(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mProfileBuffer;
  }

  static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mLiveProfiledThreads;
  }

  // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
  // for all threads that should be included in a profile, both for threads
  // that are still registered, and for threads that have been unregistered but
  // still have data in the buffer.
  // For threads that have already been unregistered, the RegisteredThread
  // pointer will be null.
  // The returned array is sorted by thread register time.
  // Do not hold on to the return value across thread registration or profiler
  // restarts.
  static Vector<Pair<RegisteredThread*, ProfiledThreadData*>> ProfiledThreads(
      PSLockRef) {
    MOZ_ASSERT(sInstance);
    Vector<Pair<RegisteredThread*, ProfiledThreadData*>> array;
    MOZ_RELEASE_ASSERT(
        array.initCapacity(sInstance->mLiveProfiledThreads.length() +
                           sInstance->mDeadProfiledThreads.length()));
    for (auto& t : sInstance->mLiveProfiledThreads) {
      MOZ_RELEASE_ASSERT(array.append(
          MakePair(t.mRegisteredThread, t.mProfiledThreadData.get())));
    }
    for (auto& t : sInstance->mDeadProfiledThreads) {
      MOZ_RELEASE_ASSERT(
          array.append(MakePair((RegisteredThread*)nullptr, t.get())));
    }

    std::sort(array.begin(), array.end(),
              [](const Pair<RegisteredThread*, ProfiledThreadData*>& a,
                 const Pair<RegisteredThread*, ProfiledThreadData*>& b) {
                return a.second()->Info()->RegisterTime() <
                       b.second()->Info()->RegisterTime();
              });
    return array;
  }

  static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    Vector<RefPtr<PageInformation>> array;
    for (auto& d : CorePS::RegisteredPages(aLock)) {
      MOZ_RELEASE_ASSERT(array.append(d));
    }
    for (auto& d : sInstance->mDeadProfiledPages) {
      MOZ_RELEASE_ASSERT(array.append(d));
    }
    // We don't need to sort the pages like threads since we won't show them
    // as a list.
    return array;
  }

  // Do a linear search through mLiveProfiledThreads to find the
  // ProfiledThreadData object for a RegisteredThread.
  static ProfiledThreadData* GetProfiledThreadData(
      PSLockRef, RegisteredThread* aRegisteredThread) {
    MOZ_ASSERT(sInstance);
    for (const LiveProfiledThreadData& thread :
         sInstance->mLiveProfiledThreads) {
      if (thread.mRegisteredThread == aRegisteredThread) {
        return thread.mProfiledThreadData.get();
      }
    }
    return nullptr;
  }

  static ProfiledThreadData* AddLiveProfiledThread(
      PSLockRef, RegisteredThread* aRegisteredThread,
      UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
    MOZ_ASSERT(sInstance);
    MOZ_RELEASE_ASSERT(
        sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{
            aRegisteredThread, std::move(aProfiledThreadData)}));

    // Return a weak pointer to the ProfiledThreadData object.
    return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
  }

  static void UnregisterThread(PSLockRef aLockRef,
                               RegisteredThread* aRegisteredThread) {
    MOZ_ASSERT(sInstance);

    DiscardExpiredDeadProfiledThreads(aLockRef);

    // Find the right entry in the mLiveProfiledThreads array and remove the
    // element, moving the ProfiledThreadData object for the thread into the
    // mDeadProfiledThreads array.
    // The thread's RegisteredThread object gets destroyed here.
    for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
      LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
      if (thread.mRegisteredThread == aRegisteredThread) {
        thread.mProfiledThreadData->NotifyUnregistered(
            sInstance->mProfileBuffer.BufferRangeEnd());
        MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
            std::move(thread.mProfiledThreadData)));
        sInstance->mLiveProfiledThreads.erase(
            &sInstance->mLiveProfiledThreads[i]);
        return;
      }
    }
  }

  PS_GET_AND_SET(bool, IsPaused)

#if defined(GP_OS_linux)
  PS_GET_AND_SET(bool, WasPaused)
#endif

  static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
    MOZ_ASSERT(sInstance);
    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
    // Discard any dead threads that were unregistered before bufferRangeStart.
    sInstance->mDeadProfiledThreads.eraseIf(
        [bufferRangeStart](
            const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
          Maybe<uint64_t> bufferPosition =
              aProfiledThreadData->BufferPositionWhenUnregistered();
          MOZ_RELEASE_ASSERT(bufferPosition,
                             "should have unregistered this thread");
          return *bufferPosition < bufferRangeStart;
        });
  }

  static void UnregisterPage(PSLockRef aLock,
                             uint64_t aRegisteredInnerWindowID) {
    MOZ_ASSERT(sInstance);
    auto& registeredPages = CorePS::RegisteredPages(aLock);
    for (size_t i = 0; i < registeredPages.length(); i++) {
      RefPtr<PageInformation>& page = registeredPages[i];
      if (page->InnerWindowID() == aRegisteredInnerWindowID) {
        page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
        MOZ_RELEASE_ASSERT(
            sInstance->mDeadProfiledPages.append(std::move(page)));
        registeredPages.erase(&registeredPages[i--]);
      }
    }
  }

  static void DiscardExpiredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
    // Discard any dead pages that were unregistered before
    // bufferRangeStart.
    sInstance->mDeadProfiledPages.eraseIf(
        [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
          Maybe<uint64_t> bufferPosition =
              aProfiledPage->BufferPositionWhenUnregistered();
          MOZ_RELEASE_ASSERT(bufferPosition,
                             "should have unregistered this page");
          return *bufferPosition < bufferRangeStart;
        });
  }

  static void ClearUnregisteredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mDeadProfiledPages.clear();
  }

  static void ClearExpiredExitProfiles(PSLockRef) {
    MOZ_ASSERT(sInstance);
    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
    // Discard exit profiles that were gathered before our buffer RangeStart.
#ifdef MOZ_BASE_PROFILER
    // If we have started to overwrite our data from when the Base profile was
    // added, we should get rid of that Base profile because it's now older than
    // our oldest Gecko profile data.
    //
    // When adding: (In practice the starting buffer should be empty)
    //   v Start == End
    //   |                  <-- Buffer range, initially empty.
    //   ^ mGeckoIndexWhenBaseProfileAdded < Start  FALSE -> keep it
    //
    // Later, still in range:
    //   v Start    v End
    //   |==========|       <-- Buffer range growing.
    //   ^ mGeckoIndexWhenBaseProfileAdded < Start  FALSE -> keep it
    //
    // Even later, now out of range:
    //        v Start    v End
    //   |    |==========|  <-- Buffer range full and sliding.
    //   ^ mGeckoIndexWhenBaseProfileAdded < Start  TRUE! -> Discard it
    if (sInstance->mBaseProfileThreads &&
        sInstance->mGeckoIndexWhenBaseProfileAdded <
            CorePS::CoreBlocksRingBuffer().GetState().mRangeStart) {
      DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
                sInstance->mBaseProfileThreads.get());
      sInstance->mBaseProfileThreads.reset();
    }
#endif
    sInstance->mExitProfiles.eraseIf(
        [bufferRangeStart](const ExitProfile& aExitProfile) {
          return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
        });
  }

#ifdef MOZ_BASE_PROFILER
  static void AddBaseProfileThreads(PSLockRef aLock,
                                    UniquePtr<char[]> aBaseProfileThreads) {
    MOZ_ASSERT(sInstance);
    DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get());
    sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads);
    sInstance->mGeckoIndexWhenBaseProfileAdded =
        CorePS::CoreBlocksRingBuffer().GetState().mRangeEnd;
  }

  static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);

    ClearExpiredExitProfiles(aLock);

    DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p",
              sInstance->mBaseProfileThreads.get());
    return std::move(sInstance->mBaseProfileThreads);
  }
#endif

  static void AddExitProfile(PSLockRef aLock, const nsCString& aExitProfile) {
    MOZ_ASSERT(sInstance);

    ClearExpiredExitProfiles(aLock);

    MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
        ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()}));
  }

  static Vector<nsCString> MoveExitProfiles(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);

    ClearExpiredExitProfiles(aLock);

    Vector<nsCString> profiles;
    MOZ_RELEASE_ASSERT(
        profiles.initCapacity(sInstance->mExitProfiles.length()));
    for (auto& profile : sInstance->mExitProfiles) {
      MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
    }
    sInstance->mExitProfiles.clear();
    return profiles;
  }

 private:
  // The singleton instance.
  static ActivePS* sInstance;

  // We need to track activity generations. If we didn't we could have the
  // following scenario.
  //
  // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
  //   gPSMutex, deletes the SamplerThread (which does a join).
  //
  // - profiler_start() runs on a different thread, locks gPSMutex,
  //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
  //   completes.
  //
  // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
  //   and continues as if the start/stop pair didn't occur. Also
  //   profiler_stop() is stuck, unable to finish.
  //
  // By checking ActivePS *and* the generation, we can avoid this scenario.
  // sNextGeneration is used to track the next generation number; it is static
  // because it must persist across different ActivePS instantiations.
  const uint32_t mGeneration;
  static uint32_t sNextGeneration;

  // The maximum number of entries in mProfileBuffer.
  const PowerOfTwo32 mCapacity;

  // The maximum duration of entries in mProfileBuffer, in seconds.
  const Maybe<double> mDuration;

  // The interval between samples, measured in milliseconds.
  const double mInterval;

  // The profile features that are enabled.
  const uint32_t mFeatures;

  // Substrings of names of threads we want to profile.
  Vector<std::string> mFilters;

  // Browsing Context ID of the active browser screen's active tab.
  // It's being used to determine the profiled tab. It's "0" if we failed to
  // get the ID.
  const uint64_t mActiveBrowsingContextID;

  // The buffer into which all samples are recorded.
  ProfileBuffer mProfileBuffer;

  // ProfiledThreadData objects for any threads that were profiled at any point
  // during this run of the profiler:
  // - mLiveProfiledThreads contains all threads that are still registered, and
  // - mDeadProfiledThreads contains all threads that have already been
  //   unregistered but for which there is still data in the profile buffer.
  Vector<LiveProfiledThreadData> mLiveProfiledThreads;
  Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;

  // Info on all the dead pages.
  // Registered pages are being moved to this array after unregistration.
  // We are keeping them in case we need them in the profile data.
  // We are removing them when we ensure that we won't need them anymore.
  Vector<RefPtr<PageInformation>> mDeadProfiledPages;

  // The current sampler thread. This class is not responsible for destroying
  // the SamplerThread object; the Destroy() method returns it so the caller
  // can destroy it.
  SamplerThread* const mSamplerThread;

  // The interposer that records main thread I/O.
  RefPtr<ProfilerIOInterposeObserver> mInterposeObserver;

  // Is the profiler paused?
  bool mIsPaused;

#if defined(GP_OS_linux)
  // Used to record whether the profiler was paused just before forking. False
  // at all times except just before/after forking.
  bool mWasPaused;
#endif

#ifdef MOZ_BASE_PROFILER
  // Optional startup profile thread array from BaseProfiler.
  UniquePtr<char[]> mBaseProfileThreads;
  BlocksRingBuffer::BlockIndex mGeckoIndexWhenBaseProfileAdded;
#endif

  struct ExitProfile {
    nsCString mJSON;
    uint64_t mBufferPositionAtGatherTime;
  };
  Vector<ExitProfile> mExitProfiles;
};

ActivePS* ActivePS::sInstance = nullptr;
uint32_t ActivePS::sNextGeneration = 0;

#undef PS_GET
#undef PS_GET_LOCKLESS
#undef PS_GET_AND_SET

// The mutex that guards accesses to CorePS and ActivePS.
static PSMutex gPSMutex;

Atomic<uint32_t, MemoryOrdering::Relaxed, recordreplay::Behavior::DontPreserve>
    RacyFeatures::sActiveAndFeatures(0);

// Each live thread has a RegisteredThread, and we store a reference to it in
// TLS. This class encapsulates that TLS.
class TLSRegisteredThread {
 public:
  static bool Init(PSLockRef) {
    bool ok1 = sRegisteredThread.init();
    bool ok2 = AutoProfilerLabel::sProfilingStackOwnerTLS.init();
    return ok1 && ok2;
  }

  // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
  static class RegisteredThread* RegisteredThread(PSLockRef) {
    return sRegisteredThread.get();
  }

  // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
  static class RacyRegisteredThread* RacyRegisteredThread() {
    class RegisteredThread* registeredThread = sRegisteredThread.get();
    return registeredThread ? &registeredThread->RacyRegisteredThread()
                            : nullptr;
  }

  // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
  // RacyRegisteredThread() can also be used to get the ProfilingStack, but that
  // is marginally slower because it requires an extra pointer indirection.
  static ProfilingStack* Stack() {
    ProfilingStackOwner* profilingStackOwner =
        AutoProfilerLabel::sProfilingStackOwnerTLS.get();
    if (!profilingStackOwner) {
      return nullptr;
    }
    return &profilingStackOwner->ProfilingStack();
  }

  static void SetRegisteredThreadAndAutoProfilerLabelProfilingStack(
      PSLockRef, class RegisteredThread* aRegisteredThread) {
    MOZ_RELEASE_ASSERT(
        aRegisteredThread,
        "Use ResetRegisteredThread() instead of SetRegisteredThread(nullptr)");
    sRegisteredThread.set(aRegisteredThread);
    ProfilingStackOwner& profilingStackOwner =
        aRegisteredThread->RacyRegisteredThread().ProfilingStackOwner();
    profilingStackOwner.AddRef();
    AutoProfilerLabel::sProfilingStackOwnerTLS.set(&profilingStackOwner);
  }

  // Only reset the registered thread. The AutoProfilerLabel's ProfilingStack
  // is kept, because the thread may not have unregistered itself yet, so it
  // may still push/pop labels even after the profiler has shut down.
  static void ResetRegisteredThread(PSLockRef) {
    sRegisteredThread.set(nullptr);
  }

  // Reset the AutoProfilerLabels' ProfilingStack, because the thread is
  // unregistering itself.
  static void ResetAutoProfilerLabelProfilingStack(PSLockRef) {
    MOZ_RELEASE_ASSERT(
        AutoProfilerLabel::sProfilingStackOwnerTLS.get(),
        "ResetAutoProfilerLabelProfilingStack should only be called once");
    AutoProfilerLabel::sProfilingStackOwnerTLS.get()->Release();
    AutoProfilerLabel::sProfilingStackOwnerTLS.set(nullptr);
  }

 private:
  // This is a non-owning reference to the RegisteredThread;
  // CorePS::mRegisteredThreads is the owning reference. On thread
  // deregistration, this reference is cleared and the RegisteredThread is
  // destroyed.
  static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
};

MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;

// Although you can access a thread's ProfilingStack via
// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
// directly to the ProfilingStack. Here's why.
//
// - We need to be able to push to and pop from the ProfilingStack in
//   AutoProfilerLabel.
//
// - The class functions are hot and must be defined in GeckoProfiler.h so they
//   can be inlined.
//
// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
//   GeckoProfiler.h.
//
// This second pointer isn't ideal, but does provide a way to satisfy those
// constraints. TLSRegisteredThread is responsible for updating it.
//
// The (Racy)RegisteredThread and AutoProfilerLabel::sProfilingStackOwnerTLS
// co-own the thread's ProfilingStack, so whichever is reset second is
// responsible for destroying the ProfilingStack. Because MOZ_THREAD_LOCAL
// doesn't support RefPtr, AddRef and Release are done explicitly in
// TLSRegisteredThread.
MOZ_THREAD_LOCAL(ProfilingStackOwner*)
AutoProfilerLabel::sProfilingStackOwnerTLS;
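
// For reference (illustrative): label pushes/pops typically come from the
// AUTO_PROFILER_LABEL family of macros in GeckoProfiler.h, which construct an
// AutoProfilerLabel on the stack; its constructor pushes a frame onto the
// ProfilingStack reachable through sProfilingStackOwnerTLS, and its destructor
// pops it, even if the profiler is shut down in between.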

void ProfilingStackOwner::DumpStackAndCrash() const {
  fprintf(stderr,
          "ProfilingStackOwner::DumpStackAndCrash() thread id: %d, size: %u\n",
          profiler_current_thread_id(), unsigned(mProfilingStack.stackSize()));
  js::ProfilingStackFrame* allFrames = mProfilingStack.frames;
  for (uint32_t i = 0; i < mProfilingStack.stackSize(); i++) {
    js::ProfilingStackFrame& frame = allFrames[i];
    if (frame.isLabelFrame()) {
      fprintf(stderr, "%u: label frame, sp=%p, label='%s' (%s)\n", unsigned(i),
              frame.stackAddress(), frame.label(),
              frame.dynamicString() ? frame.dynamicString() : "-");
    } else {
      fprintf(stderr, "%u: non-label frame\n", unsigned(i));
    }
  }

  MOZ_CRASH("Non-empty stack!");
}

// The name of the main thread.
static const char* const kMainThreadName = "GeckoMain";

////////////////////////////////////////////////////////////////////////
// BEGIN sampling/unwinding code

// The registers used for stack unwinding and a few other sampling purposes.
// The ctor does nothing; users are responsible for filling in the fields.
class Registers {
 public:
  Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}

#if defined(HAVE_NATIVE_UNWIND)
  // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
  void SyncPopulate();
#endif

  void Clear() { memset(this, 0, sizeof(*this)); }

  // These fields are filled in by
  // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
  // samples, and by SyncPopulate() for synchronous samples.
  Address mPC;  // Instruction pointer.
  Address mSP;  // Stack pointer.
  Address mFP;  // Frame pointer.
  Address mLR;  // ARM link register.
#if defined(GP_OS_linux) || defined(GP_OS_android)
  // This contains all the registers, which means it duplicates the four fields
  // above. This is ok.
  ucontext_t* mContext;  // The context from the signal handler.
#endif
};

// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
// looping on corrupted stacks.
static const size_t MAX_NATIVE_FRAMES = 1024;

struct NativeStack {
  void* mPCs[MAX_NATIVE_FRAMES];
  void* mSPs[MAX_NATIVE_FRAMES];
  size_t mCount;  // Number of frames filled.

  NativeStack() : mPCs(), mSPs(), mCount(0) {}
};

Atomic<bool> WALKING_JS_STACK(false);

struct AutoWalkJSStack {
  bool walkAllowed;

  AutoWalkJSStack() : walkAllowed(false) {
    walkAllowed = WALKING_JS_STACK.compareExchange(false, true);
  }

  ~AutoWalkJSStack() {
    if (walkAllowed) {
      WALKING_JS_STACK = false;
    }
  }
};
1424
1425
// Merges the profiling stack, native stack, and JS stack, outputting the
1426
// details to aCollector.
1427
static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
1428
const RegisteredThread& aRegisteredThread,
1429
const Registers& aRegs, const NativeStack& aNativeStack,
1430
ProfilerStackCollector& aCollector,
1431
JsFrameBuffer aJsFrames) {
1432
// WARNING: this function runs within the profiler's "critical section".
1433
// WARNING: this function might be called while the profiler is inactive, and
1434
// cannot rely on ActivePS.
1435
1436
const ProfilingStack& profilingStack =
1437
aRegisteredThread.RacyRegisteredThread().ProfilingStack();
1438
const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
1439
uint32_t profilingStackFrameCount = profilingStack.stackSize();
1440
JSContext* context = aRegisteredThread.GetJSContext();
1441
1442
// Make a copy of the JS stack into a JSFrame array. This is necessary since,
1443
// like the native stack, the JS stack is iterated youngest-to-oldest and we
1444
// need to iterate oldest-to-youngest when adding frames to aInfo.
1445
1446
// Non-periodic sampling passes Nothing() as the buffer write position to
1447
// ProfilingFrameIterator to avoid incorrectly resetting the buffer position
1448
// of sampled JIT frames inside the JS engine.
1449
Maybe<uint64_t> samplePosInBuffer;
1450
if (!aIsSynchronous) {
1451
// aCollector.SamplePositionInBuffer() will return Nothing() when
1452
// profiler_suspend_and_sample_thread is called from the background hang
1453
// reporter.
1454
samplePosInBuffer = aCollector.SamplePositionInBuffer();
1455
}
1456
uint32_t jsCount = 0;
1457
1458
// Only walk jit stack if profiling frame iterator is turned on.
1459
if (context && JS::IsProfilingEnabledForContext(context)) {
1460
AutoWalkJSStack autoWalkJSStack;
1461
1462
if (autoWalkJSStack.walkAllowed) {
1463
JS::ProfilingFrameIterator::RegisterState registerState;
1464
registerState.pc = aRegs.mPC;
1465
registerState.sp = aRegs.mSP;
1466
registerState.lr = aRegs.mLR;
1467
registerState.fp = aRegs.mFP;
1468
1469
JS::ProfilingFrameIterator jsIter(context, registerState,
1470
samplePosInBuffer);
1471
for (; jsCount < MAX_JS_FRAMES && !jsIter.done(); ++jsIter) {
1472
if (aIsSynchronous || jsIter.isWasm()) {
1473
uint32_t extracted =
1474
jsIter.extractStack(aJsFrames, jsCount, MAX_JS_FRAMES);
1475
jsCount += extracted;
1476
if (jsCount == MAX_JS_FRAMES) {
1477
break;
1478
}
1479
} else {
1480
Maybe<JS::ProfilingFrameIterator::Frame> frame =
1481
jsIter.getPhysicalFrameWithoutLabel();
1482
if (frame.isSome()) {
1483
aJsFrames[jsCount++] = frame.value();
1484
}
1485
}
1486
}
1487
}
1488
}
1489
  // While the profiling stack array is ordered oldest-to-youngest, the JS and
  // native arrays are ordered youngest-to-oldest. We must add frames to aInfo
  // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS
  // and native arrays backwards. Note: this means the terminating condition
  // for jsIndex and nativeIndex is that they drop below 0.
  uint32_t profilingStackIndex = 0;
  int32_t jsIndex = jsCount - 1;
  int32_t nativeIndex = aNativeStack.mCount - 1;

  uint8_t* lastLabelFrameStackAddr = nullptr;
  uint8_t* jitEndStackAddr = nullptr;

  // Iterate as long as there is at least one frame remaining.
  while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 ||
         nativeIndex >= 0) {
    // There are 1 to 3 frames available. Find and add the oldest.
    uint8_t* profilingStackAddr = nullptr;
    uint8_t* jsStackAddr = nullptr;
    uint8_t* nativeStackAddr = nullptr;
    uint8_t* jsActivationAddr = nullptr;

    if (profilingStackIndex != profilingStackFrameCount) {
      const js::ProfilingStackFrame& profilingStackFrame =
          profilingStackFrames[profilingStackIndex];

      if (profilingStackFrame.isLabelFrame() ||
          profilingStackFrame.isSpMarkerFrame()) {
        lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
      }

      // Skip any JS_OSR frames. Such frames are used when the JS interpreter
      // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
      // To avoid both the profiling stack frame and jit frame being recorded
      // (and showing up twice), the interpreter marks the interpreter
      // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
      if (profilingStackFrame.isOSRFrame()) {
        profilingStackIndex++;
        continue;
      }

      MOZ_ASSERT(lastLabelFrameStackAddr);
      profilingStackAddr = lastLabelFrameStackAddr;
    }

    if (jsIndex >= 0) {
      jsStackAddr = (uint8_t*)aJsFrames[jsIndex].stackAddress;
      jsActivationAddr = (uint8_t*)aJsFrames[jsIndex].activation;
    }

    if (nativeIndex >= 0) {
      nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
    }

    // If there's a native stack frame which has the same SP as a profiling
    // stack frame, pretend we didn't see the native stack frame. Ditto for a
    // native stack frame which has the same SP as a JS stack frame. In effect
    // this means profiling stack frames or JS frames trump conflicting native
    // frames.
    if (nativeStackAddr && (profilingStackAddr == nativeStackAddr ||
                            jsStackAddr == nativeStackAddr)) {
      nativeStackAddr = nullptr;
      nativeIndex--;
      MOZ_ASSERT(profilingStackAddr || jsStackAddr);
    }

    // Sanity checks.
    MOZ_ASSERT_IF(profilingStackAddr,
                  profilingStackAddr != jsStackAddr &&
                      profilingStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr &&
                                   jsStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr &&
                                       nativeStackAddr != jsStackAddr);

    // Check to see if profiling stack frame is top-most.
    if (profilingStackAddr > jsStackAddr &&
        profilingStackAddr > nativeStackAddr) {
      MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
      const js::ProfilingStackFrame& profilingStackFrame =
          profilingStackFrames[profilingStackIndex];

      // Sp marker frames are just annotations and should not be recorded in
      // the profile.
      if (!profilingStackFrame.isSpMarkerFrame()) {
        // The JIT only allows the top-most frame to have a nullptr pc.
        MOZ_ASSERT_IF(
            profilingStackFrame.isJsFrame() && profilingStackFrame.script() &&
                !profilingStackFrame.pc(),
            &profilingStackFrame ==
                &profilingStack.frames[profilingStack.stackSize() - 1]);
        aCollector.CollectProfilingStackFrame(profilingStackFrame);
      }
      profilingStackIndex++;
      continue;
    }

    // Check to see if JS jit stack frame is top-most
    if (jsStackAddr > nativeStackAddr) {
      MOZ_ASSERT(jsIndex >= 0);
      const JS::ProfilingFrameIterator::Frame& jsFrame = aJsFrames[jsIndex];
      jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress;
      // Stringifying non-wasm JIT frames is delayed until streaming time. To
      // re-lookup the entry in the JitcodeGlobalTable, we need to store the
      // JIT code address (OptInfoAddr) in the circular buffer.
      //
      // Note that we cannot do this when we are synchronously sampling the
      // current thread; that is, when called from profiler_get_backtrace. The
      // captured backtrace is usually externally stored for an indeterminate
      // amount of time, such as in nsRefreshDriver. Problematically, the
      // stored backtrace may be alive across a GC during which the profiler
      // itself is disabled. In that case, the JS engine is free to discard its
      // JIT code. This means that if we inserted such OptInfoAddr entries into
      // the buffer, nsRefreshDriver would now be holding on to a backtrace
      // with stale JIT code return addresses.
      if (aIsSynchronous ||
          jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
        aCollector.CollectWasmFrame(jsFrame.label);
      } else if (jsFrame.kind ==
                 JS::ProfilingFrameIterator::Frame_BaselineInterpreter) {
        // For now treat this as a C++ Interpreter frame by materializing a
        // ProfilingStackFrame.
        JSScript* script = jsFrame.interpreterScript;
        jsbytecode* pc = jsFrame.interpreterPC();
        js::ProfilingStackFrame stackFrame;
        stackFrame.initJsFrame("", jsFrame.label, script, pc, jsFrame.realmID);
        aCollector.CollectProfilingStackFrame(stackFrame);
      } else {
        MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
                   jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
        aCollector.CollectJitReturnAddr(jsFrame.returnAddress());
      }

      jsIndex--;
      continue;
    }

    // If we reach here, there must be a native stack frame and it must be the
    // greatest frame.
    if (nativeStackAddr &&
        // If the latest JS frame was JIT, this could be the native frame that
        // corresponds to it. In that case, skip the native frame, because
        // there's no need for the same frame to be present twice in the stack.
        // The JS frame can be considered the symbolicated version of the
        // native frame.
        (!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) &&
        // This might still be a JIT operation; check that it is not in the
        // range of the NEXT JavaScript stack's activation address.
        (!jsActivationAddr || nativeStackAddr > jsActivationAddr)) {
      MOZ_ASSERT(nativeIndex >= 0);
      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
      aCollector.CollectNativeLeafAddr(addr);
    }
    if (nativeIndex >= 0) {
      nativeIndex--;
    }
  }

  // Update the JS context with the current profile sample buffer generation.
  //
  // Only do this for periodic samples. We don't want to do this for
  // synchronous samples, and we also don't want to do it for calls to
  // profiler_suspend_and_sample_thread() from the background hang reporter -
  // in that case, aCollector.BufferRangeStart() will return Nothing().
  if (!aIsSynchronous && context && aCollector.BufferRangeStart()) {
    uint64_t bufferRangeStart = *aCollector.BufferRangeStart();
    JS::SetJSContextProfilerSampleBufferRangeStart(context, bufferRangeStart);
  }
}

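// Illustrative sketch, excluded from compilation: it distills the three-way
// merge performed by the loop above. Stacks grow downwards, so among the
// candidate frames the one with the highest stack address is the oldest and
// must be emitted first. SketchFrame, MergeSketch() and the aEmit callback are
// hypothetical stand-ins, not profiler APIs, and for simplicity all three
// streams here are stored youngest-to-oldest; in the real code the label
// (profiling) stack is stored oldest-to-youngest and iterated forwards.
#if 0
#  include <cstddef>
#  include <cstdint>
#  include <functional>

struct SketchFrame {
  uintptr_t mStackAddr;  // address of the frame on the sampled thread's stack
  int mSource;           // 0 = label stack, 1 = JS, 2 = native
};

// Merge three frame streams, each ordered youngest-to-oldest, into a single
// oldest-to-youngest sequence.
static void MergeSketch(const SketchFrame* aStreams[3],
                        const size_t aCounts[3],
                        const std::function<void(const SketchFrame&)>& aEmit) {
  // Index of the oldest remaining frame in each stream (streams are consumed
  // from the back because they are stored youngest-to-oldest).
  ptrdiff_t next[3] = {ptrdiff_t(aCounts[0]) - 1, ptrdiff_t(aCounts[1]) - 1,
                       ptrdiff_t(aCounts[2]) - 1};
  while (next[0] >= 0 || next[1] >= 0 || next[2] >= 0) {
    // Pick the remaining frame with the greatest stack address (the oldest).
    int best = -1;
    for (int s = 0; s < 3; s++) {
      if (next[s] >= 0 &&
          (best < 0 || aStreams[s][next[s]].mStackAddr >
                           aStreams[best][next[best]].mStackAddr)) {
        best = s;
      }
    }
    aEmit(aStreams[best][next[best]]);
    next[best]--;
  }
}
#endif
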
#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
static HANDLE GetThreadHandle(PlatformData* aData);
#endif

#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
                              void* aClosure) {
  NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
  MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
  nativeStack->mSPs[nativeStack->mCount] = aSP;
  nativeStack->mPCs[nativeStack->mCount] = aPC;
  nativeStack->mCount++;
}
#endif

#if defined(USE_FRAME_POINTER_STACK_WALK)
static void DoFramePointerBacktrace(PSLockRef aLock,
                                    const RegisteredThread& aRegisteredThread,
                                    const Registers& aRegs,
                                    NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the FramePointerStackWalk() call below will use 1..N. This is a bit weird
  // but it doesn't matter because StackWalkCallback() doesn't use the frame
  // number argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  const void* stackEnd = aRegisteredThread.StackTop();
  if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) {
    FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
                          &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
                          const_cast<void*>(stackEnd));
  }
}
#endif

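// Illustrative sketch, excluded from compilation: the classic frame-pointer
// chain walk that FramePointerStackWalk() (declared in mozilla/StackWalk.h,
// included above) builds on, assuming the common layout where each frame
// begins with [saved caller FP, return address] (e.g. x86/x86-64 built with
// frame pointers). SketchStack and WalkFramePointerChain() are hypothetical
// stand-ins, not the real walker; the bounds checks mirror the
// aRegs.mFP/stackEnd check in DoFramePointerBacktrace() above.
#if 0
#  include <cstdint>

struct SketchStack {
  static const uint32_t kMaxFrames = 1024;
  void* mPCs[kMaxFrames];
  void* mSPs[kMaxFrames];
  uint32_t mCount = 0;
};

static void WalkFramePointerChain(void** aFP, const void* aStackEnd,
                                  SketchStack& aStack) {
  while (aStack.mCount < SketchStack::kMaxFrames && aFP &&
         (const void*)aFP < aStackEnd) {
    void** nextFP = (void**)aFP[0];  // saved caller frame pointer
    void* returnAddr = aFP[1];       // return address into the caller
    if (!returnAddr) {
      break;
    }
    aStack.mPCs[aStack.mCount] = returnAddr;
    aStack.mSPs[aStack.mCount] = (void*)aFP;
    aStack.mCount++;
    // On a downward-growing stack, caller frame pointers must strictly
    // increase; anything else means the chain is corrupt, so stop.
    if (nextFP <= aFP) {
      break;
    }
    aFP = nextFP;
  }
}
#endif
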
#if defined(USE_MOZ_STACK_WALK)
static void DoMozStackWalkBacktrace(PSLockRef aLock,
                                    const RegisteredThread& aRegisteredThread,
                                    const Registers& aRegs,
                                    NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the MozStackWalkThread() call below will use 1..N. This is a bit weird but
  // it doesn't matter because StackWalkCallback() doesn't use the frame number
  // argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData());
  MOZ_ASSERT(thread);
  MozStackWalkThread(StackWalkCallback, /* skipFrames */ 0, maxFrames,
                     &aNativeStack, thread, /* context */ nullptr);
}
#endif

#ifdef USE_EHABI_STACKWALK
static void DoEHABIBacktrace(PSLockRef aLock,
                             const RegisteredThread& aRegisteredThread,
                             const Registers& aRegs,
                             NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  const mcontext_t* mcontext = &aRegs.mContext->uc_mcontext;
  mcontext_t savedContext;
  const ProfilingStack& profilingStack =
      aRegisteredThread.RacyRegisteredThread().ProfilingStack();

  // The profiling stack contains an "EnterJIT" frame whenever we enter
  // JIT code with profiling enabled; the stack pointer value points at
  // the saved registers. We use this to resume unwinding after
  // encountering JIT code.
  for (uint32_t i = profilingStack.stackSize(); i > 0; --i) {
    // The profiling stack grows towards higher indices, so we iterate
    // backwards (from callee to caller).
    const js::ProfilingStackFrame& frame = profilingStack.frames[i - 1];
    if (!frame.isJsFrame() && strcmp(frame.label(), "EnterJIT") == 0) {
      // Found JIT entry frame. Unwind up to that point (i.e., force
      // the stack walk to stop before the block of saved registers;
      // note that it yields nondecreasing stack pointers), then restore
      // the saved state.
      uint32_t* vSP = reinterpret_cast<uint32_t*>(frame.stackAddress());

      aNativeStack.mCount +=
          EHABIStackWalk(*mcontext, /* stackBase = */ vSP,
                         aNativeStack.mSPs + aNativeStack.mCount,
                         aNativeStack.mPCs + aNativeStack.mCount,
                         MAX_NATIVE_FRAMES - aNativeStack.mCount);

      memset(&savedContext, 0, sizeof(savedContext));

      // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp
      savedContext.arm_r4 = *vSP++;
      savedContext.arm_r5 = *vSP++;
      savedContext.arm_r6 = *vSP++;
      savedContext.arm_r7 = *vSP++;
      savedContext.arm_r8 = *vSP++;
      savedContext.arm_r9 = *vSP++;
      savedContext.arm_r10 = *vSP++;
      savedContext.arm_fp = *vSP++;
      savedContext.arm_lr = *vSP++;
      savedContext.arm_sp = reinterpret_cast<uint32_t>(vSP);
      savedContext.arm_pc = savedContext.arm_lr;
      mcontext = &savedContext;
    }
  }

  // Now unwind whatever's left (starting from either the last EnterJIT frame
  // or, if no EnterJIT was found, the original registers).
  aNativeStack.mCount +=
      EHABIStackWalk(*mcontext, const_cast<void*>(aRegisteredThread.StackTop()),
                     aNativeStack.mSPs + aNativeStack.mCount,
                     aNativeStack.mPCs + aNativeStack.mCount,
                     MAX_NATIVE_FRAMES - aNativeStack.mCount);
}
#endif

#ifdef USE_LUL_STACKWALK

// See the comment at the callsite for why this function is necessary.
#  if defined(MOZ_HAVE_ASAN_BLACKLIST)
MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc,
                                           size_t aLen) {
  // The obvious thing to do here is call memcpy(). However, although
  // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
  // false positive still manifests! So we must implement memcpy() ourselves
  // within this function.
  char* dst = static_cast<char*>(aDst);
  const char* src = static_cast<const char*>(aSrc);

  for (size_t i = 0; i < aLen; i++) {
    dst[i] = src[i];
  }
}
#  endif

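// Illustrative sketch, excluded from compilation: the kind of compiler
// attribute that MOZ_ASAN_BLACKLIST presumably wraps. Clang's
// no_sanitize("address") attribute exempts a single function from
// AddressSanitizer instrumentation, so deliberate out-of-frame reads (like
// the stack snapshot taken below) are not flagged. UninstrumentedCopy() is a
// hypothetical stand-in; the real macro lives in mfbt attribute headers.
#if 0
#  include <cstddef>

#  if defined(__clang__)
__attribute__((no_sanitize("address")))
#  endif
static void
UninstrumentedCopy(void* aDst, const void* aSrc, size_t aLen) {
  // Byte-by-byte copy; calling memcpy() here would reintroduce the
  // instrumented code path that ASAN_memcpy() above exists to avoid.
  char* dst = static_cast<char*>(aDst);
  const char* src = static_cast<const char*>(aSrc);
  for (size_t i = 0; i < aLen; i++) {
    dst[i] = src[i];
  }
}
#endif
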
static void DoLULBacktrace(PSLockRef aLock,
                           const RegisteredThread& aRegisteredThread,
                           const Registers& aRegs, NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  const mcontext_t* mc = &aRegs.mContext->uc_mcontext;

  lul::UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));

#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
#  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
  startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
  startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
  startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
  startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
  startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
#  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
  startRegs.pc = lul::TaggedUWord(mc->pc);
  startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
  startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
  startRegs.sp = lul::TaggedUWord(mc->sp);
#  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
#  elif defined(GP_PLAT_mips64_linux)
  startRegs.pc = lul::TaggedUWord(mc->pc);
  startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
  startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
#  else
#    error "Unknown plat"
#  endif

  // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
  // stack's registered top point. Do some basic sanity checks too. This
  // assumes that the TaggedUWord holding the stack pointer value is valid, but
  // it should be, since it was constructed that way in the code just above.

  // We could construct |stackImg| so that LUL reads directly from the stack in
  // question, rather than from a copy of it. That would reduce overhead and
  // space use a bit. However, it gives a problem with dynamic analysis tools
  // (ASan, TSan, Valgrind) which is that such tools will report invalid or
  // racing memory accesses, and such accesses will be reported deep inside LUL.
  // By taking a copy here, we can either sanitise the copy (for Valgrind) or
  // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have
  // to try and suppress errors inside LUL.
  //
  // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks
  // observed in some minutes of testing, whilst keeping the size of this
  // function (DoLULBacktrace)'s frame reasonable. Most stacks observed in
  // practice are small, 4KB or less, and so the copy costs are insignificant
  // compared to other profiler overhead.
  //
  // |stackImg| is allocated on this (the sampling thread's) stack. That
  // implies that the frame for this function is at least N_STACK_BYTES large.
  // In general it would be considered unacceptable to have such a large frame
  // on a stack, but it only exists for the unwinder thread, and so is not
  // expected to be a problem. Allocating it on the heap is troublesome because
  // this function runs whilst the sampled thread is suspended, so any heap
  // allocation risks deadlock. Allocating it as a global variable is not
  // thread safe, which would be a problem if we ever allow multiple sampler
  // threads. Hence allocating it on the stack seems to be the least-worst
  // option.

  lul::StackImage stackImg;

  {
#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
    uintptr_t rEDZONE_SIZE = 128;
    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
#  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
#  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
#  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
#  elif defined(GP_PLAT_mips64_linux)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
#  else
#    error "Unknown plat"
#  endif
    uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
    uintptr_t ws = sizeof(void*);
    start &= ~(ws - 1);
    end &= ~(ws - 1);
    uintptr_t nToCopy = 0;
    if (start < end) {
      nToCopy = end - start;
      if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
    }
    MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
    stackImg.mLen = nToCopy;
    stackImg.mStartAvma = start;
    if (nToCopy > 0) {
      // If this is a vanilla memcpy(), ASAN makes the following complaint:
      //
      //   ERROR: AddressSanitizer: stack-buffer-underflow ...
      //   ...
      //   HINT: this may be a false positive if your program uses some custom
      //   stack unwind mechanism or swapcontext
      //
      // This code is very much a custom stack unwind mechanism! So we use an
      // alternative memcpy() implementation that is ignored by ASAN.
#  if defined(MOZ_HAVE_ASAN_BLACKLIST)
      ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
#  else
      memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
#  endif
      (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
    }
  }

  size_t framePointerFramesAcquired = 0;
  lul::LUL* lul = CorePS::Lul(aLock);
  lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
              reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
              &aNativeStack.mCount, &framePointerFramesAcquired,
              MAX_NATIVE_FRAMES, &startRegs, &stackImg);