/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// There are three kinds of samples done by the profiler.
//
// - A "periodic" sample is the most complex kind. It is done in response to a
//   timer while the profiler is active. It involves writing a stack trace plus
//   a variety of other values (memory measurements, responsiveness
//   measurements, markers, etc.) into the main ProfileBuffer. The sampling is
//   done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
//   get the register values.
//
// - A "synchronous" sample is a simpler kind. It is done in response to an API
//   call (profiler_get_backtrace()). It involves writing a stack trace and
//   little else into a temporary ProfileBuffer, and wrapping that up in a
//   ProfilerBacktrace that can be subsequently used in a marker. The sampling
//   is done on-thread, and so Registers::SyncPopulate() is used to get the
//   register values.
//
// - A "backtrace" sample is the simplest kind. It is done in response to an
//   API call (profiler_suspend_and_sample_thread()). It involves getting a
//   stack trace via a ProfilerStackCollector; it does not write to a
//   ProfileBuffer. The sampling is done from off-thread, and so uses
//   SuspendAndSampleAndResumeThread() to get the register values.

#include "platform.h"

#include "GeckoProfiler.h"
#include "GeckoProfilerReporter.h"
#include "PageInformation.h"
#include "ProfiledThreadData.h"
#include "ProfilerBacktrace.h"
#include "ProfileBuffer.h"
#include "ProfilerIOInterposeObserver.h"
#include "ProfilerMarkerPayload.h"
#include "ProfilerParent.h"
#include "RegisteredThread.h"
#include "shared-libraries.h"
#include "ThreadInfo.h"
#include "VTuneProfiler.h"

#include "js/TraceLoggerAPI.h"
#include "js/ProfilingFrameIterator.h"
#include "memory_hooks.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Atomics.h"
#include "mozilla/AutoProfilerLabel.h"
#include "mozilla/ExtensionPolicyService.h"
#include "mozilla/extensions/WebExtensionPolicy.h"
#include "mozilla/Printf.h"
#include "mozilla/Services.h"
#include "mozilla/StackWalk.h"
#include "mozilla/StaticPtr.h"
#include "mozilla/SystemGroup.h"
#include "mozilla/ThreadLocal.h"
#include "mozilla/TimeStamp.h"
#include "mozilla/Tuple.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Vector.h"
#include "BaseProfiler.h"
#include "nsDirectoryServiceDefs.h"
#include "nsDirectoryServiceUtils.h"
#include "nsIHttpProtocolHandler.h"
#include "nsIObserverService.h"
#include "nsIPropertyBag2.h"
#include "nsIXULAppInfo.h"
#include "nsIXULRuntime.h"
#include "nsJSPrincipals.h"
#include "nsMemoryReporterManager.h"
#include "nsProfilerStartParams.h"
#include "nsScriptSecurityManager.h"
#include "nsThreadUtils.h"
#include "nsXULAppAPI.h"
#include "prdtoa.h"
#include "prtime.h"

#include <algorithm>
#include <errno.h>
#include <fstream>
#include <ostream>
#include <sstream>

#ifdef MOZ_TASK_TRACER
#  include "GeckoTaskTracer.h"
#endif

#if defined(GP_OS_android)
#  include "GeneratedJNINatives.h"
#  include "GeneratedJNIWrappers.h"
#endif

// Win32 builds always have frame pointers, so FramePointerStackWalk() always
// works.
#if defined(GP_PLAT_x86_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// Win64 builds always omit frame pointers, so we use the slower
// MozStackWalk(), which works in that case.
#if defined(GP_PLAT_amd64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
// MozStackWalk().
#if defined(GP_PLAT_arm64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// Mac builds only have frame pointers when MOZ_PROFILING is specified, so
// FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
// on Mac.
#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// Android builds use the ARM Exception Handling ABI to unwind.
#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
#  define HAVE_NATIVE_UNWIND
#  define USE_EHABI_STACKWALK
#  include "EHABIStackWalk.h"
#endif

// Linux builds use LUL, which uses DWARF info to unwind stacks.
#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||  \
    defined(GP_PLAT_arm64_android)
#  define HAVE_NATIVE_UNWIND
#  define USE_LUL_STACKWALK
#  include "lul/LulMain.h"
#  include "lul/platform-linux-lul.h"

// On linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace.).
//
// FramePointerStackWalk can produce incomplete stacks when the current entry is
// in a shared library without framepointers, however LUL can take a long time
// to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread like the Background Hang Reporter.
#  if defined(MOZ_PROFILING)
#    define USE_FRAME_POINTER_STACK_WALK
#  endif
#endif

// We can only stackwalk without expensive initialization on platforms which
// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
#  define HAVE_FASTINIT_NATIVE_UNWIND
#endif

#ifdef MOZ_VALGRIND
#  include <valgrind/memcheck.h>
#else
#  define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
#endif

#if defined(GP_OS_linux) || defined(GP_OS_android)
#  include <ucontext.h>
#endif

using namespace mozilla;
using mozilla::profiler::detail::RacyFeatures;

LazyLogModule gProfilerLog("prof");

#if defined(GP_OS_android)
class GeckoJavaSampler
    : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> {
 private:
  GeckoJavaSampler();

 public:
  static double GetProfilerTime() {
    if (!profiler_is_active()) {
      return 0.0;
    }
    return profiler_time();
  };
};
#endif

// Return all features that are available on this platform.
static uint32_t AvailableFeatures() {
  uint32_t features = 0;

#define ADD_FEATURE(n_, str_, Name_, desc_) \
  ProfilerFeature::Set##Name_(features);

  // Add all the possible features.
  PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)

#undef ADD_FEATURE

  // Now remove features not supported on this platform/configuration.
#if !defined(GP_OS_android)
  ProfilerFeature::ClearJava(features);
#endif
#if !defined(HAVE_NATIVE_UNWIND)
  ProfilerFeature::ClearStackWalk(features);
#endif
#if !defined(MOZ_TASK_TRACER)
  ProfilerFeature::ClearTaskTracer(features);
#endif
  if (!JS::TraceLoggerSupported()) {
    ProfilerFeature::ClearJSTracer(features);
  }

  return features;
}
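
// For illustration: PROFILER_FOR_EACH_FEATURE(ADD_FEATURE) above expands
// ADD_FEATURE once per feature, so AvailableFeatures() roughly amounts to
//
//   ProfilerFeature::SetJava(features);
//   ProfilerFeature::SetJS(features);
//   ProfilerFeature::SetStackWalk(features);
//   // ...one Set call per feature...
//
// followed by the Clear*() calls that drop whatever this platform or build
// configuration cannot support.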

// Default features common to all contexts (even if not available).
static uint32_t DefaultFeatures() {
  return ProfilerFeature::Java | ProfilerFeature::JS | ProfilerFeature::Leaf |
         ProfilerFeature::StackWalk | ProfilerFeature::Threads |
         ProfilerFeature::Responsiveness;
}

// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
// available).
static uint32_t StartupExtraDefaultFeatures() {
  // Enable mainthreadio by default for startup profiles as startup is heavy on
  // I/O operations, and main thread I/O is really important to see there.
  return ProfilerFeature::MainThreadIO;
}

class PSMutex : public StaticMutex {};

typedef BaseAutoLock<PSMutex&> PSAutoLock;

// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
// fields.
typedef const PSAutoLock& PSLockRef;
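
// A sketch of the intended usage pattern (the real call sites appear further
// down in this file): the caller takes gPSMutex via a PSAutoLock and then
// passes that lock to every getter/setter it calls, e.g.
//
//   PSAutoLock lock(gPSMutex);
//   if (ActivePS::Exists(lock)) {
//     double interval = ActivePS::Interval(lock);
//   }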

#define PS_GET(type_, name_) \
  static type_ name_(PSLockRef) { return sInstance->m##name_; }

#define PS_GET_LOCKLESS(type_, name_) \
  static type_ name_() { return sInstance->m##name_; }

#define PS_GET_AND_SET(type_, name_)                  \
  PS_GET(type_, name_)                                \
  static void Set##name_(PSLockRef, type_ a##name_) { \
    sInstance->m##name_ = a##name_;                   \
  }
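
// As a concrete example of the macros above, PS_GET_AND_SET(bool, IsPaused)
// (used by ActivePS below) expands to:
//
//   static bool IsPaused(PSLockRef) { return sInstance->mIsPaused; }
//   static void SetIsPaused(PSLockRef, bool aIsPaused) {
//     sInstance->mIsPaused = aIsPaused;
//   }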

// All functions in this file can run on multiple threads unless they have an
// NS_IsMainThread() assertion.

// This class contains the profiler's core global state, i.e. that which is
// valid even when the profiler is not active. Most profile operations can't do
// anything useful when this class is not instantiated, so we release-assert
// its non-nullness in all such operations.
//
// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
// PSAutoLock reference as an argument as proof that the gPSMutex is currently
// locked. This makes it clear when gPSMutex is locked and helps avoid
// accidental unlocked accesses to global state. There are ways to circumvent
// this mechanism, but please don't do so without *very* good reason and a
// detailed explanation.
//
// The exceptions to this rule:
//
// - mProcessStartTime, because it's immutable;
//
// - each thread's RacyRegisteredThread object is accessible without locking via
//   TLSRegisteredThread::RacyRegisteredThread().
class CorePS {
 private:
  CorePS()
      : mProcessStartTime(TimeStamp::ProcessCreation())
#ifdef USE_LUL_STACKWALK
        ,
        mLul(nullptr)
#endif
  {
  }

  ~CorePS() {}

 public:
  static void Create(PSLockRef aLock) { sInstance = new CorePS(); }

  static void Destroy(PSLockRef aLock) {
    delete sInstance;
    sInstance = nullptr;
  }

  // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
  // being locked. This is because CorePS is instantiated so early on the main
  // thread that we don't have to worry about it being racy.
  static bool Exists() { return !!sInstance; }

  static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
                        size_t& aProfSize, size_t& aLulSize) {
    aProfSize += aMallocSizeOf(sInstance);

    for (auto& registeredThread : sInstance->mRegisteredThreads) {
      aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
    }

    for (auto& registeredPage : sInstance->mRegisteredPages) {
      aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
    }

    // Measurement of the following things may be added later if DMD finds it
    // is worthwhile:
    // - CorePS::mRegisteredThreads itself (its elements' children are
    // measured above)
    // - CorePS::mRegisteredPages itself (its elements' children are
    // measured above)
    // - CorePS::mInterposeObserver

#if defined(USE_LUL_STACKWALK)
    if (sInstance->mLul) {
      aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
    }
#endif
  }

  // No PSLockRef is needed for this field because it's immutable.
  PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)

  PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)

  static void AppendRegisteredThread(
      PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
    MOZ_RELEASE_ASSERT(
        sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
  }

  static void RemoveRegisteredThread(PSLockRef,
                                     RegisteredThread* aRegisteredThread) {
    // Remove aRegisteredThread from mRegisteredThreads.
    for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
      if (rt.get() == aRegisteredThread) {
        sInstance->mRegisteredThreads.erase(&rt);
        return;
      }
    }
  }

  PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)

  static void AppendRegisteredPage(PSLockRef,
                                   RefPtr<PageInformation>&& aRegisteredPage) {
    // Disabling this assertion for now until we fix the same page registration
    // issue. See Bug 1542918.
#if 0
    struct RegisteredPageComparator {
      PageInformation* aA;
      bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
    };
    MOZ_RELEASE_ASSERT(std::none_of(
        sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
        RegisteredPageComparator{aRegisteredPage.get()}));
#endif
    MOZ_RELEASE_ASSERT(
        sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
  }

  static void RemoveRegisteredPages(PSLockRef,
                                    const nsID& aRegisteredDocShellId) {
    // Remove RegisteredPage from mRegisteredPages by given DocShell Id.
    sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
      return rd->DocShellId().Equals(aRegisteredDocShellId);
    });
  }

  static void ClearRegisteredPages(PSLockRef) {
    sInstance->mRegisteredPages.clear();
  }

  PS_GET(const Vector<BaseProfilerCount*>&, Counters)

  static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
    // we don't own the counters; they may be stored in static objects
    MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
  }

  static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
    // we may be called to remove a counter after the profiler is stopped or
    // late in shutdown.
    if (sInstance) {
      auto* counter = std::find(sInstance->mCounters.begin(),
                                sInstance->mCounters.end(), aCounter);
      MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
      sInstance->mCounters.erase(counter);
    }
  }

#ifdef USE_LUL_STACKWALK
  static lul::LUL* Lul(PSLockRef) { return sInstance->mLul.get(); }
  static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
    sInstance->mLul = std::move(aLul);
  }
#endif

  PS_GET_AND_SET(const nsACString&, ProcessName)

 private:
  // The singleton instance
  static CorePS* sInstance;

  // The time that the process started.
  const TimeStamp mProcessStartTime;

  // Info on all the registered threads.
  // ThreadIds in mRegisteredThreads are unique.
  Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;

  // Info on all the registered pages.
  // DocShellId and DocShellHistoryId pairs in mRegisteredPages are unique.
  Vector<RefPtr<PageInformation>> mRegisteredPages;

  // Non-owning pointers to all active counters
  Vector<BaseProfilerCount*> mCounters;

#ifdef USE_LUL_STACKWALK
  // LUL's state. Null prior to the first activation, non-null thereafter.
  UniquePtr<lul::LUL> mLul;
#endif

  // Process name, provided by child process initialization code.
  nsAutoCString mProcessName;
};

CorePS* CorePS::sInstance = nullptr;

class SamplerThread;

static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
                                       double aInterval);

struct LiveProfiledThreadData {
  RegisteredThread* mRegisteredThread;
  UniquePtr<ProfiledThreadData> mProfiledThreadData;
};

// This class contains the profiler's global state that is valid only when the
// profiler is active. When not instantiated, the profiler is inactive.
//
// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
// CorePS.
//
class ActivePS {
 private:
  static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
    // Filter out any features unavailable in this platform/configuration.
    aFeatures &= AvailableFeatures();

    // Always enable ProfilerFeature::Threads if we have a filter, because
    // users sometimes ask to filter by a list of threads but forget to
    // explicitly specify ProfilerFeature::Threads.
    if (aFilterCount > 0) {
      aFeatures |= ProfilerFeature::Threads;
    }

    return aFeatures;
  }

  ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
           uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
           const Maybe<double>& aDuration)
      : mGeneration(sNextGeneration++),
        mCapacity(aCapacity),
        mDuration(aDuration),
        mInterval(aInterval),
        mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
        mBuffer(MakeUnique<ProfileBuffer>(aCapacity))
        // The new sampler thread doesn't start sampling immediately because the
        // main loop within Run() is blocked until this function's caller
        // unlocks gPSMutex.
        ,
        mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval)),
        mInterposeObserver(ProfilerFeature::HasMainThreadIO(aFeatures)
                               ? new ProfilerIOInterposeObserver()
                               : nullptr)
#undef HAS_FEATURE
        ,
        mIsPaused(false)
#if defined(GP_OS_linux)
        ,
        mWasPaused(false)
#endif
  {
    // Deep copy aFilters.
    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
    for (uint32_t i = 0; i < aFilterCount; ++i) {
      mFilters[i] = aFilters[i];
    }

#if !defined(RELEASE_OR_BETA)
    if (mInterposeObserver) {
      // We need to register the observer on the main thread, because we want
      // to observe IO that happens on the main thread.
      // IOInterposer needs to be initialized before calling
      // IOInterposer::Register or our observer will be silently dropped.
      if (NS_IsMainThread()) {
        IOInterposer::Init();
        IOInterposer::Register(IOInterposeObserver::OpAll, mInterposeObserver);
      } else {
        RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
        NS_DispatchToMainThread(
            NS_NewRunnableFunction("ActivePS::ActivePS", [=]() {
              IOInterposer::Init();
              IOInterposer::Register(IOInterposeObserver::OpAll, observer);
            }));
      }
    }
#endif
  }

  ~ActivePS() {
#if !defined(RELEASE_OR_BETA)
    if (mInterposeObserver) {
      // We need to unregister the observer on the main thread, because that's
      // where we've registered it.
      if (NS_IsMainThread()) {
        IOInterposer::Unregister(IOInterposeObserver::OpAll,
                                 mInterposeObserver);
      } else {
        RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
        NS_DispatchToMainThread(
            NS_NewRunnableFunction("ActivePS::~ActivePS", [=]() {
              IOInterposer::Unregister(IOInterposeObserver::OpAll, observer);
            }));
      }
    }
#endif
  }

  bool ThreadSelected(const char* aThreadName) {
    MOZ_RELEASE_ASSERT(sInstance);

    if (mFilters.empty()) {
      return true;
    }

    std::string name = aThreadName;
    std::transform(name.begin(), name.end(), name.begin(), ::tolower);

    for (uint32_t i = 0; i < mFilters.length(); ++i) {
      std::string filter = mFilters[i];
      std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);

      // Crude, non UTF-8 compatible, case insensitive substring search
      if (name.find(filter) != std::string::npos) {
        return true;
      }

      // If the filter starts with pid:, check for a pid match
      if (filter.find("pid:") == 0) {
        std::string mypid = std::to_string(profiler_current_process_id());
        if (filter.compare(4, std::string::npos, mypid) == 0) {
          return true;
        }
      }
    }

    return false;
  }
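
  // For example: with mFilters == {"GeckoMain", "pid:12345"}, any thread whose
  // name contains "geckomain" (case-insensitively) is selected, and in the
  // process whose id is 12345 every thread is selected via the "pid:" entry;
  // with an empty filter list, all threads are selected.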

 public:
  static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount, const Maybe<double>& aDuration) {
    sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
                             aFilterCount, aDuration);
  }

  static MOZ_MUST_USE SamplerThread* Destroy(PSLockRef aLock) {
    auto samplerThread = sInstance->mSamplerThread;
    delete sInstance;
    sInstance = nullptr;

    return samplerThread;
  }

  static bool Exists(PSLockRef) { return !!sInstance; }

  static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
                     const Maybe<double>& aDuration, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount) {
    if (sInstance->mCapacity != aCapacity ||
        sInstance->mDuration != aDuration ||
        sInstance->mInterval != aInterval ||
        sInstance->mFeatures != aFeatures ||
        sInstance->mFilters.length() != aFilterCount) {
      return false;
    }

    for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
      if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
        return false;
      }
    }
    return true;
  }

  static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
    size_t n = aMallocSizeOf(sInstance);

    n += sInstance->mBuffer->SizeOfIncludingThis(aMallocSizeOf);

    // Measurement of the following members may be added later if DMD finds it
    // is worthwhile:
    // - mLiveProfiledThreads (both the array itself, and the contents)
    // - mDeadProfiledThreads (both the array itself, and the contents)
    //

    return n;
  }

  static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
    MOZ_RELEASE_ASSERT(sInstance);

    return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
            sInstance->ThreadSelected(aInfo->Name()));
  }

  PS_GET(uint32_t, Generation)

  PS_GET(PowerOfTwo32, Capacity)

  PS_GET(Maybe<double>, Duration)

  PS_GET(double, Interval)

  PS_GET(uint32_t, Features)

#define PS_GET_FEATURE(n_, str_, Name_, desc_)                \
  static bool Feature##Name_(PSLockRef) {                     \
    return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
  }

  PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)

#undef PS_GET_FEATURE

  static uint32_t JSFlags(PSLockRef aLock) {
    uint32_t Flags = 0;
    Flags |=
        FeatureJS(aLock) ? uint32_t(JSInstrumentationFlags::StackSampling) : 0;
    Flags |= FeatureTrackOptimizations(aLock)
                 ? uint32_t(JSInstrumentationFlags::TrackOptimizations)
                 : 0;
    Flags |= FeatureJSTracer(aLock)
                 ? uint32_t(JSInstrumentationFlags::TraceLogging)
                 : 0;
    Flags |= FeatureJSAllocations(aLock)
                 ? uint32_t(JSInstrumentationFlags::Allocations)
                 : 0;
    return Flags;
  }

  PS_GET(const Vector<std::string>&, Filters)

  static ProfileBuffer& Buffer(PSLockRef) { return *sInstance->mBuffer.get(); }

  static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
    return sInstance->mLiveProfiledThreads;
  }

  // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
  // for all threads that should be included in a profile, both for threads
  // that are still registered, and for threads that have been unregistered but
  // still have data in the buffer.
  // For threads that have already been unregistered, the RegisteredThread
  // pointer will be null.
  // The returned array is sorted by thread register time.
  // Do not hold on to the return value across thread registration or profiler
  // restarts.
  static Vector<Pair<RegisteredThread*, ProfiledThreadData*>> ProfiledThreads(
      PSLockRef) {
    Vector<Pair<RegisteredThread*, ProfiledThreadData*>> array;
    MOZ_RELEASE_ASSERT(
        array.initCapacity(sInstance->mLiveProfiledThreads.length() +
                           sInstance->mDeadProfiledThreads.length()));
    for (auto& t : sInstance->mLiveProfiledThreads) {
      MOZ_RELEASE_ASSERT(array.append(
          MakePair(t.mRegisteredThread, t.mProfiledThreadData.get())));
    }
    for (auto& t : sInstance->mDeadProfiledThreads) {
      MOZ_RELEASE_ASSERT(
          array.append(MakePair((RegisteredThread*)nullptr, t.get())));
    }

    std::sort(array.begin(), array.end(),
              [](const Pair<RegisteredThread*, ProfiledThreadData*>& a,
                 const Pair<RegisteredThread*, ProfiledThreadData*>& b) {
                return a.second()->Info()->RegisterTime() <
                       b.second()->Info()->RegisterTime();
              });
    return array;
  }

  static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
    Vector<RefPtr<PageInformation>> array;
    for (auto& d : CorePS::RegisteredPages(aLock)) {
      MOZ_RELEASE_ASSERT(array.append(d));
    }
    for (auto& d : sInstance->mDeadProfiledPages) {
      MOZ_RELEASE_ASSERT(array.append(d));
    }
    // We don't need to sort the DocShells like threads since we won't show them
    // as a list.
    return array;
  }

  // Do a linear search through mLiveProfiledThreads to find the
  // ProfiledThreadData object for a RegisteredThread.
  static ProfiledThreadData* GetProfiledThreadData(
      PSLockRef, RegisteredThread* aRegisteredThread) {
    for (const LiveProfiledThreadData& thread :
         sInstance->mLiveProfiledThreads) {
      if (thread.mRegisteredThread == aRegisteredThread) {
        return thread.mProfiledThreadData.get();
      }
    }
    return nullptr;
  }

  static ProfiledThreadData* AddLiveProfiledThread(
      PSLockRef, RegisteredThread* aRegisteredThread,
      UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
    MOZ_RELEASE_ASSERT(
        sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{
            aRegisteredThread, std::move(aProfiledThreadData)}));

    // Return a weak pointer to the ProfiledThreadData object.
    return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
  }

  static void UnregisterThread(PSLockRef aLockRef,
                               RegisteredThread* aRegisteredThread) {
    DiscardExpiredDeadProfiledThreads(aLockRef);

    // Find the right entry in the mLiveProfiledThreads array and remove the
    // element, moving the ProfiledThreadData object for the thread into the
    // mDeadProfiledThreads array.
    // The thread's RegisteredThread object gets destroyed here.
    for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
      LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
      if (thread.mRegisteredThread == aRegisteredThread) {
        thread.mProfiledThreadData->NotifyUnregistered(
            sInstance->mBuffer->mRangeEnd);
        MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
            std::move(thread.mProfiledThreadData)));
        sInstance->mLiveProfiledThreads.erase(
            &sInstance->mLiveProfiledThreads[i]);
        return;
      }
    }
  }

  PS_GET_AND_SET(bool, IsPaused)

#if defined(GP_OS_linux)
  PS_GET_AND_SET(bool, WasPaused)
#endif

  static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
    uint64_t bufferRangeStart = sInstance->mBuffer->mRangeStart;
    // Discard any dead threads that were unregistered before bufferRangeStart.
    sInstance->mDeadProfiledThreads.eraseIf(
        [bufferRangeStart](
            const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
          Maybe<uint64_t> bufferPosition =
              aProfiledThreadData->BufferPositionWhenUnregistered();
          MOZ_RELEASE_ASSERT(bufferPosition,
                             "should have unregistered this thread");
          return *bufferPosition < bufferRangeStart;
        });
  }

  static void UnregisterPages(PSLockRef aLock,
                              const nsID& aRegisteredDocShellId) {
    auto& registeredPages = CorePS::RegisteredPages(aLock);
    for (size_t i = 0; i < registeredPages.length(); i++) {
      RefPtr<PageInformation>& page = registeredPages[i];
      if (page->DocShellId().Equals(aRegisteredDocShellId)) {
        page->NotifyUnregistered(sInstance->mBuffer->mRangeEnd);
        MOZ_RELEASE_ASSERT(
            sInstance->mDeadProfiledPages.append(std::move(page)));
        registeredPages.erase(&registeredPages[i--]);
      }
    }
  }

  static void DiscardExpiredPages(PSLockRef) {
    uint64_t bufferRangeStart = sInstance->mBuffer->mRangeStart;
    // Discard any dead pages that were unregistered before
    // bufferRangeStart.
    sInstance->mDeadProfiledPages.eraseIf(
        [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
          Maybe<uint64_t> bufferPosition =
              aProfiledPage->BufferPositionWhenUnregistered();
          MOZ_RELEASE_ASSERT(bufferPosition,
                             "should have unregistered this page");
          return *bufferPosition < bufferRangeStart;
        });
  }

  static void ClearUnregisteredPages(PSLockRef) {
    sInstance->mDeadProfiledPages.clear();
  }

#if !defined(RELEASE_OR_BETA)
  static void UnregisterIOInterposer(PSLockRef) {
    if (!sInstance->mInterposeObserver) {
      return;
    }

    IOInterposer::Unregister(IOInterposeObserver::OpAll,
                             sInstance->mInterposeObserver);

    sInstance->mInterposeObserver = nullptr;
  }

  static void PauseIOInterposer(PSLockRef) {
    if (!sInstance->mInterposeObserver) {
      return;
    }

    IOInterposer::Unregister(IOInterposeObserver::OpAll,
                             sInstance->mInterposeObserver);
  }

  static void ResumeIOInterposer(PSLockRef) {
    if (!sInstance->mInterposeObserver) {
      return;
    }

    IOInterposer::Register(IOInterposeObserver::OpAll,
                           sInstance->mInterposeObserver);
  }
#endif

  static void ClearExpiredExitProfiles(PSLockRef) {
    uint64_t bufferRangeStart = sInstance->mBuffer->mRangeStart;
    // Discard exit profiles that were gathered before our buffer RangeStart.
#ifdef MOZ_BASE_PROFILER
    if (bufferRangeStart != 0 && sInstance->mBaseProfileThreads) {
      sInstance->mBaseProfileThreads.reset();
    }
#endif
    sInstance->mExitProfiles.eraseIf(
        [bufferRangeStart](const ExitProfile& aExitProfile) {
          return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
        });
  }

#ifdef MOZ_BASE_PROFILER
  static void AddBaseProfileThreads(PSLockRef aLock,
                                    UniquePtr<char[]> aBaseProfileThreads) {
    sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads);
  }

  static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) {
    ClearExpiredExitProfiles(aLock);

    return std::move(sInstance->mBaseProfileThreads);
  }
#endif

  static void AddExitProfile(PSLockRef aLock, const nsCString& aExitProfile) {
    ClearExpiredExitProfiles(aLock);

    MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
        ExitProfile{aExitProfile, sInstance->mBuffer->mRangeEnd}));
  }

  static Vector<nsCString> MoveExitProfiles(PSLockRef aLock) {
    ClearExpiredExitProfiles(aLock);

    Vector<nsCString> profiles;
    MOZ_RELEASE_ASSERT(
        profiles.initCapacity(sInstance->mExitProfiles.length()));
    for (auto& profile : sInstance->mExitProfiles) {
      MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
    }
    sInstance->mExitProfiles.clear();
    return profiles;
  }

 private:
  // The singleton instance.
  static ActivePS* sInstance;

  // We need to track activity generations. If we didn't we could have the
  // following scenario.
  //
  // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
  //   gPSMutex, deletes the SamplerThread (which does a join).
  //
  // - profiler_start() runs on a different thread, locks gPSMutex,
  //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
  //   completes.
  //
  // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
  //   and continues as if the start/stop pair didn't occur. Also
  //   profiler_stop() is stuck, unable to finish.
  //
  // By checking ActivePS *and* the generation, we can avoid this scenario.
  // sNextGeneration is used to track the next generation number; it is static
  // because it must persist across different ActivePS instantiations.
  const uint32_t mGeneration;
  static uint32_t sNextGeneration;

  // The maximum number of entries in mBuffer.
  const PowerOfTwo32 mCapacity;

  // The maximum duration of entries in mBuffer, in seconds.
  const Maybe<double> mDuration;

  // The interval between samples, measured in milliseconds.
  const double mInterval;

  // The profile features that are enabled.
  const uint32_t mFeatures;

  // Substrings of names of threads we want to profile.
  Vector<std::string> mFilters;

  // The buffer into which all samples are recorded. Always non-null. Always
  // used in conjunction with CorePS::m{Live,Dead}Threads.
  const UniquePtr<ProfileBuffer> mBuffer;

  // ProfiledThreadData objects for any threads that were profiled at any point
  // during this run of the profiler:
  // - mLiveProfiledThreads contains all threads that are still registered, and
  // - mDeadProfiledThreads contains all threads that have already been
  //   unregistered but for which there is still data in the profile buffer.
  Vector<LiveProfiledThreadData> mLiveProfiledThreads;
  Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;

  // Info on all the dead pages.
  // Registered pages are being moved to this array after unregistration.
  // We are keeping them in case we need them in the profile data.
  // We are removing them when we ensure that we won't need them anymore.
  Vector<RefPtr<PageInformation>> mDeadProfiledPages;

  // The current sampler thread. This class is not responsible for destroying
  // the SamplerThread object; the Destroy() method returns it so the caller
  // can destroy it.
  SamplerThread* const mSamplerThread;

  // The interposer that records main thread I/O.
  RefPtr<ProfilerIOInterposeObserver> mInterposeObserver;

  // Is the profiler paused?
  bool mIsPaused;

#if defined(GP_OS_linux)
  // Used to record whether the profiler was paused just before forking. False
  // at all times except just before/after forking.
  bool mWasPaused;
#endif

#ifdef MOZ_BASE_PROFILER
  // Optional startup profile thread array from BaseProfiler.
  UniquePtr<char[]> mBaseProfileThreads;
#endif

  struct ExitProfile {
    nsCString mJSON;
    uint64_t mBufferPositionAtGatherTime;
  };
  Vector<ExitProfile> mExitProfiles;
};

ActivePS* ActivePS::sInstance = nullptr;
uint32_t ActivePS::sNextGeneration = 0;

#undef PS_GET
#undef PS_GET_LOCKLESS
#undef PS_GET_AND_SET

// The mutex that guards accesses to CorePS and ActivePS.
static PSMutex gPSMutex;

Atomic<uint32_t, MemoryOrdering::Relaxed, recordreplay::Behavior::DontPreserve>
    RacyFeatures::sActiveAndFeatures(0);

// Each live thread has a RegisteredThread, and we store a reference to it in
// TLS. This class encapsulates that TLS.
class TLSRegisteredThread {
 public:
  static bool Init(PSLockRef) {
    bool ok1 = sRegisteredThread.init();
    bool ok2 = AutoProfilerLabel::sProfilingStack.init();
    return ok1 && ok2;
  }

  // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
  static class RegisteredThread* RegisteredThread(PSLockRef) {
    return sRegisteredThread.get();
  }

  // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
  static class RacyRegisteredThread* RacyRegisteredThread() {
    class RegisteredThread* registeredThread = sRegisteredThread.get();
    return registeredThread ? &registeredThread->RacyRegisteredThread()
                            : nullptr;
  }

  // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
  // RacyRegisteredThread() can also be used to get the ProfilingStack, but that
  // is marginally slower because it requires an extra pointer indirection.
  static ProfilingStack* Stack() {
    return AutoProfilerLabel::sProfilingStack.get();
  }

  static void SetRegisteredThread(PSLockRef,
                                  class RegisteredThread* aRegisteredThread) {
    sRegisteredThread.set(aRegisteredThread);
    AutoProfilerLabel::sProfilingStack.set(
        aRegisteredThread
            ? &aRegisteredThread->RacyRegisteredThread().ProfilingStack()
            : nullptr);
  }

 private:
  // This is a non-owning reference to the RegisteredThread;
  // CorePS::mRegisteredThreads is the owning reference. On thread
  // deregistration, this reference is cleared and the RegisteredThread is
  // destroyed.
  static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
};

MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;

// Although you can access a thread's ProfilingStack via
// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
// directly to the ProfilingStack. Here's why.
//
// - We need to be able to push to and pop from the ProfilingStack in
//   AutoProfilerLabel.
//
// - The class functions are hot and must be defined in GeckoProfiler.h so they
//   can be inlined.
//
// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
//   GeckoProfiler.h.
//
// This second pointer isn't ideal, but does provide a way to satisfy those
// constraints. TLSRegisteredThread is responsible for updating it.
MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack;

// The name of the main thread.
static const char* const kMainThreadName = "GeckoMain";

////////////////////////////////////////////////////////////////////////
// BEGIN sampling/unwinding code

// The registers used for stack unwinding and a few other sampling purposes.
// The ctor does nothing; users are responsible for filling in the fields.
class Registers {
 public:
  Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}

#if defined(HAVE_NATIVE_UNWIND)
  // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
  void SyncPopulate();
#endif

  void Clear() { memset(this, 0, sizeof(*this)); }

  // These fields are filled in by
  // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
  // samples, and by SyncPopulate() for synchronous samples.
  Address mPC;  // Instruction pointer.
  Address mSP;  // Stack pointer.
  Address mFP;  // Frame pointer.
  Address mLR;  // ARM link register.
#if defined(GP_OS_linux) || defined(GP_OS_android)
  // This contains all the registers, which means it duplicates the four fields
  // above. This is ok.
  ucontext_t* mContext;  // The context from the signal handler.
#endif
};

// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
// looping on corrupted stacks.
static const size_t MAX_NATIVE_FRAMES = 1024;
static const size_t MAX_JS_FRAMES = 1024;

struct NativeStack {
  void* mPCs[MAX_NATIVE_FRAMES];
  void* mSPs[MAX_NATIVE_FRAMES];
  size_t mCount;  // Number of frames filled.

  NativeStack() : mPCs(), mSPs(), mCount(0) {}
};

Atomic<bool> WALKING_JS_STACK(false);

struct AutoWalkJSStack {
  bool walkAllowed;

  AutoWalkJSStack() : walkAllowed(false) {
    walkAllowed = WALKING_JS_STACK.compareExchange(false, true);
  }

  ~AutoWalkJSStack() {
    if (walkAllowed) {
      WALKING_JS_STACK = false;
    }
  }
};
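
// Note on the guard above: WALKING_JS_STACK is process-wide, so at most one
// sampler walks a JS stack at any given time. If the compareExchange fails,
// walkAllowed stays false and MergeStacks() below simply records no JS frames
// for that sample instead of blocking.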

// Merges the profiling stack, native stack, and JS stack, outputting the
// details to aCollector.
static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
                        const RegisteredThread& aRegisteredThread,
                        const Registers& aRegs, const NativeStack& aNativeStack,
                        ProfilerStackCollector& aCollector) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  const ProfilingStack& profilingStack =
      aRegisteredThread.RacyRegisteredThread().ProfilingStack();
  const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
  uint32_t profilingStackFrameCount = profilingStack.stackSize();
  JSContext* context = aRegisteredThread.GetJSContext();

  // Make a copy of the JS stack into a JSFrame array. This is necessary since,
  // like the native stack, the JS stack is iterated youngest-to-oldest and we
  // need to iterate oldest-to-youngest when adding frames to aInfo.

  // Non-periodic sampling passes Nothing() as the buffer write position to
  // ProfilingFrameIterator to avoid incorrectly resetting the buffer position
  // of sampled JIT frames inside the JS engine.
  Maybe<uint64_t> samplePosInBuffer;
  if (!aIsSynchronous) {
    // aCollector.SamplePositionInBuffer() will return Nothing() when
    // profiler_suspend_and_sample_thread is called from the background hang
    // reporter.
    samplePosInBuffer = aCollector.SamplePositionInBuffer();
  }
  uint32_t jsCount = 0;
  JS::ProfilingFrameIterator::Frame jsFrames[MAX_JS_FRAMES];

  // Only walk jit stack if profiling frame iterator is turned on.
  if (context && JS::IsProfilingEnabledForContext(context)) {
    AutoWalkJSStack autoWalkJSStack;
    const uint32_t maxFrames = ArrayLength(jsFrames);

    if (autoWalkJSStack.walkAllowed) {
      JS::ProfilingFrameIterator::RegisterState registerState;
      registerState.pc = aRegs.mPC;
      registerState.sp = aRegs.mSP;
      registerState.lr = aRegs.mLR;
      registerState.fp = aRegs.mFP;

      JS::ProfilingFrameIterator jsIter(context, registerState,
                                        samplePosInBuffer);
      for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) {
        if (aIsSynchronous || jsIter.isWasm()) {
          uint32_t extracted =
              jsIter.extractStack(jsFrames, jsCount, maxFrames);
          jsCount += extracted;
          if (jsCount == maxFrames) {
            break;
          }
        } else {
          Maybe<JS::ProfilingFrameIterator::Frame> frame =
              jsIter.getPhysicalFrameWithoutLabel();
          if (frame.isSome()) {
            jsFrames[jsCount++] = frame.value();
          }
        }
      }
    }
  }
1190
1191
// While the profiling stack array is ordered oldest-to-youngest, the JS and
1192
// native arrays are ordered youngest-to-oldest. We must add frames to aInfo
1193
// oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS
1194
// and native arrays backwards. Note: this means the terminating condition
1195
// jsIndex and nativeIndex is being < 0.
1196
uint32_t profilingStackIndex = 0;
1197
int32_t jsIndex = jsCount - 1;
1198
int32_t nativeIndex = aNativeStack.mCount - 1;
1199
1200
uint8_t* lastLabelFrameStackAddr = nullptr;
1201
uint8_t* jitEndStackAddr = nullptr;
1202
1203
// Iterate as long as there is at least one frame remaining.
1204
while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 ||
1205
nativeIndex >= 0) {
1206
// There are 1 to 3 frames available. Find and add the oldest.
1207
uint8_t* profilingStackAddr = nullptr;
1208
uint8_t* jsStackAddr = nullptr;
1209
uint8_t* nativeStackAddr = nullptr;
1210
uint8_t* jsActivationAddr = nullptr;
1211
1212
if (profilingStackIndex != profilingStackFrameCount) {
1213
const js::ProfilingStackFrame& profilingStackFrame =
1214
profilingStackFrames[profilingStackIndex];
1215
1216
if (profilingStackFrame.isLabelFrame() ||
1217
profilingStackFrame.isSpMarkerFrame()) {
1218
lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
1219
}
1220
1221
// Skip any JS_OSR frames. Such frames are used when the JS interpreter
1222
// enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
1223
// To avoid both the profiling stack frame and jit frame being recorded
1224
// (and showing up twice), the interpreter marks the interpreter
1225
// profiling stack frame as JS_OSR to ensure that it doesn't get counted.
1226
if (profilingStackFrame.isOSRFrame()) {
1227
profilingStackIndex++;
1228
continue;
1229
}
1230
1231
MOZ_ASSERT(lastLabelFrameStackAddr);
1232
profilingStackAddr = lastLabelFrameStackAddr;
1233
}
1234
1235
if (jsIndex >= 0) {
1236
jsStackAddr = (uint8_t*)jsFrames[jsIndex].stackAddress;
1237
jsActivationAddr = (uint8_t*)jsFrames[jsIndex].activation;
1238
}
1239
1240
if (nativeIndex >= 0) {
1241
nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
1242
}
1243
1244
// If there's a native stack frame which has the same SP as a profiling
1245
// stack frame, pretend we didn't see the native stack frame. Ditto for a
1246
// native stack frame which has the same SP as a JS stack frame. In effect
1247
// this means profiling stack frames or JS frames trump conflicting native
1248
// frames.
1249
if (nativeStackAddr && (profilingStackAddr == nativeStackAddr ||
1250
jsStackAddr == nativeStackAddr)) {
1251
nativeStackAddr = nullptr;
1252
nativeIndex--;
1253
MOZ_ASSERT(profilingStackAddr || jsStackAddr);
1254
}
1255
1256
// Sanity checks.
1257
MOZ_ASSERT_IF(profilingStackAddr,
1258
profilingStackAddr != jsStackAddr &&
1259
profilingStackAddr != nativeStackAddr);
1260
MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr &&
1261
jsStackAddr != nativeStackAddr);
1262
MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr &&
1263
nativeStackAddr != jsStackAddr);
1264
1265
// Check to see if profiling stack frame is top-most.
1266
if (profilingStackAddr > jsStackAddr &&
1267
profilingStackAddr > nativeStackAddr) {
1268
MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
1269
const js::ProfilingStackFrame& profilingStackFrame =
1270
profilingStackFrames[profilingStackIndex];
1271
1272
// Sp marker frames are just annotations and should not be recorded in
1273
// the profile.
1274
if (!profilingStackFrame.isSpMarkerFrame()) {
1275
// The JIT only allows the top-most frame to have a nullptr pc.
1276
MOZ_ASSERT_IF(
1277
profilingStackFrame.isJsFrame() && profilingStackFrame.script() &&
1278
!profilingStackFrame.pc(),
1279
&profilingStackFrame ==
1280
&profilingStack.frames[profilingStack.stackSize() - 1]);
1281
aCollector.CollectProfilingStackFrame(profilingStackFrame);
1282
}
1283
profilingStackIndex++;
1284
continue;
1285
}
1286
1287
// Check to see if JS jit stack frame is top-most
1288
if (jsStackAddr > nativeStackAddr) {
1289
MOZ_ASSERT(jsIndex >= 0);
1290
const JS::ProfilingFrameIterator::Frame& jsFrame = jsFrames[jsIndex];
1291
jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress;
1292
// Stringifying non-wasm JIT frames is delayed until streaming time. To
1293
// re-lookup the entry in the JitcodeGlobalTable, we need to store the
1294
// JIT code address (OptInfoAddr) in the circular buffer.
1295
//
      // Note that we cannot do this when we are synchronously sampling the
      // current thread; that is, when called from profiler_get_backtrace. The
      // captured backtrace is usually externally stored for an indeterminate
      // amount of time, such as in nsRefreshDriver. Problematically, the
      // stored backtrace may be alive across a GC during which the profiler
      // itself is disabled. In that case, the JS engine is free to discard its
      // JIT code. This means that if we inserted such OptInfoAddr entries into
      // the buffer, nsRefreshDriver would now be holding on to a backtrace
      // with stale JIT code return addresses.
      if (aIsSynchronous ||
          jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
        aCollector.CollectWasmFrame(jsFrame.label);
      } else if (jsFrame.kind ==
                 JS::ProfilingFrameIterator::Frame_BaselineInterpreter) {
        // For now treat this as a C++ Interpreter frame by materializing a
        // ProfilingStackFrame.
        JSScript* script = jsFrame.interpreterScript;
        jsbytecode* pc = jsFrame.interpreterPC();
        js::ProfilingStackFrame stackFrame;
        stackFrame.initJsFrame("", jsFrame.label, script, pc);
        aCollector.CollectProfilingStackFrame(stackFrame);
      } else {
        MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
                   jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
        aCollector.CollectJitReturnAddr(jsFrame.returnAddress());
      }

      jsIndex--;
      continue;
    }

    // If we reach here, there must be a native stack frame and it must be the
    // greatest frame.
    if (nativeStackAddr &&
        // If the latest JS frame was JIT, this could be the native frame that
        // corresponds to it. In that case, skip the native frame, because
        // there's no need for the same frame to be present twice in the stack.
        // The JS frame can be considered the symbolicated version of the native
        // frame.
        (!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) &&
        // This might still be a JIT operation, check to make sure that is not
        // in range of the NEXT JavaScript's stacks' activation address.
        (!jsActivationAddr || nativeStackAddr > jsActivationAddr)) {
      MOZ_ASSERT(nativeIndex >= 0);
      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
      aCollector.CollectNativeLeafAddr(addr);
    }
    if (nativeIndex >= 0) {
      nativeIndex--;
    }
  }

  // Update the JS context with the current profile sample buffer generation.
  //
  // Only do this for periodic samples. We don't want to do this for
  // synchronous samples, and we also don't want to do it for calls to
  // profiler_suspend_and_sample_thread() from the background hang reporter -
  // in that case, aCollector.BufferRangeStart() will return Nothing().
  if (!aIsSynchronous && context && aCollector.BufferRangeStart()) {
    uint64_t bufferRangeStart = *aCollector.BufferRangeStart();
    JS::SetJSContextProfilerSampleBufferRangeStart(context, bufferRangeStart);
  }
}

#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
static HANDLE GetThreadHandle(PlatformData* aData);
#endif

#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
                              void* aClosure) {
  NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
  MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
  nativeStack->mSPs[nativeStack->mCount] = aSP;
  nativeStack->mPCs[nativeStack->mCount] = aPC;
  nativeStack->mCount++;
}
#endif

#if defined(USE_FRAME_POINTER_STACK_WALK)
static void DoFramePointerBacktrace(PSLockRef aLock,
                                    const RegisteredThread& aRegisteredThread,
                                    const Registers& aRegs,
                                    NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the FramePointerStackWalk() call below will use 1..N. This is a bit weird
  // but it doesn't matter because StackWalkCallback() doesn't use the frame
  // number argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  const void* stackEnd = aRegisteredThread.StackTop();
  if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) {
    FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
                          &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
                          const_cast<void*>(stackEnd));
  }
}
#endif

#if defined(USE_MOZ_STACK_WALK)
static void DoMozStackWalkBacktrace(PSLockRef aLock,
                                    const RegisteredThread& aRegisteredThread,
                                    const Registers& aRegs,
                                    NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the MozStackWalkThread() call below will use 1..N. This is a bit weird but
  // it doesn't matter because StackWalkCallback() doesn't use the frame number
  // argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData());
  MOZ_ASSERT(thread);
  MozStackWalkThread(StackWalkCallback, /* skipFrames */ 0, maxFrames,
                     &aNativeStack, thread, /* context */ nullptr);
}
#endif

#ifdef USE_EHABI_STACKWALK
static void DoEHABIBacktrace(PSLockRef aLock,
                             const RegisteredThread& aRegisteredThread,
                             const Registers& aRegs,
                             NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  const mcontext_t* mcontext = &aRegs.mContext->uc_mcontext;
  mcontext_t savedContext;
  const ProfilingStack& profilingStack =
      aRegisteredThread.RacyRegisteredThread().ProfilingStack();

  // The profiling stack contains an "EnterJIT" frame whenever we enter
  // JIT code with profiling enabled; the stack pointer value points to
  // the saved registers. We use this to resume unwinding after
  // encountering JIT code.
  for (uint32_t i = profilingStack.stackSize(); i > 0; --i) {
    // The profiling stack grows towards higher indices, so we iterate
    // backwards (from callee to caller).
    const js::ProfilingStackFrame& frame = profilingStack.frames[i - 1];
    if (!frame.isJsFrame() && strcmp(frame.label(), "EnterJIT") == 0) {
      // Found JIT entry frame. Unwind up to that point (i.e., force
      // the stack walk to stop before the block of saved registers;
      // note that it yields nondecreasing stack pointers), then restore
      // the saved state.
      uint32_t* vSP = reinterpret_cast<uint32_t*>(frame.stackAddress());

      aNativeStack.mCount +=
          EHABIStackWalk(*mcontext, /* stackBase = */ vSP,
                         aNativeStack.mSPs + aNativeStack.mCount,
                         aNativeStack.mPCs + aNativeStack.mCount,
                         MAX_NATIVE_FRAMES - aNativeStack.mCount);

      memset(&savedContext, 0, sizeof(savedContext));

      // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp
      savedContext.arm_r4 = *vSP++;
      savedContext.arm_r5 = *vSP++;
      savedContext.arm_r6 = *vSP++;
      savedContext.arm_r7 = *vSP++;
      savedContext.arm_r8 = *vSP++;
      savedContext.arm_r9 = *vSP++;
      savedContext.arm_r10 = *vSP++;
      savedContext.arm_fp = *vSP++;
      savedContext.arm_lr = *vSP++;
      savedContext.arm_sp = reinterpret_cast<uint32_t>(vSP);
      savedContext.arm_pc = savedContext.arm_lr;
      mcontext = &savedContext;
    }
  }

  // Now unwind whatever's left (starting from either the last EnterJIT frame
  // or, if no EnterJIT was found, the original registers).
  aNativeStack.mCount +=
      EHABIStackWalk(*mcontext, const_cast<void*>(aRegisteredThread.StackTop()),
                     aNativeStack.mSPs + aNativeStack.mCount,
                     aNativeStack.mPCs + aNativeStack.mCount,
                     MAX_NATIVE_FRAMES - aNativeStack.mCount);
}
#endif
1487
1488
#ifdef USE_LUL_STACKWALK

// See the comment at the callsite for why this function is necessary.
# if defined(MOZ_HAVE_ASAN_BLACKLIST)
MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc,
                                            size_t aLen) {
  // The obvious thing to do here is call memcpy(). However, although
  // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
  // false positive still manifests! So we must implement memcpy() ourselves
  // within this function.
  char* dst = static_cast<char*>(aDst);
  const char* src = static_cast<const char*>(aSrc);

  for (size_t i = 0; i < aLen; i++) {
    dst[i] = src[i];
  }
}
# endif

static void DoLULBacktrace(PSLockRef aLock,
                           const RegisteredThread& aRegisteredThread,
                           const Registers& aRegs, NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  const mcontext_t* mc = &aRegs.mContext->uc_mcontext;

  lul::UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));

# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
  startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
  startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
  startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
  startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
  startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
  startRegs.pc = lul::TaggedUWord(mc->pc);
  startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
  startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
  startRegs.sp = lul::TaggedUWord(mc->sp);
# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
# elif defined(GP_PLAT_mips64_linux)
  startRegs.pc = lul::TaggedUWord(mc->pc);
  startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
  startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
# else
# error "Unknown plat"
# endif

  // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
  // stack's registered top point. Do some basic sanity checks too. This
  // assumes that the TaggedUWord holding the stack pointer value is valid, but
  // it should be, since it was constructed that way in the code just above.

  // We could construct |stackImg| so that LUL reads directly from the stack in
  // question, rather than from a copy of it. That would reduce overhead and
  // space use a bit. However, it causes a problem with dynamic analysis tools
  // (ASan, TSan, Valgrind): such tools will report invalid or racing memory
  // accesses, and those reports will surface deep inside LUL. By taking a copy
  // here, we can either sanitise the copy (for Valgrind) or copy it using an
  // unchecked memcpy (for ASan, TSan). That way we don't have to try and
  // suppress errors inside LUL.
  //
  // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks
  // observed in some minutes of testing, whilst keeping the size of this
  // function (DoLULBacktrace)'s frame reasonable. Most stacks observed in
  // practice are small, 4KB or less, and so the copy costs are insignificant
  // compared to other profiler overhead.
  //
  // |stackImg| is allocated on this (the sampling thread's) stack. That
  // implies that the frame for this function is at least N_STACK_BYTES large.
  // In general it would be considered unacceptable to have such a large frame
  // on a stack, but it only exists for the unwinder thread, and so is not
  // expected to be a problem. Allocating it on the heap is troublesome because
  // this function runs whilst the sampled thread is suspended, so any heap
  // allocation risks deadlock. Allocating it as a global variable is not
  // thread safe, which would be a problem if we ever allow multiple sampler
  // threads. Hence allocating it on the stack seems to be the least-worst
  // option.

  lul::StackImage stackImg;

  {
# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
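    // 128 bytes is the x86-64 SysV ABI "red zone": leaf code may use memory
    // just below RSP without adjusting it, so include that area in the copy.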
    uintptr_t rEDZONE_SIZE = 128;
    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_mips64_linux)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
# else
# error "Unknown plat"
# endif
    uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
    uintptr_t ws = sizeof(void*);
    start &= ~(ws - 1);
    end &= ~(ws - 1);
    uintptr_t nToCopy = 0;
    if (start < end) {
      nToCopy = end - start;
      if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
    }
    MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
    stackImg.mLen = nToCopy;
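    // mStartAvma records the copy's original virtual address, so LUL can map
    // reads of the copy back to the sampled thread's addresses. (AVMA is
    // presumed to stand for "actual virtual memory address", as used
    // elsewhere in LUL.)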
    stackImg.mStartAvma = start;
    if (nToCopy > 0) {
      // If this is a vanilla memcpy(), ASAN makes the following complaint:
      //
      // ERROR: AddressSanitizer: stack-buffer-underflow ...
      // ...
      // HINT: this may be a false positive if your program uses some custom
      // stack unwind mechanism or swapcontext
      //
      // This code is very much a custom stack unwind mechanism! So we use an
      // alternative memcpy() implementation that is ignored by ASAN.
# if defined(MOZ_HAVE_ASAN_BLACKLIST)
      ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
# else
      memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
# endif
      (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
    }
  }

  size_t framePointerFramesAcquired = 0;
  lul::LUL* lul = CorePS::Lul(aLock);
  lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
              reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
              &aNativeStack.mCount, &framePointerFramesAcquired,
              MAX_NATIVE_FRAMES, &startRegs, &stackImg);

  // Update stats in the LUL stats object. Unfortunately this requires
  // three global memory operations.
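  // Accounting: the first recovered frame comes from the register context
  // itself; of the rest, |framePointerFramesAcquired| were found by following
  // frame pointers, and the remainder came from CFI data.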
  lul->mStats.mContext += 1;
  lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
  lul->mStats.mFP += framePointerFramesAcquired;
}

#endif

#ifdef HAVE_NATIVE_UNWIND
static void DoNativeBacktrace(PSLockRef aLock,
                              const RegisteredThread& aRegisteredThread,
                              const Registers& aRegs,
                              NativeStack& aNativeStack) {
  // This method determines which stackwalker is used for periodic and
  // synchronous samples. (Backtrace samples are treated differently, see
  // profiler_suspend_and_sample_thread() for details). The only part of the
  // ordering that matters is that LUL must precede FRAME_POINTER, because on
  // Linux they can both be present.
# if defined(USE_LUL_STACKWALK)
  DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
# elif defined(USE_EHABI_STACKWALK)
  DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
# elif defined(USE_FRAME_POINTER_STACK_WALK)
  DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
# elif defined(USE_MOZ_STACK_WALK)
  DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
# else
# error "Invalid configuration"
# endif
}
#endif

// Writes some components shared by periodic and synchronous profiles to
// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
// and DoPeriodicSample().)
//
// The grammar for entry sequences is in a comment above
// ProfileBuffer::StreamSamplesToJSON.
static inline void DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
                                  RegisteredThread& aRegisteredThread,
                                  const TimeStamp& aNow, const Registers& aRegs,
                                  Maybe<uint64_t>* aLastSample,
                                  ProfileBuffer& aBuffer) {
  // WARNING: this function runs within the profiler's "critical section".

  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));

  uint64_t samplePos =
      aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
  if (aLastSample) {
    *aLastSample = Some(samplePos);
  }

  TimeDuration delta = aNow - CorePS::ProcessStartTime();
  aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));

  ProfileBufferCollector collector(aBuffer, ActivePS::Features(aLock),
                                   samplePos);
  NativeStack nativeStack;
#if defined(HAVE_NATIVE_UNWIND)
  if (ActivePS::FeatureStackWalk(aLock)) {
    DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack);

    MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
                aRegs, nativeStack, collector);
  } else
#endif
  {
    MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
                aRegs, nativeStack, collector);

    // We can't walk the whole native stack, but we can record the top frame.
    if (ActivePS::FeatureLeaf(aLock)) {
      aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
    }
  }
}

// Writes the components of a synchronous sample to the given ProfileBuffer.
static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
                         const TimeStamp& aNow, const Registers& aRegs,
                         ProfileBuffer& aBuffer) {
  // WARNING: this function runs within the profiler's "critical section".

  DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aNow,
                 aRegs, /* aLastSample = */ nullptr, aBuffer);
}

// Writes the components of a periodic sample to ActivePS's ProfileBuffer.
static void DoPeriodicSample(PSLockRef aLock,
                             RegisteredThread& aRegisteredThread,
                             ProfiledThreadData& aProfiledThreadData,
                             const TimeStamp& aNow, const Registers& aRegs) {
  // WARNING: this function runs within the profiler's "critical section".

  ProfileBuffer& buffer = ActivePS::Buffer(aLock);

  DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aNow,
                 aRegs, &aProfiledThreadData.LastSample(), buffer);

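  // Drain any markers the sampled thread queued since the last sample: hand
  // each one to the buffer for storage and add a buffer entry referencing it.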
  ProfilerMarkerLinkedList* pendingMarkersList =
      aRegisteredThread.RacyRegisteredThread().GetPendingMarkers();
  while (pendingMarkersList && pendingMarkersList->peek()) {
    ProfilerMarker* marker = pendingMarkersList->popHead();
    buffer.AddStoredMarker(marker);
    buffer.AddEntry(ProfileBufferEntry::Marker(marker));
  }

  ThreadResponsiveness* resp = aProfiledThreadData.GetThreadResponsiveness();
  if (resp && resp->HasData()) {
    double delta = resp->GetUnresponsiveDuration(
        (aNow - CorePS::ProcessStartTime()).ToMilliseconds());
    buffer.AddEntry(ProfileBufferEntry::Responsiveness(delta));
  }
}

// END sampling/unwinding code
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
// BEGIN saving/streaming code

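// 2^53 - 1: the largest integer a JS Number (an IEEE-754 double) can
// represent exactly; larger values would silently lose precision in the JSON.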
const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL;

static int64_t SafeJSInteger(uint64_t aValue) {
  return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1;
}

static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
                                         const SharedLibrary& aLib) {
  aWriter.StartObjectElement();
  aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
  aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
  aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
  aWriter.StringProperty("name",
                         NS_ConvertUTF16toUTF8(aLib.GetModuleName()).get());
  aWriter.StringProperty("path",
                         NS_ConvertUTF16toUTF8(aLib.GetModulePath()).get());
  aWriter.StringProperty("debugName",
                         NS_ConvertUTF16toUTF8(aLib.GetDebugName()).get());
  aWriter.StringProperty("debugPath",
                         NS_ConvertUTF16toUTF8(aLib.GetDebugPath()).get());
  aWriter.StringProperty("breakpadId", aLib.GetBreakpadId().get());
  aWriter.StringProperty("arch", aLib.GetArch().c_str());
  aWriter.EndObject();
}

void AppendSharedLibraries(JSONWriter& aWriter) {
  SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
  info.SortByAddress();
  for (size_t i = 0; i < info.GetSize(); i++) {
    AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i));
  }
}

#ifdef MOZ_TASK_TRACER
static void StreamNameAndThreadId(JSONWriter& aWriter, const char* aName,
                                  int aThreadId) {
  aWriter.StartObjectElement();
  {
    if (XRE_GetProcessType() == GeckoProcessType_Plugin) {
      // TODO Add the proper plugin name
      aWriter.StringProperty("name", "Plugin");
    } else {
      aWriter.StringProperty("name", aName);
    }
    aWriter.IntProperty("tid", aThreadId);
  }
  aWriter.EndObject();
}
#endif

static void StreamTaskTracer(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
#ifdef MOZ_TASK_TRACER
  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  aWriter.StartArrayProperty("data");
  {
    UniquePtr<Vector<nsCString>> data =
        tasktracer::GetLoggedData(CorePS::ProcessStartTime());
    for (const nsCString& dataString : *data) {
      aWriter.StringElement(dataString.get());
    }
  }
  aWriter.EndArray();

  aWriter.StartArrayProperty("threads");
  {
    ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
    Vector<Pair<RegisteredThread*, ProfiledThreadData*>> threads =
        ActivePS::ProfiledThreads(aLock);
    for (auto& thread : threads) {
      RefPtr<ThreadInfo> info = thread.second()->Info();
      StreamNameAndThreadId(aWriter, info->Name(), info->ThreadId());
    }
  }
  aWriter.EndArray();

  aWriter.DoubleProperty("start",
                         static_cast<double>(tasktracer::GetStartTime()));
#endif
}

static void StreamCategories(SpliceableJSONWriter& aWriter) {
  // Same order as ProfilingCategory. Format:
  // [
  //   {
  //     name: "Idle",
  //     color: "transparent",
  //     subcategories: ["Other"],
  //   },
  //   {
  //     name: "Other",
  //     color: "grey",
  //     subcategories: [
  //       "JSM loading",
  //       "Subprocess launching",
  //       "DLL loading"
  //     ]
  //   },
  //   ...
  // ]

#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
  aWriter.Start();                                               \
  aWriter.StringProperty("name", labelAsString);                 \
  aWriter.StringProperty("color", color);                        \
  aWriter.StartArrayProperty("subcategories");
#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
  aWriter.StringElement(labelAsString);
#define CATEGORY_JSON_END_CATEGORY \
  aWriter.EndArray();              \
  aWriter.EndObject();

  PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
                          CATEGORY_JSON_SUBCATEGORY, CATEGORY_JSON_END_CATEGORY)

#undef CATEGORY_JSON_BEGIN_CATEGORY
#undef CATEGORY_JSON_SUBCATEGORY
#undef CATEGORY_JSON_END_CATEGORY
}

static void StreamMetaJSCustomObject(PSLockRef aLock,
                                     SpliceableJSONWriter& aWriter,
                                     bool aIsShuttingDown) {
  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  aWriter.IntProperty("version", 16);

  // The "startTime" field holds the number of milliseconds since midnight
  // January 1, 1970 GMT. This grotty code computes (Now - (Now -
  // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
  TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
  aWriter.DoubleProperty(
      "startTime",
      static_cast<double>(PR_Now() / 1000.0 - delta.ToMilliseconds()));

  // Write the shutdownTime field. Unlike startTime, shutdownTime is not an
  // absolute time stamp: It's relative to startTime. This is consistent with
  // all other (non-"startTime") times anywhere in the profile JSON.
  if (aIsShuttingDown) {
    aWriter.DoubleProperty("shutdownTime", profiler_time());
  } else {
    aWriter.NullProperty("shutdownTime");
  }

  aWriter.StartArrayProperty("categories");
  StreamCategories(aWriter);