Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2
* vim: set ts=8 sts=2 et sw=2 tw=80:
3
*
4
* Copyright 2016 Mozilla Foundation
5
*
6
* Licensed under the Apache License, Version 2.0 (the "License");
7
* you may not use this file except in compliance with the License.
8
* You may obtain a copy of the License at
9
*
10
*     http://www.apache.org/licenses/LICENSE-2.0
11
*
12
* Unless required by applicable law or agreed to in writing, software
13
* distributed under the License is distributed on an "AS IS" BASIS,
14
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
* See the License for the specific language governing permissions and
16
* limitations under the License.
17
*/
18
19
/*
20
* [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
21
*
22
* General assumptions for 32-bit vs 64-bit code:
23
*
24
* - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
25
* systems.
26
*
27
* - Code that knows that Register64 has a '.reg' member on 64-bit systems and
28
* '.high' and '.low' members on 32-bit systems, or knows the implications
29
* thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT.
30
*
31
*
32
* Coding standards:
33
*
34
* - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
35
* and surrounding functions; most functions fall into this class) where the
36
* meaning is obvious:
37
*
38
* - if there is a single source + destination register, it is called 'r'
39
* - if there is one source and a different destination, they are called 'rs'
40
* and 'rd'
41
* - if there is one source + destination register and another source register
42
* they are called 'r' and 'rs'
43
* - if there are two source registers and a destination register they are
44
* called 'rs0', 'rs1', and 'rd'.
45
*
46
* - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
47
*
48
* - Registers can be named non-generically for their function ('rp' for the
49
* 'pointer' register and 'rv' for the 'value' register are typical) and those
50
* names may or may not have an 'r' prefix.
51
*
52
* - "Larger" code generating functions make their own rules.
53
*
54
*
55
* General status notes:
56
*
57
* "FIXME" indicates a known or suspected bug. Always has a bug#.
58
*
59
* "TODO" indicates an opportunity for a general improvement, with an additional
60
* tag to indicate the area of improvement. Usually has a bug#.
61
*
62
* There are lots of machine dependencies here but they are pretty well isolated
63
* to a segment of the compiler. Many dependencies will eventually be factored
64
* into the MacroAssembler layer and shared with other code generators.
65
*
66
*
67
* High-value compiler performance improvements:
68
*
69
* - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
70
* etc methods) can avoid syncing the value stack if the specific register is
71
* in use but there is a free register to shuffle the specific register into.
72
* (This will also improve the generated code.) The sync happens often enough
73
* here to show up in profiles, because it is triggered by integer multiply
74
* and divide.
75
*
76
*
77
* High-value code generation improvements:
78
*
79
* - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
80
* the stack and then jumps to the code for the target case. If no cleanup is
81
* needed we could just branch conditionally to the target; if the same amount
82
* of cleanup is needed for all cases then the cleanup can be done before the
83
* dispatch. Both are highly likely.
84
*
85
* - (Bug 1316806) Register management around calls: At the moment we sync the
86
* value stack unconditionally (this is simple) but there are probably many
87
* common cases where we could instead save/restore live caller-saves
88
* registers and perform parallel assignment into argument registers. This
89
* may be important if we keep some locals in registers.
90
*
91
* - (Bug 1316808) Allocate some locals to registers on machines where there are
92
* enough registers. This is probably hard to do well in a one-pass compiler
93
* but it might be that just keeping register arguments and the first few
94
* locals in registers is a viable strategy; another (more general) strategy
95
* is caching locals in registers in straight-line code. Such caching could
96
* also track constant values in registers, if that is deemed valuable. A
97
* combination of techniques may be desirable: parameters and the first few
98
* locals could be cached on entry to the function but not statically assigned
99
* to registers throughout.
100
*
101
* (On a large corpus of code it should be possible to compute, for every
102
* signature comprising the types of parameters and locals, and using a static
103
* weight for loops, a list in priority order of which parameters and locals
104
* that should be assigned to registers. Or something like that. Wasm makes
105
* this simple. Static assignments are desirable because they are not flushed
106
* to memory by the pre-block sync() call.)
107
*/
108
109
#include "wasm/WasmBaselineCompile.h"
110
111
#include "mozilla/MathAlgorithms.h"
112
#include "mozilla/Maybe.h"
113
114
#include <algorithm>
115
#include <utility>
116
117
#include "jit/AtomicOp.h"
118
#include "jit/IonTypes.h"
119
#include "jit/JitAllocPolicy.h"
120
#include "jit/Label.h"
121
#include "jit/MacroAssembler.h"
122
#include "jit/MIR.h"
123
#include "jit/RegisterAllocator.h"
124
#include "jit/Registers.h"
125
#include "jit/RegisterSets.h"
126
#if defined(JS_CODEGEN_ARM)
127
# include "jit/arm/Assembler-arm.h"
128
#endif
129
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
130
# include "jit/x86-shared/Architecture-x86-shared.h"
131
# include "jit/x86-shared/Assembler-x86-shared.h"
132
#endif
133
#if defined(JS_CODEGEN_MIPS32)
134
# include "jit/mips-shared/Assembler-mips-shared.h"
135
# include "jit/mips32/Assembler-mips32.h"
136
#endif
137
#if defined(JS_CODEGEN_MIPS64)
138
# include "jit/mips-shared/Assembler-mips-shared.h"
139
# include "jit/mips64/Assembler-mips64.h"
140
#endif
141
142
#include "util/Memory.h"
143
#include "wasm/WasmGC.h"
144
#include "wasm/WasmGenerator.h"
145
#include "wasm/WasmInstance.h"
146
#include "wasm/WasmOpIter.h"
147
#include "wasm/WasmSignalHandlers.h"
148
#include "wasm/WasmStubs.h"
149
#include "wasm/WasmValidate.h"
150
151
#include "jit/MacroAssembler-inl.h"
152
153
using mozilla::DebugOnly;
154
using mozilla::FloorLog2;
155
using mozilla::IsPowerOfTwo;
156
using mozilla::Maybe;
157
158
namespace js {
159
namespace wasm {
160
161
using namespace js::jit;
162
163
// Self-documenting aliases for bool parameters, so call sites read as named
// flags rather than bare true/false literals.
using HandleNaNSpecially = bool;
using InvertBranch = bool;
using IsKnownNotZero = bool;
using IsUnsigned = bool;
using NeedsBoundsCheck = bool;
using WantResult = bool;
using ZeroOnOverflow = bool;

class BaseStackFrame;

// Two flags, useABI and interModule, control how calls are made.
//
// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
// except when InterModule::True is also set, when they are volatile.
//
// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
// In this case, we require InterModule::False. The calling convention
// is otherwise like UseABI::Wasm.
//
// UseABI::System implies that the Tls/Heap/Global registers are volatile.
// Additionally, the parameter passing mechanism may be slightly different from
// the UseABI::Wasm convention.
//
// When the Tls/Heap/Global registers are not volatile, the baseline compiler
// will restore the Tls register from its save slot before the call, since the
// baseline compiler uses the Tls register for other things.
//
// When those registers are volatile, the baseline compiler will reload them
// after the call (it will restore the Tls register from the save slot and load
// the other two from the Tls data).

enum class UseABI { Wasm, Builtin, System };
enum class InterModule { False = false, True = true };
196
197
// Per-platform scratch-register configuration. Each platform states which
// private (RABALDR_SCRATCH_*) scratch registers the baseline compiler
// reserves for itself, and which operations it routes through out-of-line
// callouts (RABALDR_*_CALLOUT macros) rather than generating inline.

#if defined(JS_CODEGEN_NONE)
#  define RABALDR_SCRATCH_I32
#  define RABALDR_SCRATCH_F32
#  define RABALDR_SCRATCH_F64

static constexpr Register RabaldrScratchI32 = Register::Invalid();
static constexpr FloatRegister RabaldrScratchF32 = InvalidFloatReg;
static constexpr FloatRegister RabaldrScratchF64 = InvalidFloatReg;
#endif

#ifdef JS_CODEGEN_ARM64
#  define RABALDR_CHUNKY_STACK
#  define RABALDR_SCRATCH_I32
#  define RABALDR_SCRATCH_F32
#  define RABALDR_SCRATCH_F64
#  define RABALDR_SCRATCH_F32_ALIASES_F64

static constexpr Register RabaldrScratchI32{Registers::x15};

// Note, the float scratch regs cannot be registers that are used for parameter
// passing in any ABI we use. Argregs tend to be low-numbered; register 30
// should be safe.

static constexpr FloatRegister RabaldrScratchF32{FloatRegisters::s30,
                                                 FloatRegisters::Single};
static constexpr FloatRegister RabaldrScratchF64{FloatRegisters::d30,
                                                 FloatRegisters::Double};

static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy");
static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy");
#endif

#ifdef JS_CODEGEN_X86
// The selection of EBX here steps gingerly around: the need for EDX
// to be allocatable for multiply/divide; ECX to be allocatable for
// shift/rotate; EAX (= ReturnReg) to be allocatable as the result
// register; EBX not being one of the WasmTableCall registers; and
// needing a temp register for load/store that has a single-byte
// persona.
//
// The compiler assumes that RabaldrScratchI32 has a single-byte
// persona. Code for 8-byte atomic operations assumes that
// RabaldrScratchI32 is in fact ebx.

#  define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = ebx;

#  define RABALDR_INT_DIV_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_ARM
// We use our own scratch register, because the macro assembler uses
// the regular scratch register(s) pretty liberally. We could
// work around that in several cases but the mess does not seem
// worth it yet. CallTempReg2 seems safe.

#  define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;

#  define RABALDR_INT_DIV_I64_CALLOUT
#  define RABALDR_I64_TO_FLOAT_CALLOUT
#  define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS32
#  define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;

#  define RABALDR_INT_DIV_I64_CALLOUT
#  define RABALDR_I64_TO_FLOAT_CALLOUT
#  define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS64
#  define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;
#endif

// Sanity: a platform may only claim that F32 aliases F64 if it defines both.
#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
#  if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
#    error "Bad configuration"
#  endif
#endif
280
281
// Map a float MIRType to the RegTypeName used when querying or allocating
// from an AllocatableFloatRegisterSet (see hasFPU/allocFPU below). Only
// Float32 and Double are meaningful; any other instantiation is rejected at
// compile time.
template <MIRType t>
struct RegTypeOf {
  static_assert(t == MIRType::Float32 || t == MIRType::Double,
                "Float mask type");
};

template <>
struct RegTypeOf<MIRType::Float32> {
  static constexpr RegTypeName value = RegTypeName::Float32;
};
template <>
struct RegTypeOf<MIRType::Double> {
  static constexpr RegTypeName value = RegTypeName::Float64;
};
295
296
// The strongly typed register wrappers are especially useful to distinguish
297
// float registers from double registers, but they also clearly distinguish
298
// 32-bit registers from 64-bit register pairs on 32-bit systems.
299
300
struct RegI32 : public Register {
301
RegI32() : Register(Register::Invalid()) {}
302
explicit RegI32(Register reg) : Register(reg) {
303
MOZ_ASSERT(reg != Invalid());
304
}
305
bool isInvalid() const { return *this == Invalid(); }
306
bool isValid() const { return !isInvalid(); }
307
static RegI32 Invalid() { return RegI32(); }
308
};
309
310
struct RegI64 : public Register64 {
311
RegI64() : Register64(Register64::Invalid()) {}
312
explicit RegI64(Register64 reg) : Register64(reg) {
313
MOZ_ASSERT(reg != Invalid());
314
}
315
bool isInvalid() const { return *this == Invalid(); }
316
bool isValid() const { return !isInvalid(); }
317
static RegI64 Invalid() { return RegI64(); }
318
};
319
320
struct RegPtr : public Register {
321
RegPtr() : Register(Register::Invalid()) {}
322
explicit RegPtr(Register reg) : Register(reg) {
323
MOZ_ASSERT(reg != Invalid());
324
}
325
bool isInvalid() const { return *this == Invalid(); }
326
bool isValid() const { return !isInvalid(); }
327
static RegPtr Invalid() { return RegPtr(); }
328
};
329
330
struct RegF32 : public FloatRegister {
331
RegF32() : FloatRegister() {}
332
explicit RegF32(FloatRegister reg) : FloatRegister(reg) {
333
MOZ_ASSERT(isSingle());
334
}
335
bool isValid() const { return !isInvalid(); }
336
static RegF32 Invalid() { return RegF32(); }
337
};
338
339
struct RegF64 : public FloatRegister {
340
RegF64() : FloatRegister() {}
341
explicit RegF64(FloatRegister reg) : FloatRegister(reg) {
342
MOZ_ASSERT(isDouble());
343
}
344
bool isValid() const { return !isInvalid(); }
345
static RegF64 Invalid() { return RegF64(); }
346
};
347
348
// Tagged union over the strongly typed register wrappers, for code paths that
// must handle a register of any representable type.
struct AnyReg {
  union {
    RegI32 i32_;
    RegI64 i64_;
    RegPtr ref_;
    RegF32 f32_;
    RegF64 f64_;
  };

  // Discriminant for the union; set by the constructors and checked by the
  // typed accessors below.
  enum { I32, I64, REF, F32, F64 } tag;

  explicit AnyReg(RegI32 r) {
    tag = I32;
    i32_ = r;
  }
  explicit AnyReg(RegI64 r) {
    tag = I64;
    i64_ = r;
  }
  explicit AnyReg(RegF32 r) {
    tag = F32;
    f32_ = r;
  }
  explicit AnyReg(RegF64 r) {
    tag = F64;
    f64_ = r;
  }
  explicit AnyReg(RegPtr r) {
    tag = REF;
    ref_ = r;
  }

  // Typed accessors: each asserts that the stored tag matches.
  RegI32 i32() const {
    MOZ_ASSERT(tag == I32);
    return i32_;
  }
  RegI64 i64() const {
    MOZ_ASSERT(tag == I64);
    return i64_;
  }
  RegF32 f32() const {
    MOZ_ASSERT(tag == F32);
    return f32_;
  }
  RegF64 f64() const {
    MOZ_ASSERT(tag == F64);
    return f64_;
  }
  RegPtr ref() const {
    MOZ_ASSERT(tag == REF);
    return ref_;
  }

  // Convert to the jit::AnyRegister representation. Crashes for I64 on
  // 32-bit platforms and for REF, see comments below.
  AnyRegister any() const {
    switch (tag) {
      case F32:
        return AnyRegister(f32_);
      case F64:
        return AnyRegister(f64_);
      case I32:
        return AnyRegister(i32_);
      case I64:
#ifdef JS_PUNBOX64
        return AnyRegister(i64_.reg);
#else
        // The compiler is written so that this is never needed: any() is
        // called on arbitrary registers for asm.js but asm.js does not have
        // 64-bit ints. For wasm, any() is called on arbitrary registers
        // only on 64-bit platforms.
        MOZ_CRASH("AnyReg::any() on 32-bit platform");
#endif
      case REF:
        MOZ_CRASH("AnyReg::any() not implemented for ref types");
      default:
        MOZ_CRASH();
    }
    // Work around GCC 5 analysis/warning bug.
    MOZ_CRASH("AnyReg::any(): impossible case");
  }
};
428
429
// Platform-specific registers.
430
//
431
// All platforms must define struct SpecificRegs. All 32-bit platforms must
432
// have an abiReturnRegI64 member in that struct.
433
434
#if defined(JS_CODEGEN_X64)
// Named registers the x64 compiler addresses directly (eg for mul/div and
// shift instructions with fixed register operands).
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 rax, rcx, rdx;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        rax(RegI64(Register64(js::jit::rax))),
        rcx(RegI64(Register64(js::jit::rcx))),
        rdx(RegI64(Register64(js::jit::rdx))) {}
};
#elif defined(JS_CODEGEN_X86)
// On x86, 64-bit values live in register pairs; abiReturnRegI64 is the
// edx:eax pair.
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 ecx_ebx, edx_eax, abiReturnRegI64;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))),
        edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))),
        abiReturnRegI64(edx_eax) {}
};
#elif defined(JS_CODEGEN_ARM)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_ARM64)
struct SpecificRegs {};
#elif defined(JS_CODEGEN_MIPS32)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_MIPS64)
struct SpecificRegs {};
#else
// Porting stub: unsupported platforms crash at construction.
struct SpecificRegs {
#  ifndef JS_64BIT
  RegI64 abiReturnRegI64;
#  endif

  SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); }
};
#endif
489
490
// The callbacks the register allocator (BaseRegAlloc) needs from the
// compiler: spilling when no register is free, and saving/restoring a
// specific pointer register around needTempPtr()/freeTempPtr().
class BaseCompilerInterface {
 public:
  // Spill all spillable registers.
  //
  // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
  // spilling only enough registers to satisfy current needs.
  virtual void sync() = 0;

  // Save/restore the value of r so the allocator can hand the register out
  // temporarily even when nothing is free (see BaseRegAlloc::needTempPtr).
  virtual void saveTempPtr(RegPtr r) = 0;
  virtual void restoreTempPtr(RegPtr r) = 0;
};
500
501
// Register allocator.
502
503
// The baseline compiler's register allocator. Tracks the sets of available
// general and float registers; when a request cannot be satisfied, it asks
// the compiler (via BaseCompilerInterface) to sync, which spills registers
// back into the availability sets.
class BaseRegAlloc {
  // Notes on float register allocation.
  //
  // The general rule in SpiderMonkey is that float registers can alias double
  // registers, but there are predicates to handle exceptions to that rule:
  // hasUnaliasedDouble() and hasMultiAlias(). The way aliasing actually
  // works is platform dependent and exposed through the aliased(n, &r)
  // predicate, etc.
  //
  // - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
  //   cannot be treated as float.
  // - hasMultiAlias(): on ARM and MIPS a double register aliases two float
  //   registers.
  //
  // On some platforms (x86, x64, ARM64) but not all (ARM)
  // ScratchFloat32Register is the same as ScratchDoubleRegister.
  //
  // It's a basic invariant of the AllocatableRegisterSet that it deals
  // properly with aliasing of registers: if s0 or s1 are allocated then d0 is
  // not allocatable; if s0 and s1 are freed individually then d0 becomes
  // allocatable.

  BaseCompilerInterface* bc;
  AllocatableGeneralRegisterSet availGPR;
  AllocatableFloatRegisterSet availFPU;
#ifdef DEBUG
  AllocatableGeneralRegisterSet
      allGPR;  // The registers available to the compiler
  AllocatableFloatRegisterSet
      allFPU;  //   after removing ScratchReg, HeapReg, etc
  // Bitmask of ScratchKind values for scratch registers currently in use.
  uint32_t scratchTaken;
#endif
#ifdef JS_CODEGEN_X86
  // Registers with a single-byte persona, needed for byte loads/stores.
  AllocatableGeneralRegisterSet singleByteRegs;
#endif

  bool hasGPR() { return !availGPR.empty(); }

  // True iff enough GPRs are free to represent one int64 value.
  bool hasGPR64() {
#ifdef JS_PUNBOX64
    return !availGPR.empty();
#else
    // On 32-bit systems an int64 needs two registers; probe by temporarily
    // taking one and checking whether another remains.
    if (availGPR.empty()) {
      return false;
    }
    Register r = allocGPR();
    bool available = !availGPR.empty();
    freeGPR(r);
    return available;
#endif
  }

  template <MIRType t>
  bool hasFPU() {
    return availFPU.hasAny<RegTypeOf<t>::value>();
  }

  bool isAvailableGPR(Register r) { return availGPR.has(r); }

  bool isAvailableFPU(FloatRegister r) { return availFPU.has(r); }

  // Take a specific GPR, which must be free.
  void allocGPR(Register r) {
    MOZ_ASSERT(isAvailableGPR(r));
    availGPR.take(r);
  }

  // Take any free GPR; the caller must have ensured one exists.
  Register allocGPR() {
    MOZ_ASSERT(hasGPR());
    return availGPR.takeAny();
  }

  void allocInt64(Register64 r) {
#ifdef JS_PUNBOX64
    allocGPR(r.reg);
#else
    allocGPR(r.low);
    allocGPR(r.high);
#endif
  }

  Register64 allocInt64() {
    MOZ_ASSERT(hasGPR64());
#ifdef JS_PUNBOX64
    return Register64(availGPR.takeAny());
#else
    Register high = availGPR.takeAny();
    Register low = availGPR.takeAny();
    return Register64(high, low);
#endif
  }

#ifdef JS_CODEGEN_ARM
  // r12 is normally the ScratchRegister and r13 is always the stack pointer,
  // so the highest possible pair has r10 as the even-numbered register.

  static constexpr uint32_t PAIR_LIMIT = 10;

  // True iff some aligned (even, even+1) register pair is entirely free.
  bool hasGPRPair() {
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        return true;
      }
    }
    return false;
  }

  // Take the lowest-numbered free aligned register pair.
  void allocGPRPair(Register* low, Register* high) {
    MOZ_ASSERT(hasGPRPair());
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        *low = Register::FromCode(i);
        *high = Register::FromCode(i + 1);
        allocGPR(*low);
        allocGPR(*high);
        return;
      }
    }
    MOZ_CRASH("No pair");
  }
#endif

  void allocFPU(FloatRegister r) {
    MOZ_ASSERT(isAvailableFPU(r));
    availFPU.take(r);
  }

  template <MIRType t>
  FloatRegister allocFPU() {
    return availFPU.takeAny<RegTypeOf<t>::value>();
  }

  void freeGPR(Register r) { availGPR.add(r); }

  void freeInt64(Register64 r) {
#ifdef JS_PUNBOX64
    freeGPR(r.reg);
#else
    freeGPR(r.low);
    freeGPR(r.high);
#endif
  }

  void freeFPU(FloatRegister r) { availFPU.add(r); }

 public:
  // Construct the allocator with all registers available, then remove the
  // registers wasm reserves and any private scratch registers.
  explicit BaseRegAlloc()
      : bc(nullptr),
        availGPR(GeneralRegisterSet::All()),
        availFPU(FloatRegisterSet::All())
#ifdef DEBUG
        ,
        scratchTaken(0)
#endif
#ifdef JS_CODEGEN_X86
        ,
        singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
#endif
  {
    RegisterAllocator::takeWasmRegisters(availGPR);

    // Allocate any private scratch registers.
#if defined(RABALDR_SCRATCH_I32)
    if (RabaldrScratchI32 != RegI32::Invalid()) {
      availGPR.take(RabaldrScratchI32);
    }
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    static_assert(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition");
    static_assert(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition");
#endif

#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64)
    if (RabaldrScratchF32 != RegF32::Invalid()) {
      availFPU.take(RabaldrScratchF32);
    }
#endif

#if defined(RABALDR_SCRATCH_F64)
#  ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    // When F32 aliases F64, taking F64 must also remove the aliased F32.
    MOZ_ASSERT(availFPU.has(RabaldrScratchF32));
#  endif
    if (RabaldrScratchF64 != RegF64::Invalid()) {
      availFPU.take(RabaldrScratchF64);
    }
#  ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(!availFPU.has(RabaldrScratchF32));
#  endif
#endif

#ifdef DEBUG
    // Snapshot the full allocatable sets for LeakCheck, below.
    allGPR = availGPR;
    allFPU = availFPU;
#endif
  }

  // Late initialization: hook up the compiler callback interface.
  void init(BaseCompilerInterface* bc) { this->bc = bc; }

  // Bit values for the DEBUG-only scratchTaken mask.
  enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4 };

#ifdef DEBUG
  bool isScratchRegisterTaken(ScratchKind s) const {
    return (scratchTaken & uint32_t(s)) != 0;
  }

  void setScratchRegisterTaken(ScratchKind s, bool state) {
    if (state) {
      scratchTaken |= uint32_t(s);
    } else {
      scratchTaken &= ~uint32_t(s);
    }
  }
#endif

#ifdef JS_CODEGEN_X86
  bool isSingleByteI32(Register r) { return singleByteRegs.has(r); }
#endif

  bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); }

  bool isAvailableI64(RegI64 r) {
#ifdef JS_PUNBOX64
    return isAvailableGPR(r.reg);
#else
    return isAvailableGPR(r.low) && isAvailableGPR(r.high);
#endif
  }

  bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); }

  bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); }

  bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); }

  // The need* methods: acquire a register of the requested type, syncing
  // (spilling via the compiler) first if it is not available.
  //
  // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
  // failure, only as much as we need.

  MOZ_MUST_USE RegI32 needI32() {
    if (!hasGPR()) {
      bc->sync();
    }
    return RegI32(allocGPR());
  }

  void needI32(RegI32 specific) {
    if (!isAvailableI32(specific)) {
      bc->sync();
    }
    allocGPR(specific);
  }

  MOZ_MUST_USE RegI64 needI64() {
    if (!hasGPR64()) {
      bc->sync();
    }
    return RegI64(allocInt64());
  }

  void needI64(RegI64 specific) {
    if (!isAvailableI64(specific)) {
      bc->sync();
    }
    allocInt64(specific);
  }

  MOZ_MUST_USE RegPtr needPtr() {
    if (!hasGPR()) {
      bc->sync();
    }
    return RegPtr(allocGPR());
  }

  void needPtr(RegPtr specific) {
    if (!isAvailablePtr(specific)) {
      bc->sync();
    }
    allocGPR(specific);
  }

  // Use when you need a register for a short time but explicitly want to
  // avoid a full sync(). If no GPR is free, the fallback register's value is
  // saved via the compiler (*saved is set accordingly) and the fallback is
  // handed out; pair with freeTempPtr(r, saved).
  MOZ_MUST_USE RegPtr needTempPtr(RegPtr fallback, bool* saved) {
    if (hasGPR()) {
      *saved = false;
      return RegPtr(allocGPR());
    }
    *saved = true;
    bc->saveTempPtr(fallback);
    MOZ_ASSERT(isAvailablePtr(fallback));
    allocGPR(fallback);
    return RegPtr(fallback);
  }

  MOZ_MUST_USE RegF32 needF32() {
    if (!hasFPU<MIRType::Float32>()) {
      bc->sync();
    }
    return RegF32(allocFPU<MIRType::Float32>());
  }

  void needF32(RegF32 specific) {
    if (!isAvailableF32(specific)) {
      bc->sync();
    }
    allocFPU(specific);
  }

  MOZ_MUST_USE RegF64 needF64() {
    if (!hasFPU<MIRType::Double>()) {
      bc->sync();
    }
    return RegF64(allocFPU<MIRType::Double>());
  }

  void needF64(RegF64 specific) {
    if (!isAvailableF64(specific)) {
      bc->sync();
    }
    allocFPU(specific);
  }

  void freeI32(RegI32 r) { freeGPR(r); }

  void freeI64(RegI64 r) { freeInt64(r); }

  void freePtr(RegPtr r) { freeGPR(r); }

  void freeF64(RegF64 r) { freeFPU(r); }

  void freeF32(RegF32 r) { freeFPU(r); }

  // Release a register acquired via needTempPtr, restoring its saved value
  // through the compiler if it was saved.
  void freeTempPtr(RegPtr r, bool saved) {
    freePtr(r);
    if (saved) {
      bc->restoreTempPtr(r);
      MOZ_ASSERT(!isAvailablePtr(r));
    }
  }

#ifdef JS_CODEGEN_ARM
  // Acquire an aligned (even, even+1) register pair for an int64 value,
  // syncing first if no such pair is free.
  MOZ_MUST_USE RegI64 needI64Pair() {
    if (!hasGPRPair()) {
      bc->sync();
    }
    Register low, high;
    allocGPRPair(&low, &high);
    return RegI64(Register64(high, low));
  }
#endif

#ifdef DEBUG
  friend class LeakCheck;

  // DEBUG-only register leak detector: the caller adds every register known
  // to be live, and the destructor asserts that the known set plus the
  // available set covers exactly all allocatable registers.
  class MOZ_RAII LeakCheck {
   private:
    const BaseRegAlloc& ra;
    AllocatableGeneralRegisterSet knownGPR_;
    AllocatableFloatRegisterSet knownFPU_;

   public:
    explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
      knownGPR_ = ra.availGPR;
      knownFPU_ = ra.availFPU;
    }

    ~LeakCheck() {
      MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits());
      MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits());
    }

    void addKnownI32(RegI32 r) { knownGPR_.add(r); }

    void addKnownI64(RegI64 r) {
#  ifdef JS_PUNBOX64
      knownGPR_.add(r.reg);
#  else
      knownGPR_.add(r.high);
      knownGPR_.add(r.low);
#  endif
    }

    void addKnownF32(RegF32 r) { knownFPU_.add(r); }

    void addKnownF64(RegF64 r) { knownFPU_.add(r); }

    void addKnownRef(RegPtr r) { knownGPR_.add(r); }
  };
#endif
};
894
895
// Scratch register abstractions.
896
//
897
// We define our own scratch registers when the platform doesn't provide what we
898
// need. A notable use case is that we will need a private scratch register
899
// when the platform masm uses its scratch register very frequently (eg, ARM).
900
901
// Base for the scratch register scopes below. In DEBUG builds it marks the
// scratch register of the given kind as taken in the allocator for the
// lifetime of the object, so overlapping uses of the same scratch register
// assert. In non-DEBUG builds it is an empty type.
class BaseScratchRegister {
#ifdef DEBUG
  BaseRegAlloc& ra;
  BaseRegAlloc::ScratchKind kind_;

 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind)
      : ra(ra), kind_(kind) {
    MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, true);
  }
  ~BaseScratchRegister() {
    MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, false);
  }
#else
 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra,
                               BaseRegAlloc::ScratchKind kind) {}
#endif
};
922
923
#ifdef RABALDR_SCRATCH_F64
// F64 scratch register scope backed by the compiler's private scratch
// register (tracked via BaseRegAlloc in DEBUG builds).
class ScratchF64 : public BaseScratchRegister {
 public:
  explicit ScratchF64(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
  operator RegF64() const { return RegF64(RabaldrScratchF64); }
};
#else
// F64 scratch register scope backed by the platform masm's scratch double.
class ScratchF64 : public ScratchDoubleScope {
 public:
  explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
  operator RegF64() const { return RegF64(FloatRegister(*this)); }
};
#endif
937
938
#ifdef RABALDR_SCRATCH_F32
// F32 scratch register scope backed by the compiler's private scratch
// register (tracked via BaseRegAlloc in DEBUG builds).
class ScratchF32 : public BaseScratchRegister {
 public:
  explicit ScratchF32(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
  operator RegF32() const { return RegF32(RabaldrScratchF32); }
};
#else
// F32 scratch register scope backed by the platform masm's scratch float.
class ScratchF32 : public ScratchFloat32Scope {
 public:
  explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
  operator RegF32() const { return RegF32(FloatRegister(*this)); }
};
#endif
952
953
#ifdef RABALDR_SCRATCH_I32
// GPR scratch register scope backed by the compiler's private scratch
// register; RegType selects the wrapper type (RegI32 or RegPtr, see the
// aliases below).
template <class RegType>
class ScratchGPR : public BaseScratchRegister {
 public:
  explicit ScratchGPR(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
  operator RegType() const { return RegType(RabaldrScratchI32); }
};
#else
// GPR scratch register scope backed by the platform masm's scratch register.
template <class RegType>
class ScratchGPR : public ScratchRegisterScope {
 public:
  explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
  operator RegType() const { return RegType(Register(*this)); }
};
#endif
969
970
// Typed convenience aliases for the GPR scratch scope.
using ScratchI32 = ScratchGPR<RegI32>;
using ScratchPtr = ScratchGPR<RegPtr>;

#if defined(JS_CODEGEN_X86)
// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
// no other register will do. And we would normally have to allocate that
// register using ScratchI32 since normally the scratch register is EBX.
// But the whole point of ScratchI32 is to hide that relationship. By using
// the ScratchEBX alias, we document that at that point we require the
// scratch register to be EBX.
using ScratchEBX = ScratchI32;

// ScratchI8 is a mnemonic device: For some ops we need a register with a
// byte subregister.
using ScratchI8 = ScratchI32;
#endif
986
987
// The stack frame.
988
//
989
// The stack frame has four parts ("below" means at lower addresses):
990
//
991
// - the Frame element;
992
// - the Local area, including the DebugFrame element and possibly a spilled
993
// pointer to stack results, if any; allocated below the header with various
994
// forms of alignment;
995
// - the Dynamic area, comprising the temporary storage the compiler uses for
996
// register spilling, allocated below the Local area;
997
// - the Arguments area, comprising memory allocated for outgoing calls,
998
// allocated below the Dynamic area.
999
//
1000
// +==============================+
1001
// | Incoming stack arg |
1002
// | ... |
1003
// ------------- +==============================+
1004
// | Frame (fixed size) |
1005
// ------------- +==============================+ <-------------------- FP
1006
// ^ | DebugFrame (optional) | ^ ^ ^^
1007
// localSize | Register arg local | | | ||
1008
// | | ... | | | framePushed
1009
// | | Register stack result ptr?| | | ||
1010
// | | Non-arg local | | | ||
1011
// | | ... | | | ||
1012
// | +------------------------------+ | | ||
1013
// v | (padding) | | v ||
1014
// ------------- +==============================+ currentStackHeight ||
1015
// ^ | Dynamic (variable size) | | ||
1016
// dynamicSize | ... | | ||
1017
// v | ... | v ||
1018
// ------------- | (free space, sometimes) | --------- v|
1019
// +==============================+ <----- SP not-during calls
1020
// | Arguments (sometimes) | |
1021
// | ... | v
1022
// +==============================+ <----- SP during calls
1023
//
1024
// The Frame is addressed off the stack pointer. masm.framePushed() is always
1025
// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
1026
// Frame, with the DebugFrame optionally below it.
1027
//
1028
// The Local area (including the DebugFrame and, if needed, the spilled value of
1029
// the stack results area pointer) is laid out by BaseLocalIter and is allocated
1030
// and deallocated by standard prologue and epilogue functions that manipulate
1031
// the stack pointer, but it is accessed via BaseStackFrame.
1032
//
1033
// The Dynamic area is maintained by and accessed via BaseStackFrame. On some
1034
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
1035
// because the SP needs a specific alignment, and in this case there will
1036
// normally be some free space directly above the SP. The stack height does not
1037
// include the free space, it reflects the logically used space only.
1038
//
1039
// The Dynamic area is where space for stack results is allocated when calling
1040
// functions that return results on the stack. If a function has stack results,
1041
// a pointer to the low address of the stack result area is passed as an
1042
// additional argument, according to the usual ABI. See
1043
// ABIResultIter::HasStackResults.
1044
//
1045
// The Arguments area is allocated and deallocated via BaseStackFrame (see
1046
// comments later) but is accessed directly off the stack pointer.
1047
1048
// BaseLocalIter iterates over a vector of types of locals and provides offsets
1049
// from the Frame address for those locals, and associated data.
1050
//
1051
// The implementation of BaseLocalIter is the property of the BaseStackFrame.
1052
// But it is also exposed for eg the debugger to use.
1053
1054
// Construct an iterator over the function's locals: arguments first, then
// true (non-argument) locals.  settle() primes the iterator on the first
// item.
BaseLocalIter::BaseLocalIter(const ValTypeVector& locals,
                             const ArgTypeVector& args, bool debugEnabled)
    : locals_(locals),
      args_(args),
      argsIter_(args_),
      index_(0),
      // With debugging enabled, the DebugFrame occupies the bottom of the
      // Local area, so local layout starts past it.
      nextFrameSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
      // INT32_MAX marks "not yet computed"; settle() fills these in.
      frameOffset_(INT32_MAX),
      stackResultPointerOffset_(INT32_MAX),
      mirType_(MIRType::Undefined),
      done_(false) {
  MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length());
  settle();
}
1068
1069
// Allocate `nbytes` of naturally-aligned space for a local in the
// (downward-growing) Local area and return its offset from the Frame.
int32_t BaseLocalIter::pushLocal(size_t nbytes) {
  MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
  nextFrameSize_ = AlignBytes(frameSize_, nbytes) + nbytes;
  return nextFrameSize_;  // Locals grow down so capture base address.
}
1074
1075
// Compute the state (type and frame offset) of the current item.  Arguments
// come before true locals; an argument passed on the stack gets a negative
// offset (it is addressed in place in the caller's arg area), while
// register arguments and true locals get positive offsets in the frame.
void BaseLocalIter::settle() {
  MOZ_ASSERT(!done_);
  frameSize_ = nextFrameSize_;

  if (!argsIter_.done()) {
    mirType_ = argsIter_.mirType();
    MIRType concreteType = mirType_;
    switch (mirType_) {
      case MIRType::StackResults:
        // The pointer to stack results is handled like any other argument:
        // either addressed in place if it is passed on the stack, or we spill
        // it in the frame if it's in a register.
        MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_));
        concreteType = MIRType::Pointer;
        [[fallthrough]];
      case MIRType::Int32:
      case MIRType::Int64:
      case MIRType::Double:
      case MIRType::Float32:
      case MIRType::RefOrNull:
        if (argsIter_->argInRegister()) {
          frameOffset_ = pushLocal(MIRTypeToSize(concreteType));
        } else {
          // Negative offset: address the incoming stack arg directly,
          // above the Frame.
          frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
        }
        break;
      default:
        MOZ_CRASH("Argument type");
    }
    if (mirType_ == MIRType::StackResults) {
      stackResultPointerOffset_ = frameOffset();
      // Advance past the synthetic stack result pointer argument and fall
      // through to the next case.
      argsIter_++;
      frameSize_ = nextFrameSize_;
      MOZ_ASSERT(argsIter_.done());
    } else {
      return;
    }
  }

  if (index_ < locals_.length()) {
    switch (locals_[index_].kind()) {
      case ValType::I32:
      case ValType::I64:
      case ValType::F32:
      case ValType::F64:
      case ValType::Ref:
        // TODO/AnyRef-boxing: With boxed immediates and strings, the
        // debugger must be made aware that AnyRef != Pointer.
        ASSERT_ANYREF_IS_JSOBJECT;
        mirType_ = ToMIRType(locals_[index_]);
        frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        break;
      default:
        MOZ_CRASH("Compiler bug: Unexpected local type");
    }
    return;
  }

  // No arguments or locals remain.
  done_ = true;
}
1137
1138
// Advance to the next local.  The argument iterator is stepped in lock-step
// while arguments remain; settle() then recomputes the item's state.
void BaseLocalIter::operator++(int) {
  MOZ_ASSERT(!done_);
  index_++;
  if (!argsIter_.done()) {
    argsIter_++;
  }
  settle();
}
1146
1147
// Abstraction of the height of the stack frame, to avoid type confusion.
1148
1149
class StackHeight {
1150
friend class BaseStackFrameAllocator;
1151
1152
uint32_t height;
1153
1154
public:
1155
explicit StackHeight(uint32_t h) : height(h) {}
1156
static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
1157
bool isValid() const { return height != UINT32_MAX; }
1158
bool operator==(StackHeight rhs) const {
1159
MOZ_ASSERT(isValid() && rhs.isValid());
1160
return height == rhs.height;
1161
}
1162
bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
1163
};
1164
1165
// Abstraction for where multi-value results go on the machine stack.
1166
1167
class StackResultsLoc {
1168
uint32_t bytes_;
1169
size_t count_;
1170
Maybe<uint32_t> height_;
1171
1172
public:
1173
StackResultsLoc() : bytes_(0), count_(0){};
1174
StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
1175
: bytes_(bytes), count_(count), height_(Some(height)) {
1176
MOZ_ASSERT(bytes != 0);
1177
MOZ_ASSERT(count != 0);
1178
MOZ_ASSERT(height != 0);
1179
}
1180
1181
uint32_t bytes() const { return bytes_; }
1182
uint32_t count() const { return count_; }
1183
uint32_t height() const { return height_.value(); }
1184
1185
bool hasStackResults() const { return bytes() != 0; }
1186
StackResults stackResults() const {
1187
return hasStackResults() ? StackResults::HasStackResults
1188
: StackResults::NoStackResults;
1189
}
1190
};
1191
1192
// Abstraction of the baseline compiler's stack frame (except for the Frame /
1193
// DebugFrame parts). See comments above for more. Remember, "below" on the
1194
// stack means at lower addresses.
1195
//
1196
// The abstraction is split into two parts: BaseStackFrameAllocator is
1197
// responsible for allocating and deallocating space on the stack and for
1198
// performing computations that are affected by how the allocation is performed;
1199
// BaseStackFrame then provides a pleasant interface for stack frame management.
1200
1201
class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
  // but items are 8 bytes), allocate stack memory in chunks, and use a
  // separate stack height variable to track the effective stack pointer
  // within the allocated area.  Effectively, there's a variable amount of
  // free space directly above the stack pointer.  See diagram above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  // - The Local area is always aligned according to WasmStackAlignment, ie,
  //   masm.framePushed() % WasmStackAlignment is zero after allocating
  //   locals.
  //
  // - ChunkSize is always a multiple of WasmStackAlignment.
  //
  // - Pushing and popping are always in units of ChunkSize (hence preserving
  //   alignment).
  //
  // - The free space on the stack (masm.framePushed() - currentStackHeight_)
  //   is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack, the size of
  // this allocation is InitialChunk and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;

  // The current logical height of the frame is
  //   currentStackHeight_ = localSize_ + dynamicSize
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is
  // usually larger than currentStackHeight_, notably at the beginning of
  // execution when we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif

  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment.  The Local area is then allocated in beginFunction(),
  // following the allocation of the Header.  See onFixedStackAllocated()
  // below.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        // UINT32_MAX means "not yet set"; see setLocalSize().
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    // May be set only once.
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Record the current stack height, after it has become stable in
  // beginFunction().  See also BaseStackFrame::onFixedStackAllocated().

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    // The logical height starts at the Local area; the Dynamic area is
    // empty at this point.
    currentStackHeight_ = localSize_;
#endif
  }

 public:
  // The fixed amount of memory, in bytes, allocated on the stack below the
  // Header for purposes such as locals and other fixed values.  Includes all
  // necessary alignment, and on ARM64 also the initial chunk for the working
  // stack memory.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    return localSize_ + InitialChunk;
#else
    return localSize_;
#endif
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
#endif

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and saving
  // intermediate values.

  // Offset off of sp_ for the slot at stack area location `offset`.

  int32_t stackOffset(int32_t offset) { return masm.framePushed() - offset; }

  // Height of the stack once `stackResultBytes` of results are placed on
  // top of `stackBase`.

  uint32_t computeHeightWithStackResults(StackHeight stackBase,
                                         uint32_t stackResultBytes) {
    MOZ_ASSERT(stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    return stackBase.height + stackResultBytes;
  }

#ifdef RABALDR_CHUNKY_STACK
  // Grow the logical stack by `bytes`, reserving a new chunk from the
  // machine stack only when the free space above the current height is
  // insufficient.
  void pushChunkyBytes(uint32_t bytes) {
    MOZ_ASSERT(bytes <= ChunkSize);
    checkChunkyInvariants();
    if (masm.framePushed() - currentStackHeight_ < bytes) {
      masm.reserveStack(ChunkSize);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  // Shrink the logical stack by `bytes`, releasing whole chunks back to the
  // machine stack when possible (but never the initial chunk).
  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we
    // drop values consumed by a call, and we may need to drop several chunks.
    // But never drop the initial chunk.  Crucially, the amount we drop is
    // always an integral number of chunks.
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace >= ChunkSize) {
      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif

  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    return currentStackHeight_;
#else
    // Without chunky allocation the logical and allocated heights coincide.
    return masm.framePushed();
#endif
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  void checkChunkyInvariants() {
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    // Free space above the logical height stays under one chunk (except in
    // the initial state).
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // For a given stack height, return the appropriate size of the allocated
  // frame.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
    // A more complicated adjustment is needed.
    return framePushedForHeight(stackHeight.height);
#else
    // The allocated frame size equals the stack height.
    return stackHeight.height;
#endif
  }

 public:
  // The current height of the stack area, not necessarily zero-based, in a
  // type-safe way.

  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }

  // Set the frame height to a previously recorded value.

  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = amount.height;
    masm.setFramePushed(framePushedForHeight(amount));
    checkChunkyInvariants();
#else
    masm.setFramePushed(amount.height);
#endif
  }

  // The current height of the dynamic part of the stack area (ie, the backing
  // store for the evaluation stack), zero-based.

  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }

  // Before branching to an outer control label, pop the execution stack to
  // the level expected by that region, but do not update masm.framePushed()
  // as that will happen as compilation leaves the block.
  //
  // Note these operate directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight,
                            uint32_t stackResultBytes) {
    uint32_t framePushedHere = masm.framePushed();
    // The target height includes any stack results flowing to the
    // destination.
    StackHeight heightThere =
        StackHeight(destStackHeight.height + stackResultBytes);
    uint32_t framePushedThere = framePushedForHeight(heightThere);
    if (framePushedHere > framePushedThere) {
      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    }
  }

  // Convenience overload: measure the stack bytes for `type`'s results.

  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    popStackBeforeBranch(destStackHeight,
                         ABIResultIter::MeasureStackBytes(type));
  }

  // Given that there are |stackParamSize| bytes on the dynamic stack
  // corresponding to the stack results, return the stack height once these
  // parameters are popped.

  StackHeight stackResultsBase(uint32_t stackParamSize) {
    return StackHeight(currentStackHeight() - stackParamSize);
  }

  // For most of WebAssembly, adjacent instructions have fallthrough control
  // flow between them, which allows us to simply thread the current stack
  // height through the compiler.  There are two exceptions to this rule: when
  // leaving a block via dead code, and when entering the "else" arm of an
  // "if".  In these cases, the stack height is the block entry height, plus
  // any stack values (results in the block exit case, parameters in the else
  // entry case).

  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    uint32_t height = destStackHeight.height;
    height += ABIResultIter::MeasureStackBytes(type);
    setStackHeight(StackHeight(height));
  }

  // Return offset of stack result.

  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
                             uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    // Results are located downward from the end of the result area.
    return end - result.stackOffset();
  }

 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the freeing
  // of the argument area and dropping values off the stack after a call.  But
  // they always amount to manipulating the real stack pointer by some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments.  But we do update framePushed(), so we
  // can still index into the frame below the outgoing arguments area.

  // This is always equivalent to a masm.reserveStack() call.

  void allocArgArea(size_t argSize) {
    if (argSize) {
      masm.reserveStack(argSize);
    }
  }

  // This frees the argument area allocated by allocArgArea(), and `argSize`
  // must be equal to the `argSize` argument to allocArgArea().  In addition
  // we drop some values from the frame, corresponding to the values that were
  // consumed by the call.

  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, which is why the operation is split.
    if (argSize) {
      masm.freeStack(argSize);
    }
    popChunkyBytes(dropSize);
#else
    if (argSize + dropSize) {
      masm.freeStack(argSize + dropSize);
    }
#endif
  }
};
1526
1527
class BaseStackFrame final : public BaseStackFrameAllocator {
1528
MacroAssembler& masm;
1529
1530
// The largest observed value of masm.framePushed(), ie, the size of the
1531
// stack frame. Read this for its true value only when code generation is
1532
// finished.
1533
uint32_t maxFramePushed_;
1534
1535
// Patch point where we check for stack overflow.
1536
CodeOffset stackAddOffset_;
1537
1538
// Low byte offset of pointer to stack results, if any.
1539
Maybe<int32_t> stackResultsPtrOffset_;
1540
1541
// Low byte offset of local area for true locals (not parameters).
1542
uint32_t varLow_;
1543
1544
// High byte offset + 1 of local area for true locals.
1545
uint32_t varHigh_;
1546
1547
// The stack pointer, cached for brevity.
1548
RegisterOrSP sp_;
1549
1550
public:
1551
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        // varLow_/varHigh_ are set by setupLocals(); UINT32_MAX means
        // "not yet set".
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}
1559
1560
///////////////////////////////////////////////////////////////////////////
1561
//
1562
// Stack management and overflow checking
1563
1564
// This must be called once beginFunction has allocated space for the Header
1565
// (the Frame and DebugFrame) and the Local area, and will record the current
1566
// frame size for internal use by the stack abstractions.
1567
1568
  // Record the now-stable frame size as the starting high-water mark, then
  // let the allocator record its own state (see the base class).
  void onFixedStackAllocated() {
    maxFramePushed_ = masm.framePushed();
    BaseStackFrameAllocator::onFixedStackAllocated();
  }
1572
1573
// We won't know until after we've generated code how big the frame will be
1574
// (we may need arbitrary spill slots and outgoing param slots) so emit a
1575
// patchable add that is patched in endFunction().
1576
//
1577
// Note the platform scratch register may be used by branchPtr(), so
1578
// generally tmp must be something else.
1579
1580
  void checkStack(Register tmp, BytecodeOffset trapOffset) {
    // Emit a patchable "tmp = SP - <frame size>" computation; the final
    // frame size is patched in later by patchCheckStack().
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
    Label ok;
    // Trap with StackOverflow when the prospective SP is below the limit
    // stored in TLS.
    masm.branchPtr(Assembler::Below,
                   Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)),
                   tmp, &ok);
    masm.wasmTrap(Trap::StackOverflow, trapOffset);
    masm.bind(&ok);
  }
1589
1590
  // Back-patch the stack check emitted by checkStack() with the final
  // (maximum observed) frame size.
  void patchCheckStack() {
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }
1594
1595
// Very large frames are implausible, probably an attack.
1596
1597
bool checkStackHeight() {
1598
// 512KiB should be enough, considering how Rabaldr uses the stack and
1599
// what the standard limits are:
1600
//
1601
// - 1,000 parameters
1602
// - 50,000 locals
1603
// - 10,000 values on the eval stack (not an official limit)
1604
//
1605
// At sizeof(int64) bytes per slot this works out to about 480KiB.
1606
return maxFramePushed_ <= 512 * 1024;
1607
}
1608
1609
///////////////////////////////////////////////////////////////////////////
1610
//
1611
// Local area
1612
1613
  // Descriptor for one local: its MIR type and where it lives relative to
  // the Frame.
  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // It assumes the stack is growing down and that locals are on the stack
    // at lower addresses than Frame, and is the offset from Frame of the
    // lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
  };
1626
1627
// Profiling shows that the number of parameters and locals frequently
1628
// touches or exceeds 8. So 16 seems like a reasonable starting point.
1629
using LocalVector = Vector<Local, 16, SystemAllocPolicy>;
1630
1631
// Initialize `localInfo` based on the types of `locals` and `args`.
1632
  // Initialize `localInfo` based on the types of `locals` and `args`.
  // Returns false on OOM.  Also computes varLow_/varHigh_ (the extent of the
  // true-local region), the rounded Local-area size, and the offset of the
  // spilled stack-results pointer, if any.
  bool setupLocals(const ValTypeVector& locals, const ArgTypeVector& args,
                   bool debugEnabled, LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args, debugEnabled);
    // First, the arguments (the iterator places register args in the frame
    // and addresses stack args in place).
    for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
    }

    // Then the true (non-argument) locals, bracketed by varLow_/varHigh_.
    varLow_ = i.frameSize();
    for (; !i.done(); i++) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
    }
    varHigh_ = i.frameSize();

    // The Local area must be stack-aligned; see the frame diagram above.
    setLocalSize(AlignBytes(varHigh_, WasmStackAlignment));

    if (args.hasSyntheticStackResultPointerArg()) {
      stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
    }

    return true;
  }
1664
1665
void zeroLocals(BaseRegAlloc* ra);
1666
1667
  // Load a local (described by a Frame-relative Local) into a register.

  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src)), dest);
  }

#ifndef JS_PUNBOX64
  // On 32-bit systems an i64 local is accessed as two 32-bit halves.
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalPtr(const Local& src, RegPtr dest) {
    masm.loadPtr(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(Address(sp_, localOffset(src)), dest);
  }
1696
1697
  // Store a register into a local (described by a Frame-relative Local).

  void storeLocalI32(RegI32 src, const Local& dest) {
    masm.store32(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalI64(RegI64 src, const Local& dest) {
    masm.store64(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalPtr(Register src, const Local& dest) {
    masm.storePtr(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF64(RegF64 src, const Local& dest) {
    masm.storeDouble(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF32(RegF32 src, const Local& dest) {
    masm.storeFloat32(src, Address(sp_, localOffset(dest)));
  }
1716
1717
// Offset off of sp_ for `local`.
1718
  int32_t localOffset(const Local& local) { return localOffset(local.offs); }  // delegates to the int32_t overload below
1719
1720
// The incoming stack result area pointer is for stack results of the function
1721
// being compiled.
1722
  void loadIncomingStackResultAreaPtr(RegPtr reg) {
    // The pointer was either spilled into the frame (positive offset; see
    // storeIncomingStackResultAreaPtr) or passed on the stack (negative
    // offset, addressed in the caller's arg area).
    masm.loadPtr(Address(sp_, stackOffset(stackResultsPtrOffset_.value())),
                 reg);
  }
1726
  // Spill the register-passed stack-results-area pointer into the frame.
  void storeIncomingStackResultAreaPtr(RegPtr reg) {
    // If we get here, that means the pointer to the stack results area was
    // passed in as a register, and therefore it will be spilled below the
    // frame, so the offset is a positive height.
    MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
    masm.storePtr(reg,
                  Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
  }
1734
1735
// An outgoing stack result area pointer is for stack results of callees of
1736
// the function being compiled.
1737
  // Materialize in `dest` the address of the outgoing stack result area
  // described by `results` (its low address, SP-relative).
  void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
                                         RegPtr dest) {
    MOZ_ASSERT(results.height() <= masm.framePushed());
    uint32_t offsetFromSP = masm.framePushed() - results.height();
    masm.moveStackPtrTo(dest);
    if (offsetFromSP) {
      // dest = SP + offset; skip the add when the area starts at SP.
      masm.addPtr(Imm32(offsetFromSP), dest);
    }
  }
1746
1747
private:
1748
// Offset off of sp_ for a local with offset `offset` from Frame.
1749
  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }  // Frame-relative offset -> SP-relative offset
1750
1751
public:
1752
///////////////////////////////////////////////////////////////////////////
1753
//
1754
// Dynamic area
1755
1756
  // Stack-slot sizes for the various value kinds, shared with ABIResult.
  static const size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
  static const size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
  static const size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
  static const size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;

  // Push a value onto the dynamic area, updating the high-water mark and
  // returning the new stack height (the value's location).

  uint32_t pushPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    // Grow the logical stack, then store at the new height.
    pushChunkyBytes(StackSizeOfPtr);
    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }
1799
1800
  // Pop the top value from the dynamic area into a register.

  void popPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    // Load from the current height, then shrink the logical stack.
    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  void popFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  void popDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }
1832
1833
  // Drop `bytes` bytes from the dynamic area (no-op for zero bytes).
  void popBytes(size_t bytes) {
    if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
      popChunkyBytes(bytes);
#else
      masm.freeStack(bytes);
#endif
    }
  }
1842
1843
  // Load a value from the dynamic area at stack location `offset` (a height,
  // translated to an SP-relative address by stackOffset()).

  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  // `offset` is the height of the whole i64; the halves' offsets are
  // subtracted (not added, as for locals) because heights grow away from SP.
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  // Disambiguation: this loads a "Ptr" value from the stack, it does not load
  // the "StackPtr".

  void loadStackPtr(int32_t offset, RegPtr dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }
1875
1876
  // Ensure the dynamic area extends at least to the end of a stack result
  // area of `stackResultBytes` bytes above `stackBase`; returns the height
  // of the end of that area.
  uint32_t prepareStackResultArea(StackHeight stackBase,
                                  uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    if (currentStackHeight() < end) {
      // Grow the stack to cover the result area.
      uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
      pushChunkyBytes(bytes);
#else
      masm.reserveStack(bytes);
#endif
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
    return end;
  }
1890
1891
void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
1892
uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
1893
MOZ_ASSERT(currentStackHeight() >= end);
1894
popBytes(currentStac