Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2
* vim: set ts=8 sts=2 et sw=2 tw=80:
3
*
4
* Copyright 2016 Mozilla Foundation
5
*
6
* Licensed under the Apache License, Version 2.0 (the "License");
7
* you may not use this file except in compliance with the License.
8
* You may obtain a copy of the License at
9
*
11
*
12
* Unless required by applicable law or agreed to in writing, software
13
* distributed under the License is distributed on an "AS IS" BASIS,
14
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
* See the License for the specific language governing permissions and
16
* limitations under the License.
17
*/
18
19
/*
20
* [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
21
*
22
* General assumptions for 32-bit vs 64-bit code:
23
*
24
* - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
25
* systems.
26
*
27
* - Code that knows that Register64 has a '.reg' member on 64-bit systems and
28
* '.high' and '.low' members on 32-bit systems, or knows the implications
29
* thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT.
30
*
31
*
32
* Coding standards:
33
*
34
* - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
35
* and surrounding functions; most functions fall into this class) where the
36
* meaning is obvious:
37
*
38
* - if there is a single source + destination register, it is called 'r'
39
* - if there is one source and a different destination, they are called 'rs'
40
* and 'rd'
41
* - if there is one source + destination register and another source register
42
* they are called 'r' and 'rs'
43
* - if there are two source registers and a destination register they are
44
* called 'rs0', 'rs1', and 'rd'.
45
*
46
* - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
47
*
48
* - Registers can be named non-generically for their function ('rp' for the
49
* 'pointer' register and 'rv' for the 'value' register are typical) and those
50
* names may or may not have an 'r' prefix.
51
*
52
* - "Larger" code generating functions make their own rules.
53
*
54
*
55
* General status notes:
56
*
57
* "FIXME" indicates a known or suspected bug. Always has a bug#.
58
*
59
* "TODO" indicates an opportunity for a general improvement, with an additional
60
* tag to indicate the area of improvement. Usually has a bug#.
61
*
62
* There are lots of machine dependencies here but they are pretty well isolated
63
* to a segment of the compiler. Many dependencies will eventually be factored
64
* into the MacroAssembler layer and shared with other code generators.
65
*
66
*
67
* High-value compiler performance improvements:
68
*
69
* - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
70
* etc methods) can avoid syncing the value stack if the specific register is
71
* in use but there is a free register to shuffle the specific register into.
72
* (This will also improve the generated code.) The sync happens often enough
73
* here to show up in profiles, because it is triggered by integer multiply
74
* and divide.
75
*
76
*
77
* High-value code generation improvements:
78
*
79
* - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
80
* the stack and then jumps to the code for the target case. If no cleanup is
81
* needed we could just branch conditionally to the target; if the same amount
82
* of cleanup is needed for all cases then the cleanup can be done before the
83
* dispatch. Both are highly likely.
84
*
85
* - (Bug 1316806) Register management around calls: At the moment we sync the
86
* value stack unconditionally (this is simple) but there are probably many
87
* common cases where we could instead save/restore live caller-saves
88
* registers and perform parallel assignment into argument registers. This
89
* may be important if we keep some locals in registers.
90
*
91
* - (Bug 1316808) Allocate some locals to registers on machines where there are
92
* enough registers. This is probably hard to do well in a one-pass compiler
93
* but it might be that just keeping register arguments and the first few
94
* locals in registers is a viable strategy; another (more general) strategy
95
* is caching locals in registers in straight-line code. Such caching could
96
* also track constant values in registers, if that is deemed valuable. A
97
* combination of techniques may be desirable: parameters and the first few
98
* locals could be cached on entry to the function but not statically assigned
99
* to registers throughout.
100
*
101
* (On a large corpus of code it should be possible to compute, for every
102
* signature comprising the types of parameters and locals, and using a static
103
* weight for loops, a list in priority order of which parameters and locals
104
* that should be assigned to registers. Or something like that. Wasm makes
105
* this simple. Static assignments are desirable because they are not flushed
106
* to memory by the pre-block sync() call.)
107
*/
108
109
#include "wasm/WasmBaselineCompile.h"
110
111
#include "mozilla/MathAlgorithms.h"
112
#include "mozilla/Maybe.h"
113
114
#include <algorithm>
115
#include <utility>
116
117
#include "jit/AtomicOp.h"
118
#include "jit/IonTypes.h"
119
#include "jit/JitAllocPolicy.h"
120
#include "jit/Label.h"
121
#include "jit/MacroAssembler.h"
122
#include "jit/MIR.h"
123
#include "jit/RegisterAllocator.h"
124
#include "jit/Registers.h"
125
#include "jit/RegisterSets.h"
126
#if defined(JS_CODEGEN_ARM)
127
# include "jit/arm/Assembler-arm.h"
128
#endif
129
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
130
# include "jit/x86-shared/Architecture-x86-shared.h"
131
# include "jit/x86-shared/Assembler-x86-shared.h"
132
#endif
133
#if defined(JS_CODEGEN_MIPS32)
134
# include "jit/mips-shared/Assembler-mips-shared.h"
135
# include "jit/mips32/Assembler-mips32.h"
136
#endif
137
#if defined(JS_CODEGEN_MIPS64)
138
# include "jit/mips-shared/Assembler-mips-shared.h"
139
# include "jit/mips64/Assembler-mips64.h"
140
#endif
141
142
#include "util/Memory.h"
143
#include "wasm/WasmGC.h"
144
#include "wasm/WasmGenerator.h"
145
#include "wasm/WasmInstance.h"
146
#include "wasm/WasmOpIter.h"
147
#include "wasm/WasmSignalHandlers.h"
148
#include "wasm/WasmStubs.h"
149
#include "wasm/WasmValidate.h"
150
151
#include "jit/MacroAssembler-inl.h"
152
153
using mozilla::DebugOnly;
154
using mozilla::FloorLog2;
155
using mozilla::IsPowerOfTwo;
156
using mozilla::Maybe;
157
158
namespace js {
159
namespace wasm {
160
161
using namespace js::jit;
162
163
// Named aliases of bool.  These exist so that boolean arguments at call sites
// are self-describing, eg f(..., IsUnsigned(true)) rather than a bare 'true'.
using HandleNaNSpecially = bool;
using InvertBranch = bool;
using IsKnownNotZero = bool;
using IsUnsigned = bool;
using NeedsBoundsCheck = bool;
using WantResult = bool;
using ZeroOnOverflow = bool;

// Forward declaration; the stack frame abstraction is defined later in this
// file.
class BaseStackFrame;
172
173
// Two flags, useABI and interModule, control how calls are made.
174
//
175
// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
176
// except when InterModule::True is also set, when they are volatile.
177
//
178
// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
179
// In this case, we require InterModule::False. The calling convention
180
// is otherwise like UseABI::Wasm.
181
//
182
// UseABI::System implies that the Tls/Heap/Global registers are volatile.
183
// Additionally, the parameter passing mechanism may be slightly different from
184
// the UseABI::Wasm convention.
185
//
186
// When the Tls/Heap/Global registers are not volatile, the baseline compiler
187
// will restore the Tls register from its save slot before the call, since the
188
// baseline compiler uses the Tls register for other things.
189
//
190
// When those registers are volatile, the baseline compiler will reload them
191
// after the call (it will restore the Tls register from the save slot and load
192
// the other two from the Tls data).
193
194
// See the large comment above for the semantics of these two flags, which
// together control how calls are generated.
enum class UseABI { Wasm, Builtin, System };
enum class InterModule { False = false, True = true };
196
197
// Per-platform configuration of the baseline compiler's private scratch
// registers and of which 64-bit operations must be performed via C++ callouts
// (RABALDR_*_CALLOUT).  Each RABALDR_SCRATCH_* define indicates that the
// corresponding RabaldrScratch* constant below is the compiler's own scratch
// register of that type.

#if defined(JS_CODEGEN_NONE)
# define RABALDR_SCRATCH_I32
# define RABALDR_SCRATCH_F32
# define RABALDR_SCRATCH_F64

// No-codegen build: the scratch registers are deliberately invalid.
static const Register RabaldrScratchI32 = Register::Invalid();
static const FloatRegister RabaldrScratchF32 = InvalidFloatReg;
static const FloatRegister RabaldrScratchF64 = InvalidFloatReg;
#endif

#ifdef JS_CODEGEN_ARM64
# define RABALDR_CHUNKY_STACK
# define RABALDR_SCRATCH_I32
# define RABALDR_SCRATCH_F32
# define RABALDR_SCRATCH_F64
# define RABALDR_SCRATCH_F32_ALIASES_F64

static const Register RabaldrScratchI32 = Register::FromCode(15);

// Note, the float scratch regs cannot be registers that are used for parameter
// passing in any ABI we use.  Argregs tend to be low-numbered; register 30
// should be safe.

static constexpr FloatRegister RabaldrScratchF32{FloatRegisters::s30,
                                                 FloatRegisters::Single};
static constexpr FloatRegister RabaldrScratchF64{FloatRegisters::d30,
                                                 FloatRegisters::Double};

// Our private scratch regs must not collide with the masm's scratch regs.
static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy");
static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy");
#endif

#ifdef JS_CODEGEN_X86
// The selection of EBX here steps gingerly around: the need for EDX
// to be allocatable for multiply/divide; ECX to be allocatable for
// shift/rotate; EAX (= ReturnReg) to be allocatable as the result
// register; EBX not being one of the WasmTableCall registers; and
// needing a temp register for load/store that has a single-byte
// persona.
//
// The compiler assumes that RabaldrScratchI32 has a single-byte
// persona.  Code for 8-byte atomic operations assumes that
// RabaldrScratchI32 is in fact ebx.

# define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = ebx;

# define RABALDR_INT_DIV_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_ARM
// We use our own scratch register, because the macro assembler uses
// the regular scratch register(s) pretty liberally.  We could
// work around that in several cases but the mess does not seem
// worth it yet.  CallTempReg2 seems safe.

# define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;

# define RABALDR_INT_DIV_I64_CALLOUT
# define RABALDR_I64_TO_FLOAT_CALLOUT
# define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS32
# define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;

# define RABALDR_INT_DIV_I64_CALLOUT
# define RABALDR_I64_TO_FLOAT_CALLOUT
# define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS64
# define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;
#endif

// Sanity: if the F32 scratch aliases the F64 scratch, both must exist.
#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
# if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
# error "Bad configuration"
# endif
#endif
280
281
// Maps a floating-point MIRType to the RegTypeName used when querying or
// allocating from an AllocatableFloatRegisterSet (see hasFPU()/allocFPU()
// below).  Only Float32 and Double are valid; any other instantiation fails
// the static_assert.
template <MIRType t>
struct RegTypeOf {
  static_assert(t == MIRType::Float32 || t == MIRType::Double,
                "Float mask type");
};

template <>
struct RegTypeOf<MIRType::Float32> {
  static constexpr RegTypeName value = RegTypeName::Float32;
};
template <>
struct RegTypeOf<MIRType::Double> {
  static constexpr RegTypeName value = RegTypeName::Float64;
};
295
296
// The strongly typed register wrappers are especially useful to distinguish
297
// float registers from double registers, but they also clearly distinguish
298
// 32-bit registers from 64-bit register pairs on 32-bit systems.
299
300
struct RegI32 : public Register {
301
RegI32() : Register(Register::Invalid()) {}
302
explicit RegI32(Register reg) : Register(reg) {
303
MOZ_ASSERT(reg != Invalid());
304
}
305
bool isInvalid() const { return *this == Invalid(); }
306
bool isValid() const { return !isInvalid(); }
307
static RegI32 Invalid() { return RegI32(); }
308
};
309
310
struct RegI64 : public Register64 {
311
RegI64() : Register64(Register64::Invalid()) {}
312
explicit RegI64(Register64 reg) : Register64(reg) {
313
MOZ_ASSERT(reg != Invalid());
314
}
315
bool isInvalid() const { return *this == Invalid(); }
316
bool isValid() const { return !isInvalid(); }
317
static RegI64 Invalid() { return RegI64(); }
318
};
319
320
struct RegPtr : public Register {
321
RegPtr() : Register(Register::Invalid()) {}
322
explicit RegPtr(Register reg) : Register(reg) {
323
MOZ_ASSERT(reg != Invalid());
324
}
325
bool isInvalid() const { return *this == Invalid(); }
326
bool isValid() const { return !isInvalid(); }
327
static RegPtr Invalid() { return RegPtr(); }
328
};
329
330
struct RegF32 : public FloatRegister {
331
RegF32() : FloatRegister() {}
332
explicit RegF32(FloatRegister reg) : FloatRegister(reg) {
333
MOZ_ASSERT(isSingle());
334
}
335
bool isValid() const { return !isInvalid(); }
336
static RegF32 Invalid() { return RegF32(); }
337
};
338
339
struct RegF64 : public FloatRegister {
340
RegF64() : FloatRegister() {}
341
explicit RegF64(FloatRegister reg) : FloatRegister(reg) {
342
MOZ_ASSERT(isDouble());
343
}
344
bool isValid() const { return !isInvalid(); }
345
static RegF64 Invalid() { return RegF64(); }
346
};
347
348
// A tagged union over the strongly typed register wrappers, for code that
// must carry a register of statically unknown type.  The tag records which
// union member is live; each accessor asserts the tag.
struct AnyReg {
  union {
    RegI32 i32_;
    RegI64 i64_;
    RegPtr ref_;
    RegF32 f32_;
    RegF64 f64_;
  };

  enum { I32, I64, REF, F32, F64 } tag;

  explicit AnyReg(RegI32 r) {
    tag = I32;
    i32_ = r;
  }
  explicit AnyReg(RegI64 r) {
    tag = I64;
    i64_ = r;
  }
  explicit AnyReg(RegF32 r) {
    tag = F32;
    f32_ = r;
  }
  explicit AnyReg(RegF64 r) {
    tag = F64;
    f64_ = r;
  }
  explicit AnyReg(RegPtr r) {
    tag = REF;
    ref_ = r;
  }

  // Checked accessors: the requested type must match the stored tag.
  RegI32 i32() const {
    MOZ_ASSERT(tag == I32);
    return i32_;
  }
  RegI64 i64() const {
    MOZ_ASSERT(tag == I64);
    return i64_;
  }
  RegF32 f32() const {
    MOZ_ASSERT(tag == F32);
    return f32_;
  }
  RegF64 f64() const {
    MOZ_ASSERT(tag == F64);
    return f64_;
  }
  RegPtr ref() const {
    MOZ_ASSERT(tag == REF);
    return ref_;
  }

  // Convert to the jit-layer AnyRegister.  Only legal for tags that map onto
  // a single machine register (hence the I64 restriction on 32-bit targets).
  AnyRegister any() const {
    switch (tag) {
      case F32:
        return AnyRegister(f32_);
      case F64:
        return AnyRegister(f64_);
      case I32:
        return AnyRegister(i32_);
      case I64:
#ifdef JS_PUNBOX64
        return AnyRegister(i64_.reg);
#else
        // The compiler is written so that this is never needed: any() is
        // called on arbitrary registers for asm.js but asm.js does not have
        // 64-bit ints.  For wasm, any() is called on arbitrary registers
        // only on 64-bit platforms.
        MOZ_CRASH("AnyReg::any() on 32-bit platform");
#endif
      case REF:
        MOZ_CRASH("AnyReg::any() not implemented for ref types");
      default:
        MOZ_CRASH();
    }
    // Work around GCC 5 analysis/warning bug.
    MOZ_CRASH("AnyReg::any(): impossible case");
  }
};
428
429
// Platform-specific registers.
430
//
431
// All platforms must define struct SpecificRegs. All 32-bit platforms must
432
// have an abiReturnRegI64 member in that struct.
433
434
#if defined(JS_CODEGEN_X64)
struct SpecificRegs {
  // Named x64 registers the compiler must sometimes allocate specifically
  // (eg for multiply/divide and shift instructions with fixed operands).
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 rax, rcx, rdx;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        rax(RegI64(Register64(js::jit::rax))),
        rcx(RegI64(Register64(js::jit::rcx))),
        rdx(RegI64(Register64(js::jit::rdx))) {}
};
#elif defined(JS_CODEGEN_X86)
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  // 64-bit values occupy register pairs on x86; edx:eax is the ABI return
  // pair (see abiReturnRegI64 below).
  RegI64 ecx_ebx, edx_eax, abiReturnRegI64;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))),
        edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))),
        abiReturnRegI64(edx_eax) {}
};
#elif defined(JS_CODEGEN_ARM)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_ARM64)
struct SpecificRegs {};
#elif defined(JS_CODEGEN_MIPS32)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_MIPS64)
struct SpecificRegs {};
#else
// Porting stub: unsupported platforms must fill this in.
struct SpecificRegs {
# ifndef JS_64BIT
  RegI64 abiReturnRegI64;
# endif

  SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); }
};
#endif
489
490
// The narrow interface the register allocator uses to call back into the
// compiler when it must free up registers.
class BaseCompilerInterface {
 public:
  // Spill all spillable registers.
  //
  // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
  // spilling only enough registers to satisfy current needs.
  virtual void sync() = 0;

  // Spill/reload a specific register around a short-lived borrow; used by
  // BaseRegAlloc::needTempPtr()/freeTempPtr() below.
  virtual void saveTempPtr(RegPtr r) = 0;
  virtual void restoreTempPtr(RegPtr r) = 0;
};
500
501
// Register allocator.
502
503
// The baseline compiler's register allocator: a simple free-set allocator
// over GPRs and FPUs.  When an allocation request cannot be satisfied it
// calls bc->sync() to spill and retries (the allocation is then assumed to
// succeed).
class BaseRegAlloc {
  // Notes on float register allocation.
  //
  // The general rule in SpiderMonkey is that float registers can alias double
  // registers, but there are predicates to handle exceptions to that rule:
  // hasUnaliasedDouble() and hasMultiAlias().  The way aliasing actually
  // works is platform dependent and exposed through the aliased(n, &r)
  // predicate, etc.
  //
  // - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
  //   cannot be treated as float.
  // - hasMultiAlias(): on ARM and MIPS a double register aliases two float
  //   registers.
  //
  // On some platforms (x86, x64, ARM64) but not all (ARM)
  // ScratchFloat32Register is the same as ScratchDoubleRegister.
  //
  // It's a basic invariant of the AllocatableRegisterSet that it deals
  // properly with aliasing of registers: if s0 or s1 are allocated then d0 is
  // not allocatable; if s0 and s1 are freed individually then d0 becomes
  // allocatable.

  BaseCompilerInterface* bc;  // For sync()/saveTempPtr() callbacks; set by init().
  AllocatableGeneralRegisterSet availGPR;
  AllocatableFloatRegisterSet availFPU;
#ifdef DEBUG
  AllocatableGeneralRegisterSet
      allGPR;  // The registers available to the compiler
  AllocatableFloatRegisterSet
      allFPU;  //   after removing ScratchReg, HeapReg, etc
  uint32_t scratchTaken;  // Bitmask of ScratchKind values currently in use.
#endif
#ifdef JS_CODEGEN_X86
  AllocatableGeneralRegisterSet singleByteRegs;
#endif

  bool hasGPR() { return !availGPR.empty(); }

  // True if enough GPRs are free for one 64-bit value (one register on
  // 64-bit targets, two on 32-bit targets).
  bool hasGPR64() {
#ifdef JS_PUNBOX64
    return !availGPR.empty();
#else
    if (availGPR.empty()) {
      return false;
    }
    // Probe for a second free register by temporarily taking one.
    Register r = allocGPR();
    bool available = !availGPR.empty();
    freeGPR(r);
    return available;
#endif
  }

  template <MIRType t>
  bool hasFPU() {
    return availFPU.hasAny<RegTypeOf<t>::value>();
  }

  bool isAvailableGPR(Register r) { return availGPR.has(r); }

  bool isAvailableFPU(FloatRegister r) { return availFPU.has(r); }

  void allocGPR(Register r) {
    MOZ_ASSERT(isAvailableGPR(r));
    availGPR.take(r);
  }

  Register allocGPR() {
    MOZ_ASSERT(hasGPR());
    return availGPR.takeAny();
  }

  void allocInt64(Register64 r) {
#ifdef JS_PUNBOX64
    allocGPR(r.reg);
#else
    allocGPR(r.low);
    allocGPR(r.high);
#endif
  }

  Register64 allocInt64() {
    MOZ_ASSERT(hasGPR64());
#ifdef JS_PUNBOX64
    return Register64(availGPR.takeAny());
#else
    Register high = availGPR.takeAny();
    Register low = availGPR.takeAny();
    return Register64(high, low);
#endif
  }

#ifdef JS_CODEGEN_ARM
  // r12 is normally the ScratchRegister and r13 is always the stack pointer,
  // so the highest possible pair has r10 as the even-numbered register.

  static constexpr uint32_t PAIR_LIMIT = 10;

  // True if some even/odd (r_i, r_{i+1}) pair is entirely free.
  bool hasGPRPair() {
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        return true;
      }
    }
    return false;
  }

  // Take the lowest-numbered free even/odd pair; crashes if none is free
  // (callers must check hasGPRPair() or sync first).
  void allocGPRPair(Register* low, Register* high) {
    MOZ_ASSERT(hasGPRPair());
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        *low = Register::FromCode(i);
        *high = Register::FromCode(i + 1);
        allocGPR(*low);
        allocGPR(*high);
        return;
      }
    }
    MOZ_CRASH("No pair");
  }
#endif

  void allocFPU(FloatRegister r) {
    MOZ_ASSERT(isAvailableFPU(r));
    availFPU.take(r);
  }

  template <MIRType t>
  FloatRegister allocFPU() {
    return availFPU.takeAny<RegTypeOf<t>::value>();
  }

  void freeGPR(Register r) { availGPR.add(r); }

  void freeInt64(Register64 r) {
#ifdef JS_PUNBOX64
    freeGPR(r.reg);
#else
    freeGPR(r.low);
    freeGPR(r.high);
#endif
  }

  void freeFPU(FloatRegister r) { availFPU.add(r); }

 public:
  explicit BaseRegAlloc()
      : bc(nullptr),
        availGPR(GeneralRegisterSet::All()),
        availFPU(FloatRegisterSet::All())
#ifdef DEBUG
        ,
        scratchTaken(0)
#endif
#ifdef JS_CODEGEN_X86
        ,
        singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
#endif
  {
    // Remove the registers the wasm runtime reserves (HeapReg etc).
    RegisterAllocator::takeWasmRegisters(availGPR);

    // Allocate any private scratch registers.
#if defined(RABALDR_SCRATCH_I32)
    if (RabaldrScratchI32 != RegI32::Invalid()) {
      availGPR.take(RabaldrScratchI32);
    }
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition");
    MOZ_ASSERT(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition");
#endif

#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64)
    if (RabaldrScratchF32 != RegF32::Invalid()) {
      availFPU.take(RabaldrScratchF32);
    }
#endif

#if defined(RABALDR_SCRATCH_F64)
# ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    // Taking the aliasing F64 below must also remove the F32 view of it.
    MOZ_ASSERT(availFPU.has(RabaldrScratchF32));
# endif
    if (RabaldrScratchF64 != RegF64::Invalid()) {
      availFPU.take(RabaldrScratchF64);
    }
# ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(!availFPU.has(RabaldrScratchF32));
# endif
#endif

#ifdef DEBUG
    // Snapshot the full allocatable sets for LeakCheck, below.
    allGPR = availGPR;
    allFPU = availFPU;
#endif
  }

  void init(BaseCompilerInterface* bc) { this->bc = bc; }

  // Bits for the DEBUG-only scratchTaken mask.
  enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4 };

#ifdef DEBUG
  bool isScratchRegisterTaken(ScratchKind s) const {
    return (scratchTaken & uint32_t(s)) != 0;
  }

  void setScratchRegisterTaken(ScratchKind s, bool state) {
    if (state) {
      scratchTaken |= uint32_t(s);
    } else {
      scratchTaken &= ~uint32_t(s);
    }
  }
#endif

#ifdef JS_CODEGEN_X86
  bool isSingleByteI32(Register r) { return singleByteRegs.has(r); }
#endif

  bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); }

  bool isAvailableI64(RegI64 r) {
#ifdef JS_PUNBOX64
    return isAvailableGPR(r.reg);
#else
    return isAvailableGPR(r.low) && isAvailableGPR(r.high);
#endif
  }

  bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); }

  bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); }

  bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); }

  // The need*() methods below allocate a register of the requested type,
  // spilling via bc->sync() first if necessary.  The overloads taking a
  // specific register allocate exactly that register.
  //
  // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
  // failure, only as much as we need.

  MOZ_MUST_USE RegI32 needI32() {
    if (!hasGPR()) {
      bc->sync();
    }
    return RegI32(allocGPR());
  }

  void needI32(RegI32 specific) {
    if (!isAvailableI32(specific)) {
      bc->sync();
    }
    allocGPR(specific);
  }

  MOZ_MUST_USE RegI64 needI64() {
    if (!hasGPR64()) {
      bc->sync();
    }
    return RegI64(allocInt64());
  }

  void needI64(RegI64 specific) {
    if (!isAvailableI64(specific)) {
      bc->sync();
    }
    allocInt64(specific);
  }

  MOZ_MUST_USE RegPtr needPtr() {
    if (!hasGPR()) {
      bc->sync();
    }
    return RegPtr(allocGPR());
  }

  void needPtr(RegPtr specific) {
    if (!isAvailablePtr(specific)) {
      bc->sync();
    }
    allocGPR(specific);
  }

  // Use when you need a register for a short time but explicitly want to avoid
  // a full sync().  On failure, 'fallback' is saved via bc->saveTempPtr() and
  // used; *saved records whether that happened, for freeTempPtr() below.
  MOZ_MUST_USE RegPtr needTempPtr(RegPtr fallback, bool* saved) {
    if (hasGPR()) {
      *saved = false;
      return RegPtr(allocGPR());
    }
    *saved = true;
    bc->saveTempPtr(fallback);
    MOZ_ASSERT(isAvailablePtr(fallback));
    allocGPR(fallback);
    return RegPtr(fallback);
  }

  MOZ_MUST_USE RegF32 needF32() {
    if (!hasFPU<MIRType::Float32>()) {
      bc->sync();
    }
    return RegF32(allocFPU<MIRType::Float32>());
  }

  void needF32(RegF32 specific) {
    if (!isAvailableF32(specific)) {
      bc->sync();
    }
    allocFPU(specific);
  }

  MOZ_MUST_USE RegF64 needF64() {
    if (!hasFPU<MIRType::Double>()) {
      bc->sync();
    }
    return RegF64(allocFPU<MIRType::Double>());
  }

  void needF64(RegF64 specific) {
    if (!isAvailableF64(specific)) {
      bc->sync();
    }
    allocFPU(specific);
  }

  void freeI32(RegI32 r) { freeGPR(r); }

  void freeI64(RegI64 r) { freeInt64(r); }

  void freePtr(RegPtr r) { freeGPR(r); }

  void freeF64(RegF64 r) { freeFPU(r); }

  void freeF32(RegF32 r) { freeFPU(r); }

  // Release a register obtained from needTempPtr(); 'saved' must be the value
  // needTempPtr() stored, so the compiler can restore a spilled register.
  void freeTempPtr(RegPtr r, bool saved) {
    freePtr(r);
    if (saved) {
      bc->restoreTempPtr(r);
      MOZ_ASSERT(!isAvailablePtr(r));
    }
  }

#ifdef JS_CODEGEN_ARM
  // Allocate an even/odd register pair as an I64 (required by some ARM
  // instructions); syncs if no pair is free.
  MOZ_MUST_USE RegI64 needI64Pair() {
    if (!hasGPRPair()) {
      bc->sync();
    }
    Register low, high;
    allocGPRPair(&low, &high);
    return RegI64(Register64(high, low));
  }
#endif

#ifdef DEBUG
  friend class LeakCheck;

  // RAII checker: at destruction, every register must either be free in the
  // allocator or have been reported via one of the addKnown*() methods.
  class MOZ_RAII LeakCheck {
   private:
    const BaseRegAlloc& ra;
    AllocatableGeneralRegisterSet knownGPR_;
    AllocatableFloatRegisterSet knownFPU_;

   public:
    explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
      knownGPR_ = ra.availGPR;
      knownFPU_ = ra.availFPU;
    }

    ~LeakCheck() {
      MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits());
      MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits());
    }

    void addKnownI32(RegI32 r) { knownGPR_.add(r); }

    void addKnownI64(RegI64 r) {
# ifdef JS_PUNBOX64
      knownGPR_.add(r.reg);
# else
      knownGPR_.add(r.high);
      knownGPR_.add(r.low);
# endif
    }

    void addKnownF32(RegF32 r) { knownFPU_.add(r); }

    void addKnownF64(RegF64 r) { knownFPU_.add(r); }

    void addKnownRef(RegPtr r) { knownGPR_.add(r); }
  };
#endif
};
894
895
// Scratch register abstractions.
896
//
897
// We define our own scratch registers when the platform doesn't provide what we
898
// need. A notable use case is that we will need a private scratch register
899
// when the platform masm uses its scratch register very frequently (eg, ARM).
900
901
// RAII base for the Scratch* classes below.  In DEBUG builds it marks the
// scratch register of the given kind as taken for its lifetime, catching
// nested/overlapping uses of the same scratch register; in release builds it
// is a no-op.
class BaseScratchRegister {
#ifdef DEBUG
  BaseRegAlloc& ra;
  BaseRegAlloc::ScratchKind kind_;

 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind)
      : ra(ra), kind_(kind) {
    MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, true);
  }
  ~BaseScratchRegister() {
    MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, false);
  }
#else
 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra,
                               BaseRegAlloc::ScratchKind kind) {}
#endif
};
922
923
// Scoped double-precision scratch register: either our private
// RabaldrScratchF64 (tracked via BaseScratchRegister) or the platform masm's
// ScratchDoubleScope.
#ifdef RABALDR_SCRATCH_F64
class ScratchF64 : public BaseScratchRegister {
 public:
  explicit ScratchF64(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
  operator RegF64() const { return RegF64(RabaldrScratchF64); }
};
#else
class ScratchF64 : public ScratchDoubleScope {
 public:
  explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
  operator RegF64() const { return RegF64(FloatRegister(*this)); }
};
#endif
937
938
// Scoped single-precision scratch register: either our private
// RabaldrScratchF32 (tracked via BaseScratchRegister) or the platform masm's
// ScratchFloat32Scope.
#ifdef RABALDR_SCRATCH_F32
class ScratchF32 : public BaseScratchRegister {
 public:
  explicit ScratchF32(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
  operator RegF32() const { return RegF32(RabaldrScratchF32); }
};
#else
class ScratchF32 : public ScratchFloat32Scope {
 public:
  explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
  operator RegF32() const { return RegF32(FloatRegister(*this)); }
};
#endif
952
953
// Scoped GPR scratch register, parameterized on the wrapper type (RegI32 or
// RegPtr — see the aliases below): either our private RabaldrScratchI32
// (tracked via BaseScratchRegister) or the platform masm's
// ScratchRegisterScope.
#ifdef RABALDR_SCRATCH_I32
template <class RegType>
class ScratchGPR : public BaseScratchRegister {
 public:
  explicit ScratchGPR(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
  operator RegType() const { return RegType(RabaldrScratchI32); }
};
#else
template <class RegType>
class ScratchGPR : public ScratchRegisterScope {
 public:
  explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
  operator RegType() const { return RegType(Register(*this)); }
};
#endif
969
970
using ScratchI32 = ScratchGPR<RegI32>;
using ScratchPtr = ScratchGPR<RegPtr>;

#if defined(JS_CODEGEN_X86)
// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
// no other register will do.  And we would normally have to allocate that
// register using ScratchI32 since normally the scratch register is EBX.
// But the whole point of ScratchI32 is to hide that relationship.  By using
// the ScratchEBX alias, we document that at that point we require the
// scratch register to be EBX.
using ScratchEBX = ScratchI32;

// ScratchI8 is a mnemonic device: For some ops we need a register with a
// byte subregister.
using ScratchI8 = ScratchI32;
#endif
986
987
// The stack frame.
988
//
989
// The stack frame has four parts ("below" means at lower addresses):
990
//
991
// - the Frame element;
992
// - the Local area, including the DebugFrame element and possibly a spilled
993
// pointer to stack results, if any; allocated below the header with various
994
// forms of alignment;
995
// - the Dynamic area, comprising the temporary storage the compiler uses for
996
// register spilling, allocated below the Local area;
997
// - the Arguments area, comprising memory allocated for outgoing calls,
998
// allocated below the Dynamic area.
999
//
1000
// +==============================+
1001
// | Incoming stack arg |
1002
// | ... |
1003
// ------------- +==============================+
1004
// | Frame (fixed size) |
1005
// ------------- +==============================+ <-------------------- FP
1006
// ^ | DebugFrame (optional) | ^ ^ ^^
1007
// localSize | Register arg local | | | ||
1008
// | | ... | | | framePushed
1009
// | | Register stack result ptr?| | | ||
1010
// | | Non-arg local | | | ||
1011
// | | ... | | | ||
1012
// | +------------------------------+ | | ||
1013
// v | (padding) | | v ||
1014
// ------------- +==============================+ currentStackHeight ||
1015
// ^ | Dynamic (variable size) | | ||
1016
// dynamicSize | ... | | ||
1017
// v | ... | v ||
1018
// ------------- | (free space, sometimes) | --------- v|
1019
// +==============================+ <----- SP not-during calls
1020
// | Arguments (sometimes) | |
1021
// | ... | v
1022
// +==============================+ <----- SP during calls
1023
//
1024
// The Frame is addressed off the stack pointer. masm.framePushed() is always
1025
// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
1026
// Frame, with the DebugFrame optionally below it.
1027
//
1028
// The Local area (including the DebugFrame and, if needed, the spilled value of
1029
// the stack results area pointer) is laid out by BaseLocalIter and is allocated
1030
// and deallocated by standard prologue and epilogue functions that manipulate
1031
// the stack pointer, but it is accessed via BaseStackFrame.
1032
//
1033
// The Dynamic area is maintained by and accessed via BaseStackFrame. On some
1034
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
1035
// because the SP needs a specific alignment, and in this case there will
1036
// normally be some free space directly above the SP. The stack height does not
1037
// include the free space, it reflects the logically used space only.
1038
//
1039
// The Dynamic area is where space for stack results is allocated when calling
1040
// functions that return results on the stack. If a function has stack results,
1041
// a pointer to the low address of the stack result area is passed as an
1042
// additional argument, according to the usual ABI. See
1043
// ABIResultIter::HasStackResults.
1044
//
1045
// The Arguments area is allocated and deallocated via BaseStackFrame (see
1046
// comments later) but is accessed directly off the stack pointer.
1047
1048
// BaseLocalIter iterates over a vector of types of locals and provides offsets
1049
// from the Frame address for those locals, and associated data.
1050
//
1051
// The implementation of BaseLocalIter is the property of the BaseStackFrame.
1052
// But it is also exposed for eg the debugger to use.
1053
1054
// Construct an iterator over a function's locals.  `args` describes the
// formal arguments (possibly including a synthetic stack-results pointer);
// `locals` lists the argument locals first, followed by the non-argument
// locals.  When `debugEnabled`, space for the DebugFrame is reserved at the
// base of the local area.
BaseLocalIter::BaseLocalIter(const ValTypeVector& locals,
                             const ArgTypeVector& args, bool debugEnabled)
    : locals_(locals),
      args_(args),
      argsIter_(args_),
      index_(0),
      // The local area starts below the DebugFrame, when one is present.
      localSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
      reservedSize_(localSize_),
      // INT32_MAX flags "not yet computed"; settle() fills these in.
      frameOffset_(INT32_MAX),
      stackResultPointerOffset_(INT32_MAX),
      mirType_(MIRType::Undefined),
      done_(false) {
  MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length());
  settle();
}
1069
1070
// Allocate `nbytes` of local space, aligned to `nbytes`, and return the
// frame offset of the new local.  `nbytes` must be a multiple of 4 and at
// most 16.
int32_t BaseLocalIter::pushLocal(size_t nbytes) {
  MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
  localSize_ = AlignBytes(localSize_, nbytes) + nbytes;
  return localSize_;  // Locals grow down so capture base address.
}
1075
1076
// Compute the state for the current local: its MIR type and its frame
// offset.  Argument locals are visited first (via argsIter_), then the
// non-argument locals; done_ is set once all locals have been visited.
void BaseLocalIter::settle() {
  if (!argsIter_.done()) {
    mirType_ = argsIter_.mirType();
    switch (mirType_) {
      case MIRType::Pointer:
        // The pointer to stack results is handled like any other argument:
        // either addressed in place if it is passed on the stack, or we spill
        // it in the frame if it's in a register.
        MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_));
        [[fallthrough]];
      case MIRType::Int32:
      case MIRType::Int64:
      case MIRType::Double:
      case MIRType::Float32:
      case MIRType::RefOrNull:
        if (argsIter_->argInRegister()) {
          // Register arguments are spilled into the local area.
          frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        } else {
          // Stack arguments are addressed in place in the incoming arg area,
          // above the Frame; hence the negative offset.
          frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
        }
        break;
      default:
        MOZ_CRASH("Argument type");
    }
    if (mirType_ == MIRType::Pointer) {
      stackResultPointerOffset_ = frameOffset();
      // Advance past the synthetic stack result pointer argument and fall
      // through to the next case.
      argsIter_++;
      MOZ_ASSERT(argsIter_.done());
    } else {
      return;
    }
  }

  if (index_ < locals_.length()) {
    switch (locals_[index_].kind()) {
      case ValType::I32:
      case ValType::I64:
      case ValType::F32:
      case ValType::F64:
      case ValType::Ref:
        // TODO/AnyRef-boxing: With boxed immediates and strings, the
        // debugger must be made aware that AnyRef != Pointer.
        ASSERT_ANYREF_IS_JSOBJECT;
        mirType_ = ToMIRType(locals_[index_]);
        frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        break;
      default:
        MOZ_CRASH("Compiler bug: Unexpected local type");
    }
    return;
  }

  done_ = true;
}
1132
1133
// Step to the next local (post-increment form) and recompute the per-local
// state via settle().
void BaseLocalIter::operator++(int) {
  MOZ_ASSERT(!done_);
  // Keep the underlying argument iterator in step while arguments remain.
  if (!argsIter_.done()) {
    argsIter_++;
  }
  ++index_;
  settle();
}
1141
1142
// Abstraction of the height of the stack frame, to avoid type confusion.
1143
1144
class StackHeight {
  friend class BaseStackFrameAllocator;

  // Logical stack height in bytes; UINT32_MAX encodes "invalid".
  uint32_t height;

 public:
  explicit StackHeight(uint32_t h) : height(h) {}
  // Sentinel used before a height has been recorded.
  static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
  bool isValid() const { return height != UINT32_MAX; }
  // Comparison requires both operands to be valid heights.
  bool operator==(StackHeight rhs) const {
    MOZ_ASSERT(isValid() && rhs.isValid());
    return height == rhs.height;
  }
  bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};
1159
1160
// Abstraction for where multi-value results go on the machine stack.
1161
1162
class StackResultsLoc {
1163
uint32_t bytes_;
1164
size_t count_;
1165
Maybe<uint32_t> height_;
1166
1167
public:
1168
StackResultsLoc() : bytes_(0), count_(0){};
1169
StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
1170
: bytes_(bytes), count_(count), height_(Some(height)) {
1171
MOZ_ASSERT(bytes != 0);
1172
MOZ_ASSERT(count != 0);
1173
MOZ_ASSERT(height != 0);
1174
}
1175
1176
uint32_t bytes() const { return bytes_; }
1177
uint32_t count() const { return count_; }
1178
uint32_t height() const { return height_.value(); }
1179
1180
bool hasStackResults() const { return bytes() != 0; }
1181
StackResults stackResults() const {
1182
return hasStackResults() ? StackResults::HasStackResults
1183
: StackResults::NoStackResults;
1184
}
1185
};
1186
1187
// Abstraction of the baseline compiler's stack frame (except for the Frame /
1188
// DebugFrame parts). See comments above for more. Remember, "below" on the
1189
// stack means at lower addresses.
1190
//
1191
// The abstraction is split into two parts: BaseStackFrameAllocator is
1192
// responsible for allocating and deallocating space on the stack and for
1193
// performing computations that are affected by how the allocation is performed;
1194
// BaseStackFrame then provides a pleasant interface for stack frame management.
1195
1196
class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
  // but items are 8 bytes), allocate stack memory in chunks, and use a
  // separate stack height variable to track the effective stack pointer
  // within the allocated area.  Effectively, there's a variable amount of
  // free space directly above the stack pointer.  See diagram above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  // - The Local area is always aligned according to WasmStackAlignment, ie,
  //   masm.framePushed() % WasmStackAlignment is zero after allocating
  //   locals.
  //
  // - ChunkSize is always a multiple of WasmStackAlignment.
  //
  // - Pushing and popping are always in units of ChunkSize (hence preserving
  //   alignment).
  //
  // - The free space on the stack (masm.framePushed() - currentStackHeight_)
  //   is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack, the size of
  // this allocation is InitialChunk and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;

  // The current logical height of the frame is
  //   currentStackHeight_ = localSize_ + dynamicSize
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is
  // usually larger than currentStackHeight_, notably at the beginning of
  // execution when we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif

  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment.  The Local area is then allocated in beginFunction(),
  // following the allocation of the Header.  See onFixedStackAllocated()
  // below.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        // UINT32_MAX means "not yet set"; see setLocalSize().
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.  May be called only
  // once.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Record the current stack height, after it has become stable in
  // beginFunction().  See also BaseStackFrame::onFixedStackAllocated().

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = localSize_;
#endif
  }

 public:
  // The fixed amount of memory, in bytes, allocated on the stack below the
  // Header for purposes such as locals and other fixed values.  Includes all
  // necessary alignment, and on ARM64 also the initial chunk for the working
  // stack memory.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    return localSize_ + InitialChunk;
#else
    return localSize_;
#endif
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
#endif

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and saving
  // intermediate values.

  // Offset off of sp_ for the slot at stack area location `offset`.

  int32_t stackOffset(int32_t offset) { return masm.framePushed() - offset; }

  // Height of the end (high-address limit) of a stack-result area of
  // `stackResultBytes` bytes sitting on top of `stackBase`.
  uint32_t computeHeightWithStackResults(StackHeight stackBase,
                                         uint32_t stackResultBytes) {
    MOZ_ASSERT(stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    return stackBase.height + stackResultBytes;
  }

#ifdef RABALDR_CHUNKY_STACK
  // Grow the logical stack by `bytes` (at most one chunk), reserving a new
  // chunk when the free space above the current height is insufficient.
  void pushChunkyBytes(uint32_t bytes) {
    MOZ_ASSERT(bytes <= ChunkSize);
    checkChunkyInvariants();
    if (masm.framePushed() - currentStackHeight_ < bytes) {
      masm.reserveStack(ChunkSize);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we
    // drop values consumed by a call, and we may need to drop several chunks.
    // But never drop the initial chunk.  Crucially, the amount we drop is
    // always an integral number of chunks.
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace >= ChunkSize) {
      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif

  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    return currentStackHeight_;
#else
    return masm.framePushed();
#endif
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  // Sanity-check the relationship between the allocated frame and the
  // logical stack height; see the invariant list above.
  void checkChunkyInvariants() {
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // For a given stack height, return the appropriate size of the allocated
  // frame.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
    // A more complicated adjustment is needed.
    return framePushedForHeight(stackHeight.height);
#else
    // The allocated frame size equals the stack height.
    return stackHeight.height;
#endif
  }

 public:
  // The current height of the stack area, not necessarily zero-based, in a
  // type-safe way.

  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }

  // Set the frame height to a previously recorded value.

  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = amount.height;
    masm.setFramePushed(framePushedForHeight(amount));
    checkChunkyInvariants();
#else
    masm.setFramePushed(amount.height);
#endif
  }

  // The current height of the dynamic part of the stack area (ie, the backing
  // store for the evaluation stack), zero-based.

  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }

  // Before branching to an outer control label, pop the execution stack to
  // the level expected by that region, but do not update masm.framePushed()
  // as that will happen as compilation leaves the block.
  //
  // Note these operate directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight,
                            uint32_t stackResultBytes) {
    uint32_t framePushedHere = masm.framePushed();
    // The destination height must account for stack results flowing to the
    // target of the branch.
    StackHeight heightThere =
        StackHeight(destStackHeight.height + stackResultBytes);
    uint32_t framePushedThere = framePushedForHeight(heightThere);
    if (framePushedHere > framePushedThere) {
      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    }
  }

  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    popStackBeforeBranch(destStackHeight,
                         ABIResultIter::MeasureStackBytes(type));
  }

  // Given that there are |stackParamSize| bytes on the dynamic stack
  // corresponding to the stack results, return the stack height once these
  // parameters are popped.

  StackHeight stackResultsBase(uint32_t stackParamSize) {
    return StackHeight(currentStackHeight() - stackParamSize);
  }

  // For most of WebAssembly, adjacent instructions have fallthrough control
  // flow between them, which allows us to simply thread the current stack
  // height through the compiler.  There are two exceptions to this rule: when
  // leaving a block via dead code, and when entering the "else" arm of an
  // "if".  In these cases, the stack height is the block entry height, plus
  // any stack values (results in the block exit case, parameters in the else
  // entry case).

  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    uint32_t height = destStackHeight.height;
    height += ABIResultIter::MeasureStackBytes(type);
    setStackHeight(StackHeight(height));
  }

  // Return offset of stack result.

  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
                             uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    return end - result.stackOffset();
  }

 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the freeing
  // of the argument area and dropping values off the stack after a call.  But
  // they always amount to manipulating the real stack pointer by some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments.  But we do update framePushed(), so we
  // can still index into the frame below the outgoing arguments area.

  // This is always equivalent to a masm.reserveStack() call.

  void allocArgArea(size_t argSize) {
    if (argSize) {
      masm.reserveStack(argSize);
    }
  }

  // This frees the argument area allocated by allocArgArea(), and `argSize`
  // must be equal to the `argSize` argument to allocArgArea().  In addition
  // we drop some values from the frame, corresponding to the values that were
  // consumed by the call.

  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, which is why the operation is split.
    if (argSize) {
      masm.freeStack(argSize);
    }
    popChunkyBytes(dropSize);
#else
    if (argSize + dropSize) {
      masm.freeStack(argSize + dropSize);
    }
#endif
  }
};
1521
1522
class BaseStackFrame final : public BaseStackFrameAllocator {
  MacroAssembler& masm;

  // The largest observed value of masm.framePushed(), ie, the size of the
  // stack frame.  Read this for its true value only when code generation is
  // finished.
  uint32_t maxFramePushed_;

  // Patch point where we check for stack overflow.
  CodeOffset stackAddOffset_;

  // Low byte offset of pointer to stack results, if any.
  Maybe<int32_t> stackResultsPtrOffset_;

  // Low byte offset of local area for true locals (not parameters).
  uint32_t varLow_;

  // High byte offset + 1 of local area for true locals.
  uint32_t varHigh_;

  // The stack pointer, cached for brevity.
  RegisterOrSP sp_;

 public:
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        // varLow_/varHigh_ are set by setupLocals(); UINT32_MAX means unset.
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}
1554
1555
  ///////////////////////////////////////////////////////////////////////////
  //
  // Stack management and overflow checking

  // This must be called once beginFunction has allocated space for the Header
  // (the Frame and DebugFrame) and the Local area, and will record the current
  // frame size for internal use by the stack abstractions.

  void onFixedStackAllocated() {
    // Seed the running maximum used later by patchCheckStack().
    maxFramePushed_ = masm.framePushed();
    BaseStackFrameAllocator::onFixedStackAllocated();
  }
1567
1568
  // We won't know until after we've generated code how big the frame will be
  // (we may need arbitrary spill slots and outgoing param slots) so emit a
  // patchable add that is patched in endFunction().
  //
  // Note the platform scratch register may be used by branchPtr(), so
  // generally tmp must be something else.

  void checkStack(Register tmp, BytecodeOffset trapOffset) {
    // Compute (sp - <frame size>) into tmp; the size is filled in later by
    // patchCheckStack() once maxFramePushed_ is final.
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
    Label ok;
    masm.branchPtr(Assembler::Below,
                   Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)),
                   tmp, &ok);
    // The prospective frame would dip below the stack limit: trap.
    masm.wasmTrap(Trap::StackOverflow, trapOffset);
    masm.bind(&ok);
  }

  void patchCheckStack() {
    // Patch in the final frame size recorded during code generation.
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }
1589
1590
// Very large frames are implausible, probably an attack.
1591
1592
bool checkStackHeight() {
1593
// 512KiB should be enough, considering how Rabaldr uses the stack and
1594
// what the standard limits are:
1595
//
1596
// - 1,000 parameters
1597
// - 50,000 locals
1598
// - 10,000 values on the eval stack (not an official limit)
1599
//
1600
// At sizeof(int64) bytes per slot this works out to about 480KiB.
1601
return maxFramePushed_ <= 512 * 1024;
1602
}
1603
1604
  ///////////////////////////////////////////////////////////////////////////
  //
  // Local area

  // Descriptor for a single local slot: its type and its location relative
  // to the Frame.
  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // It assumes the stack is growing down and that locals are on the stack
    // at lower addresses than Frame, and is the offset from Frame of the
    // lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
  };
1621
1622
  // Profiling shows that the number of parameters and locals frequently
  // touches or exceeds 8.  So 16 seems like a reasonable starting point.
  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;

  // Initialize `localInfo` based on the types of `locals` and `args`.
  // Returns false on OOM (vector reservation failure).  On success, records
  // the extent of the true locals in varLow_/varHigh_, the aligned Local
  // area size, and the spilled stack-results pointer offset, if any.
  bool setupLocals(const ValTypeVector& locals, const ArgTypeVector& args,
                   bool debugEnabled, LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args, debugEnabled);
    varLow_ = i.reservedSize();
    // First the argument locals, whether spilled from registers or addressed
    // in place in the incoming arg area.
    for (; !i.done() && i.index() < args.length(); i++) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      varLow_ = i.currentLocalSize();
      index++;
    }

    // Then the non-argument locals.
    varHigh_ = varLow_;
    for (; !i.done(); i++) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      varHigh_ = i.currentLocalSize();
      index++;
    }

    // The Local area must be aligned to the wasm stack alignment.
    setLocalSize(AlignBytes(varHigh_, WasmStackAlignment));

    if (args.hasSyntheticStackResultPointerArg()) {
      stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
    }

    return true;
  }
1661
1662
void zeroLocals(BaseRegAlloc* ra);
1663
1664
  // Load a local into a register; the local's Frame-relative offset is
  // translated to an SP-relative address by localOffset().
  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src)), dest);
  }

#ifndef JS_PUNBOX64
  // On 32-bit systems an i64 local is accessed as two 32-bit halves.
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalPtr(const Local& src, RegPtr dest) {
    masm.loadPtr(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(Address(sp_, localOffset(src)), dest);
  }
1693
1694
  // Store a register into a local slot, addressed off the cached SP.
  void storeLocalI32(RegI32 src, const Local& dest) {
    masm.store32(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalI64(RegI64 src, const Local& dest) {
    masm.store64(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalPtr(Register src, const Local& dest) {
    masm.storePtr(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF64(RegF64 src, const Local& dest) {
    masm.storeDouble(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF32(RegF32 src, const Local& dest) {
    masm.storeFloat32(src, Address(sp_, localOffset(dest)));
  }

  // Offset off of sp_ for `local`.
  int32_t localOffset(const Local& local) { return localOffset(local.offs); }
1716
1717
  // The incoming stack result area pointer is for stack results of the
  // function being compiled.
  void loadIncomingStackResultAreaPtr(RegPtr reg) {
    masm.loadPtr(Address(sp_, stackOffset(stackResultsPtrOffset_.value())),
                 reg);
  }

  void storeIncomingStackResultAreaPtr(RegPtr reg) {
    // If we get here, that means the pointer to the stack results area was
    // passed in as a register, and therefore it will be spilled below the
    // frame, so the offset is a positive height.
    MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
    masm.storePtr(reg,
                  Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
  }

  // An outgoing stack result area pointer is for stack results of callees of
  // the function being compiled.
  void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
                                         RegPtr dest) {
    MOZ_ASSERT(results.height() <= masm.framePushed());
    // Materialize SP + (framePushed - height), ie the low address of the
    // outgoing result area.
    uint32_t offsetFromSP = masm.framePushed() - results.height();
    masm.movePtr(AsRegister(sp_), dest);
    masm.addPtr(Imm32(offsetFromSP), dest);
  }
1741
1742
 private:
  // Offset off of sp_ for a local with offset `offset` from Frame.
  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }

 public:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Dynamic area

  // Stack slot sizes for the value kinds handled by the push/pop operations
  // below, mirrored from ABIResult.
  static const size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
  static const size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
  static const size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
  static const size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
1755
1756
  // Push a value onto the dynamic area and return the resulting stack
  // height, which identifies the slot.  On chunky-stack systems the chunk
  // allocator provides the space; elsewhere masm.Push adjusts SP directly.
  // In both cases maxFramePushed_ tracks the high-water mark consumed later
  // by patchCheckStack().
  uint32_t pushPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfPtr);
    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }
1794
1795
  // Pop the top-of-stack value into a register, the inverse of the
  // corresponding push operation above.
  void popPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  void popFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  void popDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }
1827
1828
  // Drop `bytes` from the dynamic area; a no-op for zero.
  void popBytes(size_t bytes) {
    if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
      popChunkyBytes(bytes);
#else
      masm.freeStack(bytes);
#endif
    }
  }
1837
1838
  // Load a value from the dynamic area at stack-height `offset`.
  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  // On 32-bit systems an i64 stack slot is accessed as two 32-bit halves.
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  // Disambiguation: this loads a "Ptr" value from the stack, it does not load
  // the "StackPtr".

  void loadStackPtr(int32_t offset, RegPtr dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }
1870
1871
  // Ensure the dynamic area extends at least `stackResultBytes` above
  // `stackBase`, growing the stack if needed, and return the height of the
  // end (high-address limit) of the result area.
  uint32_t prepareStackResultArea(StackHeight stackBase,
                                  uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    if (currentStackHeight() < end) {
      uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
      pushChunkyBytes(bytes);
#else
      masm.reserveStack(bytes);
#endif
      // Growing the stack may set a new high-water mark.
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
    return end;
  }
1885
1886
  // Pop any stack above the result area so that the stack height is exactly
  // stackBase + stackResultBytes.
  void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= end);
    popBytes(currentStackHeight() - end);
  }
1891
1892
void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,