/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
 *
 * General assumptions for 32-bit vs 64-bit code:
 *
 * - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
 *   systems.
 *
 * - Code that knows that Register64 has a '.reg' member on 64-bit systems and
 *   '.high' and '.low' members on 32-bit systems, or knows the implications
 *   thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT.
 *
 *
 * Coding standards:
 *
 * - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
 *   and surrounding functions; most functions fall into this class) where the
 *   meaning is obvious:
 *
 *   - if there is a single source + destination register, it is called 'r'
 *   - if there is one source and a different destination, they are called 'rs'
 *     and 'rd'
 *   - if there is one source + destination register and another source register
 *     they are called 'r' and 'rs'
 *   - if there are two source registers and a destination register they are
 *     called 'rs0', 'rs1', and 'rd'.
 *
 * - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
 *
 * - Registers can be named non-generically for their function ('rp' for the
 *   'pointer' register and 'rv' for the 'value' register are typical) and those
 *   names may or may not have an 'r' prefix.
 *
 * - "Larger" code generating functions make their own rules.
 *
 *
 * General status notes:
 *
 * "FIXME" indicates a known or suspected bug. Always has a bug#.
 *
 * "TODO" indicates an opportunity for a general improvement, with an additional
 * tag to indicate the area of improvement. Usually has a bug#.
 *
 * There are lots of machine dependencies here but they are pretty well isolated
 * to a segment of the compiler. Many dependencies will eventually be factored
 * into the MacroAssembler layer and shared with other code generators.
 *
 *
 * High-value compiler performance improvements:
 *
 * - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
 *   etc methods) can avoid syncing the value stack if the specific register is
 *   in use but there is a free register to shuffle the specific register into.
 *   (This will also improve the generated code.) The sync happens often enough
 *   here to show up in profiles, because it is triggered by integer multiply
 *   and divide.
 *
 *
 * High-value code generation improvements:
 *
 * - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
 *   the stack and then jumps to the code for the target case. If no cleanup is
 *   needed we could just branch conditionally to the target; if the same amount
 *   of cleanup is needed for all cases then the cleanup can be done before the
 *   dispatch. Both are highly likely.
 *
 * - (Bug 1316806) Register management around calls: At the moment we sync the
 *   value stack unconditionally (this is simple) but there are probably many
 *   common cases where we could instead save/restore live caller-saves
 *   registers and perform parallel assignment into argument registers. This
 *   may be important if we keep some locals in registers.
 *
 * - (Bug 1316808) Allocate some locals to registers on machines where there are
 *   enough registers. This is probably hard to do well in a one-pass compiler
 *   but it might be that just keeping register arguments and the first few
 *   locals in registers is a viable strategy; another (more general) strategy
 *   is caching locals in registers in straight-line code. Such caching could
 *   also track constant values in registers, if that is deemed valuable. A
 *   combination of techniques may be desirable: parameters and the first few
 *   locals could be cached on entry to the function but not statically assigned
 *   to registers throughout.
 *
 *   (On a large corpus of code it should be possible to compute, for every
 *   signature comprising the types of parameters and locals, and using a static
 *   weight for loops, a list in priority order of which parameters and locals
 *   should be assigned to registers. Or something like that. Wasm makes
 *   this simple. Static assignments are desirable because they are not flushed
 *   to memory by the pre-block sync() call.)
 */

#include "wasm/WasmBaselineCompile.h"

#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"

#include <algorithm>
#include <utility>

#include "jit/AtomicOp.h"
#include "jit/IonTypes.h"
#include "jit/JitAllocPolicy.h"
#include "jit/Label.h"
#include "jit/MacroAssembler.h"
#include "jit/MIR.h"
#include "jit/RegisterAllocator.h"
#include "jit/Registers.h"
#include "jit/RegisterSets.h"
#if defined(JS_CODEGEN_ARM)
#  include "jit/arm/Assembler-arm.h"
#endif
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
#  include "jit/x86-shared/Architecture-x86-shared.h"
#  include "jit/x86-shared/Assembler-x86-shared.h"
#endif
#if defined(JS_CODEGEN_MIPS32)
#  include "jit/mips-shared/Assembler-mips-shared.h"
#  include "jit/mips32/Assembler-mips32.h"
#endif
#if defined(JS_CODEGEN_MIPS64)
#  include "jit/mips-shared/Assembler-mips-shared.h"
#  include "jit/mips64/Assembler-mips64.h"
#endif

#include "util/Memory.h"
#include "wasm/WasmGC.h"
#include "wasm/WasmGenerator.h"
#include "wasm/WasmInstance.h"
#include "wasm/WasmOpIter.h"
#include "wasm/WasmSignalHandlers.h"
#include "wasm/WasmStubs.h"
#include "wasm/WasmValidate.h"

#include "jit/MacroAssembler-inl.h"

using mozilla::DebugOnly;
using mozilla::FloorLog2;
using mozilla::IsPowerOfTwo;
using mozilla::Maybe;

namespace js {
namespace wasm {

using namespace js::jit;

using HandleNaNSpecially = bool;
using InvertBranch = bool;
using IsKnownNotZero = bool;
using IsUnsigned = bool;
using NeedsBoundsCheck = bool;
using WantResult = bool;
using ZeroOnOverflow = bool;

class BaseStackFrame;

// Two flags, useABI and interModule, control how calls are made.
//
// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
// except when InterModule::True is also set, when they are volatile.
//
// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
// In this case, we require InterModule::False. The calling convention
// is otherwise like UseABI::Wasm.
//
// UseABI::System implies that the Tls/Heap/Global registers are volatile.
// Additionally, the parameter passing mechanism may be slightly different from
// the UseABI::Wasm convention.
//
// When the Tls/Heap/Global registers are not volatile, the baseline compiler
// will restore the Tls register from its save slot before the call, since the
// baseline compiler uses the Tls register for other things.
//
// When those registers are volatile, the baseline compiler will reload them
// after the call (it will restore the Tls register from the save slot and load
// the other two from the Tls data).

enum class UseABI { Wasm, Builtin, System };
enum class InterModule { False = false, True = true };
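
// For illustration (a sketch of the intent above, not an exhaustive list): a
// direct call to a function in the same module would pair UseABI::Wasm with
// InterModule::False; a call that may cross a module boundary (eg through an
// import) would pair UseABI::Wasm with InterModule::True; and a call to a
// builtin helper would pair UseABI::Builtin with InterModule::False.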

#if defined(JS_CODEGEN_NONE)
#  define RABALDR_SCRATCH_I32
#  define RABALDR_SCRATCH_F32
#  define RABALDR_SCRATCH_F64

static const Register RabaldrScratchI32 = Register::Invalid();
static const FloatRegister RabaldrScratchF32 = InvalidFloatReg;
static const FloatRegister RabaldrScratchF64 = InvalidFloatReg;
#endif

#ifdef JS_CODEGEN_ARM64
#  define RABALDR_CHUNKY_STACK
#  define RABALDR_SCRATCH_I32
#  define RABALDR_SCRATCH_F32
#  define RABALDR_SCRATCH_F64
#  define RABALDR_SCRATCH_F32_ALIASES_F64

static const Register RabaldrScratchI32 = Register::FromCode(15);

// Note, the float scratch regs cannot be registers that are used for parameter
// passing in any ABI we use. Argregs tend to be low-numbered; register 30
// should be safe.

static constexpr FloatRegister RabaldrScratchF32 =
    FloatRegister(30, FloatRegisters::Single);
static constexpr FloatRegister RabaldrScratchF64 =
    FloatRegister(30, FloatRegisters::Double);

static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy");
static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy");
#endif

#ifdef JS_CODEGEN_X86
// The selection of EBX here steps gingerly around: the need for EDX
// to be allocatable for multiply/divide; ECX to be allocatable for
// shift/rotate; EAX (= ReturnReg) to be allocatable as the result
// register; EBX not being one of the WasmTableCall registers; and
// needing a temp register for load/store that has a single-byte
// persona.
//
// The compiler assumes that RabaldrScratchI32 has a single-byte
// persona. Code for 8-byte atomic operations assumes that
// RabaldrScratchI32 is in fact ebx.

#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = ebx;

#  define RABALDR_INT_DIV_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_ARM
// We use our own scratch register, because the macro assembler uses
// the regular scratch register(s) pretty liberally. We could
// work around that in several cases but the mess does not seem
// worth it yet. CallTempReg2 seems safe.

#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;

#  define RABALDR_INT_DIV_I64_CALLOUT
#  define RABALDR_I64_TO_FLOAT_CALLOUT
#  define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS32
#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;

#  define RABALDR_INT_DIV_I64_CALLOUT
#  define RABALDR_I64_TO_FLOAT_CALLOUT
#  define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS64
#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
#  if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
#    error "Bad configuration"
#  endif
#endif

template <MIRType t>
struct RegTypeOf {
  static_assert(t == MIRType::Float32 || t == MIRType::Double,
                "Float mask type");
};

template <>
struct RegTypeOf<MIRType::Float32> {
  static constexpr RegTypeName value = RegTypeName::Float32;
};
template <>
struct RegTypeOf<MIRType::Double> {
  static constexpr RegTypeName value = RegTypeName::Float64;
};

// The strongly typed register wrappers are especially useful to distinguish
// float registers from double registers, but they also clearly distinguish
// 32-bit registers from 64-bit register pairs on 32-bit systems.

struct RegI32 : public Register {
  RegI32() : Register(Register::Invalid()) {}
  explicit RegI32(Register reg) : Register(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegI32 Invalid() { return RegI32(Register::Invalid()); }
};

struct RegI64 : public Register64 {
  RegI64() : Register64(Register64::Invalid()) {}
  explicit RegI64(Register64 reg) : Register64(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegI64 Invalid() { return RegI64(Register64::Invalid()); }
};

struct RegPtr : public Register {
  RegPtr() : Register(Register::Invalid()) {}
  explicit RegPtr(Register reg) : Register(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegPtr Invalid() { return RegPtr(Register::Invalid()); }
};

struct RegF32 : public FloatRegister {
  RegF32() : FloatRegister() {}
  explicit RegF32(FloatRegister reg) : FloatRegister(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegF32 Invalid() { return RegF32(InvalidFloatReg); }
};

struct RegF64 : public FloatRegister {
  RegF64() : FloatRegister() {}
  explicit RegF64(FloatRegister reg) : FloatRegister(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegF64 Invalid() { return RegF64(InvalidFloatReg); }
};
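
// A quick illustration (a sketch, not code from this file) of what the
// wrappers buy us: each wrapper converts implicitly to its base register type
// for use with the masm, but the constructors are explicit, so mixing up
// float and double registers fails to compile:
//
//   RegF32 fs = ...;
//   masm.storeFloat32(fs, address);  // OK: RegF32 converts to FloatRegister
//   RegF64 fd = fs;                  // error: no implicit RegF32 -> RegF64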

struct AnyReg {
  union {
    RegI32 i32_;
    RegI64 i64_;
    RegPtr ref_;
    RegF32 f32_;
    RegF64 f64_;
  };

  enum { I32, I64, REF, F32, F64 } tag;

  explicit AnyReg(RegI32 r) {
    tag = I32;
    i32_ = r;
  }
  explicit AnyReg(RegI64 r) {
    tag = I64;
    i64_ = r;
  }
  explicit AnyReg(RegF32 r) {
    tag = F32;
    f32_ = r;
  }
  explicit AnyReg(RegF64 r) {
    tag = F64;
    f64_ = r;
  }
  explicit AnyReg(RegPtr r) {
    tag = REF;
    ref_ = r;
  }

  RegI32 i32() const {
    MOZ_ASSERT(tag == I32);
    return i32_;
  }
  RegI64 i64() const {
    MOZ_ASSERT(tag == I64);
    return i64_;
  }
  RegF32 f32() const {
    MOZ_ASSERT(tag == F32);
    return f32_;
  }
  RegF64 f64() const {
    MOZ_ASSERT(tag == F64);
    return f64_;
  }
  RegPtr ref() const {
    MOZ_ASSERT(tag == REF);
    return ref_;
  }

  AnyRegister any() const {
    switch (tag) {
      case F32:
        return AnyRegister(f32_);
      case F64:
        return AnyRegister(f64_);
      case I32:
        return AnyRegister(i32_);
      case I64:
#ifdef JS_PUNBOX64
        return AnyRegister(i64_.reg);
#else
        // The compiler is written so that this is never needed: any() is
        // called on arbitrary registers for asm.js but asm.js does not have
        // 64-bit ints. For wasm, any() is called on arbitrary registers
        // only on 64-bit platforms.
        MOZ_CRASH("AnyReg::any() on 32-bit platform");
#endif
      case REF:
        MOZ_CRASH("AnyReg::any() not implemented for ref types");
      default:
        MOZ_CRASH();
    }
    // Work around GCC 5 analysis/warning bug.
    MOZ_CRASH("AnyReg::any(): impossible case");
  }
};

// Platform-specific registers.
//
// All platforms must define struct SpecificRegs. All 32-bit platforms must
// have an abiReturnRegI64 member in that struct.

#if defined(JS_CODEGEN_X64)
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 rax, rcx, rdx;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        rax(RegI64(Register64(js::jit::rax))),
        rcx(RegI64(Register64(js::jit::rcx))),
        rdx(RegI64(Register64(js::jit::rdx))) {}
};
#elif defined(JS_CODEGEN_X86)
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 ecx_ebx, edx_eax, abiReturnRegI64;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))),
        edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))),
        abiReturnRegI64(edx_eax) {}
};
#elif defined(JS_CODEGEN_ARM)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_ARM64)
struct SpecificRegs {};
#elif defined(JS_CODEGEN_MIPS32)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_MIPS64)
struct SpecificRegs {};
#else
struct SpecificRegs {
#  ifndef JS_64BIT
  RegI64 abiReturnRegI64;
#  endif

  SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); }
};
#endif

class BaseCompilerInterface {
 public:
  // Spill all spillable registers.
  //
  // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
  // spilling only enough registers to satisfy current needs.
  virtual void sync() = 0;
  virtual void saveTempPtr(RegPtr r) = 0;
  virtual void restoreTempPtr(RegPtr r) = 0;
};

// Register allocator.

class BaseRegAlloc {
  // Notes on float register allocation.
  //
  // The general rule in SpiderMonkey is that float registers can alias double
  // registers, but there are predicates to handle exceptions to that rule:
  // hasUnaliasedDouble() and hasMultiAlias(). The way aliasing actually
  // works is platform dependent and exposed through the aliased(n, &r)
  // predicate, etc.
  //
  // - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
  //   cannot be treated as float.
  // - hasMultiAlias(): on ARM and MIPS a double register aliases two float
  //   registers.
  //
  // On some platforms (x86, x64, ARM64) but not all (ARM)
  // ScratchFloat32Register is the same as ScratchDoubleRegister.
  //
  // It's a basic invariant of the AllocatableRegisterSet that it deals
  // properly with aliasing of registers: if s0 or s1 are allocated then d0 is
  // not allocatable; if s0 and s1 are freed individually then d0 becomes
  // allocatable.

  BaseCompilerInterface* bc;
  AllocatableGeneralRegisterSet availGPR;
  AllocatableFloatRegisterSet availFPU;
#ifdef DEBUG
  AllocatableGeneralRegisterSet
      allGPR;  // The registers available to the compiler
  AllocatableFloatRegisterSet
      allFPU;  //   after removing ScratchReg, HeapReg, etc
  uint32_t scratchTaken;
#endif
#ifdef JS_CODEGEN_X86
  AllocatableGeneralRegisterSet singleByteRegs;
#endif

  bool hasGPR() { return !availGPR.empty(); }

  bool hasGPR64() {
#ifdef JS_PUNBOX64
    return !availGPR.empty();
#else
    if (availGPR.empty()) {
      return false;
    }
    Register r = allocGPR();
    bool available = !availGPR.empty();
    freeGPR(r);
    return available;
#endif
  }

  template <MIRType t>
  bool hasFPU() {
    return availFPU.hasAny<RegTypeOf<t>::value>();
  }

  bool isAvailableGPR(Register r) { return availGPR.has(r); }

  bool isAvailableFPU(FloatRegister r) { return availFPU.has(r); }

  void allocGPR(Register r) {
    MOZ_ASSERT(isAvailableGPR(r));
    availGPR.take(r);
  }

  Register allocGPR() {
    MOZ_ASSERT(hasGPR());
    return availGPR.takeAny();
  }

  void allocInt64(Register64 r) {
#ifdef JS_PUNBOX64
    allocGPR(r.reg);
#else
    allocGPR(r.low);
    allocGPR(r.high);
#endif
  }

  Register64 allocInt64() {
    MOZ_ASSERT(hasGPR64());
#ifdef JS_PUNBOX64
    return Register64(availGPR.takeAny());
#else
    Register high = availGPR.takeAny();
    Register low = availGPR.takeAny();
    return Register64(high, low);
#endif
  }

#ifdef JS_CODEGEN_ARM
  // r12 is normally the ScratchRegister and r13 is always the stack pointer,
  // so the highest possible pair has r10 as the even-numbered register.

  static constexpr uint32_t PAIR_LIMIT = 10;

  bool hasGPRPair() {
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        return true;
      }
    }
    return false;
  }

  void allocGPRPair(Register* low, Register* high) {
    MOZ_ASSERT(hasGPRPair());
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        *low = Register::FromCode(i);
        *high = Register::FromCode(i + 1);
        allocGPR(*low);
        allocGPR(*high);
        return;
      }
    }
    MOZ_CRASH("No pair");
  }
#endif

  void allocFPU(FloatRegister r) {
    MOZ_ASSERT(isAvailableFPU(r));
    availFPU.take(r);
  }

  template <MIRType t>
  FloatRegister allocFPU() {
    return availFPU.takeAny<RegTypeOf<t>::value>();
  }

  void freeGPR(Register r) { availGPR.add(r); }

  void freeInt64(Register64 r) {
#ifdef JS_PUNBOX64
    freeGPR(r.reg);
#else
    freeGPR(r.low);
    freeGPR(r.high);
#endif
  }

  void freeFPU(FloatRegister r) { availFPU.add(r); }

 public:
  explicit BaseRegAlloc()
      : bc(nullptr),
        availGPR(GeneralRegisterSet::All()),
        availFPU(FloatRegisterSet::All())
#ifdef DEBUG
        ,
        scratchTaken(0)
#endif
#ifdef JS_CODEGEN_X86
        ,
        singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
#endif
  {
    RegisterAllocator::takeWasmRegisters(availGPR);

    // Allocate any private scratch registers.
#if defined(RABALDR_SCRATCH_I32)
    if (RabaldrScratchI32 != RegI32::Invalid()) {
      availGPR.take(RabaldrScratchI32);
    }
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition");
    MOZ_ASSERT(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition");
#endif

#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64)
    if (RabaldrScratchF32 != RegF32::Invalid()) {
      availFPU.take(RabaldrScratchF32);
    }
#endif

#if defined(RABALDR_SCRATCH_F64)
#  ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(availFPU.has(RabaldrScratchF32));
#  endif
    if (RabaldrScratchF64 != RegF64::Invalid()) {
      availFPU.take(RabaldrScratchF64);
    }
#  ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(!availFPU.has(RabaldrScratchF32));
#  endif
#endif

#ifdef DEBUG
    allGPR = availGPR;
    allFPU = availFPU;
#endif
  }

  void init(BaseCompilerInterface* bc) { this->bc = bc; }

  enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4 };

#ifdef DEBUG
  bool isScratchRegisterTaken(ScratchKind s) const {
    return (scratchTaken & uint32_t(s)) != 0;
  }

  void setScratchRegisterTaken(ScratchKind s, bool state) {
    if (state) {
      scratchTaken |= uint32_t(s);
    } else {
      scratchTaken &= ~uint32_t(s);
    }
  }
#endif

#ifdef JS_CODEGEN_X86
  bool isSingleByteI32(Register r) { return singleByteRegs.has(r); }
#endif

  bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); }

  bool isAvailableI64(RegI64 r) {
#ifdef JS_PUNBOX64
    return isAvailableGPR(r.reg);
#else
    return isAvailableGPR(r.low) && isAvailableGPR(r.high);
#endif
  }

  bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); }

  bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); }

  bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); }

  // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
  // failure, only as much as we need.

  MOZ_MUST_USE RegI32 needI32() {
    if (!hasGPR()) {
      bc->sync();
    }
    return RegI32(allocGPR());
  }

  void needI32(RegI32 specific) {
    if (!isAvailableI32(specific)) {
      bc->sync();
    }
    allocGPR(specific);
  }

  MOZ_MUST_USE RegI64 needI64() {
    if (!hasGPR64()) {
      bc->sync();
    }
    return RegI64(allocInt64());
  }

  void needI64(RegI64 specific) {
    if (!isAvailableI64(specific)) {
      bc->sync();
    }
    allocInt64(specific);
  }

  MOZ_MUST_USE RegPtr needPtr() {
    if (!hasGPR()) {
      bc->sync();
    }
    return RegPtr(allocGPR());
  }

  void needPtr(RegPtr specific) {
    if (!isAvailablePtr(specific)) {
      bc->sync();
    }
    allocGPR(specific);
  }

  // Use when you need a register for a short time but explicitly want to avoid
  // a full sync().
  MOZ_MUST_USE RegPtr needTempPtr(RegPtr fallback, bool* saved) {
    if (hasGPR()) {
      *saved = false;
      return RegPtr(allocGPR());
    }
    *saved = true;
    bc->saveTempPtr(fallback);
    MOZ_ASSERT(isAvailablePtr(fallback));
    allocGPR(fallback);
    return RegPtr(fallback);
  }

  MOZ_MUST_USE RegF32 needF32() {
    if (!hasFPU<MIRType::Float32>()) {
      bc->sync();
    }
    return RegF32(allocFPU<MIRType::Float32>());
  }

  void needF32(RegF32 specific) {
    if (!isAvailableF32(specific)) {
      bc->sync();
    }
    allocFPU(specific);
  }

  MOZ_MUST_USE RegF64 needF64() {
    if (!hasFPU<MIRType::Double>()) {
      bc->sync();
    }
    return RegF64(allocFPU<MIRType::Double>());
  }

  void needF64(RegF64 specific) {
    if (!isAvailableF64(specific)) {
      bc->sync();
    }
    allocFPU(specific);
  }

  void freeI32(RegI32 r) { freeGPR(r); }

  void freeI64(RegI64 r) { freeInt64(r); }

  void freePtr(RegPtr r) { freeGPR(r); }

  void freeF64(RegF64 r) { freeFPU(r); }

  void freeF32(RegF32 r) { freeFPU(r); }

  void freeTempPtr(RegPtr r, bool saved) {
    freePtr(r);
    if (saved) {
      bc->restoreTempPtr(r);
      MOZ_ASSERT(!isAvailablePtr(r));
    }
  }
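
  // Illustrative usage of the temp-pointer protocol above (a sketch, not code
  // from this file): the caller threads the `saved` flag from needTempPtr()
  // back into freeTempPtr(), so the fallback register's old value is restored
  // only if it was actually spilled.
  //
  //   bool saved;
  //   RegPtr temp = ra.needTempPtr(fallbackReg, &saved);  // fallbackReg is
  //                                                       // hypothetical
  //   ... use temp briefly, no full sync() incurred ...
  //   ra.freeTempPtr(temp, saved);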

#ifdef JS_CODEGEN_ARM
  MOZ_MUST_USE RegI64 needI64Pair() {
    if (!hasGPRPair()) {
      bc->sync();
    }
    Register low, high;
    allocGPRPair(&low, &high);
    return RegI64(Register64(high, low));
  }
#endif

#ifdef DEBUG
  friend class LeakCheck;

  class MOZ_RAII LeakCheck {
   private:
    const BaseRegAlloc& ra;
    AllocatableGeneralRegisterSet knownGPR_;
    AllocatableFloatRegisterSet knownFPU_;

   public:
    explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
      knownGPR_ = ra.availGPR;
      knownFPU_ = ra.availFPU;
    }

    ~LeakCheck() {
      MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits());
      MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits());
    }

    void addKnownI32(RegI32 r) { knownGPR_.add(r); }

    void addKnownI64(RegI64 r) {
#  ifdef JS_PUNBOX64
      knownGPR_.add(r.reg);
#  else
      knownGPR_.add(r.high);
      knownGPR_.add(r.low);
#  endif
    }

    void addKnownF32(RegF32 r) { knownFPU_.add(r); }

    void addKnownF64(RegF64 r) { knownFPU_.add(r); }

    void addKnownRef(RegPtr r) { knownGPR_.add(r); }
  };
#endif
};
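
// In DEBUG builds, LeakCheck is used roughly as follows (a sketch, assuming
// `ra` is the compiler's BaseRegAlloc and `joinReg` is a register that is
// deliberately live across a control-flow join):
//
//   BaseRegAlloc::LeakCheck check(ra);
//   check.addKnownI32(joinReg);  // account for intentionally-live registers
//   // ~LeakCheck asserts every other register was returned to the pool.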

// Scratch register abstractions.
//
// We define our own scratch registers when the platform doesn't provide what we
// need. A notable use case is that we will need a private scratch register
// when the platform masm uses its scratch register very frequently (eg, ARM).

class BaseScratchRegister {
#ifdef DEBUG
  BaseRegAlloc& ra;
  BaseRegAlloc::ScratchKind kind_;

 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind)
      : ra(ra), kind_(kind) {
    MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, true);
  }
  ~BaseScratchRegister() {
    MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, false);
  }
#else
 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra,
                               BaseRegAlloc::ScratchKind kind) {}
#endif
};

#ifdef RABALDR_SCRATCH_F64
class ScratchF64 : public BaseScratchRegister {
 public:
  explicit ScratchF64(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
  operator RegF64() const { return RegF64(RabaldrScratchF64); }
};
#else
class ScratchF64 : public ScratchDoubleScope {
 public:
  explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
  operator RegF64() const { return RegF64(FloatRegister(*this)); }
};
#endif

#ifdef RABALDR_SCRATCH_F32
class ScratchF32 : public BaseScratchRegister {
 public:
  explicit ScratchF32(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
  operator RegF32() const { return RegF32(RabaldrScratchF32); }
};
#else
class ScratchF32 : public ScratchFloat32Scope {
 public:
  explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
  operator RegF32() const { return RegF32(FloatRegister(*this)); }
};
#endif

#ifdef RABALDR_SCRATCH_I32
template <class RegType>
class ScratchGPR : public BaseScratchRegister {
 public:
  explicit ScratchGPR(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
  operator RegType() const { return RegType(RabaldrScratchI32); }
};
#else
template <class RegType>
class ScratchGPR : public ScratchRegisterScope {
 public:
  explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
  operator RegType() const { return RegType(Register(*this)); }
};
#endif

using ScratchI32 = ScratchGPR<RegI32>;
using ScratchPtr = ScratchGPR<RegPtr>;

#if defined(JS_CODEGEN_X86)
// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
// no other register will do. And we would normally have to allocate that
// register using ScratchI32 since normally the scratch register is EBX.
// But the whole point of ScratchI32 is to hide that relationship. By using
// the ScratchEBX alias, we document that at that point we require the
// scratch register to be EBX.
using ScratchEBX = ScratchI32;

// ScratchI8 is a mnemonic device: For some ops we need a register with a
// byte subregister.
using ScratchI8 = ScratchI32;
#endif
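
// Illustrative use of the scratch abstractions (a sketch, not code from this
// file); depending on the platform configuration the constructor takes a
// BaseRegAlloc& or a MacroAssembler&. The RAII scope releases the scratch on
// exit, and DEBUG builds assert that the same scratch kind is never claimed
// twice at once:
//
//   {
//     ScratchI32 scratch(ra);
//     masm.move32(Imm32(0), scratch);
//   }  // scratch released here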

// The stack frame.
//
// The stack frame has four parts ("below" means at lower addresses):
//
//  - the Frame element;
//  - the Local area, including the DebugFrame element; allocated below the
//    header with various forms of alignment;
//  - the Dynamic area, comprising the temporary storage the compiler uses for
//    register spilling, allocated below the Local area;
//  - the Arguments area, comprising memory allocated for outgoing calls,
//    allocated below the Dynamic area.
//
//                +============================+
//                |    Incoming arg            |
//                |    ...                     |
// -------------- +============================+
//                |    Frame (fixed size)      |
// -------------- +============================+ <-------------------- FP
//         ^      | DebugFrame (optional)      |    ^          ^^
//         |      +----------------------------+    |          ||
//    localSize   | Local (static size)        |    |          ||
//         |      |   ...                      |    |          framePushed
//         v      |   (padding)                |    |          ||
// -------------- +============================+ currentStackHeight  ||
//         ^      | Dynamic (variable size)    |    |          ||
//   dynamicSize  |   ...                      |    |          ||
//         v      |   ...                      |    v          ||
// -------------- |   (free space, sometimes)  | ---------     v|
//                +============================+ <----- SP not-during calls
//                |   Arguments (sometimes)    |                |
//                |   ...                      |                v
//                +============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer. masm.framePushed() is always
// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
// Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame) is laid out by BaseLocalIter and is
// allocated and deallocated by standard prologue and epilogue functions that
// manipulate the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame. On some
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
// because the SP needs a specific alignment, and in this case there will
// normally be some free space directly above the SP. The stack height does
// not include the free space; it reflects the logically used space only.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.

// BaseLocalIter iterates over a vector of types of locals and provides offsets
// from the Frame address for those locals, and associated data.
//
// The implementation of BaseLocalIter is the property of the BaseStackFrame.
// But it is also exposed for eg the debugger to use.

BaseLocalIter::BaseLocalIter(const ValTypeVector& locals, size_t argsLength,
                             bool debugEnabled)
    : locals_(locals),
      argsLength_(argsLength),
      argsRange_(locals.begin(), argsLength),
      argsIter_(argsRange_),
      index_(0),
      localSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
      reservedSize_(localSize_),
      frameOffset_(UINT32_MAX),
      mirType_(MIRType::Undefined),
      done_(false) {
  MOZ_ASSERT(argsLength <= locals.length());
  settle();
}

int32_t BaseLocalIter::pushLocal(size_t nbytes) {
  MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
  localSize_ = AlignBytes(localSize_, nbytes) + nbytes;
  return localSize_;  // Locals grow down so capture base address.
}
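
// A worked example of the arithmetic above (illustrative, not code from this
// file): with localSize_ == 4, pushLocal(8) first aligns localSize_ up to 8
// and then adds 8, leaving localSize_ == 16. The returned offset, 16, names
// the local's base: since locals grow down, the local occupies the eight
// bytes from Frame-16 up to (but not including) Frame-8.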

void BaseLocalIter::settle() {
  if (index_ < argsLength_) {
    MOZ_ASSERT(!argsIter_.done());
    mirType_ = argsIter_.mirType();
    switch (mirType_) {
      case MIRType::Int32:
      case MIRType::Int64:
      case MIRType::Double:
      case MIRType::Float32:
      case MIRType::RefOrNull:
        if (argsIter_->argInRegister()) {
          frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        } else {
          frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
        }
        break;
      default:
        MOZ_CRASH("Argument type");
    }
    return;
  }

  MOZ_ASSERT(argsIter_.done());
  if (index_ < locals_.length()) {
    switch (locals_[index_].code()) {
      case ValType::I32:
      case ValType::I64:
      case ValType::F32:
      case ValType::F64:
      case ValType::Ref:
      case ValType::FuncRef:
      case ValType::AnyRef:
        // TODO/AnyRef-boxing: With boxed immediates and strings, the
        // debugger must be made aware that AnyRef != Pointer.
        ASSERT_ANYREF_IS_JSOBJECT;
        mirType_ = ToMIRType(locals_[index_]);
        frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        break;
      case ValType::NullRef:
      default:
        MOZ_CRASH("Compiler bug: Unexpected local type");
    }
    return;
  }

  done_ = true;
}

void BaseLocalIter::operator++(int) {
  MOZ_ASSERT(!done_);
  index_++;
  if (!argsIter_.done()) {
    argsIter_++;
  }
  settle();
}

// Abstraction of the height of the stack frame, to avoid type confusion.

class StackHeight {
  friend class BaseStackFrameAllocator;

  uint32_t height;

 public:
  explicit StackHeight(uint32_t h) : height(h) {}
  static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
  bool isValid() const { return height != UINT32_MAX; }
  bool operator==(StackHeight rhs) const {
    MOZ_ASSERT(isValid() && rhs.isValid());
    return height == rhs.height;
  }
  bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};

// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts). See comments above for more. Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is performed;
// BaseStackFrame then provides a pleasant interface for stack frame management.

class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
  // but items are 8 bytes), allocate stack memory in chunks, and use a
  // separate stack height variable to track the effective stack pointer
  // within the allocated area. Effectively, there's a variable amount of
  // free space directly above the stack pointer. See diagram above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  // - The Local area is always aligned according to WasmStackAlignment, ie,
  //   masm.framePushed() % WasmStackAlignment is zero after allocating
  //   locals.
  //
  // - ChunkSize is always a multiple of WasmStackAlignment.
  //
  // - Pushing and popping are always in units of ChunkSize (hence preserving
  //   alignment).
  //
  // - The free space on the stack (masm.framePushed() - currentStackHeight_)
  //   is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack; the size of
  // this allocation is InitialChunk, and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;
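
  // A worked example (illustrative only): on a 64-bit platform ChunkSize is
  // 8 * 8 = 64 bytes. If currentStackHeight_ equals masm.framePushed() (no
  // free space) and we push an 8-byte value, pushChunkyBytes() below reserves
  // a full 64-byte chunk and then advances currentStackHeight_ by 8, leaving
  // 56 bytes of free space above the logical stack top.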

  // The current logical height of the frame is
  //   currentStackHeight_ = localSize_ + dynamicSize
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is usually
  // larger than currentStackHeight_, notably at the beginning of execution when
  // we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif

  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment. The Local area is then allocated in beginFunction(),
  // following the allocation of the Header. See onFixedStackAllocated()
  // below.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Record the current stack height, after it has become stable in
  // beginFunction(). See also BaseStackFrame::onFixedStackAllocated().

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = localSize_;
#endif
  }

 public:
  // The fixed amount of memory, in bytes, allocated on the stack below the
  // Header for purposes such as locals and other fixed values. Includes all
  // necessary alignment, and on ARM64 also the initial chunk for the working
  // stack memory.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    return localSize_ + InitialChunk;
#else
    return localSize_;
#endif
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
#endif

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and saving
  // intermediate values.

  // Offset off of sp_ for the slot at stack area location `offset`.

  int32_t stackOffset(int32_t offset) { return masm.framePushed() - offset; }

  uint32_t computeHeightWithStackResults(StackHeight stackBase,
                                         uint32_t stackResultBytes) {
    MOZ_ASSERT(stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    return stackBase.height + stackResultBytes;
  }

#ifdef RABALDR_CHUNKY_STACK
  void pushChunkyBytes(uint32_t bytes) {
    MOZ_ASSERT(bytes <= ChunkSize);
    checkChunkyInvariants();
    if (masm.framePushed() - currentStackHeight_ < bytes) {
      masm.reserveStack(ChunkSize);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
    // values consumed by a call, and we may need to drop several chunks. But
    // never drop the initial chunk. Crucially, the amount we drop is always an
    // integral number of chunks.
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace >= ChunkSize) {
      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif

  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    return currentStackHeight_;
#else
    return masm.framePushed();
#endif
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  void checkChunkyInvariants() {
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // For a given stack height, return the appropriate size of the allocated
  // frame.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
    // A more complicated adjustment is needed.
    return framePushedForHeight(stackHeight.height);
#else
    // The allocated frame size equals the stack height.
    return stackHeight.height;
#endif
  }

 public:
  // The current height of the stack area, not necessarily zero-based, in a
  // type-safe way.

  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }

  // Set the frame height to a previously recorded value.

  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = amount.height;
    masm.setFramePushed(framePushedForHeight(amount));
    checkChunkyInvariants();
#else
    masm.setFramePushed(amount.height);
#endif
  }

  // The current height of the dynamic part of the stack area (ie, the backing
  // store for the evaluation stack), zero-based.

  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }

  // Before branching to an outer control label, pop the execution stack to
  // the level expected by that region, but do not update masm.framePushed()
  // as that will happen as compilation leaves the block.
  //
  // Note these operate directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight,
                            uint32_t stackResultBytes) {
    uint32_t framePushedHere = masm.framePushed();
    StackHeight heightThere =
        StackHeight(destStackHeight.height + stackResultBytes);
    uint32_t framePushedThere = framePushedForHeight(heightThere);
    if (framePushedHere > framePushedThere) {
      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    }
  }
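
  // For example (an illustrative trace, not code from this file): under the
  // chunky-stack scheme, suppose the destination's framePushedForHeight() is
  // two chunks smaller than masm.framePushed() here. The code above bumps SP
  // by that difference before the branch while masm.framePushed() keeps its
  // here-side value; when compilation reaches the join point, the recorded
  // height is re-established via setStackHeight().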

  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    popStackBeforeBranch(destStackHeight,
                         ABIResultIter::MeasureStackBytes(type));
  }

  // Given that there are |stackParamSize| bytes on the dynamic stack
  // corresponding to the stack results, return the stack height once these
  // parameters are popped.

  StackHeight stackResultsBase(uint32_t stackParamSize) {
    return StackHeight(currentStackHeight() - stackParamSize);
  }

  // For most of WebAssembly, adjacent instructions have fallthrough control
  // flow between them, which allows us to simply thread the current stack
  // height through the compiler. There are two exceptions to this rule: when
  // leaving a block via dead code, and when entering the "else" arm of an "if".
  // In these cases, the stack height is the block entry height, plus any stack
  // values (results in the block exit case, parameters in the else entry case).

  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    uint32_t height = destStackHeight.height;
    height += ABIResultIter::MeasureStackBytes(type);
    setStackHeight(StackHeight(height));
  }

  // Return offset of stack result.

  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
                             uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    return end - result.stackOffset();
  }

 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the freeing
  // of the argument area and dropping values off the stack after a call. But
  // they always amount to manipulating the real stack pointer by some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments. But we do update framePushed(), so we
  // can still index into the frame below the outgoing arguments area.

  // This is always equivalent to a masm.reserveStack() call.

  void allocArgArea(size_t argSize) {
    if (argSize) {
      masm.reserveStack(argSize);
    }
  }

  // This frees the argument area allocated by allocArgArea(), and `argSize`
  // must be equal to the `argSize` argument to allocArgArea(). In addition
  // we drop some values from the frame, corresponding to the values that were
  // consumed by the call.

  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, which is why the operation is split.
    if (argSize) {
      masm.freeStack(argSize);
    }
    popChunkyBytes(dropSize);
#else
    if (argSize + dropSize) {
      masm.freeStack(argSize + dropSize);
    }
#endif
  }
};

class BaseStackFrame final : public BaseStackFrameAllocator {
  MacroAssembler& masm;

  // The largest observed value of masm.framePushed(), ie, the size of the
  // stack frame. Read this for its true value only when code generation is
  // finished.
  uint32_t maxFramePushed_;

  // Patch point where we check for stack overflow.
  CodeOffset stackAddOffset_;

  // Low byte offset of local area for true locals (not parameters).
  uint32_t varLow_;

  // High byte offset + 1 of local area for true locals.
  uint32_t varHigh_;

  // The stack pointer, cached for brevity.
  RegisterOrSP sp_;

 public:
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}

  ///////////////////////////////////////////////////////////////////////////
  //
  // Stack management and overflow checking

  // This must be called once beginFunction has allocated space for the Header
  // (the Frame and DebugFrame) and the Local area, and will record the current
  // frame size for internal use by the stack abstractions.

  void onFixedStackAllocated() {
    maxFramePushed_ = masm.framePushed();
    BaseStackFrameAllocator::onFixedStackAllocated();
  }

  // We won't know until after we've generated code how big the frame will be
  // (we may need arbitrary spill slots and outgoing param slots) so emit a
  // patchable add that is patched in endFunction().
  //
  // Note the platform scratch register may be used by branchPtr(), so
  // generally tmp must be something else.

  void checkStack(Register tmp, BytecodeOffset trapOffset) {
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
    Label ok;
    masm.branchPtr(Assembler::Below,
                   Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)),
                   tmp, &ok);
    masm.wasmTrap(Trap::StackOverflow, trapOffset);
    masm.bind(&ok);
  }

  void patchCheckStack() {
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }

  // Very large frames are implausible, probably an attack.

  bool checkStackHeight() {
    // 512KiB should be enough, considering how Rabaldr uses the stack and
    // what the standard limits are:
    //
    // - 1,000 parameters
    // - 50,000 locals
    // - 10,000 values on the eval stack (not an official limit)
    //
    // At sizeof(int64) bytes per slot this works out to about 480KiB.
    return maxFramePushed_ <= 512 * 1024;
  }

  ///////////////////////////////////////////////////////////////////////////
  //
  // Local area

  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // It assumes the stack is growing down and that locals are on the stack
    // at lower addresses than Frame, and is the offset from Frame of the
    // lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
  };

  // Profiling shows that the number of parameters and locals frequently
  // touches or exceeds 8. So 16 seems like a reasonable starting point.
  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;

  // Initialize `localInfo` based on the types of `locals` and `args`.
  bool setupLocals(const ValTypeVector& locals, const ValTypeVector& args,
                   bool debugEnabled, LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args.length(), debugEnabled);
    varLow_ = i.reservedSize();
    for (; !i.done() && i.index() < args.length(); i++) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      varLow_ = i.currentLocalSize();
      index++;
    }

    varHigh_ = varLow_;
    for (; !i.done(); i++) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      varHigh_ = i.currentLocalSize();
      index++;
    }

    setLocalSize(AlignBytes(varHigh_, WasmStackAlignment));

    return true;
  }

  void zeroLocals(BaseRegAlloc* ra);

  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src)), dest);
  }

#ifndef JS_PUNBOX64
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalPtr(const Local& src, RegPtr dest) {
    masm.loadPtr(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(Address(sp_, localOffset(src)), dest);
  }

  void storeLocalI32(RegI32 src, const Local& dest) {
    masm.store32(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalI64(RegI64 src, const Local& dest) {
    masm.store64(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalPtr(Register src, const Local& dest) {
    masm.storePtr(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF64(RegF64 src, const Local& dest) {
    masm.storeDouble(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF32(RegF32 src, const Local& dest) {
    masm.storeFloat32(src, Address(sp_, localOffset(dest)));
  }

  // Offset off of sp_ for `local`.
  int32_t localOffset(const Local& local) { return localOffset(local.offs); }

 private:
  // Offset off of sp_ for a local with offset `offset` from Frame.
  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }

 public:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Dynamic area

  static const size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
  static const size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
  static const size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
  static const size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;

  uint32_t pushPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfPtr);
    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }

  void popPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  void popFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  void popDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }

  void popBytes(size_t bytes) {
    if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
      popChunkyBytes(bytes);
#else
      masm.freeStack(bytes);
#endif
    }
  }

  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  // Disambiguation: this loads a "Ptr" value from the stack; it does not load
  // the "StackPtr".

  void loadStackPtr(int32_t offset, RegPtr dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }

  uint32_t prepareStackResultArea(StackHeight stackBase,
                                  uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    if (currentStackHeight() < end) {
      uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
      pushChunkyBytes(bytes);
#else
      masm.reserveStack(bytes);
#endif
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
    return end;
  }

  void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= end);
    popBytes(currentStackHeight() - end);
  }

  void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight < srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t destOffset = stackOffset(destHeight) + bytes;
    uint32_t srcOffset = stackOffset(srcHeight) + bytes;
    while (bytes >= sizeof(intptr_t)) {
      destOffset -= sizeof(intptr_t);
      srcOffset -= sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(sp_, destOffset));
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      destOffset -= sizeof(uint32_t);
      srcOffset -= sizeof(uint32_t);
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(sp_, destOffset));
    }
  }

  void shuffleStackResultsTowardFP(StackHeight srcHeight,
                                   StackHeight destHeight, uint32_t bytes,
                                   Register temp) {
    MOZ_ASSERT(srcHeight.isValid());
    MOZ_ASSERT(destHeight.isValid());
    uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
    uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
    MOZ_ASSERT(src <= currentStackHeight());
    MOZ_ASSERT(dest <= currentStackHeight());
    shuffleStackResultsTowardFP(src - bytes, dest - bytes, bytes, temp);
  }

  void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight > srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t destOffset = stackOffset(destHeight);
    uint32_t srcOffset = stackOffset(srcHeight);
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(sp_, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(sp_, destOffset));
    }
  }

  void storeImmediateToStack(int32_t imm, uint32_t destHeight, Register temp) {
    masm.move32(Imm32(imm), temp);
    masm.store32(temp, Address(sp_, stackOffset(destHeight)));
  }

  void storeImmediateToStack(int64_t imm, uint32_t destHeight, Register temp) {
#ifdef JS_PUNBOX64
    masm.move64(Imm64(imm), Register64(temp));
    masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
#else
    union {