/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
 *
 * General assumptions for 32-bit vs 64-bit code:
 *
 * - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
 *   systems.
 *
 * - Code that knows that Register64 has a '.reg' member on 64-bit systems and
 *   '.high' and '.low' members on 32-bit systems, or knows the implications
 *   thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT.
 *
 *
 * Coding standards:
 *
 * - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
 *   and surrounding functions; most functions fall into this class) where the
 *   meaning is obvious:
 *
 *   - if there is a single source + destination register, it is called 'r'
 *   - if there is one source and a different destination, they are called 'rs'
 *     and 'rd'
 *   - if there is one source + destination register and another source
 *     register, they are called 'r' and 'rs'
 *   - if there are two source registers and a destination register they are
 *     called 'rs0', 'rs1', and 'rd'.
 *
 * - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
 *
 * - Registers can be named non-generically for their function ('rp' for the
 *   'pointer' register and 'rv' for the 'value' register are typical) and those
 *   names may or may not have an 'r' prefix.
 *
 * - "Larger" code generating functions make their own rules.
 *
 *
 * General status notes:
 *
 * "FIXME" indicates a known or suspected bug. Always has a bug#.
 *
 * "TODO" indicates an opportunity for a general improvement, with an additional
 * tag to indicate the area of improvement. Usually has a bug#.
 *
 * There are lots of machine dependencies here but they are pretty well isolated
 * to a segment of the compiler. Many dependencies will eventually be factored
 * into the MacroAssembler layer and shared with other code generators.
 *
 *
 * High-value compiler performance improvements:
 *
 * - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
 *   etc methods) can avoid syncing the value stack if the specific register is
 *   in use but there is a free register to shuffle the specific register into.
 *   (This will also improve the generated code.) The sync happens often enough
 *   here to show up in profiles, because it is triggered by integer multiply
 *   and divide.
 *
 *
 * High-value code generation improvements:
 *
 * - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
 *   the stack and then jumps to the code for the target case. If no cleanup is
 *   needed we could just branch conditionally to the target; if the same amount
 *   of cleanup is needed for all cases then the cleanup can be done before the
 *   dispatch. Both are highly likely.
 *
 * - (Bug 1316806) Register management around calls: At the moment we sync the
 *   value stack unconditionally (this is simple) but there are probably many
 *   common cases where we could instead save/restore live caller-saves
 *   registers and perform parallel assignment into argument registers. This
 *   may be important if we keep some locals in registers.
 *
 * - (Bug 1316808) Allocate some locals to registers on machines where there are
 *   enough registers. This is probably hard to do well in a one-pass compiler
 *   but it might be that just keeping register arguments and the first few
 *   locals in registers is a viable strategy; another (more general) strategy
 *   is caching locals in registers in straight-line code. Such caching could
 *   also track constant values in registers, if that is deemed valuable. A
 *   combination of techniques may be desirable: parameters and the first few
 *   locals could be cached on entry to the function but not statically assigned
 *   to registers throughout.
 *
 *   (On a large corpus of code it should be possible to compute, for every
 *   signature comprising the types of parameters and locals, and using a static
 *   weight for loops, a list in priority order of which parameters and locals
 *   should be assigned to registers. Or something like that. Wasm makes this
 *   simple. Static assignments are desirable because they are not flushed to
 *   memory by the pre-block sync() call.)
 */
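
// Illustrative sketch only (this helper is hypothetical, not part of the
// compiler): a "small" code generating function following the register naming
// conventions above, with one source + destination register 'r' and another
// source register 'rs':
//
//   void emitAddI32() {
//     RegI32 rs = popI32();   // second operand
//     RegI32 r = popI32();    // first operand and also the destination
//     masm.add32(rs, r);
//     freeI32(rs);
//     pushI32(r);
//   }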

#include "wasm/WasmBaselineCompile.h"

#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"

#include <utility>

#include "jit/AtomicOp.h"
#include "jit/IonTypes.h"
#include "jit/JitAllocPolicy.h"
#include "jit/Label.h"
#include "jit/MacroAssembler.h"
#include "jit/MIR.h"
#include "jit/RegisterAllocator.h"
#include "jit/Registers.h"
#include "jit/RegisterSets.h"
#if defined(JS_CODEGEN_ARM)
#  include "jit/arm/Assembler-arm.h"
#endif
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
#  include "jit/x86-shared/Architecture-x86-shared.h"
#  include "jit/x86-shared/Assembler-x86-shared.h"
#endif
#if defined(JS_CODEGEN_MIPS32)
#  include "jit/mips-shared/Assembler-mips-shared.h"
#  include "jit/mips32/Assembler-mips32.h"
#endif
#if defined(JS_CODEGEN_MIPS64)
#  include "jit/mips-shared/Assembler-mips-shared.h"
#  include "jit/mips64/Assembler-mips64.h"
#endif

#include "wasm/WasmGC.h"
#include "wasm/WasmGenerator.h"
#include "wasm/WasmInstance.h"
#include "wasm/WasmOpIter.h"
#include "wasm/WasmSignalHandlers.h"
#include "wasm/WasmStubs.h"
#include "wasm/WasmValidate.h"

#include "jit/MacroAssembler-inl.h"

using mozilla::DebugOnly;
using mozilla::FloorLog2;
using mozilla::IsPowerOfTwo;
using mozilla::Maybe;

namespace js {
namespace wasm {

using namespace js::jit;

using HandleNaNSpecially = bool;
using InvertBranch = bool;
using IsKnownNotZero = bool;
using IsUnsigned = bool;
using NeedsBoundsCheck = bool;
using PopStack = bool;
using WantResult = bool;
using ZeroOnOverflow = bool;

class BaseStackFrame;

// Two flags, useABI and interModule, control how calls are made.
//
// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
// except when InterModule::True is also set, when they are volatile.
//
// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
// In this case, we require InterModule::False. The calling convention
// is otherwise like UseABI::Wasm.
//
// UseABI::System implies that the Tls/Heap/Global registers are volatile.
// Additionally, the parameter passing mechanism may be slightly different from
// the UseABI::Wasm convention.
//
// When the Tls/Heap/Global registers are not volatile, the baseline compiler
// will restore the Tls register from its save slot before the call, since the
// baseline compiler uses the Tls register for other things.
//
// When those registers are volatile, the baseline compiler will reload them
// after the call (it will restore the Tls register from the save slot and load
// the other two from the Tls data).

enum class UseABI { Wasm, Builtin, System };
enum class InterModule { False = false, True = true };
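
// Illustration (an assumption for exposition, not a statement about specific
// call sites): a call that stays within the module can use (UseABI::Wasm,
// InterModule::False) and keep Tls/Heap/Global live across the call; a call
// that may cross an instance boundary would pair UseABI::Wasm with
// InterModule::True and reload them afterwards; and per the rule above,
// UseABI::Builtin must always be paired with InterModule::False.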

#if defined(JS_CODEGEN_NONE)
#  define RABALDR_SCRATCH_I32
#  define RABALDR_SCRATCH_F32
#  define RABALDR_SCRATCH_F64

static const Register RabaldrScratchI32 = Register::Invalid();
static const FloatRegister RabaldrScratchF32 = InvalidFloatReg;
static const FloatRegister RabaldrScratchF64 = InvalidFloatReg;
#endif

#ifdef JS_CODEGEN_ARM64
#  define RABALDR_CHUNKY_STACK
#  define RABALDR_SCRATCH_I32
#  define RABALDR_SCRATCH_F32
#  define RABALDR_SCRATCH_F64
#  define RABALDR_SCRATCH_F32_ALIASES_F64

static const Register RabaldrScratchI32 = Register::FromCode(15);

// Note, the float scratch regs cannot be registers that are used for parameter
// passing in any ABI we use. Argregs tend to be low-numbered; register 30
// should be safe.

static constexpr FloatRegister RabaldrScratchF32 =
    FloatRegister(30, FloatRegisters::Single);
static constexpr FloatRegister RabaldrScratchF64 =
    FloatRegister(30, FloatRegisters::Double);

static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy");
static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy");
#endif

#ifdef JS_CODEGEN_X86
// The selection of EBX here steps gingerly around: the need for EDX
// to be allocatable for multiply/divide; ECX to be allocatable for
// shift/rotate; EAX (= ReturnReg) to be allocatable as the joinreg;
// EBX not being one of the WasmTableCall registers; and needing a
// temp register for load/store that has a single-byte persona.
//
// The compiler assumes that RabaldrScratchI32 has a single-byte
// persona. Code for 8-byte atomic operations assumes that
// RabaldrScratchI32 is in fact ebx.

#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = ebx;

#  define RABALDR_INT_DIV_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_ARM
// We use our own scratch register, because the macro assembler uses
// the regular scratch register(s) pretty liberally. We could
// work around that in several cases but the mess does not seem
// worth it yet. CallTempReg2 seems safe.

#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;

#  define RABALDR_INT_DIV_I64_CALLOUT
#  define RABALDR_I64_TO_FLOAT_CALLOUT
#  define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS32
#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;

#  define RABALDR_INT_DIV_I64_CALLOUT
#  define RABALDR_I64_TO_FLOAT_CALLOUT
#  define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS64
#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
#  if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
#    error "Bad configuration"
#  endif
#endif

template <MIRType t>
struct RegTypeOf {
  static_assert(t == MIRType::Float32 || t == MIRType::Double,
                "Float mask type");
};

template <>
struct RegTypeOf<MIRType::Float32> {
  static constexpr RegTypeName value = RegTypeName::Float32;
};
template <>
struct RegTypeOf<MIRType::Double> {
  static constexpr RegTypeName value = RegTypeName::Float64;
};

// The strongly typed register wrappers are especially useful to distinguish
// float registers from double registers, but they also clearly distinguish
// 32-bit registers from 64-bit register pairs on 32-bit systems.

struct RegI32 : public Register {
  RegI32() : Register(Register::Invalid()) {}
  explicit RegI32(Register reg) : Register(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegI32 Invalid() { return RegI32(Register::Invalid()); }
};

struct RegI64 : public Register64 {
  RegI64() : Register64(Register64::Invalid()) {}
  explicit RegI64(Register64 reg) : Register64(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegI64 Invalid() { return RegI64(Register64::Invalid()); }
};

struct RegPtr : public Register {
  RegPtr() : Register(Register::Invalid()) {}
  explicit RegPtr(Register reg) : Register(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegPtr Invalid() { return RegPtr(Register::Invalid()); }
};

struct RegF32 : public FloatRegister {
  RegF32() : FloatRegister() {}
  explicit RegF32(FloatRegister reg) : FloatRegister(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegF32 Invalid() { return RegF32(InvalidFloatReg); }
};

struct RegF64 : public FloatRegister {
  RegF64() : FloatRegister() {}
  explicit RegF64(FloatRegister reg) : FloatRegister(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegF64 Invalid() { return RegF64(InvalidFloatReg); }
};

struct AnyReg {
  union {
    RegI32 i32_;
    RegI64 i64_;
    RegPtr ref_;
    RegF32 f32_;
    RegF64 f64_;
  };

  enum { I32, I64, REF, F32, F64 } tag;

  explicit AnyReg(RegI32 r) {
    tag = I32;
    i32_ = r;
  }
  explicit AnyReg(RegI64 r) {
    tag = I64;
    i64_ = r;
  }
  explicit AnyReg(RegF32 r) {
    tag = F32;
    f32_ = r;
  }
  explicit AnyReg(RegF64 r) {
    tag = F64;
    f64_ = r;
  }
  explicit AnyReg(RegPtr r) {
    tag = REF;
    ref_ = r;
  }

  RegI32 i32() const {
    MOZ_ASSERT(tag == I32);
    return i32_;
  }
  RegI64 i64() const {
    MOZ_ASSERT(tag == I64);
    return i64_;
  }
  RegF32 f32() const {
    MOZ_ASSERT(tag == F32);
    return f32_;
  }
  RegF64 f64() const {
    MOZ_ASSERT(tag == F64);
    return f64_;
  }
  RegPtr ref() const {
    MOZ_ASSERT(tag == REF);
    return ref_;
  }

  AnyRegister any() const {
    switch (tag) {
      case F32:
        return AnyRegister(f32_);
      case F64:
        return AnyRegister(f64_);
      case I32:
        return AnyRegister(i32_);
      case I64:
#ifdef JS_PUNBOX64
        return AnyRegister(i64_.reg);
#else
        // The compiler is written so that this is never needed: any() is
        // called on arbitrary registers for asm.js but asm.js does not have
        // 64-bit ints. For wasm, any() is called on arbitrary registers
        // only on 64-bit platforms.
        MOZ_CRASH("AnyReg::any() on 32-bit platform");
#endif
      case REF:
        MOZ_CRASH("AnyReg::any() not implemented for ref types");
      default:
        MOZ_CRASH();
    }
    // Work around GCC 5 analysis/warning bug.
    MOZ_CRASH("AnyReg::any(): impossible case");
  }
};

// Platform-specific registers.
//
// All platforms must define struct SpecificRegs. All 32-bit platforms must
// have an abiReturnRegI64 member in that struct.

#if defined(JS_CODEGEN_X64)
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 rax, rcx, rdx;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        rax(RegI64(Register64(js::jit::rax))),
        rcx(RegI64(Register64(js::jit::rcx))),
        rdx(RegI64(Register64(js::jit::rdx))) {}
};
#elif defined(JS_CODEGEN_X86)
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 ecx_ebx, edx_eax, abiReturnRegI64;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))),
        edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))),
        abiReturnRegI64(edx_eax) {}
};
#elif defined(JS_CODEGEN_ARM)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_ARM64)
struct SpecificRegs {};
#elif defined(JS_CODEGEN_MIPS32)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_MIPS64)
struct SpecificRegs {};
#else
struct SpecificRegs {
#  ifndef JS_64BIT
  RegI64 abiReturnRegI64;
#  endif

  SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); }
};
#endif

class BaseCompilerInterface {
 public:
  // Spill all spillable registers.
  //
  // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
  // spilling only enough registers to satisfy current needs.
  virtual void sync() = 0;
};

// Register allocator.

class BaseRegAlloc {
  // Notes on float register allocation.
  //
  // The general rule in SpiderMonkey is that float registers can alias double
  // registers, but there are predicates to handle exceptions to that rule:
  // hasUnaliasedDouble() and hasMultiAlias(). The way aliasing actually
  // works is platform dependent and exposed through the aliased(n, &r)
  // predicate, etc.
  //
  // - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
  //   cannot be treated as float.
  // - hasMultiAlias(): on ARM and MIPS a double register aliases two float
  //   registers.
  //
  // On some platforms (x86, x64, ARM64) but not all (ARM)
  // ScratchFloat32Register is the same as ScratchDoubleRegister.
  //
  // It's a basic invariant of the AllocatableRegisterSet that it deals
  // properly with aliasing of registers: if s0 or s1 are allocated then d0 is
  // not allocatable; if s0 and s1 are freed individually then d0 becomes
  // allocatable.

  BaseCompilerInterface& bc;
  AllocatableGeneralRegisterSet availGPR;
  AllocatableFloatRegisterSet availFPU;
#ifdef DEBUG
  AllocatableGeneralRegisterSet
      allGPR;  // The registers available to the compiler
  AllocatableFloatRegisterSet
      allFPU;  //   after removing ScratchReg, HeapReg, etc
  uint32_t scratchTaken;
#endif
#ifdef JS_CODEGEN_X86
  AllocatableGeneralRegisterSet singleByteRegs;
#endif

  bool hasGPR() { return !availGPR.empty(); }

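  // Whether a 64-bit value can be allocated: on 64-bit systems any free GPR
  // will do, but on 32-bit systems a pair is needed, so probe by tentatively
  // taking one register, checking that another remains, and freeing the probe.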
  bool hasGPR64() {
#ifdef JS_PUNBOX64
    return !availGPR.empty();
#else
    if (availGPR.empty()) {
      return false;
    }
    Register r = allocGPR();
    bool available = !availGPR.empty();
    freeGPR(r);
    return available;
#endif
  }

  template <MIRType t>
  bool hasFPU() {
    return availFPU.hasAny<RegTypeOf<t>::value>();
  }

  bool isAvailableGPR(Register r) { return availGPR.has(r); }

  bool isAvailableFPU(FloatRegister r) { return availFPU.has(r); }

  void allocGPR(Register r) {
    MOZ_ASSERT(isAvailableGPR(r));
    availGPR.take(r);
  }

  Register allocGPR() {
    MOZ_ASSERT(hasGPR());
    return availGPR.takeAny();
  }

  void allocInt64(Register64 r) {
#ifdef JS_PUNBOX64
    allocGPR(r.reg);
#else
    allocGPR(r.low);
    allocGPR(r.high);
#endif
  }

  Register64 allocInt64() {
    MOZ_ASSERT(hasGPR64());
#ifdef JS_PUNBOX64
    return Register64(availGPR.takeAny());
#else
    Register high = availGPR.takeAny();
    Register low = availGPR.takeAny();
    return Register64(high, low);
#endif
  }

#ifdef JS_CODEGEN_ARM
  // r12 is normally the ScratchRegister and r13 is always the stack pointer,
  // so the highest possible pair has r10 as the even-numbered register.

  static constexpr uint32_t PAIR_LIMIT = 10;

  bool hasGPRPair() {
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        return true;
      }
    }
    return false;
  }

  void allocGPRPair(Register* low, Register* high) {
    MOZ_ASSERT(hasGPRPair());
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        *low = Register::FromCode(i);
        *high = Register::FromCode(i + 1);
        allocGPR(*low);
        allocGPR(*high);
        return;
      }
    }
    MOZ_CRASH("No pair");
  }
#endif

  void allocFPU(FloatRegister r) {
    MOZ_ASSERT(isAvailableFPU(r));
    availFPU.take(r);
  }

  template <MIRType t>
  FloatRegister allocFPU() {
    return availFPU.takeAny<RegTypeOf<t>::value>();
  }

  void freeGPR(Register r) { availGPR.add(r); }

  void freeInt64(Register64 r) {
#ifdef JS_PUNBOX64
    freeGPR(r.reg);
#else
    freeGPR(r.low);
    freeGPR(r.high);
#endif
  }

  void freeFPU(FloatRegister r) { availFPU.add(r); }

 public:
  explicit BaseRegAlloc(BaseCompilerInterface& bc)
      : bc(bc),
        availGPR(GeneralRegisterSet::All()),
        availFPU(FloatRegisterSet::All())
#ifdef DEBUG
        ,
        scratchTaken(0)
#endif
#ifdef JS_CODEGEN_X86
        ,
        singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
#endif
  {
    RegisterAllocator::takeWasmRegisters(availGPR);

    // Allocate any private scratch registers.
#if defined(RABALDR_SCRATCH_I32)
    if (RabaldrScratchI32 != RegI32::Invalid()) {
      availGPR.take(RabaldrScratchI32);
    }
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition");
    MOZ_ASSERT(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition");
#endif

#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64)
    if (RabaldrScratchF32 != RegF32::Invalid()) {
      availFPU.take(RabaldrScratchF32);
    }
#endif

#if defined(RABALDR_SCRATCH_F64)
#  ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(availFPU.has(RabaldrScratchF32));
#  endif
    if (RabaldrScratchF64 != RegF64::Invalid()) {
      availFPU.take(RabaldrScratchF64);
    }
#  ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(!availFPU.has(RabaldrScratchF32));
#  endif
#endif

#ifdef DEBUG
    allGPR = availGPR;
    allFPU = availFPU;
#endif
  }

  enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4 };

#ifdef DEBUG
  bool isScratchRegisterTaken(ScratchKind s) const {
    return (scratchTaken & uint32_t(s)) != 0;
  }

  void setScratchRegisterTaken(ScratchKind s, bool state) {
    if (state) {
      scratchTaken |= uint32_t(s);
    } else {
      scratchTaken &= ~uint32_t(s);
    }
  }
#endif

#ifdef JS_CODEGEN_X86
  bool isSingleByteI32(Register r) { return singleByteRegs.has(r); }
#endif

  bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); }

  bool isAvailableI64(RegI64 r) {
#ifdef JS_PUNBOX64
    return isAvailableGPR(r.reg);
#else
    return isAvailableGPR(r.low) && isAvailableGPR(r.high);
#endif
  }

  bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); }

  bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); }

  bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); }

  // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
  // failure, only as much as we need.

  MOZ_MUST_USE RegI32 needI32() {
    if (!hasGPR()) {
      bc.sync();
    }
    return RegI32(allocGPR());
  }

  void needI32(RegI32 specific) {
    if (!isAvailableI32(specific)) {
      bc.sync();
    }
    allocGPR(specific);
  }

  MOZ_MUST_USE RegI64 needI64() {
    if (!hasGPR64()) {
      bc.sync();
    }
    return RegI64(allocInt64());
  }

  void needI64(RegI64 specific) {
    if (!isAvailableI64(specific)) {
      bc.sync();
    }
    allocInt64(specific);
  }

  MOZ_MUST_USE RegPtr needPtr() {
    if (!hasGPR()) {
      bc.sync();
    }
    return RegPtr(allocGPR());
  }

  void needPtr(RegPtr specific) {
    if (!isAvailablePtr(specific)) {
      bc.sync();
    }
    allocGPR(specific);
  }

  MOZ_MUST_USE RegF32 needF32() {
    if (!hasFPU<MIRType::Float32>()) {
      bc.sync();
    }
    return RegF32(allocFPU<MIRType::Float32>());
  }

  void needF32(RegF32 specific) {
    if (!isAvailableF32(specific)) {
      bc.sync();
    }
    allocFPU(specific);
  }

  MOZ_MUST_USE RegF64 needF64() {
    if (!hasFPU<MIRType::Double>()) {
      bc.sync();
    }
    return RegF64(allocFPU<MIRType::Double>());
  }

  void needF64(RegF64 specific) {
    if (!isAvailableF64(specific)) {
      bc.sync();
    }
    allocFPU(specific);
  }

  void freeI32(RegI32 r) { freeGPR(r); }

  void freeI64(RegI64 r) { freeInt64(r); }

  void freePtr(RegPtr r) { freeGPR(r); }

  void freeF64(RegF64 r) { freeFPU(r); }

  void freeF32(RegF32 r) { freeFPU(r); }

#ifdef JS_CODEGEN_ARM
  MOZ_MUST_USE RegI64 needI64Pair() {
    if (!hasGPRPair()) {
      bc.sync();
    }
    Register low, high;
    allocGPRPair(&low, &high);
    return RegI64(Register64(high, low));
  }
#endif

#ifdef DEBUG
  friend class LeakCheck;

  class MOZ_RAII LeakCheck {
   private:
    const BaseRegAlloc& ra;
    AllocatableGeneralRegisterSet knownGPR_;
    AllocatableFloatRegisterSet knownFPU_;

   public:
    explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
      knownGPR_ = ra.availGPR;
      knownFPU_ = ra.availFPU;
    }

    ~LeakCheck() {
      MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits());
      MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits());
    }

    void addKnownI32(RegI32 r) { knownGPR_.add(r); }

    void addKnownI64(RegI64 r) {
#  ifdef JS_PUNBOX64
      knownGPR_.add(r.reg);
#  else
      knownGPR_.add(r.high);
      knownGPR_.add(r.low);
#  endif
    }

    void addKnownF32(RegF32 r) { knownFPU_.add(r); }

    void addKnownF64(RegF64 r) { knownFPU_.add(r); }

    void addKnownRef(RegPtr r) { knownGPR_.add(r); }
  };
#endif
};
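
// Illustrative use of the allocator (a sketch, not code from this file): the
// need*() methods sync the value stack as a side effect when no register, or
// not the requested register, is free, so a typical sequence is:
//
//   RegI32 rs = ra.needI32();  // any free register; may sync() to free one
//   ra.needI32(specific);      // may sync() to vacate `specific`
//   ...emit code using rs and specific...
//   ra.freeI32(rs);
//   ra.freeI32(specific);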

// Scratch register abstractions.
//
// We define our own scratch registers when the platform doesn't provide what we
// need. A notable use case is that we will need a private scratch register
// when the platform masm uses its scratch register very frequently (eg, ARM).

class BaseScratchRegister {
#ifdef DEBUG
  BaseRegAlloc& ra;
  BaseRegAlloc::ScratchKind kind_;

 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind)
      : ra(ra), kind_(kind) {
    MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, true);
  }
  ~BaseScratchRegister() {
    MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, false);
  }
#else
 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra,
                               BaseRegAlloc::ScratchKind kind) {}
#endif
};

#ifdef RABALDR_SCRATCH_F64
class ScratchF64 : public BaseScratchRegister {
 public:
  explicit ScratchF64(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
  operator RegF64() const { return RegF64(RabaldrScratchF64); }
};
#else
class ScratchF64 : public ScratchDoubleScope {
 public:
  explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
  operator RegF64() const { return RegF64(FloatRegister(*this)); }
};
#endif

#ifdef RABALDR_SCRATCH_F32
class ScratchF32 : public BaseScratchRegister {
 public:
  explicit ScratchF32(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
  operator RegF32() const { return RegF32(RabaldrScratchF32); }
};
#else
class ScratchF32 : public ScratchFloat32Scope {
 public:
  explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
  operator RegF32() const { return RegF32(FloatRegister(*this)); }
};
#endif

#ifdef RABALDR_SCRATCH_I32
template <class RegType>
class ScratchGPR : public BaseScratchRegister {
 public:
  explicit ScratchGPR(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
  operator RegType() const { return RegType(RabaldrScratchI32); }
};
#else
template <class RegType>
class ScratchGPR : public ScratchRegisterScope {
 public:
  explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
  operator RegType() const { return RegType(Register(*this)); }
};
#endif

using ScratchI32 = ScratchGPR<RegI32>;
using ScratchPtr = ScratchGPR<RegPtr>;

#if defined(JS_CODEGEN_X86)
// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
// no other register will do. And we would normally have to allocate that
// register using ScratchI32 since normally the scratch register is EBX.
// But the whole point of ScratchI32 is to hide that relationship. By using
// the ScratchEBX alias, we document that at that point we require the
// scratch register to be EBX.
using ScratchEBX = ScratchI32;

// ScratchI8 is a mnemonic device: For some ops we need a register with a
// byte subregister.
using ScratchI8 = ScratchI32;
#endif

// The stack frame.
//
// The stack frame has four parts ("below" means at lower addresses):
//
// - the Frame element;
// - the Local area, including the DebugFrame element; allocated below the
//   header with various forms of alignment;
// - the Dynamic area, comprising the temporary storage the compiler uses for
//   register spilling, allocated below the Local area;
// - the Arguments area, comprising memory allocated for outgoing calls,
//   allocated below the Dynamic area.
//
//                +============================+
//                |    Incoming arg            |
//                |    ...                     |
// -------------- +============================+
//                |    Frame (fixed size)      |
// -------------- +============================+ <-------------------- FP
//          ^     |    DebugFrame (optional)   |    ^                ^
//          |     +----------------------------+    |                |
//    localSize   |    Local (static size)     |    |                |
//          |     |    ...                     |    |        framePushed
//          v     |    (padding)               |    |                |
// -------------- +============================+ currentStackHeight  |
//          ^     |    Dynamic (variable size) |    |                |
//   dynamicSize  |    ...                     |    |                |
//          v     |    ...                     |    v                |
// -------------- |    (free space, sometimes) | ---------           v
//                +============================+ <----- SP not-during calls
//                |    Arguments (sometimes)   |                     |
//                |    ...                     |                     v
//                +============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer. masm.framePushed() is always
// correct, and masm.getStackPointer() + masm.framePushed() always addresses
// the Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame) is laid out by BaseLocalIter and is
// allocated and deallocated by standard prologue and epilogue functions that
// manipulate the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame. On some
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
// because the SP needs a specific alignment, and in this case there will
// normally be some free space directly above the SP. The stack height does not
// include the free space; it reflects the logically used space only.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.
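//
// In symbols, as used below: currentStackHeight = localSize + dynamicSize,
// and masm.framePushed() equals currentStackHeight except on chunky-stack
// systems, where framePushed() may exceed it by less than one chunk of free
// space (an assumption spelled out by the invariants further below).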

// BaseLocalIter iterates over a vector of types of locals and provides offsets
// from the Frame address for those locals, and associated data.
//
// The implementation of BaseLocalIter is the property of the BaseStackFrame.
// But it is also exposed for eg the debugger to use.

BaseLocalIter::BaseLocalIter(const ValTypeVector& locals, size_t argsLength,
                             bool debugEnabled)
    : locals_(locals),
      argsLength_(argsLength),
      argsRange_(locals.begin(), argsLength),
      argsIter_(argsRange_),
      index_(0),
      localSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
      reservedSize_(localSize_),
      frameOffset_(UINT32_MAX),
      mirType_(MIRType::Undefined),
      done_(false) {
  MOZ_ASSERT(argsLength <= locals.length());
  settle();
}

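// A worked example of the alignment arithmetic below (illustrative numbers):
// if localSize_ is 12 and an 8-byte local is pushed, the size is first aligned
// up to 16 and then bumped to 24; the returned offset 24 names the
// lowest-addressed byte of the local, 24 bytes below the Frame, since locals
// grow down.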
int32_t BaseLocalIter::pushLocal(size_t nbytes) {
  MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
  localSize_ = AlignBytes(localSize_, nbytes) + nbytes;
  return localSize_;  // Locals grow down so capture base address.
}

void BaseLocalIter::settle() {
  if (index_ < argsLength_) {
    MOZ_ASSERT(!argsIter_.done());
    mirType_ = argsIter_.mirType();
    switch (mirType_) {
      case MIRType::Int32:
      case MIRType::Int64:
      case MIRType::Double:
      case MIRType::Float32:
      case MIRType::RefOrNull:
        if (argsIter_->argInRegister()) {
          frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        } else {
          frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
        }
        break;
      default:
        MOZ_CRASH("Argument type");
    }
    return;
  }

  MOZ_ASSERT(argsIter_.done());
  if (index_ < locals_.length()) {
    switch (locals_[index_].code()) {
      case ValType::I32:
      case ValType::I64:
      case ValType::F32:
      case ValType::F64:
      case ValType::Ref:
      case ValType::FuncRef:
      case ValType::AnyRef:
        // TODO/AnyRef-boxing: With boxed immediates and strings, the
        // debugger must be made aware that AnyRef != Pointer.
        ASSERT_ANYREF_IS_JSOBJECT;
        mirType_ = ToMIRType(locals_[index_]);
        frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        break;
      case ValType::NullRef:
      default:
        MOZ_CRASH("Compiler bug: Unexpected local type");
    }
    return;
  }

  done_ = true;
}

void BaseLocalIter::operator++(int) {
  MOZ_ASSERT(!done_);
  index_++;
  if (!argsIter_.done()) {
    argsIter_++;
  }
  settle();
}

// Abstraction of the height of the stack frame, to avoid type confusion.

class StackHeight {
  friend class BaseStackFrameAllocator;

  uint32_t height;

 public:
  explicit StackHeight(uint32_t h) : height(h) {}
  static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
  bool isValid() const { return height != UINT32_MAX; }
};

// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts). See comments above for more. Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is
// performed; BaseStackFrame then provides a pleasant interface for stack
// frame management.

class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
  // but items are 8 bytes), allocate stack memory in chunks, and use a
  // separate stack height variable to track the effective stack pointer
  // within the allocated area. Effectively, there's a variable amount of
  // free space directly above the stack pointer. See diagram above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  // - The Local area is always aligned according to WasmStackAlignment, ie,
  //   masm.framePushed() % WasmStackAlignment is zero after allocating
  //   locals.
  //
  // - ChunkSize is always a multiple of WasmStackAlignment.
  //
  // - Pushing and popping are always in units of ChunkSize (hence preserving
  //   alignment).
  //
  // - The free space on the stack (masm.framePushed() - currentStackHeight_)
  //   is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack; the size of
  // this allocation is InitialChunk, and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;

  // The current logical height of the frame is
  //   currentStackHeight_ = localSize_ + dynamicSize
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is
  // usually larger than currentStackHeight_, notably at the beginning of
  // execution when we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif

  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment. The Local area is then allocated in beginFunction(),
  // following the allocation of the Header. See onFixedStackAllocated()
  // below.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Record the current stack height, after it has become stable in
  // beginFunction(). See also BaseStackFrame::onFixedStackAllocated().

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = localSize_;
#endif
  }

 public:
  // The fixed amount of memory, in bytes, allocated on the stack below the
  // Header for purposes such as locals and other fixed values. Includes all
  // necessary alignment, and on ARM64 also the initial chunk for the working
  // stack memory.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    return localSize_ + InitialChunk;
#else
    return localSize_;
#endif
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
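
  // Illustrative arithmetic, assuming a 64-bit system (ChunkSize = 64) and
  // fixedAllocSize() = 96: a logical height of 130 yields
  // 96 + AlignBytes(130 - 96, 64) = 96 + 64 = 160 bytes of allocated frame.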
#endif

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and saving
  // intermediate values.

  // Offset off of sp_ for the slot at stack area location `offset`.

  int32_t stackOffset(int32_t offset) { return masm.framePushed() - offset; }

#ifdef RABALDR_CHUNKY_STACK
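  // Claim another chunk if the request does not fit in the free space between
  // the logical height and the allocated frame size; since bytes <= ChunkSize,
  // one additional chunk always suffices.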
  void pushChunkyBytes(uint32_t bytes) {
    MOZ_ASSERT(bytes <= ChunkSize);
    checkChunkyInvariants();
    if (masm.framePushed() - currentStackHeight_ < bytes) {
      masm.reserveStack(ChunkSize);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
    // values consumed by a call, and we may need to drop several chunks. But
    // never drop the initial chunk. Crucially, the amount we drop is always an
    // integral number of chunks.
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace >= ChunkSize) {
      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif

  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    return currentStackHeight_;
#else
    return masm.framePushed();
#endif
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  void checkChunkyInvariants() {
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // For a given stack height, return the appropriate size of the allocated
  // frame.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
    // A more complicated adjustment is needed.
    return framePushedForHeight(stackHeight.height);
#else
    // The allocated frame size equals the stack height.
    return stackHeight.height;
#endif
  }

 public:
  // The current height of the stack area, not necessarily zero-based, in a
  // type-safe way.

  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }

  // Set the frame height to a previously recorded value.

  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = amount.height;
    masm.setFramePushed(framePushedForHeight(amount));
    checkChunkyInvariants();
#else
    masm.setFramePushed(amount.height);
#endif
  }

  // The current height of the dynamic part of the stack area (ie, the backing
  // store for the evaluation stack), zero-based.

  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }

  // Before branching to an outer control label, pop the execution stack to
  // the level expected by that region, but do not update masm.framePushed()
  // as that will happen as compilation leaves the block.
  //
  // Note these operate directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight) {
    uint32_t framePushedHere = masm.framePushed();
    uint32_t framePushedThere = framePushedForHeight(destStackHeight);
    if (framePushedHere > framePushedThere) {
      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    }
  }

  bool willPopStackBeforeBranch(StackHeight destStackHeight) {
    uint32_t framePushedHere = masm.framePushed();
    uint32_t framePushedThere = framePushedForHeight(destStackHeight);
    return framePushedHere > framePushedThere;
  }

  // Before exiting a nested control region, pop the execution stack
  // to the level expected by the nesting region, and free the
  // stack.
  //
  // Note this operates on the stack height, which is not the same as the
  // stack pointer on chunky-stack systems; the stack pointer may or may not
  // change on such systems.

  void popStackOnBlockExit(StackHeight destStackHeight, bool deadCode) {
    uint32_t stackHeightHere = currentStackHeight();
    uint32_t stackHeightThere = destStackHeight.height;
    if (stackHeightHere > stackHeightThere) {
#ifdef RABALDR_CHUNKY_STACK
      if (deadCode) {
        setStackHeight(destStackHeight);
      } else {
        popChunkyBytes(stackHeightHere - stackHeightThere);
      }
#else
      if (deadCode) {
        masm.setFramePushed(stackHeightThere);
      } else {
        masm.freeStack(stackHeightHere - stackHeightThere);
      }
#endif
    }
  }

 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the freeing
  // of the argument area and dropping values off the stack after a call. But
  // they always amount to manipulating the real stack pointer by some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments. But we do update framePushed(), so we
  // can still index into the frame below the outgoing arguments area.

  // This is always equivalent to a masm.reserveStack() call.

  void allocArgArea(size_t argSize) {
    if (argSize) {
      masm.reserveStack(argSize);
    }
  }

  // This frees the argument area allocated by allocArgArea(), and `argSize`
  // must be equal to the `argSize` argument to allocArgArea(). In addition
  // we drop some values from the frame, corresponding to the values that were
  // consumed by the call.

  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, which is why the operation is split.
    if (argSize) {
      masm.freeStack(argSize);
    }
    popChunkyBytes(dropSize);
#else
    if (argSize + dropSize) {
      masm.freeStack(argSize + dropSize);
    }
#endif
  }
};

class BaseStackFrame final : public BaseStackFrameAllocator {
  MacroAssembler& masm;

  // The largest observed value of masm.framePushed(), ie, the size of the
  // stack frame. Read this for its true value only when code generation is
  // finished.
  uint32_t maxFramePushed_;

  // Patch point where we check for stack overflow.
  CodeOffset stackAddOffset_;

  // Low byte offset of local area for true locals (not parameters).
  uint32_t varLow_;

  // High byte offset + 1 of local area for true locals.
  uint32_t varHigh_;

  // The stack pointer, cached for brevity.
  RegisterOrSP sp_;

 public:
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}

  ///////////////////////////////////////////////////////////////////////////
  //
  // Stack management and overflow checking

  // This must be called once beginFunction has allocated space for the Header
  // (the Frame and DebugFrame) and the Local area, and will record the current
  // frame size for internal use by the stack abstractions.

  void onFixedStackAllocated() {
    maxFramePushed_ = masm.framePushed();
    BaseStackFrameAllocator::onFixedStackAllocated();
  }

  // We won't know until after we've generated code how big the frame will be
  // (we may need arbitrary spill slots and outgoing param slots) so emit a
  // patchable add that is patched in endFunction().
  //
  // Note the platform scratch register may be used by branchPtr(), so
  // generally tmp must be something else.

  void checkStack(Register tmp, BytecodeOffset trapOffset) {
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
    Label ok;
    masm.branchPtr(Assembler::Below,
                   Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)),
                   tmp, &ok);
    masm.wasmTrap(Trap::StackOverflow, trapOffset);
    masm.bind(&ok);
  }

  void patchCheckStack() {
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }

  // Very large frames are implausible, probably an attack.

  bool checkStackHeight() {
    // 512KiB should be enough, considering how Rabaldr uses the stack and
    // what the standard limits are:
    //
    // - 1,000 parameters
    // - 50,000 locals
    // - 10,000 values on the eval stack (not an official limit)
    //
    // At sizeof(int64) bytes per slot this works out to about 480KiB.
    return maxFramePushed_ <= 512 * 1024;
  }

  ///////////////////////////////////////////////////////////////////////////
  //
  // Local area

  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // It assumes the stack is growing down and that locals are on the stack
    // at lower addresses than Frame, and is the offset from Frame of the
    // lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
  };

  // Profiling shows that the number of parameters and locals frequently
  // touches or exceeds 8. So 16 seems like a reasonable starting point.
  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;

  // Initialize `localInfo` based on the types of `locals` and `args`.
  bool setupLocals(const ValTypeVector& locals, const ValTypeVector& args,
                   bool debugEnabled, LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args.length(), debugEnabled);
    varLow_ = i.reservedSize();
    for (; !i.done() && i.index() < args.length(); i++) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      varLow_ = i.currentLocalSize();
      index++;
    }

    varHigh_ = varLow_;
    for (; !i.done(); i++) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      varHigh_ = i.currentLocalSize();
      index++;
    }

    setLocalSize(AlignBytes(varHigh_, WasmStackAlignment));

    return true;
  }

  void zeroLocals(BaseRegAlloc* ra);

  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src)), dest);
  }

#ifndef JS_PUNBOX64
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalPtr(const Local& src, RegPtr dest) {
    masm.loadPtr(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(Address(sp_, localOffset(src)), dest);
  }

  void storeLocalI32(RegI32 src, const Local& dest) {
    masm.store32(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalI64(RegI64 src, const Local& dest) {
    masm.store64(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalPtr(Register src, const Local& dest) {
    masm.storePtr(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF64(RegF64 src, const Local& dest) {
    masm.storeDouble(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF32(RegF32 src, const Local& dest) {
    masm.storeFloat32(src, Address(sp_, localOffset(dest)));
  }

  // Offset off of sp_ for `local`.
  int32_t localOffset(const Local& local) { return localOffset(local.offs); }

 private:
  // Offset off of sp_ for a local with offset `offset` from Frame.
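  // For example (illustrative numbers), with framePushed() == 160, a local
  // whose Frame offset is 24 is addressed at sp_ + 136: the Frame lives at
  // sp_ + framePushed() and locals lie below it.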
  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }

 public:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Dynamic area

  // Sizes of items in the stack area.
  //
  // The size values come from the implementations of Push() in
  // MacroAssembler-x86-shared.cpp and MacroAssembler-arm-shared.cpp, and from
  // VFPRegister::size() in Architecture-arm.h.
  //
  // On ARM, unlike on x86, we push a single (32 bits) for a float.

  static const size_t StackSizeOfPtr = sizeof(intptr_t);
  static const size_t StackSizeOfInt64 = sizeof(int64_t);
#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32)
  static const size_t StackSizeOfFloat = sizeof(float);
#else
  static const size_t StackSizeOfFloat = sizeof(double);
#endif
  static const size_t StackSizeOfDouble = sizeof(double);

  uint32_t pushPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfPtr);
    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = Max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = Max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = Max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }

  void popPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  void popFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  void popDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }

  void popBytes(size_t bytes) {
    if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
      popChunkyBytes(bytes);
#else
      masm.freeStack(bytes);
#endif
    }
  }

  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  // Disambiguation: this loads a "Ptr" value from the stack, it does not load
  // the "StackPtr".

  void loadStackPtr(int32_t offset, RegPtr dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }
};

void BaseStackFrame::zeroLocals(BaseRegAlloc* ra) {
  MOZ_ASSERT(varLow_ != UINT32_MAX);

  if (varLow_ == varHigh_) {
    return;
  }

  static const uint32_t wordSize = sizeof(void*);

  // The adjustments to 'low' by the size of the item being stored compensate
  // for the fact that locals offsets are the offsets from Frame to the bytes
  // directly "above" the locals in the locals area. See comment at Local.

  // On 64-bit systems we may have 32-bit alignment for the local area as it
  // may be preceded by parameters and prologue/debug data.

  uint32_t low = varLow_;
  if (low % wordSize) {
    masm.store32(Imm32(0), Address(sp_, localOffset(low + 4)));
    low += 4;
  }
  MOZ_ASSERT(low % wordSize == 0);

  const uint32_t high = AlignBytes(varHigh_, wordSize);

  // An UNROLL_LIMIT of 16 is chosen so that we only need an 8-bit signed
  // immediate to represent the offset in the store instructions in the loop
  // on x64.

  const uint32_t UNROLL_LIMIT = 16;
  const uint32_t initWords = (high - low) / wordSize;
  const uint32_t tailWords = initWords % UNROLL_LIMIT;
  const uint32_t loopHigh = high - (tailWords * wordSize);
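
  // Illustrative numbers: with wordSize = 8, low = 16, and high = 416,
  // initWords = 50, tailWords = 50 % 16 = 2, and loopHigh = 400; the unrolled
  // loop below then runs three times (48 words) and the tail stores 2 words.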

  // With only one word to initialize, just store an immediate zero.

  if (initWords == 1) {
    masm.storePtr(ImmWord(0), Address(sp_, localOffset(low + wordSize)));
    return;
  }

  // For other cases, it's best to have a zero in a register.
  //
  // One can do more here with SIMD registers (store 16 bytes at a time) or
  // with instructions like STRD on ARM (store 8 bytes at a time), but that's
  // for another day.

  RegI32 zero = ra->needI32();
  masm.mov(ImmWord(0), zero);

  // For the general case we want to have a loop body of UNROLL_LIMIT stores
  // and then a tail of less than UNROLL_LIMIT stores. When initWords is less
  // than 2*UNROLL_LIMIT the loop trip count is at most 1 and there is no
  // benefit to having the pointer calculations and the compare-and-branch.
  // So we completely unroll when we have initWords < 2 * UNROLL_LIMIT. (In
  // this case we'll end up using 32-bit offsets on x64 for up to half of the
  // stores, though.)

  // Fully-unrolled case.

  if (initWords < 2 * UNROLL_LIMIT) {
    for (uint32_t i = low; i < high; i += wordSize) {
      masm.storePtr(zero, Address(sp_, localOffset(i + wordSize)));
    }
    ra->freeI32(zero);
    return;
  }

  // Unrolled loop with a tail. Stores will use negative offsets. That's OK
  // for x86 and ARM, at least.

  // Compute pointer to the highest-addressed slot on the frame.
  RegI32 p = ra->needI32();
  masm.computeEffectiveAddress(Address(sp_, localOffset(low + wordSize)), p);

  // Compute pointer to the lowest-addressed slot on the frame that will be
  // initialized by the loop body.
  RegI32 lim = ra->needI32();
  masm.computeEffectiveAddress(Address(sp_, localOffset(loopHigh + wordSize)),
                               lim);

  // The loop body. Eventually we'll have p == lim and exit the loop.
  Label again;
  masm.bind(&again);
  for (uint32_t i = 0; i < UNROLL_LIMIT; ++i) {
    masm.storePtr(zero, Address(p, -(wordSize * i)));
  }
  masm.subPtr(Imm32(UNROLL_LIMIT * wordSize), p);
  masm.branchPtr(Assembler::LessThan, lim, p, &again);

  // The tail.
  for (uint32_t i = 0; i < tailWords; ++i) {
    masm.storePtr(zero, Address(p, -(wordSize * i)));
  }

  ra->freeI32(p);
  ra->freeI32(lim);
  ra->freeI32(zero);
}

// Value stack: stack elements

struct Stk {
 private:
  Stk() : kind_(Unknown), i64val_(0) {}

 public:
  enum Kind {
    // The Mem opcodes are all clustered at the beginning to
    // allow for a quick test within sync().
    MemI32,  // 32-bit integer stack value ("offs")
    MemI64,  // 64-bit integer stack value ("offs")
    MemF32,  // 32-bit floating stack value ("offs")
    MemF64,  // 64-bit floating stack value ("offs")
    MemRef,  // reftype (pointer wide) stack value ("offs")

    // The Local opcodes follow the Mem opcodes for a similar
    // quick test within hasLocal().
    LocalI32,  // Local int32 var ("slot")
    LocalI64,  // Local int64 var ("slot")
    LocalF32,  // Local float32 var ("slot")
    LocalF64,  // Local double var ("slot")
    LocalRef,  // Local reftype (pointer wide) var ("slot")

    RegisterI32,  // 32-bit integer register ("i32reg")
    RegisterI64,  // 64-bit integer register ("i64reg")
    RegisterF32,  // 32-bit floating register ("f32reg")
    RegisterF64,  // 64-bit floating register ("f64reg")
    RegisterRef,  // reftype (pointer wide) register ("refReg")

    ConstI32,  // 32-bit integer constant ("i32val")
    ConstI64,  // 64-bit integer constant ("i64val")
    ConstF32,  // 32-bit floating constant ("f32val")
    ConstF64,  // 64-bit floating constant ("f64val")
    ConstRef,  // reftype (pointer wide) constant ("refval")