/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
*
* Copyright 2016 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
*
* General assumptions for 32-bit vs 64-bit code:
*
* - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
* systems.
*
* - Code that knows that Register64 has a '.reg' member on 64-bit systems and
* '.high' and '.low' members on 32-bit systems, or knows the implications
* thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT.
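*
*   To illustrate the pattern (a sketch, not a helper from this file):
*   moving an i64 between registers looks like
*
*     void moveI64(MacroAssembler& masm, RegI64 src, RegI64 dest) {
*     #ifdef JS_PUNBOX64
*       masm.movePtr(src.reg, dest.reg);
*     #else
*       masm.move32(src.low, dest.low);
*       masm.move32(src.high, dest.high);
*     #endif
*     }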
*
*
* Coding standards:
*
* - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
* and surrounding functions; most functions fall into this class) where the
* meaning is obvious:
*
* - if there is a single source + destination register, it is called 'r'
* - if there is one source and a different destination, they are called 'rs'
* and 'rd'
* - if there is one source + destination register and another source register
* they are called 'r' and 'rs'
* - if there are two source registers and a destination register they are
* called 'rs0', 'rs1', and 'rd'.
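*
*   For illustration, a hypothetical small emitter following these
*   conventions (a sketch, not code from this file):
*
*     void BaseCompiler::emitSubI32() {
*       RegI32 rs = popI32();  // second source
*       RegI32 r = popI32();   // source + destination
*       masm.sub32(rs, r);     // r := r - rs
*       freeI32(rs);
*       pushI32(r);
*     }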
*
* - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
*
* - Registers can be named non-generically for their function ('rp' for the
* 'pointer' register and 'rv' for the 'value' register are typical) and those
* names may or may not have an 'r' prefix.
*
* - "Larger" code generating functions make their own rules.
*
*
* General status notes:
*
* "FIXME" indicates a known or suspected bug. Always has a bug#.
*
* "TODO" indicates an opportunity for a general improvement, with an additional
* tag to indicate the area of improvement. Usually has a bug#.
*
* There are lots of machine dependencies here but they are pretty well isolated
* to a segment of the compiler. Many dependencies will eventually be factored
* into the MacroAssembler layer and shared with other code generators.
*
*
* High-value compiler performance improvements:
*
* - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
* etc methods) can avoid syncing the value stack if the specific register is
* in use but there is a free register to shuffle the specific register into.
* (This will also improve the generated code.) The sync happens often enough
* here to show up in profiles, because it is triggered by integer multiply
* and divide.
*
*
* High-value code generation improvements:
*
* - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
* the stack and then jumps to the code for the target case. If no cleanup is
* needed we could just branch conditionally to the target; if the same amount
* of cleanup is needed for all cases then the cleanup can be done before the
* dispatch. Both are highly likely.
*
* - (Bug 1316806) Register management around calls: At the moment we sync the
* value stack unconditionally (this is simple) but there are probably many
* common cases where we could instead save/restore live caller-saves
* registers and perform parallel assignment into argument registers. This
* may be important if we keep some locals in registers.
*
* - (Bug 1316808) Allocate some locals to registers on machines where there are
* enough registers. This is probably hard to do well in a one-pass compiler
* but it might be that just keeping register arguments and the first few
* locals in registers is a viable strategy; another (more general) strategy
* is caching locals in registers in straight-line code. Such caching could
* also track constant values in registers, if that is deemed valuable. A
* combination of techniques may be desirable: parameters and the first few
* locals could be cached on entry to the function but not statically assigned
* to registers throughout.
*
* (On a large corpus of code it should be possible to compute, for every
* signature comprising the types of parameters and locals, and using a static
* weight for loops, a list in priority order of which parameters and locals
* should be assigned to registers. Or something like that. Wasm makes
* this simple. Static assignments are desirable because they are not flushed
* to memory by the pre-block sync() call.)
*/
#include "wasm/WasmBaselineCompile.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"
#include <algorithm>
#include <utility>
#include "jit/AtomicOp.h"
#include "jit/IonTypes.h"
#include "jit/JitAllocPolicy.h"
#include "jit/Label.h"
#include "jit/MIR.h"
#include "jit/RegisterAllocator.h"
#include "jit/Registers.h"
#include "jit/RegisterSets.h"
#if defined(JS_CODEGEN_ARM)
# include "jit/arm/Assembler-arm.h"
#endif
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
# include "jit/x86-shared/Architecture-x86-shared.h"
# include "jit/x86-shared/Assembler-x86-shared.h"
#endif
#if defined(JS_CODEGEN_MIPS32)
# include "jit/mips-shared/Assembler-mips-shared.h"
# include "jit/mips32/Assembler-mips32.h"
#endif
#if defined(JS_CODEGEN_MIPS64)
# include "jit/mips-shared/Assembler-mips-shared.h"
# include "jit/mips64/Assembler-mips64.h"
#endif
#include "js/ScalarType.h" // js::Scalar::Type
#include "util/Memory.h"
#include "wasm/WasmGC.h"
#include "wasm/WasmGenerator.h"
#include "wasm/WasmInstance.h"
#include "wasm/WasmOpIter.h"
#include "wasm/WasmSignalHandlers.h"
#include "wasm/WasmStubs.h"
#include "wasm/WasmValidate.h"
#include "jit/MacroAssembler-inl.h"
using mozilla::DebugOnly;
using mozilla::FloorLog2;
using mozilla::IsPowerOfTwo;
using mozilla::Maybe;
namespace js {
namespace wasm {
using namespace js::jit;
using HandleNaNSpecially = bool;
using InvertBranch = bool;
using IsKnownNotZero = bool;
using IsUnsigned = bool;
using NeedsBoundsCheck = bool;
using WantResult = bool;
using ZeroOnOverflow = bool;
class BaseStackFrame;
// Two flags, useABI and interModule, control how calls are made.
//
// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
// except when InterModule::True is also set, when they are volatile.
//
// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
// In this case, we require InterModule::False. The calling convention
// is otherwise like UseABI::Wasm.
//
// UseABI::System implies that the Tls/Heap/Global registers are volatile.
// Additionally, the parameter passing mechanism may be slightly different from
// the UseABI::Wasm convention.
//
// When the Tls/Heap/Global registers are not volatile, the baseline compiler
// will restore the Tls register from its save slot before the call, since the
// baseline compiler uses the Tls register for other things.
//
// When those registers are volatile, the baseline compiler will reload them
// after the call (it will restore the Tls register from the save slot and load
// the other two from the Tls data).
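// For example (a sketch; the call-site helper name is an assumption, not a
// definition from this header), an inter-module wasm call versus a builtin
// call might be set up as:
//
//   beginCall(call, UseABI::Wasm,
//             import ? InterModule::True : InterModule::False);
//   ...
//   beginCall(call, UseABI::Builtin, InterModule::False);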
enum class UseABI { Wasm, Builtin, System };
enum class InterModule { False = false, True = true };
enum class RhsDestOp { True = true };
#if defined(JS_CODEGEN_NONE)
# define RABALDR_SCRATCH_I32
# define RABALDR_SCRATCH_F32
# define RABALDR_SCRATCH_F64
static constexpr Register RabaldrScratchI32 = Register::Invalid();
static constexpr FloatRegister RabaldrScratchF32 = InvalidFloatReg;
static constexpr FloatRegister RabaldrScratchF64 = InvalidFloatReg;
#endif
#ifdef JS_CODEGEN_ARM64
# define RABALDR_CHUNKY_STACK
# define RABALDR_SIDEALLOC_V128
# define RABALDR_SCRATCH_I32
# define RABALDR_SCRATCH_F32
# define RABALDR_SCRATCH_F64
# define RABALDR_SCRATCH_V128
# define RABALDR_SCRATCH_F32_ALIASES_F64
static constexpr Register RabaldrScratchI32{Registers::x15};
// Note, the float scratch regs cannot be registers that are used for parameter
// passing in any ABI we use. Argregs tend to be low-numbered; register 30
// should be safe.
static constexpr FloatRegister RabaldrScratchF32{FloatRegisters::s30,
FloatRegisters::Single};
static constexpr FloatRegister RabaldrScratchF64{FloatRegisters::d30,
FloatRegisters::Double};
# ifdef ENABLE_WASM_SIMD
static constexpr FloatRegister RabaldrScratchV128{FloatRegisters::d30,
FloatRegisters::Simd128};
# endif
static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy");
static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy");
# ifdef ENABLE_WASM_SIMD
static_assert(RabaldrScratchV128 != ScratchSimd128Reg, "Too busy");
# endif
#endif
#ifdef JS_CODEGEN_X86
// The selection of EBX here steps gingerly around: the need for EDX
// to be allocatable for multiply/divide; ECX to be allocatable for
// shift/rotate; EAX (= ReturnReg) to be allocatable as the result
// register; EBX not being one of the WasmTableCall registers; and
// needing a temp register for load/store that has a single-byte
// persona.
//
// The compiler assumes that RabaldrScratchI32 has a single-byte
// persona. Code for 8-byte atomic operations assumes that
// RabaldrScratchI32 is in fact ebx.
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = ebx;
# define RABALDR_INT_DIV_I64_CALLOUT
#endif
#ifdef JS_CODEGEN_ARM
// We use our own scratch register, because the macro assembler uses
// the regular scratch register(s) pretty liberally. We could
// work around that in several cases but the mess does not seem
// worth it yet. CallTempReg2 seems safe.
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;
# define RABALDR_INT_DIV_I64_CALLOUT
# define RABALDR_I64_TO_FLOAT_CALLOUT
# define RABALDR_FLOAT_TO_I64_CALLOUT
#endif
#ifdef JS_CODEGEN_MIPS32
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;
# define RABALDR_INT_DIV_I64_CALLOUT
# define RABALDR_I64_TO_FLOAT_CALLOUT
# define RABALDR_FLOAT_TO_I64_CALLOUT
#endif
#ifdef JS_CODEGEN_MIPS64
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;
#endif
#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
# if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
# error "Bad configuration"
# endif
#endif
template <MIRType t>
struct RegTypeOf {
#ifdef ENABLE_WASM_SIMD
static_assert(t == MIRType::Float32 || t == MIRType::Double ||
t == MIRType::Simd128,
"Float mask type");
#else
static_assert(t == MIRType::Float32 || t == MIRType::Double,
"Float mask type");
#endif
};
template <>
struct RegTypeOf<MIRType::Float32> {
static constexpr RegTypeName value = RegTypeName::Float32;
};
template <>
struct RegTypeOf<MIRType::Double> {
static constexpr RegTypeName value = RegTypeName::Float64;
};
#ifdef ENABLE_WASM_SIMD
template <>
struct RegTypeOf<MIRType::Simd128> {
static constexpr RegTypeName value = RegTypeName::Vector128;
};
#endif
// The strongly typed register wrappers are especially useful to distinguish
// float registers from double registers, but they also clearly distinguish
// 32-bit registers from 64-bit register pairs on 32-bit systems.
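// For example (a sketch; needF32/needF64 are the allocator methods defined
// below): the explicit constructors and their debug assertions catch
// mix-ups that a plain FloatRegister would silently accept:
//
//   RegF32 s = needF32();               // wrapper asserts isSingle()
//   RegF64 d = needF64();               // wrapper asserts isDouble()
//   masm.convertFloat32ToDouble(s, d);  // types document the contract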
struct RegI32 : public Register {
RegI32() : Register(Register::Invalid()) {}
explicit RegI32(Register reg) : Register(reg) {
MOZ_ASSERT(reg != Invalid());
}
bool isInvalid() const { return *this == Invalid(); }
bool isValid() const { return !isInvalid(); }
static RegI32 Invalid() { return RegI32(); }
};
struct RegI64 : public Register64 {
RegI64() : Register64(Register64::Invalid()) {}
explicit RegI64(Register64 reg) : Register64(reg) {
MOZ_ASSERT(reg != Invalid());
}
bool isInvalid() const { return *this == Invalid(); }
bool isValid() const { return !isInvalid(); }
static RegI64 Invalid() { return RegI64(); }
};
struct RegPtr : public Register {
RegPtr() : Register(Register::Invalid()) {}
explicit RegPtr(Register reg) : Register(reg) {
MOZ_ASSERT(reg != Invalid());
}
bool isInvalid() const { return *this == Invalid(); }
bool isValid() const { return !isInvalid(); }
static RegPtr Invalid() { return RegPtr(); }
};
struct RegF32 : public FloatRegister {
RegF32() : FloatRegister() {}
explicit RegF32(FloatRegister reg) : FloatRegister(reg) {
MOZ_ASSERT(isSingle());
}
bool isValid() const { return !isInvalid(); }
static RegF32 Invalid() { return RegF32(); }
};
struct RegF64 : public FloatRegister {
RegF64() : FloatRegister() {}
explicit RegF64(FloatRegister reg) : FloatRegister(reg) {
MOZ_ASSERT(isDouble());
}
bool isValid() const { return !isInvalid(); }
static RegF64 Invalid() { return RegF64(); }
};
#ifdef ENABLE_WASM_SIMD
# ifdef RABALDR_SIDEALLOC_V128
class RegV128 {
// fpr_ is either invalid or a double that aliases the simd register, see
// comments below at BaseRegAlloc.
FloatRegister fpr_;
public:
RegV128() : fpr_(FloatRegister()) {}
explicit RegV128(FloatRegister reg)
: fpr_(FloatRegister(reg.encoding(), FloatRegisters::Double)) {
MOZ_ASSERT(reg.isSimd128());
}
static RegV128 fromDouble(FloatRegister reg) {
MOZ_ASSERT(reg.isDouble());
return RegV128(FloatRegister(reg.encoding(), FloatRegisters::Simd128));
}
FloatRegister asDouble() const { return fpr_; }
bool isInvalid() const { return fpr_.isInvalid(); }
bool isValid() const { return !isInvalid(); }
static RegV128 Invalid() { return RegV128(); }
operator FloatRegister() const {
return FloatRegister(fpr_.encoding(), FloatRegisters::Simd128);
}
bool operator==(const RegV128& that) const {
return asDouble() == that.asDouble();
}
bool operator!=(const RegV128& that) const {
return asDouble() != that.asDouble();
}
};
# else
struct RegV128 : public FloatRegister {
RegV128() : FloatRegister() {}
explicit RegV128(FloatRegister reg) : FloatRegister(reg) {
MOZ_ASSERT(isSimd128());
}
bool isValid() const { return !isInvalid(); }
static RegV128 Invalid() { return RegV128(); }
};
# endif
#endif
struct AnyReg {
union {
RegI32 i32_;
RegI64 i64_;
RegPtr ref_;
RegF32 f32_;
RegF64 f64_;
#ifdef ENABLE_WASM_SIMD
RegV128 v128_;
#endif
};
enum {
I32,
I64,
REF,
F32,
F64,
#ifdef ENABLE_WASM_SIMD
V128
#endif
} tag;
explicit AnyReg(RegI32 r) {
tag = I32;
i32_ = r;
}
explicit AnyReg(RegI64 r) {
tag = I64;
i64_ = r;
}
explicit AnyReg(RegF32 r) {
tag = F32;
f32_ = r;
}
explicit AnyReg(RegF64 r) {
tag = F64;
f64_ = r;
}
#ifdef ENABLE_WASM_SIMD
explicit AnyReg(RegV128 r) {
tag = V128;
v128_ = r;
}
#endif
explicit AnyReg(RegPtr r) {
tag = REF;
ref_ = r;
}
RegI32 i32() const {
MOZ_ASSERT(tag == I32);
return i32_;
}
RegI64 i64() const {
MOZ_ASSERT(tag == I64);
return i64_;
}
RegF32 f32() const {
MOZ_ASSERT(tag == F32);
return f32_;
}
RegF64 f64() const {
MOZ_ASSERT(tag == F64);
return f64_;
}
#ifdef ENABLE_WASM_SIMD
RegV128 v128() const {
MOZ_ASSERT(tag == V128);
return v128_;
}
#endif
RegPtr ref() const {
MOZ_ASSERT(tag == REF);
return ref_;
}
AnyRegister any() const {
switch (tag) {
case F32:
return AnyRegister(f32_);
case F64:
return AnyRegister(f64_);
#ifdef ENABLE_WASM_SIMD
case V128:
return AnyRegister(v128_);
#endif
case I32:
return AnyRegister(i32_);
case I64:
#ifdef JS_PUNBOX64
return AnyRegister(i64_.reg);
#else
// The compiler is written so that this is never needed: any() is
// called on arbitrary registers for asm.js but asm.js does not have
// 64-bit ints. For wasm, any() is called on arbitrary registers
// only on 64-bit platforms.
MOZ_CRASH("AnyReg::any() on 32-bit platform");
#endif
case REF:
MOZ_CRASH("AnyReg::any() not implemented for ref types");
default:
MOZ_CRASH();
}
// Work around GCC 5 analysis/warning bug.
MOZ_CRASH("AnyReg::any(): impossible case");
}
};
// Platform-specific registers.
//
// All platforms must define struct SpecificRegs. All 32-bit platforms must
// have an abiReturnRegI64 member in that struct.
#if defined(JS_CODEGEN_X64)
struct SpecificRegs {
RegI32 eax, ecx, edx, edi, esi;
RegI64 rax, rcx, rdx;
SpecificRegs()
: eax(RegI32(js::jit::eax)),
ecx(RegI32(js::jit::ecx)),
edx(RegI32(js::jit::edx)),
edi(RegI32(js::jit::edi)),
esi(RegI32(js::jit::esi)),
rax(RegI64(Register64(js::jit::rax))),
rcx(RegI64(Register64(js::jit::rcx))),
rdx(RegI64(Register64(js::jit::rdx))) {}
};
#elif defined(JS_CODEGEN_X86)
struct SpecificRegs {
RegI32 eax, ecx, edx, edi, esi;
RegI64 ecx_ebx, edx_eax, abiReturnRegI64;
SpecificRegs()
: eax(RegI32(js::jit::eax)),
ecx(RegI32(js::jit::ecx)),
edx(RegI32(js::jit::edx)),
edi(RegI32(js::jit::edi)),
esi(RegI32(js::jit::esi)),
ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))),
edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))),
abiReturnRegI64(edx_eax) {}
};
#elif defined(JS_CODEGEN_ARM)
struct SpecificRegs {
RegI64 abiReturnRegI64;
SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_ARM64)
struct SpecificRegs {};
#elif defined(JS_CODEGEN_MIPS32)
struct SpecificRegs {
RegI64 abiReturnRegI64;
SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_MIPS64)
struct SpecificRegs {};
#else
struct SpecificRegs {
# ifndef JS_64BIT
RegI64 abiReturnRegI64;
# endif
SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); }
};
#endif
class BaseCompilerInterface {
public:
// Spill all spillable registers.
//
// TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
// spilling only enough registers to satisfy current needs.
virtual void sync() = 0;
virtual void saveTempPtr(RegPtr r) = 0;
virtual void restoreTempPtr(RegPtr r) = 0;
};
// Register allocator.
class BaseRegAlloc {
// Notes on float register allocation.
//
// The general rule in SpiderMonkey is that float registers can alias double
// registers, but there are predicates to handle exceptions to that rule:
// hasUnaliasedDouble() and hasMultiAlias(). The way aliasing actually
// works is platform dependent and exposed through the aliased(n, &r)
// predicate, etc.
//
// - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
// cannot be treated as float.
// - hasMultiAlias(): on ARM and MIPS a double register aliases two float
// registers.
//
// On some platforms (x86, x64, ARM64) but not all (ARM)
// ScratchFloat32Register is the same as ScratchDoubleRegister.
//
// It's a basic invariant of the AllocatableRegisterSet that it deals
// properly with aliasing of registers: if s0 or s1 are allocated then d0 is
// not allocatable; if s0 and s1 are freed individually then d0 becomes
// allocatable.
//
// On platforms with RABALDR_SIDEALLOC_V128, the register set does not
// represent SIMD registers. Instead, we allocate and free these registers as
// doubles and change the kind to Simd128 while the register is exposed to
// masm. (This is the case on ARM64 for now, and is a consequence of needing
// more than 64 bits for FloatRegisters::SetType to represent SIMD registers.
// See lengthy comment in Architecture-arm64.h.)
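//
// For example, a sketch of the aliasing invariant (using ARM register
// names):
//
//   AllocatableFloatRegisterSet set(FloatRegisterSet::All());
//   set.take(s0);  // d0 is now unallocatable while s0 is taken
//   set.add(s0);   // d0 becomes allocatable again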
BaseCompilerInterface* bc;
AllocatableGeneralRegisterSet availGPR;
AllocatableFloatRegisterSet availFPU;
#ifdef DEBUG
// The registers available after removing ScratchReg, HeapReg, etc.
AllocatableGeneralRegisterSet allGPR;
AllocatableFloatRegisterSet allFPU;
uint32_t scratchTaken;
#endif
#ifdef JS_CODEGEN_X86
AllocatableGeneralRegisterSet singleByteRegs;
#endif
bool hasGPR() { return !availGPR.empty(); }
bool hasGPR64() {
#ifdef JS_PUNBOX64
return !availGPR.empty();
#else
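// On 32-bit platforms an int64 occupies two GPRs, so check that a second
// register remains available after taking a first one.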
if (availGPR.empty()) {
return false;
}
Register r = allocGPR();
bool available = !availGPR.empty();
freeGPR(r);
return available;
#endif
}
template <MIRType t>
bool hasFPU() {
return availFPU.hasAny<RegTypeOf<t>::value>();
}
#ifdef RABALDR_SIDEALLOC_V128
template <>
bool hasFPU<MIRType::Simd128>() {
MOZ_CRASH("Should not happen");
}
#endif
bool isAvailableGPR(Register r) { return availGPR.has(r); }
bool isAvailableFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
MOZ_ASSERT(!r.isSimd128());
#endif
return availFPU.has(r);
}
void allocGPR(Register r) {
MOZ_ASSERT(isAvailableGPR(r));
availGPR.take(r);
}
Register allocGPR() {
MOZ_ASSERT(hasGPR());
return availGPR.takeAny();
}
void allocInt64(Register64 r) {
#ifdef JS_PUNBOX64
allocGPR(r.reg);
#else
allocGPR(r.low);
allocGPR(r.high);
#endif
}
Register64 allocInt64() {
MOZ_ASSERT(hasGPR64());
#ifdef JS_PUNBOX64
return Register64(availGPR.takeAny());
#else
Register high = availGPR.takeAny();
Register low = availGPR.takeAny();
return Register64(high, low);
#endif
}
#ifdef JS_CODEGEN_ARM
// r12 is normally the ScratchRegister and r13 is always the stack pointer,
// so the highest possible pair has r10 as the even-numbered register.
static constexpr uint32_t PAIR_LIMIT = 10;
bool hasGPRPair() {
for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
if (isAvailableGPR(Register::FromCode(i)) &&
isAvailableGPR(Register::FromCode(i + 1))) {
return true;
}
}
return false;
}
void allocGPRPair(Register* low, Register* high) {
MOZ_ASSERT(hasGPRPair());
for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
if (isAvailableGPR(Register::FromCode(i)) &&
isAvailableGPR(Register::FromCode(i + 1))) {
*low = Register::FromCode(i);
*high = Register::FromCode(i + 1);
allocGPR(*low);
allocGPR(*high);
return;
}
}
MOZ_CRASH("No pair");
}
#endif
void allocFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
MOZ_ASSERT(!r.isSimd128());
#endif
MOZ_ASSERT(isAvailableFPU(r));
availFPU.take(r);
}
template <MIRType t>
FloatRegister allocFPU() {
return availFPU.takeAny<RegTypeOf<t>::value>();
}
#ifdef RABALDR_SIDEALLOC_V128
template <>
FloatRegister allocFPU<MIRType::Simd128>() {
MOZ_CRASH("Should not happen");
}
#endif
void freeGPR(Register r) { availGPR.add(r); }
void freeInt64(Register64 r) {
#ifdef JS_PUNBOX64
freeGPR(r.reg);
#else
freeGPR(r.low);
freeGPR(r.high);
#endif
}
void freeFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
MOZ_ASSERT(!r.isSimd128());
#endif
availFPU.add(r);
}
public:
explicit BaseRegAlloc()
: bc(nullptr),
availGPR(GeneralRegisterSet::All()),
availFPU(FloatRegisterSet::All())
#ifdef DEBUG
,
scratchTaken(0)
#endif
#ifdef JS_CODEGEN_X86
,
singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
#endif
{
RegisterAllocator::takeWasmRegisters(availGPR);
// Allocate any private scratch registers.
#if defined(RABALDR_SCRATCH_I32)
if (RabaldrScratchI32 != RegI32::Invalid()) {
availGPR.take(RabaldrScratchI32);
}
#endif
#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
static_assert(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition");
static_assert(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition");
#endif
#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64)
if (RabaldrScratchF32 != RegF32::Invalid()) {
availFPU.take(RabaldrScratchF32);
}
#endif
#if defined(RABALDR_SCRATCH_F64)
# ifdef RABALDR_SCRATCH_F32_ALIASES_F64
MOZ_ASSERT(availFPU.has(RabaldrScratchF32));
# endif
if (RabaldrScratchF64 != RegF64::Invalid()) {
availFPU.take(RabaldrScratchF64);
}
# ifdef RABALDR_SCRATCH_F32_ALIASES_F64
MOZ_ASSERT(!availFPU.has(RabaldrScratchF32));
# endif
#endif
#ifdef DEBUG
allGPR = availGPR;
allFPU = availFPU;
#endif
}
void init(BaseCompilerInterface* bc) { this->bc = bc; }
enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4, V128 = 8 };
#ifdef DEBUG
bool isScratchRegisterTaken(ScratchKind s) const {
return (scratchTaken & uint32_t(s)) != 0;
}
void setScratchRegisterTaken(ScratchKind s, bool state) {
if (state) {
scratchTaken |= uint32_t(s);
} else {
scratchTaken &= ~uint32_t(s);
}
}
#endif
#ifdef JS_CODEGEN_X86
bool isSingleByteI32(Register r) { return singleByteRegs.has(r); }
#endif
bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); }
bool isAvailableI64(RegI64 r) {
#ifdef JS_PUNBOX64
return isAvailableGPR(r.reg);
#else
return isAvailableGPR(r.low) && isAvailableGPR(r.high);
#endif
}
bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); }
bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); }
bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); }
#ifdef ENABLE_WASM_SIMD
# ifdef RABALDR_SIDEALLOC_V128
bool isAvailableV128(RegV128 r) { return isAvailableFPU(r.asDouble()); }
# else
bool isAvailableV128(RegV128 r) { return isAvailableFPU(r); }
# endif
#endif
// TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
// failure, only as much as we need.
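//
// Typical use at a call site (a sketch): allocate, use, free. The
// allocation may sync() the value stack if no register is free.
//
//   RegI32 tmp = needI32();
//   masm.move32(Imm32(0), tmp);
//   freeI32(tmp);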
MOZ_MUST_USE RegI32 needI32() {
if (!hasGPR()) {
bc->sync();
}
return RegI32(allocGPR());
}
void needI32(RegI32 specific) {
if (!isAvailableI32(specific)) {
bc->sync();
}
allocGPR(specific);
}
MOZ_MUST_USE RegI64 needI64() {
if (!hasGPR64()) {
bc->sync();
}
return RegI64(allocInt64());
}
void needI64(RegI64 specific) {
if (!isAvailableI64(specific)) {
bc->sync();
}
allocInt64(specific);
}
MOZ_MUST_USE RegPtr needPtr() {
if (!hasGPR()) {
bc->sync();
}
return RegPtr(allocGPR());
}
void needPtr(RegPtr specific) {
if (!isAvailablePtr(specific)) {
bc->sync();
}
allocGPR(specific);
}
// Use when you need a register for a short time but explicitly want to avoid
// a full sync().
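//
// Usage sketch:
//
//   bool saved;
//   RegPtr tmp = needTempPtr(fallback, &saved);
//   ... use tmp ...
//   freeTempPtr(tmp, saved);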
MOZ_MUST_USE RegPtr needTempPtr(RegPtr fallback, bool* saved) {
if (hasGPR()) {
*saved = false;
return RegPtr(allocGPR());
}
*saved = true;
bc->saveTempPtr(fallback);
MOZ_ASSERT(isAvailablePtr(fallback));
allocGPR(fallback);
return RegPtr(fallback);
}
MOZ_MUST_USE RegF32 needF32() {
if (!hasFPU<MIRType::Float32>()) {
bc->sync();
}
return RegF32(allocFPU<MIRType::Float32>());
}
void needF32(RegF32 specific) {
if (!isAvailableF32(specific)) {
bc->sync();
}
allocFPU(specific);
}
MOZ_MUST_USE RegF64 needF64() {
if (!hasFPU<MIRType::Double>()) {
bc->sync();
}
return RegF64(allocFPU<MIRType::Double>());
}
void needF64(RegF64 specific) {
if (!isAvailableF64(specific)) {
bc->sync();
}
allocFPU(specific);
}
#ifdef ENABLE_WASM_SIMD
MOZ_MUST_USE RegV128 needV128() {
# ifdef RABALDR_SIDEALLOC_V128
if (!hasFPU<MIRType::Double>()) {
bc->sync();
}
return RegV128::fromDouble(allocFPU<MIRType::Double>());
# else
if (!hasFPU<MIRType::Simd128>()) {
bc->sync();
}
return RegV128(allocFPU<MIRType::Simd128>());
# endif
}
void needV128(RegV128 specific) {
# ifdef RABALDR_SIDEALLOC_V128
if (!isAvailableV128(specific)) {
bc->sync();
}
allocFPU(specific.asDouble());
# else
if (!isAvailableV128(specific)) {
bc->sync();
}
allocFPU(specific);
# endif
}
#endif
void freeI32(RegI32 r) { freeGPR(r); }
void freeI64(RegI64 r) { freeInt64(r); }
void freePtr(RegPtr r) { freeGPR(r); }
void freeF64(RegF64 r) { freeFPU(r); }
void freeF32(RegF32 r) { freeFPU(r); }
#ifdef ENABLE_WASM_SIMD
void freeV128(RegV128 r) {
# ifdef RABALDR_SIDEALLOC_V128
freeFPU(r.asDouble());
# else
freeFPU(r);
# endif
}
#endif
void freeTempPtr(RegPtr r, bool saved) {
freePtr(r);
if (saved) {
bc->restoreTempPtr(r);
MOZ_ASSERT(!isAvailablePtr(r));
}
}
#ifdef JS_CODEGEN_ARM
MOZ_MUST_USE RegI64 needI64Pair() {
if (!hasGPRPair()) {
bc->sync();
}
Register low, high;
allocGPRPair(&low, &high);
return RegI64(Register64(high, low));
}
#endif
#ifdef DEBUG
friend class LeakCheck;
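// LeakCheck is a debugging aid: construct it at a point where every
// register should be either free or explicitly accounted for, record the
// intentionally live registers with the addKnown* methods, and the
// destructor asserts that no other registers have leaked.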
class MOZ_RAII LeakCheck {
private:
const BaseRegAlloc& ra;
AllocatableGeneralRegisterSet knownGPR_;
AllocatableFloatRegisterSet knownFPU_;
public:
explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
knownGPR_ = ra.availGPR;
knownFPU_ = ra.availFPU;
}
~LeakCheck() {
MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits());
MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits());
}
void addKnownI32(RegI32 r) { knownGPR_.add(r); }
void addKnownI64(RegI64 r) {
# ifdef JS_PUNBOX64
knownGPR_.add(r.reg);
# else
knownGPR_.add(r.high);
knownGPR_.add(r.low);
# endif
}
void addKnownF32(RegF32 r) { knownFPU_.add(r); }
void addKnownF64(RegF64 r) { knownFPU_.add(r); }
# ifdef ENABLE_WASM_SIMD
void addKnownV128(RegV128 r) {
# ifdef RABALDR_SIDEALLOC_V128
knownFPU_.add(r.asDouble());
# else
knownFPU_.add(r);
# endif
}
# endif
void addKnownRef(RegPtr r) { knownGPR_.add(r); }
};
#endif
};
// Scratch register abstractions.
//
// We define our own scratch registers when the platform doesn't provide what we
// need. A notable use case is that we will need a private scratch register
// when the platform masm uses its scratch register very frequently (eg, ARM).
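//
// Usage sketch: a scratch register is claimed for a lexical scope and
// released at scope exit. Depending on the configuration, the constructor
// takes the register allocator (for a private Rabaldr scratch) or the
// macro assembler (for the platform scratch):
//
//   {
//     ScratchI32 tmp(ra);            // or: ScratchI32 tmp(masm)
//     masm.move32(Imm32(0), tmp);
//   }                                // scratch released here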
class BaseScratchRegister {
#ifdef DEBUG
BaseRegAlloc& ra;
BaseRegAlloc::ScratchKind kind_;
public:
explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind)
: ra(ra), kind_(kind) {
MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
ra.setScratchRegisterTaken(kind_, true);
}
~BaseScratchRegister() {
MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
ra.setScratchRegisterTaken(kind_, false);
}
#else
public:
explicit BaseScratchRegister(BaseRegAlloc& ra,
BaseRegAlloc::ScratchKind kind) {}
#endif
};
#ifdef ENABLE_WASM_SIMD
# ifdef RABALDR_SCRATCH_V128
class ScratchV128 : public BaseScratchRegister {
public:
explicit ScratchV128(BaseRegAlloc& ra)
: BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::V128) {}
operator RegV128() const { return RegV128(RabaldrScratchV128); }
};
# else
class ScratchV128 : public ScratchSimd128Scope {
public:
explicit ScratchV128(MacroAssembler& m) : ScratchSimd128Scope(m) {}
operator RegV128() const { return RegV128(FloatRegister(*this)); }
};
# endif
#endif
#ifdef RABALDR_SCRATCH_F64
class ScratchF64 : public BaseScratchRegister {
public:
explicit ScratchF64(BaseRegAlloc& ra)
: BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
operator RegF64() const { return RegF64(RabaldrScratchF64); }
};
#else
class ScratchF64 : public ScratchDoubleScope {
public:
explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
operator RegF64() const { return RegF64(FloatRegister(*this)); }
};
#endif
#ifdef RABALDR_SCRATCH_F32
class ScratchF32 : public BaseScratchRegister {
public:
explicit ScratchF32(BaseRegAlloc& ra)
: BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
operator RegF32() const { return RegF32(RabaldrScratchF32); }
};
#else
class ScratchF32 : public ScratchFloat32Scope {
public:
explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
operator RegF32() const { return RegF32(FloatRegister(*this)); }
};
#endif
#ifdef RABALDR_SCRATCH_I32
template <class RegType>
class ScratchGPR : public BaseScratchRegister {
public:
explicit ScratchGPR(BaseRegAlloc& ra)
: BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
operator RegType() const { return RegType(RabaldrScratchI32); }
};
#else
template <class RegType>
class ScratchGPR : public ScratchRegisterScope {
public:
explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
operator RegType() const { return RegType(Register(*this)); }
};
#endif
using ScratchI32 = ScratchGPR<RegI32>;
using ScratchPtr = ScratchGPR<RegPtr>;
#if defined(JS_CODEGEN_X86)
// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
// no other register will do. And we would normally have to allocate that
// register using ScratchI32 since normally the scratch register is EBX.
// But the whole point of ScratchI32 is to hide that relationship. By using
// the ScratchEBX alias, we document that at that point we require the
// scratch register to be EBX.
using ScratchEBX = ScratchI32;
// ScratchI8 is a mnemonic device: For some ops we need a register with a
// byte subregister.
using ScratchI8 = ScratchI32;
#endif
// The stack frame.
//
// The stack frame has four parts ("below" means at lower addresses):
//
// - the Frame element;
// - the Local area, including the DebugFrame element and possibly a spilled
// pointer to stack results, if any; allocated below the header with various
// forms of alignment;
// - the Dynamic area, comprising the temporary storage the compiler uses for
// register spilling, allocated below the Local area;
// - the Arguments area, comprising memory allocated for outgoing calls,
// allocated below the Dynamic area.
//
//                 +==============================+
//                 |    Incoming stack arg        |
//                 |    ...                       |
// -------------   +==============================+
//                 |    Frame (fixed size)        |
// -------------   +==============================+ <-------------------- FP
//          ^      |  DebugFrame (optional)       |   ^      ^          ^^
//   localSize     |    Register arg local        |   |      |          ||
//          |      |    ...                       |   |      | framePushed
//          |      |    Register stack result ptr?|   |      |          ||
//          |      |    Non-arg local             |   |      |          ||
//          |      |    ...                       |   |      |          ||
//          |      |    (padding)                 |   |      |          ||
//          |      |    Tls pointer               |   |      |          ||
//          |      +------------------------------+   |      |          ||
//          v      |    (padding)                 |   |      v          ||
// -------------   +==============================+ currentStackHeight  ||
//          ^      |    Dynamic (variable size)   |   |                 ||
//  dynamicSize    |    ...                       |   |                 ||
//          v      |    ...                       |   v                 ||
// -------------   |    (free space, sometimes)   | ---------           v|
//                 +==============================+ <----- SP not-during calls
//                 |    Arguments (sometimes)     |                      |
//                 |    ...                       |                      v
//                 +==============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer. masm.framePushed() is always
// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
// Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame and, if needed, the spilled value of
// the stack results area pointer) is laid out by BaseLocalIter and is allocated
// and deallocated by standard prologue and epilogue functions that manipulate
// the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame. On some
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
// because the SP needs a specific alignment, and in this case there will
// normally be some free space directly above the SP. The stack height does not
// include the free space; it reflects the logically used space only.
//
// The Dynamic area is where space for stack results is allocated when calling
// functions that return results on the stack. If a function has stack results,
// a pointer to the low address of the stack result area is passed as an
// additional argument, according to the usual ABI. See
// ABIResultIter::HasStackResults.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.
// BaseLocalIter iterates over a vector of types of locals and provides offsets
// from the Frame address for those locals, and associated data.
//
// The implementation of BaseLocalIter is the property of the BaseStackFrame.
// But it is also exposed for eg the debugger to use.
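//
// Usage sketch:
//
//   for (BaseLocalIter i(locals, args, debugEnabled); !i.done(); i++) {
//     MIRType t = i.mirType();
//     int32_t offset = i.frameOffset();
//     ...
//   }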
BaseLocalIter::BaseLocalIter(const ValTypeVector& locals,
const ArgTypeVector& args, bool debugEnabled)
: locals_(locals),
args_(args),
argsIter_(args_),
index_(0),
nextFrameSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
frameOffset_(INT32_MAX),
stackResultPointerOffset_(INT32_MAX),
mirType_(MIRType::Undefined),
done_(false) {
MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length());
settle();
}
int32_t BaseLocalIter::pushLocal(size_t nbytes) {
MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
nextFrameSize_ = AlignBytes(frameSize_, nbytes) + nbytes;
return nextFrameSize_; // Locals grow down so capture base address.
}
void BaseLocalIter::settle() {
MOZ_ASSERT(!done_);
frameSize_ = nextFrameSize_;
if (!argsIter_.done()) {
mirType_ = argsIter_.mirType();
MIRType concreteType = mirType_;
switch (mirType_) {
case MIRType::StackResults:
// The pointer to stack results is handled like any other argument:
// either addressed in place if it is passed on the stack, or we spill
// it in the frame if it's in a register.
MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_));
concreteType = MIRType::Pointer;
[[fallthrough]];
case MIRType::Int32:
case MIRType::Int64:
case MIRType::Double:
case MIRType::Float32:
case MIRType::RefOrNull:
#ifdef ENABLE_WASM_SIMD
case MIRType::Simd128:
#endif
if (argsIter_->argInRegister()) {
frameOffset_ = pushLocal(MIRTypeToSize(concreteType));
} else {
frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
}
break;
default:
MOZ_CRASH("Argument type");
}
if (mirType_ == MIRType::StackResults) {
stackResultPointerOffset_ = frameOffset();
// Advance past the synthetic stack result pointer argument and fall
// through to the next case.
argsIter_++;
frameSize_ = nextFrameSize_;
MOZ_ASSERT(argsIter_.done());
} else {
return;
}
}
if (index_ < locals_.length()) {
switch (locals_[index_].kind()) {
case ValType::I32:
case ValType::I64:
case ValType::F32:
case ValType::F64:
#ifdef ENABLE_WASM_SIMD
case ValType::V128:
#endif
case ValType::Ref:
// TODO/AnyRef-boxing: With boxed immediates and strings, the
// debugger must be made aware that AnyRef != Pointer.
ASSERT_ANYREF_IS_JSOBJECT;
mirType_ = ToMIRType(locals_[index_]);
frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
break;
default:
MOZ_CRASH("Compiler bug: Unexpected local type");
}
return;
}
done_ = true;
}
void BaseLocalIter::operator++(int) {
MOZ_ASSERT(!done_);
index_++;
if (!argsIter_.done()) {
argsIter_++;
}
settle();
}
// Abstraction of the height of the stack frame, to avoid type confusion.
class StackHeight {
friend class BaseStackFrameAllocator;
uint32_t height;
public:
explicit StackHeight(uint32_t h) : height(h) {}
static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
bool isValid() const { return height != UINT32_MAX; }
bool operator==(StackHeight rhs) const {
MOZ_ASSERT(isValid() && rhs.isValid());
return height == rhs.height;
}
bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};
// Abstraction for where multi-value results go on the machine stack.
class StackResultsLoc {
uint32_t bytes_;
size_t count_;
Maybe<uint32_t> height_;
public:
StackResultsLoc() : bytes_(0), count_(0) {}
StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
: bytes_(bytes), count_(count), height_(Some(height)) {
MOZ_ASSERT(bytes != 0);
MOZ_ASSERT(count != 0);
MOZ_ASSERT(height != 0);
}
uint32_t bytes() const { return bytes_; }
uint32_t count() const { return count_; }
uint32_t height() const { return height_.value(); }
bool hasStackResults() const { return bytes() != 0; }
StackResults stackResults() const {
return hasStackResults() ? StackResults::HasStackResults
: StackResults::NoStackResults;
}
};
// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts). See comments above for more. Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is performed;
// BaseStackFrame then provides a pleasant interface for stack frame management.
class BaseStackFrameAllocator {
MacroAssembler& masm;
#ifdef RABALDR_CHUNKY_STACK
// On platforms that require the stack pointer to be aligned on a boundary
// greater than the typical stack item (eg, ARM64 requires 16-byte alignment
// but items are 8 bytes), allocate stack memory in chunks, and use a
// separate stack height variable to track the effective stack pointer
// within the allocated area. Effectively, there's a variable amount of
// free space directly above the stack pointer. See diagram above.
// The following must be true in order for the stack height to be
// predictable at control flow joins:
//
// - The Local area is always aligned according to WasmStackAlignment, ie,
// masm.framePushed() % WasmStackAlignment is zero after allocating
// locals.
//
// - ChunkSize is always a multiple of WasmStackAlignment.
//
// - Pushing and popping are always in units of ChunkSize (hence preserving
// alignment).
//
// - The free space on the stack (masm.framePushed() - currentStackHeight_)
// is a predictable (nonnegative) amount.
// As an optimization, we pre-allocate some space on the stack, the size of
// this allocation is InitialChunk and it must be a multiple of ChunkSize.
// It is allocated as part of the function prologue and deallocated as part
// of the epilogue, along with the locals.
//
// If ChunkSize is too large then we risk overflowing the stack on simple
// recursions with few live values where stack overflow should not be a
// risk; if it is too small we spend too much time adjusting the stack
// pointer.
//
// Good values for ChunkSize are the subject of future empirical analysis;
// eight words is just an educated guess.
static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
static constexpr uint32_t InitialChunk = ChunkSize;
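// Example: on a 64-bit system ChunkSize is 64 bytes, so pushing 8 bytes
// when there is no free space reserves one 64-byte chunk and leaves 56
// bytes of free space above the new logical stack top.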
// The current logical height of the frame is
// currentStackHeight_ = localSize_ + dynamicSize
// where dynamicSize is not accounted for explicitly and localSize_ also
// includes size for the DebugFrame.
//
// The allocated size of the frame, provided by masm.framePushed(), is usually
// larger than currentStackHeight_, notably at the beginning of execution when
// we've allocated InitialChunk extra space.
uint32_t currentStackHeight_;
#endif
// Size of the Local area in bytes (stable after BaseCompiler::init() has
// called BaseStackFrame::setupLocals(), which in turn calls
// BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
// stack alignment. The Local area is then allocated in beginFunction(),
// following the allocation of the Header. See onFixedStackAllocated()
// below.
uint32_t localSize_;
protected:
///////////////////////////////////////////////////////////////////////////
//
// Initialization
explicit BaseStackFrameAllocator(MacroAssembler& masm)
: masm(masm),
#ifdef RABALDR_CHUNKY_STACK
currentStackHeight_(0),
#endif
localSize_(UINT32_MAX) {
}
protected:
//////////////////////////////////////////////////////////////////////
//
// The Local area - the static part of the frame.
// Record the size of the Local area, once it is known.
void setLocalSize(uint32_t localSize) {
MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
"localSize_ should be aligned to at least a pointer");
MOZ_ASSERT(localSize_ == UINT32_MAX);
localSize_ = localSize;
}
// Record the current stack height, after it has become stable in
// beginFunction(). See also BaseStackFrame::onFixedStackAllocated().
void onFixedStackAllocated() {
MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
currentStackHeight_ = localSize_;
#endif
}
public:
// The fixed amount of memory, in bytes, allocated on the stack below the
// Header for purposes such as locals and other fixed values. Includes all
// necessary alignment, and on ARM64 also the initial chunk for the working
// stack memory.
uint32_t fixedAllocSize() const {
MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
return localSize_ + InitialChunk;
#else
return localSize_;
#endif
}
#ifdef RABALDR_CHUNKY_STACK
// The allocated frame size is frequently larger than the logical stack
// height; we round up to a chunk boundary, and special case the initial
// chunk.
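// For example, with a fixedAllocSize() of 128 and 64-byte chunks,
// framePushedForHeight(150) = 128 + AlignBytes(150 - 128, 64) = 192.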
uint32_t framePushedForHeight(uint32_t logicalHeight) {
if (logicalHeight <= fixedAllocSize()) {
return fixedAllocSize();
}
return fixedAllocSize() +
AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
}
#endif
protected:
//////////////////////////////////////////////////////////////////////
//
// The Dynamic area - the dynamic part of the frame, for spilling and saving
// intermediate values.
// Offset off of sp_ for the slot at stack area location `offset`.
int32_t stackOffset(int32_t offset) {
MOZ_ASSERT(offset > 0);
return masm.framePushed() - offset;
}
uint32_t computeHeightWithStackResults(StackHeight stackBase,
uint32_t stackResultBytes) {
MOZ_ASSERT(stackResultBytes);
MOZ_ASSERT(currentStackHeight() >= stackBase.height);
return stackBase.height + stackResultBytes;
}
#ifdef RABALDR_CHUNKY_STACK
void pushChunkyBytes(uint32_t bytes) {
checkChunkyInvariants();
uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
if (freeSpace < bytes) {
uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize);
MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
masm.reserveStack(bytesToReserve);
}
currentStackHeight_ += bytes;
checkChunkyInvariants();
}
void popChunkyBytes(uint32_t bytes) {
checkChunkyInvariants();
currentStackHeight_ -= bytes;
// Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
// values consumed by a call, and we may need to drop several chunks. But
// never drop the initial chunk. Crucially, the amount we drop is always an
// integral number of chunks.
uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
if (freeSpace >= ChunkSize) {
uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
uint32_t amountToFree = masm.framePushed() - targetAllocSize;
MOZ_ASSERT(amountToFree % ChunkSize == 0);
if (amountToFree) {
masm.freeStack(amountToFree);
}
}
checkChunkyInvariants();
}
#endif
uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
return currentStackHeight_;
#else
return masm.framePushed();
#endif
}
private:
#ifdef RABALDR_CHUNKY_STACK
void checkChunkyInvariants() {
MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
masm.framePushed() - currentStackHeight_ < ChunkSize);
MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
}
#endif
// For a given stack height, return the appropriate size of the allocated
// frame.
uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
// A more complicated adjustment is needed.
return framePushedForHeight(stackHeight.height);
#else
// The allocated frame size equals the stack height.
return stackHeight.height;
#endif
}
public: