Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
*
* Copyright 2016 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// This is an INTERNAL header for Wasm baseline compiler: CPU stack frame,
// stack maps, and associated logic.
#ifndef wasm_wasm_baseline_frame_h
#define wasm_wasm_baseline_frame_h
#include "wasm/WasmBaselineCompile.h" // For BaseLocalIter
#include "wasm/WasmBCDefs.h"
#include "wasm/WasmBCRegDefs.h"
#include "wasm/WasmBCStk.h"
#include "wasm/WasmConstants.h" // For MaxFrameSize
// [SMDOC] Wasm baseline compiler's stack frame.
//
// For background, see "Wasm's ABIs" in WasmFrame.h, the following should never
// be in conflict with that.
//
// The stack frame has four parts ("below" means at lower addresses):
//
// - the Frame element;
// - the Local area, including the DebugFrame element and possibly a spilled
// pointer to stack results, if any; allocated below the header with various
// forms of alignment;
// - the Dynamic area, comprising the temporary storage the compiler uses for
// register spilling, allocated below the Local area;
// - the Arguments area, comprising memory allocated for outgoing calls,
// allocated below the Dynamic area.
//
// +==============================+
// | Incoming stack arg |
// | ... |
// ------------- +==============================+
// | Frame (fixed size) |
// ------------- +==============================+ <-------------------- FP
// ^ | DebugFrame (optional) | ^ ^ ^^
// localSize | Register arg local | | | ||
// | | ... | | | framePushed
// | | Register stack result ptr?| | | ||
// | | Non-arg local | | | ||
// | | ... | | | ||
// | | (padding) | | | ||
// | | Instance pointer | | | ||
// | +------------------------------+ | | ||
// v | (padding) | | v ||
// ------------- +==============================+ currentStackHeight ||
// ^ | Dynamic (variable size) | | ||
// dynamicSize | ... | | ||
// v | ... | v ||
// ------------- | (free space, sometimes) | --------- v|
// +==============================+ <----- SP not-during calls
// | Arguments (sometimes) | |
// | ... | v
// +==============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer. masm.framePushed() is always
// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
// Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame and, if needed, the spilled value of
// the stack results area pointer) is laid out by BaseLocalIter and is allocated
// and deallocated by standard prologue and epilogue functions that manipulate
// the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame. On some
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
// because the SP needs a specific alignment, and in this case there will
// normally be some free space directly above the SP. The stack height does not
// include the free space, it reflects the logically used space only.
//
// The Dynamic area is where space for stack results is allocated when calling
// functions that return results on the stack. If a function has stack results,
// a pointer to the low address of the stack result area is passed as an
// additional argument, according to the usual ABI. See
// ABIResultIter::HasStackResults.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.
namespace js {
namespace wasm {
using namespace js::jit;
// Abstraction of the height of the stack frame, to avoid type confusion.
class StackHeight {
friend class BaseStackFrameAllocator;
uint32_t height;
public:
explicit StackHeight(uint32_t h) : height(h) {}
static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
bool isValid() const { return height != UINT32_MAX; }
bool operator==(StackHeight rhs) const {
MOZ_ASSERT(isValid() && rhs.isValid());
return height == rhs.height;
}
bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};
// Abstraction for where multi-value results go on the machine stack.
class StackResultsLoc {
uint32_t bytes_;
size_t count_;
Maybe<uint32_t> height_;
public:
StackResultsLoc() : bytes_(0), count_(0){};
StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
: bytes_(bytes), count_(count), height_(Some(height)) {
MOZ_ASSERT(bytes != 0);
MOZ_ASSERT(count != 0);
MOZ_ASSERT(height != 0);
}
uint32_t bytes() const { return bytes_; }
uint32_t count() const { return count_; }
uint32_t height() const { return height_.value(); }
bool hasStackResults() const { return bytes() != 0; }
StackResults stackResults() const {
return hasStackResults() ? StackResults::HasStackResults
: StackResults::NoStackResults;
}
};
// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts). See comments above for more. Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is performed;
// BaseStackFrame then provides a pleasant interface for stack frame management.
class BaseStackFrameAllocator {
MacroAssembler& masm;
#ifdef RABALDR_CHUNKY_STACK
// On platforms that require the stack pointer to be aligned on a boundary
// greater than the typical stack item (eg, ARM64 requires 16-byte alignment
// but items are 8 bytes), allocate stack memory in chunks, and use a
// separate stack height variable to track the effective stack pointer
// within the allocated area. Effectively, there's a variable amount of
// free space directly above the stack pointer. See diagram above.
// The following must be true in order for the stack height to be
// predictable at control flow joins:
//
// - The Local area is always aligned according to WasmStackAlignment, ie,
// masm.framePushed() % WasmStackAlignment is zero after allocating
// locals.
//
// - ChunkSize is always a multiple of WasmStackAlignment.
//
// - Pushing and popping are always in units of ChunkSize (hence preserving
// alignment).
//
// - The free space on the stack (masm.framePushed() - currentStackHeight_)
// is a predictable (nonnegative) amount.
// As an optimization, we pre-allocate some space on the stack, the size of
// this allocation is InitialChunk and it must be a multiple of ChunkSize.
// It is allocated as part of the function prologue and deallocated as part
// of the epilogue, along with the locals.
//
// If ChunkSize is too large then we risk overflowing the stack on simple
// recursions with few live values where stack overflow should not be a
// risk; if it is too small we spend too much time adjusting the stack
// pointer.
//
// Good values for ChunkSize are the subject of future empirical analysis;
// eight words is just an educated guess.
static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
static constexpr uint32_t InitialChunk = ChunkSize;
// The current logical height of the frame is
// currentStackHeight_ = localSize_ + dynamicSize
// where dynamicSize is not accounted for explicitly and localSize_ also
// includes size for the DebugFrame.
//
// The allocated size of the frame, provided by masm.framePushed(), is usually
// larger than currentStackHeight_, notably at the beginning of execution when
// we've allocated InitialChunk extra space.
uint32_t currentStackHeight_;
#endif
// Size of the Local area in bytes (stable after BaseCompiler::init() has
// called BaseStackFrame::setupLocals(), which in turn calls
// BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
// stack alignment. The Local area is then allocated in beginFunction(),
// following the allocation of the Header. See onFixedStackAllocated()
// below.
uint32_t localSize_;
protected:
///////////////////////////////////////////////////////////////////////////
//
// Initialization
explicit BaseStackFrameAllocator(MacroAssembler& masm)
: masm(masm),
#ifdef RABALDR_CHUNKY_STACK
currentStackHeight_(0),
#endif
localSize_(UINT32_MAX) {
}
protected:
//////////////////////////////////////////////////////////////////////
//
// The Local area - the static part of the frame.
// Record the size of the Local area, once it is known.
void setLocalSize(uint32_t localSize) {
MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
"localSize_ should be aligned to at least a pointer");
MOZ_ASSERT(localSize_ == UINT32_MAX);
localSize_ = localSize;
}
// Record the current stack height, after it has become stable in
// beginFunction(). See also BaseStackFrame::onFixedStackAllocated().
void onFixedStackAllocated() {
MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
currentStackHeight_ = localSize_;
#endif
}
public:
// The fixed amount of memory, in bytes, allocated on the stack below the
// Header for purposes such as locals and other fixed values. Includes all
// necessary alignment, and on ARM64 also the initial chunk for the working
// stack memory.
uint32_t fixedAllocSize() const {
MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
return localSize_ + InitialChunk;
#else
return localSize_;
#endif
}
#ifdef RABALDR_CHUNKY_STACK
// The allocated frame size is frequently larger than the logical stack
// height; we round up to a chunk boundary, and special case the initial
// chunk.
uint32_t framePushedForHeight(uint32_t logicalHeight) {
if (logicalHeight <= fixedAllocSize()) {
return fixedAllocSize();
}
return fixedAllocSize() +
AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
}
#endif
protected:
//////////////////////////////////////////////////////////////////////
//
// The Dynamic area - the dynamic part of the frame, for spilling and saving
// intermediate values.
// Offset off of sp_ for the slot at stack area location `offset`.
int32_t stackOffset(int32_t offset) {
MOZ_ASSERT(offset > 0);
return masm.framePushed() - offset;
}
uint32_t computeHeightWithStackResults(StackHeight stackBase,
uint32_t stackResultBytes) {
MOZ_ASSERT(stackResultBytes);
MOZ_ASSERT(currentStackHeight() >= stackBase.height);
return stackBase.height + stackResultBytes;
}
#ifdef RABALDR_CHUNKY_STACK
void pushChunkyBytes(uint32_t bytes) {
checkChunkyInvariants();
uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
if (freeSpace < bytes) {
uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize);
MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
masm.reserveStack(bytesToReserve);
}
currentStackHeight_ += bytes;
checkChunkyInvariants();
}
void popChunkyBytes(uint32_t bytes) {
checkChunkyInvariants();
currentStackHeight_ -= bytes;
// Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
// values consumed by a call, and we may need to drop several chunks. But
// never drop the initial chunk. Crucially, the amount we drop is always an
// integral number of chunks.
uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
if (freeSpace >= ChunkSize) {
uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
uint32_t amountToFree = masm.framePushed() - targetAllocSize;
MOZ_ASSERT(amountToFree % ChunkSize == 0);
if (amountToFree) {
masm.freeStack(amountToFree);
}
}
checkChunkyInvariants();
}
#endif
uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
return currentStackHeight_;
#else
return masm.framePushed();
#endif
}
private:
#ifdef RABALDR_CHUNKY_STACK
void checkChunkyInvariants() {
MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
masm.framePushed() - currentStackHeight_ < ChunkSize);
MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
}
#endif
// For a given stack height, return the appropriate size of the allocated
// frame.
uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
// A more complicated adjustment is needed.
return framePushedForHeight(stackHeight.height);
#else
// The allocated frame size equals the stack height.
return stackHeight.height;
#endif
}
public:
// The current height of the stack area, not necessarily zero-based, in a
// type-safe way.
StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }
// Set the frame height to a previously recorded value.
void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
currentStackHeight_ = amount.height;
masm.setFramePushed(framePushedForHeight(amount));
checkChunkyInvariants();
#else
masm.setFramePushed(amount.height);
#endif
}
// The current height of the dynamic part of the stack area (ie, the backing
// store for the evaluation stack), zero-based.
uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }
// Before branching to an outer control label, pop the execution stack to
// the level expected by that region, but do not update masm.framePushed()
// as that will happen as compilation leaves the block.
//
// Note these operate directly on the stack pointer register.
void popStackBeforeBranch(StackHeight destStackHeight,
uint32_t stackResultBytes) {
uint32_t framePushedHere = masm.framePushed();
StackHeight heightThere =
StackHeight(destStackHeight.height + stackResultBytes);
uint32_t framePushedThere = framePushedForHeight(heightThere);
if (framePushedHere > framePushedThere) {
masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
}
}
void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
popStackBeforeBranch(destStackHeight,
ABIResultIter::MeasureStackBytes(type));
}
// Given that there are |stackParamSize| bytes on the dynamic stack
// corresponding to the stack results, return the stack height once these
// parameters are popped.
StackHeight stackResultsBase(uint32_t stackParamSize) {
return StackHeight(currentStackHeight() - stackParamSize);
}
// For most of WebAssembly, adjacent instructions have fallthrough control
// flow between them, which allows us to simply thread the current stack
// height through the compiler. There are two exceptions to this rule: when
// leaving a block via dead code, and when entering the "else" arm of an "if".
// In these cases, the stack height is the block entry height, plus any stack
// values (results in the block exit case, parameters in the else entry case).
void resetStackHeight(StackHeight destStackHeight, ResultType type) {
uint32_t height = destStackHeight.height;
height += ABIResultIter::MeasureStackBytes(type);
setStackHeight(StackHeight(height));
}
// Return offset of stack result.
uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
uint32_t stackResultBytes) {
MOZ_ASSERT(result.onStack());
MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
return end - result.stackOffset();
}
public:
//////////////////////////////////////////////////////////////////////
//
// The Argument area - for outgoing calls.
//
// We abstract these operations as an optimization: we can merge the freeing
// of the argument area and dropping values off the stack after a call. But
// they always amount to manipulating the real stack pointer by some amount.
//
// Note that we do not update currentStackHeight_ for this; the frame does
// not know about outgoing arguments. But we do update framePushed(), so we
// can still index into the frame below the outgoing arguments area.
// This is always equivalent to a masm.reserveStack() call.
void allocArgArea(size_t argSize) {
if (argSize) {
masm.reserveStack(argSize);
}
}
// This frees the argument area allocated by allocArgArea(), and `argSize`
// must be equal to the `argSize` argument to allocArgArea(). In addition
// we drop some values from the frame, corresponding to the values that were
// consumed by the call.
void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
#ifdef RABALDR_CHUNKY_STACK
// Freeing the outgoing arguments and freeing the consumed values have
// different semantics here, which is why the operation is split.
if (argSize) {
masm.freeStack(argSize);
}
popChunkyBytes(dropSize);
#else
if (argSize + dropSize) {
masm.freeStack(argSize + dropSize);
}
#endif
}
};
class BaseStackFrame final : public BaseStackFrameAllocator {
MacroAssembler& masm;
// The largest observed value of masm.framePushed(), ie, the size of the
// stack frame. Read this for its true value only when code generation is
// finished.
uint32_t maxFramePushed_;
// Patch point where we check for stack overflow.
CodeOffset stackAddOffset_;
// Low byte offset of pointer to stack results, if any.
Maybe<int32_t> stackResultsPtrOffset_;
// The offset of instance pointer.
uint32_t instancePointerOffset_;
// Low byte offset of local area for true locals (not parameters).
uint32_t varLow_;
// High byte offset + 1 of local area for true locals.
uint32_t varHigh_;
// The stack pointer, cached for brevity.
RegisterOrSP sp_;
public:
explicit BaseStackFrame(MacroAssembler& masm)
: BaseStackFrameAllocator(masm),
masm(masm),
maxFramePushed_(0),
stackAddOffset_(0),
instancePointerOffset_(UINT32_MAX),
varLow_(UINT32_MAX),
varHigh_(UINT32_MAX),
sp_(masm.getStackPointer()) {}
///////////////////////////////////////////////////////////////////////////
//
// Stack management and overflow checking
// This must be called once beginFunction has allocated space for the Header
// (the Frame and DebugFrame) and the Local area, and will record the current
// frame size for internal use by the stack abstractions.
void onFixedStackAllocated() {
maxFramePushed_ = masm.framePushed();
BaseStackFrameAllocator::onFixedStackAllocated();
}
// We won't know until after we've generated code how big the frame will be
// (we may need arbitrary spill slots and outgoing param slots) so emit a
// patchable add that is patched in endFunction().
//
// Note the platform scratch register may be used by branchPtr(), so
// generally tmp must be something else.
void checkStack(Register tmp, BytecodeOffset trapOffset) {
stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
Label ok;
masm.branchPtr(Assembler::Below,
Address(InstanceReg, wasm::Instance::offsetOfStackLimit()),
tmp, &ok);
masm.wasmTrap(Trap::StackOverflow, trapOffset);
masm.bind(&ok);
}
void patchCheckStack() {
masm.patchSub32FromStackPtr(stackAddOffset_,
Imm32(int32_t(maxFramePushed_)));
}
// Very large frames are implausible, probably an attack.
bool checkStackHeight() { return maxFramePushed_ <= MaxFrameSize; }
///////////////////////////////////////////////////////////////////////////
//
// Local area
struct Local {
// Type of the value.
const MIRType type;
// Byte offset from Frame "into" the locals, ie positive for true locals
// and negative for incoming args that read directly from the arg area.
// It assumes the stack is growing down and that locals are on the stack
// at lower addresses than Frame, and is the offset from Frame of the
// lowest-addressed byte of the local.
const int32_t offs;
Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
bool isStackArgument() const { return offs < 0; }
};
// Profiling shows that the number of parameters and locals frequently
// touches or exceeds 8. So 16 seems like a reasonable starting point.
using LocalVector = Vector<Local, 16, SystemAllocPolicy>;
// Initialize `localInfo` based on the types of `locals` and `args`.
[[nodiscard]] bool setupLocals(const ValTypeVector& locals,
const ArgTypeVector& args, bool debugEnabled,
LocalVector* localInfo) {
if (!localInfo->reserve(locals.length())) {
return false;
}
DebugOnly<uint32_t> index = 0;
BaseLocalIter i(locals, args, debugEnabled);
for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) {
MOZ_ASSERT(i.isArg());
MOZ_ASSERT(i.index() == index);
localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
index++;
}
varLow_ = i.frameSize();
for (; !i.done(); i++) {
MOZ_ASSERT(!i.isArg());
MOZ_ASSERT(i.index() == index);
localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
index++;
}
varHigh_ = i.frameSize();
// Reserve an additional stack slot for the instance pointer.
const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
instancePointerOffset_ = localSize;
setLocalSize(AlignBytes(localSize, WasmStackAlignment));
if (args.hasSyntheticStackResultPointerArg()) {
stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
}
return true;
}
void zeroLocals(BaseRegAlloc* ra);
Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) {
if (local.isStackArgument()) {
return Address(FramePointer,
stackArgumentOffsetFromFp(local) + additionalOffset);
}
return Address(sp_, localOffsetFromSp(local) + additionalOffset);
}
void loadLocalI32(const Local& src, RegI32 dest) {
masm.load32(addressOfLocal(src), dest);
}
#ifndef JS_PUNBOX64
void loadLocalI64Low(const Local& src, RegI32 dest) {
masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest);
}
void loadLocalI64High(const Local& src, RegI32 dest) {
masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest);
}
#endif
void loadLocalI64(const Local& src, RegI64 dest) {
masm.load64(addressOfLocal(src), dest);
}
void loadLocalRef(const Local& src, RegRef dest) {
masm.loadPtr(addressOfLocal(src), dest);
}
void loadLocalF64(const Local& src, RegF64 dest) {
masm.loadDouble(addressOfLocal(src), dest);
}
void loadLocalF32(const Local& src, RegF32 dest) {
masm.loadFloat32(addressOfLocal(src), dest);
}
#ifdef ENABLE_WASM_SIMD
void loadLocalV128(const Local& src, RegV128 dest) {
masm.loadUnalignedSimd128(addressOfLocal(src), dest);
}
#endif
void storeLocalI32(RegI32 src, const Local& dest) {
masm.store32(src, addressOfLocal(dest));
}
void storeLocalI64(RegI64 src, const Local& dest) {
masm.store64(src, addressOfLocal(dest));
}
void storeLocalRef(RegRef src, const Local& dest) {
masm.storePtr(src, addressOfLocal(dest));
}
void storeLocalF64(RegF64 src, const Local& dest) {
masm.storeDouble(src, addressOfLocal(dest));
}
void storeLocalF32(RegF32 src, const Local& dest) {
masm.storeFloat32(src, addressOfLocal(dest));
}
#ifdef ENABLE_WASM_SIMD
void storeLocalV128(RegV128 src, const Local& dest) {
masm.storeUnalignedSimd128(src, addressOfLocal(dest));
}
#endif
// Offset off of sp_ for `local`.
int32_t localOffsetFromSp(const Local& local) {
MOZ_ASSERT(!local.isStackArgument());
return localOffset(local.offs);
}
// Offset off of frame pointer for `stack argument`.
int32_t stackArgumentOffsetFromFp(const Local& local) {
MOZ_ASSERT(local.isStackArgument());
return -local.offs;
}
// The incoming stack result area pointer is for stack results of the function
// being compiled.
void loadIncomingStackResultAreaPtr(RegPtr reg) {
const int32_t offset = stackResultsPtrOffset_.value();
Address src = offset < 0 ? Address(FramePointer, -offset)
: Address(sp_, stackOffset(offset));
masm.loadPtr(src, reg);
}
void storeIncomingStackResultAreaPtr(RegPtr reg) {
// If we get here, that means the pointer to the stack results area was
// passed in as a register, and therefore it will be spilled below the
// frame, so the offset is a positive height.
MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
masm.storePtr(reg,
Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
}
void loadInstancePtr(Register dst) {
masm.loadPtr(Address(sp_, stackOffset(instancePointerOffset_)), dst);
}
void storeInstancePtr(Register instance) {
masm.storePtr(instance, Address(sp_, stackOffset(instancePointerOffset_)));
}
int32_t getInstancePtrOffset() { return stackOffset(instancePointerOffset_); }
// An outgoing stack result area pointer is for stack results of callees of
// the function being compiled.
void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
RegPtr dest) {
MOZ_ASSERT(results.height() <= masm.framePushed());
uint32_t offsetFromSP = masm.framePushed() - results.height();
masm.moveStackPtrTo(dest);
if (offsetFromSP) {
masm.addPtr(Imm32(offsetFromSP), dest);
}
}
private:
// Offset off of sp_ for a local with offset `offset` from Frame.
int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }
public:
///////////////////////////////////////////////////////////////////////////
//
// Dynamic area
static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
#ifdef ENABLE_WASM_SIMD
static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128;
#endif
// Pushes the register `r` to the stack. This pushes the full 64-bit width on
// 64-bit systems, and 32-bits otherwise.
uint32_t pushGPR(Register r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(StackSizeOfPtr);
masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
masm.Push(r);
#endif
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
return currentStackHeight();
}
uint32_t pushFloat32(FloatRegister r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(StackSizeOfFloat);
masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
masm.Push(r);
#endif
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
return currentStackHeight();
}
#ifdef ENABLE_WASM_SIMD
uint32_t pushV128(RegV128 r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
# ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(StackSizeOfV128);
# else
masm.adjustStack(-(int)StackSizeOfV128);
# endif
masm.storeUnalignedSimd128(r,
Address(sp_, stackOffset(currentStackHeight())));
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight());
return currentStackHeight();
}
#endif
uint32_t pushDouble(FloatRegister r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(StackSizeOfDouble);
masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
masm.Push(r);
#endif
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
return currentStackHeight();
}
// Pops the stack into the register `r`. This pops the full 64-bit width on
// 64-bit systems, and 32-bits otherwise.
void popGPR(Register r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
popChunkyBytes(StackSizeOfPtr);
#else
masm.Pop(r);
#endif
MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
}
void popFloat32(FloatRegister r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
popChunkyBytes(StackSizeOfFloat);
#else
masm.Pop(r);
#endif
MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
}
void popDouble(FloatRegister r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
popChunkyBytes(StackSizeOfDouble);
#else
masm.Pop(r);
#endif
MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
}
#ifdef ENABLE_WASM_SIMD
void popV128(RegV128 r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())),
r);
# ifdef RABALDR_CHUNKY_STACK
popChunkyBytes(StackSizeOfV128);
# else
masm.adjustStack((int)StackSizeOfV128);
# endif
MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight());
}
#endif
void popBytes(size_t bytes) {
if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
popChunkyBytes(bytes);
#else
masm.freeStack(bytes);
#endif
}
}
void loadStackI32(int32_t offset, RegI32 dest) {
masm.load32(Address(sp_, stackOffset(offset)), dest);
}
void loadStackI64(int32_t offset, RegI64 dest) {
masm.load64(Address(sp_, stackOffset(offset)), dest);
}
#ifndef JS_PUNBOX64
void loadStackI64Low(int32_t offset, RegI32 dest) {
masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
}
void loadStackI64High(int32_t offset, RegI32 dest) {
masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
}
#endif
void loadStackRef(int32_t offset, RegRef dest) {
masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
}
void loadStackF64(int32_t offset, RegF64 dest) {
masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
}
void loadStackF32(int32_t offset, RegF32 dest) {
masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
}
#ifdef ENABLE_WASM_SIMD
void loadStackV128(int32_t offset, RegV128 dest) {
masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest);
}
#endif
uint32_t prepareStackResultArea(StackHeight stackBase,
uint32_t stackResultBytes) {
uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
if (currentStackHeight() < end) {
uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(bytes);
#else
masm.reserveStack(bytes);
#endif
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
}
return end;
}
void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
MOZ_ASSERT(currentStackHeight() >= end);
popBytes(currentStackHeight() - end);
}
// |srcHeight| and |destHeight| are stack heights *including* |bytes|.
void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
uint32_t bytes, Register temp) {
MOZ_ASSERT(destHeight < srcHeight);
MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
uint32_t destOffset = stackOffset(destHeight) + bytes;
uint32_t srcOffset = stackOffset(srcHeight) + bytes;
while (bytes >= sizeof(intptr_t)) {
destOffset -= sizeof(intptr_t);
srcOffset -= sizeof(intptr_t);
bytes -= sizeof(intptr_t);
masm.loadPtr(Address(sp_, srcOffset), temp);
masm.storePtr(temp, Address(sp_, destOffset));
}
if (bytes) {
MOZ_ASSERT(bytes == sizeof(uint32_t));
destOffset -= sizeof(uint32_t);
srcOffset -= sizeof(uint32_t);
masm.load32(Address(sp_, srcOffset), temp);
masm.store32(temp, Address(sp_, destOffset));
}
}
// Unlike the overload that operates on raw heights, |srcHeight| and
// |destHeight| are stack heights *not including* |bytes|.
void shuffleStackResultsTowardFP(StackHeight srcHeight,
StackHeight destHeight, uint32_t bytes,
Register temp) {
MOZ_ASSERT(srcHeight.isValid());
MOZ_ASSERT(destHeight.isValid());
uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
MOZ_ASSERT(src <= currentStackHeight());
MOZ_ASSERT(dest <= currentStackHeight());
shuffleStackResultsTowardFP(src, dest, bytes, temp);
}
// |srcHeight| and |destHeight| are stack heights *including* |bytes|.
void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
uint32_t bytes, Register temp) {
MOZ_ASSERT(destHeight > srcHeight);
MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
uint32_t destOffset = stackOffset(destHeight);
uint32_t srcOffset = stackOffset(srcHeight);
while (bytes >= sizeof(intptr_t)) {
masm.loadPtr(Address(sp_, srcOffset), temp);
masm.storePtr(temp, Address(sp_, destOffset));
destOffset += sizeof(intptr_t);
srcOffset += sizeof(intptr_t);
bytes -= sizeof(intptr_t);
}
if (bytes) {
MOZ_ASSERT(bytes == sizeof(uint32_t));
masm.load32(Address(sp_, srcOffset), temp);
masm.store32(temp, Address(sp_, destOffset));
}
}
// Copy results from the top of the current stack frame to an area of memory,
// and pop the stack accordingly. `dest` is the address of the low byte of
// that memory.
void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
MOZ_ASSERT(bytes <= currentStackHeight());
MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
uint32_t bytesToPop = bytes;
uint32_t srcOffset = stackOffset(currentStackHeight());
uint32_t destOffset = 0;
while (bytes >= sizeof(intptr_t)) {
masm.loadPtr(Address(sp_, srcOffset), temp);
masm.storePtr(temp, Address(dest, destOffset));
destOffset += sizeof(intptr_t);
srcOffset += sizeof(intptr_t);
bytes -= sizeof(intptr_t);
}
if (bytes) {
MOZ_ASSERT(bytes == sizeof(uint32_t));
masm.load32(Address(sp_, srcOffset), temp);
masm.store32(temp, Address(dest, destOffset));
}
popBytes(bytesToPop);
}
void allocArgArea(size_t argSize) {
if (argSize) {
BaseStackFrameAllocator::allocArgArea(argSize);
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
}
}
private:
void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) {
masm.move32(Imm32(imm), temp);
masm.store32(temp, Address(sp_, stackOffset(destHeight)));
}
void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) {
#ifdef JS_PUNBOX64
masm.move64(Imm64(imm), Register64(temp));
masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
#else
union {
int64_t i64;
int32_t i32[2];
} bits = {.i64 = imm};
static_assert(sizeof(bits) == 8);
store32BitsToStack(bits.i32[0], destHeight, temp);
store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp);
#endif
}
public:
void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight,
Register temp) {
#ifdef JS_PUNBOX64
static_assert(StackSizeOfPtr == 8);
store64BitsToStack(imm, destHeight, temp);
#else
static_assert(StackSizeOfPtr == 4);
store32BitsToStack(int32_t(imm), destHeight, temp);
#endif
}
void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight,
Register temp) {
store64BitsToStack(imm, destHeight, temp);
}
void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) {
union {
int32_t i32;
float f32;
} bits = {.f32 = imm};
static_assert(sizeof(bits) == 4);
// Do not store 4 bytes if StackSizeOfFloat == 8. It's probably OK to do
// so, but it costs little to store something predictable.
if (StackSizeOfFloat == 4) {
store32BitsToStack(bits.i32, destHeight, temp);
} else {
store64BitsToStack(uint32_t(bits.i32), destHeight, temp);
}
}
void storeImmediateF64ToStack(double imm, uint32_t destHeight,
Register temp) {
union {
int64_t i64;
double f64;
} bits = {.f64 = imm};
static_assert(sizeof(bits) == 8);
store64BitsToStack(bits.i64, destHeight, temp);
}
#ifdef ENABLE_WASM_SIMD
void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) {
union {
int32_t i32[4];
uint8_t bytes[16];
} bits{};
static_assert(sizeof(bits) == 16);
memcpy(bits.bytes, imm.bytes, 16);
for (unsigned i = 0; i < 4; i++) {
store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp);
}
}
#endif
};
//////////////////////////////////////////////////////////////////////////////
//
// MachineStackTracker, used for stack-slot pointerness tracking.
class MachineStackTracker {
// Simulates the machine's stack, with one bool per word. Index zero in
// this vector corresponds to the highest address in the machine stack. The
// last entry corresponds to what SP currently points at. This all assumes
// a grow-down stack.
//
// numPtrs_ contains the number of "true" values in vec_, and is therefore
// redundant. But it serves as a constant-time way to detect the common
// case where vec_ holds no "true" values.
size_t numPtrs_;
Vector<bool, 64, SystemAllocPolicy> vec_;
public:
MachineStackTracker() : numPtrs_(0) {}
~MachineStackTracker() {
#ifdef DEBUG
size_t n = 0;
for (bool b : vec_) {
n += (b ? 1 : 0);
}
MOZ_ASSERT(n == numPtrs_);
#endif
}
// Clone this MachineStackTracker, writing the result at |dst|.
[[nodiscard]] bool cloneTo(MachineStackTracker* dst);
// Notionally push |n| non-pointers on the stack.
[[nodiscard]] bool pushNonGCPointers(size_t n) {
return vec_.appendN(false, n);
}
// Mark the stack slot |offsetFromSP| up from the bottom as holding a
// pointer.
void setGCPointer(size_t offsetFromSP) {
// offsetFromSP == 0 denotes the most recently pushed item, == 1 the
// second most recently pushed item, etc.
MOZ_ASSERT(offsetFromSP < vec_.length());
size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0);
vec_[offsetFromTop] = true;
}
// Query the pointerness of the slot |offsetFromSP| up from the bottom.
bool isGCPointer(size_t offsetFromSP) {
MOZ_ASSERT(offsetFromSP < vec_.length());
size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
return vec_[offsetFromTop];
}
// Return the number of words tracked by this MachineStackTracker.
size_t length() { return vec_.length(); }
// Return the number of pointer-typed words tracked by this
// MachineStackTracker.
size_t numPtrs() {
MOZ_ASSERT(numPtrs_ <= length());
return numPtrs_;
}
// Discard all contents, but (per mozilla::Vector::clear semantics) don't
// free or reallocate any dynamic storage associated with |vec_|.
void clear() {
vec_.clear();
numPtrs_ = 0;
}
};
//////////////////////////////////////////////////////////////////////////////
//
// StackMapGenerator, which carries all state needed to create stackmaps.
enum class HasDebugFrameWithLiveRefs { No, Maybe };
struct StackMapGenerator {
private:
// --- These are constant for the life of the function's compilation ---
// For generating stackmaps, we'll need to know the offsets of registers
// as saved by the trap exit stub.
const RegisterOffsets& trapExitLayout_;
const size_t trapExitLayoutNumWords_;
// Completed stackmaps are added here
StackMaps* stackMaps_;
// So as to be able to get current offset when creating stackmaps
const MacroAssembler& masm_;
public:
// --- These are constant once we've completed beginFunction() ---
// The number of words of arguments passed to this function in memory.
size_t numStackArgWords;
MachineStackTracker machineStackTracker; // tracks machine stack pointerness
// This holds masm.framePushed at entry to the function's body. It is a
// Maybe because createStackMap needs to know whether or not we're still
// in the prologue. It makes a Nothing-to-Some transition just once per
// function.
Maybe<uint32_t> framePushedAtEntryToBody;
// --- These can change at any point ---
// This holds masm.framePushed at it would be be for a function call
// instruction, but excluding the stack area used to pass arguments in
// memory. That is, for an upcoming function call, this will hold
//
// masm.framePushed() at the call instruction -
// StackArgAreaSizeUnaligned(argumentTypes)
//
// This value denotes the lowest-addressed stack word covered by the current
// function's stackmap. Words below this point form the highest-addressed
// area of the callee's stackmap. Note that all alignment padding above the
// arguments-in-memory themselves belongs to the caller's stackmap, which
// is why this is defined in terms of StackArgAreaSizeUnaligned() rather than
// StackArgAreaSizeAligned().
//
// When not inside a function call setup/teardown sequence, it is Nothing.
// It can make Nothing-to/from-Some transitions arbitrarily as we progress
// through the function body.
Maybe<uint32_t> framePushedExcludingOutboundCallArgs;
// The number of memory-resident, ref-typed entries on the containing
// BaseCompiler::stk_.
size_t memRefsOnStk;
// This is a copy of machineStackTracker that is used only within individual
// calls to createStackMap. It is here only to avoid possible heap allocation
// costs resulting from making it local to createStackMap().
MachineStackTracker augmentedMst;
StackMapGenerator(StackMaps* stackMaps, const RegisterOffsets& trapExitLayout,
const size_t trapExitLayoutNumWords,
const MacroAssembler& masm)
: trapExitLayout_(trapExitLayout),
trapExitLayoutNumWords_(trapExitLayoutNumWords),
stackMaps_(stackMaps),
masm_(masm),
numStackArgWords(0),
memRefsOnStk(0) {}
// At the beginning of a function, we may have live roots in registers (as
// arguments) at the point where we perform a stack overflow check. This
// method generates the "extra" stackmap entries to describe that, in the
// case that the check fails and we wind up calling into the wasm exit
// stub, as generated by GenerateTrapExit().
//
// The resulting map must correspond precisely with the stack layout
// created for the integer registers as saved by (code generated by)
// GenerateTrapExit(). To do that we use trapExitLayout_ and
// trapExitLayoutNumWords_, which together comprise a description of the
// layout and are created by GenerateTrapExitRegisterOffsets().
[[nodiscard]] bool generateStackmapEntriesForTrapExit(
const ArgTypeVector& args, ExitStubMapVector* extras);
// Creates a stackmap associated with the instruction denoted by
// |assemblerOffset|, incorporating pointers from the current operand
// stack |stk|, incorporating possible extra pointers in |extra| at the
// lower addressed end, and possibly with the associated frame having a
// DebugFrame that must be traced, as indicated by |debugFrameWithLiveRefs|.
[[nodiscard]] bool createStackMap(
const char* who, const ExitStubMapVector& extras,
uint32_t assemblerOffset,
HasDebugFrameWithLiveRefs debugFrameWithLiveRefs, const StkVector& stk);
};
} // namespace wasm
} // namespace js
#endif // wasm_wasm_baseline_frame_h