/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "jit/arm64/MacroAssembler-arm64.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"
#include "jsmath.h"
#include "jit/arm64/MoveEmitter-arm64.h"
#include "jit/arm64/SharedICRegisters-arm64.h"
#include "jit/Bailouts.h"
#include "jit/BaselineFrame.h"
#include "jit/JitRuntime.h"
#include "jit/MacroAssembler.h"
#include "util/Memory.h"
#include "vm/BigIntType.h"
#include "vm/JitActivation.h" // js::jit::JitActivation
#include "vm/JSContext.h"
#include "vm/StringType.h"
#include "jit/MacroAssembler-inl.h"
namespace js {
namespace jit {
enum class Width { _32 = 32, _64 = 64 };
static inline ARMRegister X(Register r) { return ARMRegister(r, 64); }
static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) {
return masm.toARMRegister(r, 64);
}
static inline ARMRegister W(Register r) { return ARMRegister(r, 32); }
static inline ARMRegister R(Register r, Width w) {
return ARMRegister(r, unsigned(w));
}
void MacroAssemblerCompat::boxValue(JSValueType type, Register src,
Register dest) {
#ifdef DEBUG
if (type == JSVAL_TYPE_INT32 || type == JSVAL_TYPE_BOOLEAN) {
Label upper32BitsZeroed;
movePtr(ImmWord(UINT32_MAX), dest);
asMasm().branchPtr(Assembler::BelowOrEqual, src, dest, &upper32BitsZeroed);
breakpoint();
bind(&upper32BitsZeroed);
}
#endif
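// Box the payload by OR-ing in the shifted type tag.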
Orr(ARMRegister(dest, 64), ARMRegister(src, 64),
Operand(ImmShiftedTag(type).value));
}
#ifdef ENABLE_WASM_SIMD
bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) {
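// Wasm defines SIMD shift counts modulo the lane width. Report the
// lane-width-minus-one mask; returning true tells the caller the count must
// be masked explicitly before the shift is emitted.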
switch (op) {
case wasm::SimdOp::I8x16Shl:
case wasm::SimdOp::I8x16ShrU:
case wasm::SimdOp::I8x16ShrS:
*mask = 7;
break;
case wasm::SimdOp::I16x8Shl:
case wasm::SimdOp::I16x8ShrU:
case wasm::SimdOp::I16x8ShrS:
*mask = 15;
break;
case wasm::SimdOp::I32x4Shl:
case wasm::SimdOp::I32x4ShrU:
case wasm::SimdOp::I32x4ShrS:
*mask = 31;
break;
case wasm::SimdOp::I64x2Shl:
case wasm::SimdOp::I64x2ShrU:
case wasm::SimdOp::I64x2ShrS:
*mask = 63;
break;
default:
MOZ_CRASH("Unexpected shift operation");
}
return true;
}
#endif
void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
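// Convert with round-to-nearest (ties to even), then clamp the signed result
// into the range [0, 255].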
ARMRegister dest(output, 32);
Fcvtns(dest, ARMFPRegister(input, 64));
{
vixl::UseScratchRegisterScope temps(this);
const ARMRegister scratch32 = temps.AcquireW();
Mov(scratch32, Operand(0xff));
Cmp(dest, scratch32);
Csel(dest, dest, scratch32, LessThan);
}
Cmp(dest, Operand(0));
Csel(dest, dest, wzr, GreaterThan);
}
js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() {
return *static_cast<js::jit::MacroAssembler*>(this);
}
const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const {
return *static_cast<const js::jit::MacroAssembler*>(this);
}
vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() {
return *static_cast<vixl::MacroAssembler*>(this);
}
const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const {
return *static_cast<const vixl::MacroAssembler*>(this);
}
void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) {
BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest);
label->patchAt()->bind(bo.getOffset());
label->setLinkMode(CodeLabel::MoveImmediate);
}
BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) {
const size_t numInst = 1; // Inserting one load instruction.
const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes.
uint8_t* literalAddr = (uint8_t*)(&ptr.value); // TODO: Should be const.
// Scratch space for generating the load instruction.
//
// allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
// index to the corresponding PoolEntry in the instruction itself.
//
// That index will be fixed up later when finishPool()
// walks over all marked loads and calls PatchConstantPoolLoad().
uint32_t instructionScratch = 0;
// Emit the instruction mask in the scratch space.
// The offset doesn't matter: it will be fixed up later.
vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
0);
// Add the entry to the pool, fix up the LDR imm19 offset,
// and add the completed instruction to the buffer.
return allocLiteralLoadEntry(numInst, numPoolEntries,
(uint8_t*)&instructionScratch, literalAddr);
}
BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr,
Register dest) {
const size_t numInst = 1; // Inserting one load instruction.
const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes.
uint8_t* literalAddr = (uint8_t*)(&ptr.value);
// Scratch space for generating the load instruction.
//
// allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary
// index to the corresponding PoolEntry in the instruction itself.
//
// That index will be fixed up later when finishPool()
// walks over all marked loads and calls PatchConstantPoolLoad().
uint32_t instructionScratch = 0;
// Emit the instruction mask in the scratch space.
// The offset doesn't matter: it will be fixed up later.
vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64),
0);
// Add the entry to the pool, fix up the LDR imm19 offset,
// and add the completed instruction to the buffer.
return allocLiteralLoadEntry(numInst, numPoolEntries,
(uint8_t*)&instructionScratch, literalAddr);
}
void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) {
loadPtr(src, dest);
}
void MacroAssemblerCompat::handleFailureWithHandlerTail(Label* profilerExitTail,
Label* bailoutTail) {
// Fail rather than silently create wrong code.
MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
// Reserve space for exception information.
int64_t size = (sizeof(ResumeFromException) + 7) & ~7;
Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size));
syncStackPtr();
MOZ_ASSERT(!x0.Is(PseudoStackPointer64));
Mov(x0, PseudoStackPointer64);
// Call the handler.
using Fn = void (*)(ResumeFromException* rfe);
asMasm().setupUnalignedABICall(r1);
asMasm().passABIArg(r0);
asMasm().callWithABI<Fn, HandleException>(
ABIType::General, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
Label entryFrame;
Label catch_;
Label finally;
Label returnBaseline;
Label returnIon;
Label bailout;
Label wasm;
Label wasmCatch;
// Check the `asMasm` calls above didn't mess with the StackPointer identity.
MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfKind()), r0);
asMasm().branch32(Assembler::Equal, r0,
Imm32(ExceptionResumeKind::EntryFrame), &entryFrame);
asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch),
&catch_);
asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally),
&finally);
asMasm().branch32(Assembler::Equal, r0,
Imm32(ExceptionResumeKind::ForcedReturnBaseline),
&returnBaseline);
asMasm().branch32(Assembler::Equal, r0,
Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon);
asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout),
&bailout);
asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Wasm),
&wasm);
asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch),
&wasmCatch);
breakpoint(); // Invalid kind.
// No exception handler. Load the error value, restore state and return from
// the entry frame.
bind(&entryFrame);
moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
loadPtr(
Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
FramePointer);
loadPtr(
Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
PseudoStackPointer);
// `retn` does indeed sync the stack pointer, but before doing that it reads
// from the stack. Consequently, if we remove this call to syncStackPointer
// then we take on the requirement to prove that the immediately preceding
// loadPtr produces a value for PSP which maintains the SP <= PSP invariant.
// That's a proof burden we don't want to take on. In general it would be
// good to move (at some time in the future, not now) to a world where
// *every* assignment to PSP or SP is followed immediately by a copy into
// the other register. That would make all required correctness proofs
// trivial in the sense that it requires only local inspection of code
// immediately following (dominated by) any such assignment.
syncStackPtr();
retn(Imm32(1 * sizeof(void*))); // Pop from stack and return.
// If we found a catch handler, this must be a baseline frame. Restore state
// and jump to the catch block.
bind(&catch_);
loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()),
r0);
loadPtr(
Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
FramePointer);
loadPtr(
Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
PseudoStackPointer);
syncStackPtr();
Br(x0);
// If we found a finally block, this must be a baseline frame. Push three
// values expected by the finally block: the exception, the exception stack,
// and BooleanValue(true).
bind(&finally);
ARMRegister exception = x1;
Ldr(exception, MemOperand(PseudoStackPointer64,
ResumeFromException::offsetOfException()));
ARMRegister exceptionStack = x2;
Ldr(exceptionStack,
MemOperand(PseudoStackPointer64,
ResumeFromException::offsetOfExceptionStack()));
Ldr(x0,
MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfTarget()));
Ldr(ARMRegister(FramePointer, 64),
MemOperand(PseudoStackPointer64,
ResumeFromException::offsetOfFramePointer()));
Ldr(PseudoStackPointer64,
MemOperand(PseudoStackPointer64,
ResumeFromException::offsetOfStackPointer()));
syncStackPtr();
push(exception);
push(exceptionStack);
pushValue(BooleanValue(true));
Br(x0);
// Return BaselineFrame->returnValue() to the caller.
// Used in debug mode and for GeneratorReturn.
Label profilingInstrumentation;
bind(&returnBaseline);
loadPtr(
Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
FramePointer);
loadPtr(
Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
PseudoStackPointer);
// See comment further up beginning "`retn` does indeed sync the stack
// pointer". That comment applies here too.
syncStackPtr();
loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()),
JSReturnOperand);
jump(&profilingInstrumentation);
// Return the given value to the caller.
bind(&returnIon);
loadValue(
Address(PseudoStackPointer, ResumeFromException::offsetOfException()),
JSReturnOperand);
loadPtr(
Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)),
FramePointer);
loadPtr(
Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)),
PseudoStackPointer);
syncStackPtr();
// If profiling is enabled, update lastProfilingFrame to refer to the caller
// frame before returning. This code is shared by ForcedReturnIon
// and ForcedReturnBaseline.
bind(&profilingInstrumentation);
{
Label skipProfilingInstrumentation;
AbsoluteAddress addressOfEnabled(
asMasm().runtime()->geckoProfiler().addressOfEnabled());
asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
&skipProfilingInstrumentation);
jump(profilerExitTail);
bind(&skipProfilingInstrumentation);
}
movePtr(FramePointer, PseudoStackPointer);
syncStackPtr();
vixl::MacroAssembler::Pop(ARMRegister(FramePointer, 64));
vixl::MacroAssembler::Pop(vixl::lr);
syncStackPtr();
vixl::MacroAssembler::Ret(vixl::lr);
// If we are bailing out to baseline to handle an exception, jump to the
// bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success.
bind(&bailout);
Ldr(x2, MemOperand(PseudoStackPointer64,
ResumeFromException::offsetOfBailoutInfo()));
Ldr(PseudoStackPointer64,
MemOperand(PseudoStackPointer64,
ResumeFromException::offsetOfStackPointer()));
syncStackPtr();
Mov(x0, 1);
jump(bailoutTail);
// If we are throwing and the innermost frame was a wasm frame, reset SP and
// FP; SP is pointing to the unwound return address to the wasm entry, so
// we can just ret().
bind(&wasm);
Ldr(x29, MemOperand(PseudoStackPointer64,
ResumeFromException::offsetOfFramePointer()));
Ldr(PseudoStackPointer64,
MemOperand(PseudoStackPointer64,
ResumeFromException::offsetOfStackPointer()));
syncStackPtr();
Mov(x23, int64_t(wasm::FailInstanceReg));
ret();
// Found a wasm catch handler, restore state and jump to it.
bind(&wasmCatch);
loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()),
r0);
loadPtr(
Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()),
r29);
loadPtr(
Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()),
PseudoStackPointer);
syncStackPtr();
Br(x0);
MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
}
void MacroAssemblerCompat::profilerEnterFrame(Register framePtr,
Register scratch) {
asMasm().loadJSContext(scratch);
loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
storePtr(framePtr,
Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
storePtr(ImmPtr(nullptr),
Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
}
void MacroAssemblerCompat::profilerExitFrame() {
jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail());
}
Assembler::Condition MacroAssemblerCompat::testStringTruthy(
bool truthy, const ValueOperand& value) {
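// A string is truthy iff its length is non-zero: load the length, compare it
// against zero, and return the condition for the caller to branch on.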
vixl::UseScratchRegisterScope temps(this);
const Register scratch = temps.AcquireX().asUnsized();
const ARMRegister scratch32(scratch, 32);
const ARMRegister scratch64(scratch, 64);
MOZ_ASSERT(value.valueReg() != scratch);
unboxString(value, scratch);
Ldr(scratch32, MemOperand(scratch64, JSString::offsetOfLength()));
Cmp(scratch32, Operand(0));
return truthy ? Condition::NonZero : Condition::Zero;
}
Assembler::Condition MacroAssemblerCompat::testBigIntTruthy(
bool truthy, const ValueOperand& value) {
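// A BigInt is truthy iff its digit length is non-zero (i.e. it is not zero).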
vixl::UseScratchRegisterScope temps(this);
const Register scratch = temps.AcquireX().asUnsized();
MOZ_ASSERT(value.valueReg() != scratch);
unboxBigInt(value, scratch);
load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch);
cmp32(scratch, Imm32(0));
return truthy ? Condition::NonZero : Condition::Zero;
}
void MacroAssemblerCompat::breakpoint() {
// Note, other payloads are possible, but GDB is known to misinterpret them
// sometimes and iloop on the breakpoint instead of stopping properly.
Brk(0xf000);
}
// Either `any` is valid or `sixtyfour` is valid. Return a 32-bit ARMRegister
// in the former case and an ARMRegister of the desired size in the latter case.
static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour,
unsigned size = 64) {
MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid()));
if (sixtyfour == Register64::Invalid()) {
return ARMRegister(any.gpr(), 32);
}
return ARMRegister(sixtyfour.reg, size);
}
// Assert that `sixtyfour` is invalid and then return an FP register from `any`
// of the desired size.
static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour,
unsigned size) {
MOZ_ASSERT(sixtyfour == Register64::Invalid());
return ARMFPRegister(any.fpu(), size);
}
void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
Register memoryBase_, Register ptr_,
AnyRegister outany, Register64 out64) {
access.assertOffsetInGuardPages();
uint32_t offset = access.offset();
MOZ_ASSERT(memoryBase_ != ptr_);
ARMRegister memoryBase(memoryBase_, 64);
ARMRegister ptr(ptr_, 64);
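// Fold any nonzero offset into a scratch register so the access below uses a
// simple base+index addressing form.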
if (offset) {
vixl::UseScratchRegisterScope temps(this);
ARMRegister scratch = temps.AcquireX();
Add(scratch, ptr, Operand(offset));
MemOperand srcAddr(memoryBase, scratch);
wasmLoadImpl(access, srcAddr, outany, out64);
} else {
MemOperand srcAddr(memoryBase, ptr);
wasmLoadImpl(access, srcAddr, outany, out64);
}
}
void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
MemOperand srcAddr, AnyRegister outany,
Register64 out64) {
MOZ_ASSERT_IF(access.isSplatSimd128Load() || access.isWidenSimd128Load(),
access.type() == Scalar::Float64);
// NOTE: the generated code must match the assembly code in gen_load in
// GenerateAtomicOperations.py
asMasm().memoryBarrierBefore(access.sync());
FaultingCodeOffset fco;
switch (access.type()) {
case Scalar::Int8:
fco = Ldrsb(SelectGPReg(outany, out64), srcAddr);
break;
case Scalar::Uint8:
fco = Ldrb(SelectGPReg(outany, out64), srcAddr);
break;
case Scalar::Int16:
fco = Ldrsh(SelectGPReg(outany, out64), srcAddr);
break;
case Scalar::Uint16:
fco = Ldrh(SelectGPReg(outany, out64), srcAddr);
break;
case Scalar::Int32:
if (out64 != Register64::Invalid()) {
fco = Ldrsw(SelectGPReg(outany, out64), srcAddr);
} else {
fco = Ldr(SelectGPReg(outany, out64, 32), srcAddr);
}
break;
case Scalar::Uint32:
fco = Ldr(SelectGPReg(outany, out64, 32), srcAddr);
break;
case Scalar::Int64:
fco = Ldr(SelectGPReg(outany, out64), srcAddr);
break;
case Scalar::Float32:
// LDR does the right thing also for access.isZeroExtendSimd128Load()
fco = Ldr(SelectFPReg(outany, out64, 32), srcAddr);
break;
case Scalar::Float64:
if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) {
ScratchSimd128Scope scratch_(asMasm());
ARMFPRegister scratch = Simd1D(scratch_);
fco = Ldr(scratch, srcAddr);
if (access.isSplatSimd128Load()) {
Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0);
} else {
MOZ_ASSERT(access.isWidenSimd128Load());
switch (access.widenSimdOp()) {
case wasm::SimdOp::V128Load8x8S:
Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
break;
case wasm::SimdOp::V128Load8x8U:
Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0);
break;
case wasm::SimdOp::V128Load16x4S:
Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
break;
case wasm::SimdOp::V128Load16x4U:
Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0);
break;
case wasm::SimdOp::V128Load32x2S:
Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
break;
case wasm::SimdOp::V128Load32x2U:
Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0);
break;
default:
MOZ_CRASH("Unexpected widening op for wasmLoad");
}
}
} else {
// LDR does the right thing also for access.isZeroExtendSimd128Load()
fco = Ldr(SelectFPReg(outany, out64, 64), srcAddr);
}
break;
case Scalar::Simd128:
fco = Ldr(SelectFPReg(outany, out64, 128), srcAddr);
break;
case Scalar::Uint8Clamped:
case Scalar::BigInt64:
case Scalar::BigUint64:
case Scalar::MaxTypedArrayViewType:
MOZ_CRASH("unexpected array type");
}
append(access, wasm::TrapMachineInsnForLoad(byteSize(access.type())), fco);
asMasm().memoryBarrierAfter(access.sync());
}
// Return true if `address` can be represented as an immediate (possibly scaled
// by the access size) in an LDR/STR type instruction.
//
// For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro().
static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) {
// The predicates below operate on signed values only.
if (address > INT64_MAX) {
return false;
}
// The access size is always a power of 2, so computing the log amounts to
// counting trailing zeroes.
unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize);
return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) ||
MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize));
}
void MacroAssemblerCompat::wasmLoadAbsolute(
const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address,
AnyRegister output, Register64 out64) {
if (!IsLSImmediateOffset(address, access.byteSize())) {
// The access will require the constant to be loaded into a temp register.
// Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting
// trap information.
//
// Almost all constant addresses will in practice be handled by a single MOV
// so do not worry about additional optimizations here.
vixl::UseScratchRegisterScope temps(this);
ARMRegister scratch = temps.AcquireX();
Mov(scratch, address);
MemOperand srcAddr(X(memoryBase), scratch);
wasmLoadImpl(access, srcAddr, output, out64);
} else {
MemOperand srcAddr(X(memoryBase), address);
wasmLoadImpl(access, srcAddr, output, out64);
}
}
void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
AnyRegister valany, Register64 val64,
Register memoryBase_, Register ptr_) {
access.assertOffsetInGuardPages();
uint32_t offset = access.offset();
ARMRegister memoryBase(memoryBase_, 64);
ARMRegister ptr(ptr_, 64);
if (offset) {
vixl::UseScratchRegisterScope temps(this);
ARMRegister scratch = temps.AcquireX();
Add(scratch, ptr, Operand(offset));
MemOperand destAddr(memoryBase, scratch);
wasmStoreImpl(access, destAddr, valany, val64);
} else {
MemOperand destAddr(memoryBase, ptr);
wasmStoreImpl(access, destAddr, valany, val64);
}
}
void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
MemOperand dstAddr, AnyRegister valany,
Register64 val64) {
// NOTE: the generated code must match the assembly code in gen_store in
// GenerateAtomicOperations.py
asMasm().memoryBarrierBefore(access.sync());
FaultingCodeOffset fco;
switch (access.type()) {
case Scalar::Int8:
case Scalar::Uint8:
fco = Strb(SelectGPReg(valany, val64), dstAddr);
break;
case Scalar::Int16:
case Scalar::Uint16:
fco = Strh(SelectGPReg(valany, val64), dstAddr);
break;
case Scalar::Int32:
case Scalar::Uint32:
fco = Str(SelectGPReg(valany, val64), dstAddr);
break;
case Scalar::Int64:
fco = Str(SelectGPReg(valany, val64), dstAddr);
break;
case Scalar::Float32:
fco = Str(SelectFPReg(valany, val64, 32), dstAddr);
break;
case Scalar::Float64:
fco = Str(SelectFPReg(valany, val64, 64), dstAddr);
break;
case Scalar::Simd128:
fco = Str(SelectFPReg(valany, val64, 128), dstAddr);
break;
case Scalar::Uint8Clamped:
case Scalar::BigInt64:
case Scalar::BigUint64:
case Scalar::MaxTypedArrayViewType:
MOZ_CRASH("unexpected array type");
}
append(access, wasm::TrapMachineInsnForStore(byteSize(access.type())), fco);
asMasm().memoryBarrierAfter(access.sync());
}
void MacroAssemblerCompat::wasmStoreAbsolute(
const wasm::MemoryAccessDesc& access, AnyRegister value, Register64 value64,
Register memoryBase, uint64_t address) {
// See comments in wasmLoadAbsolute.
unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize());
if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) ||
IsImmLSUnscaled(int64_t(address)))) {
vixl::UseScratchRegisterScope temps(this);
ARMRegister scratch = temps.AcquireX();
Mov(scratch, address);
MemOperand destAddr(X(memoryBase), scratch);
wasmStoreImpl(access, destAddr, value, value64);
} else {
MemOperand destAddr(X(memoryBase), address);
wasmStoreImpl(access, destAddr, value, value64);
}
}
void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond,
ARMFPRegister dest,
ARMFPRegister lhs,
ARMFPRegister rhs) {
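// Map the generic condition onto the available ARM64 SIMD compares:
// NotEqual is synthesized as CMEQ followed by a bitwise NOT, and the
// less-than / below forms reuse the greater-than / above compares with the
// operands swapped.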
switch (cond) {
case Assembler::Equal:
Cmeq(dest, lhs, rhs);
break;
case Assembler::NotEqual:
Cmeq(dest, lhs, rhs);
Mvn(dest, dest);
break;
case Assembler::GreaterThan:
Cmgt(dest, lhs, rhs);
break;
case Assembler::GreaterThanOrEqual:
Cmge(dest, lhs, rhs);
break;
case Assembler::LessThan:
Cmgt(dest, rhs, lhs);
break;
case Assembler::LessThanOrEqual:
Cmge(dest, rhs, lhs);
break;
case Assembler::Above:
Cmhi(dest, lhs, rhs);
break;
case Assembler::AboveOrEqual:
Cmhs(dest, lhs, rhs);
break;
case Assembler::Below:
Cmhi(dest, rhs, lhs);
break;
case Assembler::BelowOrEqual:
Cmhs(dest, rhs, lhs);
break;
default:
MOZ_CRASH("Unexpected SIMD integer condition");
}
}
void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond,
ARMFPRegister dest,
ARMFPRegister lhs,
ARMFPRegister rhs) {
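// Same scheme as compareSimd128Int: NotEqual is FCMEQ plus a bitwise NOT,
// and the less-than forms swap the operands of the greater-than compares.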
switch (cond) {
case Assembler::Equal:
Fcmeq(dest, lhs, rhs);
break;
case Assembler::NotEqual:
Fcmeq(dest, lhs, rhs);
Mvn(dest, dest);
break;
case Assembler::GreaterThan:
Fcmgt(dest, lhs, rhs);
break;
case Assembler::GreaterThanOrEqual:
Fcmge(dest, lhs, rhs);
break;
case Assembler::LessThan:
Fcmgt(dest, rhs, lhs);
break;
case Assembler::LessThanOrEqual:
Fcmge(dest, rhs, lhs);
break;
default:
MOZ_CRASH("Unexpected SIMD integer condition");
}
}
void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs,
FloatRegister dest,
bool isUnsigned) {
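// ARM64 variable vector shifts (SSHL/USHL) are left shifts by a per-lane
// signed count, so broadcast the scalar count, negate it, and shift left by
// the negated amount to get a right shift.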
ScratchSimd128Scope scratch_(asMasm());
ARMFPRegister shift = Simd16B(scratch_);
Dup(shift, ARMRegister(rhs, 32));
Neg(shift, shift);
if (isUnsigned) {
Ushl(Simd16B(dest), Simd16B(lhs), shift);
} else {
Sshl(Simd16B(dest), Simd16B(lhs), shift);
}
}
void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs,
FloatRegister dest,
bool isUnsigned) {
ScratchSimd128Scope scratch_(asMasm());
ARMFPRegister shift = Simd8H(scratch_);
Dup(shift, ARMRegister(rhs, 32));
Neg(shift, shift);
if (isUnsigned) {
Ushl(Simd8H(dest), Simd8H(lhs), shift);
} else {
Sshl(Simd8H(dest), Simd8H(lhs), shift);
}
}
void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs,
FloatRegister dest,
bool isUnsigned) {
ScratchSimd128Scope scratch_(asMasm());
ARMFPRegister shift = Simd4S(scratch_);
Dup(shift, ARMRegister(rhs, 32));
Neg(shift, shift);
if (isUnsigned) {
Ushl(Simd4S(dest), Simd4S(lhs), shift);
} else {
Sshl(Simd4S(dest), Simd4S(lhs), shift);
}
}
void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs,
FloatRegister dest,
bool isUnsigned) {
ScratchSimd128Scope scratch_(asMasm());
ARMFPRegister shift = Simd2D(scratch_);
Dup(shift, ARMRegister(rhs, 64));
Neg(shift, shift);
if (isUnsigned) {
Ushl(Simd2D(dest), Simd2D(lhs), shift);
} else {
Sshl(Simd2D(dest), Simd2D(lhs), shift);
}
}
void MacroAssembler::reserveStack(uint32_t amount) {
// TODO: This bumps |sp| every time we reserve using a second register.
// It would save some instructions if we had a fixed frame size.
vixl::MacroAssembler::Claim(Operand(amount));
adjustFrame(amount);
}
void MacroAssembler::Push(RegisterOrSP reg) {
if (IsHiddenSP(reg)) {
push(sp);
} else {
push(AsRegister(reg));
}
adjustFrame(sizeof(intptr_t));
}
//{{{ check_macroassembler_style
// ===============================================================
// MacroAssembler high-level usage.
void MacroAssembler::flush() { Assembler::flush(); }
// ===============================================================
// Stack manipulation functions.
// Routines for saving/restoring registers on the stack. The format is:
//
// (highest address)
//
// integer (X) regs in any order size: 8 * # int regs
//
// if # int regs is odd,
// then an 8 byte alignment hole size: 0 or 8
//
// double (D) regs in any order size: 8 * # double regs
//
// if # double regs is odd,
// then an 8 byte alignment hole size: 0 or 8
//
// vector (Q) regs in any order size: 16 * # vector regs
//
// (lowest address)
//
// Hence the size of the save area is a multiple of 16. And, provided that the base
// (highest) address is 16-aligned, then the vector reg save/restore accesses
// will also be 16-aligned, as will pairwise operations for the double regs.
//
// Implied by this is that the format of the double and vector dump area
// corresponds with what FloatRegister::GetPushSizeInBytes computes.
// See block comment in MacroAssembler.h for more details.
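// Worked example (assuming FloatRegister::GetPushSizeInBytes follows the
// layout above): saving 3 integer regs, 1 double and 2 vectors takes
// 3*8 + 8 (hole) + 8 + 8 (hole) + 2*16 = 80 bytes, which is what
// PushRegsInMaskSizeInBytes below computes.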
size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
size_t numIntRegs = set.gprs().size();
return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) +
FloatRegister::GetPushSizeInBytes(set.fpus());
}
// Generate code to dump the values in `set`, either on the stack if `dest` is
// `Nothing` or working backwards from the address denoted by `dest` if it is
// `Some`. These two cases are combined so as to minimise the chance of
// mistakenly generating different formats for the same `set`, given that the
// `Some` `dest` case is used extremely rarely.
static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set,
mozilla::Maybe<Address> dest) {
static_assert(sizeof(FloatRegisters::RegisterContent) == 16);
// If we're saving to arbitrary memory, check the destination is big enough.
if (dest) {
mozilla::DebugOnly<size_t> bytesRequired =
MacroAssembler::PushRegsInMaskSizeInBytes(set);
MOZ_ASSERT(dest->offset >= 0);
MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired);
}
// Note the high limit point; we'll check it again later.
mozilla::DebugOnly<size_t> maxExtentInitial =
dest ? dest->offset : masm->framePushed();
// Gather up the integer registers in groups of four, and either push each
// group as a single transfer so as to minimise the number of stack pointer
// changes, or write them individually to memory. Take care to ensure the
// space used remains 16-aligned.
for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) {
vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg,
vixl::NoCPUReg};
size_t i;
for (i = 0; i < 4 && iter.more(); i++) {
src[i] = ARMRegister(*iter, 64);
++iter;
}
MOZ_ASSERT(i > 0);
if (i == 1 || i == 3) {
// Ensure the stack remains 16-aligned
MOZ_ASSERT(!iter.more());
src[i] = vixl::xzr;
i++;
}
MOZ_ASSERT(i == 2 || i == 4);
if (dest) {
for (size_t j = 0; j < i; j++) {
Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr
: src[j].code());
dest->offset -= sizeof(intptr_t);
masm->storePtr(ireg, *dest);
}
} else {
masm->adjustFrame(i * 8);
masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
}
}
// Now the same for the FP double registers. Note that because of how
// ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either
// be present as a double register, or as a V128 register, but not both.
// Firstly, round up the registers to be pushed.
FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
vixl::CPURegister allSrcs[FloatRegisters::TotalPhys];
size_t numAllSrcs = 0;
for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
FloatRegister reg = *iter;
if (reg.isDouble()) {
MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
allSrcs[numAllSrcs] = ARMFPRegister(reg, 64);
numAllSrcs++;
} else {
MOZ_ASSERT(reg.isSimd128());
}
}
MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
if ((numAllSrcs & 1) == 1) {
// We've got an odd number of doubles. In order to maintain 16-alignment,
// push the last register twice. We'll skip over the duplicate in
// PopRegsInMaskIgnore.
allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1];
numAllSrcs++;
}
MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0);
// And now generate the transfers.
size_t i;
if (dest) {
for (i = 0; i < numAllSrcs; i++) {
FloatRegister freg =
FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
FloatRegisters::Kind::Double);
dest->offset -= sizeof(double);
masm->storeDouble(freg, *dest);
}
} else {
i = 0;
while (i < numAllSrcs) {
vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
vixl::NoCPUReg, vixl::NoCPUReg};
size_t j;
for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
src[j] = allSrcs[j + i];
}
masm->adjustFrame(8 * j);
masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
i += j;
}
}
MOZ_ASSERT(i == numAllSrcs);
// Finally, deal with the SIMD (V128) registers. This is a bit simpler
// as there's no need for special-casing to maintain 16-alignment.
numAllSrcs = 0;
for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
FloatRegister reg = *iter;
if (reg.isSimd128()) {
MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
allSrcs[numAllSrcs] = ARMFPRegister(reg, 128);
numAllSrcs++;
}
}
MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
// Generate the transfers.
if (dest) {
for (i = 0; i < numAllSrcs; i++) {
FloatRegister freg =
FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
FloatRegisters::Kind::Simd128);
dest->offset -= FloatRegister::SizeOfSimd128;
masm->storeUnalignedSimd128(freg, *dest);
}
} else {
i = 0;
while (i < numAllSrcs) {
vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
vixl::NoCPUReg, vixl::NoCPUReg};
size_t j;
for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
src[j] = allSrcs[j + i];
}
masm->adjustFrame(16 * j);
masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
i += j;
}
}
MOZ_ASSERT(i == numAllSrcs);
// Final overrun check.
if (dest) {
MOZ_ASSERT(maxExtentInitial - dest->offset ==
MacroAssembler::PushRegsInMaskSizeInBytes(set));
} else {
MOZ_ASSERT(masm->framePushed() - maxExtentInitial ==
MacroAssembler::PushRegsInMaskSizeInBytes(set));
}
}
void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
PushOrStoreRegsInMask(this, set, mozilla::Nothing());
}
void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
Register scratch) {
PushOrStoreRegsInMask(this, set, mozilla::Some(dest));
}
// This is a helper function for PopRegsInMaskIgnore below. It emits the
// loads described by dests[0] and [1] and offsets[0] and [1], generating a
// load-pair if it can.
static void GeneratePendingLoadsThenFlush(MacroAssembler* masm,
vixl::CPURegister* dests,
uint32_t* offsets,
uint32_t transactionSize) {
// Generate the loads ..
if (!dests[0].IsNone()) {
if (!dests[1].IsNone()) {
// [0] and [1] both present.
if (offsets[0] + transactionSize == offsets[1]) {
masm->Ldp(dests[0], dests[1],
MemOperand(masm->GetStackPointer64(), offsets[0]));
} else {
// Theoretically we could check for a load-pair with the destinations
// switched, but our callers will never generate that. Hence there's
// no loss in giving up at this point and generating two loads.
masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1]));
}
} else {
// [0] only.
masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
}
} else {
if (!dests[1].IsNone()) {
// [1] only. Can't happen because callers always fill [0] before [1].
MOZ_CRASH("GenerateLoadsThenFlush");
} else {
// Neither entry valid. This can happen.
}
}
// .. and flush.
dests[0] = dests[1] = vixl::NoCPUReg;
offsets[0] = offsets[1] = 0;
}
void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
LiveRegisterSet ignore) {
mozilla::DebugOnly<size_t> framePushedInitial = framePushed();
// The offset of the data from the stack pointer.
uint32_t offset = 0;
// The set of FP/SIMD registers we need to restore.
FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
// The set of registers to ignore. BroadcastToAllSizes() is used to avoid
// any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore`
// containing d17.
FloatRegisterSet ignoreFpusBroadcasted(
FloatRegister::BroadcastToAllSizes(ignore.fpus()));
// First recover the SIMD (V128) registers. This is straightforward in that
// we don't need to think about alignment holes.
// These three form a two-entry queue that holds loads that we know we
// need, but which we haven't yet emitted.
vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg};
uint32_t pendingOffsets[2] = {0, 0};
size_t nPending = 0;
for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
FloatRegister reg = *iter;
if (reg.isDouble()) {
continue;
}
MOZ_RELEASE_ASSERT(reg.isSimd128());
uint32_t offsetForReg = offset;
offset += FloatRegister::SizeOfSimd128;
if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
continue;
}
MOZ_ASSERT(nPending <= 2);
if (nPending == 2) {
GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
nPending = 0;
}
pendingDests[nPending] = ARMFPRegister(reg, 128);
pendingOffsets[nPending] = offsetForReg;
nPending++;
}
GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
nPending = 0;
MOZ_ASSERT((offset % 16) == 0);
// Now recover the FP double registers. This is more tricky in that we need
// to skip over the lowest-addressed of them if the number of them was odd.
if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) {
offset += sizeof(double);
}
for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
FloatRegister reg = *iter;
if (reg.isSimd128()) {
continue;
}
/* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */
uint32_t offsetForReg = offset;
offset += sizeof(double);
if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
continue;
}
MOZ_ASSERT(nPending <= 2);
if (nPending == 2) {
GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
nPending = 0;
}
pendingDests[nPending] = ARMFPRegister(reg, 64);
pendingOffsets[nPending] = offsetForReg;
nPending++;
}
GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
nPending = 0;
MOZ_ASSERT((offset % 16) == 0);
MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes());
// And finally recover the integer registers, again skipping an alignment
// hole if it exists.
if ((set.gprs().size() & 1) == 1) {
offset += sizeof(uint64_t);
}
for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) {
Register reg = *iter;
uint32_t offsetForReg = offset;
offset += sizeof(uint64_t);
if (ignore.has(reg)) {
continue;
}
MOZ_ASSERT(nPending <= 2);
if (nPending == 2) {
GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
nPending = 0;
}
pendingDests[nPending] = ARMRegister(reg, 64);
pendingOffsets[nPending] = offsetForReg;
nPending++;
}
GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
MOZ_ASSERT((offset % 16) == 0);
size_t bytesPushed = PushRegsInMaskSizeInBytes(set);
MOZ_ASSERT(offset == bytesPushed);
freeStack(bytesPushed);
}
void MacroAssembler::Push(Register reg) {
push(reg);
adjustFrame(sizeof(intptr_t));
}
void MacroAssembler::Push(Register reg1, Register reg2, Register reg3,
Register reg4) {
push(reg1, reg2, reg3, reg4);
adjustFrame(4 * sizeof(intptr_t));
}
void MacroAssembler::Push(const Imm32 imm) {
push(imm);
adjustFrame(sizeof(intptr_t));
}
void MacroAssembler::Push(const ImmWord imm) {
push(imm);
adjustFrame(sizeof(intptr_t));
}
void MacroAssembler::Push(const ImmPtr imm) {
push(imm);
adjustFrame(sizeof(intptr_t));
}
void MacroAssembler::Push(const ImmGCPtr ptr) {
push(ptr);
adjustFrame(sizeof(intptr_t));
}
void MacroAssembler::Push(FloatRegister f) {
push(f);
adjustFrame(sizeof(double));
}
void MacroAssembler::PushBoxed(FloatRegister reg) {
subFromStackPtr(Imm32(sizeof(double)));
boxDouble(reg, Address(getStackPointer(), 0));
adjustFrame(sizeof(double));
}
void MacroAssembler::Pop(Register reg) {
pop(reg);
adjustFrame(-1 * int64_t(sizeof(int64_t)));
}
void MacroAssembler::Pop(FloatRegister f) {
loadDouble(Address(getStackPointer(), 0), f);
freeStack(sizeof(double));
}
void MacroAssembler::Pop(const ValueOperand& val) {
pop(val);
adjustFrame(-1 * int64_t(sizeof(int64_t)));
}
void MacroAssembler::freeStackTo(uint32_t framePushed) {
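// Rather than adding back an accumulated amount, recompute the stack pointer
// directly as FramePointer - framePushed, then resync SP.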
MOZ_ASSERT(framePushed <= framePushed_);
Sub(GetStackPointer64(), X(FramePointer), Operand(int32_t(framePushed)));
syncStackPtr();
framePushed_ = framePushed;
}
// ===============================================================
// Simple call functions.
CodeOffset MacroAssembler::call(Register reg) {
// This sync has been observed (and is expected) to be necessary.
// eg testcase: tests/debug/bug1107525.js
syncStackPtr();
Blr(ARMRegister(reg, 64));
return CodeOffset(currentOffset());
}
CodeOffset MacroAssembler::call(Label* label) {
// This sync has been observed (and is expected) to be necessary.
// eg testcase: tests/basic/testBug504520Harder.js
syncStackPtr();
Bl(label);
return CodeOffset(currentOffset());
}
void MacroAssembler::call(ImmPtr imm) {
// This sync has been observed (and is expected) to be necessary.
// eg testcase: asm.js/testTimeout5.js
syncStackPtr();
vixl::UseScratchRegisterScope temps(this);
MOZ_ASSERT(temps.IsAvailable(ScratchReg64)); // ip0
temps.Exclude(ScratchReg64);
movePtr(imm, ScratchReg64.asUnsized());
Blr(ScratchReg64);
}
void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); }
CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) {
vixl::UseScratchRegisterScope temps(this);
const Register scratch = temps.AcquireX().asUnsized();
// This sync is believed to be necessary, although no case in jit-test/tests
// has been observed to cause SP != PSP here.
syncStackPtr();
movePtr(imm, scratch);
Blr(ARMRegister(scratch, 64));
return CodeOffset(currentOffset());
}
void MacroAssembler::call(const Address& addr) {
vixl::UseScratchRegisterScope temps(this);
const Register scratch = temps.AcquireX().asUnsized();
// This sync has been observed (and is expected) to be necessary.
// eg testcase: tests/backup-point-bug1315634.js
syncStackPtr();
loadPtr(addr, scratch);
Blr(ARMRegister(scratch, 64));
}
void MacroAssembler::call(JitCode* c) {
vixl::UseScratchRegisterScope temps(this);
const ARMRegister scratch64 = temps.AcquireX();
// This sync has been observed (and is expected) to be necessary.
// eg testcase: arrays/new-array-undefined-undefined-more-args-2.js
syncStackPtr();
BufferOffset off = immPool64(scratch64, uint64_t(c->raw()));
addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE);
blr(scratch64);
}
CodeOffset MacroAssembler::callWithPatch() {
// This needs to sync. Wasm goes through this one for intramodule calls.
//
// In other cases, wasm goes through masm.wasmCallImport(),
// masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which
// sync.
//
// This sync is believed to be necessary, although no case in jit-test/tests
// has been observed to cause SP != PSP here.
syncStackPtr();
bl(0, LabelDoc());
return CodeOffset(currentOffset());
}
void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
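// The BL to patch is the instruction immediately preceding callerOffset.
// Recompute the word-aligned displacement to calleeOffset and re-emit the BL
// with that 26-bit immediate.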
Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4));
MOZ_ASSERT(inst->IsBL());
ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4);
ptrdiff_t relTarget00 = relTarget >> 2;
MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
bl(inst, relTarget00);
}
CodeOffset MacroAssembler::farJumpWithPatch() {
vixl::UseScratchRegisterScope temps(this);
const ARMRegister scratch = temps.AcquireX();
const ARMRegister scratch2 = temps.AcquireX();
AutoForbidPoolsAndNops afp(this,
/* max number of instructions in scope = */ 7);
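// Emit an ADR/LDR/ADD/BR sequence that jumps to the branch-site address plus
// a 64-bit displacement stored inline in the two all-ones words below;
// patchFarJump() later overwrites that slot with the real distance.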
mozilla::DebugOnly<uint32_t> before = currentOffset();
align(8); // At most one nop
Label branch;
adr(scratch2, &branch);
ldr(scratch, vixl::MemOperand(scratch2, 4));
add(scratch2, scratch2, scratch);
CodeOffset offs(currentOffset());
bind(&branch);
br(scratch2);
Emit(UINT32_MAX);
Emit(UINT32_MAX);
mozilla::DebugOnly<uint32_t> after = currentOffset();
MOZ_ASSERT(after - before == 24 || after - before == 28);
return offs;
}
void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4));
Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8));
int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset();
MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);
inst1->SetInstructionBits((uint32_t)distance);
inst2->SetInstructionBits((uint32_t)(distance >> 32));
}
CodeOffset MacroAssembler::nopPatchableToCall() {
AutoForbidPoolsAndNops afp(this,
/* max number of instructions in scope = */ 1);
Nop();
return CodeOffset(currentOffset());
}
void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
uint8_t* inst = call - 4;
Instruction* instr = reinterpret_cast<Instruction*>(inst);
MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
bl(instr, (target - inst) >> 2);
}
void MacroAssembler::patchCallToNop(uint8_t* call) {
uint8_t* inst = call - 4;
Instruction* instr = reinterpret_cast<Instruction*>(inst);
MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
nop(instr);
}
void MacroAssembler::pushReturnAddress() {
MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
push(lr);
}
void MacroAssembler::popReturnAddress() {
MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
pop(lr);
}
// ===============================================================
// ABI function calls.
void MacroAssembler::setupUnalignedABICall(Register scratch) {
// Because wasm operates without the need for dynamic alignment of SP, it is
// implied that this routine should never be called when generating wasm.
MOZ_ASSERT(!IsCompilingWasm());
// The following won't work for SP -- needs slightly different logic.
MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));
setupNativeABICall();
dynamicAlignment_ = true;
int64_t alignment = ~(int64_t(ABIStackAlignment) - 1);
ARMRegister scratch64(scratch, 64);
MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64));
// Always save LR -- Baseline ICs assume that LR isn't modified.
push(lr);
// Remember the stack address on entry. This is reloaded in callWithABIPost
// below.
Mov(scratch64, PseudoStackPointer64);
// Make alignment, including the effective push of the previous sp.
Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8));
And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment));
syncStackPtr();
// Store previous sp to the top of the stack, aligned. This is also
// reloaded in callWithABIPost.
Str(scratch64, MemOperand(PseudoStackPointer64, 0));
}
void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
// wasm operates without the need for dynamic alignment of SP.
MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));
MOZ_ASSERT(inCall_);
uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
// ARM64 *really* wants SP to always be 16-aligned, so ensure this now.
if (dynamicAlignment_) {
stackForCall += ComputeByteAlignment(stackForCall, StackAlignment);
} else {
// This can happen when we attach out-of-line stubs for rare cases. For
// example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line
// chunk.
uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
stackForCall += ComputeByteAlignment(
stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
}
*stackAdjust = stackForCall;
reserveStack(*stackAdjust);
{
enoughMemory_ &= moveResolver_.resolve();
if (!enoughMemory_) {
return;
}
MoveEmitter emitter(*this);
emitter.emit(moveResolver_);
emitter.finish();
}
assertStackAlignment(ABIStackAlignment);
}
void MacroAssembler::callWithABIPost(uint32_t stackAdjust, ABIType result,
bool callFromWasm) {
// wasm operates without the need for dynamic alignment of SP.
MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));
// Call boundaries communicate stack via SP, so we must resync PSP now.
initPseudoStackPtr();
freeStack(stackAdjust);
if (dynamicAlignment_) {
// This then-clause makes more sense if you first read
// setupUnalignedABICall above.
//
// Restore the stack pointer from entry. The stack pointer will have been
// saved by setupUnalignedABICall. This is fragile in that it assumes
// that uses of this routine (callWithABIPost) with `dynamicAlignment_ ==
// true` are preceded by matching calls to setupUnalignedABICall. But
// there's nothing that enforces that mechanically. If we really want to
// enforce this, we could add a debug-only CallWithABIState enum to the
// MacroAssembler and assert that setupUnalignedABICall updates it before
// we get here, then reset it to its initial state.
Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0));
syncStackPtr();
// Restore LR. This restores LR to the value stored by
// setupUnalignedABICall, which should have been called just before
// callWithABIPre. This is, per the above comment, also fragile.
pop(lr);
// SP may be < PSP now. That is expected from the behaviour of `pop`. It
// is not clear why the following `syncStackPtr` is necessary, but it is:
// without it, the following test segfaults:
// tests/backup-point-bug1315634.js
syncStackPtr();
}
// If the ABI's return regs are where ION is expecting them, then
// no other work needs to be done.
#ifdef DEBUG
MOZ_ASSERT(inCall_);
inCall_ = false;
#endif
}
void MacroAssembler::callWithABINoProfiler(Register fun, ABIType result) {
vixl::UseScratchRegisterScope temps(this);
const Register scratch = temps.AcquireX().asUnsized();
movePtr(fun, scratch);
uint32_t stackAdjust;
callWithABIPre(&stackAdjust);
call(scratch);
callWithABIPost(stackAdjust, result);
}
void MacroAssembler::callWithABINoProfiler(const Address& fun, ABIType result) {
vixl::UseScratchRegisterScope temps(this);
const Register scratch = temps.AcquireX().asUnsized();
loadPtr(fun, scratch);
uint32_t stackAdjust;
callWithABIPre(&stackAdjust);
call(scratch);
callWithABIPost(stackAdjust, result);
}
// ===============================================================
// Jit Frames.
uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
enterNoPool(3);
Label fakeCallsite;
Adr(ARMRegister(scratch, 64), &fakeCallsite);
Push(scratch);
bind(&fakeCallsite);
uint32_t pseudoReturnOffset = currentOffset();
leaveNoPool();
return pseudoReturnOffset;
}
bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
asMasm().PushFrameDescriptor(FrameType::IonJS);
asMasm().Push(ImmPtr(fakeReturnAddr));
asMasm().Push(FramePointer);
return true;
}
// ===============================================================
// Move instructions
void MacroAssembler::moveValue(const TypedOrValueRegister& src,
const ValueOperand& dest) {
if (src.hasValue()) {
moveValue(src.valueReg(), dest);
return;
}
MIRType type = src.type();
AnyRegister reg = src.typedReg();
if (!IsFloatingPointType(type)) {
boxNonDouble(ValueTypeFromMIRType(type), reg.gpr(), dest);
return;
}
ScratchDoubleScope scratch(*this);
FloatRegister freg = reg.fpu();
if (type == MIRType::Float32) {
convertFloat32ToDouble(freg, scratch);
freg = scratch;
}
boxDouble(freg, dest, scratch);
}
void MacroAssembler::moveValue(const ValueOperand& src,
const ValueOperand& dest) {
if (src == dest) {
return;
}
movePtr(src.valueReg(), dest.valueReg());
}
void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
if (!src.isGCThing()) {
movePtr(ImmWord(src.asRawBits()), dest.valueReg());
return;
}
BufferOffset load =
movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg());
writeDataRelocation(src, load);
}
// ===============================================================
// Branch functions
void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
And(ARMRegister(buffer, 64), ARMRegister(ptr, 64),
Operand(int32_t(~gc::ChunkMask)));
loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
}
void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
Register temp, Label* label) {
MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
MOZ_ASSERT(ptr != temp);
MOZ_ASSERT(ptr != ScratchReg &&
ptr != ScratchReg2); // Both may be used internally.
MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2);
And(ARMRegister(temp, 64), ARMRegister(ptr, 64),
Operand(int32_t(~gc::ChunkMask)));
branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
ImmWord(0), label);
}
void MacroAssembler::branchValueIsNurseryCell(Condition cond,
const Address& address,
Register temp, Label* label) {
branchValueIsNurseryCellImpl(cond, address, temp, label);
}
void MacroAssembler::branchValueIsNurseryCell(Condition cond,
ValueOperand value, Register temp,
Label* label) {
branchValueIsNurseryCellImpl(cond, value, temp, label);
}
template <typename T>
void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond,
const T& value, Register temp,
Label* label) {
MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
MOZ_ASSERT(temp != ScratchReg &&
temp != ScratchReg2); // Both may be used internally.
Label done;
branchTestGCThing(Assembler::NotEqual, value,
cond == Assembler::Equal ? &done : label);
getGCThingValueChunk(value, temp);
branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
ImmWord(0), label);
bind(&done);
}
void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
const Value& rhs, Label* label) {
MOZ_ASSERT(cond == Equal || cond == NotEqual);
vixl::UseScratchRegisterScope temps(this);
const ARMRegister scratch64 = temps.AcquireX();
MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg());
moveValue(rhs, ValueOperand(scratch64.asUnsized()));
Cmp(ARMRegister(lhs.valueReg(), 64), scratch64);
B(label, cond);
}
// ========================================================================
// Memory access primitives.
template <typename T>
void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
MIRType valueType, const T& dest) {
MOZ_ASSERT(valueType < MIRType::Value);
if (valueType == MIRType::Double) {
boxDouble(value.reg().typedReg().fpu(), dest);
return;
}
if (value.constant()) {
storeValue(value.value(), dest);
} else {
storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(),
dest);
}
}
template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
MIRType valueType,
const Address& dest);
template void MacroAssembler::storeUnboxedValue(
const ConstantOrRegister& value, MIRType valueType,
const BaseObjectElementIndex& dest);
void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); }
// ========================================================================
// wasm support
FaultingCodeOffset MacroAssembler::wasmTrapInstruction() {
AutoForbidPoolsAndNops afp(this,
/* max number of instructions in scope = */ 1);
FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
Unreachable();
return fco;
}
void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
Register boundsCheckLimit, Label* ok) {
branch32(cond, index, boundsCheckLimit, ok);
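// Spectre mitigation: when index masking is enabled, conditionally replace
// the index with zero based on the comparison just performed.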
if (JitOptions.spectreIndexMasking) {
csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
}
}
void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index,
Address boundsCheckLimit, Label* ok) {
branch32(cond, index, boundsCheckLimit, ok);
if (JitOptions.spectreIndexMasking) {
csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond);
}
}
void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
Register64 boundsCheckLimit, Label* ok) {
branchPtr(cond, index.reg, boundsCheckLimit.reg, ok);
if (JitOptions.spectreIndexMasking) {
csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
cond);
}
}
void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
Address boundsCheckLimit, Label* ok) {
branchPtr(InvertCondition(cond), boundsCheckLimit, index.reg, ok);
if (JitOptions.spectreIndexMasking) {
csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64),
cond);
}
}
// FCVTZU behaves as follows:
//
// on NaN it produces zero
// on too large it produces UINT_MAX (for appropriate type)
// on too small it produces zero
//
// FCVTZS behaves as follows:
//
// on NaN it produces zero
// on too large it produces INT_MAX (for appropriate type)
// on too small it produces INT_MIN (ditto)
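//
// Hence, on the non-saturating paths below, a result equal to zero or to the
// type's extreme value may indicate NaN, overflow or underflow, so those
// sentinel results are re-checked on the out-of-line path.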
void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_,
Register output_,
bool isSaturating,
Label* oolEntry) {
ARMRegister output(output_, 32);
ARMFPRegister input(input_, 64);
Fcvtzu(output, input);
if (!isSaturating) {
Cmp(output, 0);
Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
B(oolEntry, Assembler::Equal);
}
}
void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_,
Register output_,
bool isSaturating,
Label* oolEntry) {
ARMRegister output(output_, 32);
ARMFPRegister input(input_, 32);
Fcvtzu(output, input);
if (!isSaturating) {
Cmp(output, 0);
Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
B(oolEntry, Assembler::Equal);
}
}
void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_,
Register output_,
bool isSaturating,
Label* oolEntry) {
ARMRegister output(output_, 32);
ARMFPRegister input(input_, 64);
Fcvtzs(output, input);
if (!isSaturating) {
Cmp(output, 0);
Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
B(oolEntry, Assembler::Equal);
}
}
void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_,
Register output_,
bool isSaturating,
Label* oolEntry) {
ARMRegister output(output_, 32);
ARMFPRegister input(input_, 32);
Fcvtzs(output, input);
if (!isSaturating) {
Cmp(output, 0);
Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual);
Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual);
B(oolEntry, Assembler::Equal);
}
}
void MacroAssembler::wasmTruncateDoubleToUInt64(
FloatRegister input_, Register64 output_, bool isSaturating,
Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
MOZ_ASSERT(tempDouble.isInvalid());
ARMRegister output(output_.reg, 64);
ARMFPRegister input(input_, 64);
Fcvtzu(output, input);
if (!isSaturating) {
Cmp(output, 0);
Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
B(oolEntry, Assembler::Equal);
bind(oolRejoin);
}
}
void MacroAssembler::wasmTruncateFloat32ToUInt64(
FloatRegister input_, Register64 output_, bool isSaturating,
Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
MOZ_ASSERT(tempDouble.isInvalid());
ARMRegister output(output_.reg, 64);
ARMFPRegister input(input_, 32);
Fcvtzu(output, input);
if (!isSaturating) {
Cmp(output, 0);
Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual);
B(oolEntry, Assembler::Equal);
bind(oolRejoin);
}
}
void MacroAssembler::wasmTruncateDoubleToInt64(
FloatRegister input_, Register64 output_, bool isSaturating,
Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
MOZ_ASSERT(tempDouble.isInvalid());
ARMRegister output(output_.reg, 64);
ARMFPRegister input(input_, 64);
Fcvtzs(output, input);
if (!isSaturating) {
Cmp(output, 0);
Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
B(oolEntry, Assembler::Equal);
bind(oolRejoin);
}
}
void MacroAssembler::wasmTruncateFloat32ToInt64(
FloatRegister input_, Register64 output_, bool isSaturating,
Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) {
MOZ_ASSERT(tempDouble.isInvalid());
ARMRegister output(output_.reg, 64);
ARMFPRegister input(input_, 32);
Fcvtzs(output, input);
if (!isSaturating) {
Cmp(output, 0);
Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual);
Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual);
B(oolEntry, Assembler::Equal);
bind(oolRejoin);
}
}
void MacroAssembler::oolWasmTruncateCheckF32ToI32(FloatRegister input,
Register output,
TruncFlags flags,
wasm::BytecodeOffset off,
Label* rejoin) {
Label notNaN;
branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
bind(&notNaN);
Label isOverflow;
const float two_31 = -float(INT32_MIN);
ScratchFloat32Scope fpscratch(*this);
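// The input is known not to be NaN here. The acceptable input range is
// (-1, 2^32) for unsigned results and [-2^31, 2^31) for signed results;
// anything outside these ranges falls through to the IntegerOverflow trap.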
if (flags & TRUNC_UNSIGNED) {
loadConstantFloat32(two_31 * 2, fpscratch);
branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&isOverflow);
loadConstantFloat32(-1.0f, fpscratch);
branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
} else {
loadConstantFloat32(two_31, fpscratch);
branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&isOverflow);
loadConstantFloat32(-two_31, fpscratch);
branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
}
bind(&isOverflow);
wasmTrap(wasm::Trap::IntegerOverflow, off);
}
void MacroAssembler::oolWasmTruncateCheckF64ToI32(FloatRegister input,
Register output,
TruncFlags flags,
wasm::BytecodeOffset off,
Label* rejoin) {
Label notNaN;
branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
bind(&notNaN);
Label isOverflow;
const double two_31 = -double(INT32_MIN);
ScratchDoubleScope fpscratch(*this);
if (flags & TRUNC_UNSIGNED) {
loadConstantDouble(two_31 * 2, fpscratch);
branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&isOverflow);
loadConstantDouble(-1.0, fpscratch);
branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
} else {
loadConstantDouble(two_31, fpscratch);
branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&isOverflow);
loadConstantDouble(-two_31 - 1, fpscratch);
branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
}
bind(&isOverflow);
wasmTrap(wasm::Trap::IntegerOverflow, off);
}
void MacroAssembler::oolWasmTruncateCheckF32ToI64(FloatRegister input,
Register64 output,
TruncFlags flags,
wasm::BytecodeOffset off,
Label* rejoin) {
Label notNaN;
branchFloat(Assembler::DoubleOrdered, input, input, &notNaN);
wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
bind(&notNaN);
Label isOverflow;
const float two_63 = -float(INT64_MIN);
ScratchFloat32Scope fpscratch(*this);
if (flags & TRUNC_UNSIGNED) {
loadConstantFloat32(two_63 * 2, fpscratch);
branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&isOverflow);
loadConstantFloat32(-1.0f, fpscratch);
branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
} else {
loadConstantFloat32(two_63, fpscratch);
branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&isOverflow);
loadConstantFloat32(-two_63, fpscratch);
branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
}
bind(&isOverflow);
wasmTrap(wasm::Trap::IntegerOverflow, off);
}
void MacroAssembler::oolWasmTruncateCheckF64ToI64(FloatRegister input,
Register64 output,
TruncFlags flags,
wasm::BytecodeOffset off,
Label* rejoin) {
Label notNaN;
branchDouble(Assembler::DoubleOrdered, input, input, &notNaN);
wasmTrap(wasm::Trap::InvalidConversionToInteger, off);
bind(&notNaN);
Label isOverflow;
const double two_63 = -double(INT64_MIN);
ScratchDoubleScope fpscratch(*this);
if (flags & TRUNC_UNSIGNED) {
loadConstantDouble(two_63 * 2, fpscratch);
branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&isOverflow);
loadConstantDouble(-1.0, fpscratch);
branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin);
} else {
loadConstantDouble(two_63, fpscratch);
branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&isOverflow);
loadConstantDouble(-two_63, fpscratch);
branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin);
}
bind(&isOverflow);
wasmTrap(wasm::Trap::IntegerOverflow, off);
}
void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
Register memoryBase, Register ptr,
AnyRegister output) {
wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid());
}
void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
Register memoryBase, Register ptr,
Register64 output) {
wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output);
}
void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
AnyRegister value, Register memoryBase,
Register ptr) {
wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr);
}
void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
Register64 value, Register memoryBase,
Register ptr) {
wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr);
}
void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch,
ExitFrameType type) {
// Wasm stubs use the native SP, not the PSP.
linkExitFrame(cxreg, scratch);
MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64()));
// SP has to be 16-byte aligned when we do a load/store, so push |type| twice
// (16 bytes) and then add 8 to SP so that only one copy remains on the stack.
// This leaves SP offset by 8 from 16-byte alignment.
move32(Imm32(int32_t(type)), scratch);
push(scratch, scratch);
Add(sp, sp, 8);
// Despite the above assertion, it is possible for control to flow from here
// to the code generated by
// MacroAssemblerCompat::handleFailureWithHandlerTail without any
// intervening assignment to PSP. But handleFailureWithHandlerTail assumes
// that PSP is the active stack pointer. Hence the following is necessary
// for safety. Note we can't use initPseudoStackPtr here as that would
// generate no instructions.
Mov(PseudoStackPointer64, sp);
}
void MacroAssembler::widenInt32(Register r) {
move32To64ZeroExtend(r, Register64(r));
}
// ========================================================================
// Convert floating point.
bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
Register temp) {
MOZ_ASSERT(temp == Register::Invalid());
Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
}
void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) {
Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64));
}
void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest,
Register temp) {
MOZ_ASSERT(temp == Register::Invalid());
Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
}
void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) {
Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64));
}
void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
convertInt64ToDouble(Register64(src), dest);
}
// ========================================================================
// Primitive atomic operations.
// The computed MemOperand must be Reg+0 because the load/store exclusive
// instructions only take a single pointer register.
static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
const Address& address,
Register scratch) {
if (address.offset == 0) {
return MemOperand(X(masm, address.base), 0);
}
masm.Add(X(scratch), X(masm, address.base), address.offset);
return MemOperand(X(scratch), 0);
}
static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
const BaseIndex& address,
Register scratch) {
masm.Add(X(scratch), X(masm, address.base),
Operand(X(address.index), vixl::LSL, address.scale));
if (address.offset) {
masm.Add(X(scratch), X(scratch), address.offset);
}
return MemOperand(X(scratch), 0);
}
// Sign- or zero-extends the value from srcType's width up to targetWidth, and
// leaves any bits above targetWidth zero.
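//
// For example, for srcType == Scalar::Int8 with targetWidth == Width::_64 this
// emits "sbfm x<dest>, x<src>, #0, #7" (i.e. sxtb), and for Scalar::Uint16 with
// targetWidth == Width::_32 it emits "ubfm w<dest>, w<src>, #0, #15" (uxth).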
static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType,
Width targetWidth, Register src, Register dest) {
bool signExtend = Scalar::isSignedIntType(srcType);
switch (Scalar::byteSize(srcType)) {
case 1:
if (signExtend) {
masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
} else {
masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
}
break;
case 2:
if (signExtend) {
masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
} else {
masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
}
break;
case 4:
if (targetWidth == Width::_64) {
if (signExtend) {
masm.Sbfm(X(dest), X(src), 0, 31);
} else {
masm.Ubfm(X(dest), X(src), 0, 31);
}
} else if (src != dest) {
masm.Mov(R(dest, targetWidth), R(src, targetWidth));
}
break;
case 8:
if (src != dest) {
masm.Mov(R(dest, targetWidth), R(src, targetWidth));
}
break;
default:
MOZ_CRASH();
}
}
// Exclusive-loads zero-extend their values to the full width of the X register.
//
// Note, we've promised to leave the high bits of the 64-bit register clear if
// the targetWidth is 32.
static void LoadExclusive(MacroAssembler& masm,
const wasm::MemoryAccessDesc* access,
Scalar::Type srcType, Width targetWidth,
MemOperand ptr, Register dest) {
bool signExtend = Scalar::isSignedIntType(srcType);
// With this address form, a single native ldxr* will be emitted, and the
// AutoForbidPoolsAndNops ensures that the metadata is emitted at the
// address of the ldxr*. Note that the use of AutoForbidPoolsAndNops is now
// a "second class" solution; the right way to do this would be to have the
// masm.<LoadInsn> calls produce a FaultingCodeOffset, and hand that value to
// `masm.append`.
MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0);
switch (Scalar::byteSize(srcType)) {
case 1: {
{
AutoForbidPoolsAndNops afp(
&masm,
/* max number of instructions in scope = */ 1);
if (access) {
masm.append(*access, wasm::TrapMachineInsn::Load8,
FaultingCodeOffset(masm.currentOffset()));
}
masm.Ldxrb(W(dest), ptr);
}
if (signExtend) {
masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7);
}
break;
}
case 2: {
{
AutoForbidPoolsAndNops afp(
&masm,
/* max number of instructions in scope = */ 1);
if (access) {
masm.append(*access, wasm::TrapMachineInsn::Load16,
FaultingCodeOffset(masm.currentOffset()));
}
masm.Ldxrh(W(dest), ptr);
}
if (signExtend) {
masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15);
}
break;
}
case 4: {
{
AutoForbidPoolsAndNops afp(
&masm,
/* max number of instructions in scope = */ 1);
if (access) {
masm.append(*access, wasm::TrapMachineInsn::Load32,
FaultingCodeOffset(masm.currentOffset()));
}
masm.Ldxr(W(dest), ptr);
}
if (targetWidth == Width::_64 && signExtend) {
masm.Sbfm(X(dest), X(dest), 0, 31);
}
break;
}
case 8: {
{
AutoForbidPoolsAndNops afp(
&masm,
/* max number of instructions in scope = */ 1);
if (access) {
masm.append(*access, wasm::TrapMachineInsn::Load64,
FaultingCodeOffset(masm.currentOffset()));
}
masm.Ldxr(X(dest), ptr);
}
break;
}
default: {
MOZ_CRASH();
}
}
}
static void StoreExclusive(MacroAssembler& masm, Scalar::Type type,
Register status, Register src, MemOperand ptr) {
// Note: these are not decorated with a TrapSite because they are assumed to be
// preceded by a LoadExclusive of the same address and width, which will always
// take the page fault first if the address is bad.
switch (Scalar::byteSize(type)) {
case 1:
masm.Stxrb(W(status), W(src), ptr);
break;
case 2:
masm.Stxrh(W(status), W(src), ptr);
break;
case 4:
masm.Stxr(W(status), W(src), ptr);
break;
case 8:
masm.Stxr(W(status), X(src), ptr);
break;
}
}
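// The STXR-family instructions write 0 to the status register on success and 1
// if the exclusive monitor was lost, which is why the LL-SC loops below retry
// while the status is non-zero (Cbnz).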
static bool HasAtomicInstructions(MacroAssembler& masm) {
return masm.asVIXL().GetCPUFeatures()->Has(vixl::CPUFeatures::kAtomics);
}
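// True when the single-instruction (LSE) atomics used below can operate
// directly on operands of this scalar type at the requested register width.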
static inline bool SupportedAtomicInstructionOperands(Scalar::Type type,
Width targetWidth) {
if (targetWidth == Width::_32) {
return byteSize(type) <= 4;
}
if (targetWidth == Width::_64) {
return byteSize(type) == 8;
}
return false;
}
template <typename T>
static void CompareExchange(MacroAssembler& masm,
const wasm::MemoryAccessDesc* access,
Scalar::Type type, Width targetWidth,
Synchronization sync, const T& mem, Register oldval,
Register newval, Register output) {
MOZ_ASSERT(oldval != output && newval != output);
vixl::UseScratchRegisterScope temps(&masm);
Register ptrScratch = temps.AcquireX().asUnsized();
MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
MOZ_ASSERT(ptr.base().asUnsized() != output);
if (HasAtomicInstructions(masm) &&
SupportedAtomicInstructionOperands(type, targetWidth)) {
masm.Mov(X(output), X(oldval));
// Casal uses the same atomic mechanism as Ldxr/Stxr and behaves the same way
// with respect to the "Inner Shareable" domain.
// gen_cmpxchg in GenerateAtomicOperations.py has not been updated to use it.
masm.memoryBarrierBefore(sync);
{
AutoForbidPoolsAndNops afp(&masm, /* number of insns = */ 1);
if (access) {
masm.append(*access, wasm::TrapMachineInsn::Atomic,
FaultingCodeOffset(masm.currentOffset()));
}
switch (byteSize(type)) {
case 1:
masm.Casalb(R(output, targetWidth), R(newval, targetWidth), ptr);
break;
case 2:
masm.Casalh(R(output, targetWidth), R(newval, targetWidth), ptr);
break;
case 4:
case 8:
masm.Casal(R(output, targetWidth), R(newval, targetWidth), ptr);
break;
default:
MOZ_CRASH("CompareExchange unsupported type");
}
}
masm.memoryBarrierAfter(sync);
SignOrZeroExtend(masm, type, targetWidth, output, output);
return;
}
// The target doesn't support the LSE atomic instructions, so generate an
// LL-SC loop. This requires only AArch64 v8.0.
Label again;
Label done;
// NOTE: the generated code must match the assembly code in gen_cmpxchg in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
Register scratch = temps.AcquireX().asUnsized();
masm.bind(&again);
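// Normalize oldval to the same sign- or zero-extended form that LoadExclusive
// produces for |output|, so that the comparison below is over comparable
// values.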
SignOrZeroExtend(masm, type, targetWidth, oldval, scratch);
LoadExclusive(masm, access, type, targetWidth, ptr, output);
masm.Cmp(R(output, targetWidth), R(scratch, targetWidth));
masm.B(&done, MacroAssembler::NotEqual);
StoreExclusive(masm, type, scratch, newval, ptr);
masm.Cbnz(W(scratch), &again);
masm.bind(&done);
masm.memoryBarrierAfter(sync);
}
template <typename T>
static void AtomicExchange(MacroAssembler& masm,
const wasm::MemoryAccessDesc* access,
Scalar::Type type, Width targetWidth,
Synchronization sync, const T& mem, Register value,
Register output) {
MOZ_ASSERT(value != output);
vixl::UseScratchRegisterScope temps(&masm);
Register ptrScratch = temps.AcquireX().asUnsized();
MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
if (HasAtomicInstructions(masm) &&
SupportedAtomicInstructionOperands(type, targetWidth)) {
// Swpal uses the same atomic mechanism as Ldxr/Stxr and behaves the same way
// with respect to the "Inner Shareable" domain.
// gen_exchange in GenerateAtomicOperations.py has not been updated to use it.
masm.memoryBarrierBefore(sync);
{
AutoForbidPoolsAndNops afp(&masm, /* number of insns = */ 1);
if (access) {
masm.append(*access, wasm::TrapMachineInsn::Atomic,
FaultingCodeOffset(masm.currentOffset()));
}
switch (byteSize(type)) {
case 1:
masm.Swpalb(R(value, targetWidth), R(output, targetWidth), ptr);
break;
case 2:
masm.Swpalh(R(value, targetWidth), R(output, targetWidth), ptr);
break;
case 4:
case 8:
masm.Swpal(R(value, targetWidth), R(output, targetWidth), ptr);
break;
default:
MOZ_CRASH("AtomicExchange unsupported type");
}
}
masm.memoryBarrierAfter(sync);
SignOrZeroExtend(masm, type, targetWidth, output, output);
return;
}
// The target doesn't support the LSE atomic instructions, so generate an
// LL-SC loop. This requires only AArch64 v8.0.
Label again;
// NOTE: the generated code must match the assembly code in gen_exchange in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
Register scratch = temps.AcquireX().asUnsized();
masm.bind(&again);
LoadExclusive(masm, access, type, targetWidth, ptr, output);
StoreExclusive(masm, type, scratch, value, ptr);
masm.Cbnz(W(scratch), &again);
masm.memoryBarrierAfter(sync);
}
template <bool wantResult, typename T>
static void AtomicFetchOp(MacroAssembler& masm,
const wasm::MemoryAccessDesc* access,
Scalar::Type type, Width targetWidth,
Synchronization sync, AtomicOp op, const T& mem,
Register value, Register temp, Register output) {
MOZ_ASSERT(value != output);
MOZ_ASSERT(value != temp);
MOZ_ASSERT_IF(wantResult, output != temp);
vixl::UseScratchRegisterScope temps(&masm);
Register ptrScratch = temps.AcquireX().asUnsized();
MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);
if (HasAtomicInstructions(masm) &&
SupportedAtomicInstructionOperands(type, targetWidth) &&
!isFloatingType(type)) {
// The LdXXXal/StXXXl forms use the same atomic mechanism as Ldxr/Stxr and
// behave the same way with respect to the "Inner Shareable" domain.
// gen_fetchop in GenerateAtomicOperations.py has not been updated to use them.
masm.memoryBarrierBefore(sync);
#define FETCH_OP_CASE(op, arg) \
{ \
AutoForbidPoolsAndNops afp(&masm, /* num insns = */ 1); \
if (access) { \
masm.append(*access, wasm::TrapMachineInsn::Atomic, \
FaultingCodeOffset(masm.currentOffset())); \
} \
switch (byteSize(type)) { \
case 1: \
if (wantResult) { \
masm.Ld##op##alb(R(arg, targetWidth), R(output, targetWidth), ptr); \
} else { \
masm.St##op##lb(R(arg, targetWidth), ptr); \
} \
break; \
case 2: \
if (wantResult) { \
masm.Ld##op##alh(R(arg, targetWidth), R(output, targetWidth), ptr); \
} else { \
masm.St##op##lh(R(arg, targetWidth), ptr); \
} \
break; \
case 4: \
case 8: \
if (wantResult) { \
masm.Ld##op##al(R(arg, targetWidth), R(output, targetWidth), ptr); \
} else { \
masm.St##op##l(R(arg, targetWidth), ptr); \
} \
break; \
default: \
MOZ_CRASH("AtomicFetchOp unsupported type"); \
} \
}
switch (op) {
case AtomicOp::Add:
FETCH_OP_CASE(add, value);
break;
case AtomicOp::Sub: {
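// LSE has no atomic fetch-and-subtract, so atomically add the negated value
// instead.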
Register scratch = temps.AcquireX().asUnsized();
masm.Neg(X(scratch), X(value));
FETCH_OP_CASE(add, scratch);
break;
}
case AtomicOp::And: {
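// LSE provides an atomic bit-clear (LDCLR, which stores old & ~Rs) rather
// than an atomic AND, so pass the complement of |value|.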
Register scratch = temps.AcquireX().asUnsized();
masm.Eor(X(scratch), X(value), Operand(~0));
FETCH_OP_CASE(clr, scratch);
break;
}
case AtomicOp::Or:
FETCH_OP_CASE(set, value);
break;
case AtomicOp::Xor:
FETCH_OP_CASE(eor, value);
break;
}
masm.memoryBarrierAfter(sync);
if (wantResult) {
SignOrZeroExtend(masm, type, targetWidth, output, output);
}
return;
}
#undef FETCH_OP_CASE
// The target doesn't support the LSE atomic instructions, so generate an
// LL-SC loop. This requires only AArch64 v8.0.
Label again;
// NOTE: the generated code must match the assembly code in gen_fetchop in
// GenerateAtomicOperations.py
masm.memoryBarrierBefore(sync);
Register scratch = temps.AcquireX().asUnsized();
masm.bind(&again);
LoadExclusive(masm, access, type, targetWidth, ptr, output);
switch (op) {
case AtomicOp::Add:
masm.Add(X(temp), X(output), X(value));
break;
case AtomicOp::Sub:
masm.Sub(X(temp), X(output), X(value));
break;
case AtomicOp::And:
masm.And(X(temp), X(output), X(value));
break;
case AtomicOp::Or:
masm.Orr(X(temp), X(output), X(value));
break;
case AtomicOp::Xor:
masm.Eor(X(temp), X(output), X(value));
break;
}
StoreExclusive(masm, type, scratch, temp, ptr);
masm.Cbnz(W(scratch), &again);
if (wantResult) {
SignOrZeroExtend(masm, type, targetWidth, output, output);
}
masm.memoryBarrierAfter(sync);
}
void MacroAssembler::compareExchange(Scalar::Type type, Synchronization sync,
const Address& mem, Register oldval,
Register newval, Register output) {
CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
output);
}
void MacroAssembler::compareExchange(Scalar::Type type, Synchronization sync,
const BaseIndex& mem, Register oldval,
Register newval, Register output) {
CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
output);
}
void MacroAssembler::compareExchange64(Synchronization sync, const Address& mem,
Register64 expect, Register64 replace,
Register64 output) {
CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
expect.reg, replace.reg, output.reg);
}
void MacroAssembler::compareExchange64(Synchronization sync,
const BaseIndex& mem, Register64 expect,
Register64 replace, Register64 output) {
CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
expect.reg, replace.reg, output.reg);
}
void MacroAssembler::atomicExchange64(Synchronization sync, const Address& mem,
Register64 value, Register64 output) {
AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
value.reg, output.reg);
}
void MacroAssembler::atomicExchange64(Synchronization sync,
const BaseIndex& mem, Register64 value,
Register64 output) {
AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem,
value.reg, output.reg);
}
void MacroAssembler::atomicFetchOp64(Synchronization sync, AtomicOp op,
Register64 value, const Address& mem,
Register64 temp, Register64 output) {
AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
value.reg, temp.reg, output.reg);
}
void MacroAssembler::atomicFetchOp64(Synchronization sync, AtomicOp op,
Register64 value, const BaseIndex& mem,
Register64 temp, Register64 output) {
AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
value.reg, temp.reg, output.reg);
}
void MacroAssembler::atomicEffectOp64(Synchronization sync, AtomicOp op,
Register64 value, const Address& mem,
Register64 temp) {
AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
value.reg, temp.reg, temp.reg);
}
void MacroAssembler::atomicEffectOp64(Synchronization sync, AtomicOp op,
Register64 value, const BaseIndex& mem,
Register64 temp) {
AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem,
value.reg, temp.reg, temp.reg);
}
void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
const Address& mem, Register oldval,
Register newval, Register output) {
CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
oldval, newval, output);
}
void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access,
const BaseIndex& mem, Register oldval,
Register newval, Register output) {
CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
oldval, newval, output);
}
void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization sync,
const Address& mem, Register value,
Register output) {
AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
}
void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization sync,
const BaseIndex& mem, Register value,
Register output) {
AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output);
}
void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
const Address& mem, Register value,
Register output) {
AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
value, output);
}
void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access,
const BaseIndex& mem, Register value,
Register output) {
AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem,
value, output);
}
void MacroAssembler::atomicFetchOp(Scalar::Type type, Synchronization sync,
AtomicOp op, Register value,
const Address& mem, Register temp,
Register output) {
AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
temp, output);
}
void MacroAssembler::atomicFetchOp(Scalar::Type type, Synchronization sync,
AtomicOp op, Register value,
const BaseIndex& mem, Register temp,
Register output) {
AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value,
temp, output);
}
void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
AtomicOp op, Register value,
const Address& mem, Register temp,
Register output) {
AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
op, mem, value, temp, output);
}
void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access,
AtomicOp op, Register value,
const BaseIndex& mem, Register temp,
Register output) {
AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(),
op, mem, value, temp, output);
}
void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
AtomicOp op, Register value,
const Address& mem, Register temp) {
AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
op, mem, value, temp, temp);
}
void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access,
AtomicOp op, Register value,
const BaseIndex& mem, Register temp) {
AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(),
op, mem, value, temp, temp);
}
void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
const Address& mem,
Register64 expect,
Register64 replace,
Register64 output) {
CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
expect.reg, replace.reg, output.reg);
}
void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
const BaseIndex& mem,
Register64 expect,
Register64 replace,
Register64 output) {
CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
expect.reg, replace.reg, output.reg);
}
void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
const Address& mem, Register64 value,
Register64 output) {
AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
value.reg, output.reg);
}
void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
const BaseIndex& mem,
Register64 value, Register64 output) {
AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem,
value.reg, output.reg);
}
void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
AtomicOp op, Register64 value,
const Address& mem, Register64 temp,
Register64 output) {
AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
op, mem, value.reg, temp.reg, output.reg);
}
void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
AtomicOp op, Register64 value,
const BaseIndex& mem, Register64 temp,
Register64 output) {
AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
op, mem, value.reg, temp.reg, output.reg);
}
void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access,
AtomicOp op, Register64 value,
const BaseIndex& mem,
Register64 temp) {
AtomicFetchOp<false>(*this, &access, Scalar::Int64, Width::_64, access.sync(),
op, mem, value.reg, temp.reg, temp.reg);
}
// ========================================================================
// JS atomic operations.
template <typename T>
static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
Synchronization sync, const T& mem,
Register oldval, Register newval, Register temp,
AnyRegister output) {
if (arrayType == Scalar::Uint32) {
masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
masm.convertUInt32ToDouble(temp, output.fpu());
} else {
masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
}
}
void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
Synchronization sync, const Address& mem,
Register oldval, Register newval,
Register temp, AnyRegister output) {
CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
}
void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
Synchronization sync,
const BaseIndex& mem, Register oldval,
Register newval, Register temp,
AnyRegister output) {
CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
}
template <typename T>
static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
Synchronization sync, const T& mem, Register value,
Register temp, AnyRegister output) {
if (arrayType == Scalar::Uint32) {
masm.atomicExchange(arrayType, sync, mem, value, temp);
masm.convertUInt32ToDouble(temp, output.fpu());
} else {
masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
}
}
void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
Synchronization sync, const Address& mem,
Register value, Register temp,
AnyRegister output) {
AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
}
void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
Synchronization sync,
const BaseIndex& mem, Register value,
Register temp, AnyRegister output) {
AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
}
template <typename T>
static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
Synchronization sync, AtomicOp op, Register value,
const T& mem, Register temp1, Register temp2,
AnyRegister output) {
if (arrayType == Scalar::Uint32) {
masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
masm.convertUInt32ToDouble(temp1, output.fpu());
} else {
masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
}
}
void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
Synchronization sync, AtomicOp op,
Register value, const Address& mem,
Register temp1, Register temp2,
AnyRegister output) {
AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}
void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
Synchronization sync, AtomicOp op,
Register value, const BaseIndex& mem,
Register temp1, Register temp2,
AnyRegister output) {
AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}
void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
Synchronization sync, AtomicOp op,
Register value, const BaseIndex& mem,
Register temp) {
AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
value, temp, temp);
}
void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
Synchronization sync, AtomicOp op,
Register value, const Address& mem,
Register temp) {
AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
value, temp, temp);
}
void MacroAssembler::flexibleQuotient32(Register rhs, Register srcDest,
bool isUnsigned,
const LiveRegisterSet&) {
quotient32(rhs, srcDest, isUnsigned);
}
void MacroAssembler::flexibleRemainder32(Register rhs, Register srcDest,
bool isUnsigned,
const LiveRegisterSet&) {
remainder32(rhs, srcDest, isUnsigned);
}
void MacroAssembler::flexibleDivMod32(Register rhs, Register srcDest,
Register remOutput, bool isUnsigned,
const LiveRegisterSet&) {
vixl::UseScratchRegisterScope temps(this);
ARMRegister scratch = temps.AcquireW();
ARMRegister src = temps.AcquireW();
// Preserve src for remainder computation
Mov(src, ARMRegister(srcDest, 32));
if (isUnsigned) {
Udiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32));
} else {
Sdiv(ARMRegister(srcDest, 32), src, ARMRegister(rhs, 32));
}
// Compute the remainder: rem = src - (src / rhs) * rhs.
Mul(scratch, ARMRegister(srcDest, 32), ARMRegister(rhs, 32));
Sub(ARMRegister(remOutput, 32), src, scratch);
}
CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
AutoForbidPoolsAndNops afp(this,
/* max number of instructions in scope = */ 1);
CodeOffset offset(currentOffset());
adr(ARMRegister(dest, 64), 0, LabelDoc());
return offset;
}
void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
CodeLocationLabel target) {
ptrdiff_t off = target - loc;
MOZ_RELEASE_ASSERT(vixl::IsInt21(off));
Instruction* cur = reinterpret_cast<Instruction*>(loc.raw());
MOZ_ASSERT(cur->IsADR());
vixl::Register rd = vixl::Register::XRegFromCode(cur->Rd());
adr(cur, rd, off);
}
// ========================================================================
// Spectre Mitigations.
void MacroAssembler::speculationBarrier() {
// Conditional speculation barrier.
csdb();
}
void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
Label* fail) {
ARMFPRegister iFlt(src, 32);
ARMRegister o64(dest, 64);
ARMRegister o32(dest, 32);
Label handleZero;
Label fin;
// Handle ±0 and NaN first.
Fcmp(iFlt, 0.0);
B(Assembler::Equal, &handleZero);
// NaN is always a bail condition, so just bail directly.
B(Assembler::Overflow, fail);
// Round towards negative infinity.
Fcvtms(o64, iFlt);
// Sign extend lower 32 bits to test if the result isn't an Int32.
Cmp(o64, Operand(o64, vixl::SXTW));
B(NotEqual, fail);
// Clear upper 32 bits.
Uxtw(o64, o64);
B(&fin);
bind(&handleZero);
// Move the float's bits into the output reg; if they are non-zero, the
// original value was -0.0.
Fmov(o32, iFlt);
Cbnz(o32, fail);
bind(&fin);
}
void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
Label* fail) {
ARMFPRegister iDbl(src, 64);
ARMRegister o64(dest, 64);
ARMRegister o32(dest, 32);
Label handleZero;
Label fin;
// Handle ±0 and NaN first.
Fcmp(iDbl, 0.0);
B(Assembler::Equal, &handleZero);
// NaN is always a bail condition, so just bail directly.
B(Assembler::Overflow, fail);
// Round towards negative infinity.
Fcvtms(o64, iDbl);
// Sign extend lower 32 bits to test if the result isn't an Int32.
Cmp(o64, Operand(o64, vixl::SXTW));
B(NotEqual, fail);
// Clear upper 32 bits.
Uxtw(o64, o64);
B(&fin);
bind(&handleZero);
// Move the double's bits into the output reg; if they are non-zero, the
// original value was -0.0.
Fmov(o64, iDbl);
Cbnz(o64, fail);
bind(&fin);
}
void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
Label* fail) {
ARMFPRegister iFlt(src, 32);
ARMRegister o64(dest, 64);
ARMRegister o32(dest, 32);
Label handleZero;
Label fin;
// Round towards positive infinity.
Fcvtps(o64, iFlt);
// Sign extend lower 32 bits to test if the result isn't an Int32.
Cmp(o64, Operand(o64, vixl::SXTW));
B(NotEqual, fail);
// We have to check for (-1, -0] and NaN when the result is zero.
Cbz(o64, &handleZero);
// Clear upper 32 bits.
Uxtw(o64, o64);
B(&fin);
// Bail if the input is in (-1, -0] or NaN.
bind(&handleZero);
// Move the float's bits into the output reg; if they are non-zero, the
// original value wasn't +0.0.
Fmov(o32, iFlt);
Cbnz(o32, fail);
bind(&fin);
}
void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
Label* fail) {
ARMFPRegister iDbl(src, 64);
ARMRegister o64(dest, 64);
ARMRegister o32(dest, 32);
Label handleZero;
Label fin;
// Round towards positive infinity.
Fcvtps(o64, iDbl);
// Sign extend lower 32 bits to test if the result isn't an Int32.
Cmp(o64, Operand(o64, vixl::SXTW));
B(NotEqual, fail);
// We have to check for (-1, -0] and NaN when the result is zero.
Cbz(o64, &handleZero);
// Clear upper 32 bits.
Uxtw(o64, o64);
B(&fin);
// Bail if the input is in (-1, -0] or NaN.
bind(&handleZero);
// Move the double's bits into the output reg; if they are non-zero, the
// original value wasn't +0.0.
Fmov(o64, iDbl);
Cbnz(o64, fail);
bind(&fin);
}
void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
Label* fail) {
ARMFPRegister src32(src, 32);
ARMRegister dest32(dest, 32);
ARMRegister dest64(dest, 64);
Label done, zeroCase;
// Convert the scalar to a signed 64-bit integer, rounding toward zero.
// In the case of overflow, the output is saturated.
// In the case of NaN and -0, the output is zero.
Fcvtzs(dest64, src32);
// If the output was zero, worry about special cases.
Cbz(dest64, &zeroCase);
// Sign extend lower 32 bits to test if the result isn't an Int32.
Cmp(dest64, Operand(dest64, vixl::SXTW));
B(NotEqual, fail);
// Clear upper 32 bits.
Uxtw(dest64, dest64);
// If the output was non-zero and wasn't saturated, just return it.
B(&done);
// Handle the case of a zero output:
// 1. The input may have been NaN, requiring a failure.
// 2. The input may have been in (-1,-0], requiring a failure.
{
bind(&zeroCase);
// Combine test for negative and NaN values using a single bitwise
// operation.
//
// | Decimal number | Bitwise representation |
// |----------------|------------------------|
// | -0 | 8000'0000 |
// | +0 | 0000'0000 |
// | +1 | 3f80'0000 |
// | NaN (or +Inf) | 7fyx'xxxx, y >= 8 |
// | -NaN (or -Inf) | ffyx'xxxx, y >= 8 |
//
// If either of the two most significant bits is set, the number isn't in [0, 1).
// (Recall that floating point numbers, except for NaN, are strictly ordered
// when comparing their bitwise representation as signed integers.)
Fmov(dest32, src32);
Lsr(dest32, dest32, 30);
Cbnz(dest32, fail);
}
bind(&done);
}
void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
Label* fail) {
ARMFPRegister src64(src, 64);
ARMRegister dest64(dest, 64);
ARMRegister dest32(dest, 32);
Label done, zeroCase;
// Convert the scalar to a signed 64-bit integer, rounding toward zero.
// In the case of overflow, the output is saturated.
// In the case of NaN and -0, the output is zero.
Fcvtzs(dest64, src64);
// If the output was zero, worry about special cases.
Cbz(dest64, &zeroCase);
// Sign extend lower 32 bits to test if the result isn't an Int32.
Cmp(dest64, Operand(dest64, vixl::SXTW));
B(NotEqual, fail);
// Clear upper 32 bits.
Uxtw(dest64, dest64);
// If the output was non-zero and wasn't saturated, just return it.
B(&done);
// Handle the case of a zero output:
// 1. The input may have been NaN, requiring a failure.
// 2. The input may have been in (-1,-0], requiring a failure.
{
bind(&zeroCase);
// Combine test for negative and NaN values using a single bitwise
// operation.
//
// | Decimal number | Bitwise representation |
// |----------------|------------------------|
// | -0 | 8000'0000'0000'0000 |
// | +0 | 0000'0000'0000'0000 |
// | +1 | 3ff0'0000'0000'0000 |
// | NaN (or +Inf) | 7ffx'xxxx'xxxx'xxxx |
// | -NaN (or -Inf) | fffx'xxxx'xxxx'xxxx |
//
// If either of the two most significant bits is set, the number isn't in [0, 1).
// (Recall that floating point numbers, except for NaN, are strictly ordered
// when comparing their bitwise representation as signed integers.)
Fmov(dest64, src64);
Lsr(dest64, dest64, 62);
Cbnz(dest64, fail);
}
bind(&done);
}
void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
FloatRegister temp, Label* fail) {
ARMFPRegister src32(src, 32);
ARMRegister dest32(dest, 32);
ARMRegister dest64(dest, 64);
Label negative, saturated, done;
// Branch to a slow path if input < 0.0 due to complicated rounding rules.
// Note that Fcmp with NaN unsets the negative flag.
Fcmp(src32, 0.0);
B(&negative, Assembler::Condition::lo);
// Handle the simple case of a positive input, and also -0 and NaN.
// Rounding proceeds with consideration of the fractional part of the input:
// 1. If > 0.5, round to integer with higher absolute value (so, up).
// 2. If < 0.5, round to integer with lower absolute value (so, down).
// 3. If = 0.5, round to +Infinity (so, up).
{
// Convert to signed 64-bit integer, rounding halfway cases away from zero.
// In the case of overflow, the output is saturated.
// In the case of NaN and -0, the output is zero.
Fcvtas(dest64, src32);
// In the case of zero, the input may have been NaN or -0, which must bail.
Cbnz(dest64, &saturated);
// Combine test for -0 and NaN values using a single bitwise operation.
// See truncFloat32ToInt32 for an explanation.
Fmov(dest32, src32);
Lsr(dest32, dest32, 30);
Cbnz(dest32, fail);
B(&done);
}
// Handle the complicated case of a negative input.
// Rounding proceeds with consideration of the fractional part of the input:
// 1. If > 0.5, round to integer with higher absolute value (so, down).
// 2. If < 0.5, round to integer with lower absolute value (so, up).
// 3. If = 0.5, round to +Infinity (so, up).
bind(&negative);
{
// Inputs in [-0.5, 0) are rounded to -0. Fail.
loadConstantFloat32(-0.5f, temp);
branchFloat(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);
// Other negative inputs need the biggest float32 less than 0.5 added.
loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
addFloat32(src, temp);
// Round all values toward -Infinity.
// In the case of overflow, the output is saturated.
// NaN and -0 are already handled by the "positive number" path above.
Fcvtms(dest64, temp);
}
bind(&saturated);
// Sign extend lower 32 bits to test if the result isn't an Int32.
Cmp(dest64, Operand(dest64, vixl::SXTW));
B(NotEqual, fail);
// Clear upper 32 bits.
Uxtw(dest64, dest64);
bind(&done);
}
void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
FloatRegister temp, Label* fail) {
ARMFPRegister src64(src, 64);
ARMRegister dest64(dest, 64);
ARMRegister dest32(dest, 32);
Label negative, saturated, done;
// Branch to a slow path if input < 0.0 due to complicated rounding rules.
// Note that Fcmp with NaN unsets the negative flag.
Fcmp(src64, 0.0);
B(&negative, Assembler::Condition::lo);
// Handle the simple case of a positive input, and also -0 and NaN.
// Rounding proceeds with consideration of the fractional part of the input:
// 1. If > 0.5, round to integer with higher absolute value (so, up).
// 2. If < 0.5, round to integer with lower absolute value (so, down).
// 3. If = 0.5, round to +Infinity (so, up).
{
// Convert to signed 64-bit integer, rounding halfway cases away from zero.
// In the case of overflow, the output is saturated.
// In the case of NaN and -0, the output is zero.
Fcvtas(dest64, src64);
// In the case of zero, the input may have been NaN or -0, which must bail.
Cbnz(dest64, &saturated);
// Combine test for -0 and NaN values using a single bitwise operation.
// See truncDoubleToInt32 for an explanation.
Fmov(dest64, src64);
Lsr(dest64, dest64, 62);
Cbnz(dest64, fail);
B(&done);
}
// Handle the complicated case of a negative input.
// Rounding proceeds with consideration of the fractional part of the input:
// 1. If > 0.5, round to integer with higher absolute value (so, down).
// 2. If < 0.5, round to integer with lower absolute value (so, up).
// 3. If = 0.5, round to +Infinity (so, up).
bind(&negative);
{
// Inputs in [-0.5, 0) are rounded to -0. Fail.
loadConstantDouble(-0.5, temp);
branchDouble(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);
// Other negative inputs need the biggest double less than 0.5 added.
loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
addDouble(src, temp);
// Round all values toward -Infinity.
// In the case of overflow, the output is saturated.
// NaN and -0 are already handled by the "positive number" path above.
Fcvtms(dest64, temp);
}
bind(&saturated);
// Sign extend lower 32 bits to test if the result isn't an Int32.
Cmp(dest64, Operand(dest64, vixl::SXTW));
B(NotEqual, fail);
// Clear upper 32 bits.
Uxtw(dest64, dest64);
bind(&done);
}
void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
FloatRegister dest) {
switch (mode) {
case RoundingMode::Up:
frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
return;
case RoundingMode::Down:
frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
return;
case RoundingMode::NearestTiesToEven:
frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
return;
case RoundingMode::TowardsZero:
frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
return;
}
MOZ_CRASH("unexpected mode");
}
void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
FloatRegister dest) {
switch (mode) {
case RoundingMode::Up:
frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
return;
case RoundingMode::Down:
frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
return;
case RoundingMode::NearestTiesToEven:
frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
return;
case RoundingMode::TowardsZero:
frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
return;
}
MOZ_CRASH("unexpected mode");
}
void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
FloatRegister output) {
ScratchDoubleScope scratch(*this);
// Double with only the sign bit set
loadConstantDouble(-0.0, scratch);
if (lhs != output) {
moveDouble(lhs, output);
}
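// BIT inserts bits from |rhs| into |output| wherever the corresponding bit of
// |scratch| is set; since scratch holds -0.0 (sign bit only), this copies
// rhs's sign onto lhs's magnitude.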
bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
}
void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
FloatRegister output) {
ScratchFloat32Scope scratch(*this);
// Float with only the sign bit set
loadConstantFloat32(-0.0f, scratch);
if (lhs != output) {
moveFloat32(lhs, output);
}
bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
}
void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
Register pointer) {
Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64),
Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift));
}
#ifdef ENABLE_WASM_TAIL_CALLS
void MacroAssembler::wasmMarkSlowCall() { Mov(x28, x28); }
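// 0xaa1c03fc is the encoding of "mov x28, x28" (orr x28, xzr, x28), the marker
// instruction emitted by wasmMarkSlowCall above.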
const int32_t SlowCallMarker = 0xaa1c03fc;
void MacroAssembler::wasmCheckSlowCallsite(Register ra, Label* notSlow,
Register temp1, Register temp2) {
MOZ_ASSERT(ra != temp2);
Ldr(W(temp2), MemOperand(X(ra), 0));
Cmp(W(temp2), Operand(SlowCallMarker));
B(Assembler::NotEqual, notSlow);
}
#endif // ENABLE_WASM_TAIL_CALLS
//}}} check_macroassembler_style
} // namespace jit
} // namespace js