WasmCompile.cpp - mozsearch

mozilla-central/js/src/wasm/WasmCompile.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-

 * vim: set ts=8 sts=2 et sw=2 tw=80:

 * Copyright 2015 Mozilla Foundation

 * Licensed under the Apache License, Version 2.0 (the "License");

 * you may not use this file except in compliance with the License.

 * You may obtain a copy of the License at

 *     http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

*/

#include "wasm/WasmCompile.h"

#include "mozilla/Maybe.h"

#include <algorithm>

#include "js/Conversions.h"

#include "js/Equality.h"

#include "js/ForOfIterator.h"

#include "js/PropertyAndElement.h"

#ifndef __wasi__

#  include "jit/ProcessExecutableMemory.h"

#endif

#include "jit/FlushICache.h"

#include "jit/JitOptions.h"

#include "util/Text.h"

#include "vm/HelperThreads.h"

#include "vm/JSAtomState.h"

#include "vm/Realm.h"

#include "wasm/WasmBaselineCompile.h"

#include "wasm/WasmFeatures.h"

#include "wasm/WasmGenerator.h"

#include "wasm/WasmIonCompile.h"

#include "wasm/WasmOpIter.h"

#include "wasm/WasmProcess.h"

#include "wasm/WasmSignalHandlers.h"

#include "wasm/WasmValidate.h"

using namespace js;

using namespace js::jit;

using namespace js::wasm;

uint32_t wasm::ObservedCPUFeatures() {

  enum Arch {

    X86 = 0x1,

    X64 = 0x2,

    ARM = 0x3,

    MIPS = 0x4,

    MIPS64 = 0x5,

    ARM64 = 0x6,

    LOONG64 = 0x7,

    RISCV64 = 0x8,

    ARCH_BITS = 3

};

#if defined(JS_CODEGEN_X86)

  MOZ_ASSERT(uint32_t(jit::CPUInfo::GetFingerprint()) <=

             (UINT32_MAX >> ARCH_BITS));

  return X86 | (uint32_t(jit::CPUInfo::GetFingerprint()) << ARCH_BITS);

#elif defined(JS_CODEGEN_X64)

  MOZ_ASSERT(uint32_t(jit::CPUInfo::GetFingerprint()) <=

             (UINT32_MAX >> ARCH_BITS));

  return X64 | (uint32_t(jit::CPUInfo::GetFingerprint()) << ARCH_BITS);

#elif defined(JS_CODEGEN_ARM)

  MOZ_ASSERT(jit::GetARMFlags() <= (UINT32_MAX >> ARCH_BITS));

  return ARM | (jit::GetARMFlags() << ARCH_BITS);

#elif defined(JS_CODEGEN_ARM64)

  MOZ_ASSERT(jit::GetARM64Flags() <= (UINT32_MAX >> ARCH_BITS));

  return ARM64 | (jit::GetARM64Flags() << ARCH_BITS);

#elif defined(JS_CODEGEN_MIPS64)

  MOZ_ASSERT(jit::GetMIPSFlags() <= (UINT32_MAX >> ARCH_BITS));

  return MIPS64 | (jit::GetMIPSFlags() << ARCH_BITS);

#elif defined(JS_CODEGEN_LOONG64)

  MOZ_ASSERT(jit::GetLOONG64Flags() <= (UINT32_MAX >> ARCH_BITS));

  return LOONG64 | (jit::GetLOONG64Flags() << ARCH_BITS);

#elif defined(JS_CODEGEN_RISCV64)

  MOZ_ASSERT(jit::GetRISCV64Flags() <= (UINT32_MAX >> ARCH_BITS));

  return RISCV64 | (jit::GetRISCV64Flags() << ARCH_BITS);

#elif defined(JS_CODEGEN_NONE) || defined(JS_CODEGEN_WASM32)

  return 0;

#else

#  error "unknown architecture"

#endif

bool FeatureOptions::init(JSContext* cx, HandleValue val) {

  if (val.isNullOrUndefined()) {

    return true;

#ifdef ENABLE_WASM_JS_STRING_BUILTINS

  if (JSStringBuiltinsAvailable(cx)) {

    if (!val.isObject()) {

      JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,

                               JSMSG_WASM_BAD_COMPILE_OPTIONS);

      return false;

    RootedObject obj(cx, &val.toObject());

    // Check the 'importedStringConstants' option

    RootedValue importedStringConstants(cx);

    if (!JS_GetProperty(cx, obj, "importedStringConstants",

                        &importedStringConstants)) {

      return false;

    if (importedStringConstants.isNullOrUndefined()) {

      this->jsStringConstants = false;

    } else if (importedStringConstants.isBoolean() &&

               importedStringConstants.toBoolean()) {

      // Temporary backwards compatibility hack to interpret 'true' as "'"

      this->jsStringConstants = true;

      UniqueChars jsStringConstantsNamespace = JS_smprintf("'");

      if (!jsStringConstantsNamespace) {

        return false;

      this->jsStringConstantsNamespace =

          js_new<ShareableChars>(std::move(jsStringConstantsNamespace));

      if (!this->jsStringConstantsNamespace) {

        return false;

    } else {

      this->jsStringConstants = true;

      RootedString importedStringConstantsString(

          cx, JS::ToString(cx, importedStringConstants));

      if (!importedStringConstantsString) {

        return false;

      UniqueChars jsStringConstantsNamespace =

          StringToNewUTF8CharsZ(cx, *importedStringConstantsString);

      if (!jsStringConstantsNamespace) {

        return false;

      this->jsStringConstantsNamespace =

          js_new<ShareableChars>(std::move(jsStringConstantsNamespace));

      if (!this->jsStringConstantsNamespace) {

        return false;

    // Get the `builtins` iterable

    RootedValue builtins(cx);

    if (!JS_GetProperty(cx, obj, "builtins", &builtins)) {

      return false;

    if (!builtins.isUndefined()) {

      JS::ForOfIterator iterator(cx);

      if (!iterator.init(builtins, JS::ForOfIterator::ThrowOnNonIterable)) {

        return false;

      RootedValue jsStringModule(cx, StringValue(cx->names().jsStringModule));

      RootedValue nextBuiltin(cx);

      while (true) {

        bool done;

        if (!iterator.next(&nextBuiltin, &done)) {

          return false;

        if (done) {

          break;

        bool jsStringBuiltins;

        if (!JS::LooselyEqual(cx, nextBuiltin, jsStringModule,

                              &jsStringBuiltins)) {

          return false;

        if (!jsStringBuiltins) {

          JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,

                                   JSMSG_WASM_UNKNOWN_BUILTIN);

          return false;

        if (this->jsStringBuiltins && jsStringBuiltins) {

          JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,

                                   JSMSG_WASM_DUPLICATE_BUILTIN);

          return false;

        this->jsStringBuiltins = jsStringBuiltins;

#endif

  return true;

FeatureArgs FeatureArgs::build(JSContext* cx, const FeatureOptions& options) {

  FeatureArgs features;

#define WASM_FEATURE(NAME, LOWER_NAME, ...) \

  features.LOWER_NAME = wasm::NAME##Available(cx);

  JS_FOR_WASM_FEATURES(WASM_FEATURE);

#undef WASM_FEATURE

  features.sharedMemory =

      wasm::ThreadsAvailable(cx) ? Shareable::True : Shareable::False;

  features.simd = jit::JitSupportsWasmSimd();

  features.isBuiltinModule = options.isBuiltinModule;

  if (features.jsStringBuiltins) {

    features.builtinModules.jsString = options.jsStringBuiltins;

    features.builtinModules.jsStringConstants = options.jsStringConstants;

    features.builtinModules.jsStringConstantsNamespace =

        options.jsStringConstantsNamespace;

#ifdef ENABLE_WASM_GC

  if (options.requireGC) {

    features.gc = true;

#endif

#ifdef ENABLE_WASM_TAIL_CALLS

  if (options.requireTailCalls) {

    features.tailCalls = true;

#endif

  return features;

SharedCompileArgs CompileArgs::build(JSContext* cx,

                                     ScriptedCaller&& scriptedCaller,

                                     const FeatureOptions& options,

                                     CompileArgsError* error) {

  bool baseline = BaselineAvailable(cx);

  bool ion = IonAvailable(cx);

  // Debug information such as source view or debug traps will require

  // additional memory and permanently stay in baseline code, so we try to

  // only enable it when a developer actually cares: when the debugger tab

  // is open.

  bool debug = cx->realm() && cx->realm()->debuggerObservesWasm();

  bool forceTiering = cx->options().testWasmAwaitTier2() ||

                      JitOptions.wasmDelayTier2 ||

                      wasm::ExperimentalCompilePipelineAvailable(cx);

  // The <Compiler>Available() predicates should ensure no failure here, but

  // when we're fuzzing we allow inconsistent switches and the check may thus

  // fail.  Let it go to a run-time error instead of crashing.

  if (debug && ion) {

    *error = CompileArgsError::NoCompiler;

    return nullptr;

  if (forceTiering && !(baseline && ion)) {

    // This can happen only in testing, and in this case we don't have a

    // proper way to signal the error, so just silently override the default,

    // instead of adding a skip-if directive to every test using debug/gc.

    forceTiering = false;

  if (!(baseline || ion)) {

    *error = CompileArgsError::NoCompiler;

    return nullptr;

  CompileArgs* target = cx->new_<CompileArgs>();

  if (!target) {

    *error = CompileArgsError::OutOfMemory;

    return nullptr;

  target->scriptedCaller = std::move(scriptedCaller);

  target->baselineEnabled = baseline;

  target->ionEnabled = ion;

  target->debugEnabled = debug;

  target->forceTiering = forceTiering;

  target->features = FeatureArgs::build(cx, options);

  return target;

void wasm::SetUseCountersForFeatureUsage(JSContext* cx, JSObject* object,

                                         FeatureUsage usage) {

  if (usage & FeatureUsage::LegacyExceptions) {

    cx->runtime()->setUseCounter(object, JSUseCounter::WASM_LEGACY_EXCEPTIONS);

SharedCompileArgs CompileArgs::buildForAsmJS(ScriptedCaller&& scriptedCaller) {

  CompileArgs* target = js_new<CompileArgs>();

  if (!target) {

    return nullptr;

  target->scriptedCaller = std::move(scriptedCaller);

  // AsmJS is deprecated and doesn't have mechanisms for experimental features,

  // so we don't need to initialize the FeatureArgs. It also only targets the

  // Ion backend and does not need WASM debug support since it is de-optimized

  // to JS in that case.

  target->ionEnabled = true;

  target->debugEnabled = false;

  return target;

SharedCompileArgs CompileArgs::buildForValidation(const FeatureArgs& args) {

  CompileArgs* target = js_new<CompileArgs>();

  if (!target) {

    return nullptr;

  // Validation will not need compilers, just mark them disabled

  target->baselineEnabled = false;

  target->ionEnabled = false;

  target->debugEnabled = false;

  target->forceTiering = false;

  // Set the features

  target->features = args;

  return target;

SharedCompileArgs CompileArgs::buildAndReport(JSContext* cx,

                                              ScriptedCaller&& scriptedCaller,

                                              const FeatureOptions& options,

                                              bool reportOOM) {

  CompileArgsError error;

  SharedCompileArgs args =

      CompileArgs::build(cx, std::move(scriptedCaller), options, &error);

  if (args) {

    Log(cx, "available wasm compilers: tier1=%s tier2=%s",

        args->baselineEnabled ? "baseline" : "none",

        args->ionEnabled ? "ion" : "none");

    return args;

  switch (error) {

    case CompileArgsError::NoCompiler: {

      JS_ReportErrorASCII(cx, "no WebAssembly compiler available");

      break;

    case CompileArgsError::OutOfMemory: {

      // Most callers are required to return 'false' without reporting an OOM,

      // so we make reporting it optional here.

      if (reportOOM) {

        ReportOutOfMemory(cx);

      break;

  return nullptr;

/*

 * [SMDOC] Tiered wasm compilation.

 * "Tiered compilation" refers to the mechanism where we first compile the code

 * with a fast non-optimizing compiler so that we can start running the code

 * quickly, while in the background recompiling the code with the slower

 * optimizing compiler.  Code created by baseline is called "tier-1"; code

 * created by the optimizing compiler is called "tier-2".  When the tier-2 code

 * is ready, we "tier up" the code by creating paths from tier-1 code into their

 * tier-2 counterparts; this patching is performed as the program is running.

 * ## Selecting the compilation mode

 * When wasm bytecode arrives, we choose the compilation strategy based on

 * switches and on aspects of the code and the hardware.  If switches allow

 * tiered compilation to happen (the normal case), the following logic applies.

 * If the code is sufficiently large that tiered compilation would be beneficial

 * but not so large that it might blow our compiled code budget and make

 * compilation fail, we choose tiered compilation.  Otherwise we go straight to

 * optimized code.

 * The expected benefit of tiering is computed by TieringBeneficial(), below,

 * based on various estimated parameters of the hardware: ratios of object code

 * to byte code, speed of the system, number of cores.

 * ## Mechanics of tiering up; patching

 * Every time control enters a tier-1 function, the function prologue loads its

 * tiering pointer from the tiering jump table (see JumpTable in WasmCode.h) and

 * jumps to it.

 * Initially, an entry in the tiering table points to the instruction inside the

 * tier-1 function that follows the jump instruction (hence the jump is an

 * expensive nop).  When the tier-2 compiler is finished, the table is patched

 * racily to point into the tier-2 function at the correct prologue location

 * (see loop near the end of Module::finishTier2()).  As tier-2 compilation is

 * performed at most once per Module, there is at most one such racy overwrite

 * per table element during the lifetime of the Module.

 * The effect of the patching is to cause the tier-1 function to jump to its

 * tier-2 counterpart whenever the tier-1 function is called subsequently.  That

 * is, tier-1 code performs standard frame setup on behalf of whatever code it

 * jumps to, and the target code (tier-1 or tier-2) allocates its own frame in

 * whatever way it wants.

 * The racy writing means that it is often nondeterministic whether tier-1 or

 * tier-2 code is reached by any call during the tiering-up process; if F calls

 * A and B in that order, it may reach tier-2 code for A and tier-1 code for B.

 * If F is running concurrently on threads T1 and T2, T1 and T2 may see code

 * from different tiers for either function.

 * Note, tiering up also requires upgrading the jit-entry stubs so that they

 * reference tier-2 code.  The mechanics of this upgrading are described at

 * WasmInstanceObject::getExportedFunction().

 * ## Current limitations of tiering

 * Tiering is not always seamless.  Partly, it is possible for a program to get

 * stuck in tier-1 code.  Partly, a function that has tiered up continues to

 * force execution to go via tier-1 code to reach tier-2 code, paying for an

 * additional jump and a slightly less optimized prologue than tier-2 code could

 * have had on its own.

 * Known tiering limitiations:

 * - We can tier up only at function boundaries.  If a tier-1 function has a

 *   long-running loop it will not tier up until it returns to its caller.  If

 *   this loop never exits (a runloop in a worker, for example) then the

 *   function will never tier up.

 *   To do better, we need OSR.

 * - Wasm Table entries are never patched during tier-up.  A Table of funcref

 *   holds not a JSFunction pointer, but a (code*,instance*) pair of pointers.

 * When a table.set operation is performed, the JSFunction value is decomposed

 * and its code and instance pointers are stored in the table; subsequently,

 * when a table.get operation is performed, the JSFunction value is

 * reconstituted from its code pointer using fairly elaborate machinery.  (The

 * mechanics are the same also for the reflected JS operations on a

 * WebAssembly.Table.  For everything, see WasmTable.{cpp,h}.)  The code pointer

 * in the Table will always be the code pointer belonging to the best tier that

 * was active at the time when that function was stored in that Table slot; in

 * many cases, it will be tier-1 code.  As a consequence, a call through a table

 * will first enter tier-1 code and then jump to tier-2 code.

 *   To do better, we must update all the tables in the system when an instance

 *   tiers up.  This is expected to be very hard.

 * - Imported Wasm functions are never patched during tier-up.  Imports are held

 *   in FuncImportInstanceData values in the instance, and for a wasm

 *   callee, what's stored is the raw code pointer into the best tier of the

 *   callee that was active at the time the import was resolved.  That could be

 *   baseline code, and if it is, the situation is as for Table entries: a call

 *   to an import will always go via that import's tier-1 code, which will tier

 * up with an indirect jump.

 *   To do better, we must update all the import tables in the system that

 *   import functions from instances whose modules have tiered up.  This is

 *   expected to be hard.

*/

// Classify the current system as one of a set of recognizable classes.  This

// really needs to get our tier-1 systems right.

//

// TODO: We don't yet have a good measure of how fast a system is.  We

// distinguish between mobile and desktop because these are very different kinds

// of systems, but we could further distinguish between low / medium / high end

// within those major classes.  If we do so, then constants below would be

// provided for each (class, architecture, system-tier) combination, not just

// (class, architecture) as now.

//

// CPU clock speed is not by itself a good predictor of system performance, as

// there are high-performance systems with slow clocks (recent Intel) and

// low-performance systems with fast clocks (older AMD).  We can also use

// physical memory, core configuration, OS details, CPU class and family, and

// CPU manufacturer to disambiguate.

enum class SystemClass {

  DesktopX86,

  DesktopX64,

  DesktopUnknown32,

  DesktopUnknown64,

  MobileX86,

  MobileArm32,

  MobileArm64,

  MobileUnknown32,

  MobileUnknown64

};

static SystemClass ClassifySystem() {

  bool isDesktop;

#if defined(ANDROID) || defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)

  isDesktop = false;

#else

  isDesktop = true;

#endif

  if (isDesktop) {

#if defined(JS_CODEGEN_X64)

    return SystemClass::DesktopX64;

#elif defined(JS_CODEGEN_X86)

    return SystemClass::DesktopX86;

#elif defined(JS_64BIT)

    return SystemClass::DesktopUnknown64;

#else

    return SystemClass::DesktopUnknown32;

#endif

  } else {

#if defined(JS_CODEGEN_X86)

    return SystemClass::MobileX86;

#elif defined(JS_CODEGEN_ARM)

    return SystemClass::MobileArm32;

#elif defined(JS_CODEGEN_ARM64)

    return SystemClass::MobileArm64;

#elif defined(JS_64BIT)

    return SystemClass::MobileUnknown64;

#else

    return SystemClass::MobileUnknown32;

#endif

// Code sizes in machine code bytes per bytecode byte, again empirical except

// where marked.

//

// The Ion estimate for ARM64 is the measured Baseline value scaled by a

// plausible factor for optimized code.

static const double x64Tox86Inflation = 1.25;

static const double x64IonBytesPerBytecode = 2.45;

static const double x86IonBytesPerBytecode =

    x64IonBytesPerBytecode * x64Tox86Inflation;

static const double arm32IonBytesPerBytecode = 3.3;

static const double arm64IonBytesPerBytecode = 3.0 / 1.4;  // Estimate

static const double x64BaselineBytesPerBytecode = x64IonBytesPerBytecode * 1.43;

static const double x86BaselineBytesPerBytecode =

    x64BaselineBytesPerBytecode * x64Tox86Inflation;

static const double arm32BaselineBytesPerBytecode =

    arm32IonBytesPerBytecode * 1.39;

static const double arm64BaselineBytesPerBytecode = 3.0;

static double OptimizedBytesPerBytecode(SystemClass cls) {

  switch (cls) {

    case SystemClass::DesktopX86:

    case SystemClass::MobileX86:

    case SystemClass::DesktopUnknown32:

      return x86IonBytesPerBytecode;

    case SystemClass::DesktopX64:

    case SystemClass::DesktopUnknown64:

      return x64IonBytesPerBytecode;

    case SystemClass::MobileArm32:

    case SystemClass::MobileUnknown32:

      return arm32IonBytesPerBytecode;

    case SystemClass::MobileArm64:

    case SystemClass::MobileUnknown64:

      return arm64IonBytesPerBytecode;

    default:

      MOZ_CRASH();

static double BaselineBytesPerBytecode(SystemClass cls) {

  switch (cls) {

    case SystemClass::DesktopX86:

    case SystemClass::MobileX86:

    case SystemClass::DesktopUnknown32:

      return x86BaselineBytesPerBytecode;

    case SystemClass::DesktopX64:

    case SystemClass::DesktopUnknown64:

      return x64BaselineBytesPerBytecode;

    case SystemClass::MobileArm32:

    case SystemClass::MobileUnknown32:

      return arm32BaselineBytesPerBytecode;

    case SystemClass::MobileArm64:

    case SystemClass::MobileUnknown64:

      return arm64BaselineBytesPerBytecode;

    default:

      MOZ_CRASH();

double wasm::EstimateCompiledCodeSize(Tier tier, size_t bytecodeSize) {

  SystemClass cls = ClassifySystem();

  switch (tier) {

    case Tier::Baseline:

      return double(bytecodeSize) * BaselineBytesPerBytecode(cls);

    case Tier::Optimized:

      return double(bytecodeSize) * OptimizedBytesPerBytecode(cls);

  MOZ_CRASH("bad tier");

// If parallel Ion compilation is going to take longer than this, we should

// tier.

static const double tierCutoffMs = 10;

// Compilation rate values are empirical except when noted, the reference

// systems are:

//

// Late-2013 MacBook Pro (2.6GHz 4 x hyperthreaded Haswell, Mac OS X)

// Late-2015 Nexus 5X (1.4GHz 4 x Cortex-A53 + 1.8GHz 2 x Cortex-A57, Android)

// Ca-2016 SoftIron Overdrive 1000 (1.7GHz 4 x Cortex-A57, Fedora)

//

// The rates are always per core.

//

// The estimate for ARM64 is the Baseline compilation rate on the SoftIron

// (because we have no Ion yet), divided by 5 to estimate Ion compile rate and

// then divided by 2 to make it more reasonable for consumer ARM64 systems.

static const double x64IonBytecodesPerMs = 2100;

static const double x86IonBytecodesPerMs = 1500;

static const double arm32IonBytecodesPerMs = 450;

static const double arm64IonBytecodesPerMs = 750;  // Estimate

// Tiering cutoff values: if code section sizes are below these values (when

// divided by the effective number of cores) we do not tier, because we guess

// that parallel Ion compilation will be fast enough.

static const double x64DesktopTierCutoff = x64IonBytecodesPerMs * tierCutoffMs;

static const double x86DesktopTierCutoff = x86IonBytecodesPerMs * tierCutoffMs;

static const double x86MobileTierCutoff = x86DesktopTierCutoff / 2;  // Guess

static const double arm32MobileTierCutoff =

    arm32IonBytecodesPerMs * tierCutoffMs;

static const double arm64MobileTierCutoff =

    arm64IonBytecodesPerMs * tierCutoffMs;

static double CodesizeCutoff(SystemClass cls) {

  switch (cls) {

    case SystemClass::DesktopX86:

    case SystemClass::DesktopUnknown32:

      return x86DesktopTierCutoff;

    case SystemClass::DesktopX64:

    case SystemClass::DesktopUnknown64:

      return x64DesktopTierCutoff;

    case SystemClass::MobileX86:

      return x86MobileTierCutoff;

    case SystemClass::MobileArm32:

    case SystemClass::MobileUnknown32:

      return arm32MobileTierCutoff;

    case SystemClass::MobileArm64:

    case SystemClass::MobileUnknown64:

      return arm64MobileTierCutoff;

    default:

      MOZ_CRASH();

// As the number of cores grows the effectiveness of each core dwindles (on the

// systems we care about for SpiderMonkey).

//

// The data are empirical, computed from the observed compilation time of the

// Tanks demo code on a variable number of cores.

//

// The heuristic may fail on NUMA systems where the core count is high but the

// performance increase is nil or negative once the program moves beyond one

// socket.  However, few browser users have such systems.

static double EffectiveCores(uint32_t cores) {

  if (cores <= 3) {

    return pow(cores, 0.9);

  return pow(cores, 0.75);

#ifndef JS_64BIT

// Don't tier if tiering will fill code memory to more to more than this

// fraction.

static const double spaceCutoffPct = 0.9;

#endif

// Figure out whether we should use tiered compilation or not.

static bool TieringBeneficial(uint32_t codeSize) {

  uint32_t cpuCount = GetHelperThreadCPUCount();

  MOZ_ASSERT(cpuCount > 0);

  // It's mostly sensible not to background compile when there's only one

  // hardware thread as we want foreground computation to have access to that.

  // However, if wasm background compilation helper threads can be given lower

  // priority then background compilation on single-core systems still makes

  // some kind of sense.  That said, this is a non-issue: as of September 2017

  // 1-core was down to 3.5% of our population and falling.

  if (cpuCount == 1) {

    return false;

  // Compute the max number of threads available to do actual background

  // compilation work.

  uint32_t workers = GetMaxWasmCompilationThreads();

  // The number of cores we will use is bounded both by the CPU count and the

  // worker count, since the worker count already takes this into account.

  uint32_t cores = workers;

  SystemClass cls = ClassifySystem();

  // Ion compilation on available cores must take long enough to be worth the

  // bother.

  double cutoffSize = CodesizeCutoff(cls);

  double effectiveCores = EffectiveCores(cores);

  if ((codeSize / effectiveCores) < cutoffSize) {

    return false;

  // Do not implement a size cutoff for 64-bit systems since the code size

  // budget for 64 bit is so large that it will hardly ever be an issue.

  // (Also the cutoff percentage might be different on 64-bit.)

#ifndef JS_64BIT

  // If the amount of executable code for baseline compilation jeopardizes the

  // availability of executable memory for ion code then do not tier, for now.

//

  // TODO: For now we consider this module in isolation.  We should really

  // worry about what else is going on in this process and might be filling up

  // the code memory.  It's like we need some kind of code memory reservation

  // system or JIT compilation for large modules.

  double ionRatio = OptimizedBytesPerBytecode(cls);

  double baselineRatio = BaselineBytesPerBytecode(cls);

  double needMemory = codeSize * (ionRatio + baselineRatio);

  double availMemory = LikelyAvailableExecutableMemory();

  double cutoff = spaceCutoffPct * MaxCodeBytesPerProcess;

  // If the sum of baseline and ion code makes us exceeds some set percentage

  // of the executable memory then disable tiering.

  if ((MaxCodeBytesPerProcess - availMemory) + needMemory > cutoff) {

    return false;

#endif

  return true;

// Ensure that we have the non-compiler requirements to tier safely.

static bool PlatformCanTier() {

  return CanUseExtraThreads() && jit::CanFlushExecutionContextForAllThreads();

CompilerEnvironment::CompilerEnvironment(const CompileArgs& args)

    : state_(InitialWithArgs), args_(&args) {}

CompilerEnvironment::CompilerEnvironment(CompileMode mode, Tier tier,

                                         DebugEnabled debugEnabled)

    : state_(InitialWithModeTierDebug),

      mode_(mode),

      tier_(tier),

      debug_(debugEnabled) {}

void CompilerEnvironment::computeParameters() {

  MOZ_ASSERT(state_ == InitialWithModeTierDebug);

  state_ = Computed;

void CompilerEnvironment::computeParameters(Decoder& d) {

  MOZ_ASSERT(!isComputed());

  if (state_ == InitialWithModeTierDebug) {

    computeParameters();

    return;

  bool baselineEnabled = args_->baselineEnabled;

  bool ionEnabled = args_->ionEnabled;

  bool debugEnabled = args_->debugEnabled;

  bool forceTiering = args_->forceTiering;

  bool hasSecondTier = ionEnabled;

  MOZ_ASSERT_IF(debugEnabled, baselineEnabled);

  MOZ_ASSERT_IF(forceTiering, baselineEnabled && hasSecondTier);

  // Various constraints in various places should prevent failure here.

  MOZ_RELEASE_ASSERT(baselineEnabled || ionEnabled);

  uint32_t codeSectionSize = 0;

  SectionRange range;

  if (StartsCodeSection(d.begin(), d.end(), &range)) {

    codeSectionSize = range.size;

  if (baselineEnabled && hasSecondTier &&

      (TieringBeneficial(codeSectionSize) || forceTiering) &&

      PlatformCanTier()) {

    mode_ = args_->features.experimentalCompilePipeline

                ? CompileMode::LazyTiering

                : CompileMode::EagerTiering;

    tier_ = Tier::Baseline;

  } else {

    mode_ = CompileMode::Once;

    tier_ = hasSecondTier ? Tier::Optimized : Tier::Baseline;

  debug_ = debugEnabled ? DebugEnabled::True : DebugEnabled::False;

  state_ = Computed;

template <class DecoderT, class ModuleGeneratorT>

static bool DecodeFunctionBody(DecoderT& d, ModuleGeneratorT& mg,

                               uint32_t funcIndex) {

  uint32_t bodySize;

  if (!d.readVarU32(&bodySize)) {

    return d.fail("expected number of function body bytes");

  if (bodySize > MaxFunctionBytes) {

    return d.fail("function body too big");

  const size_t offsetInModule = d.currentOffset();

  // Skip over the function body; it will be validated by the compilation

  // thread.

  const uint8_t* bodyBegin;

  if (!d.readBytes(bodySize, &bodyBegin)) {

    return d.fail("function body length too big");

  return mg.compileFuncDef(funcIndex, offsetInModule, bodyBegin,

                           bodyBegin + bodySize);

template <class DecoderT, class ModuleGeneratorT>

static bool DecodeCodeSection(const CodeMetadata& codeMeta, DecoderT& d,

                              ModuleGeneratorT& mg) {

  if (!codeMeta.codeSection) {

    if (codeMeta.numFuncDefs() != 0) {

      return d.fail("expected code section");

    return mg.finishFuncDefs();

  uint32_t numFuncDefs;

  if (!d.readVarU32(&numFuncDefs)) {

    return d.fail("expected function body count");

  if (numFuncDefs != codeMeta.numFuncDefs()) {

    return d.fail(

        "function body count does not match function signature count");

  for (uint32_t funcDefIndex = 0; funcDefIndex < numFuncDefs; funcDefIndex++) {

    if (!DecodeFunctionBody(d, mg, codeMeta.numFuncImports + funcDefIndex)) {

      return false;

  if (!d.finishSection(*codeMeta.codeSection, "code")) {

    return false;

  return mg.finishFuncDefs();

SharedModule wasm::CompileBuffer(const CompileArgs& args,

                                 const ShareableBytes& bytecode,

                                 UniqueChars* error,

                                 UniqueCharsVector* warnings,

                                 JS::OptimizedEncodingListener* listener) {

  Decoder d(bytecode.bytes, 0, error, warnings);

  MutableModuleMetadata moduleMeta = js_new<ModuleMetadata>();

  if (!moduleMeta || !moduleMeta->init(args)) {

    return nullptr;

  if (!DecodeModuleEnvironment(d, moduleMeta->codeMeta.get(), moduleMeta)) {

    return nullptr;

  CompilerEnvironment compilerEnv(args);

  compilerEnv.computeParameters(d);

  if (!moduleMeta->prepareForCompile(compilerEnv.mode())) {

    return nullptr;

  ModuleGenerator mg(*moduleMeta->codeMeta, compilerEnv,

                     compilerEnv.initialState(), nullptr, error, warnings);

  if (!mg.initializeCompleteTier()) {

    return nullptr;

  if (!DecodeCodeSection(*moduleMeta->codeMeta, d, mg)) {

    return nullptr;

  if (!DecodeModuleTail(d, moduleMeta->codeMeta, moduleMeta)) {

    return nullptr;

  return mg.finishModule(bytecode, moduleMeta, listener);

bool wasm::CompileCompleteTier2(const Bytes& bytecode, const Module& module,

                                UniqueChars* error, UniqueCharsVector* warnings,

                                Atomic<bool>* cancelled) {

  CompilerEnvironment compilerEnv(CompileMode::EagerTiering, Tier::Optimized,

                                  DebugEnabled::False);

  compilerEnv.computeParameters();

  const CodeMetadata& codeMeta = module.codeMeta();

  ModuleGenerator mg(codeMeta, compilerEnv, CompileState::EagerTier2, cancelled,

                     error, warnings);

  if (!mg.initializeCompleteTier()) {

    return false;

  if (codeMeta.codeSection) {

    const SectionRange& codeSection = *codeMeta.codeSection;

    const uint8_t* codeSectionStart = bytecode.begin() + codeSection.start;

    const uint8_t* codeSectionEnd = codeSectionStart + codeSection.size;

    Decoder d(codeSectionStart, codeSectionEnd, codeSection.start, error);

    if (!DecodeCodeSection(module.codeMeta(), d, mg)) {

      return false;

  } else {

    MOZ_ASSERT(codeMeta.numFuncDefs() == 0);

    if (!mg.finishFuncDefs()) {

      return false;

  return mg.finishTier2(module);

class PartialTierModuleGenerator {

 private:

  ModuleGenerator& mg_;

  uint32_t targetFuncIndex_;

 public:

  PartialTierModuleGenerator(ModuleGenerator& mg, uint32_t targetFuncIndex)

      : mg_(mg), targetFuncIndex_(targetFuncIndex) {}

  bool finishFuncDefs() { return mg_.finishFuncDefs(); }

  bool compileFuncDef(uint32_t funcIndex, uint32_t lineOrBytecode,

                      const uint8_t* begin, const uint8_t* end) {

    if (funcIndex != targetFuncIndex_) {

      return true;

    return mg_.compileFuncDef(funcIndex, lineOrBytecode, begin, end);

};

bool wasm::CompilePartialTier2(const Code& code, uint32_t funcIndex) {

  CompilerEnvironment compilerEnv(CompileMode::LazyTiering, Tier::Optimized,

                                  DebugEnabled::False);

  compilerEnv.computeParameters();

  const CodeMetadata& codeMeta = code.codeMeta();

  UniqueChars error;

  ModuleGenerator mg(codeMeta, compilerEnv, CompileState::LazyTier2, nullptr,

                     &error, nullptr);

  if (!mg.initializePartialTier(code, funcIndex)) {

    // The module is already validated, this must be an OOM

    MOZ_ASSERT(!error);

    return false;

  const Bytes& bytecode = code.bytecode();

  const FuncDefRange& funcRange = code.codeMeta().funcDefRange(funcIndex);

  const uint8_t* bodyBegin = bytecode.begin() + funcRange.bytecodeOffset;

  const uint8_t* bodyEnd = bodyBegin + funcRange.bodyLength;

  Decoder d(bytecode.begin(), bytecode.end(), 0, &error);

  if (!mg.compileFuncDef(funcIndex, funcRange.bytecodeOffset, bodyBegin,

                         bodyEnd) ||

      !mg.finishFuncDefs() || !mg.finishPartialTier2(code)) {

    // The module is already validated, this must be an OOM

    MOZ_RELEASE_ASSERT(!error);

    return false;

  return true;

class StreamingDecoder {

  Decoder d_;

  const ExclusiveBytesPtr& codeBytesEnd_;

  const Atomic<bool>& cancelled_;

 public:

  StreamingDecoder(const CodeMetadata& codeMeta, const Bytes& begin,

                   const ExclusiveBytesPtr& codeBytesEnd,

                   const Atomic<bool>& cancelled, UniqueChars* error,

                   UniqueCharsVector* warnings)

      : d_(begin, codeMeta.codeSection->start, error, warnings),

        codeBytesEnd_(codeBytesEnd),

        cancelled_(cancelled) {}

  bool fail(const char* msg) { return d_.fail(msg); }

  bool done() const { return d_.done(); }

  size_t currentOffset() const { return d_.currentOffset(); }

  bool waitForBytes(size_t numBytes) {

    numBytes = std::min(numBytes, d_.bytesRemain());

    const uint8_t* requiredEnd = d_.currentPosition() + numBytes;

    auto codeBytesEnd = codeBytesEnd_.lock();

    while (codeBytesEnd < requiredEnd) {

      if (cancelled_) {

        return false;

      codeBytesEnd.wait();

    return true;

  bool readVarU32(uint32_t* u32) {

    return waitForBytes(MaxVarU32DecodedBytes) && d_.readVarU32(u32);

  bool readBytes(size_t size, const uint8_t** begin) {

    return waitForBytes(size) && d_.readBytes(size, begin);

  bool finishSection(const SectionRange& range, const char* name) {

    return d_.finishSection(range, name);

};

static SharedBytes CreateBytecode(const Bytes& env, const Bytes& code,

                                  const Bytes& tail, UniqueChars* error) {

  size_t size = env.length() + code.length() + tail.length();

  if (size > MaxModuleBytes) {

    *error = DuplicateString("module too big");

    return nullptr;

  MutableBytes bytecode = js_new<ShareableBytes>();

  if (!bytecode || !bytecode->bytes.resize(size)) {

    return nullptr;

  uint8_t* p = bytecode->bytes.begin();

  memcpy(p, env.begin(), env.length());

  p += env.length();

  memcpy(p, code.begin(), code.length());

  p += code.length();

  memcpy(p, tail.begin(), tail.length());

  p += tail.length();

  MOZ_ASSERT(p == bytecode->end());

  return bytecode;

SharedModule wasm::CompileStreaming(

    const CompileArgs& args, const Bytes& envBytes, const Bytes& codeBytes,

    const ExclusiveBytesPtr& codeBytesEnd,

    const ExclusiveStreamEndData& exclusiveStreamEnd,

    const Atomic<bool>& cancelled, UniqueChars* error,

    UniqueCharsVector* warnings) {

  CompilerEnvironment compilerEnv(args);

  MutableModuleMetadata moduleMeta = js_new<ModuleMetadata>();

  if (!moduleMeta || !moduleMeta->init(args)) {

    return nullptr;

  CodeMetadata& codeMeta = *moduleMeta->codeMeta;

    Decoder d(envBytes, 0, error, warnings);

    if (!DecodeModuleEnvironment(d, &codeMeta, moduleMeta)) {

      return nullptr;

    compilerEnv.computeParameters(d);

    if (!codeMeta.codeSection) {

      d.fail("unknown section before code section");

      return nullptr;

    MOZ_RELEASE_ASSERT(codeMeta.codeSection->size == codeBytes.length());

    MOZ_RELEASE_ASSERT(d.done());

  if (!moduleMeta->prepareForCompile(compilerEnv.mode())) {

    return nullptr;

  ModuleGenerator mg(codeMeta, compilerEnv, compilerEnv.initialState(),

                     &cancelled, error, warnings);

  if (!mg.initializeCompleteTier()) {

    return nullptr;

    StreamingDecoder d(codeMeta, codeBytes, codeBytesEnd, cancelled, error,

                       warnings);

    if (!DecodeCodeSection(codeMeta, d, mg)) {

      return nullptr;

    MOZ_RELEASE_ASSERT(d.done());

    auto streamEnd = exclusiveStreamEnd.lock();

    while (!streamEnd->reached) {

      if (cancelled) {

        return nullptr;

      streamEnd.wait();

  const StreamEndData& streamEnd = exclusiveStreamEnd.lock();

  const Bytes& tailBytes = *streamEnd.tailBytes;

    Decoder d(tailBytes, codeMeta.codeSection->end(), error, warnings);

    if (!DecodeModuleTail(d, &codeMeta, moduleMeta)) {

      return nullptr;

    MOZ_RELEASE_ASSERT(d.done());

  SharedBytes bytecode = CreateBytecode(envBytes, codeBytes, tailBytes, error);

  if (!bytecode) {

    return nullptr;

  return mg.finishModule(*bytecode, moduleMeta, streamEnd.tier2Listener);

class DumpIonModuleGenerator {

 private:

  const CompilerEnvironment& compilerEnv_;

  CodeMetadata& codeMeta_;

  uint32_t targetFuncIndex_;

  IonDumpContents contents_;

  GenericPrinter& out_;

  UniqueChars* error_;

 public:

  DumpIonModuleGenerator(const CompilerEnvironment& compilerEnv,

                         CodeMetadata& codeMeta, uint32_t targetFuncIndex,

                         IonDumpContents contents, GenericPrinter& out,

                         UniqueChars* error)

      : compilerEnv_(compilerEnv),

        codeMeta_(codeMeta),

        targetFuncIndex_(targetFuncIndex),

        contents_(contents),

        out_(out),

        error_(error) {}

  bool finishFuncDefs() { return true; }

  bool compileFuncDef(uint32_t funcIndex, uint32_t lineOrBytecode,

                      const uint8_t* begin, const uint8_t* end) {

    if (funcIndex != targetFuncIndex_) {

      return true;

    FuncCompileInput input(funcIndex, lineOrBytecode, begin, end,

                           Uint32Vector());

    return IonDumpFunction(compilerEnv_, codeMeta_, input, contents_, out_,

                           error_);

};

bool wasm::DumpIonFunctionInModule(const ShareableBytes& bytecode,

                                   uint32_t targetFuncIndex,

                                   IonDumpContents contents,

                                   GenericPrinter& out, UniqueChars* error) {

  SharedCompileArgs compileArgs =

      CompileArgs::buildForValidation(FeatureArgs::allEnabled());

  if (!compileArgs) {

    return false;

  CompilerEnvironment compilerEnv(CompileMode::Once, Tier::Optimized,

                                  DebugEnabled::False);

  compilerEnv.computeParameters();

  UniqueCharsVector warnings;

  Decoder d(bytecode.bytes, 0, error, &warnings);

  MutableModuleMetadata moduleMeta = js_new<ModuleMetadata>();

  if (!moduleMeta || !moduleMeta->init(*compileArgs)) {

    return false;

  if (!DecodeModuleEnvironment(d, moduleMeta->codeMeta, moduleMeta)) {

    return false;

  DumpIonModuleGenerator mg(compilerEnv, *moduleMeta->codeMeta, targetFuncIndex,

                            contents, out, error);

  return moduleMeta->prepareForCompile(CompileMode::Once) &&

         DecodeCodeSection(*moduleMeta->codeMeta, d, mg);