lto-pgo.configure - mozsearch

mozilla-central/build/moz.configure/lto-pgo.configure

Enable keyboard shortcuts

Source code

File a bug in Firefox Build System :: General

Revision control

Copy as Markdown

Other Tools

# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-

# vim: set filetype=python:

# This Source Code Form is subject to the terms of the Mozilla Public

# License, v. 2.0. If a copy of the MPL was not distributed with this

# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# PGO

# ==============================================================

# --enable-profile-generate (or MOZ_PROFILE_GENERATE in the environment)
# requests a PGO-instrumented build.  The option takes at most one value
# (nargs="?"), and "cross" is the only accepted choice.
option(
    "--enable-profile-generate",
    env="MOZ_PROFILE_GENERATE",
    nargs="?",
    choices=("cross",),
    help="Build a PGO instrumented binary",
)

# Collapse the option value to a plain True (the lambda ignores the value).
enable_profile_generate = depends_if("--enable-profile-generate")(lambda _: True)

# An instrumented build implies MOZ_PGO and is published both as a config
# value and as a define.
imply_option("MOZ_PGO", enable_profile_generate)

set_config("MOZ_PROFILE_GENERATE", enable_profile_generate)
set_define("MOZ_PROFILE_GENERATE", enable_profile_generate)

# --enable-profile-use (or MOZ_PROFILE_USE in the environment) requests a
# build optimized with previously generated profile data.  The option takes
# at most one value (nargs="?"), and "cross" is the only accepted choice.
option(
    "--enable-profile-use",
    env="MOZ_PROFILE_USE",
    nargs="?",
    choices=("cross",),
    help="Use a generated profile during the build",
)

# Collapse the option value to a plain True (the lambda ignores the value).
enable_profile_use = depends_if("--enable-profile-use")(lambda _: True)

# A profile-use build implies MOZ_PGO and is published as a config value.
imply_option("MOZ_PGO", enable_profile_use)

set_config("MOZ_PROFILE_USE", enable_profile_use)

# Locate llvm-profdata, searching the clang search path.
# NOTE(review): allow_missing=True presumably keeps a missing tool from being
# fatal at this point; pgo_profile_path dies later if the tool is required
# but was not found.
llvm_profdata = check_prog(
    "LLVM_PROFDATA",
    ["llvm-profdata"],
    allow_missing=True,
    paths=clang_search_path,
)

# --call-graph-profile-sort is the default behavior for lld, and it proves to
# be more efficient than a PGO-based orderfile.  The flag is not passed
# explicitly because its option support differs from one version to the other
# (w/ or w/o argument, and the argument value).
#
# The result is truthy only when the selected linker is lld and the target OS
# is not OSX.
@depends(select_linker, target)
def pgo_cg_sort(linker, target):
    # No linker selected: hand the falsy value straight back.
    if not linker:
        return linker
    return linker.KIND == "lld" and target.os != "OSX"

# Probe llvm-profdata for the "order" subcommand by running `order --help`
# and testing for a zero exit status.  Only considered for profile-use builds
# where pgo_cg_sort does not hold.
@depends_if(llvm_profdata, when=enable_profile_use & ~pgo_cg_sort)
@checking("whether llvm-profdata supports 'order' subcommand")
def llvm_profdata_order(profdata):
    exit_status, _stdout, _stderr = get_cmd_output(profdata, "order", "--help")
    return exit_status == 0

# Location of the profile data consumed by a profile-use build; takes exactly
# one value.
option(
    "--with-pgo-profile-path",
    help="Path to the directory with unmerged profile data to use during the build"
    ", or to a merged profdata file",
    nargs=1,
)

# Resolve the location of the profile data for a profile-use build.
#
# Arguments (in @depends order):
#   path      - value of --with-pgo-profile-path (falsy when not given)
#   pgo_use   - value of --enable-profile-use
#   profdata  - result of the LLVM_PROFDATA program check
#   build_env - build environment; only topobjdir is read
#
# Returns the explicit path when one was given and passes validation;
# otherwise <topobjdir>/instrumented/merged.profdata, where a trailing
# "/js/src" is stripped from topobjdir first.  Calls die() when an explicit
# path is given without --enable-profile-use, without llvm-profdata, or when
# the path is not an existing file or not absolute.
@depends(
    "--with-pgo-profile-path",
    "--enable-profile-use",
    llvm_profdata,
    build_environment,
)
@imports("os")
def pgo_profile_path(path, pgo_use, profdata, build_env):
    js_suffix = "/js/src"
    topobjdir = build_env.topobjdir
    if topobjdir.endswith(js_suffix):
        topobjdir = topobjdir[: -len(js_suffix)]

    if not path:
        return os.path.join(topobjdir, "instrumented", "merged.profdata")

    # From here on an explicit path was given.
    if not pgo_use:
        die("Pass --enable-profile-use to use --with-pgo-profile-path.")
    if not profdata:
        die("LLVM_PROFDATA must be set to process the pgo profile.")

    profile = path[0]
    # NOTE(review): the option's help text also mentions a directory of
    # unmerged profile data, but only regular files pass this check --
    # confirm which is intended.
    if not os.path.isfile(profile):
        die("Argument to --with-pgo-profile-path must be a file.")
    if not os.path.isabs(profile):
        die("Argument to --with-pgo-profile-path must be an absolute path.")
    return profile


set_config("PGO_PROFILE_PATH", pgo_profile_path)

# Generate a symbol order file from the profile data, when applicable.
#
# Returns None unless this is a profile-use build and llvm-profdata supports
# the "order" subcommand.  Otherwise runs
# `llvm-profdata order <path> -o <topobjdir>/orderfile.txt` and returns the
# path of the generated order file.
@depends(
    "--enable-profile-use",
    pgo_profile_path,
    llvm_profdata,
    llvm_profdata_order,
    build_environment,
)
def orderfile_path(profile_use, path, profdata, profdata_order, build_env):
    if not profile_use:
        return None

    if not profdata_order:
        return None

    orderfile = os.path.join(build_env.topobjdir, "orderfile.txt")
    check_cmd_output(profdata, "order", path, "-o", orderfile)
    return orderfile

# Test-compile with temporal instrumentation flags.  Only evaluated for
# profile-generate builds where pgo_cg_sort does not hold.
pgo_temporal = c_compiler.try_compile(
    flags=["-fprofile-generate", "-mllvm", "-pgo-temporal-instrumentation"],
    check_msg="whether the C compiler supports temporal instrumentation",
    when=enable_profile_generate & ~pgo_cg_sort,
)

# Compute the compiler and linker flags for both halves of a PGO build:
# instrumentation (gen_*) and optimization with a profile (use_*).
#
# Arguments (in @depends order):
#   compiler          - C compiler info; only .type is read
#   linker            - selected linker (may be falsy); only .KIND is read
#   target            - target info; only .os is read
#   profdata          - profile data path, as computed by pgo_profile_path
#   target_is_windows - truthy when targeting Windows
#   pgo_temporal      - truthy when temporal instrumentation compiles
#   orderfile         - symbol order file path, or falsy when there is none
#
# Returns a namespace with gen_cflags, gen_ldflags, use_cflags and
# use_ldflags for gcc, clang and clang-cl.  Any other compiler type falls
# through, so the function returns None.
#
# NOTE(review): nothing in this body references multiprocessing; the
# @imports below looks removable.
@depends(
    c_compiler,
    select_linker,
    target,
    pgo_profile_path,
    target_is_windows,
    pgo_temporal,
    orderfile_path,
)
@imports("multiprocessing")
def pgo_flags(
    compiler,
    linker,
    target,
    profdata,
    target_is_windows,
    pgo_temporal,
    orderfile,
):
    if compiler.type == "gcc":
        return namespace(
            gen_cflags=["-fprofile-generate"],
            gen_ldflags=["-fprofile-generate"],
            use_cflags=["-fprofile-use", "-fprofile-correction", "-Wcoverage-mismatch"],
            use_ldflags=["-fprofile-use"],
        )

    if compiler.type in ("clang-cl", "clang"):
        prefix = ""
        if compiler.type == "clang-cl":
            # clang-cl needs the /clang: prefix to accept clang-style flags,
            # and gets no instrumentation linker flags.
            prefix = "/clang:"
            gen_ldflags = None
        else:
            gen_ldflags = ["-fprofile-generate"]

        use_ldflags = []
        if orderfile:
            # The linker may be unset (pgo_cg_sort guards against that as
            # well), so read KIND defensively instead of raising.
            linker_kind = linker.KIND if linker else None
            if compiler.type == "clang-cl":
                use_ldflags += [
                    "-ORDER:@" + orderfile,
                    "/ignore:4037",  # Disable warn missing order symbol
                ]
            elif linker_kind == "ld64" or (linker_kind == "lld" and target.os == "OSX"):
                use_ldflags += ["-Wl,-order_file", orderfile]
            elif linker_kind == "lld":
                use_ldflags += [
                    "-Wl,--symbol-ordering-file",
                    orderfile,
                    "-Wl,--no-warn-symbol-ordering",
                ]
            if use_ldflags:
                log.info("Activating PGO-based orderfile")

        gen_cflags = [prefix + "-fprofile-generate"]
        if pgo_temporal:
            gen_cflags += ["-mllvm", "-pgo-temporal-instrumentation"]

        if target_is_windows:
            # native llvm-profdata.exe on Windows can't read profile data
            # if name compression is enabled (which cross-compiling enables
            # by default)
            gen_cflags += ["-mllvm", "-enable-name-compression=false"]

        return namespace(
            gen_cflags=gen_cflags,
            gen_ldflags=gen_ldflags,
            use_cflags=[
                prefix + "-fprofile-use=%s" % profdata,
                # Some error messages about mismatched profile data
                # come in via -Wbackend-plugin, so disable those too.
                "-Wno-error=backend-plugin",
            ],
            use_ldflags=use_ldflags,
        )


set_config("PROFILE_GEN_CFLAGS", pgo_flags.gen_cflags)
set_config("PROFILE_GEN_LDFLAGS", pgo_flags.gen_ldflags)
set_config("PROFILE_USE_CFLAGS", pgo_flags.use_cflags)
set_config("PROFILE_USE_LDFLAGS", pgo_flags.use_ldflags)

# Optional jarlog file for packaging; takes exactly one value, which is
# published unchanged as PGO_JARLOG_PATH.
option(
    "--with-pgo-jarlog",
    help="Use the provided jarlog file when packaging during a profile-use build",
    nargs=1,
)

set_config("PGO_JARLOG_PATH", depends_if("--with-pgo-jarlog")(lambda p: p))

# Decide whether cross-language PGO was requested, i.e. whether "cross" was
# passed to --enable-profile-use or --enable-profile-generate.
#
# Returns True when it was; returns None when PGO is off, when PGO is enabled
# through MOZ_PGO alone, or when "cross" was not requested.  Calls die() when
# both profile options are set at once, or when "cross" is requested with
# gcc.
@depends("MOZ_PGO", "--enable-profile-use", "--enable-profile-generate", c_compiler)
def moz_pgo_rust(pgo, profile_use, profile_generate, c_compiler):
    if not pgo:
        return

    # Enabling PGO through MOZ_PGO only and not --enable* flags.
    if not profile_use and not profile_generate:
        return

    if profile_use and profile_generate:
        die("Cannot build with --enable-profile-use and --enable-profile-generate.")

    # Each option carries at most one value; check it for "cross".
    want_cross = (len(profile_use) and profile_use[0] == "cross") or (
        len(profile_generate) and profile_generate[0] == "cross"
    )

    if not want_cross:
        return

    if c_compiler.type == "gcc":
        die("Cannot use cross-language PGO with GCC.")

    return True


set_config("MOZ_PGO_RUST", moz_pgo_rust)

# LTO

# ==============================================================

# --enable-lto (or MOZ_LTO in the environment) accepts any number of the
# modes "full", "thin" and "cross"; lto() validates the combination.
option(
    "--enable-lto",
    env="MOZ_LTO",
    nargs="*",
    choices=("full", "thin", "cross"),
    help="Enable LTO",
)

# Environment-only switch consulted by lto() before allowing cross-language
# LTO with ld64.
option(
    env="MOZ_LD64_KNOWN_GOOD",
    nargs=1,
    help="Indicate that ld64 is free of symbol aliasing bugs",
)

# MOZ_LD64_KNOWN_GOOD is implied by moz_automation.
imply_option("MOZ_LD64_KNOWN_GOOD", moz_automation)

# Translate --enable-lto into compiler and linker flags.
#
# Arguments (in @depends order):
#   values             - value of --enable-lto ("full", "thin", "cross")
#   c_compiler         - C compiler info; .type and .version are read
#   select_linker      - selected linker (may be falsy); only .KIND is read
#   ld64_known_good    - value of MOZ_LD64_KNOWN_GOOD
#   target             - target info; .os, .cpu and .kernel are read
#   instrumented_build - value of --enable-profile-generate
#   pass_manager       - pass_manager.enabled
#   pgo_build          - value of --enable-profile-use
#   moz_automation     - truthy in automation
#
# Returns a namespace with enabled=True, cflags, ldflags and rust_lto, where
# rust_lto is "full" or "thin" when "cross" was requested and "" otherwise.
# Returns None when LTO was not requested or when the build is instrumented.
# Calls die() on incompatible mode/compiler/linker combinations.
@depends(
    "--enable-lto",
    c_compiler,
    select_linker,
    "MOZ_LD64_KNOWN_GOOD",
    target,
    "--enable-profile-generate",
    pass_manager.enabled,
    "--enable-profile-use",
    moz_automation,
)
@imports("multiprocessing")
def lto(
    values,
    c_compiler,
    select_linker,
    ld64_known_good,
    target,
    instrumented_build,
    pass_manager,
    pgo_build,
    moz_automation,
):
    cflags = []
    ldflags = []

    if not values:
        return

    # Sanitize LTO modes.
    if "full" in values and "thin" in values:
        die("incompatible --enable-lto choices 'full' and 'thin'")

    # If a value was given to --enable-lto, use that.  Otherwise, make the lto
    # mode explicit, using full with gcc, and full or thin with clang depending
    # on the performance benefit.
    # Defaulting to full LTO is costly in terms of compilation time, so we only
    # default to it if MOZ_AUTOMATION and PGO are on, and for some platforms.
    # Based on speedometer3 scores, full lto + pgo is beneficial for Linux and
    # Windows for x86_64 targets.
    if values == () or values == ("cross",):
        if c_compiler.type == "gcc":
            values += ("full",)
        elif (
            pgo_build
            and moz_automation
            and target.os in ("WINNT", "GNU")
            and target.cpu == "x86_64"
        ):
            values += ("full",)
        else:
            values += ("thin",)

    if instrumented_build:
        log.warning("Disabling LTO because --enable-profile-generate is specified")
        return

    if c_compiler.type == "gcc":
        if "cross" in values:
            die("Cross-language LTO is not supported with GCC.")
        if "thin" in values:
            die(
                "gcc does not support thin LTO. Use `--enable-lto` "
                "to enable full LTO for gcc."
            )

    # The linker may be unset (pgo_cg_sort guards against that as well), so
    # check it before reading KIND.
    if (
        target.kernel == "Darwin"
        and "cross" in values
        and select_linker
        and select_linker.KIND == "ld64"
        and not ld64_known_good
    ):
        die(
            "The Mac linker is known to have a bug that affects cross-language "
            "LTO.  If you know that your linker is free from this bug, please "
            "set the environment variable `MOZ_LD64_KNOWN_GOOD=1` and re-run "
            "configure."
        )

    if c_compiler.type == "clang":
        if "full" in values:
            cflags.append("-flto")
            ldflags.append("-flto")
        else:
            cflags.append("-flto=thin")
            ldflags.append("-flto=thin")

        if target.os == "Android" and "cross" in values:
            # Work around https://github.com/rust-lang/rust/issues/90088
            # by enabling the highest level of SSE the rust targets default
            # to.
            # https://github.com/rust-lang/rust/blob/bdfcb88e8b6203ccb46a2fb6649979b773efc8ac/compiler/rustc_target/src/spec/i686_linux_android.rs#L13
            # https://github.com/rust-lang/rust/blob/8d1083e319841624f64400e1524805a40d725439/compiler/rustc_target/src/spec/x86_64_linux_android.rs#L7
            if target.cpu == "x86":
                ldflags.append("-Wl,-plugin-opt=-mattr=+ssse3")
            elif target.cpu == "x86_64":
                ldflags.append("-Wl,-plugin-opt=-mattr=+sse4.2")
    elif c_compiler.type == "clang-cl":
        if "full" in values:
            cflags.append("-flto")
        else:
            cflags.append("-flto=thin")
        # With clang-cl, -flto can only be used with -c or -fuse-ld=lld.
        # AC_TRY_LINKs during configure don't have -c, so pass -fuse-ld=lld.
        cflags.append("-fuse-ld=lld")

        # Explicitly set the CPU to optimize for so the linker doesn't
        # choose a poor default.  Rust compilation by default uses the
        # pentium4 CPU on x86:
        #
        # https://github.com/rust-lang/rust/blob/049a49b91151a88c95fa0d62a53fd0a0ac2c3af9/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs#L5
        #
        # which specifically supports "long" (multi-byte) nops.  See
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1568450#c8 for details.
        #
        # The pentium4 seems like kind of a weird CPU to optimize for, but
        # it seems to have worked out OK thus far.  LLVM does not seem to
        # specifically schedule code for the pentium4's deep pipeline, so
        # that probably contributes to it being an OK default for our
        # purposes.
        if target.cpu == "x86":
            ldflags.append("-mllvm:-mcpu=pentium4")
        # This is also the CPU that Rust uses.  The LLVM source code
        # recommends this as the "generic 64-bit specific x86 processor model":
        #
        # https://github.com/llvm/llvm-project/blob/e7694f34ab6a12b8bb480cbfcb396d0a64fe965f/llvm/lib/Target/X86/X86.td#L1165-L1187
        if target.cpu == "x86_64":
            ldflags.append("-mllvm:-mcpu=x86-64")
        # We do not need special flags for arm64.  Hooray for fixed-length
        # instruction sets.
    else:
        # Every remaining compiler type (presumably gcc): run the link-time
        # optimization with one job per CPU.
        num_cores = multiprocessing.cpu_count()
        cflags.append("-flto")
        cflags.append("-flifetime-dse=1")

        ldflags.append("-flto=%s" % num_cores)
        ldflags.append("-flifetime-dse=1")

    # Tell LTO not to inline functions above a certain size, to mitigate
    # binary size growth while still getting good performance.
    # (For hot functions, PGO will put a multiplier on this limit.)
    if c_compiler.type == "clang-cl":
        ldflags.append("-mllvm:-import-instr-limit=10")
    elif target.kernel == "Darwin":
        ldflags.append("-Wl,-mllvm,-import-instr-limit=10")
    elif c_compiler.type == "clang":
        ldflags.append("-Wl,-plugin-opt=-import-instr-limit=10")

    # If we're using the new pass manager, we can also enable the new PM
    # during LTO. Further we can use the resulting size savings to increase
    # the import limit in hot functions.
    if pass_manager:
        if c_compiler.type == "clang-cl":
            if c_compiler.version >= "12.0.0" and c_compiler.version < "13.0.0":
                ldflags.append("-opt:ltonewpassmanager")
            if c_compiler.version >= "12.0.0":
                ldflags.append("-mllvm:-import-hot-multiplier=30")
        elif target.kernel == "Darwin":
            ldflags.append("-Wl,-mllvm,-import-hot-multiplier=30")
        else:
            if c_compiler.version < "13.0.0":
                ldflags.append("-Wl,-plugin-opt=new-pass-manager")
            ldflags.append("-Wl,-plugin-opt=-import-hot-multiplier=30")

    # Pick Rust LTO mode in case of cross LTO. Thin is the default.
    if "cross" in values:
        rust_lto = "full" if "full" in values else "thin"
    else:
        rust_lto = ""

    return namespace(
        enabled=True,
        cflags=cflags,
        ldflags=ldflags,
        rust_lto=rust_lto,
    )

# Append -Wl,--gc-sections to the DSO linker options.  Applies only when
# building with a GNU-compatible compiler and GNU ld, outside of developer
# builds and profile-generate builds.
@depends(
    dso_flags,
    when=building_with_gnu_compatible_cc
    & gcc_use_gnu_ld
    & ~developer_options
    & ~enable_profile_generate,
)
def remove_dead_symbols(dso_flags):
    # Mutates the ldopts list of the dso_flags value in place; nothing is
    # returned.
    dso_flags.ldopts.append("-Wl,--gc-sections")

# Publish the attributes of the namespace returned by lto(): the enabled
# flag (as both a config value and a define), the compiler and linker flags,
# and the Rust LTO mode used for cross-language LTO.
set_config("MOZ_LTO", lto.enabled)

set_define("MOZ_LTO", lto.enabled)

set_config("MOZ_LTO_CFLAGS", lto.cflags)

set_config("MOZ_LTO_LDFLAGS", lto.ldflags)

set_config("MOZ_LTO_RUST_CROSS", lto.rust_lto)