# mypy: allow-untyped-calls, allow-untyped-defs

"""Runner for web-platform-tests

The runner has several design goals:

* Tests should run with no modification from upstream.

* Tests should be regarded as "untrusted" so that errors, timeouts and even
  crashes in the tests can be handled without failing the entire test run.

* For performance, tests can be run in multiple browsers in parallel.

The upstream repository has the facility for creating a test manifest in JSON
format. This manifest is used directly to determine which tests exist. Local
metadata files are used to store the expected test results.
"""

import json
import os
import signal
import sys
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Any, Tuple

from mozlog import capture, handlers

import wptserve
from wptserve import sslutils

from . import environment as env
from . import instruments
from . import mpcontext
from . import products
from . import testloader
from . import wptcommandline
from . import wptlogging
from . import wpttest
from .font import FontInstaller
from .testrunner import ManagerGroup, TestImplementation

here = os.path.dirname(__file__)

logger = None
def setup_logging(*args, **kwargs):
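    """Set up the global logger and return it."""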
global logger
logger = wptlogging.setup(*args, **kwargs)
return logger
def get_loader(test_paths: wptcommandline.TestPaths,
product: products.Product,
**kwargs: Any) -> Tuple[testloader.TestQueueBuilder, testloader.TestLoader]:
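    """Create the test queue builder and test loader for the given product
    and command-line options."""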
run_info_extras = product.run_info_extras(logger, **kwargs)
base_run_info = wpttest.get_run_info(kwargs["run_info"],
product.name,
browser_version=kwargs.get("browser_version"),
browser_channel=kwargs.get("browser_channel"),
verify=kwargs.get("verify"),
debug=kwargs["debug"],
extras=run_info_extras,
device_serials=kwargs.get("device_serial"),
adb_binary=kwargs.get("adb_binary"))
subsuites = testloader.load_subsuites(logger,
base_run_info,
kwargs["subsuite_file"],
set(kwargs["subsuites"] or []))
if kwargs["test_groups_file"] is not None:
test_groups = testloader.TestGroups(logger,
kwargs["test_groups_file"],
subsuites)
else:
test_groups = None
test_manifests = testloader.ManifestLoader(test_paths,
force_manifest_update=kwargs["manifest_update"],
manifest_download=kwargs["manifest_download"]).load()
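    # manifest_filters are applied while tests are loaded from the manifest;
    # test_filters (e.g. tag filtering) are applied to the loaded tests.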
manifest_filters = []
test_filters = []
include = kwargs["include"]
if kwargs["include_file"]:
include = include or []
include.extend(testloader.read_include_from_file(kwargs["include_file"]))
if test_groups:
include = testloader.update_include_for_groups(test_groups, include)
if kwargs["tags"] or kwargs["exclude_tags"]:
test_filters.append(testloader.TagFilter(kwargs["tags"], kwargs["exclude_tags"]))
if include or kwargs["exclude"] or kwargs["include_manifest"] or kwargs["default_exclude"]:
manifest_filters.append(testloader.TestFilter(include=include,
exclude=kwargs["exclude"],
manifest_path=kwargs["include_manifest"],
test_manifests=test_manifests,
explicit=kwargs["default_exclude"]))
ssl_enabled = sslutils.get_cls(kwargs["ssl_type"]).ssl_enabled
h2_enabled = wptserve.utils.http2_compatible()
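    # Only include HTTPS and HTTP/2 tests when the environment can serve them.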
test_queue_builder, chunker_kwargs = testloader.get_test_queue_builder(logger=logger,
test_groups=test_groups,
**kwargs)
test_loader = testloader.TestLoader(test_manifests=test_manifests,
test_types=kwargs["test_types"],
base_run_info=base_run_info,
subsuites=subsuites,
manifest_filters=manifest_filters,
test_filters=test_filters,
chunk_type=kwargs["chunk_type"],
total_chunks=kwargs["total_chunks"],
chunk_number=kwargs["this_chunk"],
include_https=ssl_enabled,
include_h2=h2_enabled,
include_webtransport_h3=kwargs["enable_webtransport_h3"],
skip_timeout=kwargs["skip_timeout"],
skip_crash=kwargs["skip_crash"],
skip_implementation_status=kwargs["skip_implementation_status"],
chunker_kwargs=chunker_kwargs)
return test_queue_builder, test_loader
def list_test_groups(test_paths, product, **kwargs):
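    """Print each test group to stdout."""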
env.do_delayed_imports(logger, test_paths)
_, test_loader = get_loader(test_paths,
product,
**kwargs)
for item in sorted(test_loader.groups(kwargs["test_types"])):
print(item)
def list_disabled(test_paths, product, **kwargs):
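    """Print the disabled tests, and the reason each is disabled, as JSON."""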
env.do_delayed_imports(logger, test_paths)
rv = []
_, test_loader = get_loader(test_paths, product, **kwargs)
for test_type, tests in test_loader.disabled_tests.items():
for test in tests:
rv.append({"test": test.id, "reason": test.disabled()})
print(json.dumps(rv, indent=2))
def list_tests(test_paths, product, **kwargs):
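    """Print the id of every selected test to stdout."""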
env.do_delayed_imports(logger, test_paths)
_, test_loader = get_loader(test_paths, product, **kwargs)
for test in test_loader.test_ids:
print(test)
def get_pause_after_test(test_loader, **kwargs):
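    """Determine whether to pause the browser after each test.

    An explicit pause_after_test setting always wins. Otherwise, default to
    pausing only when debugging a test, or when a single testharness test is
    run exactly once in a non-headless browser."""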
if kwargs["pause_after_test"] is not None:
return kwargs["pause_after_test"]
if kwargs["repeat_until_unexpected"]:
return False
if kwargs["headless"]:
return False
if kwargs["debug_test"]:
return True
tests = test_loader.tests
is_single_testharness = True
testharness_count = 0
    # Note: the inner loop variable must not shadow the `tests` dict above.
    for tests_by_type in tests.values():
        for test_type, type_tests in tests_by_type.items():
            if test_type != "testharness" and len(type_tests):
                is_single_testharness = False
                break
            elif test_type == "testharness":
                testharness_count += len(type_tests)
                if testharness_count > 1:
                    is_single_testharness = False
                    break
        if not is_single_testharness:
            break
    return kwargs["repeat"] == 1 and kwargs["rerun"] == 1 and is_single_testharness
def log_suite_start(tests_by_group, base_run_info, subsuites, run_by_dir):
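    """Log the suite_start message and register each subsuite with the logger."""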
logger.suite_start(tests_by_group,
name='web-platform-test',
run_info=base_run_info,
extra={"run_by_dir": run_by_dir})
for name, subsuite in subsuites.items():
logger.add_subsuite(name=name, run_info=subsuite.run_info_extras)
def run_test_iteration(test_status, test_loader, test_queue_builder,
recording, test_environment, product, kwargs):
"""Runs the entire test suite.
This is called for each repeat run requested."""
tests_by_type = defaultdict(list)
for test_type in test_loader.test_types:
for subsuite_name, subsuite in test_loader.subsuites.items():
type_tests_active = test_loader.tests[subsuite_name][test_type]
type_tests_disabled = test_loader.disabled_tests[subsuite_name][test_type]
if type_tests_active or type_tests_disabled:
tests_by_type[(subsuite_name, test_type)].extend(type_tests_active)
tests_by_type[(subsuite_name, test_type)].extend(type_tests_disabled)
tests_by_group = test_queue_builder.tests_by_group(tests_by_type)
log_suite_start(tests_by_group,
test_loader.base_run_info,
test_loader.subsuites,
kwargs["run_by_dir"])
test_implementations = {}
tests_to_run = defaultdict(list)
for test_type in test_loader.test_types:
executor_cls = product.executor_classes.get(test_type)
if executor_cls is None:
logger.warning(f"Unsupported test type {test_type} for product {product.name}")
continue
browser_cls = product.get_browser_cls(test_type)
for subsuite_name, subsuite in test_loader.subsuites.items():
if (subsuite_name, test_type) not in tests_by_type:
continue
run_info = subsuite.run_info
executor_kwargs = product.get_executor_kwargs(logger,
test_type,
test_environment,
run_info,
subsuite=subsuite,
**kwargs)
browser_kwargs = product.get_browser_kwargs(logger,
test_type,
run_info,
config=test_environment.config,
num_test_groups=len(tests_by_group),
subsuite=subsuite,
**kwargs)
test_implementations[(subsuite_name, test_type)] = TestImplementation(executor_cls,
executor_kwargs,
browser_cls,
browser_kwargs)
for test in test_loader.disabled_tests[subsuite_name][test_type]:
logger.test_start(test.id, subsuite=subsuite_name)
logger.test_end(test.id, status="SKIP", subsuite=subsuite_name)
test_status.skipped += 1
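            # Some testharness tests require testdriver.js or a jsshell;
            # skip any that the chosen executor cannot support.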
if test_type == "testharness":
for test in test_loader.tests[subsuite_name][test_type]:
skip_reason = None
if test.testdriver and not executor_cls.supports_testdriver:
skip_reason = "Executor does not support testdriver.js"
elif test.jsshell and not executor_cls.supports_jsshell:
skip_reason = "Executor does not support jsshell"
if skip_reason:
logger.test_start(test.id, subsuite=subsuite_name)
logger.test_end(test.id,
status="SKIP",
subsuite=subsuite_name,
message=skip_reason)
test_status.skipped += 1
else:
tests_to_run[(subsuite_name, test_type)].append(test)
else:
tests_to_run[(subsuite_name, test_type)] = test_loader.tests[subsuite_name][test_type]
unexpected_fail_tests = defaultdict(list)
unexpected_pass_tests = defaultdict(list)
recording.pause()
retry_counts = kwargs["retry_unexpected"]
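    # Each retry pass re-runs only the tests that produced unexpected results
    # on the previous pass (plus unexpected passes when those are treated as
    # failures).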
for retry_index in range(retry_counts + 1):
if retry_index > 0:
if kwargs["fail_on_unexpected_pass"]:
                for (subsuite_name, test_type), tests in unexpected_pass_tests.items():
                    unexpected_fail_tests[(subsuite_name, test_type)].extend(tests)
tests_to_run = unexpected_fail_tests
if sum(len(tests) for tests in tests_to_run.values()) == 0:
break
tests_by_group = test_queue_builder.tests_by_group(tests_to_run)
logger.suite_end()
log_suite_start(tests_by_group,
test_loader.base_run_info,
test_loader.subsuites,
kwargs["run_by_dir"])
with ManagerGroup("web-platform-tests",
test_queue_builder,
test_implementations,
retry_index,
kwargs["rerun"],
kwargs["pause_after_test"],
kwargs["pause_on_unexpected"],
kwargs["restart_on_unexpected"],
kwargs["debug_info"],
not kwargs["no_capture_stdio"],
kwargs["restart_on_new_group"],
recording=recording,
max_restarts=kwargs["max_restarts"],
) as manager_group:
try:
handle_interrupt_signals()
manager_group.run(tests_to_run)
except KeyboardInterrupt:
logger.critical(
"Main thread got signal; "
"waiting for TestRunnerManager threads to exit.")
manager_group.stop()
manager_group.wait(timeout=10)
raise
test_status.total_tests += manager_group.test_count()
unexpected_fail_tests = manager_group.unexpected_fail_tests()
unexpected_pass_tests = manager_group.unexpected_pass_tests()
test_status.unexpected_pass += sum(len(tests) for tests in unexpected_pass_tests.values())
test_status.unexpected += sum(len(tests) for tests in unexpected_pass_tests.values())
test_status.unexpected += sum(len(tests) for tests in unexpected_fail_tests.values())
logger.suite_end()
return True
def handle_interrupt_signals():
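    """Make termination signals (SIGBREAK on Windows, SIGTERM elsewhere)
    raise KeyboardInterrupt, so shutdown is handled like Ctrl-C."""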
def termination_handler(_signum, _unused_frame):
raise KeyboardInterrupt()
if sys.platform == "win32":
signal.signal(signal.SIGBREAK, termination_handler)
else:
signal.signal(signal.SIGTERM, termination_handler)
def evaluate_runs(test_status, **kwargs):
"""Evaluates the test counts after the given number of repeat runs has finished"""
if test_status.total_tests == 0:
if test_status.skipped > 0:
logger.warning("All requested tests were skipped")
else:
if kwargs["default_exclude"]:
logger.info("No tests ran")
return True
else:
logger.critical("No tests ran")
return False
if test_status.unexpected and not kwargs["fail_on_unexpected"]:
logger.info(f"Tolerating {test_status.unexpected} unexpected results")
return True
all_unexpected_passed = (test_status.unexpected and
test_status.unexpected == test_status.unexpected_pass)
if all_unexpected_passed and not kwargs["fail_on_unexpected_pass"]:
logger.info(f"Tolerating {test_status.unexpected_pass} unexpected results "
"because they all PASS")
return True
return test_status.unexpected == 0
class TestStatus:
"""Class that stores information on the results of test runs for later reference"""
def __init__(self):
self.total_tests = 0
self.skipped = 0
self.unexpected = 0
self.unexpected_pass = 0
self.repeated_runs = 0
self.expected_repeated_runs = 0
self.all_skipped = False
def run_tests(config, product, test_paths, **kwargs):
"""Set up the test environment, load the list of tests to be executed, and
invoke the remainder of the code to execute tests"""
mp = mpcontext.get_context()
if kwargs["instrument_to_file"] is None:
recorder = instruments.NullInstrument()
else:
recorder = instruments.Instrument(kwargs["instrument_to_file"])
with recorder as recording, capture.CaptureIO(logger,
not kwargs["no_capture_stdio"],
mp_context=mp):
recording.set(["startup"])
env.do_delayed_imports(logger, test_paths)
env_extras = product.get_env_extras(**kwargs)
product.check_args(**kwargs)
if kwargs["install_fonts"]:
env_extras.append(FontInstaller(
logger,
font_dir=kwargs["font_dir"],
ahem=os.path.join(test_paths["/"].tests_path, "fonts/Ahem.ttf")
))
recording.set(["startup", "load_tests"])
test_queue_builder, test_loader = get_loader(test_paths,
product,
**kwargs)
test_status = TestStatus()
repeat = kwargs["repeat"]
test_status.expected_repeated_runs = repeat
if len(test_loader.test_ids) == 0 and kwargs["test_list"]:
logger.critical("Unable to find any tests at the path(s):")
for path in kwargs["test_list"]:
logger.critical(" %s" % path)
logger.critical("Please check spelling and make sure there are tests in the specified path(s).")
return False, test_status
kwargs["pause_after_test"] = get_pause_after_test(test_loader, **kwargs)
ssl_config = {"type": kwargs["ssl_type"],
"openssl": {"openssl_binary": kwargs["openssl_binary"]},
"pregenerated": {"host_key_path": kwargs["host_key_path"],
"host_cert_path": kwargs["host_cert_path"],
"ca_cert_path": kwargs["ca_cert_path"]}}
        # testharness.js is shared by all subsuites, so its timeout multiplier
        # cannot be set per-subsuite; use the base run info instead.
        testharness_timeout_multiplier = product.get_timeout_multiplier("testharness",
                                                                        test_loader.base_run_info,
                                                                        **kwargs)
mojojs_path = kwargs["mojojs_path"] if kwargs["enable_mojojs"] else None
inject_script = kwargs["inject_script"] if kwargs["inject_script"] else None
recording.set(["startup", "start_environment"])
with env.TestEnvironment(test_paths,
                                 testharness_timeout_multiplier,
kwargs["pause_after_test"],
kwargs["debug_test"],
kwargs["debug_info"],
product.env_options,
ssl_config,
env_extras,
kwargs["enable_webtransport_h3"],
mojojs_path,
inject_script,
kwargs["suppress_handler_traceback"]) as test_environment:
recording.set(["startup", "ensure_environment"])
try:
test_environment.ensure_started()
start_time = datetime.now()
except env.TestEnvironmentError as e:
logger.critical("Error starting test environment: %s" % e)
raise
recording.set(["startup"])
max_time = None
if "repeat_max_time" in kwargs:
max_time = timedelta(minutes=kwargs["repeat_max_time"])
repeat_until_unexpected = kwargs["repeat_until_unexpected"]
# keep track of longest time taken to complete a test suite iteration
# so that the runs can be stopped to avoid a possible TC timeout.
longest_iteration_time = timedelta()
while test_status.repeated_runs < repeat or repeat_until_unexpected:
                # If the next repeat run could cause the TC timeout to be reached,
                # stop now and use the test results we have.
                # Pad the longest iteration time by 10% to leave ample headroom.
estimate = (datetime.now() +
timedelta(seconds=(longest_iteration_time.total_seconds() * 1.1)))
if not repeat_until_unexpected and max_time and estimate >= start_time + max_time:
logger.info(f"Ran {test_status.repeated_runs} of {repeat} iterations.")
break
# begin tracking runtime of the test suite
iteration_start = datetime.now()
test_status.repeated_runs += 1
if repeat_until_unexpected:
logger.info(f"Repetition {test_status.repeated_runs}")
elif repeat > 1:
logger.info(f"Repetition {test_status.repeated_runs} / {repeat}")
iter_success = run_test_iteration(test_status,
test_loader,
test_queue_builder,
recording,
test_environment,
product,
kwargs)
                # If there were issues with the suite run (tests not loaded, etc.), return early.
if not iter_success:
return False, test_status
recording.set(["after-end"])
logger.info(f"Got {test_status.unexpected} unexpected results, "
f"with {test_status.unexpected_pass} unexpected passes")
# Note this iteration's runtime
iteration_runtime = datetime.now() - iteration_start
# determine the longest test suite runtime seen.
longest_iteration_time = max(longest_iteration_time,
iteration_runtime)
if repeat_until_unexpected and test_status.unexpected > 0:
break
if test_status.repeated_runs == 1 and len(test_loader.test_ids) == test_status.skipped:
test_status.all_skipped = True
break
    # Return the evaluation of the runs along with the test status (which
    # records how many iterations actually ran).
return evaluate_runs(test_status, **kwargs), test_status
def check_stability(**kwargs):
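    """Run the --verify stability checks.

    The deprecated --stability options are mapped onto their --verify
    equivalents before delegating to stability.check_stability."""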
from . import stability
if kwargs["stability"]:
logger.warning("--stability is deprecated; please use --verify instead!")
kwargs['verify_max_time'] = None
kwargs['verify_chaos_mode'] = False
kwargs['verify_repeat_loop'] = 0
kwargs['verify_repeat_restart'] = 10 if kwargs['repeat'] == 1 else kwargs['repeat']
kwargs['verify_output_results'] = True
return stability.check_stability(logger,
max_time=kwargs['verify_max_time'],
chaos_mode=kwargs['verify_chaos_mode'],
repeat_loop=kwargs['verify_repeat_loop'],
repeat_restart=kwargs['verify_repeat_restart'],
output_results=kwargs['verify_output_results'],
**kwargs)
def start(**kwargs):
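    """Perform the action requested in kwargs (list tests, check stability,
    or run tests), returning a truthy value on failure."""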
assert logger is not None
logged_critical = wptlogging.LoggedAboveLevelHandler("CRITICAL")
handler = handlers.LogLevelFilter(logged_critical, "CRITICAL")
logger.add_handler(handler)
rv = False
try:
if kwargs["list_test_groups"]:
list_test_groups(**kwargs)
elif kwargs["list_disabled"]:
list_disabled(**kwargs)
elif kwargs["list_tests"]:
list_tests(**kwargs)
elif kwargs["verify"] or kwargs["stability"]:
rv = check_stability(**kwargs) or logged_critical.has_log
else:
rv = not run_tests(**kwargs)[0] or logged_critical.has_log
finally:
logger.shutdown()
logger.remove_handler(handler)
return rv
def main():
"""Main entry point when calling from the command line"""
kwargs = wptcommandline.parse_args()
try:
if kwargs["prefs_root"] is None:
kwargs["prefs_root"] = os.path.abspath(os.path.join(here, "prefs"))
setup_logging(kwargs, {"raw": sys.stdout})
return start(**kwargs)
except Exception:
if kwargs["pdb"]:
import pdb
import traceback
print(traceback.format_exc())
pdb.post_mortem()
else:
raise