test_policy_evaluator.js

firefox-main/toolkit/components/ml/tests/xpcshell/test_policy_evaluator.js (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Core :: Machine Learning: On Device

Revision control

Copy as Markdown

Other Tools

Test Info: Warnings

This test gets skipped with pattern: os == 'android'
Manifest: toolkit/components/ml/tests/xpcshell/xpcshell.toml

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**

 * Unit tests for PolicyEvaluator.sys.mjs

 * Note: PolicyEvaluator is used internally by SecurityOrchestrator.

 * These tests verify policy evaluation behavior through the public API

 * rather than testing internal implementation details.

 * Focus: Policy matching, deny/allow effects, multiple conditions

*/

const { SecurityOrchestrator } = ChromeUtils.importESModule(

  "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"

);

// Name of the preference that setup() and teardown() clear around each task.
const PREF_SECURITY_ENABLED = "browser.ml.security.enabled";

/**
 * Orchestrator instance shared by the tasks in this file. Each task assigns
 * the result of SecurityOrchestrator.create() to it, and teardown() resets it
 * to null.
 *
 * @type {SecurityOrchestrator|null}
 */

let orchestrator = null;

/**
 * Per-task setup: clears any user-set value of the security pref so the
 * task does not see a value left behind by an earlier one.
 */
function setup() {
  Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
}

/**
 * Per-task teardown: clears any user-set value of the security pref and
 * drops the reference to the orchestrator used by the task.
 */
function teardown() {
  Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
  orchestrator = null;
}

/**
 * Test: a policy fires for the phase it is scoped to.
 *
 * Reason:
 * Policies are bound to specific phases (e.g., "tool.execution"). A policy
 * must evaluate only when the envelope carries that phase, so that policies
 * cannot interfere with unrelated operations.
 */
add_task(async function test_policy_matches_correct_phase() {
  setup();

  // Envelope in the tool.execution phase, which our policies target.
  const envelope = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      urls: ["https://evil.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  };

  orchestrator = await SecurityOrchestrator.create("test-session");

  // Touch tab-1's ledger without adding any URL to it.
  orchestrator.getSessionLedger().forTab("tab-1");

  const verdict = await orchestrator.evaluate(envelope);

  Assert.equal(
    verdict.effect,
    "deny",
    "Policy should match tool.execution phase"
  );
  Assert.equal(verdict.policyId, "block-unseen-links");

  teardown();
});

/**
 * Test: an unrecognised phase matches no policy.
 *
 * Reason:
 * If no policy is scoped to the requested phase, the decision defaults to
 * allow. New phases can therefore be introduced without touching policies,
 * and unknown phases never produce false denials.
 */
add_task(async function test_policy_ignores_unknown_phase() {
  setup();

  // Same action as the deny cases, but in a phase no policy is scoped to.
  const envelope = {
    phase: "unknown.phase",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      urls: ["https://evil.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  };

  orchestrator = await SecurityOrchestrator.create("test-session");
  orchestrator.getSessionLedger().forTab("tab-1");

  const verdict = await orchestrator.evaluate(envelope);

  Assert.equal(
    verdict.effect,
    "allow",
    "Unknown phase should not match policies (allow by default)"
  );

  teardown();
});

/**
 * Test: a deny policy blocks the request when its condition fails.
 *
 * Reason:
 * When the requested URL is absent from the ledger, the deny policy's
 * condition fails and the decision must be a deny carrying code, reason,
 * policyId, and details. This is the core security enforcement mechanism.
 */
add_task(async function test_deny_policy_denies_when_condition_fails() {
  setup();

  const envelope = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // Deliberately a URL that is never added to the ledger.
      urls: ["https://evil.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  };

  orchestrator = await SecurityOrchestrator.create("test-session");

  // Only example.com is recorded for tab-1.
  orchestrator.getSessionLedger().forTab("tab-1").add("https://example.com");

  // Requested URL missing from ledger => condition fails => deny.
  const verdict = await orchestrator.evaluate(envelope);

  Assert.equal(verdict.effect, "deny", "Should deny when condition fails");
  Assert.equal(verdict.code, "UNSEEN_LINK");
  Assert.ok(verdict.reason, "Should have reason");
  Assert.equal(verdict.policyId, "block-unseen-links");
  Assert.ok(verdict.details, "Should include failure details");

  teardown();
});

/**
 * Test: a deny policy lets the request through when its condition passes.
 *
 * Reason:
 * A deny policy blocks only on a failing condition. When every requested URL
 * is in the ledger the condition passes, the policy does not apply, and the
 * request is allowed, so legitimate requests are not blocked.
 */
add_task(
  async function test_deny_policy_passes_through_when_condition_passes() {
    setup();

    orchestrator = await SecurityOrchestrator.create("test-session");
    const ledger = orchestrator.getSessionLedger();
    ledger.forTab("tab-1").add("https://example.com");

    // URL in ledger = condition passes = policy doesn't apply (allow)
    const decision = await orchestrator.evaluate({
      phase: "tool.execution",
      action: {
        type: "tool.call",
        tool: "get_page_content",
        urls: ["https://example.com"], // In ledger
        tabId: "tab-1",
      },
      context: {
        currentTabId: "tab-1",
        mentionedTabIds: [],
        requestId: "test",
      },
    });

    Assert.equal(
      decision.effect,
      "allow",
      "Should allow when deny policy condition passes (policy doesn't apply)"
    );

    teardown();
  }
);

/**
 * Test: every URL in the request is checked, not just the first.
 *
 * Reason:
 * Security is all-or-nothing: one unseen URL must cause the whole request
 * to be denied. Inspecting only the first URL would let an attacker smuggle
 * unseen URLs into a multi-URL request.
 */
add_task(async function test_policy_checks_all_urls() {
  setup();

  const envelope = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      urls: [
        "https://example.com", // recorded in the ledger
        "https://evil.com", // never recorded
      ],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  };

  orchestrator = await SecurityOrchestrator.create("test-session");

  // Record example.com only; evil.com is intentionally left out.
  orchestrator.getSessionLedger().forTab("tab-1").add("https://example.com");

  const verdict = await orchestrator.evaluate(envelope);

  Assert.equal(
    verdict.effect,
    "deny",
    "Should deny if ANY URL fails condition (all-or-nothing)"
  );

  teardown();
});

/**
 * Test: a multi-URL request is allowed when every URL is known.
 *
 * Reason:
 * With every requested URL present in the ledger the condition passes and
 * the request is allowed. This covers the happy path for multi-URL tool
 * calls.
 */
add_task(async function test_policy_allows_when_all_urls_valid() {
  setup();

  const seenUrls = ["https://example.com", "https://mozilla.org"];

  orchestrator = await SecurityOrchestrator.create("test-session");

  // Record both URLs against tab-1 before evaluating.
  const knownLinks = orchestrator.getSessionLedger().forTab("tab-1");
  for (const url of seenUrls) {
    knownLinks.add(url);
  }

  const envelope = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // Fresh array holding the same two URLs that were just recorded.
      urls: [...seenUrls],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  };
  const verdict = await orchestrator.evaluate(envelope);

  Assert.equal(
    verdict.effect,
    "allow",
    "Should allow when all URLs pass condition"
  );

  teardown();
});

/**
 * Test: the get_page_content tool is covered by the policy.
 *
 * Reason:
 * get_page_content fetches external URLs and is the primary vector for
 * prompt injection attacks, so the block-unseen-links policy has to apply
 * to it in order to prevent malicious URL access.
 */
add_task(async function test_policy_applies_to_get_page_content() {
  setup();

  // Call to get_page_content (the main URL-fetching tool) for a URL that is
  // never added to the ledger.
  const envelope = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      urls: ["https://evil.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  };

  orchestrator = await SecurityOrchestrator.create("test-session");
  orchestrator.getSessionLedger().forTab("tab-1");

  const verdict = await orchestrator.evaluate(envelope);

  Assert.equal(
    verdict.effect,
    "deny",
    "Policy should apply to get_page_content"
  );

  teardown();
});

/**
 * Test: a deny decision carries the information needed to diagnose it.
 *
 * Reason:
 * Logging and debugging rely on the deny decision exposing code, reason,
 * policyId, and details, so that it is clear which policy blocked a request
 * and why.
 */
add_task(async function test_deny_decision_includes_policy_info() {
  setup();

  const envelope = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      urls: ["https://evil.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  };

  orchestrator = await SecurityOrchestrator.create("test-session");
  orchestrator.getSessionLedger().forTab("tab-1");

  const verdict = await orchestrator.evaluate(envelope);

  // Check each field of the decision in turn.
  Assert.equal(verdict.effect, "deny", "Should have effect");
  Assert.equal(verdict.code, "UNSEEN_LINK", "Should have code");
  Assert.ok(verdict.reason, "Should have reason");
  Assert.equal(
    verdict.policyId,
    "block-unseen-links",
    "Should identify policy"
  );
  Assert.ok(verdict.details, "Should have details");
  Assert.ok(
    verdict.details.failedCondition,
    "Should identify failed condition"
  );

  teardown();
});