test_json_policy_system.js

firefox-main/toolkit/components/ml/tests/xpcshell/test_json_policy_system.js (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Core :: Machine Learning: On Device

Revision control

Copy as Markdown

Other Tools

Test Info: Warnings

This test gets skipped with pattern: os == 'android'
Manifest: toolkit/components/ml/tests/xpcshell/xpcshell.toml

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**

 * Integration tests for JSON Policy System

 * Focus: End-to-end flows with real JSON policies

 * - Real policy loading from tool-execution-policies.json

 * - Critical allow/deny flows

 * - Integration with SecurityOrchestrator

 * - @Mentions support

*/

const { SecurityOrchestrator } = ChromeUtils.importESModule(

  "chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"

);

// Boolean preference controlling ML security policy enforcement. The tasks
// in this file clear it in setup()/teardown(); the pref-switch task sets it
// to false.
const PREF_SECURITY_ENABLED = "browser.ml.security.enabled";

// chrome:// URL of the policy definition file fetched by the JSON
// load/validate task.
const POLICY_JSON_URL =

  "chrome://global/content/ml/security/policies/tool-execution-policies.json";

// Orchestrator instance for the task currently running; each task assigns
// it and teardown() resets it to null.
/** @type {SecurityOrchestrator|null} */

let orchestrator = null;

/**
 * Clears any user-set value of the security preference so a task starts
 * without an override left behind by an earlier task.
 */
function setup() {
  Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
}

/**
 * Clears any user-set value of the security preference and drops the
 * module-level orchestrator reference so it is not reused by the next task.
 */
function teardown() {
  Services.prefs.clearUserPref(PREF_SECURITY_ENABLED);
  orchestrator = null;
}

/**
 * Test: JSON policy file loads and has valid structure.
 *
 * Reason:
 * The policy JSON file is fetched at runtime. This test validates that
 * the file is reachable, parses as JSON, exposes a non-empty `policies`
 * array, and that the first policy contains the required fields
 * (id, phase, effect). Build-time validation catches authoring errors.
 */
add_task(async function test_json_policy_file_loads_and_validates() {
  setup();

  const response = await fetch(POLICY_JSON_URL);

  // Check the status before parsing so that an unreachable file is reported
  // by this assertion rather than by a JSON parse error.
  Assert.ok(response.ok, "Policy JSON should be accessible");

  const policyData = await response.json();

  // File parses and carries a real array of policies
  Assert.ok(Array.isArray(policyData.policies), "Should have policies array");
  Assert.greater(
    policyData.policies.length,
    0,
    "Should have at least one policy"
  );

  // First policy has required structure
  const policy = policyData.policies[0];
  Assert.ok(policy.id, "Policy should have id");
  Assert.ok(policy.phase, "Policy should have phase");
  Assert.ok(policy.effect, "Policy should have effect");

  teardown();
});

/**
 * Test: SecurityOrchestrator initializes with policies loaded.
 *
 * Reason:
 * Policies have to be loaded while the orchestrator is being created,
 * otherwise nothing is available at evaluation time. Beyond checking that
 * creation succeeds, this task runs one evaluation to confirm the loaded
 * policies actually take effect.
 */
add_task(async function test_orchestrator_initializes_with_policies() {
  setup();

  // If create succeeds, policies loaded correctly
  orchestrator = await SecurityOrchestrator.create("test-session");
  const sessionLedger = orchestrator.getSessionLedger();
  Assert.ok(sessionLedger, "Should initialize successfully");
  Assert.ok(orchestrator.getSessionLedger(), "Should have session ledger");

  // Run a real evaluation against a URL that was never added to the ledger.
  sessionLedger.forTab("tab-1");
  const action = {
    type: "tool.call",
    tool: "get_page_content",
    urls: ["https://evil.com"],
    tabId: "tab-1",
  };
  const context = {
    currentTabId: "tab-1",
    mentionedTabIds: [],
    requestId: "test",
  };
  const { effect, policyId } = await orchestrator.evaluate({
    phase: "tool.execution",
    action,
    context,
  });

  Assert.equal(
    effect,
    "deny",
    "Policies should be loaded and working (denies unseen URL)"
  );
  Assert.equal(policyId, "block-unseen-links", "Should use JSON policy");

  teardown();
});

/**
 * Test: end-to-end deny for unseen link.
 *
 * Reason:
 * The central security guarantee is that a URL absent from the ledger is
 * denied. This task checks that the real JSON policy yields that denial
 * together with the expected code and policyId.
 */
add_task(async function test_e2e_deny_unseen_link() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  // No URL is added for tab-1, so its ledger stays empty.
  orchestrator.getSessionLedger().forTab("tab-1");

  const request = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // This URL was never added to the ledger.
      urls: ["https://evil.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test-deny",
    },
  };
  const verdict = await orchestrator.evaluate(request);

  Assert.equal(
    verdict.effect,
    "deny",
    "CRITICAL: Should deny unseen URL (real policy from JSON)"
  );
  Assert.equal(
    verdict.code,
    "UNSEEN_LINK",
    "Should have UNSEEN_LINK code from JSON policy"
  );
  Assert.equal(
    verdict.policyId,
    "block-unseen-links",
    "Should be from block-unseen-links policy"
  );

  teardown();
});

/**
 * Test: end-to-end deny if any URL is unseen.
 *
 * Reason:
 * Enforcement is all-or-nothing. When a request carries several URLs and
 * even one of them is missing from the ledger, the whole request is
 * denied; partially trusting a request is not acceptable.
 */
add_task(async function test_e2e_deny_if_any_url_unseen() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  const sessionLedger = orchestrator.getSessionLedger();
  // Only example.com is added to tab-1's ledger.
  sessionLedger.forTab("tab-1").add("https://example.com");

  const seenUrl = "https://example.com";
  const unseenUrl = "https://evil.com";
  const { effect, code } = await orchestrator.evaluate({
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      urls: [seenUrl, unseenUrl],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test-deny-multiple",
    },
  });

  Assert.equal(
    effect,
    "deny",
    "Should deny if ANY URL unseen (all-or-nothing security)"
  );
  Assert.equal(code, "UNSEEN_LINK");

  teardown();
});

/**
 * Test: end-to-end deny for malformed URL.
 *
 * Reason:
 * The system fails closed. A URL that cannot be parsed or normalized
 * cannot be checked against the ledger, so it is handled as unseen and
 * the request is denied.
 */
add_task(async function test_e2e_deny_malformed_url() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  orchestrator.getSessionLedger().forTab("tab-1");

  const malformedRequest = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      urls: ["not-a-valid-url"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test-malformed",
    },
  };
  const outcome = await orchestrator.evaluate(malformedRequest);

  Assert.equal(
    outcome.effect,
    "deny",
    "Should deny malformed URL (fail-closed)"
  );
  // There is no dedicated "malformed" code: such URLs are reported as
  // unseen because they are not in the ledger.
  Assert.equal(outcome.code, "UNSEEN_LINK");

  teardown();
});

/**
 * Test: end-to-end allow for seeded URL.
 *
 * Reason:
 * This is the happy path for legitimate tool calls: a URL that was seeded
 * into the ledger (from user-visible page context) has to be allowed.
 */
add_task(async function test_e2e_allow_seeded_url() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  const sessionLedger = orchestrator.getSessionLedger();
  sessionLedger.forTab("tab-1").add("https://example.com");

  const action = {
    type: "tool.call",
    tool: "get_page_content",
    // Same URL that was just added to tab-1's ledger.
    urls: ["https://example.com"],
    tabId: "tab-1",
  };
  const context = {
    currentTabId: "tab-1",
    mentionedTabIds: [],
    requestId: "test-allow",
  };
  const { effect } = await orchestrator.evaluate({
    phase: "tool.execution",
    action,
    context,
  });

  Assert.equal(
    effect,
    "allow",
    "CRITICAL: Should allow seeded URL (real policy from JSON)"
  );

  teardown();
});

/**
 * Test: end-to-end allow for multiple seeded URLs.
 *
 * Reason:
 * A tool call can ask for several URLs at once. If every one of them is
 * in the ledger the request should go through, which confirms the
 * allUrlsIn condition copes with arrays.
 */
add_task(async function test_e2e_allow_multiple_seeded_urls() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  const seededUrls = ["https://example.com", "https://mozilla.org"];
  const currentTabLedger = orchestrator.getSessionLedger().forTab("tab-1");
  for (const url of seededUrls) {
    currentTabLedger.add(url);
  }

  const outcome = await orchestrator.evaluate({
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // Request exactly the URLs that were seeded, as a fresh array.
      urls: [...seededUrls],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test-allow-multiple",
    },
  });

  Assert.equal(outcome.effect, "allow", "Should allow when all URLs seeded");

  teardown();
});

/**
 * Test: end-to-end allow for empty URLs array.
 *
 * Reason:
 * Not every tool call needs URL access. With an empty URLs array there is
 * nothing to validate, so the request should be allowed.
 */
add_task(async function test_e2e_allow_empty_urls() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  orchestrator.getSessionLedger().forTab("tab-1");

  // The action carries no URLs at all.
  const action = {
    type: "tool.call",
    tool: "get_page_content",
    urls: [],
    tabId: "tab-1",
  };
  const context = {
    currentTabId: "tab-1",
    mentionedTabIds: [],
    requestId: "test-empty",
  };
  const result = await orchestrator.evaluate({
    phase: "tool.execution",
    action,
    context,
  });

  Assert.equal(result.effect, "allow", "Should allow when no URLs to check");

  teardown();
});

/**
 * Test: end-to-end allow for URL from @mentioned tab.
 *
 * Reason:
 * With @mentions a user explicitly grants access to URLs that belong to
 * other tabs. A URL found in the ledger of a mentioned tab should
 * therefore be allowed.
 */
add_task(async function test_e2e_allow_url_from_mentioned_tab() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  const sessionLedger = orchestrator.getSessionLedger();

  // tab-1 is the current tab; tab-2 is the mentioned tab and holds a
  // different URL.
  const seeds = [
    ["tab-1", "https://example.com"],
    ["tab-2", "https://mozilla.org"],
  ];
  for (const [tabId, url] of seeds) {
    sessionLedger.forTab(tabId).add(url);
  }

  const { effect } = await orchestrator.evaluate({
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // Only present in the mentioned tab's ledger.
      urls: ["https://mozilla.org"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      // The request @mentions tab-2.
      mentionedTabIds: ["tab-2"],
      requestId: "test-mention-allow",
    },
  });

  Assert.equal(
    effect,
    "allow",
    "Should allow URL from @mentioned tab (merged ledger)"
  );

  teardown();
});

/**
 * Test: end-to-end deny for URL not in current or @mentioned tabs.
 *
 * Reason:
 * @mentions widen the set of trusted ledgers but do not remove the
 * requirement: a URL that appears neither in the current tab's ledger
 * nor in any mentioned tab's ledger must still be denied.
 */
add_task(async function test_e2e_deny_url_not_in_mentioned_tabs() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  const sessionLedger = orchestrator.getSessionLedger();
  const currentTabLedger = sessionLedger.forTab("tab-1");
  currentTabLedger.add("https://example.com");
  const mentionedTabLedger = sessionLedger.forTab("tab-2");
  mentionedTabLedger.add("https://mozilla.org");

  const request = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // Added to neither tab-1 nor tab-2.
      urls: ["https://evil.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: ["tab-2"],
      requestId: "test-mention-deny",
    },
  };
  const verdict = await orchestrator.evaluate(request);

  Assert.equal(
    verdict.effect,
    "deny",
    "Should deny URL not in current or @mentioned tabs"
  );

  teardown();
});

/**
 * Test: end-to-end URL normalization strips fragments.
 *
 * Reason:
 * Two URLs that differ only in their fragment (#section) point at the
 * same resource. Normalization lets a user who visited `page` reach
 * `page#section` without being falsely denied.
 */
add_task(async function test_e2e_url_normalization_strips_fragments() {
  setup();

  orchestrator = await SecurityOrchestrator.create("test-session");
  const tabLedger = orchestrator.getSessionLedger().forTab("tab-1");
  // The ledger entry carries no fragment.
  tabLedger.add("https://example.com/page");

  const action = {
    type: "tool.call",
    tool: "get_page_content",
    // The requested URL adds a fragment to the seeded one.
    urls: ["https://example.com/page#section"],
    tabId: "tab-1",
  };
  const context = {
    currentTabId: "tab-1",
    mentionedTabIds: [],
    requestId: "test-normalize",
  };
  const { effect } = await orchestrator.evaluate({
    phase: "tool.execution",
    action,
    context,
  });

  Assert.equal(
    effect,
    "allow",
    "Should allow after normalizing (fragments stripped)"
  );

  teardown();
});

/**
 * Test: end-to-end preference switch bypasses policies.
 *
 * Reason:
 * Setting browser.ml.security.enabled to false has to turn off all policy
 * enforcement so every request is let through. That gives a debugging
 * aid and an escape hatch should the policies cause problems.
 */
add_task(async function test_e2e_pref_switch_bypasses_policies() {
  setup();

  // Turn enforcement off before the orchestrator is created.
  Services.prefs.setBoolPref(PREF_SECURITY_ENABLED, false);

  orchestrator = await SecurityOrchestrator.create("test-session");
  // No URL is added for tab-1, so its ledger stays empty.
  orchestrator.getSessionLedger().forTab("tab-1");

  const request = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // Never added to the ledger; expected to pass only because the pref
      // is off.
      urls: ["https://evil.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test-prefswitch",
    },
  };
  const outcome = await orchestrator.evaluate(request);

  Assert.equal(
    outcome.effect,
    "allow",
    "Pref switch OFF: should bypass all policies (allow everything)"
  );

  teardown();
});