// Source code
// Revision control
// Copy as Markdown
// Other Tools
// Test Info: Warnings
// - This test gets skipped with pattern: os == 'android'
// - Manifest: toolkit/components/ml/tests/xpcshell/xpcshell.toml
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
* Unit tests for PolicyEvaluator.sys.mjs
*
* Note: PolicyEvaluator is used internally by SecurityOrchestrator.
* These tests verify policy evaluation behavior through the public API
* rather than testing internal implementation details.
*
* Focus: Policy matching, deny/allow effects, multiple conditions
*/
const { SecurityOrchestrator } = ChromeUtils.importESModule(
"chrome://global/content/ml/security/SecurityOrchestrator.sys.mjs"
);
// Name of the pref that setup() and teardown() clear around every task.
const PREF_SECURITY_ENABLED = "browser.ml.security.enabled";
// Orchestrator under test: each task assigns a fresh instance and teardown()
// resets it to null.
/** @type {SecurityOrchestrator|null} */
let orchestrator = null;
/**
 * Per-task preparation: clears any user-set value of the security pref so the
 * task does not inherit a value left behind by an earlier one.
 */
function setup() {
  const { prefs } = Services;
  prefs.clearUserPref(PREF_SECURITY_ENABLED);
}
/**
 * Per-task cleanup: clears any user-set value of the security pref and drops
 * the module-level orchestrator reference.
 */
function teardown() {
  const { prefs } = Services;
  prefs.clearUserPref(PREF_SECURITY_ENABLED);
  orchestrator = null;
}
/**
* Test: policy matches the correct phase.
*
* Reason:
* Policies are scoped to specific phases (e.g., "tool.execution").
* A policy should only evaluate when the envelope's phase matches,
* ensuring policies don't interfere with unrelated operations.
*/
add_task(async function test_policy_matches_correct_phase() {
  setup();
  orchestrator = await SecurityOrchestrator.create("test-session");
  const ledger = orchestrator.getSessionLedger();
  // No URL is recorded for tab-1 in this task, so the URL requested below is
  // one the ledger has not seen.
  ledger.forTab("tab-1");

  // tool.execution phase should match our policies
  const decision = await orchestrator.evaluate({
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // Any URL absent from the ledger works here.
      urls: ["https://example.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  });

  Assert.equal(
    decision.effect,
    "deny",
    "Policy should match tool.execution phase"
  );
  Assert.equal(decision.policyId, "block-unseen-links");
  teardown();
});
/**
* Test: policy ignores unknown phases.
*
* Reason:
* When no policy matches the requested phase, the default behavior
* is to allow. This ensures new phases can be added without requiring
* policy updates, and unknown phases don't cause false denials.
*/
add_task(async function test_policy_ignores_unknown_phase() {
  setup();
  orchestrator = await SecurityOrchestrator.create("test-session");
  const ledger = orchestrator.getSessionLedger();
  ledger.forTab("tab-1");

  // Unknown phase should not match any policies
  const decision = await orchestrator.evaluate({
    phase: "unknown.phase",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // This URL is not recorded in the ledger either, so an "allow" result
      // is down to the phase not matching rather than the URL being known.
      urls: ["https://example.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  });

  Assert.equal(
    decision.effect,
    "allow",
    "Unknown phase should not match policies (allow by default)"
  );
  teardown();
});
/**
* Test: deny policy denies when condition fails.
*
* Reason:
* A deny policy with a failing condition (URL not in ledger) must
* produce a deny decision with code, reason, policyId, and details.
* This is the core security enforcement mechanism.
*/
add_task(async function test_deny_policy_denies_when_condition_fails() {
  setup();
  orchestrator = await SecurityOrchestrator.create("test-session");
  const ledger = orchestrator.getSessionLedger();
  // Same preparation as the other deny tests: tab-1 gets no recorded URL.
  ledger.forTab("tab-1");

  // URL not in ledger = condition fails = deny
  const decision = await orchestrator.evaluate({
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // The request must carry a URL for the "not in ledger" case to be
      // exercised; any URL absent from the ledger works here.
      urls: ["https://example.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  });

  Assert.equal(decision.effect, "deny", "Should deny when condition fails");
  Assert.equal(decision.code, "UNSEEN_LINK");
  Assert.ok(decision.reason, "Should have reason");
  Assert.equal(decision.policyId, "block-unseen-links");
  Assert.ok(decision.details, "Should include failure details");
  teardown();
});
/**
* Test: deny policy passes through when condition passes.
*
* Reason:
* A deny policy only blocks when its condition fails. When the condition
* passes (all URLs in ledger), the policy doesn't apply and the request
* is allowed. This ensures legitimate requests aren't blocked.
*/
add_task(
  async function test_deny_policy_passes_through_when_condition_passes() {
    setup();
    orchestrator = await SecurityOrchestrator.create("test-session");
    const ledger = orchestrator.getSessionLedger();

    // NOTE(review): the scenario is "URL in ledger = condition passes", yet
    // nothing is recorded in the ledger and the action carries no `urls`.
    // Confirm whether those statements were lost from this task.
    const request = {
      phase: "tool.execution",
      action: {
        type: "tool.call",
        tool: "get_page_content",
        tabId: "tab-1",
      },
      context: {
        currentTabId: "tab-1",
        mentionedTabIds: [],
        requestId: "test",
      },
    };
    const { effect } = await orchestrator.evaluate(request);

    Assert.equal(
      effect,
      "allow",
      "Should allow when deny policy condition passes (policy doesn't apply)"
    );
    teardown();
  }
);
/**
* Test: policy checks all URLs in the request.
*
* Reason:
* All-or-nothing security: if any URL in the request is unseen,
* the entire request must be denied. Checking only the first URL
* would allow attackers to smuggle unseen URLs in multi-URL requests.
*/
add_task(async function test_policy_checks_all_urls() {
  setup();
  orchestrator = await SecurityOrchestrator.create("test-session");
  const ledger = orchestrator.getSessionLedger();

  // Not adding evil.com
  // NOTE(review): `urls` below is empty and no URL is recorded in the ledger,
  // so the mixed seen/unseen list this task describes is not present here.
  // Confirm against the intended fixture.
  const action = {
    type: "tool.call",
    tool: "get_page_content",
    urls: [],
    tabId: "tab-1",
  };
  const context = {
    currentTabId: "tab-1",
    mentionedTabIds: [],
    requestId: "test",
  };
  const { effect } = await orchestrator.evaluate({
    phase: "tool.execution",
    action,
    context,
  });

  Assert.equal(
    effect,
    "deny",
    "Should deny if ANY URL fails condition (all-or-nothing)"
  );
  teardown();
});
/**
* Test: policy allows when all URLs are valid.
*
* Reason:
* When every URL in the request exists in the ledger, the condition
* passes and the request is allowed. This validates the happy path
* for multi-URL tool calls.
*/
add_task(async function test_policy_allows_when_all_urls_valid() {
  setup();
  orchestrator = await SecurityOrchestrator.create("test-session");
  const tabLedger = orchestrator.getSessionLedger().forTab("tab-1");

  // NOTE(review): `tabLedger` is never populated and the action has no `urls`,
  // so the "all URLs valid" case is only trivially covered. Confirm whether
  // the seeding statements were lost from this task.
  const request = {
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  };
  const verdict = await orchestrator.evaluate(request);

  Assert.equal(
    verdict.effect,
    "allow",
    "Should allow when all URLs pass condition"
  );
  teardown();
});
/**
* Test: policy applies to get_page_content tool.
*
* Reason:
* The get_page_content tool fetches external URLs and is the primary
* vector for prompt injection attacks. The block-unseen-links policy
* must apply to this tool to prevent malicious URL access.
*/
add_task(async function test_policy_applies_to_get_page_content() {
  setup();
  orchestrator = await SecurityOrchestrator.create("test-session");
  const ledger = orchestrator.getSessionLedger();
  // No URL is recorded for tab-1 in this task.
  ledger.forTab("tab-1");

  // Verify policy applies to get_page_content (the main URL-fetching tool)
  const decision = await orchestrator.evaluate({
    phase: "tool.execution",
    action: {
      type: "tool.call",
      // The tool under test; the policy is expected to cover it.
      tool: "get_page_content",
      // Any URL absent from the ledger works here.
      urls: ["https://example.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  });

  Assert.equal(
    decision.effect,
    "deny",
    "Policy should apply to get_page_content"
  );
  teardown();
});
/**
* Test: deny decision includes policy information.
*
* Reason:
* Deny decisions must include diagnostic information (code, reason,
* policyId, details) for logging and debugging. This helps identify
* which policy blocked a request and why.
*/
add_task(async function test_deny_decision_includes_policy_info() {
  setup();
  orchestrator = await SecurityOrchestrator.create("test-session");
  const ledger = orchestrator.getSessionLedger();
  // No URL is recorded for tab-1 in this task, so the request below is
  // expected to be denied and to yield a fully populated decision.
  ledger.forTab("tab-1");

  const decision = await orchestrator.evaluate({
    phase: "tool.execution",
    action: {
      type: "tool.call",
      tool: "get_page_content",
      // Any URL absent from the ledger works here.
      urls: ["https://example.com"],
      tabId: "tab-1",
    },
    context: {
      currentTabId: "tab-1",
      mentionedTabIds: [],
      requestId: "test",
    },
  });

  // Verify decision structure
  Assert.equal(decision.effect, "deny", "Should have effect");
  Assert.equal(decision.code, "UNSEEN_LINK", "Should have code");
  Assert.ok(decision.reason, "Should have reason");
  Assert.equal(
    decision.policyId,
    "block-unseen-links",
    "Should identify policy"
  );
  Assert.ok(decision.details, "Should have details");
  Assert.ok(
    decision.details.failedCondition,
    "Should identify failed condition"
  );
  teardown();
});