test_intent_classifier.js

firefox-main/browser/components/aiwindow/models/tests/xpcshell/test_intent_classifier.js (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Core :: Machine Learning: General

Revision control

Copy as Markdown

Other Tools

Test Info: Warnings

This test runs only with pattern: os != 'android'
Manifest: browser/components/aiwindow/models/tests/xpcshell/xpcshell.toml

/* Any copyright is dedicated to the Public Domain.

 * http://creativecommons.org/publicdomain/zero/1.0/ */

const {

  IntentClassifier,

  normalizeTextForChatAllowlist,

  tokenizeTextForChatAllowlist,

  buildChatAllowlist,

  makeIsolatedPhraseChecker,

} = ChromeUtils.importESModule(

  "moz-src:///browser/components/aiwindow/models/IntentClassifier.sys.mjs"

);

const { sinon } = ChromeUtils.importESModule(

  "resource://testing-common/Sinon.sys.mjs"

);

// Checks that IntentClassifier.getPromptIntent() reports the expected intent
// label for a handful of prompts. The real engine is replaced, through a stub
// on IntentClassifier._createEngine, by a fake whose run() scores a prompt as
// "search" when it contains one of a fixed list of keywords and as "chat"
// otherwise.
add_task(async function test_getPromptIntent_basic() {
  const sb = sinon.createSandbox();

  try {
    const cases = [
      { prompt: "please search for news on firefox", expected: "search" },
      {
        prompt: "Can you FIND me the docs for PageAssist?",
        expected: "search",
      }, // case-insensitive
      { prompt: "look up the best pizza in SF", expected: "search" },
      { prompt: "hello there, how are you?", expected: "chat" },
      { prompt: "tell me a joke", expected: "chat" },
    ];

    const fakeEngine = {
      // The destructuring mirrors the request shape this test expects the
      // classifier to pass: the query text is the first element of the first
      // entry in `args`.
      run({ args: [[query]] }) {
        const searchKeywords = [
          "search",
          "find",
          "look",
          "query",
          "locate",
          "explore",
        ];

        // Lowercase first so keyword matching is case-insensitive.
        const formattedPrompt = query.toLowerCase();
        const isSearch = searchKeywords.some(keyword =>
          formattedPrompt.includes(keyword)
        );

        // Simulate model confidence scores
        if (isSearch) {
          return [
            { label: "search", score: 0.95 },
            { label: "chat", score: 0.05 },
          ];
        }

        return [
          { label: "chat", score: 0.95 },
          { label: "search", score: 0.05 },
        ];
      },
    };

    sb.stub(IntentClassifier, "_createEngine").resolves(fakeEngine);

    for (const { prompt, expected } of cases) {
      const intent = await IntentClassifier.getPromptIntent(prompt);
      Assert.equal(
        intent,
        expected,
        `getPromptIntent("${prompt}") should return "${expected}"`
      );
    }
  } finally {
    // Always undo the stub so later tasks see the real _createEngine.
    sb.restore();
  }
});

// Exercises IntentClassifier._preprocessQuery() directly (no stubbing) with
// inputs that contain question marks at the start, middle and end, plus
// surrounding whitespace, and compares each result to the expected string.
add_task(async function test_preprocessQuery_removes_question_marks() {
  // Call the real helper on the classifier
  const cases = [
    { input: "hello?", expected: "hello" },
    { input: "?prompt", expected: "prompt" },
    { input: "multiple???", expected: "multiple" },
    { input: "mid?dle", expected: "middle" },
    { input: "question? ", expected: "question" },
    // Outer whitespace is expected to go; the inner double space is kept.
    { input: " no?  spaces? ", expected: "no  spaces" },
    { input: "???", expected: "" },
    { input: "clean input", expected: "clean input" },
  ];

  for (const { input, expected } of cases) {
    const result = IntentClassifier._preprocessQuery(input);
    Assert.equal(
      result,
      expected,
      `Expected "${input}" to preprocess to "${expected}", got "${result}"`
    );
  }
});

// Table-driven check of normalizeTextForChatAllowlist(): each row pairs a raw
// string with the normalized form the test expects and the assertion message.
add_task(function test_normalizeTextForChatAllowlist_basic() {
  const scenarios = [
    {
      // Mixed case, padded, with runs of internal spaces.
      raw: "  HeLLo   There  ",
      normalized: "hello there",
      message: "Should lowercase, trim, and collapse spaces",
    },
    {
      // Fullwidth letters and digits are expected to fold to ASCII (NFKC).
      raw: "ＴＥＳＴ  １２３",
      normalized: "test 123",
      message: "Should NFKC-normalize fullwidth letters/digits",
    },
    {
      // NBSP, tab and newline are all expected to become a single space.
      raw: "a\u00A0b\tc\nd",
      normalized: "a b c d",
      message: "Should collapse all whitespace kinds to single spaces",
    },
  ];

  for (const { raw, normalized, message } of scenarios) {
    Assert.equal(normalizeTextForChatAllowlist(raw), normalized, message);
  }
});

// Table-driven check of tokenizeTextForChatAllowlist(): each row lists the
// text to tokenize, the token array the test expects, and the assertion
// message.
add_task(function test_tokenizeTextForChatAllowlist_unicode_and_boundaries() {
  const scenarios = [
    {
      // Punctuation separates tokens; letters, digits and underscores stay.
      text: "hello, world! 42_times",
      tokens: ["hello", "world", "42_times"],
      message: "Should split on punctuation and keep underscores",
    },
    {
      // Accented letters belong to the token; the em dash and "!" do not.
      text: "mañana—café!",
      tokens: ["mañana", "café"],
      message:
        "Should keep Unicode letters and split on punctuation (em dash, bang)",
    },
    {
      // An apostrophe acts as a separator, producing a lone "s" token.
      text: "what's up",
      tokens: ["what", "s", "up"],
      message: "Apostrophes are separators, so tokens split around them",
    },
  ];

  for (const { text, tokens, message } of scenarios) {
    Assert.deepEqual(tokenizeTextForChatAllowlist(text), tokens, message);
  }
});

// Checks the structure returned by buildChatAllowlist(): the test treats it as
// a Map-like object (has/get/iteration) keyed by token count, whose values are
// Set-like collections of normalized phrases.
add_task(function test_buildChatAllowlist_grouping_and_normalization() {
  const phrases = [
    "sup",
    "hi there", // 2 tokens
    "what's up", // becomes "what s up" (3 tokens)
    " foo   bar  ", // leading/trailing + multiple spaces
    "", // empty should be skipped
    "___", // token of underscores counts as 1 token
  ];

  const sets = buildChatAllowlist(phrases);

  // Expect keys for lengths: 1, 2, 3
  Assert.ok(sets.has(1), "Should have set for single-token phrases");
  Assert.ok(sets.has(2), "Should have set for two-token phrases");
  Assert.ok(sets.has(3), "Should have set for three-token phrases");

  // 1-token set contains: "sup", "___"
  Assert.ok(sets.get(1).has("sup"), "Single-token set should contain 'sup'");
  Assert.ok(sets.get(1).has("___"), "Single-token set should contain '___'");

  // 2-token set contains normalized "hi there" and "foo bar"
  Assert.ok(
    sets.get(2).has("hi there"),
    "Two-token set should contain 'hi there'"
  );
  Assert.ok(
    sets.get(2).has("foo bar"),
    "Two-token set should contain normalized 'foo bar'"
  );

  // 3-token set contains "what s up" (note apostrophe split)
  Assert.ok(
    sets.get(3).has("what s up"),
    "Three-token set should contain 'what s up'"
  );

  // Empty phrase skipped: nothing added for length 0
  for (const [k, set] of sets) {
    Assert.ok(
      k > 0 && set.size >= 1,
      "No empty keys, each set has at least one entry"
    );
  }
});

// Single-token phrases: the checker built by makeIsolatedPhraseChecker() must
// accept inputs where a phrase stands as its own token and reject inputs where
// the phrase only occurs inside a longer word.
add_task(function test_isolated_phrase_checker_single_word_boundaries() {
  const isForced = makeIsolatedPhraseChecker(["sup", "hello", "___"]);

  // Inputs in which one of the phrases appears as a standalone token.
  const shouldMatch = [
    ["sup bro", "Should match 'sup' as an isolated token at start"],
    ["hey, hello there", "Should match 'hello' surrounded by punctuation"],
    ["foo ___ bar", "Should match token with underscores"],
  ];
  for (const [query, message] of shouldMatch) {
    Assert.ok(isForced(query), message);
  }

  // Inputs in which a phrase is only a fragment of a larger word.
  const shouldNotMatch = [
    ["supposingly, this should not match", "No partial-word match for 'sup'"],
    ["supper time", "No partial-word match inside 'supper'"],
    ["shelloworld", "No partial-word match for 'hello'"],
  ];
  for (const [query, message] of shouldNotMatch) {
    Assert.ok(!isForced(query), message);
  }
});

// Multi-token phrases: punctuation between or around the words must still
// count as a match, while the same words glued together must not. The phrase
// "what's up" is expected to be handled as the tokens "what s up".
add_task(function test_isolated_phrase_checker_multiword_and_punctuation() {
  const isForced = makeIsolatedPhraseChecker(["hi there", "what's up"]);

  // Separators between/around the words still leave the token sequence intact.
  const shouldMatch = [
    ["hi—there!", "Em dash between words should match 'hi there'"],
    ["well, hi there!!", "Punctuation around phrase should match"],
    [
      "so, what’s up today?",
      "Curly apostrophe splits to tokens; should match 'what s up'",
    ],
  ];
  for (const [query, message] of shouldMatch) {
    Assert.ok(isForced(query), message);
  }

  // With no separators the input is one long token and must be rejected.
  const shouldNotMatch = [
    ["hithere", "Concatenated words should not match 'hi there'"],
    ["whatssup", "Should not match 'what s up' without separators"],
  ];
  for (const [query, message] of shouldNotMatch) {
    Assert.ok(!isForced(query), message);
  }
});

// Whitespace and case variations of the input must not prevent a match.
add_task(function test_isolated_phrase_checker_spacing_and_unicode_norm() {
  const isForced = makeIsolatedPhraseChecker(["good morning", "hello"]);

  const shouldMatch = [
    // A run of ordinary spaces between the two words.
    [
      "good     morning everyone",
      "Multiple spaces between tokens should still match",
    ],
    // Upper-case input padded with spaces on both sides.
    [
      "  HELLO  ",
      "Case and surrounding spaces should normalize and match 'hello'",
    ],
    // A non-breaking space and a tab used as separators.
    ["good\u00A0morning\tteam", "NBSP and tabs normalize and match"],
  ];

  for (const [query, message] of shouldMatch) {
    Assert.ok(isForced(query), message);
  }
});

// Inputs that must be rejected by the checker: empty text, text unrelated to
// any phrase, and a word that merely starts with one of the phrases.
add_task(function test_isolated_phrase_checker_no_match_cases() {
  const isForced = makeIsolatedPhraseChecker(["hi there", "sup"]);

  const shouldNotMatch = [
    ["", "Empty string should not match"],
    ["nothing to see here", "Unrelated text should not match"],
    ["support", "Partial token with 'sup' prefix should not match"],
  ];

  for (const [query, message] of shouldNotMatch) {
    Assert.ok(!isForced(query), message);
  }
});

// Stability checks: asking the same checker the same question twice must give
// the same answer, and inputs that differ only in whitespace must agree.
add_task(function test_isolated_phrase_checker_caching_stability() {
  const isForced = makeIsolatedPhraseChecker(["hello", "hi there"]);

  // Two consecutive calls with an identical query.
  const repeatedQuery = "Hello there!";
  const initialAnswer = isForced(repeatedQuery);
  const repeatAnswer = isForced(repeatedQuery);
  Assert.equal(
    initialAnswer,
    repeatAnswer,
    "Same query should yield identical result across calls (cache-stable)"
  );

  // The padded variant is evaluated first, then the compact one.
  const paddedAnswer = isForced("  hello   there ");
  const compactAnswer = isForced("hello there");
  Assert.equal(
    paddedAnswer,
    compactAnswer,
    "Whitespace variations should not affect result"
  );
});