Source code
Revision control
Copy as Markdown
Other Tools
/**
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
/**
* This file contains LLM tool abstractions and tool definitions.
*/
import { searchBrowsingHistory as implSearchBrowsingHistory } from "moz-src:///browser/components/aiwindow/models/SearchBrowsingHistory.sys.mjs";
import { PageExtractorParent } from "resource://gre/actors/PageExtractorParent.sys.mjs";
// Holder object for the module exports registered below. The rest of this
// file reads them as lazy.AIWindow, lazy.BrowserWindowTracker and
// lazy.PageDataService.
const lazy = {};
// Registers one getter per export name on `lazy`, mapping it to the module URI.
// NOTE(review): presumably each module is only loaded the first time its
// getter is read — confirm against the ChromeUtils documentation.
ChromeUtils.defineESModuleGetters(lazy, {
AIWindow:
"moz-src:///browser/components/aiwindow/ui/modules/AIWindow.sys.mjs",
BrowserWindowTracker: "resource:///modules/BrowserWindowTracker.sys.mjs",
PageDataService:
"moz-src:///browser/components/pagedata/PageDataService.sys.mjs",
});
// Tool names. Each one is used as the `function.name` of an entry in
// `toolsConfig` below.
const GET_OPEN_TABS = "get_open_tabs";
const SEARCH_BROWSING_HISTORY = "search_browsing_history";
const GET_PAGE_CONTENT = "get_page_content";

/** Names of every tool defined in this file, in `toolsConfig` order. */
export const TOOLS = [GET_OPEN_TABS, SEARCH_BROWSING_HISTORY, GET_PAGE_CONTENT];

/**
 * Tool definitions, one per entry of TOOLS. Every entry has the shape
 * `{ type: "function", function: { name, description, parameters } }`, where
 * `parameters` is a JSON-schema-style object describing the tool arguments.
 */
export const toolsConfig = [
  {
    type: "function",
    function: {
      name: GET_OPEN_TABS,
      description:
        "Access the user's browser and return a list of most recently browsed tabs. " +
        "Each tab is represented by a JSON with the page's url, title and description " +
        "if available. Default to return maximum 15 tabs.",
      parameters: {
        type: "object",
        properties: {},
      },
    },
  },
  {
    type: "function",
    function: {
      name: SEARCH_BROWSING_HISTORY,
      description:
        "Retrieve pages from the user's past browsing history, optionally filtered by " +
        "topic and/or time range.",
      parameters: {
        type: "object",
        properties: {
          searchTerm: {
            type: "string",
            description:
              "A concise phrase describing what the user is trying to find in their " +
              "browsing history (topic, site, or purpose).",
          },
          startTs: {
            type: "string",
            description:
              "Inclusive start of the time range as a local ISO 8601 datetime " +
              "('YYYY-MM-DDTHH:mm:ss', no timezone).",
          },
          endTs: {
            type: "string",
            description:
              "Inclusive end of the time range as a local ISO 8601 datetime " +
              "('YYYY-MM-DDTHH:mm:ss', no timezone).",
          },
        },
      },
    },
  },
  {
    type: "function",
    function: {
      name: GET_PAGE_CONTENT,
      description:
        "Retrieve cleaned text content of the provided browser page URL.",
      parameters: {
        // Declared explicitly so this schema matches the two tools above.
        type: "object",
        properties: {
          url: {
            type: "string",
            description:
              "The complete URL of the page to fetch content from. This must exactly match " +
              "a URL from the current conversation context. Use the full URL including " +
              "the protocol (http:// or https://).",
          },
        },
        required: ["url"],
      },
    },
  },
];
/**
 * Retrieves up to n of the most recently accessed open tabs, collected from
 * every tracked browser window for which the AI Window is active.
 * Ignores config pages (about:xxx).
 * TODO: Ignores chat-only pages (FE to implement isSidebarMode flag).
 *
 * The description lookup is best-effort: if it throws or rejects for a tab,
 * the error is logged and that tab is returned with an empty description, so
 * a single failing page cannot reject the whole list.
 *
 * @param {number} n
 *   Maximum number of tabs to return. Defaults to 15.
 * @returns {Promise<Array<object>>}
 *   A promise resolving to an array of tab metadata objects, each containing:
 *   - url {string}: The tab's current URL
 *   - title {string}: The tab's title
 *   - description {string}: Optional description (empty string if not available)
 *   - lastAccessed {number}: Last accessed timestamp in milliseconds
 *   Tabs are sorted by most recently accessed and limited to the first n results.
 */
export async function getOpenTabs(n = 15) {
  const candidates = [];
  for (const win of lazy.BrowserWindowTracker.orderedWindows) {
    if (!lazy.AIWindow.isAIWindowActive(win) || win.closed || !win.gBrowser) {
      continue;
    }
    for (const tab of win.gBrowser.tabs) {
      const url = tab.linkedBrowser?.currentURI?.spec;
      // Skip tabs without a URL and internal about: pages.
      if (url && !url.startsWith("about:")) {
        candidates.push({
          url,
          title: tab.label,
          lastAccessed: tab.lastAccessed,
        });
      }
    }
  }

  // Most recently accessed first.
  candidates.sort((a, b) => b.lastAccessed - a.lastAccessed);

  return Promise.all(
    candidates.slice(0, n).map(async ({ url, title, lastAccessed }) => {
      let description = "";
      try {
        // Prefer the cached page data; only fetch when nothing is cached.
        description =
          lazy.PageDataService.getCached(url)?.description ||
          (await lazy.PageDataService.fetchPageData(url))?.description ||
          "";
      } catch (error) {
        // The description is optional, so keep the tab and report the failure.
        console.error(error);
      }
      return { url, title, description, lastAccessed };
    })
  );
}
/**
 * Tool entrypoint for search_browsing_history.
 *
 * Parameters (defaults shown):
 * - searchTerm: ""        - string used for search
 * - startTs: null         - local ISO timestamp lower bound, or null
 * - endTs: null           - local ISO timestamp upper bound, or null
 * - historyLimit: 15      - max number of results
 *
 * Only these four fields are forwarded; any other property on toolParams is
 * dropped. Detailed behavior and implementation are in
 * SearchBrowsingHistory.sys.mjs.
 *
 * @param {object} [toolParams]
 *   The search parameters. May be omitted entirely, in which case every
 *   default above applies.
 * @param {string} [toolParams.searchTerm]
 *   The search string. If null or empty, semantic search is skipped and
 *   results are filtered by time range and sorted by last_visit_date and frecency.
 * @param {string|null} [toolParams.startTs]
 *   Optional local ISO-8601 start timestamp (e.g. "2025-11-07T09:00:00").
 * @param {string|null} [toolParams.endTs]
 *   Optional local ISO-8601 end timestamp (e.g. "2025-11-07T09:00:00").
 * @param {number} [toolParams.historyLimit]
 *   Maximum number of history results to return.
 * @returns {Promise<object>}
 *   A promise resolving to an object with the search term and history results.
 *   Includes `count` when matches exist, a `message` when none are found, or an
 *   `error` string on failure.
 */
export async function searchBrowsingHistory({
  searchTerm = "",
  startTs = null,
  endTs = null,
  historyLimit = 15,
} = {}) {
  // The `= {}` default keeps a call with no argument from throwing while
  // destructuring; every field is optional in the tool schema.
  return implSearchBrowsingHistory({
    searchTerm,
    startTs,
    endTs,
    historyLimit,
  });
}
/**
 * Drops token-heavy fields from a serialized browsing-history search response.
 *
 * The input is returned untouched when it is not valid JSON, when the parsed
 * payload carries a truthy `error`, or when `results` is missing, not an
 * array, or empty. Otherwise `favicon` and `thumbnail` are removed from every
 * object entry of `results` and the payload is serialized again.
 *
 * @param {string} result
 *   A JSON string representing the history search response.
 * @returns {string}
 *   The re-serialized JSON without the heavy fields, or the original string
 *   in any of the pass-through cases above.
 */
export function stripSearchBrowsingHistoryFields(result) {
  // Fields that cost tokens without helping the model.
  const heavyFields = ["favicon", "thumbnail"];

  try {
    const payload = JSON.parse(result);
    const entries = payload.results;
    const hasEntries = Array.isArray(entries) && entries.length > 0;

    if (payload.error || !hasEntries) {
      return result;
    }

    entries
      .filter(entry => entry !== null && typeof entry === "object")
      .forEach(entry => {
        heavyFields.forEach(field => {
          delete entry[field];
        });
      });

    return JSON.stringify(payload);
  } catch {
    // Unparseable (or non-object) input is handed back unchanged.
    return result;
  }
}
/**
 * Class for handling page content extraction with configurable modes and limits.
 *
 * All members are static; the tool entrypoint is `getPageContent`.
 */
export class GetPageContent {
  /** Mode tried first by getPageContent. */
  static DEFAULT_MODE = "reader";
  /** Mode tried when reader mode produces no text. */
  static FALLBACK_MODE = "full";
  /** Length above which the cleaned page text gets truncated. */
  static MAX_CHARACTERS = 10000;

  /**
   * Extraction strategies keyed by mode name. Each handler takes a page
   * extractor and resolves to `{ text: string }`. The extractor result is
   * normalized because the handlers previously disagreed on whether
   * `getText()` resolves to a string or to an object with a `text` property.
   */
  static MODE_HANDLERS = {
    viewport: async pageExtractor => ({
      text: GetPageContent.#textOf(
        await pageExtractor.getText({ justViewport: true })
      ),
    }),
    reader: async pageExtractor => ({
      text: GetPageContent.#textOf(await pageExtractor.getReaderModeContent()),
    }),
    full: async pageExtractor => ({
      text: GetPageContent.#textOf(await pageExtractor.getText()),
    }),
  };

  /**
   * Tool entrypoint for get_page_content.
   *
   * @param {object} toolParams
   * @param {string} toolParams.url
   * @param {Set<string>} allowedUrls
   * @returns {Promise<string>}
   *   A promise resolving to a string containing the extracted page content
   *   with a descriptive header, or an error message if extraction fails.
   */
  static async getPageContent({ url }, allowedUrls) {
    try {
      // NOTE(review): this branch runs when the URL is NOT in allowedUrls,
      // although the original comment here read "extract directly if exists".
      // The condition is kept as found — confirm the intended polarity.
      if (!allowedUrls.has(url)) {
        // It might be a better idea to have the lifetime of the page be tied to the chat
        // while it's open, and with a "keep alive" timeout. For now it's simpler to just
        // load the page fresh every time.
        //
        // `await` is required: without it a rejection would skip the catch
        // below and reach the caller instead of becoming an error message.
        return await PageExtractorParent.getHeadlessExtractor(
          url,
          pageExtractor =>
            this.#runExtraction(pageExtractor, this.DEFAULT_MODE, url)
        );
      }

      const targetTab = this.#findTabForUrl(url);
      if (!targetTab) {
        return `Cannot find URL: ${url}, page content extraction failed.`;
      }

      const currentWindowContext =
        targetTab.linkedBrowser.browsingContext?.currentWindowContext;
      if (!currentWindowContext) {
        // Stripped message "The tab may still be loading or is not accessible." to not confuse the LLM
        return `Cannot access content from "${targetTab.label}" at ${url}.`;
      }

      // Extract page content using PageExtractor
      const pageExtractor =
        await currentWindowContext.getActor("PageExtractor");
      return await this.#runExtraction(
        pageExtractor,
        this.DEFAULT_MODE,
        `"${targetTab.label}" (${url})`
      );
    } catch (error) {
      // i.e., will the LLM keep retrying get_page_content due to error?
      console.error(error);
      // Stripped ${error.message} content to not confuse the LLM
      return `Error retrieving content from ${url}.`;
    }
  }

  /**
   * Normalizes an extractor or handler result to a plain string.
   *
   * @param {unknown} value
   * @returns {string}
   *   `value` itself when it is a string, `value.text` when that is a string,
   *   otherwise the empty string.
   */
  static #textOf(value) {
    if (typeof value === "string") {
      return value;
    }
    return typeof value?.text === "string" ? value.text : "";
  }

  /**
   * Looks for an open tab showing `url` in the windows where the AI Window is
   * active. Within each window an exact URL match is tried first, then a
   * host:port match (for cases where protocols differ). The first window that
   * yields a match wins.
   *
   * @param {string} url
   * @returns {object|null} The matching tab, or null when none is found.
   */
  static #findTabForUrl(url) {
    // Parsed once up front; stays null when `url` is not a valid URL, in
    // which case only exact matching is attempted.
    let inputHostPort = null;
    try {
      inputHostPort = new URL(url).host;
    } catch {
      // Invalid URL, continue with exact matching only
    }

    for (const win of lazy.BrowserWindowTracker.orderedWindows) {
      if (!lazy.AIWindow.isAIWindowActive(win) || win.closed || !win.gBrowser) {
        continue;
      }
      const tabs = win.gBrowser.tabs;

      let match = tabs.find(
        tab => tab?.linkedBrowser?.currentURI?.spec === url
      );
      if (!match && inputHostPort !== null) {
        match = tabs.find(tab => {
          try {
            return tab.linkedBrowser.currentURI.hostPort === inputHostPort;
          } catch {
            // Reading hostPort failed for this tab; treat it as a non-match.
            return false;
          }
        });
      }
      if (match) {
        return match;
      }
    }
    return null;
  }

  /**
   * Runs one mode handler and returns its text. A throwing handler is logged
   * and treated as "no content".
   *
   * @param {PageExtractor} pageExtractor
   * @param {string} mode Key into MODE_HANDLERS.
   * @param {string} failureMessage Log prefix used when the handler throws.
   * @returns {Promise<string>} Extracted text, or "" when nothing was produced.
   */
  static async #extractWithMode(pageExtractor, mode, failureMessage) {
    try {
      const handler = this.MODE_HANDLERS[mode];
      return this.#textOf(await handler(pageExtractor));
    } catch (err) {
      console.error(failureMessage, mode, err);
      return "";
    }
  }

  /**
   * Main extraction function.
   * label is of form `{tab.title} ({tab.url})`.
   *
   * @param {PageExtractor} pageExtractor
   * @param {string} mode
   * @param {string} label
   * @returns {Promise<string>}
   *   A promise resolving to a formatted string containing the page content
   *   with mode and label information, or an error message if no content is available.
   */
  static async #runExtraction(pageExtractor, mode, label) {
    const selectedMode =
      typeof mode === "string" && this.MODE_HANDLERS[mode]
        ? mode
        : this.DEFAULT_MODE;

    // Track which mode was actually used (in case we fall back)
    let actualMode = selectedMode;
    let pageContent = await this.#extractWithMode(
      pageExtractor,
      selectedMode,
      "[SmartWindow] get_page_content mode failed"
    );

    // If reader mode returns no content, fall back to full mode
    if (!pageContent && selectedMode === "reader") {
      pageContent = await this.#extractWithMode(
        pageExtractor,
        this.FALLBACK_MODE,
        "[SmartWindow] get_page_content fallback mode failed"
      );
      if (pageContent) {
        actualMode = this.FALLBACK_MODE;
      }
    }

    if (!pageContent) {
      // Stripped message "Try another mode if you still need information." to not confuse the LLM
      return `get_page_content(${selectedMode}) returned no content for ${label}.`;
    }

    // Collapse every whitespace run (newlines included) to a single space.
    // A former follow-up replace of /\n\s*\n/ could never match after this
    // step and was removed; the output is unchanged.
    let cleanContent = pageContent.replace(/\s+/g, " ").trim();

    // Limit content length but be more generous for LLM processing
    // we can remove this and directly do this in pageExtractor.
    if (cleanContent.length > this.MAX_CHARACTERS) {
      // Try to cut at a sentence boundary within the last 100 characters
      const truncatePoint = cleanContent.lastIndexOf(".", this.MAX_CHARACTERS);
      if (truncatePoint > this.MAX_CHARACTERS - 100) {
        cleanContent = cleanContent.substring(0, truncatePoint + 1);
      } else {
        cleanContent = cleanContent.substring(0, this.MAX_CHARACTERS) + "...";
      }
    }

    let modeLabel;
    switch (actualMode) {
      case "viewport":
        modeLabel = "current viewport";
        break;
      case "reader":
        modeLabel = "reader mode";
        break;
      case "full":
        modeLabel = "full page";
        break;
    }

    return `Content (${modeLabel}) from ${label}:\n${cleanContent}`;
  }
}