Source code

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// @ts-nocheck - TODO - Remove this to type check this file.
/**
* This module handles the generation of embeddings using a dedicated
* machine learning (ML) engine.
*
* An embedding is a n-dimensional numerical representation of text
* (e.g., mdn documentation). The embedding generation involves converting
* text data such as title to meaningful vectors. These vectors could be
* compared using multi-dimensional distance measures such as cosine distance.
* Each vector is made up of values representing the relationship with
* features defined by the model.
*
* Production callers MUST go through the static factories `forPlaces` and
* `forGeneral` so the policy for picking an embedding family stays in one
* place. `forTest` is provided for tests and dev tooling.
*
* Note: The "engine" referenced in this module is specifically an ML engine
* used for feature extraction and embedding generation.
*/
import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
const lazy = {};
XPCOMUtils.defineLazyServiceGetter(
lazy,
"mlUtils",
"@mozilla.org/ml-utils;1",
Ci.nsIMLUtils
);
ChromeUtils.defineESModuleGetters(lazy, {
createEngine: "chrome://global/content/ml/EngineProcess.sys.mjs",
});
ChromeUtils.defineLazyGetter(lazy, "console", () => {
return console.createInstance({
maxLogLevelPref: "browser.ml.logLevel",
prefix: "GeckoMLEmbeddingsGenerator",
});
});
// We set a limit for semantic search to see if device has at least 7 GiB
// Note: 8GB is ~7.48 GiB. So threshold has been set as 7 GiB (i.e 7.518 GB)
const REQUIRED_MEMORY_BYTES = 7 * 1024 * 1024 * 1024;
const REQUIRED_CPU_CORES = 2;
export const EMBEDDING_TYPE = Object.freeze({
STATIC: "static",
CONTEXTUAL: "contextual",
});
// Maps an embedding type to the engineConfigId that implements it.
const EMBEDDING_BACKEND_DEFAULTS = Object.freeze({
[EMBEDDING_TYPE.STATIC]: "static-embeddings",
[EMBEDDING_TYPE.CONTEXTUAL]: "onnx-native",
});
// Pref that picks static vs contextual (i.e. transformer based) embeddings
// for Places semantic history.
const PREF_PLACES_EMBEDDING_TYPE = "places.semanticHistory.embeddingType";
// Custom embedding size and model for ML driver support of testing new embeddings.
const PREF_CONTEXTUAL_EMBEDDING_DIM = "browser.ml.embedGen.textEmbeddingSize";
const PREF_CONTEXTUAL_MODEL_NAME =
"browser.ml.embedGen.textEmbeddingFeatureModel";
// Default embedding-dimension envelope for engines that do not pin a list of
// supportedDimensions: multiple of 8, in [128, 2048].
const EMBEDDING_DIM_MIN = 128;
const EMBEDDING_DIM_MAX = 2048;
const EMBEDDING_DIM_GRANULARITY = 8;
function isValidOnnxEmbeddingDim(dim) {
return (
Number.isInteger(dim) &&
dim >= EMBEDDING_DIM_MIN &&
dim <= EMBEDDING_DIM_MAX &&
dim % EMBEDDING_DIM_GRANULARITY === 0
);
}
const staticEmbeddingsOptions = Object.freeze({
subfolder: "models/minishlab/potion-retrieval-32M",
// Available: fp32, fp16, fp8_e5m2, fp8_e4m3
dtype: "fp16",
// Use zstd compression.
compression: true,
});
// Registry of engine configurations keyed by engineConfigId. Each entry is
// the base option dict for that engine; resolveEngineOptions() clones the
// entry, fills in dimension + per-call overrides, and stamps engineConfigId.
const ENGINE_OPTIONS = new Map([
[
"onnx-native",
{
taskName: "feature-extraction",
featureId: "simple-text-embedder",
timeoutMS: -1,
numThreads: 2,
backend: "onnx-native",
fallbackEngine: "onnx-wasm",
preferredDimension: 384,
},
],
[
"onnx-wasm",
{
taskName: "feature-extraction",
featureId: "simple-text-embedder",
timeoutMS: -1,
numThreads: 2,
backend: "onnx",
preferredDimension: 384,
},
],
[
"static-embeddings",
{
featureId: "simple-text-embedder",
modelId: "mozilla/static-embeddings",
modelRevision: "v1.0.0",
taskName: "static-embeddings",
modelHub: "mozilla",
backend: "static-embeddings",
supportedDimensions: [32, 64, 128, 256, 512],
preferredDimension: 512,
},
],
]);
/**
* Resolve engine options for the given embedding type. Reads
* `browser.ml.embedGen.*` dev prefs for the contextual path.
*
* @param {string} embeddingType
* One of EMBEDDING_TYPE.*
* @param {number} [embeddingSize]
* Explicit dimension override. Otherwise the engine's preferredDimension
* (static) or the dev pref (contextual) is used.
* @returns {object} merged engine options
*/
function resolveEngineOptions(embeddingType, embeddingSize) {
const engineConfigId = EMBEDDING_BACKEND_DEFAULTS[embeddingType];
if (!engineConfigId) {
throw new TypeError(`Unknown embedding type ${embeddingType}`);
}
const base = ENGINE_OPTIONS.get(engineConfigId);
const options = { ...base, engineConfigId };
if (embeddingType === EMBEDDING_TYPE.STATIC) {
const dim = embeddingSize ?? options.preferredDimension;
if (!options.supportedDimensions.includes(dim)) {
throw new TypeError(`Unsupported static embedding size ${dim}`);
}
options.embeddingDimension = dim;
options.staticEmbeddingsOptions = {
...staticEmbeddingsOptions,
dimensions: dim,
};
return options;
}
// Contextual: dimension / model come from dev prefs unless explicit
// overrides were passed.
const dim =
embeddingSize ??
Services.prefs.getIntPref(
PREF_CONTEXTUAL_EMBEDDING_DIM,
options.preferredDimension
);
if (!isValidOnnxEmbeddingDim(dim)) {
throw new TypeError(`Unsupported contextual embedding size ${dim}`);
}
options.embeddingDimension = dim;
const modelOverride = Services.prefs.getStringPref(
PREF_CONTEXTUAL_MODEL_NAME,
""
);
if (modelOverride) {
options.modelId = modelOverride;
}
return options;
}
/**
* Get the type of the embedding we're using for Places
* (embedding vs contexual) based on config/Nimbus prefs
*/
function resolvePlacesEmbeddingType() {
const val = Services.prefs.getStringPref(
PREF_PLACES_EMBEDDING_TYPE,
EMBEDDING_TYPE.STATIC
);
return val === EMBEDDING_TYPE.CONTEXTUAL
? EMBEDDING_TYPE.CONTEXTUAL
: EMBEDDING_TYPE.STATIC;
}
export class EmbeddingsGenerator {
#engine = undefined;
#promiseEngine;
#embeddingSize;
options;
/**
* Internal — use {@link EmbeddingsGenerator.forPlaces},
* {@link EmbeddingsGenerator.forGeneral} or
* {@link EmbeddingsGenerator.forTest}.
*
* @param {object} resolvedOptions Output of `resolveEngineOptions`.
*/
constructor(resolvedOptions) {
this.options = resolvedOptions;
this.#embeddingSize = resolvedOptions.embeddingDimension;
}
/**
* Places semantic history. Embedding family comes from the Nimbus-driven
* pref `places.semanticHistory.embeddingType`.
*
* @returns {EmbeddingsGenerator}
*/
static forPlaces() {
return new EmbeddingsGenerator(
resolveEngineOptions(resolvePlacesEmbeddingType())
);
}
/**
* Smart Window memories, navigation, and other general callers.
*
* @returns {EmbeddingsGenerator}
*/
static forGeneral() {
return new EmbeddingsGenerator(
resolveEngineOptions(EMBEDDING_TYPE.CONTEXTUAL)
);
}
/**
* Tests and dev tooling only. Production callers MUST use forPlaces /
* forGeneral so policy stays in one place.
*
* @param {object} opts
* @param {string} opts.type One of EMBEDDING_TYPE.*
* @param {number} [opts.embeddingSize] Explicit dimension override.
* @returns {EmbeddingsGenerator}
*/
static forTest({ type, embeddingSize } = {}) {
return new EmbeddingsGenerator(resolveEngineOptions(type, embeddingSize));
}
get embeddingSize() {
return this.#embeddingSize;
}
/**
* Returns model metadata that affects how embeddings are stored persistently.
* If any of these values change, embeddings should be recomputed.
*
* Note that we are using only feature information passed in the constructor and
* don't rely on inference manager details. This is to keep from persistent
* embeddings to rerun unless needed and not require the ML Engine to be created
* (which may trigger a model download) before the dimension can be checked.
*
* @returns {{
* featureId: string,
* embeddingDimension: number,
* modelId: string | undefined
* }}
*/
get modelContext() {
const { featureId, embeddingDimension, modelId } = this.options || {};
return { featureId, embeddingDimension, modelId };
}
/**
* Checks if there is sufficient physical memory available.
*
* Compares the system's total physical memory against a defined
* threshold (REQUIRED_MEMORY_BYTES). It retrieves the total memory
* using the lazy.mlUtils.totalPhysicalMemory property and returns
* a boolean indicating whether the available memory meets or exceeds
* the threshold.
*
* @returns {boolean}
* - **true** if the total physical memory is equal to or greater
* than REQUIRED_MEMORY_BYTES.
* - **false** if the total physical memory is less than
* REQUIRED_MEMORY_BYTES.
*/
isEnoughPhysicalMemoryAvailable() {
lazy.console.debug(
`totalPhysicalMemory = ${lazy.mlUtils.totalPhysicalMemory}`
);
return lazy.mlUtils.totalPhysicalMemory >= REQUIRED_MEMORY_BYTES;
}
/**
* Checks if there are sufficient CPU cores available.
*
* Compares the system's optimal CPU concurrency against a defined
* threshold (REQUIRED_CPU_CORES). It retrieves the number of available
* CPU cores using the `lazy.mlUtils.getOptimalCPUConcurrency()` method
* and returns a boolean indicating whether the available cores meet
* or exceed the threshold.
*
* @returns {boolean}
* - **true** if the number of CPU cores is equal to or greater
* than REQUIRED_CPU_CORES.
* - **false** if the number of CPU cores is less than
* REQUIRED_CPU_CORES.
*/
isEnoughCpuCoresAvailable() {
lazy.console.debug(
`Number of CPU cores = ${lazy.mlUtils.getOptimalCPUConcurrency()}`
);
return lazy.mlUtils.getOptimalCPUConcurrency() >= REQUIRED_CPU_CORES;
}
/**
* Creates an ML engine if it does not already exist.
*
* @private
* @returns {Promise<void>}
* Resolves when the engine is created or already exists.
* @throws {Error}
* If the engine cannot be initialized using either primary or fallback options.
*/
async createEngineIfNotPresent() {
if (!this.#engine) {
try {
this.#engine = await lazy.createEngine(this.options);
} catch (ex) {
lazy.console.warn(
`Engine ${this.options.backend} init failed. Falling back to wasm. Error:` +
ex
);
if (this.options.fallbackEngine) {
const fallbackBase = ENGINE_OPTIONS.get(this.options.fallbackEngine);
const fallbackOptions = {
...fallbackBase,
engineConfigId: this.options.fallbackEngine,
embeddingDimension: this.#embeddingSize,
featureId: this.options.featureId,
modelId: this.options.modelId,
};
try {
this.#engine = await lazy.createEngine(fallbackOptions);
} catch (fallbackEx) {
lazy.console.error(
`Fallback engine ${fallbackOptions.backend} also failed. Error:` +
fallbackEx
);
throw new Error(
"Unable to initialize the ML engine (including fallback).",
{ cause: fallbackEx }
);
}
} else {
lazy.console.error(
"Unable to initialize the ML engine and no Fallback was provided. " +
ex
);
throw new Error(
"Unable to initialize the ML engine and no Fallback was provided. ",
{ cause: ex }
);
}
}
}
}
/**
* Shuts down the ML engine if it has been initialized.
*
* @private
* @returns {Promise<void>}
*/
async shutdown() {
await this.#engine.terminate?.();
}
/**
* Embeds a single text using the ML engine.
*
* @param {string} text
* The input text to be embedded.
* @returns {Promise<any>}
* A promise that resolves with the embedding result from the engine.
*/
async embed(text) {
await this.ensureEngine();
if (typeof text !== "string" || text.trim() === "") {
throw new Error("Invalid input: text must be a non-empty string");
}
const request = {
args: [text],
options: { pooling: "mean", normalize: true },
};
return this.engineRun(request);
}
/**
* Generates embeddings for multiple texts (batch of text) using the
* ML engine.
*
* This asynchronous method takes an array of input texts and returns
* an array of corresponding embedding vectors. Each vector is a
* float array of size `#embeddingSize`, representing the mean-pooled
* and normalized embedding of the input text.
*
* @param {string[]} texts
* The array of texts to be embedded.
* @returns {Promise<number[][]>}
* A promise that resolves to an array of embedding vectors,
* where each vector is an array of floats with length equal to
* `#embeddingSize`.
*/
async embedMany(texts) {
await this.ensureEngine();
if (!Array.isArray(texts)) {
throw new Error("Expected an array of texts");
}
if (texts.length === 0) {
throw new Error("embedMany received an empty array of texts");
}
// call the engine once with the batch of texts.
let batchTensors = await this.engineRun({
args: this.options.backend == "static-embeddings" ? texts : [texts],
options: { pooling: "mean", normalize: true, max_length: 100 },
});
// If the result is triple nested, extract the inner array.
if (
Array.isArray(batchTensors) &&
batchTensors.length === 1 &&
Array.isArray(batchTensors[0])
) {
if (batchTensors[0].length !== this.#embeddingSize) {
batchTensors = batchTensors[0];
}
}
return batchTensors;
}
/**
* Ensures the ML engine is initialized.
*
* @returns {Promise<object>}
* A promise resolving to the initialized engine.
*/
async ensureEngine() {
if (this.#engine?.engineStatus == "closed") {
this.#promiseEngine = null;
this.#engine = null;
}
if (this.#promiseEngine) {
return this.#promiseEngine;
}
let { promise, resolve, reject } = Promise.withResolvers();
// Suppress unhandled rejection when there are no concurrent callers awaiting this promise.
promise.catch(() => {});
this.#promiseEngine = promise;
try {
await this.createEngineIfNotPresent();
resolve(this.#engine);
} catch (e) {
this.#promiseEngine = null;
reject(e);
throw e;
}
return this.#engine;
}
// for easier testing purpose
async engineRun(request) {
return await this.#engine.run(request);
}
// Helper to wrap createEngine for testing purpose
createEngine(args) {
return lazy.createEngine(args);
}
// Helper for test
setEngine(mockEngine) {
this.#engine = mockEngine;
}
}