/* Any copyright is dedicated to the Public Domain.
   http://creativecommons.org/publicdomain/zero/1.0/ */
"use strict";
// NOTE: assumed placeholder; the concrete data URL was elided in the original.
const rootDataUrl = "https://example.com/browser/data";
/**
 * Fetches an article and returns its text content for use as model input.
 */
async function fetchArticle(url) {
  const response = await fetch(url);
  return response.text();
}
const testData = [];

const distilBartModel = {
  taskName: "summarization",
  modelId: "Mozilla/distilbart-cnn-12-6",
  dtype: "q8",
  // To keep history, we reuse xenova in the perf name
  perfModelId: "Xenova/distilbart-cnn-12-6",
};

const qwenModel = {
  taskName: "text-generation",
  modelId: "Mozilla/Qwen2.5-0.5B-Instruct",
  dtype: "q8",
  // To keep history, we reuse onnx-community in the perf name
  perfModelId: "onnx-community/Qwen2.5-0.5B-Instruct",
};
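
// Two model styles are exercised: an encoder-decoder summarization model
// (distilbart) and an instruction-tuned text-generation model (Qwen) that is
// prompted to summarize via a chat-style system message further below.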
const articles = [{ data: `${rootDataUrl}/big.txt`, type: "big" }];
let numEngines = 0;
for (const model of [distilBartModel, qwenModel]) {
  for (const article of articles) {
    // Replace slashes with dashes so the perf name contains only
    // alphanumerics, dashes, and underscores.
    const perfName = `${model.perfModelId.replace(/\//g, "-")}_${article.type}`;
    const engineId = `engine-${numEngines}`;
    const options = { ...model, article: article.data, engineId, perfName };
    numEngines += 1;
    options.trackPeakMemory = false;
    testData.push(options);
  }
}
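
// Metadata consumed by the perf tooling: the perfdocs lint mentioned below
// generates documentation from it, and perfherder_metrics configures which
// metrics are reported and may alert.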
const perfMetadata = {
  owner: "GenAI Team",
  name: "browser_ml_summarizer_perf.js",
  description: "Template test for latency of the summarizer models",
  options: {
    default: {
      perfherder: true,
      perfherder_metrics: [
        {
          name: "latency",
          unit: "ms",
          shouldAlert: true,
        },
        {
          name: "memory",
          unit: "MiB",
          shouldAlert: true,
        },
        {
          name: "tokenSpeed",
          unit: "tokens/s",
          shouldAlert: true,
          lowerIsBetter: false,
        },
        {
          name: "charactersSpeed",
          unit: "chars/s",
          shouldAlert: true,
          lowerIsBetter: false,
        },
      ],
      verbose: true,
      manifest: "perftest.toml",
      manifest_flavor: "browser-chrome",
      try_platform: ["linux", "mac", "win"],
    },
  },
};
requestLongerTimeout(60);
// To run locally:
//   pip install huggingface-hub
//   huggingface-cli download {model_id} --local-dir MOZ_ML_LOCAL_DIR/onnx-models/{model_id}/{revision}
// Update your test in the manifest (perftest.toml), then run:
//   ./mach lint -l perfdocs --fix .
// This will auto-generate the docs.
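// For example (illustrative, using this file's distilbart model at its
// "main" revision):
//   huggingface-cli download Mozilla/distilbart-cnn-12-6 \
//     --local-dir MOZ_ML_LOCAL_DIR/onnx-models/Mozilla/distilbart-cnn-12-6/main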
async function run_summarizer_with_perf({
  taskName,
  modelId,
  article,
  dtype,
  engineId,
  perfName,
  trackPeakMemory,
  browserPrefs = null,
}) {
  let chatInput = await fetchArticle(article);
  const minNewTokens = 195;
  const maxNewTokens = 200;
  let requestOptions = {
    // Bound the generation length so runs are comparable across revisions.
    max_new_tokens: maxNewTokens,
    min_new_tokens: minNewTokens,
  };
  const options = new PipelineOptions({
    engineId,
    taskName,
    modelHubUrlTemplate: "{model}/{revision}",
    modelId,
    modelRevision: "main",
    dtype,
    useExternalDataFormat: true,
    timeoutMS: -1,
  });
if (taskName === "text-generation") {
chatInput = [
{
role: "system",
content:
"Your role is to summarize the provided content as succinctly as possible while retaining the most important information",
},
{
role: "user",
content: chatInput,
},
];
requestOptions = {
max_new_tokens: minNewTokens,
min_new_tokens: maxNewTokens,
return_full_text: true,
return_tensors: false,
do_sample: false,
};
}
  const request = {
    args: [chatInput],
    options: requestOptions,
  };
  info(`is request null | ${request === null || request === undefined}`);
  await perfTest({
    name: `sum-${perfName}`,
    options,
    request,
    trackPeakMemory,
    browserPrefs,
  });
}
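
// testData is built model-major above, so testData[0] is the distilbart/big
// configuration and testData[1] is the qwen/big configuration.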
/*
 * Distilbart model
 */
add_task(async function test_ml_distilbart_tiny_article() {
  await run_summarizer_with_perf(testData[0]);
});

add_task(async function test_ml_distilbart_tiny_article_mem() {
  await run_summarizer_with_perf({ ...testData[0], trackPeakMemory: true });
});

// Re-run the memory test with the optimizing WASM JIT (Ion) disabled.
add_task(async function test_ml_distilbart_tiny_article_mem_no_ion() {
  await run_summarizer_with_perf({
    ...testData[0],
    trackPeakMemory: true,
    browserPrefs: [["javascript.options.wasm_optimizingjit", false]],
  });
});
/*
 * Qwen model
 */
add_task(async function test_ml_qwen_big_article() {
  await run_summarizer_with_perf(testData[1]);
});

add_task(async function test_ml_qwen_big_article_with_mem() {
  await run_summarizer_with_perf({ ...testData[1], trackPeakMemory: true });
});