/* Any copyright is dedicated to the Public Domain.
   http://creativecommons.org/publicdomain/zero/1.0/ */
"use strict";
// NOTE: assumed placeholder; the concrete data URL was elided in the original.
const rootDataUrl = "https://example.com/browser/data";
/**
 * Fetches an article and returns its text content for use as model input.
 */
async function fetchArticle(url) {
  const response = await fetch(url);
  return response.text();
}
const testData = [];

const distilBartModel = {
  taskName: "summarization",
  modelId: "Mozilla/distilbart-cnn-12-6",
  dtype: "q8",
  // To keep history, we reuse xenova in the perf name
  perfModelId: "Xenova/distilbart-cnn-12-6",
};

const qwenModel = {
  taskName: "text-generation",
  modelId: "Mozilla/Qwen2.5-0.5B-Instruct",
  dtype: "q8",
  // To keep history, we reuse onnx-community in the perf name
  perfModelId: "onnx-community/Qwen2.5-0.5B-Instruct",
};
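
// Two model styles are exercised: an encoder-decoder summarization model
// (distilbart) and an instruction-tuned text-generation model (Qwen) that is
// prompted to summarize via a chat-style system message further below.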
const articles = [{ data: `${rootDataUrl}/big.txt`, type: "big" }];
let numEngines = 0;
for (const model of [distilBartModel, qwenModel]) {
  for (const article of articles) {
    // Replace slashes with dashes so the perf name contains only
    // alphanumerics, dashes, and underscores.
    const perfName = `${model.perfModelId.replace(/\//g, "-")}_${article.type}`;
    const engineId = `engine-${numEngines}`;
    const options = { ...model, article: article.data, engineId, perfName };
    numEngines += 1;
    options.trackPeakMemory = false;
    testData.push(options);
  }
}
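
// Metadata consumed by the perf tooling: the perfdocs lint mentioned below
// generates documentation from it, and perfherder_metrics configures which
// metrics are reported and may alert.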
const perfMetadata = {
  owner: "GenAI Team",
  name: "browser_ml_summarizer_perf.js",
  description: "Template test for latency of the summarizer models",
  options: {
    default: {
      perfherder: true,
      perfherder_metrics: [
        {
          name: "latency",
          unit: "ms",
          shouldAlert: true,
        },
        {
          name: "memory",
          unit: "MiB",
          shouldAlert: true,
        },
        {
          name: "tokenSpeed",
          unit: "tokens/s",
          shouldAlert: true,
          lowerIsBetter: false,
        },
        {
          name: "charactersSpeed",
          unit: "chars/s",
          shouldAlert: true,
          lowerIsBetter: false,
        },
      ],
      verbose: true,
      manifest: "perftest.toml",
      manifest_flavor: "browser-chrome",
      try_platform: ["linux", "mac", "win"],
    },
  },
};
requestLongerTimeout(60);
// To run locally:
//   pip install huggingface-hub
//   huggingface-cli download {model_id} --local-dir MOZ_ML_LOCAL_DIR/onnx-models/{model_id}/{revision}
// Update your test in the manifest (perftest.toml), then run:
//   ./mach lint -l perfdocs --fix .
// This will auto-generate the docs.
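// For example (illustrative, using this file's distilbart model at its
// "main" revision):
//   huggingface-cli download Mozilla/distilbart-cnn-12-6 \
//     --local-dir MOZ_ML_LOCAL_DIR/onnx-models/Mozilla/distilbart-cnn-12-6/main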
async function run_summarizer_with_perf({
  taskName,
  modelId,
  article,
  dtype,
  engineId,
  perfName,
  trackPeakMemory,
  browserPrefs = null,
}) {
  let chatInput = await fetchArticle(article);
  const minNewTokens = 195;
  const maxNewTokens = 200;
  let requestOptions = {
    // Bound the generation length so runs are comparable across revisions.
    max_new_tokens: maxNewTokens,
    min_new_tokens: minNewTokens,
  };
  const options = new PipelineOptions({
    engineId,
    taskName,
    modelHubUrlTemplate: "{model}/{revision}",
    modelId,
    modelRevision: "main",
    dtype,
    useExternalDataFormat: true,
    timeoutMS: -1,
  });
if (taskName === "text-generation") {
chatInput = [
{
role: "system",
content:
"Your role is to summarize the provided content as succinctly as possible while retaining the most important information",
},
{
role: "user",
content: chatInput,
},
];
requestOptions = {
max_new_tokens: minNewTokens,
min_new_tokens: maxNewTokens,
return_full_text: true,
return_tensors: false,
do_sample: false,
};
}
  const request = {
    args: [chatInput],
    options: requestOptions,
  };
  info(`is request null | ${request === null || request === undefined}`);
  await perfTest({
    name: `sum-${perfName}`,
    options,
    request,
    trackPeakMemory,
    browserPrefs,
  });
}
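
// testData is built model-major above, so testData[0] is the distilbart/big
// configuration and testData[1] is the qwen/big configuration.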
/*
 * Distilbart model
 */
add_task(async function test_ml_distilbart_tiny_article() {
  await run_summarizer_with_perf(testData[0]);
});

add_task(async function test_ml_distilbart_tiny_article_mem() {
  await run_summarizer_with_perf({ ...testData[0], trackPeakMemory: true });
});

// Re-run the memory test with the optimizing WASM JIT (Ion) disabled.
add_task(async function test_ml_distilbart_tiny_article_mem_no_ion() {
  await run_summarizer_with_perf({
    ...testData[0],
    trackPeakMemory: true,
    browserPrefs: [["javascript.options.wasm_optimizingjit", false]],
  });
});
/*
 * Qwen model
 */
add_task(async function test_ml_qwen_big_article() {
  await run_summarizer_with_perf(testData[1]);
});

add_task(async function test_ml_qwen_big_article_with_mem() {
  await run_summarizer_with_perf({ ...testData[1], trackPeakMemory: true });
});