/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * This file contains classes that combine New Tab feature events
 * (aggregated from a SQLite table) into an interest model.
 */
import {
  FORMAT,
  AggregateResultKeys,
  SPECIAL_FEATURE_CLICK,
} from "resource://newtab/lib/InferredConstants.sys.mjs"; // NOTE: the module specifier was truncated in the source; this path is assumed.
// Milliseconds in one day.
export const DAYS_TO_MS = 60 * 60 * 24 * 1000;
// Number of distinct Uint32 values; used to map a random Uint32 into [0, 1).
const MAX_INT_32 = 2 ** 32;
/**
 * Divides numerator fields by the denominator. A value is set to 0 if the
 * denominator is missing or 0.
 * @param {Object.<string, number>} numerator
 * @param {Object.<string, number>} denominator
 * @returns {Object.<string, number>}
 */
function divideDict(numerator, denominator) {
  const result = {};
  Object.keys(numerator).forEach(k => {
    result[k] = denominator[k] ? numerator[k] / denominator[k] : 0;
  });
  return result;
}
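// Illustrative example (added; not part of the original file): keys with a
// missing or zero denominator fall back to 0 rather than producing NaN/Infinity.
// divideDict({ a: 2, b: 3 }, { a: 4 });  // => { a: 0.5, b: 0 }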
/**
 * Returns a cryptographically secure random value in [0, 1).
 */
function secureRandomNumber() {
  const array = new Uint32Array(1);
  crypto.getRandomValues(array);
  return array[0] / MAX_INT_32;
}
/**
 * Samples Laplace noise at a given scale.
 * @param {number} scale Scale (diversity) parameter of the Laplace distribution
 * @returns {number} Noise value
 */
function laplaceNoise(scale) {
  const u = secureRandomNumber() - 0.5;
  return -scale * Math.sign(u) * Math.log(1 - 2 * Math.abs(u));
}
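// Background note (added for clarity): this is standard inverse-CDF sampling
// for the Laplace distribution. With u uniform on (-0.5, 0.5), the value
// -scale * sign(u) * ln(1 - 2|u|) is Laplace-distributed with mean 0 and the
// given scale. For epsilon-differential privacy on a quantity with L1
// sensitivity s, a typical choice is scale = s / epsilon.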
/**
 * Unary encoding with randomized response for differential privacy.
 * The output must be decoded back to an integer when aggregating a histogram
 * on a server.
 * @param {number} x - Integer input (0 <= x < N)
 * @param {number} N - Number of possible values (see above)
 * @param {number} p - Probability of keeping a 1-bit as 1 (in the one-hot encoding of x)
 * @param {number} q - Probability of flipping a 0-bit to 1
 * @returns {string} - Bitstring after unary encoding and randomized response
 */
export function unaryEncodeDiffPrivacy(x, N, p, q) {
  const bitstring = [];
  const randomValues = new Uint32Array(N);
  crypto.getRandomValues(randomValues);
  for (let i = 0; i < N; i++) {
    const trueBit = i === x ? 1 : 0;
    const rand = randomValues[i] / MAX_INT_32;
    if (trueBit === 1) {
      bitstring.push(rand <= p ? "1" : "0");
    } else {
      bitstring.push(rand <= q ? "1" : "0");
    }
  }
  return bitstring.join("");
}
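// Illustrative example (added; values are made up): with x = 2 and N = 4 the
// true one-hot encoding is "0010". Each bit is then randomized independently:
// the 1-bit stays 1 with probability p, and each 0-bit becomes 1 with
// probability q, so one possible output of
// unaryEncodeDiffPrivacy(2, 4, 0.75, 0.25) is "0110". A server aggregating
// many such reports can debias the per-bit counts to recover the histogram.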
/**
 * Adds a value to a particular key in a dictionary. If the key is missing, the
 * value is set.
 * @param {Object} dict - The dictionary to modify.
 * @param {string} key - The key whose value should be added or set.
 * @param {number} value - The value to add to the key.
 */
export function dictAdd(dict, key, value) {
  if (key in dict) {
    dict[key] += value;
  } else {
    dict[key] = value;
  }
}
/**
 * Applies a function to every value in a dictionary, returning a new dictionary.
 * @param {Object} obj - The object whose values should be transformed.
 * @param {Function} fn - The function to apply to each value.
 * @returns {Object} A new object with the transformed values.
 */
export function dictApply(obj, fn) {
  return Object.fromEntries(
    Object.entries(obj).map(([key, value]) => [key, fn(value)])
  );
}
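// Illustrative example (added): dictAdd accumulates in place, dictApply maps
// values into a new object.
// const totals = { clicks: 1 };
// dictAdd(totals, "clicks", 2);       // totals => { clicks: 3 }
// dictAdd(totals, "impressions", 5);  // totals => { clicks: 3, impressions: 5 }
// dictApply(totals, x => x * 2);      // => { clicks: 6, impressions: 10 }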
/**
 * Class for re-scaling events based on time passed.
 */
export class DayTimeWeighting {
  /**
   * Instantiate the class based on a series of day periods in the past.
   * @param {int[]} pastDays Series of day counts, indicating "days ago" intervals in reverse chronological order.
   *   Intervals are cumulative: if the first value is 1 and the second is 5, the first interval
   *   covers 0-1 days ago and the second covers 1-6 days ago.
   * @param {number[]} relativeWeight Relative weight of each period. Must be the same length as pastDays.
   */
  constructor(pastDays, relativeWeight) {
    this.pastDays = pastDays;
    this.relativeWeight = relativeWeight;
  }

  static fromJSON(json) {
    return new DayTimeWeighting(json.days, json.relative_weight);
  }

  /**
   * Get a series of interval pairs in the past based on pastDays.
   * @param {number} curTimeMs Base time in milliseconds. Usually the current time.
   * @returns {Array.<{start: Date, end: Date}>} Intervals in reverse chronological order.
   */
  getDateIntervals(curTimeMs) {
    let curEndTime = curTimeMs;
    const res = this.pastDays.map(daysAgo => {
      const start = new Date(curEndTime - daysAgo * DAYS_TO_MS);
      const end = new Date(curEndTime);
      curEndTime = start;
      return { start, end };
    });
    return res;
  }

  /**
   * Get the relative weight at the given index.
   * @param {int} weightIndex Index
   * @returns {number} Weight at index, or 0 if the index is out of range.
   */
  getRelativeWeight(weightIndex) {
    if (weightIndex >= this.pastDays.length) {
      return 0;
    }
    return this.relativeWeight[weightIndex];
  }
}
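// Illustrative usage (added; the weights are made up):
// const weighting = DayTimeWeighting.fromJSON({ days: [1, 5], relative_weight: [2, 1] });
// const intervals = weighting.getDateIntervals(Date.now());
// // intervals[0] covers the last 24 hours, intervals[1] covers 1-6 days ago.
// weighting.getRelativeWeight(0); // 2
// weighting.getRelativeWeight(5); // 0 (out of range)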
/**
 * Describes the mapping from a set of aggregated events to a single interest feature.
 */
export class InterestFeatures {
  constructor(
    name,
    featureWeights,
    thresholds = null,
    diff_p = 0.5,
    diff_q = 0.5
  ) {
    this.name = name;
    this.featureWeights = featureWeights;
    // Thresholds must be in ascending order
    this.thresholds = thresholds;
    this.diff_p = diff_p;
    this.diff_q = diff_q;
  }

  static fromJSON(name, json) {
    return new InterestFeatures(
      name,
      json.features,
      json.thresholds || null,
      json.diff_p,
      json.diff_q
    );
  }

  /**
   * Quantize a feature value based on the thresholds specified in the class.
   * @param {number} inValue Value computed by the model for the feature.
   * @returns Quantized value, between 0 and the number of thresholds (inclusive).
   */
  applyThresholds(inValue) {
    if (!this.thresholds) {
      return inValue;
    }
    for (let k = 0; k < this.thresholds.length; k++) {
      if (inValue < this.thresholds[k]) {
        return k;
      }
    }
    return this.thresholds.length;
  }

  /**
   * Applies the differential privacy unary encoding method, outputting a one-hot
   * encoded vector with randomization. Histograms of the values can be computed
   * on a server with reasonable accuracy.
   * If the class has no thresholds or no p value set for differential privacy,
   * the original, non-encoded number is returned.
   * @param {number} inValue Value to randomize
   * @returns Bitstring whose length is the number of thresholds + 1
   */
  applyDifferentialPrivacy(inValue) {
    if (!this.thresholds || !this.diff_p) {
      return inValue;
    }
    return unaryEncodeDiffPrivacy(
      inValue,
      this.thresholds.length + 1,
      this.diff_p,
      this.diff_q
    );
  }
}
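// Illustrative usage (added; feature names and numbers are made up):
// const sports = InterestFeatures.fromJSON("sports", {
//   features: { sports_click: 1.0, sports_impression: 0.1 },
//   thresholds: [1, 5, 10],
//   diff_p: 0.75,
//   diff_q: 0.25,
// });
// sports.applyThresholds(0.4);  // 0
// sports.applyThresholds(7);    // 2
// sports.applyThresholds(42);   // 3 (above every threshold)
// sports.applyDifferentialPrivacy(2); // e.g. "0110" - a randomized 4-bit string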
/**
 * Manages relative tile importance
 */
export class TileImportance {
  constructor(tileImportanceMappings) {
    this.mappings = {};
    for (const [formatKey, formatId] of Object.entries(FORMAT)) {
      if (formatKey in tileImportanceMappings) {
        this.mappings[formatId] = tileImportanceMappings[formatKey];
      }
    }
  }

  getRelativeCTRForTile(tileType) {
    return this.mappings[tileType] || 1;
  }

  static fromJSON(json) {
    return new TileImportance(json);
  }
}
/**
 * A simple model for aggregating features.
 */
export class FeatureModel {
  /**
   * @param {string} modelId
   * @param {Object} dayTimeWeighting Data for the day time weighting class
   * @param {Object} interestVectorModel Data for the interest model
   * @param {Object} tileImportance Data for tile importance
   * @param {string} modelType Type of the model
   * @param {boolean} rescale Whether to rescale to the max value
   * @param {boolean} logScale Whether to apply a natural log (ln(x + 1)) before rescaling
   * @param {boolean} clickScale Whether to divide values by the number of clicks
   * @param {number} noiseScale Scale of the Laplace noise applied to interest vectors (0 disables noise)
   * @param {Function} laplaceNoiseFn Noise generator function (defaults to laplaceNoise)
   */
  constructor({
    modelId,
    dayTimeWeighting,
    interestVectorModel,
    tileImportance,
    modelType,
    rescale = true,
    logScale = false,
    clickScale = false,
    noiseScale = 0,
    laplaceNoiseFn = laplaceNoise,
  }) {
    this.modelId = modelId;
    this.tileImportance = tileImportance;
    this.dayTimeWeighting = dayTimeWeighting;
    this.interestVectorModel = interestVectorModel;
    this.rescale = rescale;
    this.logScale = logScale;
    this.clickScale = clickScale;
    this.modelType = modelType;
    this.noiseScale = noiseScale;
    this.laplaceNoiseFn = laplaceNoiseFn;
  }
  static fromJSON(json) {
    const dayTimeWeighting = DayTimeWeighting.fromJSON(json.day_time_weighting);
    const interestVectorModel = {};
    const tileImportance = TileImportance.fromJSON(json.tile_importance || {});
    for (const [name, featureJson] of Object.entries(json.interest_vector)) {
      interestVectorModel[name] = InterestFeatures.fromJSON(name, featureJson);
    }
    return new FeatureModel({
      dayTimeWeighting,
      tileImportance,
      interestVectorModel,
      normalize: json.normalize,
      rescale: json.rescale,
      logScale: json.log_scale,
      clickScale: json.clickScale,
      modelType: json.model_type,
      noiseScale: json.noise_scale,
    });
  }
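  // Illustrative shape of the JSON this factory expects (added; the feature
  // names and numbers are made up, the keys mirror the lookups above):
  // FeatureModel.fromJSON({
  //   model_type: "clicks",
  //   day_time_weighting: { days: [1, 5], relative_weight: [2, 1] },
  //   interest_vector: {
  //     sports: { features: { sports_click: 1.0 }, thresholds: [1, 5], diff_p: 0.75, diff_q: 0.25 },
  //   },
  //   tile_importance: {},
  //   rescale: true,
  //   log_scale: false,
  //   noise_scale: 0,
  // });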
  supportsCoarseInterests() {
    return Object.values(this.interestVectorModel).every(
      fm => fm.thresholds && fm.thresholds.length
    );
  }

  supportsCoarsePrivateInterests() {
    return Object.values(this.interestVectorModel).every(
      fm =>
        fm.thresholds &&
        fm.thresholds.length &&
        "diff_p" in fm &&
        "diff_q" in fm
    );
  }

  /**
   * Return date intervals for the query.
   */
  getDateIntervals(curTimeMs) {
    return this.dayTimeWeighting.getDateIntervals(curTimeMs);
  }
  /**
   * Computes an interest vector or aggregate based on the model and raw SQL input.
   * @param {Object} config
   * @param {Array.<Array.<string|number>>} config.dataForIntervals Raw aggregate output from the SQL query, one entry per date interval. Could be clicks or impressions.
   * @param {Object.<string, number>} config.indexSchema Map of keys to indices in each sub-array of dataForIntervals
   * @param {boolean} [config.applyThresholding=false] Whether to apply thresholds
   * @param {boolean} [config.applyDifferentialPrivacy=false] Whether to apply differential privacy. This is used when sending to telemetry.
   * @returns {Object.<string, number|string>} Interest vector keyed by feature name
   */
  computeInterestVector({
    dataForIntervals,
    indexSchema,
    applyThresholding = false,
    applyDifferentialPrivacy = false,
  }) {
    const processedPerTimeInterval = dataForIntervals.map(
      (intervalData, idx) => {
        const intervalRawTotal = {};
        const perPeriodTotals = {};
        intervalData.forEach(aggElement => {
          const feature = aggElement[indexSchema[AggregateResultKeys.FEATURE]];
          const value = aggElement[indexSchema[AggregateResultKeys.VALUE]]; // In the future we could support format here
          dictAdd(intervalRawTotal, feature, value);
        });
        const weight = this.dayTimeWeighting.getRelativeWeight(idx); // Weight for this time interval
        Object.values(this.interestVectorModel).forEach(interestFeature => {
          for (const featureUsed of Object.keys(
            interestFeature.featureWeights
          )) {
            if (featureUsed in intervalRawTotal) {
              dictAdd(
                perPeriodTotals,
                interestFeature.name,
                intervalRawTotal[featureUsed] *
                  weight *
                  interestFeature.featureWeights[featureUsed]
              );
            }
          }
        });
        return perPeriodTotals;
      }
    );
    // Since we are doing linear combinations, it is fine to do the day-time weighting at this step.
    let totalResults = {};
    processedPerTimeInterval.forEach(intervalTotals => {
      for (const key of Object.keys(intervalTotals)) {
        dictAdd(totalResults, key, intervalTotals[key]);
      }
    });
    let numClicks = -1;
    // If clicks is a feature, it's handled as a special case.
    if (SPECIAL_FEATURE_CLICK in totalResults) {
      numClicks = totalResults[SPECIAL_FEATURE_CLICK];
      delete totalResults[SPECIAL_FEATURE_CLICK];
    }
    if (this.logScale) {
      totalResults = dictApply(totalResults, x => Math.log(x + 1));
    }
    if (this.rescale) {
      let divisor = Math.max(...Object.values(totalResults));
      if (divisor <= 0.001) {
        divisor = 0.001;
      }
      totalResults = dictApply(totalResults, x => x / divisor);
    }
    if (this.clickScale && numClicks > 0) {
      totalResults = dictApply(totalResults, x => x / numClicks);
    }
    if (numClicks >= 0) {
      totalResults[SPECIAL_FEATURE_CLICK] = numClicks;
    }
    if (applyThresholding) {
      if (applyDifferentialPrivacy) {
        // Zero values need to be present so they can be randomized.
        Object.values(this.interestVectorModel).forEach(interestFeature => {
          if (!(interestFeature.name in totalResults)) {
            totalResults[interestFeature.name] = 0;
          }
        });
      }
      for (const key of Object.keys(totalResults)) {
        if (key in this.interestVectorModel) {
          totalResults[key] = this.interestVectorModel[key].applyThresholds(
            totalResults[key]
          );
          if (applyDifferentialPrivacy) {
            totalResults[key] =
              this.interestVectorModel[key].applyDifferentialPrivacy(
                totalResults[key]
              );
          }
        }
      }
    }
    return totalResults;
  }
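  // Illustrative call (added; row layout and values are made up - the real
  // indexSchema comes from the SQL aggregation that produces dataForIntervals):
  // const vector = model.computeInterestVector({
  //   dataForIntervals: [
  //     [["sports_click", 3], ["news_click", 1]],  // most recent interval
  //     [["sports_click", 1]],                     // older interval
  //   ],
  //   indexSchema: { [AggregateResultKeys.FEATURE]: 0, [AggregateResultKeys.VALUE]: 1 },
  //   applyThresholding: true,
  // });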
  /**
   * Given pre-computed inferred interests for clicks and impressions, returns a
   * click-through-rate (CTR) interest vector.
   * @param {Object} clickDict Clicks dictionary
   * @param {Object} impressionDict Impressions dictionary
   * @param {string} model_id Model ID
   * @returns {Object} CTR interest vector, with Laplace noise applied if configured
   */
  computeCTRInterestVectors(clickDict, impressionDict, model_id) {
    const inferredInterests = divideDict(clickDict, impressionDict);
    this.applyLaplaceNoise(inferredInterests);
    return { ...inferredInterests, model_id };
  }
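  // Illustrative call (added; numbers are made up):
  // model.computeCTRInterestVectors(
  //   { sports: 2, news: 0 },
  //   { sports: 10, news: 0 },
  //   "ctr-model-v1"
  // );
  // // => { sports: 0.2, news: 0, model_id: "ctr-model-v1" } (plus noise if noiseScale > 0)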
  /**
   * Applies Laplace noise to numeric values in a dictionary, if a noise scale is
   * specified in the model.
   * @param {Object} inputDict Dictionary modified in place
   */
  applyLaplaceNoise(inputDict) {
    if (!this.noiseScale) {
      return;
    }
    for (const key in inputDict) {
      if (typeof inputDict[key] === "number") {
        const noise = this.laplaceNoiseFn(this.noiseScale);
        inputDict[key] += noise;
      }
    }
  }
  /**
   * Computes the interest vector for the data intervals, as well as the coarse
   * and privatized (with randomness) variants.
   */
  computeInterestVectors({
    dataForIntervals,
    indexSchema,
    model_id = "unknown",
    condensePrivateValues = true,
  }) {
    const result = {};
    let inferredInterests;
    let coarseInferredInterests;
    let coarsePrivateInferredInterests;
    inferredInterests = this.computeInterestVector({
      dataForIntervals,
      indexSchema,
    });
    const updatedFuzzyInterests = { ...inferredInterests };
    this.applyLaplaceNoise(updatedFuzzyInterests);
    result.inferredInterests = { ...updatedFuzzyInterests, model_id };
    if (this.supportsCoarseInterests()) {
      coarseInferredInterests = this.computeInterestVector({
        dataForIntervals,
        indexSchema,
        applyThresholding: true,
      });
      if (coarseInferredInterests) {
        result.coarseInferredInterests = {
          ...coarseInferredInterests,
          model_id,
        };
      }
    }
    if (this.supportsCoarsePrivateInterests()) {
      coarsePrivateInferredInterests = this.computeInterestVector({
        dataForIntervals,
        indexSchema,
        applyThresholding: true,
        applyDifferentialPrivacy: true,
      });
      if (coarsePrivateInferredInterests) {
        if (condensePrivateValues) {
          result.coarsePrivateInferredInterests = {
            // Key order preserved in Gecko
            values: Object.values(coarsePrivateInferredInterests),
            model_id,
          };
        } else {
          result.coarsePrivateInferredInterests = {
            ...coarsePrivateInferredInterests,
            model_id,
          };
        }
      }
    }
    return result;
  }
}
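// Illustrative end-to-end use (added; the config and data variables are made up):
// const model = FeatureModel.fromJSON(modelConfigJson);
// const intervals = model.getDateIntervals(Date.now()); // drives the SQL query per interval
// const { inferredInterests, coarseInferredInterests, coarsePrivateInferredInterests } =
//   model.computeInterestVectors({ dataForIntervals, indexSchema, model_id: "example" });
// // inferredInterests: continuous scores (with optional Laplace noise)
// // coarseInferredInterests: threshold-bucketed scores
// // coarsePrivateInferredInterests: randomized bitstrings suitable for telemetry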