/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * This file contains classes that combine New Tab feature events
 * (aggregated from a SQLite table) into an interest model.
 */
import {
  FORMAT,
  AggregateResultKeys,
  SPECIAL_FEATURE_CLICK,
} from "resource://newtab/lib/InferredConstants.sys.mjs"; // NOTE: the module specifier was truncated in the source; this path is assumed.
// Milliseconds in one day.
export const DAYS_TO_MS = 60 * 60 * 24 * 1000;
// Number of distinct Uint32 values; used to map a random Uint32 into [0, 1).
const MAX_INT_32 = 2 ** 32;
/**
 * Divides numerator fields by the denominator. A value is set to 0 if the
 * denominator is missing or 0.
 * @param {Object.<string, number>} numerator
 * @param {Object.<string, number>} denominator
 * @returns {Object.<string, number>}
 */
function divideDict(numerator, denominator) {
  const result = {};
  Object.keys(numerator).forEach(k => {
    result[k] = denominator[k] ? numerator[k] / denominator[k] : 0;
  });
  return result;
}
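// Illustrative example (added; not part of the original file): keys with a
// missing or zero denominator fall back to 0 rather than producing NaN/Infinity.
// divideDict({ a: 2, b: 3 }, { a: 4 });  // => { a: 0.5, b: 0 }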
/**
 * Returns a cryptographically secure random value in [0, 1).
 */
function secureRandomNumber() {
  const array = new Uint32Array(1);
  crypto.getRandomValues(array);
  return array[0] / MAX_INT_32;
}
/**
 * Samples Laplace noise at a given scale.
 * @param {number} scale Scale (diversity) parameter of the Laplace distribution
 * @returns {number} Noise value
 */
function laplaceNoise(scale) {
  const u = secureRandomNumber() - 0.5;
  return -scale * Math.sign(u) * Math.log(1 - 2 * Math.abs(u));
}
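// Background note (added for clarity): this is standard inverse-CDF sampling
// for the Laplace distribution. With u uniform on (-0.5, 0.5), the value
// -scale * sign(u) * ln(1 - 2|u|) is Laplace-distributed with mean 0 and the
// given scale. For epsilon-differential privacy on a quantity with L1
// sensitivity s, a typical choice is scale = s / epsilon.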
/**
 * Unary encoding with randomized response for differential privacy.
 * The output must be decoded back to an integer when aggregating a histogram
 * on a server.
 * @param {number} x - Integer input (0 <= x < N)
 * @param {number} N - Number of possible values (see above)
 * @param {number} p - Probability of keeping a 1-bit as 1 (in the one-hot encoding of x)
 * @param {number} q - Probability of flipping a 0-bit to 1
 * @returns {string} - Bitstring after unary encoding and randomized response
 */
export function unaryEncodeDiffPrivacy(x, N, p, q) {
  const bitstring = [];
  const randomValues = new Uint32Array(N);
  crypto.getRandomValues(randomValues);
  for (let i = 0; i < N; i++) {
    const trueBit = i === x ? 1 : 0;
    const rand = randomValues[i] / MAX_INT_32;
    if (trueBit === 1) {
      bitstring.push(rand <= p ? "1" : "0");
    } else {
      bitstring.push(rand <= q ? "1" : "0");
    }
  }
  return bitstring.join("");
}
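// Illustrative example (added; values are made up): with x = 2 and N = 4 the
// true one-hot encoding is "0010". Each bit is then randomized independently:
// the 1-bit stays 1 with probability p, and each 0-bit becomes 1 with
// probability q, so one possible output of
// unaryEncodeDiffPrivacy(2, 4, 0.75, 0.25) is "0110". A server aggregating
// many such reports can debias the per-bit counts to recover the histogram.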
/**
 * Adds a value to a particular key in a dictionary. If the key is missing, the
 * value is set.
 * @param {Object} dict - The dictionary to modify.
 * @param {string} key - The key whose value should be added or set.
 * @param {number} value - The value to add to the key.
 */
export function dictAdd(dict, key, value) {
  if (key in dict) {
    dict[key] += value;
  } else {
    dict[key] = value;
  }
}
/**
 * Applies a function to every value in a dictionary, returning a new dictionary.
 * @param {Object} obj - The object whose values should be transformed.
 * @param {Function} fn - The function to apply to each value.
 * @returns {Object} A new object with the transformed values.
 */
export function dictApply(obj, fn) {
  return Object.fromEntries(
    Object.entries(obj).map(([key, value]) => [key, fn(value)])
  );
}
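// Illustrative example (added): dictAdd accumulates in place, dictApply maps
// values into a new object.
// const totals = { clicks: 1 };
// dictAdd(totals, "clicks", 2);       // totals => { clicks: 3 }
// dictAdd(totals, "impressions", 5);  // totals => { clicks: 3, impressions: 5 }
// dictApply(totals, x => x * 2);      // => { clicks: 6, impressions: 10 }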
/**
 * Class for re-scaling events based on time passed.
 */
export class DayTimeWeighting {
  /**
   * Instantiate the class based on a series of day periods in the past.
   * @param {int[]} pastDays Series of day counts, indicating "days ago" intervals in reverse chronological order.
   *   Intervals are cumulative: if the first value is 1 and the second is 5, the first interval
   *   covers 0-1 days ago and the second covers 1-6 days ago.
   * @param {number[]} relativeWeight Relative weight of each period. Must be the same length as pastDays.
   */
  constructor(pastDays, relativeWeight) {
    this.pastDays = pastDays;
    this.relativeWeight = relativeWeight;
  }

  static fromJSON(json) {
    return new DayTimeWeighting(json.days, json.relative_weight);
  }

  /**
   * Get a series of interval pairs in the past based on pastDays.
   * @param {number} curTimeMs Base time in milliseconds. Usually the current time.
   * @returns {Array.<{start: Date, end: Date}>} Intervals in reverse chronological order.
   */
  getDateIntervals(curTimeMs) {
    let curEndTime = curTimeMs;
    const res = this.pastDays.map(daysAgo => {
      const start = new Date(curEndTime - daysAgo * DAYS_TO_MS);
      const end = new Date(curEndTime);
      curEndTime = start;
      return { start, end };
    });
    return res;
  }

  /**
   * Get the relative weight at the given index.
   * @param {int} weightIndex Index
   * @returns {number} Weight at index, or 0 if the index is out of range.
   */
  getRelativeWeight(weightIndex) {
    if (weightIndex >= this.pastDays.length) {
      return 0;
    }
    return this.relativeWeight[weightIndex];
  }
}
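// Illustrative usage (added; the weights are made up):
// const weighting = DayTimeWeighting.fromJSON({ days: [1, 5], relative_weight: [2, 1] });
// const intervals = weighting.getDateIntervals(Date.now());
// // intervals[0] covers the last 24 hours, intervals[1] covers 1-6 days ago.
// weighting.getRelativeWeight(0); // 2
// weighting.getRelativeWeight(5); // 0 (out of range)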
/**
 * Describes the mapping from a set of aggregated events to a single interest feature.
 */
export class InterestFeatures {
  constructor(
    name,
    featureWeights,
    thresholds = null,
    diff_p = 0.5,
    diff_q = 0.5
  ) {
    this.name = name;
    this.featureWeights = featureWeights;
    // Thresholds must be in ascending order
    this.thresholds = thresholds;
    this.diff_p = diff_p;
    this.diff_q = diff_q;
  }

  static fromJSON(name, json) {
    return new InterestFeatures(
      name,
      json.features,
      json.thresholds || null,
      json.diff_p,
      json.diff_q
    );
  }

  /**
   * Quantize a feature value based on the thresholds specified in the class.
   * @param {number} inValue Value computed by the model for the feature.
   * @returns Quantized value, between 0 and the number of thresholds (inclusive).
   */
  applyThresholds(inValue) {
    if (!this.thresholds) {
      return inValue;
    }
    for (let k = 0; k < this.thresholds.length; k++) {
      if (inValue < this.thresholds[k]) {
        return k;
      }
    }
    return this.thresholds.length;
  }

  /**
   * Applies the differential privacy unary encoding method, outputting a one-hot
   * encoded vector with randomization. Histograms of the values can be computed
   * on a server with reasonable accuracy.
   * If the class has no thresholds or no p value set for differential privacy,
   * the original, non-encoded number is returned.
   * @param {number} inValue Value to randomize
   * @returns Bitstring whose length is the number of thresholds + 1
   */
  applyDifferentialPrivacy(inValue) {
    if (!this.thresholds || !this.diff_p) {
      return inValue;
    }
    return unaryEncodeDiffPrivacy(
      inValue,
      this.thresholds.length + 1,
      this.diff_p,
      this.diff_q
    );
  }
}
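// Illustrative usage (added; feature names and numbers are made up):
// const sports = InterestFeatures.fromJSON("sports", {
//   features: { sports_click: 1.0, sports_impression: 0.1 },
//   thresholds: [1, 5, 10],
//   diff_p: 0.75,
//   diff_q: 0.25,
// });
// sports.applyThresholds(0.4);  // 0
// sports.applyThresholds(7);    // 2
// sports.applyThresholds(42);   // 3 (above every threshold)
// sports.applyDifferentialPrivacy(2); // e.g. "0110" - a randomized 4-bit string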
/**
 * Manages relative tile importance
 */
export class TileImportance {
  constructor(tileImportanceMappings) {
    this.mappings = {};
    for (const [formatKey, formatId] of Object.entries(FORMAT)) {
      if (formatKey in tileImportanceMappings) {
        this.mappings[formatId] = tileImportanceMappings[formatKey];
      }
    }
  }

  getRelativeCTRForTile(tileType) {
    return this.mappings[tileType] || 1;
  }

  static fromJSON(json) {
    return new TileImportance(json);
  }
}
/**
 * A simple model for aggregating features.
 */
export class FeatureModel {
  /**
   * @param {string} modelId
   * @param {Object} dayTimeWeighting Data for the day time weighting class
   * @param {Object} interestVectorModel Data for the interest model
   * @param {Object} tileImportance Data for tile importance
   * @param {string} modelType Type of the model
   * @param {boolean} rescale Whether to rescale to the max value
   * @param {boolean} logScale Whether to apply a natural log (ln(x + 1)) before rescaling
   * @param {boolean} clickScale Whether to divide values by the number of clicks
   * @param {number} noiseScale Scale of the Laplace noise applied to interest vectors (0 disables noise)
   * @param {Function} laplaceNoiseFn Noise generator function (defaults to laplaceNoise)
   */
  constructor({
    modelId,
    dayTimeWeighting,
    interestVectorModel,
    tileImportance,
    modelType,
    rescale = true,
    logScale = false,
    clickScale = false,
    noiseScale = 0,
    laplaceNoiseFn = laplaceNoise,
  }) {
    this.modelId = modelId;
    this.tileImportance = tileImportance;
    this.dayTimeWeighting = dayTimeWeighting;
    this.interestVectorModel = interestVectorModel;
    this.rescale = rescale;
    this.logScale = logScale;
    this.clickScale = clickScale;
    this.modelType = modelType;
    this.noiseScale = noiseScale;
    this.laplaceNoiseFn = laplaceNoiseFn;
  }
  static fromJSON(json) {
    const dayTimeWeighting = DayTimeWeighting.fromJSON(json.day_time_weighting);
    const interestVectorModel = {};
    const tileImportance = TileImportance.fromJSON(json.tile_importance || {});
    for (const [name, featureJson] of Object.entries(json.interest_vector)) {
      interestVectorModel[name] = InterestFeatures.fromJSON(name, featureJson);
    }
    return new FeatureModel({
      dayTimeWeighting,
      tileImportance,
      interestVectorModel,
      normalize: json.normalize,
      rescale: json.rescale,
      logScale: json.log_scale,
      clickScale: json.clickScale,
      modelType: json.model_type,
      noiseScale: json.noise_scale,
    });
  }
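  // Illustrative shape of the JSON this factory expects (added; the feature
  // names and numbers are made up, the keys mirror the lookups above):
  // FeatureModel.fromJSON({
  //   model_type: "clicks",
  //   day_time_weighting: { days: [1, 5], relative_weight: [2, 1] },
  //   interest_vector: {
  //     sports: { features: { sports_click: 1.0 }, thresholds: [1, 5], diff_p: 0.75, diff_q: 0.25 },
  //   },
  //   tile_importance: {},
  //   rescale: true,
  //   log_scale: false,
  //   noise_scale: 0,
  // });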
  supportsCoarseInterests() {
    return Object.values(this.interestVectorModel).every(
      fm => fm.thresholds && fm.thresholds.length
    );
  }

  supportsCoarsePrivateInterests() {
    return Object.values(this.interestVectorModel).every(
      fm =>
        fm.thresholds &&
        fm.thresholds.length &&
        "diff_p" in fm &&
        "diff_q" in fm
    );
  }

  /**
   * Return date intervals for the query.
   */
  getDateIntervals(curTimeMs) {
    return this.dayTimeWeighting.getDateIntervals(curTimeMs);
  }
  /**
   * Computes an interest vector or aggregate based on the model and raw SQL input.
   * @param {Object} config
   * @param {Array.<Array.<string|number>>} config.dataForIntervals Raw aggregate output from the SQL query, one entry per date interval. Could be clicks or impressions.
   * @param {Object.<string, number>} config.indexSchema Map of keys to indices in each sub-array of dataForIntervals
   * @param {boolean} [config.applyThresholding=false] Whether to apply thresholds
   * @param {boolean} [config.applyDifferentialPrivacy=false] Whether to apply differential privacy. This is used when sending to telemetry.
   * @returns {Object.<string, number|string>} Interest vector keyed by feature name
   */
  computeInterestVector({
    dataForIntervals,
    indexSchema,
    applyThresholding = false,
    applyDifferentialPrivacy = false,
  }) {
    const processedPerTimeInterval = dataForIntervals.map(
      (intervalData, idx) => {
        const intervalRawTotal = {};
        const perPeriodTotals = {};
        intervalData.forEach(aggElement => {
          const feature = aggElement[indexSchema[AggregateResultKeys.FEATURE]];
          const value = aggElement[indexSchema[AggregateResultKeys.VALUE]]; // In the future we could support format here
          dictAdd(intervalRawTotal, feature, value);
        });
        const weight = this.dayTimeWeighting.getRelativeWeight(idx); // Weight for this time interval
        Object.values(this.interestVectorModel).forEach(interestFeature => {
          for (const featureUsed of Object.keys(
            interestFeature.featureWeights
          )) {
            if (featureUsed in intervalRawTotal) {
              dictAdd(
                perPeriodTotals,
                interestFeature.name,
                intervalRawTotal[featureUsed] *
                  weight *
                  interestFeature.featureWeights[featureUsed]
              );
            }
          }
        });
        return perPeriodTotals;
      }
    );
    // Since we are doing linear combinations, it is fine to do the day-time weighting at this step.
    let totalResults = {};
    processedPerTimeInterval.forEach(intervalTotals => {
      for (const key of Object.keys(intervalTotals)) {
        dictAdd(totalResults, key, intervalTotals[key]);
      }
    });
    let numClicks = -1;
    // If clicks is a feature, it's handled as a special case.
    if (SPECIAL_FEATURE_CLICK in totalResults) {
      numClicks = totalResults[SPECIAL_FEATURE_CLICK];
      delete totalResults[SPECIAL_FEATURE_CLICK];
    }
    if (this.logScale) {
      totalResults = dictApply(totalResults, x => Math.log(x + 1));
    }
    if (this.rescale) {
      let divisor = Math.max(...Object.values(totalResults));
      if (divisor <= 0.001) {
        divisor = 0.001;
      }
      totalResults = dictApply(totalResults, x => x / divisor);
    }
    if (this.clickScale && numClicks > 0) {
      totalResults = dictApply(totalResults, x => x / numClicks);
    }
    if (numClicks >= 0) {
      totalResults[SPECIAL_FEATURE_CLICK] = numClicks;
    }
    if (applyThresholding) {
      if (applyDifferentialPrivacy) {
        // Zero values need to be present so they can be randomized.
        Object.values(this.interestVectorModel).forEach(interestFeature => {
          if (!(interestFeature.name in totalResults)) {
            totalResults[interestFeature.name] = 0;
          }
        });
      }
      for (const key of Object.keys(totalResults)) {
        if (key in this.interestVectorModel) {
          totalResults[key] = this.interestVectorModel[key].applyThresholds(
            totalResults[key]
          );
          if (applyDifferentialPrivacy) {
            totalResults[key] =
              this.interestVectorModel[key].applyDifferentialPrivacy(
                totalResults[key]
              );
          }
        }
      }
    }
    return totalResults;
  }
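  // Illustrative call (added; row layout and values are made up - the real
  // indexSchema comes from the SQL aggregation that produces dataForIntervals):
  // const vector = model.computeInterestVector({
  //   dataForIntervals: [
  //     [["sports_click", 3], ["news_click", 1]],  // most recent interval
  //     [["sports_click", 1]],                     // older interval
  //   ],
  //   indexSchema: { [AggregateResultKeys.FEATURE]: 0, [AggregateResultKeys.VALUE]: 1 },
  //   applyThresholding: true,
  // });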
  /**
   * Given pre-computed inferred interests for clicks and impressions, returns a
   * click-through-rate (CTR) interest vector.
   * @param {Object} clickDict Clicks dictionary
   * @param {Object} impressionDict Impressions dictionary
   * @param {string} model_id Model ID
   * @returns {Object} CTR interest vector, with Laplace noise applied if configured
   */
  computeCTRInterestVectors(clickDict, impressionDict, model_id) {
    const inferredInterests = divideDict(clickDict, impressionDict);
    this.applyLaplaceNoise(inferredInterests);
    return { ...inferredInterests, model_id };
  }
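  // Illustrative call (added; numbers are made up):
  // model.computeCTRInterestVectors(
  //   { sports: 2, news: 0 },
  //   { sports: 10, news: 0 },
  //   "ctr-model-v1"
  // );
  // // => { sports: 0.2, news: 0, model_id: "ctr-model-v1" } (plus noise if noiseScale > 0)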
  /**
   * Applies Laplace noise to numeric values in a dictionary, if a noise scale is
   * specified in the model.
   * @param {Object} inputDict Dictionary modified in place
   */
  applyLaplaceNoise(inputDict) {
    if (!this.noiseScale) {
      return;
    }
    for (const key in inputDict) {
      if (typeof inputDict[key] === "number") {
        const noise = this.laplaceNoiseFn(this.noiseScale);
        inputDict[key] += noise;
      }
    }
  }
  /**
   * Computes the interest vector for the data intervals, as well as the coarse
   * and privatized (with randomness) variants.
   */
  computeInterestVectors({
    dataForIntervals,
    indexSchema,
    model_id = "unknown",
    condensePrivateValues = true,
  }) {
    const result = {};
    let inferredInterests;
    let coarseInferredInterests;
    let coarsePrivateInferredInterests;
    inferredInterests = this.computeInterestVector({
      dataForIntervals,
      indexSchema,
    });
    const updatedFuzzyInterests = { ...inferredInterests };
    this.applyLaplaceNoise(updatedFuzzyInterests);
    result.inferredInterests = { ...updatedFuzzyInterests, model_id };
    if (this.supportsCoarseInterests()) {
      coarseInferredInterests = this.computeInterestVector({
        dataForIntervals,
        indexSchema,
        applyThresholding: true,
      });
      if (coarseInferredInterests) {
        result.coarseInferredInterests = {
          ...coarseInferredInterests,
          model_id,
        };
      }
    }
    if (this.supportsCoarsePrivateInterests()) {
      coarsePrivateInferredInterests = this.computeInterestVector({
        dataForIntervals,
        indexSchema,
        applyThresholding: true,
        applyDifferentialPrivacy: true,
      });
      if (coarsePrivateInferredInterests) {
        if (condensePrivateValues) {
          result.coarsePrivateInferredInterests = {
            // Key order preserved in Gecko
            values: Object.values(coarsePrivateInferredInterests),
            model_id,
          };
        } else {
          result.coarsePrivateInferredInterests = {
            ...coarsePrivateInferredInterests,
            model_id,
          };
        }
      }
    }
    return result;
  }
}
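// Illustrative end-to-end use (added; the config and data variables are made up):
// const model = FeatureModel.fromJSON(modelConfigJson);
// const intervals = model.getDateIntervals(Date.now()); // drives the SQL query per interval
// const { inferredInterests, coarseInferredInterests, coarsePrivateInferredInterests } =
//   model.computeInterestVectors({ dataForIntervals, indexSchema, model_id: "example" });
// // inferredInterests: continuous scores (with optional Laplace noise)
// // coarseInferredInterests: threshold-bucketed scores
// // coarsePrivateInferredInterests: randomized bitstrings suitable for telemetry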