Source code

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Debounce time in milliseconds - this should be long enough to account for
// sync script tags that could appear between desired meta tags.
// It is the delay of the one-shot timer armed in ContentMetaChild.onMetaTag,
// and the value that timer's delay is reset to when another relevant tag
// arrives for the same URL.
const TIMEOUT_DELAY = 1000;
// Protocols (as reported by URL.protocol, i.e. including the trailing colon)
// that a preview image URL must use to pass checkLoadURIStr.
const ACCEPTED_PROTOCOLS = ["http:", "https:"];
// Possible description tags, listed in order from least favourable to most
// favourable. A tag's index in this array is used as its score by
// shouldExtractMetadata, so a later entry replaces an earlier one.
const DESCRIPTION_RULES = [
"twitter:description",
"description",
"og:description",
];
// Possible image tags, listed in order from least favourable to most
// favourable. As with DESCRIPTION_RULES, the array index is the tag's score.
const PREVIEW_IMAGE_RULES = [
"thumbnail",
"twitter:image",
"og:image",
"og:image:url",
"og:image:secure_url",
];
/*
 * Decides whether an incoming meta tag outranks the best one recorded so far.
 * A tag's rank is its position in the supplied rule list; a tag that is not
 * in the list ranks -1 and therefore never beats any recorded entry.
 *
 * @param {Array} aRules
 *        The list of rules for a given type of meta tag
 * @param {String} aTag
 *        The name or property of the incoming meta tag
 * @param {Object} aEntry
 *        The current best entry for the given meta tag; its `currMaxScore`
 *        property holds the rank of the best tag seen so far
 *
 * @returns {Boolean} true if the incoming meta tag ranks strictly higher than
 *          the current best meta tag of that same kind, false otherwise
 */
function shouldExtractMetadata(aRules, aTag, aEntry) {
  const incomingScore = aRules.indexOf(aTag);
  const bestScoreSoFar = aEntry.currMaxScore;
  return incomingScore > bestScoreSoFar;
}
/*
 * Vets a preview image URL before it is stored. The URL must use one of the
 * ACCEPTED_PROTOCOLS and must pass the script security manager's load check
 * when evaluated against a freshly created null principal.
 *
 * @param {URL} aURL
 *        A URL object that needs to be checked for valid principal and protocol
 *
 * @returns {Boolean} true if the preview URL is safe and can be stored, false
 *          otherwise (including when the security check throws)
 */
function checkLoadURIStr(aURL) {
  const hasAcceptedProtocol = ACCEPTED_PROTOCOLS.includes(aURL.protocol);
  if (hasAcceptedProtocol) {
    try {
      const securityManager = Services.scriptSecurityManager;
      const nullPrincipal = securityManager.createNullPrincipal({});
      securityManager.checkLoadURIStrWithPrincipal(
        nullPrincipal,
        aURL.href,
        securityManager.DISALLOW_INHERIT_PRINCIPAL
      );
      // The check did not throw, so the load is permitted.
      return true;
    } catch (ex) {
      // Any exception raised here is treated as "not allowed"; fall through.
    }
  }
  return false;
}
/*
 * This listens to DOMContentLoaded and DOMMetaAdded events and collects
 * relevant metadata about the meta tags received. Then, it sends the metadata
 * gathered from the meta tags and the url of the page as its payload (in a
 * "Meta:SetPageInfo" message) to be inserted into moz_places.
 *
 * Tags are debounced per document URL: the message is only sent once no
 * relevant tag has been seen for TIMEOUT_DELAY milliseconds, so that only the
 * best description and preview image are stored.
 */
export class ContentMetaChild extends JSWindowActorChild {
  constructor() {
    super();
    // Store a mapping of the best description and preview
    // image collected so far for a given URL, together with the pending
    // debounce timer for that URL.
    this.metaTags = new Map();
  }

  /*
   * Cancels every pending debounce timer and drops the collected entries so
   * that nothing is sent after the actor has been destroyed.
   */
  didDestroy() {
    for (const entry of this.metaTags.values()) {
      if (entry.timeout) {
        entry.timeout.cancel();
      }
    }
    this.metaTags.clear();
  }

  /*
   * Event entry point.
   *
   * @param {Event} event
   *        "DOMContentLoaded" scans every <meta> element currently in the
   *        document; "DOMMetaAdded" processes the single element that is the
   *        event's original target. Other event types are ignored.
   */
  handleEvent(event) {
    switch (event.type) {
      // Braces give the lexical declaration its own scope instead of leaking
      // it into the other case clauses.
      case "DOMContentLoaded": {
        const metaTags = this.contentWindow.document.querySelectorAll("meta");
        for (const metaTag of metaTags) {
          this.onMetaTag(metaTag);
        }
        break;
      }
      case "DOMMetaAdded":
        this.onMetaTag(event.originalTarget);
        break;
      default:
        // Not an event we care about.
        break;
    }
  }

  /*
   * Processes one <meta> element: if it carries a better description or
   * preview image than the one recorded for its document URL, records it and
   * (re)arms the debounce timer that eventually sends "Meta:SetPageInfo".
   *
   * @param {Element} metaTag
   *        The meta element to inspect. Null/undefined is ignored.
   */
  onMetaTag(metaTag) {
    // If there's no meta tag, ignore this. This has to be checked before any
    // property of metaTag is read, otherwise a missing tag would throw.
    if (!metaTag || !metaTag.ownerDocument) {
      return;
    }
    // Also verify that the window matches just to be safe.
    if (metaTag.ownerGlobal !== this.contentWindow) {
      return;
    }

    const url = metaTag.ownerDocument.documentURI;

    // A tag is identified by its name, falling back to its property attribute.
    const tag = metaTag.name || metaTag.getAttributeNS(null, "property");
    if (!tag) {
      return;
    }

    // Malformed meta tag - do not store it
    const content = metaTag.getAttributeNS(null, "content");
    if (!content) {
      return;
    }

    // Reuse the entry already tracked for this URL, or start a fresh one.
    // A score of -1 means "nothing recorded yet", so any known tag beats it.
    const entry = this.metaTags.get(url) || {
      description: { value: null, currMaxScore: -1 },
      image: { value: null, currMaxScore: -1 },
      timeout: null,
    };

    if (shouldExtractMetadata(DESCRIPTION_RULES, tag, entry.description)) {
      // Extract the description
      entry.description.value = content;
      entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
    } else if (shouldExtractMetadata(PREVIEW_IMAGE_RULES, tag, entry.image)) {
      // Extract the preview image, resolving it against the document URL.
      let value;
      try {
        value = new URL(content, url);
      } catch (e) {
        // Unparseable image URL - ignore this tag entirely.
        return;
      }
      // Only record the image if it is safe to load; an unsafe URL still
      // falls through to the debounce handling below, as before.
      if (checkLoadURIStr(value)) {
        entry.image.value = value.href;
        entry.image.currMaxScore = PREVIEW_IMAGE_RULES.indexOf(tag);
      }
    } else {
      // We don't care about other meta tags
      return;
    }

    if (!this.metaTags.has(url)) {
      this.metaTags.set(url, entry);
    }

    if (entry.timeout) {
      // A timer is already pending for this URL: re-assign its delay so the
      // debounce window starts over (relies on nsITimer rescheduling an armed
      // timer when its delay is set).
      entry.timeout.delay = TIMEOUT_DELAY;
      return;
    }

    // We want to debounce incoming meta tags until we're certain we have the
    // best one for description and preview image, and only store that one
    entry.timeout = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
    entry.timeout.initWithCallback(
      () => {
        entry.timeout = null;
        this.metaTags.delete(url);
        // We try to cancel the timers when we get destroyed, but if
        // there's a race, catch it:
        if (!this.manager || this.manager.isClosed) {
          return;
        }
        // Save description and preview image to moz_places
        this.sendAsyncMessage("Meta:SetPageInfo", {
          url,
          description: entry.description.value,
          previewImageURL: entry.image.value,
        });
        // Telemetry for recording the size of page metadata: the combined
        // string length of the description and the preview image URL.
        const descriptionSize = entry.description.value
          ? entry.description.value.length
          : 0;
        const imageSize = entry.image.value ? entry.image.value.length : 0;
        Services.telemetry
          .getHistogramById("PAGE_METADATA_SIZE")
          .add(descriptionSize + imageSize);
      },
      TIMEOUT_DELAY,
      Ci.nsITimer.TYPE_ONE_SHOT
    );
  }
}