Source code
Revision control
Copy as Markdown
Other Tools
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
/**
 * Collects every candidate canonical URL that a web page content document
 * offers: one per explicit declaration mechanism that is checked, plus a
 * fallback to use when the content did not declare a canonical URL.
 *
 * @param {Document} document
 *   The content document to inspect.
 * @returns {CanonicalURLSourceResults}
 *   One entry per source: `link`, `opengraph`, `jsonLd` and `fallback`.
 */
export function findCandidates(document) {
  const link = getLinkRelCanonical(document);
  const opengraph = getOpenGraphUrl(document);
  const jsonLd = getJSONLDUrl(document);
  const fallback = getFallbackCanonicalUrl(document);

  return { link, opengraph, jsonLd, fallback };
}
/**
 * Given a set of canonical URL candidates from `CanonicalURL.findCandidates`,
 * returns the best value to use as the canonical URL.
 *
 * Candidates are tried in order of preference: `link`, `opengraph`, `jsonLd`.
 * A candidate that is missing (`null`/`undefined`) or an empty string is
 * skipped. When none of them is usable, `sources.fallback` is returned as-is.
 *
 * @param {CanonicalURLSourceResults} sources
 * @returns {string}
 */
export function pickCanonicalUrl(sources) {
  // Deliberately `||` rather than `??`: candidates are raw attribute values
  // from page content, so markup such as `<link rel="canonical" href="">`
  // produces an empty string. That is not a usable URL and must not shadow a
  // valid lower-priority candidate.
  return (
    sources.link || sources.opengraph || sources.jsonLd || sources.fallback
  );
}
/**
 * Returns the `href` attribute of the first `<link rel="canonical">` element
 * in the document, exactly as written in the markup.
 *
 * TODO: resolve relative URLs
 * TODO: can be a different hostname or domain; does that need special handling?
 *
 * @param {Document} document
 * @returns {string|null}
 *   The raw attribute value, or `null` when there is no such element or it
 *   has no `href` attribute.
 */
function getLinkRelCanonical(document) {
  const canonicalLink = document.querySelector('link[rel="canonical"]');
  // Optional chaining yields `undefined` when no element matched; normalize
  // that to `null` so the result honors the documented `string|null` type.
  return canonicalLink?.getAttribute("href") ?? null;
}
/**
 * Returns the `content` attribute of the first `<meta property="og:url">`
 * element in the document, exactly as written in the markup.
 *
 * @param {Document} document
 * @returns {string|null}
 *   The raw attribute value, or `null` when there is no such element or it
 *   has no `content` attribute.
 */
function getOpenGraphUrl(document) {
  const ogUrlMeta = document.querySelector('meta[property="og:url"]');
  // Optional chaining yields `undefined` when no element matched; normalize
  // that to `null` so the result honors the documented `string|null` type.
  return ogUrlMeta?.getAttribute("content") ?? null;
}
/**
 * Naïvely returns the first JSON-LD entity's URL, if found.
 *
 * Each `<script type="application/ld+json">` element is examined in document
 * order. The first one whose parsed top-level value has a non-empty string
 * `url` property wins; scripts after it are not parsed.
 *
 * TODO: make sure it's a web page-like/content schema?
 *
 * @param {Document} document
 * @returns {string|null}
 *   The `url` of the first matching entity, or `null` when no script provides
 *   one.
 */
function getJSONLDUrl(document) {
  const scripts = document.querySelectorAll(
    'script[type="application/ld+json"]'
  );
  for (const script of scripts) {
    let entity;
    try {
      entity = JSON.parse(script.textContent);
    } catch {
      // Page-authored JSON may be malformed; treat that script as absent and
      // keep looking instead of failing the whole lookup.
      continue;
    }
    // `?.` tolerates valid JSON that is not an object (e.g. `null`).
    const url = entity?.url;
    if (url && typeof url === "string") {
      return url;
    }
  }
  return null;
}
/**
 * Derives the URL to use when the content declared no canonical URL: the
 * document's own URI with any fragment removed.
 *
 * @param {Document} document
 * @returns {string|null}
 *   The serialized URL without its hash, or `null` when `documentURI` cannot
 *   be parsed as a URL.
 */
function getFallbackCanonicalUrl(document) {
  const parsed = URL.parse(document.documentURI);
  if (!parsed) {
    return null;
  }

  // Assigning an empty hash drops the fragment, including the leading "#".
  parsed.hash = "";
  return parsed.href;
}