fathom.mjs - mozsearch

mozilla-central/toolkit/modules/third_party/fathom/fathom.mjs (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Toolkit :: General

Revision control

Copy as Markdown

Other Tools

/*

DO NOT TOUCH fathom.mjs DIRECTLY. See the README for instructions.

*/

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**

 * A :func:`rule` depends on another rule which itself depends on the first

 * rule again, either directly or indirectly.

*/

class CycleError extends Error {

/**

  * An examined element was not contained in a browser ``window`` object, but

  * something needed it to be.

*/

class NoWindowError extends Error {

var exceptions = /*#__PURE__*/Object.freeze({

  __proto__: null,

  CycleError: CycleError,

  NoWindowError: NoWindowError

});

/**

 * Return the passed-in arg. Useful as a default.

*/

function identity(x) {

    return x;

/*eslint-env browser*/

/**

 * From an iterable return the best item, according to an arbitrary comparator

 * function. In case of a tie, the first item wins.

 * @arg by {function} Given an item of the iterable, return a value to compare

 * @arg isBetter {function} Return whether its first arg is better than its

 *     second

*/

function best(iterable, by, isBetter) {

    let bestSoFar, bestKeySoFar;

    let isFirst = true;

    forEach(

        function (item) {

            const key = by(item);

            if (isBetter(key, bestKeySoFar) || isFirst) {

                bestSoFar = item;

                bestKeySoFar = key;

                isFirst = false;

},

        iterable);

    if (isFirst) {

        throw new Error('Tried to call best() on empty iterable');

    return bestSoFar;

/**

 * Return the maximum item from an iterable, as defined by >.

 * Works with any type that works with >. If multiple items are equally great,

 * return the first.

 * @arg by {function} Given an item of the iterable, returns a value to

 *     compare

*/

function max(iterable, by = identity) {

    return best(iterable, by, (a, b) => a > b);

/**

 * Return an Array of maximum items from an iterable, as defined by > and ===.

 * If an empty iterable is passed in, return [].

*/

function maxes(iterable, by = identity) {

    let bests = [];

    let bestKeySoFar;

    let isFirst = true;

    forEach(

        function (item) {

            const key = by(item);

            if (key > bestKeySoFar || isFirst) {

                bests = [item];

                bestKeySoFar = key;

                isFirst = false;

            } else if (key === bestKeySoFar) {

                bests.push(item);

},

        iterable);

    return bests;

/**

 * Return the minimum item from an iterable, as defined by <.

 * If multiple items are equally great, return the first.

*/

function min(iterable, by = identity) {

    return best(iterable, by, (a, b) => a < b);

/**

 * Return the sum of an iterable, as defined by the + operator.

*/

function sum(iterable) {

    let total;

    let isFirst = true;

    forEach(

        function assignOrAdd(addend) {

            if (isFirst) {

                total = addend;

                isFirst = false;

            } else {

                total += addend;

},

        iterable);

    return total;

/**

 * Return the number of items in an iterable, consuming it as a side effect.

*/

function length(iterable) {

    let num = 0;

    // eslint-disable-next-line no-unused-vars

    for (let item of iterable) {

        num++;

    return num;

/**

 * Iterate, depth first, over a DOM node. Return the original node first.

 * @arg shouldTraverse {function} Given a node, say whether we should

 *     include it and its children. Default: always true.

*/

function *walk(element, shouldTraverse = element => true) {

    yield element;

    for (let child of element.childNodes) {

        if (shouldTraverse(child)) {

            for (let w of walk(child, shouldTraverse)) {

                yield w;

const blockTags = new Set(

    ['ADDRESS', 'BLOCKQUOTE', 'BODY', 'CENTER', 'DIR', 'DIV', 'DL',

     'FIELDSET', 'FORM', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HR',

     'ISINDEX', 'MENU', 'NOFRAMES', 'NOSCRIPT', 'OL', 'P', 'PRE',

     'TABLE', 'UL', 'DD', 'DT', 'FRAMESET', 'LI', 'TBODY', 'TD',

     'TFOOT', 'TH', 'THEAD', 'TR', 'HTML']);

/**

 * Return whether a DOM element is a block element by default (rather than by

 * styling).

*/

function isBlock(element) {

    return blockTags.has(element.tagName);

/**

 * Yield strings of text nodes within a normalized DOM node and its children,

 * without venturing into any contained block elements.

 * @arg shouldTraverse {function} Specify additional elements to exclude by

 *     returning false

*/

function *inlineTexts(element, shouldTraverse = element => true) {

    // TODO: Could we just use querySelectorAll() with a really long

    // selector rather than walk(), for speed?

    for (let child of walk(element,

                           element => !(isBlock(element) ||

                                        element.tagName === 'SCRIPT' &&

                                        element.tagName === 'STYLE')

                                      && shouldTraverse(element))) {

        if (child.nodeType === child.TEXT_NODE) {

            // wholeText() is not implemented by jsdom, so we use

            // textContent(). The result should be the same, since

            // we're calling it on only text nodes, but it may be

            // slower. On the positive side, it means we don't need to

            // normalize the DOM tree first.

            yield child.textContent;

/**

 * Return the total length of the inline text within an element, with

 * whitespace collapsed.

 * @arg shouldTraverse {function} Specify additional elements to exclude by

 *     returning false

*/

function inlineTextLength(element, shouldTraverse = element => true) {

    return sum(map(text => collapseWhitespace(text).length,

                   inlineTexts(element, shouldTraverse)));

/**

 * Return a string with each run of whitespace collapsed to a single space.

*/

function collapseWhitespace(str) {

    return str.replace(/\s{2,}/g, ' ');

/**

 * Return the ratio of the inline text length of the links in an element to the

 * inline text length of the entire element.

 * @arg inlineLength {number} Optionally, the precalculated inline

 *     length of the fnode. If omitted, we will calculate it ourselves.

*/

function linkDensity(fnode, inlineLength) {

    if (inlineLength === undefined) {

        inlineLength = inlineTextLength(fnode.element);

    const lengthWithoutLinks = inlineTextLength(fnode.element,

                                                element => element.tagName !== 'A');

    return (inlineLength - lengthWithoutLinks) / inlineLength;

/**

 * Return whether an element is a text node that consist wholly of whitespace.

*/

function isWhitespace(element) {

    return (element.nodeType === element.TEXT_NODE &&

            element.textContent.trim().length === 0);

/**

 * Get a key of a map, first setting it to a default value if it's missing.

*/

function setDefault(map, key, defaultMaker) {

    if (map.has(key)) {

        return map.get(key);

    const defaultValue = defaultMaker();

    map.set(key, defaultValue);

    return defaultValue;

/**

 * Get a key of a map or, if it's missing, a default value.

*/

function getDefault(map, key, defaultMaker) {

    if (map.has(key)) {

        return map.get(key);

    return defaultMaker();

/**

 * Return an Array, the reverse topological sort of the given nodes.

 * @arg nodes An iterable of arbitrary things

 * @arg nodesThatNeed {function} Take a node and returns an Array of nodes

 *     that depend on it

*/

function toposort(nodes, nodesThatNeed) {

    const ret = [];

    const todo = new Set(nodes);

    const inProgress = new Set();

    function visit(node) {

        if (inProgress.has(node)) {

            throw new CycleError('The graph has a cycle.');

        if (todo.has(node)) {

            inProgress.add(node);

            for (let needer of nodesThatNeed(node)) {

                visit(needer);

            inProgress.delete(node);

            todo.delete(node);

            ret.push(node);

    while (todo.size > 0) {

        visit(first(todo));

    return ret;

/**

 * A Set with the additional methods it ought to have had

*/

class NiceSet extends Set {

/**

     * Remove and return an arbitrary item. Throw an Error if I am empty.

*/

    pop() {

        for (let v of this.values()) {

            this.delete(v);

            return v;

        throw new Error('Tried to pop from an empty NiceSet.');

/**

     * Union another set or other iterable into myself.

     * @return myself, for chaining

*/

    extend(otherSet) {

        for (let item of otherSet) {

            this.add(item);

        return this;

/**

     * Subtract another set from a copy of me.

     * @return a copy of myself excluding the elements in ``otherSet``.

*/

    minus(otherSet) {

        const ret = new NiceSet(this);

        for (const item of otherSet) {

            ret.delete(item);

        return ret;

/**

     * Actually show the items in me.

*/

    toString() {

        return '{' + Array.from(this).join(', ') + '}';

/**

 * Return the first item of an iterable.

*/

function first(iterable) {

    for (let i of iterable) {

        return i;

/**

 * Given any node in a DOM tree, return the root element of the tree, generally

 * an HTML element.

*/

function rootElement(element) {

    return element.ownerDocument.documentElement;

/**

 * Return the number of times a regex occurs within the string `haystack`.

 * Caller must make sure `regex` has the 'g' option set.

*/

function numberOfMatches(regex, haystack) {

    return (haystack.match(regex) || []).length;

/**

 * Wrap a scoring callback, and set its element to the page root iff a score is

 * returned.

 * This is used to build rulesets which classify entire pages rather than

 * picking out specific elements.

 * For example, these rules might classify a page as a "login page", influenced

 * by whether they have login buttons or username fields:

 * ``rule(type('loginPage'), score(page(pageContainsLoginButton))),``

 * ``rule(type('loginPage'), score(page(pageContainsUsernameField)))``

*/

function page(scoringFunction) {

    function wrapper(fnode) {

        const scoreAndTypeAndNote = scoringFunction(fnode);

        if (scoreAndTypeAndNote.score !== undefined) {

            scoreAndTypeAndNote.element = rootElement(fnode.element);

        return scoreAndTypeAndNote;

    return wrapper;

/**

 * Sort the elements by their position in the DOM.

 * @arg fnodes {iterable} fnodes to sort

 * @return {Array} sorted fnodes

*/

function domSort(fnodes) {

    function compare(a, b) {

        const element = a.element;

        const position = element.compareDocumentPosition(b.element);

        if (position & element.DOCUMENT_POSITION_FOLLOWING) {

            return -1;

        } else if (position & element.DOCUMENT_POSITION_PRECEDING) {

            return 1;

        } else {

            return 0;

    return Array.from(fnodes).sort(compare);

/* istanbul ignore next */

/**

 * Return the DOM element contained in a passed-in fnode. Return passed-in DOM

 * elements verbatim.

 * @arg fnodeOrElement {Node|Fnode}

*/

function toDomElement(fnodeOrElement) {

    return isDomElement(fnodeOrElement) ? fnodeOrElement : fnodeOrElement.element;

/**

 * Checks whether any of the element's attribute values satisfy some condition.

 * Example::

 *     rule(type('foo'),

 *          score(attributesMatch(element,

 *                                attr => attr.includes('good'),

 *                                ['id', 'alt']) ? 2 : 1))

 * @arg element {Node} Element whose attributes you want to search

 * @arg predicate {function} A condition to check. Take a string and

 *     return a boolean. If an attribute has multiple values (e.g. the class

 *     attribute), attributesMatch will check each one.

 * @arg attrs {string[]} An Array of attributes you want to search. If none are

 *     provided, search all.

 * @return Whether any of the attribute values satisfy the predicate function

*/

function attributesMatch(element, predicate, attrs = []) {

    const attributes = attrs.length === 0 ? Array.from(element.attributes).map(a => a.name) : attrs;

    for (let i = 0; i < attributes.length; i++) {

        const attr = element.getAttribute(attributes[i]);

        // If the attribute is an array, apply the scoring function to each element

        if (attr && ((Array.isArray(attr) && attr.some(predicate)) || predicate(attr))) {

            return true;

    return false;

/* istanbul ignore next */

/**

 * Yield an element and each of its ancestors.

*/

function *ancestors(element) {

    yield element;

    let parent;

    while ((parent = element.parentNode) !== null && parent.nodeType === parent.ELEMENT_NODE) {

        yield parent;

        element = parent;

/**

 * Return the sigmoid of the argument: 1 / (1 + exp(-x)). This is useful for

 * crunching a feature value that may have a wide range into the range (0, 1)

 * without a hard ceiling: the sigmoid of even a very large number will be a

 * little larger than that of a slightly smaller one.

 * @arg x {Number} a number to be compressed into the range (0, 1)

*/

function sigmoid(x) {

    return 1 / (1 + Math.exp(-x));

/* istanbul ignore next */

/**

 * Return whether an element is practically visible, considering things like 0

 * size or opacity, ``visibility: hidden`` and ``overflow: hidden``.

 * Merely being scrolled off the page in either horizontally or vertically

 * doesn't count as invisible; the result of this function is meant to be

 * independent of viewport size.

 * @throws {NoWindowError} The element (or perhaps one of its ancestors) is not

 *     in a window, so we can't find the `getComputedStyle()` routine to call.

 *     That routine is the source of most of the information we use, so you

 *     should pick a different strategy for non-window contexts.

*/

function isVisible(fnodeOrElement) {

    // This could be 5x more efficient if https://github.com/w3c/csswg-drafts/issues/4122 happens.

    const element = toDomElement(fnodeOrElement);

    const elementWindow = windowForElement(element);

    const elementRect = element.getBoundingClientRect();

    const elementStyle = elementWindow.getComputedStyle(element);

    // Alternative to reading ``display: none`` due to Bug 1381071.

    if (elementRect.width === 0 && elementRect.height === 0 && elementStyle.overflow !== 'hidden') {

        return false;

    if (elementStyle.visibility === 'hidden') {

        return false;

    // Check if the element is irrevocably off-screen:

    if (elementRect.x + elementRect.width < 0 ||

        elementRect.y + elementRect.height < 0

) {

        return false;

    for (const ancestor of ancestors(element)) {

        const isElement = ancestor === element;

        const style = isElement ? elementStyle : elementWindow.getComputedStyle(ancestor);

        if (style.opacity === '0') {

            return false;

        if (style.display === 'contents') {

            // ``display: contents`` elements have no box themselves, but children are

            // still rendered.

            continue;

        const rect = isElement ? elementRect : ancestor.getBoundingClientRect();

        if ((rect.width === 0 || rect.height === 0) && elementStyle.overflow === 'hidden') {

            // Zero-sized ancestors don’t make descendants hidden unless the descendant

            // has ``overflow: hidden``.

            return false;

    return true;

/**

 * Return the extracted [r, g, b, a] values from a string like "rgba(0, 5, 255, 0.8)",

 * and scale them to 0..1. If no alpha is specified, return undefined for it.

*/

function rgbaFromString(str) {

    const m = str.match(/^rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*(?:,\s*(\d+(?:\.\d+)?)\s*)?\)$/i);

    if (m) {

        return [m[1] / 255, m[2] / 255, m[3] / 255, m[4] === undefined ? undefined : parseFloat(m[4])];

    } else {

        throw new Error('Color ' + str + ' did not match pattern rgb[a](r, g, b[, a]).');

/**

 * Return the saturation 0..1 of a color defined by RGB values 0..1.

*/

function saturation(r, g, b) {

    const cMax = Math.max(r, g, b);

    const cMin = Math.min(r, g, b);

    const delta = cMax - cMin;

    const lightness = (cMax + cMin) / 2;

    const denom = (1 - (Math.abs(2 * lightness - 1)));

    // Return 0 if it's black (R, G, and B all 0).

    return (denom === 0) ? 0 : delta / denom;

/**

 * Scale a number to the range [0, 1] using a linear slope.

 * For a rising line, the result is 0 until the input reaches zeroAt, then

 * increases linearly until oneAt, at which it becomes 1. To make a falling

 * line, where the result is 1 to the left and 0 to the right, use a zeroAt

 * greater than oneAt.

*/

function linearScale(number, zeroAt, oneAt) {

    const isRising = zeroAt < oneAt;

    if (isRising) {

        if (number <= zeroAt) {

            return 0;

        } else if (number >= oneAt) {

            return 1;

    } else {

        if (number >= zeroAt) {

            return 0;

        } else if (number <= oneAt) {

            return 1;

    const slope = 1 / (oneAt - zeroAt);

    return slope * (number - zeroAt);

// -------- Routines below this point are private to the framework. --------

/**

 * Flatten out an iterable of iterables into a single iterable of non-

 * iterables. Does not consider strings to be iterable.

*/

function *flatten(iterable) {

    for (const i of iterable) {

        if (typeof i !== 'string' && isIterable(i)) {

            yield *(flatten(i));

        } else {

            yield i;

/**

 * A lazy, top-level ``Array.map()`` workalike that works on anything iterable

*/

function *map(fn, iterable) {

    for (const i of iterable) {

        yield fn(i);

/**

 * A lazy, top-level ``Array.forEach()`` workalike that works on anything

 * iterable

*/

function forEach(fn, iterable) {

    for (const i of iterable) {

        fn(i);

/* istanbul ignore next */

/**

 * @return whether a thing appears to be a DOM element.

*/

function isDomElement(thing) {

    return thing.nodeName !== undefined;

function isIterable(thing) {

    return thing && typeof thing[Symbol.iterator] === 'function';

/**

 * Return an backward iterator over an Array without reversing it in place.

*/

function *reversed(array) {

    for (let i = array.length - 1; i >= 0; i--) {

        yield array[i];

/* istanbul ignore next */

/*

 * Return the window an element is in.

 * @throws {NoWindowError} There isn't such a window.

*/

function windowForElement(element) {

    let doc = element.ownerDocument;

    if (doc === null) {

        // The element itself was a document.

        doc = element;

    const win = doc.defaultView;

    if (win === null) {

        throw new NoWindowError();

    return win;

var utilsForFrontend = /*#__PURE__*/Object.freeze({

  __proto__: null,

  identity: identity,

  best: best,

  max: max,

  maxes: maxes,

  min: min,

  sum: sum,

  length: length,

  walk: walk,

  isBlock: isBlock,

  inlineTexts: inlineTexts,

  inlineTextLength: inlineTextLength,

  collapseWhitespace: collapseWhitespace,

  linkDensity: linkDensity,

  isWhitespace: isWhitespace,

  setDefault: setDefault,

  getDefault: getDefault,

  toposort: toposort,

  NiceSet: NiceSet,

  first: first,

  rootElement: rootElement,

  numberOfMatches: numberOfMatches,

  page: page,

  domSort: domSort,

  toDomElement: toDomElement,

  attributesMatch: attributesMatch,

  ancestors: ancestors,

  sigmoid: sigmoid,

  isVisible: isVisible,

  rgbaFromString: rgbaFromString,

  saturation: saturation,

  linearScale: linearScale,

  flatten: flatten,

  map: map,

  forEach: forEach,

  isDomElement: isDomElement,

  reversed: reversed,

  windowForElement: windowForElement

});

/**

 * Return the number of stride nodes between 2 DOM nodes *at the same

 * level of the tree*, without going up or down the tree.

 * ``left`` xor ``right`` may also be undefined.

*/

function numStrides(left, right) {

    let num = 0;

    // Walk right from left node until we hit the right node or run out:

    let sibling = left;

    let shouldContinue = sibling && sibling !== right;

    while (shouldContinue) {

        sibling = sibling.nextSibling;

        if ((shouldContinue = sibling && sibling !== right) &&

            !isWhitespace(sibling)) {

            num += 1;

    if (sibling !== right) {  // Don't double-punish if left and right are siblings.

        // Walk left from right node:

        sibling = right;

        while (sibling) {

            sibling = sibling.previousSibling;

            if (sibling && !isWhitespace(sibling)) {

                num += 1;

    return num;

/**

 * Return a topological distance between 2 DOM nodes or :term:`fnodes<fnode>`

 * weighted according to the similarity of their ancestry in the DOM. For

 * instance, if one node is situated inside ``<div><span><b><theNode>`` and the

 * other node is at ``<differentDiv><span><b><otherNode>``, they are considered

 * close to each other for clustering purposes. This is useful for picking out

 * nodes which have similar purposes.

 * Return ``Number.MAX_VALUE`` if one of the nodes contains the other.

 * This is largely an implementation detail of :func:`clusters`, but you can

 * call it yourself if you wish to implement your own clustering. Takes O(n log

 * n) time.

 * Note that the default costs may change; pass them in explicitly if they are

 * important to you.

 * @arg fnodeA {Node|Fnode}

 * @arg fnodeB {Node|Fnode}

 * @arg differentDepthCost {number} Cost for each level deeper one node is than

 *    the other below their common ancestor

 * @arg differentTagCost {number} Cost for a level below the common ancestor

 *    where tagNames differ

 * @arg sameTagCost {number} Cost for a level below the common ancestor where

 *    tagNames are the same

 * @arg strideCost {number} Cost for each stride node between A and B. Stride

 *     nodes are siblings or siblings-of-ancestors that lie between the 2

 *     nodes. These interposed nodes make it less likely that the 2 nodes

 *     should be together in a cluster.

 * @arg additionalCost {function} Return an additional cost, given 2 fnodes or

 *    nodes.

*/

function distance(fnodeA,

                         fnodeB,

                         {differentDepthCost = 2,

                          differentTagCost = 2,

                          sameTagCost = 1,

                          strideCost = 1,

                          additionalCost = (fnodeA, fnodeB) => 0} = {}) {

    // I was thinking of something that adds little cost for siblings. Up

    // should probably be more expensive than down (see middle example in the

    // Nokia paper).

    // TODO: Test and tune default costs. They're off the cuff at the moment.

    if (fnodeA === fnodeB) {

        return 0;

    const elementA = isDomElement(fnodeA) ? fnodeA : fnodeA.element;

    const elementB = isDomElement(fnodeB) ? fnodeB : fnodeB.element;

    // Stacks that go from the common ancestor all the way to A and B:

    const aAncestors = [elementA];

    const bAncestors = [elementB];

    let aAncestor = elementA;

    let bAncestor = elementB;

    // Ascend to common parent, stacking them up for later reference:

    while (!aAncestor.contains(elementB)) {  // Note: an element does contain() itself.

        aAncestor = aAncestor.parentNode;

        aAncestors.push(aAncestor); //aAncestors = [a, b]. aAncestor = b // if a is outer: no loop here; aAncestors = [a]. aAncestor = a.

    // In compareDocumentPosition()'s opinion, inside implies after. Basically,

    // before and after pertain to opening tags.

    const comparison = elementA.compareDocumentPosition(elementB);

    // If either contains the other, abort. We'd either return a misleading

    // number or else walk upward right out of the document while trying to

    // make the ancestor stack.

    if (comparison & (elementA.DOCUMENT_POSITION_CONTAINS | elementA.DOCUMENT_POSITION_CONTAINED_BY)) {

        return Number.MAX_VALUE;

    // Make an ancestor stack for the right node too so we can walk

    // efficiently down to it:

    do {

        bAncestor = bAncestor.parentNode;  // Assumes we've early-returned above if A === B. This walks upward from the outer node and up out of the tree. It STARTS OUT with aAncestor === bAncestor!

        bAncestors.push(bAncestor);

    } while (bAncestor !== aAncestor);

    // Figure out which node is left and which is right, so we can follow

    // sibling links in the appropriate directions when looking for stride

    // nodes:

    let left = aAncestors;

    let right = bAncestors;

    let cost = 0;

    if (comparison & elementA.DOCUMENT_POSITION_FOLLOWING) {

        // A is before, so it could contain the other node. What did I mean to do if one contained the other?

        left = aAncestors;

        right = bAncestors;

    } else if (comparison & elementA.DOCUMENT_POSITION_PRECEDING) {

        // A is after, so it might be contained by the other node.

        left = bAncestors;

        right = aAncestors;

    // Descend to both nodes in parallel, discounting the traversal

    // cost iff the nodes we hit look similar, implying the nodes dwell

    // within similar structures.

    while (left.length || right.length) {

        const l = left.pop();

        const r = right.pop();

        if (l === undefined || r === undefined) {

            // Punishment for being at different depths: same as ordinary

            // dissimilarity punishment for now

            cost += differentDepthCost;

        } else {

            // TODO: Consider similarity of classList.

            cost += l.tagName === r.tagName ? sameTagCost : differentTagCost;

        // Optimization: strides might be a good dimension to eliminate.

        if (strideCost !== 0) {

            cost += numStrides(l, r) * strideCost;

    return cost + additionalCost(fnodeA, fnodeB);

/**

 * Return the spatial distance between 2 fnodes or elements, assuming a

 * rendered page.

 * Specifically, return the distance in pixels between the centers of

 * ``fnodeA.element.getBoundingClientRect()`` and

 * ``fnodeB.element.getBoundingClientRect()``.

*/

function euclidean(fnodeA, fnodeB) {

/**

     * Return the horizontal distance from the left edge of the viewport to the

     * center of an element, given a DOMRect object for it. It doesn't matter

     * that the distance is affected by the page's scroll offset, since the 2

     * elements have the same offset.

*/

    function xCenter(domRect) {

        return domRect.left + domRect.width / 2;

    function yCenter(domRect) {

        return domRect.top + domRect.height / 2;

    const elementA = toDomElement(fnodeA);

    const elementB = toDomElement(fnodeB);

    const aRect = elementA.getBoundingClientRect();

    const bRect = elementB.getBoundingClientRect();

    return Math.sqrt((xCenter(aRect) - xCenter(bRect)) ** 2 +

                     (yCenter(aRect) - yCenter(bRect)) ** 2);

/** A lower-triangular matrix of inter-cluster distances */

class DistanceMatrix {

/**

     * @arg distance {function} Some notion of distance between 2 given nodes

*/

    constructor(elements, distance) {

        // A sparse adjacency matrix:

        // {A => {},

        //  B => {A => 4},

        //  C => {A => 4, B => 4},

        //  D => {A => 4, B => 4, C => 4}

        //  E => {A => 4, B => 4, C => 4, D => 4}}

//

        // A, B, etc. are arrays of [arrays of arrays of...] nodes, each

        // array being a cluster. In this way, they not only accumulate a

        // cluster but retain the steps along the way.

//

        // This is an efficient data structure in terms of CPU and memory, in

        // that we don't have to slide a lot of memory around when we delete a

        // row or column from the middle of the matrix while merging. Of

        // course, we lose some practical efficiency by using hash tables, and

        // maps in particular are slow in their early implementations.

        this._matrix = new Map();

        // Convert elements to clusters:

        const clusters = elements.map(el => [el]);

        // Init matrix:

        for (let outerCluster of clusters) {

            const innerMap = new Map();

            for (let innerCluster of this._matrix.keys()) {

                innerMap.set(innerCluster, distance(outerCluster[0],

                                                    innerCluster[0]));

            this._matrix.set(outerCluster, innerMap);

        this._numClusters = clusters.length;

    // Return (distance, a: clusterA, b: clusterB) of closest-together clusters.

    // Replace this to change linkage criterion.

    closest() {

        const self = this;

        if (this._numClusters < 2) {

            throw new Error('There must be at least 2 clusters in order to return the closest() ones.');

        // Return the distances between every pair of clusters.

        function clustersAndDistances() {

            const ret = [];

            for (let [outerKey, row] of self._matrix.entries()) {

                for (let [innerKey, storedDistance] of row.entries()) {

                    ret.push({a: outerKey, b: innerKey, distance: storedDistance});

            return ret;

        // Optimizing this by inlining the loop and writing it less

        // functionally doesn't help:

        return min(clustersAndDistances(), x => x.distance);

    // Look up the distance between 2 clusters in me. Try the lookup in the

    // other direction if the first one falls in the nonexistent half of the

    // triangle.

    _cachedDistance(clusterA, clusterB) {

        let ret = this._matrix.get(clusterA).get(clusterB);

        if (ret === undefined) {

            ret = this._matrix.get(clusterB).get(clusterA);

        return ret;

    // Merge two clusters.

    merge(clusterA, clusterB) {

        // An example showing how rows merge:

        //  A: {}

        //  B: {A: 1}

        //  C: {A: 4, B: 4},

        //  D: {A: 4, B: 4, C: 4}

        //  E: {A: 4, B: 4, C: 2, D: 4}}

//

        // Step 2:

        //  C: {}

        //  D: {C: 4}

        //  E: {C: 2, D: 4}}

        //  AB: {C: 4, D: 4, E: 4}

//

        // Step 3:

        //  D:  {}

        //  AB: {D: 4}

        //  CE: {D: 4, AB: 4}

        // Construct new row, finding min distances from either subcluster of

        // the new cluster to old clusters.

//

        // There will be no repetition in the matrix because, after all,

        // nothing pointed to this new cluster before it existed.

        const newRow = new Map();

        for (let outerKey of this._matrix.keys()) {

            if (outerKey !== clusterA && outerKey !== clusterB) {

                newRow.set(outerKey, Math.min(this._cachedDistance(clusterA, outerKey),

                                              this._cachedDistance(clusterB, outerKey)));

        // Delete the rows of the clusters we're merging.

        this._matrix.delete(clusterA);

        this._matrix.delete(clusterB);

        // Remove inner refs to the clusters we're merging.

        for (let inner of this._matrix.values()) {

            inner.delete(clusterA);

            inner.delete(clusterB);

        // Attach new row.

        this._matrix.set([clusterA, clusterB], newRow);

        // There is a net decrease of 1 cluster:

        this._numClusters -= 1;

    numClusters() {

        return this._numClusters;

    // Return an Array of nodes for each cluster in me.

    clusters() {

        // TODO: Can't get map to work here. Don't know why.

        return Array.from(this._matrix.keys()).map(e => Array.from(flatten(e)));

/**

 * Partition the given nodes into one or more clusters by position in the DOM

 * tree.

 * This implements an agglomerative clustering. It uses single linkage, since

 * we're talking about adjacency here more than Euclidean proximity: the

 * clusters we're talking about in the DOM will tend to be adjacent, not

 * overlapping. We haven't tried other linkage criteria yet.

 * In a later release, we may consider score or notes.

 * @arg {Fnode[]|Node[]} fnodes :term:`fnodes<fnode>` or DOM nodes to group

 *     into clusters

 * @arg {number} splittingDistance The closest-nodes :func:`distance` beyond

 *     which we will not attempt to unify 2 clusters. Make this larger to make

 *     larger clusters.

 * @arg getDistance {function} A function that returns some notion of numerical

 *    distance between 2 nodes. Default: :func:`distance`

 * @return {Array} An Array of Arrays, with each Array containing all the

 *     nodes in one cluster. Note that neither the clusters nor the nodes are

 *     in any particular order. You may find :func:`domSort` helpful to remedy

 *     the latter.

*/

function clusters(fnodes, splittingDistance, getDistance = distance) {

    const matrix = new DistanceMatrix(fnodes, getDistance);

    let closest;

    while (matrix.numClusters() > 1 && (closest = matrix.closest()).distance < splittingDistance) {

        matrix.merge(closest.a, closest.b);

    return matrix.clusters();

var clusters$1 = /*#__PURE__*/Object.freeze({

  __proto__: null,

  distance: distance,

  euclidean: euclidean,

  clusters: clusters

});

// The left-hand side of a rule

/**

 * Take nodes that match a given DOM selector. Example:

 * ``dom('meta[property="og:title"]')``

 * Every ruleset has at least one ``dom`` or :func:`element` rule, as that is

 * where nodes begin to flow into the system. If run against a subtree of a

 * document, the root of the subtree is not considered as a possible match.

*/

function dom(selector) {

    return new DomLhs(selector);

/**

 * Take a single given node if it matches a given DOM selector, without looking

 * through its descendents or ancestors. Otherwise, take no nodes. Example:

 * ``element('input')``

 * This is useful for applications in which you want Fathom to classify an

 * element the user has selected, rather than scanning the whole page for

 * candidates.

*/

function element(selector) {

    return new ElementLhs(selector);

/**

 * Rules and the LHSs and RHSs that comprise them have no mutable state. This

 * lets us make BoundRulesets from Rulesets without duplicating the rules. It

 * also lets us share a common cache among rules: multiple ones might care

 * about a cached type(), for instance; there isn't a one-to-one relationship

 * of storing with caring. There would also, because of the interdependencies

 * of rules in a ruleset, be little use in segmenting the caches: if you do

 * something that causes one to need to be cleared, you'll need to clear many

 * more as well.

 * Lhses are responsible for maintaining ruleset.maxCache.

 * Lhs and its subclasses are private to the Fathom framework.

*/

class Lhs {

    constructor() {

        this._predicate = () => true;

    /** Return a new Lhs of the appropriate kind, given its first call. */

    static fromFirstCall(firstCall) {

        // firstCall is never 'dom', because dom() directly returns a DomLhs.

        if (firstCall.method === 'type') {

            return new TypeLhs(...firstCall.args);

        } else if (firstCall.method === 'and') {

            return new AndLhs(firstCall.args);

        } else if (firstCall.method === 'nearest') {

            return new NearestLhs(firstCall.args);

        } else {

            throw new Error('The left-hand side of a rule() must start with dom(), type(), and(), or nearest().');

/**

     * Prune nodes from consideration early in run execution, before scoring is

     * done.

     * Reserve this for where you are sure it is always correct or when

     * performance demands it. It is generally preferable to use :func:`score`

     * and let the :doc:`trainer<training>` determine the relative significance

     * of each rule. Human intuition as to what is important is often wrong:

     * for example, one might assume that a music player website would include

     * the word "play", but this does not hold once you include sites in other

     * languages.

     * Can be chained after :func:`type` or :func:`dom`.

     * Example: ``dom('p').when(isVisible)``

     * @arg {function} predicate Accepts a fnode and returns a boolean

*/

    when(predicate) {

        let lhs = this.clone();

        lhs._predicate = predicate;

        return lhs;

/**

     * Of all the dom nodes selected by type() or dom(), return only

     * the fnodes that satisfy all the predicates imposed by calls to

     * when()

*/

    fnodesSatisfyingWhen(fnodes) {

        return Array.from(fnodes).filter(this._predicate);

/**

     * Return an iterable of output fnodes selected by this left-hand-side

     * expression.

     * Pre: The rules I depend on have already been run, and their results are

     * in ruleset.typeCache.

     * @arg ruleset {BoundRuleset}

*/

    // fnodes (ruleset) {}

/**

     * Check that a RHS-emitted fact is legal for this kind of LHS, and throw

     * an error if it isn't.

*/

    checkFact(fact) {}

/**

     * Return the single type the output of the LHS is guaranteed to have.

     * Return undefined if there is no such single type we can ascertain.

*/

    guaranteedType() {}

/**

     * Return the type I aggregate if I am an aggregate LHS; return undefined

     * otherwise.

*/

    aggregatedType() {}

/**

     * Return each combination of types my selected nodes could be locally (that

     * is, by this rule only) constrained to have.

     * For example, type(A) would return [A]. and(A, or(B, C)) would return

     * [AB, AC, ABC]. More examples:

     * or(A, B) → typeIn(A, B, C)  # Finalizes A, B.   combos A, B, AB: finalizes AB. Optimization: there's no point in returning the last combo in ors. Compilation into 2 rules with identical RHSs will inherently implement this optimization.

     * or(A, B) → typeIn(A, B)  # Finalizes A, B

     * or(A, B) → A  # Finalizes B

     * and(A) -> A  # Finalizes nothing

     * and(A, B) -> A  # Finalizes nothing.   AB: Ø

     * and(A) -> typeIn(A, B)  # Finalizes A.   A

     * and(A, B) -> typeIn(A, B)  # Finalizes nothing.   AB

     * and(A, B) -> typeIn(A, B, C)  # Finalizes A, B.   AB

     * and(A, or(B, C)) -> D  # Finalizes A, B, C.   AB, AC, ABC: ABC

     * and(A, or(B, C)) -> B  # Finalizes A, C.   AB, AC, ABC: AC

     * type(A).not(and(A, B)) ->

     * @return {NiceSet[]}

*/

    // possibleTypeCombinations() {}

/**

     * Types mentioned in this LHS.

     * In other words, the types I need to know the assignment status of before

     * I can make my selections

     * @return NiceSet of strings

*/

    // typesMentioned() {}

class DomLhs extends Lhs {

    constructor(selector) {

        super();

        if (selector === undefined) {

            throw new Error('A querySelector()-style selector is required as the argument to ' + this._callName() + '().');

        this.selector = selector;

/**

     * Return the name of this kind of LHS, for use in error messages.

*/

    _callName() {

        return 'dom';

    clone() {

        return new this.constructor(this.selector);

    fnodes(ruleset) {

        return this._domNodesToFilteredFnodes(

            ruleset,

            ruleset.doc.querySelectorAll(this.selector));

/**

     * Turn a NodeList of DOM nodes into an array of fnodes, and filter out

     * those that don't match the :func:`when()` clause.

*/

    _domNodesToFilteredFnodes(ruleset, domNodes) {

        let ret = [];

        for (let i = 0; i < domNodes.length; i++) {

            ret.push(ruleset.fnodeForElement(domNodes[i]));

        return this.fnodesSatisfyingWhen(ret);

    checkFact(fact) {

        if (fact.type === undefined) {

            throw new Error(`The right-hand side of a ${this._callName()}() rule failed to specify a type. This means there is no way for its output to be used by later rules. All it specified was ${fact}.`);

    asLhs() {

        return this;

    possibleTypeCombinations() {

        return [];

    typesMentioned() {

        return new NiceSet();

class ElementLhs extends DomLhs {

    _callName() {

        return 'element';

    fnodes(ruleset) {

        return this._domNodesToFilteredFnodes(

            ruleset,

            ruleset.doc.matches(this.selector) ? [ruleset.doc] : []);

/** Internal representation of a LHS constrained by type but not by max() */

class TypeLhs extends Lhs {

    constructor(type) {

        super();

        if (type === undefined) {

            throw new Error('A type name is required when calling type().');

        this._type = type;  // the input type

    clone() {

        return new this.constructor(this._type);

    fnodes(ruleset) {

        const cached = getDefault(ruleset.typeCache, this._type, () => []);

        return this.fnodesSatisfyingWhen(cached);

    /** Override the type previously specified by this constraint. */

    type(inputType) {

        // Preserve the class in case this is a TypeMaxLhs.

        return new this.constructor(inputType);

/**

     * Of the nodes selected by a ``type`` call to the left, constrain the LHS

     * to return only the max-scoring one. If there is a tie, more than 1 node

     * will be returned. Example: ``type('titley').max()``

*/

    max() {

        return new TypeMaxLhs(this._type);

/**

     * Take the nodes selected by a ``type`` call to the left, group them into

     * clusters, and return the nodes in the cluster that has the highest total

     * score (on the relevant type).

     * Nodes come out in arbitrary order, so, if you plan to emit them,

     * consider using ``.out('whatever').allThrough(domSort)``. See

     * :func:`domSort`.

     * If multiple clusters have equally high scores, return an arbitrary one,

     * because Fathom has no way to represent arrays of arrays in rulesets.

     * @arg options {Object} The same depth costs taken by :func:`distance`,

     *     plus ``splittingDistance``, which is the distance beyond which 2

     *     clusters will be considered separate. ``splittingDistance``, if

     *     omitted, defaults to 3.

*/

    bestCluster(options) {

        return new BestClusterLhs(this._type, options);

    // Other clustering calls could be called biggestCluster() (having the most

    // nodes) and bestAverageCluster() (having the highest average score).

    guaranteedType() {

        return this._type;

    possibleTypeCombinations() {

        return [this.typesMentioned()];

    typesMentioned() {

        return new NiceSet([this._type]);

/**

 * Abstract LHS that is an aggregate function taken across all fnodes of a type

 * The main point here is that any aggregate function over a (typed) set of

 * nodes depends on first computing all the rules that could emit those nodes

 * (nodes of that type).

*/

class AggregateTypeLhs extends TypeLhs {

    aggregatedType() {

        return this._type;

/**

 * Internal representation of a LHS that has both type and max([NUMBER])

 * constraints. max(NUMBER != 1) support is not yet implemented.

*/

class TypeMaxLhs extends AggregateTypeLhs {

/**

     * Return the max-scoring node (or nodes if there is a tie) of the given

     * type.

*/

    fnodes(ruleset) {

        // TODO: Optimize better. Walk the dependency tree, and run only the

        // rules that could possibly lead to a max result. As part of this,

        // make RHSs expose their max potential scores.

        const self = this;

        // Work around V8 bug:

        // https://stackoverflow.com/questions/32943776/using-super-within-an-

        // arrow-function-within-an-arrow-function-within-a-method

        const getSuperFnodes = () => super.fnodes(ruleset);

        return setDefault(

            ruleset.maxCache,

            this._type,

            function maxFnodesOfType() {

                return maxes(getSuperFnodes(), fnode => ruleset.weightedScore(fnode.scoresSoFarFor(self._type)));

});

class BestClusterLhs extends AggregateTypeLhs {

    constructor(type, options) {

        super(type);

        this._options = options || {splittingDistance: 3};

/**

     * Group the nodes of my type into clusters, and return the cluster with

     * the highest total score for that type.

*/

    fnodes(ruleset) {

        // Get the nodes of the type:

        const fnodesOfType = Array.from(super.fnodes(ruleset));

        if (fnodesOfType.length === 0) {

            return [];

        // Cluster them:

        const clusts = clusters(

            fnodesOfType,

            this._options.splittingDistance,

            (a, b) => distance(a, b, this._options));

        // Tag each cluster with the total of its nodes' scores:

        const clustsAndSums = clusts.map(

            clust => [clust,

                      sum(clust.map(fnode => fnode.scoreFor(this._type)))]);

        // Return the highest-scoring cluster:

        return max(clustsAndSums, clustAndSum => clustAndSum[1])[0];

class AndLhs extends Lhs {

    constructor(lhss) {

        super();

        // For the moment, we accept only type()s as args. TODO: Generalize to

        // type().max() and such later.

        this._args = lhss.map(sideToTypeLhs);

    *fnodes(ruleset) {

        // Take an arbitrary one for starters. Optimization: we could always

        // choose the pickiest one to start with.

        const fnodes = this._args[0].fnodes(ruleset);

        // Then keep only the fnodes that have the type of every other arg:

        fnodeLoop: for (let fnode of fnodes) {

            for (let otherLhs of this._args.slice(1)) {

                // Optimization: could use a .hasTypeSoFar() below

                if (!fnode.hasType(otherLhs.guaranteedType())) {

                    // TODO: This is n^2. Why is there no set intersection in JS?!

                    continue fnodeLoop;

            yield fnode;

    possibleTypeCombinations() {

        return [this.typesMentioned()];

    typesMentioned() {

        return new NiceSet(this._args.map(arg => arg.guaranteedType()));

function sideToTypeLhs(side) {

    const lhs = side.asLhs();

    if (!(lhs.constructor === TypeLhs)) {

        throw new Error('and() and nearest() support only simple type() calls as arguments for now.');

        // TODO: Though we could solve this with a compilation step: and(type(A), type(B).max()) is equivalent to type(B).max() -> type(Bmax); and(type(A), type(Bmax)).

        // In fact, we should be able to compile most (any?) arbitrary and()s, including nested ands and and(type(...).max(), ...) constructions into several and(type(A), type(B), ...) rules.

    return lhs;

class NearestLhs extends Lhs {

    constructor([a, b, distance]) {

        super();

        this._a = sideToTypeLhs(a);

        this._b = sideToTypeLhs(b);

        this._distance = distance;

/**

     * Return an iterable of {fnodes, transformer} pairs.

*/

    *fnodes(ruleset) {

        // Go through all the left arg's nodes. For each one, find the closest

        // right-arg's node. O(a * b). Once a right-arg's node is used, we

        // don't eliminate it from consideration, because then order of left-

        // args' nodes would matter.

        // TODO: Still not sure how to get the distance to factor into the

        // score unless we hard-code nearest() to do that. It's a

        // matter of not being able to bind on the RHS to the output of the

        // distance function on the LHS. Perhaps we could at least make

        // distance part of the note and read it in a props() callback.

        // We're assuming here that simple type() calls return just plain

        // fnodes, not {fnode, rhsTransformer} pairs:

        const as_ = this._a.fnodes(ruleset);

        const bs = Array.from(this._b.fnodes(ruleset));

        if (bs.length > 0) {

            // If bs is empty, there can be no nearest nodes, so don't emit any.

            for (const a of as_) {

                const nearest = min(bs, b => this._distance(a, b));

                yield {fnode: a,

                       rhsTransformer: function setNoteIfEmpty(fact) {

                           // If note is explicitly set by the RHS, let it take

                           // precedence, even though that makes this entire LHS

                           // pointless.

                           if (fact.note === undefined) {

                               fact.note = nearest;  // TODO: Wrap this in an object to make room to return distance later.

                           return fact;

}};

    checkFact(fact) {

        // Barf if the fact doesn't set a type at least. It should be a *new* type or at least one that doesn't result in cycles, but we can't deduce that.

    possibleTypeCombinations() {

        return [new NiceSet([this._a.guaranteedType()])];

    typesMentioned() {

        return new NiceSet([this._a.guaranteedType(),

                            this._b.guaranteedType()]);

    guaranteedType() {

        return this._a.guaranteedType();

// The right-hand side of a rule

const TYPE = 1;

const NOTE = 2;

const SCORE = 4;

const ELEMENT = 8;

const SUBFACTS = {

    type: TYPE,

    note: NOTE,

    score: SCORE,

    element: ELEMENT

};

/**

 * Expose the output of this rule's LHS as a "final result" to the surrounding

 * program. It will be available by calling :func:`~BoundRuleset.get` on the

 * ruleset and passing the key. You can run each node through a callback

 * function first by adding :func:`through()`, or you can run the entire set of

 * nodes through a callback function by adding :func:`allThrough()`.

*/

function out(key) {

    return new OutwardRhs(key);

class InwardRhs {

    constructor(calls = [], max = Infinity, types) {

        this._calls = calls.slice();

        this._max = max;  // max score

        this._types = new NiceSet(types);  // empty set if unconstrained

/**

     * Declare that the maximum returned subscore is such and such,

     * which helps the optimizer plan efficiently. This doesn't force it to be

     * true; it merely throws an error at runtime if it isn't. To lift an

     * ``atMost`` constraint, call ``atMost()`` (with no args). The reason

     * ``atMost`` and ``typeIn`` apply until explicitly cleared is so that, if

     * someone used them for safety reasons on a lexically distant rule you are

     * extending, you won't stomp on their constraint and break their

     * invariants accidentally.

*/

    atMost(score) {

        return new this.constructor(this._calls, score, this._types);

    _checkAtMost(fact) {

        if (fact.score !== undefined && fact.score > this._max) {

            throw new Error(`Score of ${fact.score} exceeds the declared atMost(${this._max}).`);

/**

      * Determine any of type, note, score, and element using a callback. This

      * overrides any previous call to `props` and, depending on what

      * properties of the callback's return value are filled out, may override

      * the effects of other previous calls as well.

      * The callback should return...

      * * An optional :term:`subscore`

      * * A type (required on ``dom(...)`` rules, defaulting to the input one on

      *   ``type(...)`` rules)

      * * Optional notes

      * * An element, defaulting to the input one. Overriding the default

      *   enables a callback to walk around the tree and say things about nodes

      *   other than the input one.

*/

    props(callback) {

        function getSubfacts(fnode) {

            const subfacts = callback(fnode);

            // Filter the raw result down to okayed properties so callbacks

            // can't insert arbitrary keys (like conserveScore, which might

            // mess up the optimizer).

            for (let subfact in subfacts) {

                if (!SUBFACTS.hasOwnProperty(subfact) || !(SUBFACTS[subfact] & getSubfacts.possibleSubfacts)) {

                    // The ES5.1 spec says in 12.6.4 that it's fine to delete

                    // as we iterate.

                    delete subfacts[subfact];

            return subfacts;

        // Thse are the subfacts this call could affect:

        getSubfacts.possibleSubfacts = TYPE | NOTE | SCORE | ELEMENT;

        getSubfacts.kind = 'props';

        return new this.constructor(this._calls.concat(getSubfacts),

                                    this._max,

                                    this._types);

/**

     * Set the type applied to fnodes processed by this RHS.

*/

    type(theType) {

        // In the future, we might also support providing a callback that receives

        // the fnode and returns a type. We couldn't reason based on these, but the

        // use would be rather a consise way to to override part of what a previous

        // .props() call provides.

        // Actually emit a given type.

        function getSubfacts() {

            return {type: theType};

        getSubfacts.possibleSubfacts = TYPE;

        getSubfacts.type = theType;

        getSubfacts.kind = 'type';

        return new this.constructor(this._calls.concat(getSubfacts),

                                    this._max,

                                    this._types);

/**

     * Constrain this rule to emit 1 of a set of given types. Pass no args to lift

     * a previous ``typeIn`` constraint, as you might do when basing a LHS on a

     * common value to factor out repetition.

     * ``typeIn`` is mostly a hint for the query planner when you're emitting types

     * dynamically from ``props`` calls—in fact, an error will be raised if

     * ``props`` is used without a ``typeIn`` or ``type`` to constrain it—but it

     * also checks conformance at runtime to ensure validity.

*/

    typeIn(...types) {

        // Rationale: If we used the spelling "type('a', 'b', ...)" instead of

        // this, one might expect type('a', 'b').type(fn) to have the latter

        // call override, while expecting type(fn).type('a', 'b') to keep both

        // in effect. Then different calls to type() don't consistently

        // override each other, and the rules get complicated. Plus you can't

        // inherit a type constraint and then sub in another type-returning

        // function that still gets the constraint applied.

        return new this.constructor(this._calls,

                                    this._max,

                                    types);

/**

     * Check a fact for conformance with any typeIn() call.

     * @arg leftType the type of the LHS, which becomes my emitted type if the

     *    fact doesn't specify one

*/

    _checkTypeIn(result, leftType) {

        if (this._types.size > 0) {

            if (result.type === undefined) {

                if (!this._types.has(leftType)) {

                    throw new Error(`A right-hand side claimed, via typeIn(...) to emit one of the types ${this._types} but actually inherited ${leftType} from the left-hand side.`);

            } else if (!this._types.has(result.type)) {

                throw new Error(`A right-hand side claimed, via typeIn(...) to emit one of the types ${this._types} but actually emitted ${result.type}.`);

/**

     * Whatever the callback returns (even ``undefined``) becomes the note of

     * the fact. This overrides any previous call to ``note``.

*/

    note(callback) {

        function getSubfacts(fnode) {

            return {note: callback(fnode)};

        getSubfacts.possibleSubfacts = NOTE;

        getSubfacts.kind = 'note';

        return new this.constructor(this._calls.concat(getSubfacts),

                                    this._max,

                                    this._types);

/**

     * Affect the confidence with which the input node should be considered a

     * member of a type.

     * The parameter is generally between 0 and 1 (inclusive), with 0 meaning

     * the node does not have the "smell" this rule checks for and 1 meaning it

     * does. The range between 0 and 1 is available to represent "fuzzy"

     * confidences. If you have an unbounded range to compress down to [0, 1],

     * consider using :func:`sigmoid` or a scaling thereof.

     * Since every node can have multiple, independent scores (one for each

     * type), this applies to the type explicitly set by the RHS or, if none,

     * to the type named by the ``type`` call on the LHS. If the LHS has none

     * because it's a ``dom(...)`` LHS, an error is raised.

     * @arg {number|function} scoreOrCallback Can either be a static number,

     *     generally 0 to 1 inclusive, or else a callback which takes the fnode

     *     and returns such a number. If the callback returns a boolean, it is

     *     cast to a number.

*/

    score(scoreOrCallback) {

        let getSubfacts;

        function getSubfactsFromNumber(fnode) {

            return {score: scoreOrCallback};

        function getSubfactsFromFunction(fnode) {

            let result = scoreOrCallback(fnode);

            if (typeof result === 'boolean') {

                // Case bools to numbers for convenience. Boolean features are

                // common. Don't cast other things, as it frustrates ruleset

                // debugging.

                result = Number(result);

            return {score: result};

        if (typeof scoreOrCallback === 'number') {

            getSubfacts = getSubfactsFromNumber;

        } else {

            getSubfacts = getSubfactsFromFunction;

        getSubfacts.possibleSubfacts = SCORE;

        getSubfacts.kind = 'score';

        return new this.constructor(this._calls.concat(getSubfacts),

                                    this._max,

                                    this._types);

    // Future: why not have an .element() method for completeness?

    // -------- Methods below this point are private to the framework. --------

/**

     * Run all my props().type().note().score() stuff across a given fnode,

     * enforce my max() stuff, and return a fact ({element, type, score,

     * notes}) for incorporation into that fnode (or a different one, if

     * element is specified). Any of the 4 fact properties can be missing;

     * filling in defaults is a job for the caller.

     * @arg leftType The type the LHS takes in

*/

    fact(fnode, leftType) {

        const doneKinds = new Set();

        const result = {};

        let haveSubfacts = 0;

        for (let call of reversed(this._calls)) {

            // If we've already called a call of this kind, then forget it.

            if (!doneKinds.has(call.kind)) {

                doneKinds.add(call.kind);

                if (~haveSubfacts & call.possibleSubfacts) {

                    // This call might provide a subfact we are missing.

                    const newSubfacts = call(fnode);

                    // We start with an empty object, so we're okay here.

                    // eslint-disable-next-line guard-for-in

                    for (let subfact in newSubfacts) {

                        // A props() callback could insert arbitrary keys into

                        // the result, but it shouldn't matter, because nothing

                        // pays any attention to them.

                        if (!result.hasOwnProperty(subfact)) {

                            result[subfact] = newSubfacts[subfact];

                        haveSubfacts |= SUBFACTS[subfact];

        this._checkAtMost(result);

        this._checkTypeIn(result, leftType);

        return result;

/**

     * Return a record describing the types I might emit (which means either to

     * add a type to a fnode or to output a fnode that already has that type).

     * {couldChangeType: whether I might add a type to the fnode,

     *  possibleTypes: If couldChangeType, the types I might emit; empty set if

     *      we cannot infer it. If not couldChangeType, undefined.}

*/

    possibleEmissions() {

        // If there is a typeIn() constraint or there is a type() call to the

        // right of all props() calls, we have a constraint. We hunt for the

        // tightest constraint we can find, favoring a type() call because it

        // gives us a single type but then falling back to a typeIn().

        let couldChangeType = false;

        for (let call of reversed(this._calls)) {

            if (call.kind === 'props') {

                couldChangeType = true;

                break;

            } else if (call.kind === 'type') {

                return {couldChangeType: true,

                        possibleTypes: new Set([call.type])};

        return {couldChangeType,

                possibleTypes: this._types};

class OutwardRhs {

    constructor(key, through = x => x, allThrough = x => x) {

        this.key = key;

        this.callback = through;

        this.allCallback = allThrough;

/**

     * Append ``.through`` to :func:`out` to run each :term:`fnode` emitted

     * from the LHS through an arbitrary function before returning it to the

     * containing program. Example::

     *     out('titleLengths').through(fnode => fnode.noteFor('title').length)

*/

    through(callback) {

        return new this.constructor(this.key, callback, this.allCallback);

/**

     * Append ``.allThrough`` to :func:`out` to run the entire iterable of

     * emitted :term:`fnodes<fnode>` through an arbitrary function before

     * returning them to the containing program. Example::

     *     out('sortedTitles').allThrough(domSort)

*/

    allThrough(callback) {

        return new this.constructor(this.key, this.callback, callback);

    asRhs() {

        return this;

function props(callback) {

    return new Side({method: 'props', args: [callback]});

/** Constrain to an input type on the LHS, or apply a type on the RHS. */

function type(theType) {

    return new Side({method: 'type', args: [theType]});

function note(callback) {

    return new Side({method: 'note', args: [callback]});

function score(scoreOrCallback) {

    return new Side({method: 'score', args: [scoreOrCallback]});

function atMost(score) {

    return new Side({method: 'atMost', args: [score]});

function typeIn(...types) {

    return new Side({method: 'typeIn', args: types});

/**

 * Pull nodes that conform to multiple conditions at once.

 * For example: ``and(type('title'), type('english'))``

 * Caveats: ``and`` supports only simple ``type`` calls as arguments for now,

 * and it may fire off more rules as prerequisites than strictly necessary.

 * ``not`` and ``or`` don't exist yet, but you can express ``or`` the long way

 * around by having 2 rules with identical RHSs.

*/

function and(...lhss) {

    return new Side({method: 'and', args: lhss});

/**

 * Experimental. For each :term:`fnode` from ``typeCallA``, find the closest

 * node from ``typeCallB``, and attach it as a note. The note is attached to

 * the type specified by the RHS, defaulting to the type of ``typeCallA``. If

 * no nodes are emitted from ``typeCallB``, do nothing.

 * For example... ::

 *     nearest(type('image'), type('price'))

 * The score of the ``typeCallA`` can be added to the new type's score by using

 * :func:`conserveScore` (though this routine has since been removed)::

 *     rule(nearest(type('image'), type('price')),

 *          type('imageWithPrice').score(2).conserveScore())

 * Caveats: ``nearest`` supports only simple ``type`` calls as arguments ``a``

 * and ``b`` for now.

 * @arg distance {function} A function that takes 2 fnodes and returns a

 *     numerical distance between them. Included options are :func:`distance`,

 *     which is a weighted topological distance, and :func:`euclidean`, which

 *     is a spatial distance.

*/

function nearest(typeCallA, typeCallB, distance = euclidean) {

    return new Side({method: 'nearest', args: [typeCallA, typeCallB, distance]});

/**

 * A chain of calls that can be compiled into a Rhs or Lhs, depending on its

 * position in a Rule. This lets us use type() as a leading call for both RHSs

 * and LHSs. I would prefer to do this dynamically, but that wouldn't compile

 * down to old versions of ES.

*/

class Side {

    constructor(...calls) {

        // A "call" is like {method: 'dom', args: ['p.smoo']}.

        this._calls = calls;

    max() {

        return this._and('max');

    bestCluster(options) {

        return this._and('bestCluster', options);

    props(callback) {

        return this._and('props', callback);

    type(...types) {

        return this._and('type', ...types);

    note(callback) {

        return this._and('note', callback);

    score(scoreOrCallback) {

        return this._and('score', scoreOrCallback);

    atMost(score) {

        return this._and('atMost', score);

    typeIn(...types) {

        return this._and('typeIn', ...types);

    and(...lhss) {

        return this._and('and', lhss);

    _and(method, ...args) {

        return new this.constructor(...this._calls.concat({method, args}));

    asLhs() {

        return this._asSide(Lhs.fromFirstCall(this._calls[0]), this._calls.slice(1));

    asRhs() {

        return this._asSide(new InwardRhs(), this._calls);

    _asSide(side, calls) {

        for (let call of calls) {

            side = side[call.method](...call.args);

        return side;

    when(pred) {

        return this._and('when', pred);

/**

 * A wrapper around a DOM node, storing :term:`types<type>`,

 * :term:`scores<score>`, and :term:`notes<note>` that apply to it

*/

class Fnode {

/**

     * @arg element The DOM element described by the fnode.

     * @arg ruleset The ruleset which created the fnode.

*/

    constructor(element, ruleset) {

        if (element === undefined) {

            throw new Error("Someone tried to make a fnode without specifying the element they're talking about.");

/**

         * The raw DOM element this fnode describes

*/

        this.element = element;

        this._ruleset = ruleset;

        // A map of type => {score: number, note: anything}. `score` is always

        // present and defaults to 1. A note is set iff `note` is present and

        // not undefined.

        this._types = new Map();

        // Note: conserveScore() is temporarily absent in 3.0.

//

        // By default, a fnode has an independent score for each of its types.

        // However, a RHS can opt to conserve the score of an upstream type,

        // carrying it forward into another type. To avoid runaway scores in

        // the case that multiple rules choose to do this, we limit the

        // contribution of an upstream type's score to being multiplied in a

        // single time. In this set, we keep track of which upstream types'

        // scores have already been multiplied into each type. LHS fnode => Set

        // of types whose score for that node have been multiplied into this

        // node's score.

        this._conservedScores = new Map();

/**

     * Return whether the given type is one of the ones attached to the wrapped

     * HTML node.

*/

    hasType(type) {

        // Run type(theType) against the ruleset to make sure this doesn't

        // return false just because we haven't lazily run certain rules yet.

        this._computeType(type);

        return this._types.has(type);

/**

     * Return the confidence, in the range (0, 1), that the fnode belongs to the

     * given type, 0 by default.

*/

    scoreFor(type) {

        this._computeType(type);

        return sigmoid(this._ruleset.weightedScore(this.scoresSoFarFor(type)) +

                       getDefault(this._ruleset.biases, type, () => 0));

/**

     * Return the fnode's note for the given type, ``undefined`` if none.

*/

    noteFor(type) {

        this._computeType(type);

        return this._noteSoFarFor(type);

/**

     * Return whether this fnode has a note for the given type.

     * ``undefined`` is not considered a note and may be overwritten with

     * impunity.

*/

    hasNoteFor(type) {

        this._computeType(type);

        return this._hasNoteSoFarFor(type);

    // -------- Methods below this point are private to the framework. --------

/**

     * Return an iterable of the types tagged onto me by rules that have

     * already executed.

*/

    typesSoFar() {

        return this._types.keys();

    _noteSoFarFor(type) {

        return this._typeRecordForGetting(type).note;

    _hasNoteSoFarFor(type) {

        return this._noteSoFarFor(type) !== undefined;

/**

     * Return the score thus far computed on me for a certain type. Doesn't

     * implicitly run any rules. If no score has yet been determined for the

     * given type, return undefined.

*/

    scoresSoFarFor(type) {

        return this._typeRecordForGetting(type).score;

/**

     * Add a given number to one of our per-type scores. Implicitly assign us

     * the given type. Keep track of which rule it resulted from so we can

     * later mess with the coeffs.

*/

    addScoreFor(type, score, ruleName) {

        this._typeRecordForSetting(type).score.set(ruleName, score);

/**

     * Set the note attached to one of our types. Implicitly assign us that

     * type if we don't have it already.

*/

    setNoteFor(type, note) {

        if (this._hasNoteSoFarFor(type)) {

            if (note !== undefined) {

                throw new Error(`Someone (likely the right-hand side of a rule) tried to add a note of type ${type} to an element, but one of that type already exists. Overwriting notes is not allowed, since it would make the order of rules matter.`);

            // else the incoming note is undefined and we already have the

            // type, so it's a no-op

        } else {

            // Apply either a type and note or just a type (which means a note

            // that is undefined):

            this._typeRecordForSetting(type).note = note;

/**

     * Return a score/note record for a type, creating it if it doesn't exist.

*/

    _typeRecordForSetting(type) {

        return setDefault(this._types, type, () => ({score: new Map()}));

/**

     * Manifest a temporary type record for reading, working around the lack of

     * a .? operator in JS.

*/

    _typeRecordForGetting(type) {

        return getDefault(this._types, type, () => ({score: new Map()}));

/**

     * Make sure any scores, notes, and type-tagging for the given type are

     * computed for my element.

*/

    _computeType(theType) {

        if (!this._types.has(theType)) {  // Prevent infinite recursion when an A->A rule looks at A's note in a callback.

            this._ruleset.get(type(theType));

/**

 * Construct and return the proper type of rule class based on the

 * inwardness/outwardness of the RHS.

 * @arg lhs {Lhs} The left-hand side of the rule

 * @arg rhs {Rhs} The right-hand side of the rule

 * @arg options {object} Other, optional information about the rule.

 *     Currently, the only recognized option is ``name``, which points to a

 *     string that uniquely identifies this rule in a ruleset. The name

 *     correlates this rule with one of the coefficients passed into

 *     :func:`ruleset`. If no name is given, an identifier is assigned based on

 *     the index of this rule in the ruleset, but that is, of course, brittle.

*/

function rule(lhs, rhs, options) {

    // Since out() is a valid call only on the RHS (unlike type()), we can take

    // a shortcut here: any outward RHS will already be an OutwardRhs; we don't

    // need to sidetrack it through being a Side. And OutwardRhs has an asRhs()

    // that just returns itself.

    if (typeof rhs === 'string') {

        rhs = out(rhs);

    return new ((rhs instanceof OutwardRhs) ? OutwardRule : InwardRule)(lhs, rhs, options);

let nextRuleNumber = 0;

function newInternalRuleName() {

    return '_' + nextRuleNumber++;

/**

 * We place the in/out distinction in Rules because it determines whether the

 * RHS result is cached, and Rules are responsible for maintaining the rulewise

 * cache ruleset.ruleCache.

*/

class Rule {  // abstract

    constructor(lhs, rhs, options) {

        this.lhs = lhs.asLhs();

        this.rhs = rhs.asRhs();

        // TODO: Make auto-generated rule names be based on the out types of

        // the rules, e.g. _priceish_4. That way, adding rules for one type

        // won't make the coeffs misalign for another.

        this.name = (options ? options.name : undefined) || newInternalRuleName();

/**

     * Return a NiceSet of the rules that this one shallowly depends on in the

     * given ruleset. In a BoundRuleset, this may include rules that have

     * already been executed.

     * Depend on emitters of any LHS type this rule finalizes. (See

     * _typesFinalized for a definition.) Depend on adders of any other LHS

     * types (because, after all, we need to know what nodes have that type in

     * order to find the set of LHS nodes). This works for simple rules and

     * complex ones like and().

     * Specific examples (where A is a type):

     * * A.max->* depends on anything emitting A.

     * * Even A.max->A depends on A emitters, because we have to have all the

     *   scores factored in first. For example, what if we did

     *   max(A)->score(.5)?

     * * A->A depends on anything adding A.

     * * A->(something other than A) depends on anything emitting A. (For

     *   example, we need the A score finalized before we could transfer it to

     *   B using conserveScore().)

     * * A->out() also depends on anything emitting A. Fnode methods aren't

     *   smart enough to lazily run emitter rules as needed. We could make them

     *   so if it was shown to be an advantage.

*/

    prerequisites(ruleset) {

        // Optimization: we could cache the result of this when in a compiled (immutable) ruleset.

        // Extend prereqs with rules derived from each of the given types. If

        // no rules are found, raise an exception, as that indicates a

        // malformed ruleset.

        function extendOrThrow(prereqs, types, ruleGetter, verb) {

            for (let type of types) {

                const rules = ruleGetter(type);

                if (rules.length > 0) {

                    prereqs.extend(rules);

                } else {

                    throw new Error(`No rule ${verb} the "${type}" type, but another rule needs it as input.`);

        const prereqs = new NiceSet();

        // Add finalized types:

        extendOrThrow(prereqs, this._typesFinalized(), type => ruleset.inwardRulesThatCouldEmit(type), 'emits');

        // Add mentioned types:

        // We could say this.lhs.typesMentioned().minus(typesFinalized) as an

        // optimization. But since types mentioned are a superset of types

        // finalized and rules adding are a subset of rules emitting, we get

        // the same result without.

        extendOrThrow(prereqs, this.lhs.typesMentioned(), type => ruleset.inwardRulesThatCouldAdd(type), 'adds');

        return prereqs;

/**

     * Return the types that this rule finalizes.

     * To "finalize" a type means to make sure we're finished running all

     * possible rules that might change a node's score or notes w.r.t. a given

     * type. This is generally done because we're about to use those data for

     * something, like computing a new type's score or or an aggregate

     * function. Exhaustively, we're about to...

     * * change the type of the nodes or

     * * aggregate all nodes of a type

     * This base-class implementation just returns what aggregate functions

     * need, since that need spans inward and outward rules.

     * @return Set of types

*/

    _typesFinalized() {

        // Get the types that are fed to aggregate functions. Aggregate

        // functions are more demanding than a simple type() LHS. A type() LHS

        // itself does not finalize its nodes because the things it could do to

        // them without changing their type (adding notes, adding to score)

        // are immutable or commutative (respectively). Thus, we require a RHS

        // type change in order to require finalization of a simple type()

        // mention. A max(B), OTOH, is not commutative with other B->B rules

        // (imagine type(B).max()->score(.5)), so it must depend on B emitters

        // and thus finalize B. (This will have to be relaxed or rethought when

        // we do the max()/atMost() optimization. Perhaps we can delegate to

        // aggregate functions up in Rule.prerequisites() to ask what their

        // prereqs are. If they implement such an optimization, they can reply.

        // Otherwise, we can assume they are all the nodes of their type.)

//

        // TODO: Could arbitrary predicates (once we implement those) matter

        // too? Maybe it's not just aggregations.

        const type = this.lhs.aggregatedType();

        return (type === undefined) ? new NiceSet() : new NiceSet([type]);

/**

 * A normal rule, whose results head back into the Fathom knowledgebase, to be

 * operated on by further rules.

*/

class InwardRule extends Rule {

    // TODO: On construct, complain about useless rules, like a dom() rule that

    // doesn't assign a type.

/**

     * Return an iterable of the fnodes emitted by the RHS of this rule.

     * Side effect: update ruleset's store of fnodes, its accounting of which

     * rules are done executing, and its cache of results per type.

*/

    results(ruleset) {

        if (ruleset.doneRules.has(this)) {  // shouldn't happen

            throw new Error('A bug in Fathom caused results() to be called on an inward rule twice. That could cause redundant score contributions, etc.');

        const self = this;

        // For now, we consider most of what a LHS computes to be cheap, aside

        // from type() and type().max(), which are cached by their specialized

        // LHS subclasses.

        const leftResults = this.lhs.fnodes(ruleset);

        // Avoid returning a single fnode more than once. LHSs uniquify

        // themselves, but the RHS can change the element it's talking

        // about and thus end up with dupes.

        const returnedFnodes = new Set();

        // Merge facts into fnodes:

        forEach(

            // leftResult can be either a fnode or a {fnode, rhsTransformer} pair.

            function updateFnode(leftResult) {

                const leftType = self.lhs.guaranteedType();

                // Get a fnode and a RHS transformer, whether a plain fnode is

                // returned or a {fnode, rhsTransformer} pair:

                const {fnode: leftFnode = leftResult, rhsTransformer = identity} = leftResult;

                // Grab the fact from the RHS, and run the LHS's optional

                // transformer over it to pick up anything special it wants to

                // do:

                const fact = rhsTransformer(self.rhs.fact(leftFnode, leftType));

                self.lhs.checkFact(fact);

                const rightFnode = ruleset.fnodeForElement(fact.element || leftFnode.element);

                // If the RHS doesn't specify a type, default to the

                // type of the LHS, if any:

                const rightType = fact.type || self.lhs.guaranteedType();

                if (fact.score !== undefined) {

                    if (rightType !== undefined) {

                        rightFnode.addScoreFor(rightType, fact.score, self.name);

                    } else {

                        throw new Error(`The right-hand side of a rule specified a score (${fact.score}) with neither an explicit type nor one we could infer from the left-hand side.`);

                if (fact.type !== undefined || fact.note !== undefined) {

                    // There's a reason to call setNoteFor.

                    if (rightType === undefined) {

                        throw new Error(`The right-hand side of a rule specified a note (${fact.note}) with neither an explicit type nor one we could infer from the left-hand side. Notes are per-type, per-node, so that's a problem.`);

                    } else {

                        rightFnode.setNoteFor(rightType, fact.note);

                returnedFnodes.add(rightFnode);

},

            leftResults);

        // Update ruleset lookup tables.

        // First, mark this rule as done:

        ruleset.doneRules.add(this);

        // Then, stick each fnode in typeCache under all applicable types.

        // Optimization: we really only need to loop over the types

        // this rule can possibly add.

        for (let fnode of returnedFnodes) {

            for (let type of fnode.typesSoFar()) {

                setDefault(ruleset.typeCache, type, () => new Set()).add(fnode);

        return returnedFnodes.values();

/**

     * Return a Set of the types that could be emitted back into the system.

     * To emit a type means to either to add it to a fnode emitted from the RHS

     * or to leave it on such a fnode where it already exists.

*/

    typesItCouldEmit() {

        const rhs = this.rhs.possibleEmissions();

        if (!rhs.couldChangeType && this.lhs.guaranteedType() !== undefined) {

            // It's a b -> b rule.

            return new Set([this.lhs.guaranteedType()]);

        } else if (rhs.possibleTypes.size > 0) {

            // We can prove the type emission from the RHS alone.

            return rhs.possibleTypes;

        } else {

            throw new Error('Could not determine the emitted type of a rule because its right-hand side calls props() without calling typeIn().');

/**

     * Return a Set of types I could add to fnodes I output (where the fnodes

     * did not already have them).

*/

    typesItCouldAdd() {

        const ret = new Set(this.typesItCouldEmit());

        ret.delete(this.lhs.guaranteedType());

        return ret;

/**

     * Add the types we could change to the superclass's result.

*/

    _typesFinalized() {

        const self = this;

        function typesThatCouldChange() {

            const ret = new NiceSet();

            // Get types that could change:

            const emissions = self.rhs.possibleEmissions();

            if (emissions.couldChangeType) {

                // Get the possible guaranteed combinations of types on the LHS

                // (taking just this LHS into account). For each combo, if the RHS

                // adds a type that's not in the combo, the types in the combo get

                // unioned into ret.

                for (let combo of self.lhs.possibleTypeCombinations()) {

                    for (let rhsType of emissions.possibleTypes) {

                        if (!combo.has(rhsType)) {

                            ret.extend(combo);

                            break;

            // Optimization: the possible combos could be later expanded to be

            // informed by earlier rules which add the types mentioned in the LHS.

            // If the only way for something to get B is to have Q first, then we

            // can add Q to each combo and end up with fewer types finalized. Would

            // this imply the existence of a Q->B->Q cycle and thus be impossible?

            // Think about it. If we do this, we can centralize that logic here,

            // rather than repeating it in all the Lhs subclasses).

            return ret;

        return typesThatCouldChange().extend(super._typesFinalized());

/**

 * A rule whose RHS is an out(). This represents a final goal of a ruleset.

 * Its results go out into the world, not inward back into the Fathom

 * knowledgebase.

*/

class OutwardRule extends Rule {

/**

     * Compute the whole thing, including any .through() and .allThrough().

     * Do not mark me done in ruleset.doneRules; out rules are never marked as

     * done so they can be requested many times without having to cache their

     * (potentially big, since they aren't necessarily fnodes?) results. (We

     * can add caching later if it proves beneficial.)

*/

    results(ruleset) {

/**

         * From a LHS's ``{fnode, rhsTransform}`` object or plain fnode, pick off just

         * the fnode and return it.

*/

        function justFnode(fnodeOrStruct) {

            return (fnodeOrStruct instanceof Fnode) ? fnodeOrStruct : fnodeOrStruct.fnode;

        return this.rhs.allCallback(map(this.rhs.callback, map(justFnode, this.lhs.fnodes(ruleset))));

/**

     * @return the key under which the output of this rule will be available

*/

    key() {

        return this.rhs.key;

/**

     * OutwardRules finalize all types mentioned.

*/

    _typesFinalized() {

        return this.lhs.typesMentioned().extend(super._typesFinalized());

/**

 * A shortcut for creating a new :class:`Ruleset`, for symmetry with

 * :func:`rule`

*/

function ruleset(rules, coeffs = [], biases = []) {

    return new Ruleset(rules, coeffs, biases);

/**

 * An unbound ruleset. When you bind it by calling :func:`~Ruleset.against()`,

 * the resulting :class:`BoundRuleset` will be immutable.

*/

class Ruleset {

/**

     * @arg rules {Array} Rules returned from :func:`rule`

     * @arg coeffs {Map} A map of rule names to numerical weights, typically

     *     returned by the :doc:`trainer<training>`. Example:

     *     ``[['someRuleName', 5.04], ...]``. If not given, coefficients

     *     default to 1.

     * @arg biases {object} A map of type names to neural-net biases. These

     *      enable accurate confidence estimates. Example: ``[['someType',

     *      -2.08], ...]``. If absent, biases default to 0.

*/

    constructor(rules, coeffs = [], biases = []) {

        this._inRules = [];

        this._outRules = new Map();  // key -> rule

        this._rulesThatCouldEmit = new Map();  // type -> [rules]

        this._rulesThatCouldAdd = new Map();  // type -> [rules]

        // Private to the framework:

        this._coeffs = new Map(coeffs);  // rule name => coefficient

        this.biases = new Map(biases);  // type name => bias

        // Separate rules into out ones and in ones, and sock them away. We do

        // this here so mistakes raise errors early.

        for (let rule of rules) {

            if (rule instanceof InwardRule) {

                this._inRules.push(rule);

                // Keep track of what inward rules can emit or add:

                // TODO: Combine these hashes for space efficiency:

                const emittedTypes = rule.typesItCouldEmit();

                for (let type of emittedTypes) {

                    setDefault(this._rulesThatCouldEmit, type, () => []).push(rule);

                for (let type of rule.typesItCouldAdd()) {

                    setDefault(this._rulesThatCouldAdd, type, () => []).push(rule);

            } else if (rule instanceof OutwardRule) {

                this._outRules.set(rule.key(), rule);

            } else {

                throw new Error(`This element of ruleset()'s first param wasn't a rule: ${rule}`);

/**

     * Commit this ruleset to running against a specific DOM tree or subtree.

     * When run against a subtree, the root of the subtree is not considered as

     * a possible match.

     * This doesn't actually modify the Ruleset but rather returns a fresh

     * :class:`BoundRuleset`, which contains caches and other stateful, per-DOM

     * bric-a-brac.

*/

    against(doc) {

        return new BoundRuleset(doc,

                                this._inRules,

                                this._outRules,

                                this._rulesThatCouldEmit,

                                this._rulesThatCouldAdd,

                                this._coeffs,

                                this.biases);

/**

     * Return all the rules (both inward and outward) that make up this ruleset.

     * From this, you can construct another ruleset like this one but with your

     * own rules added.

*/

    rules() {

        return Array.from([...this._inRules, ...this._outRules.values()]);

/**

 * A ruleset that is earmarked to analyze a certain DOM

 * Carries a cache of rule results on that DOM. Typically comes from

 * :meth:`~Ruleset.against`.

*/

class BoundRuleset {

/**

     * @arg inRules {Array} Non-out() rules

     * @arg outRules {Map} Output key -> out() rule

*/

    constructor(doc, inRules, outRules, rulesThatCouldEmit, rulesThatCouldAdd, coeffs, biases) {

        this.doc = doc;

        this._inRules = inRules;

        this._outRules = outRules;

        this._rulesThatCouldEmit = rulesThatCouldEmit;

        this._rulesThatCouldAdd = rulesThatCouldAdd;

        this._coeffs = coeffs;

        // Private, for the use of only helper classes:

        this.biases = biases;

        this._clearCaches();

        this.elementCache = new WeakMap();  // DOM element => fnode about it

        this.doneRules = new Set();  // InwardRules that have been executed. OutwardRules can be executed more than once because they don't change any fnodes and are thus idempotent.

/**

     * Change my coefficients and biases after construction.

     * @arg coeffs See the :class:`Ruleset` constructor.

     * @arg biases See the :class:`Ruleset` constructor.

*/

    setCoeffsAndBiases(coeffs, biases = []) {

        // Destructuring assignment doesn't make it through rollup properly

        // (https://github.com/rollup/rollup-plugin-commonjs/issues/358):

        this._coeffs = new Map(coeffs);

        this.biases = new Map(biases);

        this._clearCaches();

/**

     * Clear the typeCache and maxCache, usually in the wake of changing

     * ``this._coeffs``, because both of thise depend on weighted scores.

*/

    _clearCaches() {

        this.maxCache = new Map();  // type => Array of max fnode (or fnodes, if tied) of this type

        this.typeCache = new Map();  // type => Set of all fnodes of this type found so far. (The dependency resolution during execution ensures that individual types will be comprehensive just in time.)

/**

     * Return an array of zero or more fnodes.

     * @arg thing {string|Lhs|Node} Can be

     *       (1) A string which matches up with an "out" rule in the ruleset.

     *           If the out rule uses through(), the results of through's

     *           callback (which might not be fnodes) will be returned.

     *       (2) An arbitrary LHS which we calculate and return the results of.

     *       (3) A DOM node, for which we will return the corresponding fnode.

     *     Results are cached for cases (1) and (3).

*/

    get(thing) {

        if (typeof thing === 'string') {

            if (this._outRules.has(thing)) {

                return Array.from(this._execute(this._outRules.get(thing)));

            } else {

                throw new Error(`There is no out() rule with key "${thing}".`);

        } else if (isDomElement(thing)) {

            // Return the fnode and let it run type(foo) on demand, as people

            // ask it things like scoreFor(foo).

            return this.fnodeForElement(thing);

        } else if (thing.asLhs !== undefined) {

            // Make a temporary out rule, and run it. This may add things to

            // the ruleset's cache, but that's fine: it doesn't change any

            // future results; it just might make them faster. For example, if

            // you ask for .get(type('smoo')) twice, the second time will be a

            // cache hit.

            const outRule = rule(thing, out(Symbol('outKey')));

            return Array.from(this._execute(outRule));

        } else {

            throw new Error('ruleset.get() expects a string, an expression like on the left-hand side of a rule, or a DOM node.');

/**

     * Return the weighted sum of the per-rule, per-type scores from a fnode.

     * @arg mapOfScores a Map of rule name to the [0, 1] score it computed for

     *      the type in question

*/

    weightedScore(mapOfScores) {

        let total = 0;

        for (const [name, score] of mapOfScores) {

            total += score * getDefault(this._coeffs, name, () => 1);

        return total;

    // Provide an opaque context object to be made available to all ranker

    // functions.

    // context (object) {

    //     self.context = object;

    // }

    // -------- Methods below this point are private to the framework. --------

/**

     * Return all the thus-far-unexecuted rules that will have to run to run

     * the requested rule, in the form of Map(prereq: [rulesItIsNeededBy]).

*/

    _prerequisitesTo(rule, undonePrereqs = new Map()) {

        for (let prereq of rule.prerequisites(this)) {

            if (!this.doneRules.has(prereq)) {

                // prereq is not already run. (If it were, we wouldn't care

                // about adding it to the graph.)

                const alreadyAdded = undonePrereqs.has(prereq);

                setDefault(undonePrereqs, prereq, () => []).push(rule);

                // alreadyAdded means we've already computed the prereqs of

                // this prereq and added them to undonePrereqs. So, now

                // that we've hooked up the rule to this prereq in the

                // graph, we can stop. But, if we haven't, then...

                if (!alreadyAdded) {

                    this._prerequisitesTo(prereq, undonePrereqs);

        return undonePrereqs;

/**

     * Run the given rule (and its dependencies, in the proper order), and

     * return its results.

     * The caller is responsible for ensuring that _execute() is not called

     * more than once for a given InwardRule, lest non-idempotent

     * transformations, like score contributions, be applied to fnodes more

     * than once.

     * The basic idea is to sort rules in topological order (according to input

     * and output types) and then run them. On top of that, we do some

     * optimizations. We keep a cache of results by type (whether partial or

     * comprehensive--either way, the topology ensures that any

     * non-comprehensive typeCache entry is made comprehensive before another

     * rule needs it). And we prune our search for prerequisite rules at the

     * first encountered already-executed rule.

*/

    _execute(rule) {

        const prereqs = this._prerequisitesTo(rule);

        let sorted;

        try {

            sorted = [rule].concat(toposort(prereqs.keys(),

                                            prereq => prereqs.get(prereq)));

        } catch (exc) {

            if (exc instanceof CycleError) {

                throw new CycleError('There is a cyclic dependency in the ruleset.');

            } else {

                throw exc;

        let fnodes;

        for (let eachRule of reversed(sorted)) {

            // Sock each set of results away in this.typeCache:

            fnodes = eachRule.results(this);

        return Array.from(fnodes);

    /** @return {Rule[]} */

    inwardRulesThatCouldEmit(type) {

        return getDefault(this._rulesThatCouldEmit, type, () => []);

    /** @return {Rule[]} */

    inwardRulesThatCouldAdd(type) {

        return getDefault(this._rulesThatCouldAdd, type, () => []);

/**

     * @return the Fathom node that describes the given DOM element. This does

     *     not trigger any execution, so the result may be incomplete.

*/

    fnodeForElement(element) {

        return setDefault(this.elementCache,

                          element,

                          () => new Fnode(element, this));

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const version = '3.7.3';

export { and, atMost, clusters$1 as clusters, dom, element, exceptions, nearest, note, out, props, rule, ruleset, score, type, typeIn, utilsForFrontend as utils, version };