FormAutofillHeuristics.sys.mjs

mozilla-central/toolkit/components/formautofill/shared/FormAutofillHeuristics.sys.mjs (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Toolkit :: Form Autofill

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { FormAutofill } from "resource://autofill/FormAutofill.sys.mjs";

import { HeuristicsRegExp } from "resource://gre/modules/shared/HeuristicsRegExp.sys.mjs";

const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {

  CreditCard: "resource://gre/modules/CreditCard.sys.mjs",

  CreditCardRulesets: "resource://gre/modules/shared/CreditCardRuleset.sys.mjs",

  FieldScanner: "resource://gre/modules/shared/FieldScanner.sys.mjs",

  FormAutofillUtils: "resource://gre/modules/shared/FormAutofillUtils.sys.mjs",

  LabelUtils: "resource://gre/modules/shared/LabelUtils.sys.mjs",

});

/**
 * Field names that are allowed to repeat back-to-back inside one section.
 *
 * `_classifySections` consults this list when the field being classified has
 * the same name as the last field of the candidate section: if the name is
 * listed here (for example `email`), the field is appended to that section
 * instead of starting a new one.
 */
const MULTI_FIELD_NAMES = [
  "address-level3",
  "address-level2",
  "address-level1",
  "tel",
  "postal-code",
  "email",
  "street-address",
];

/**
 * Field names that may repeat inside one section only as a run of exactly
 * N consecutive fields; the value is N.
 * For example, the only time multiple cc-number fields are valid is when
 * there are four of these fields in a row (one card number split across four
 * inputs). Otherwise, multiple cc-number fields belong to separate sections.
 *
 * `_classifySections` reads N from this map and tags the merged fields with
 * an increasing `part` number.
 */
const MULTI_N_FIELD_NAMES = {
  "cc-number": 4,
};

/**
 * A group of field details that belong together, typed as either an address
 * section or a credit card section.
 */
export class FormSection {
  static ADDRESS = "address";
  static CREDIT_CARD = "creditCard";

  // Field details in insertion order.
  #fieldDetails = [];
  // Section name; taken from the first added field that carries a truthy
  // `sectionName`.
  #name = "";

  /**
   * @param {Array<object>} fieldDetails
   *        Non-empty list of field details. The first entry's `fieldName`
   *        decides the section type.
   * @throws {TypeError} When `fieldDetails` is missing or empty.
   * @throws {Error} When the first field is neither an address field nor a
   *         credit card field.
   */
  constructor(fieldDetails) {
    // `?.` makes null/undefined input fail with the same explicit message as
    // an empty list, rather than an incidental property-access TypeError.
    if (!fieldDetails?.length) {
      throw new TypeError("A section should contain at least one field");
    }

    fieldDetails.forEach(field => this.addField(field));

    const fieldName = fieldDetails[0].fieldName;
    if (lazy.FormAutofillUtils.isAddressField(fieldName)) {
      this.type = FormSection.ADDRESS;
    } else if (lazy.FormAutofillUtils.isCreditCardField(fieldName)) {
      this.type = FormSection.CREDIT_CARD;
    } else {
      throw new Error("Unknown field type to create a section.");
    }
  }

  /** @returns {Array<object>} The live array of field details in this section. */
  get fieldDetails() {
    return this.#fieldDetails;
  }

  /** @returns {string} The section name, or "" when no field supplied one. */
  get name() {
    return this.#name;
  }

  /**
   * Append a field to the section.
   *
   * @param {object} fieldDetail
   *        Field detail to add; its `sectionName` becomes the section name
   *        if the section does not have one yet.
   */
  addField(fieldDetail) {
    this.#name ||= fieldDetail.sectionName;
    this.#fieldDetails.push(fieldDetail);
  }
}

/**

 * Returns the autocomplete information of fields according to heuristics.

*/

export const FormAutofillHeuristics = {

  // Regular expressions keyed by field name, as returned by
  // HeuristicsRegExp.getRules(). `_findMatchedFieldName` looks up one rule per
  // candidate field name and hands it to `_matchRegexp`.
  RULES: HeuristicsRegExp.getRules(),

  // Label-only regular expressions keyed by field name. They are consulted by
  // `_findMatchedFieldName` only after nothing in RULES matched, and are
  // passed to `_matchRegexp` with `{ attribute: false }`.
  LABEL_RULES: HeuristicsRegExp.getLabelRules(),

  // Candidate field names that `_getPossibleFieldNames` offers to the
  // heuristics. Both lists start out empty here.
  // NOTE(review): presumably populated elsewhere before use — confirm.
  CREDIT_CARD_FIELDNAMES: [],
  ADDRESS_FIELDNAMES: [],

/**

   * Try to find a contiguous sub-array within an array.

   * @param {Array} array

   * @param {Array} subArray

   * @returns {boolean}

   *          Return whether subArray was found within the array or not.

*/

  _matchContiguousSubArray(array, subArray) {

    return array.some((elm, i) =>

      subArray.every((sElem, j) => sElem == array[i + j])

);

},

/**

   * Check whether the given element looks like an expiration-month select.

   * @param {DOMElement} element

   * @returns {boolean}

   *          Return true if we observe the trait of month select in

   *          the current element.

*/

  _isExpirationMonthLikely(element) {

    if (!HTMLSelectElement.isInstance(element)) {

      return false;

    const options = [...element.options];

    const desiredValues = Array(12)

      .fill(1)

      .map((v, i) => v + i);

    // The number of month options shouldn't be less than 12 or larger than 13

    // including the default option.

    if (options.length < 12 || options.length > 13) {

      return false;

    return (

      this._matchContiguousSubArray(

        options.map(e => +e.value),

        desiredValues

      ) ||

      this._matchContiguousSubArray(

        options.map(e => +e.label),

        desiredValues

);

},

/**

   * Check whether the given element looks like an expiration-year select.

   * @param {DOMElement} element

   * @returns {boolean}

   *          Return true if we observe the trait of year select in

   *          the current element.

*/

  _isExpirationYearLikely(element) {

    if (!HTMLSelectElement.isInstance(element)) {

      return false;

    const options = [...element.options];

    // A normal expiration year select should contain at least the last three years

    // in the list.

    const curYear = new Date().getFullYear();

    const desiredValues = Array(3)

      .fill(0)

      .map((v, i) => v + curYear + i);

    return (

      this._matchContiguousSubArray(

        options.map(e => +e.value),

        desiredValues

      ) ||

      this._matchContiguousSubArray(

        options.map(e => +e.label),

        desiredValues

);

},

/**

   * Try to match the telephone related fields to the grammar

   * list to see if there is any valid telephone set and correct their

   * field names.

   * @param {FieldScanner} scanner

   *        The current parsing status for all elements

   * @returns {boolean}

   *          Return true if there is any field can be recognized in the parser,

   *          otherwise false.

*/

  _parsePhoneFields(scanner, _fieldDetail) {

    let matchingResult;

    const GRAMMARS = this.PHONE_FIELD_GRAMMARS;

    function isGrammarSeparator(index) {

      return !GRAMMARS[index][0];

    const savedIndex = scanner.parsingIndex;

    for (let ruleFrom = 0; ruleFrom < GRAMMARS.length; ) {

      const detailStart = scanner.parsingIndex;

      let ruleTo = ruleFrom;

      for (let count = 0; ruleTo < GRAMMARS.length; ruleTo++, count++) {

        // Bail out when reaching the end of the current set of grammars

        // or there are no more elements to parse

        if (

          isGrammarSeparator(ruleTo) ||

          !scanner.elementExisting(detailStart + count)

) {

          break;

        const [category, , length] = GRAMMARS[ruleTo];

        const detail = scanner.getFieldDetailByIndex(detailStart + count);

        // If the field is not what this grammar rule is interested in, skip processing.

        if (

          !detail ||

          detail.fieldName != category ||

          detail.reason == "autocomplete"

) {

          break;

        const element = detail.element;

        if (length && (!element.maxLength || length < element.maxLength)) {

          break;

      // if we reach the grammar separator, that means all the previous rules are matched.

      // Set the matchingResult so we update field names accordingly.

      if (isGrammarSeparator(ruleTo)) {

        matchingResult = { ruleFrom, ruleTo };

        break;

      // Fast forward to the next rule set.

      for (; ruleFrom < GRAMMARS.length; ) {

        if (isGrammarSeparator(ruleFrom++)) {

          break;

    if (matchingResult) {

      const { ruleFrom, ruleTo } = matchingResult;

      for (let i = ruleFrom; i < ruleTo; i++) {

        scanner.updateFieldName(scanner.parsingIndex, GRAMMARS[i][1]);

        scanner.parsingIndex++;

    // If the previous parsed field is a "tel" field, run heuristic to see

    // if the current field is a "tel-extension" field

    const field = scanner.getFieldDetailByIndex(scanner.parsingIndex);

    if (field && field.reason != "autocomplete") {

      const prev = scanner.getFieldDetailByIndex(scanner.parsingIndex - 1);

      if (

        prev &&

        lazy.FormAutofillUtils.getCategoryFromFieldName(prev.fieldName) == "tel"

) {

        const regExpTelExtension = new RegExp(

          "\\bext|ext\\b|extension|ramal", // pt-BR, pt-PT

          "iug"

);

        if (this._matchRegexp(field.element, regExpTelExtension)) {

          scanner.updateFieldName(scanner.parsingIndex, "tel-extension");

          scanner.parsingIndex++;

    return savedIndex != scanner.parsingIndex;

},

/**

   * Try to find the correct address-line[1-3] sequence and correct their field

   * names.

   * @param {FieldScanner} scanner

   *        The current parsing status for all elements

   * @returns {boolean}

   *          Return true if there is any field can be recognized in the parser,

   *          otherwise false.

*/

  _parseStreetAddressFields(scanner, _fieldDetail) {

    const INTERESTED_FIELDS = [

      "street-address",

      "address-line1",

      "address-line2",

      "address-line3",

];

    const fields = [];

    for (let idx = scanner.parsingIndex; !scanner.parsingFinished; idx++) {

      const detail = scanner.getFieldDetailByIndex(idx);

      if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {

        break;

      fields.push(detail);

    if (!fields.length) {

      return false;

    switch (fields.length) {

      case 1:

        if (

          fields[0].reason != "autocomplete" &&

          ["address-line2", "address-line3"].includes(fields[0].fieldName)

) {

          scanner.updateFieldName(scanner.parsingIndex, "address-line1");

        break;

      case 2:

        if (fields[0].reason == "autocomplete") {

          if (

            fields[0].fieldName == "street-address" &&

            (fields[1].fieldName == "address-line2" ||

              fields[1].reason != "autocomplete")

) {

            scanner.updateFieldName(

              scanner.parsingIndex,

              "address-line1",

              true

);

        } else {

          scanner.updateFieldName(scanner.parsingIndex, "address-line1");

        scanner.updateFieldName(scanner.parsingIndex + 1, "address-line2");

        break;

      case 3:

      default:

        scanner.updateFieldName(scanner.parsingIndex, "address-line1");

        scanner.updateFieldName(scanner.parsingIndex + 1, "address-line2");

        scanner.updateFieldName(scanner.parsingIndex + 2, "address-line3");

        break;

    scanner.parsingIndex += fields.length;

    return true;

},

  _parseAddressFields(scanner, fieldDetail) {

    const INTERESTED_FIELDS = ["address-level1", "address-level2"];

    if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {

      return false;

    const fields = [];

    for (let idx = scanner.parsingIndex; !scanner.parsingFinished; idx++) {

      const detail = scanner.getFieldDetailByIndex(idx);

      if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {

        break;

      fields.push(detail);

    if (!fields.length) {

      return false;

    // State & City(address-level2)

    if (fields.length == 1) {

      if (fields[0].fieldName == "address-level2") {

        const prev = scanner.getFieldDetailByIndex(scanner.parsingIndex - 1);

        if (

          prev &&

          !prev.fieldName &&

          HTMLSelectElement.isInstance(prev.element)

) {

          scanner.updateFieldName(scanner.parsingIndex - 1, "address-level1");

          scanner.parsingIndex += 1;

          return true;

        const next = scanner.getFieldDetailByIndex(scanner.parsingIndex + 1);

        if (

          next &&

          !next.fieldName &&

          HTMLSelectElement.isInstance(next.element)

) {

          scanner.updateFieldName(scanner.parsingIndex + 1, "address-level1");

          scanner.parsingIndex += 2;

          return true;

    scanner.parsingIndex += fields.length;

    return true;

},

/**

   * Try to look for expiration date fields and revise the field names if needed.

   * @param {FieldScanner} scanner

   *        The current parsing status for all elements

   * @returns {boolean}

   *          Return true if there is any field can be recognized in the parser,

   *          otherwise false.

*/

  _parseCreditCardExpiryFields(scanner, fieldDetail) {

    const INTERESTED_FIELDS = ["cc-exp", "cc-exp-month", "cc-exp-year"];

    if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {

      return false;

    const fields = [];

    for (let idx = scanner.parsingIndex; ; idx++) {

      const detail = scanner.getFieldDetailByIndex(idx);

      if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {

        break;

      fields.push(detail);

    // Don't process the fields if expiration month and expiration year are already

    // matched by regex in correct order.

    if (

      (fields.length == 1 && fields[0].fieldName == "cc-exp") ||

      (fields.length == 2 &&

        fields[0].fieldName == "cc-exp-month" &&

        fields[1].fieldName == "cc-exp-year")

) {

      scanner.parsingIndex += fields.length;

      return true;

    const prevCCFields = new Set();

    for (let idx = scanner.parsingIndex - 1; ; idx--) {

      const detail = scanner.getFieldDetailByIndex(idx);

      if (

        lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=

        "creditCard"

) {

        break;

      prevCCFields.add(detail.fieldName);

    // We update the "cc-exp-*" fields to correct "cc-ex-*" fields order when

    // the following conditions are met:

    // 1. The previous elements are identified as credit card fields and

    //    cc-number is in it

    // 2. There is no "cc-exp-*" fields in the previous credit card elements

    if (

      ["cc-number", "cc-name"].some(f => prevCCFields.has(f)) &&

      !["cc-exp", "cc-exp-month", "cc-exp-year"].some(f => prevCCFields.has(f))

) {

      if (fields.length == 1) {

        scanner.updateFieldName(scanner.parsingIndex, "cc-exp");

      } else if (fields.length == 2) {

        scanner.updateFieldName(scanner.parsingIndex, "cc-exp-month");

        scanner.updateFieldName(scanner.parsingIndex + 1, "cc-exp-year");

      scanner.parsingIndex += fields.length;

      return true;

    // Set field name to null as it failed to match any patterns.

    for (let idx = 0; idx < fields.length; idx++) {

      scanner.updateFieldName(scanner.parsingIndex + idx, null);

    return false;

},

/**

   * Look for cc-*-name fields when *-name field is present

   * @param {FieldScanner} scanner

   *        The current parsing status for all elements

   * @returns {boolean}

   *          Return true if there is any field can be recognized in the parser,

   *          otherwise false.

*/

  _parseCreditCardNameFields(scanner, fieldDetail) {

    const INTERESTED_FIELDS = [

      "name",

      "given-name",

      "additional-name",

      "family-name",

];

    if (!INTERESTED_FIELDS.includes(fieldDetail.fieldName)) {

      return false;

    const fields = [];

    for (let idx = scanner.parsingIndex; ; idx++) {

      const detail = scanner.getFieldDetailByIndex(idx);

      if (!INTERESTED_FIELDS.includes(detail?.fieldName)) {

        break;

      fields.push(detail);

    const prevCCFields = new Set();

    for (let idx = scanner.parsingIndex - 1; ; idx--) {

      const detail = scanner.getFieldDetailByIndex(idx);

      if (

        lazy.FormAutofillUtils.getCategoryFromFieldName(detail?.fieldName) !=

        "creditCard"

) {

        break;

      prevCCFields.add(detail.fieldName);

    // We update the "name" fields to "cc-name" fields when the following

    // conditions are met:

    // 1. The preceding fields are identified as credit card fields and

    //    contain the "cc-number" field.

    // 2. No "cc-name-*" field is found among the preceding credit card fields.

    // 3. The "cc-csc" field is not present among the preceding credit card fields.

    if (

      ["cc-number"].some(f => prevCCFields.has(f)) &&

      !["cc-name", "cc-given-name", "cc-family-name", "cc-csc"].some(f =>

        prevCCFields.has(f)

) {

      // If there is only one field, assume the name field a `cc-name` field

      if (fields.length == 1) {

        scanner.updateFieldName(scanner.parsingIndex, `cc-name`);

        scanner.parsingIndex += 1;

      } else {

        // update *-name to cc-*-name

        for (const field of fields) {

          scanner.updateFieldName(

            scanner.parsingIndex,

            `cc-${field.fieldName}`

);

          scanner.parsingIndex += 1;

      return true;

    return false;

},

/**

   * This function should provide all field details of a form which are placed

   * in the belonging section. The details contain the autocomplete info

   * (e.g. fieldName, section, etc).

   * @param {HTMLFormElement} form

   *        the elements in this form to be predicted the field info.

   * @returns {Array<FormSection>}

   *        all sections within its field details in the form.

*/

  getFormInfo(form) {

    const elements = Array.from(form.elements).filter(element =>

      lazy.FormAutofillUtils.isCreditCardOrAddressFieldType(element)

);

    const scanner = new lazy.FieldScanner(elements, element =>

      this.inferFieldInfo(element, elements)

);

    while (!scanner.parsingFinished) {

      const savedIndex = scanner.parsingIndex;

      // First, we get the inferred field info

      const fieldDetail = scanner.getFieldDetailByIndex(scanner.parsingIndex);

      if (

        this._parsePhoneFields(scanner, fieldDetail) ||

        this._parseStreetAddressFields(scanner, fieldDetail) ||

        this._parseAddressFields(scanner, fieldDetail) ||

        this._parseCreditCardExpiryFields(scanner, fieldDetail) ||

        this._parseCreditCardNameFields(scanner, fieldDetail)

) {

        continue;

      // If there is no field parsed, the parsing cursor can be moved

      // forward to the next one.

      if (savedIndex == scanner.parsingIndex) {

        scanner.parsingIndex++;

    lazy.LabelUtils.clearLabelMap();

    const fields = scanner.fieldDetails;

    const sections = [

      ...this._classifySections(

        fields.filter(f => lazy.FormAutofillUtils.isAddressField(f.fieldName))

),

      ...this._classifySections(

        fields.filter(f =>

          lazy.FormAutofillUtils.isCreditCardField(f.fieldName)

),

];

    return sections.sort(

      (a, b) =>

        fields.indexOf(a.fieldDetails[0]) - fields.indexOf(b.fieldDetails[0])

);

},

/**

   * The result is an array contains the sections with its belonging field details.

   * @param   {Array<FieldDetails>} fieldDetails field detail array to be classified

   * @returns {Array<FormSection>} The array with the sections.

*/

  _classifySections(fieldDetails) {

    let sections = [];

    for (let i = 0; i < fieldDetails.length; i++) {

      const cur = fieldDetails[i];

      const [currentSection] = sections.slice(-1);

      // The section this field might be placed into.

      let candidateSection = null;

      // Use name group from autocomplete attribute (ex, section-xxx) to look for the section

      // we might place this field into.

      // If the field doesn't have a section name, the candidate section is the previous section.

      if (!currentSection || !cur.sectionName) {

        candidateSection = currentSection;

      } else if (cur.sectionName) {

        // If the field has a section name, the candidate section is the nearest section that

        // either shares the same name or lacks a name.

        for (let idx = sections.length - 1; idx >= 0; idx--) {

          if (!sections[idx].name || sections[idx].name == cur.sectionName) {

            candidateSection = sections[idx];

            break;

      if (candidateSection) {

        let createNewSection = true;

        // We might create a new section instead of placing the field in the candiate section if

        // the section already has a field with the same field name.

        // We also check visibility for both the fields with the same field name because we don't

        // wanht to create a new section for an invisible field.

        if (

          candidateSection.fieldDetails.find(

            f => f.fieldName == cur.fieldName && f.isVisible && cur.isVisible

) {

          // For some field type, it is common to have multiple fields in one section, for example,

          // email. In that case, we will not create a new section even when the candidate section

          // already has a field with the same field name.

          const [lastFieldDetail] = candidateSection.fieldDetails.slice(-1);

          if (lastFieldDetail.fieldName == cur.fieldName) {

            if (MULTI_FIELD_NAMES.includes(cur.fieldName)) {

              createNewSection = false;

            } else if (cur.fieldName in MULTI_N_FIELD_NAMES) {

              // This is the heuristic to handle special cases where we can have multiple

              // fields in one section, but only if the field has appeared N times in a row.

              // For example, websites can use 4 consecutive 4-digit `cc-number` fields

              // instead of one 16-digit `cc-number` field.

              const N = MULTI_N_FIELD_NAMES[cur.fieldName];

              if (lastFieldDetail.part) {

                // If `part` is set, we have already identified this field can be

                // merged previously

                if (lastFieldDetail.part < N) {

                  createNewSection = false;

                  fieldDetails[i].part = lastFieldDetail.part + 1;

                // If the next N fields are all the same field, we can merge them

              } else if (

                N == 2 ||

                fieldDetails

                  .slice(i + 1, i + N - 1)

                  .every(f => f.fieldName == cur.fieldName)

) {

                lastFieldDetail.part = 1;

                fieldDetails[i].part = 2;

                createNewSection = false;

        } else {

          // The field doesn't exist in the candidate section, add it.

          createNewSection = false;

        if (!createNewSection) {

          candidateSection.addField(fieldDetails[i]);

          continue;

      // Create a new section

      sections.push(new FormSection([fieldDetails[i]]));

    return sections;

},

  _getPossibleFieldNames(element) {

    let fieldNames = [];

    const isAutoCompleteOff =

      element.autocomplete == "off" || element.form?.autocomplete == "off";

    if (!isAutoCompleteOff || FormAutofill.creditCardsAutocompleteOff) {

      fieldNames.push(...this.CREDIT_CARD_FIELDNAMES);

    if (!isAutoCompleteOff || FormAutofill.addressesAutocompleteOff) {

      fieldNames.push(...this.ADDRESS_FIELDNAMES);

    if (HTMLSelectElement.isInstance(element)) {

      const FIELDNAMES_FOR_SELECT_ELEMENT = [

        "address-level1",

        "address-level2",

        "country",

        "cc-exp-month",

        "cc-exp-year",

        "cc-exp",

        "cc-type",

];

      fieldNames = fieldNames.filter(name =>

        FIELDNAMES_FOR_SELECT_ELEMENT.includes(name)

);

    return fieldNames;

},

/**

   * Get inferred information about an input element using autocomplete info, fathom and regex-based heuristics.

   * @param {HTMLElement} element - The input element to infer information about.

   * @param {Array<HTMLElement>} elements - See `getFathomField` for details

   * @returns {Array} - An array containing:

   *                    [0]the inferred field name

   *                    [1]autocomplete information if the element has a usable autocomplete attribute, null otherwise.

   *                    [2]fathom confidence if fathom considers it a cc field, null otherwise.

*/

  inferFieldInfo(element, elements = []) {

    const autocompleteInfo = element.getAutocompleteInfo();

    // An input[autocomplete="on"] will not be early return here since it stll

    // needs to find the field name.

    if (

      autocompleteInfo?.fieldName &&

      !["on", "off"].includes(autocompleteInfo.fieldName)

) {

      return [autocompleteInfo.fieldName, autocompleteInfo, null];

    const fields = this._getPossibleFieldNames(element);

    // "email" type of input is accurate for heuristics to determine its Email

    // field or not. However, "tel" type is used for ZIP code for some web site

    // (e.g. HomeDepot, BestBuy), so "tel" type should be not used for "tel"

    // prediction.

    if (element.type == "email" && fields.includes("email")) {

      return ["email", null, null];

    if (lazy.FormAutofillUtils.isFathomCreditCardsEnabled()) {

      // We don't care fields that are not supported by fathom

      const fathomFields = fields.filter(r =>

        lazy.CreditCardRulesets.types.includes(r)

);

      const [matchedFieldName, confidence] = this.getFathomField(

        element,

        fathomFields,

        elements

);

      // At this point, use fathom's recommendation if it has one

      if (matchedFieldName) {

        return [matchedFieldName, null, confidence];

      // Continue to run regex-based heuristics even when fathom doesn't recognize

      // the field. Since the regex-based heuristic has good search coverage but

      // has a worse precision. We use it in conjunction with fathom to maximize

      // our search coverage. For example, when a <input> is not considered cc-name

      // by fathom but is considered cc-name by regex-based heuristic, if the form

      // also contains a cc-number identified by fathom, we will treat the form as a

      // valid cc form; hence both cc-number & cc-name are identified.

    // Check every select for options that

    // match credit card network names in value or label.

    if (HTMLSelectElement.isInstance(element)) {

      if (this._isExpirationMonthLikely(element)) {

        return ["cc-exp-month", null, null];

      } else if (this._isExpirationYearLikely(element)) {

        return ["cc-exp-year", null, null];

      const options = Array.from(element.querySelectorAll("option"));

      if (

        options.find(

          option =>

            lazy.CreditCard.getNetworkFromName(option.value) ||

            lazy.CreditCard.getNetworkFromName(option.text)

) {

        return ["cc-type", null, null];

      // At least two options match the country name, otherwise some state name might

      // also match a country name, ex, Georgia. We check the last two

      // options rather than the first, as selects often start with a non-country display option.

      const countryDisplayNames = Array.from(FormAutofill.countries.values());

      if (

        options.length >= 2 &&

        options

          .slice(-2)

          .every(

            option =>

              countryDisplayNames.includes(option.value) ||

              countryDisplayNames.includes(option.text)

) {

        return ["country", null, null];

    // Find a matched field name using regexp-based heuristics

    const matchedFieldName = this._findMatchedFieldName(element, fields);

    return [matchedFieldName, null, null];

},

/**

   * Using Fathom, say what kind of CC field an element is most likely to be.

   * This function doesn't only run fathom on the passed element. It also

   * runs fathom for all elements in the FieldScanner for optimization purpose.

   * @param {HTMLElement} element

   * @param {Array} fields

   * @param {Array<HTMLElement>} elements - All other eligible elements in the same form. This is mainly used as an

   *                                        optimization approach to run fathom model on all eligible elements

   *                                        once instead of one by one

   * @returns {Array} A tuple of [field name, probability] describing the

   *   highest-confidence classification

*/

  getFathomField(element, fields, elements = []) {

    if (!fields.length) {

      return [null, null];

    if (!this._fathomConfidences?.get(element)) {

      this._fathomConfidences = new Map();

      // This should not throw unless we run into an OOM situation, at which

      // point we have worse problems and this failing is not a big deal.

      elements = elements.includes(element) ? elements : [element];

      const confidences = this.getFormAutofillConfidences(elements);

      for (let i = 0; i < elements.length; i++) {

        this._fathomConfidences.set(elements[i], confidences[i]);

    const elementConfidences = this._fathomConfidences.get(element);

    if (!elementConfidences) {

      return [null, null];

    let highestField = null;

    let highestConfidence = lazy.FormAutofillUtils.ccFathomConfidenceThreshold; // Start with a threshold of 0.5

    for (let [key, value] of Object.entries(elementConfidences)) {

      if (!fields.includes(key)) {

        // ignore field that we don't care

        continue;

      if (value > highestConfidence) {

        highestConfidence = value;

        highestField = key;

    if (!highestField) {

      return [null, null];

    // Used by test ONLY! This ensure testcases always get the same confidence

    if (lazy.FormAutofillUtils.ccFathomTestConfidence > 0) {

      highestConfidence = lazy.FormAutofillUtils.ccFathomTestConfidence;

    return [highestField, highestConfidence];

},

/**

   * @param {Array} elements Array of elements that we want to get result from fathom cc rules

   * @returns {Array<object>} One object per element, in the same order, holding the Fathom confidence keyed by field-type.

*/

  getFormAutofillConfidences(elements) {

    if (

      lazy.FormAutofillUtils.ccHeuristicsMode ==

      lazy.FormAutofillUtils.CC_FATHOM_NATIVE

) {

      const confidences = ChromeUtils.getFormAutofillConfidences(elements);

      return confidences.map(c => {

        let result = {};

        for (let [fieldName, confidence] of Object.entries(c)) {

          let type =

            lazy.FormAutofillUtils.formAutofillConfidencesKeyToCCFieldType(

              fieldName

);

          result[type] = confidence;

        return result;

});

    return elements.map(element => {

/**

       * Return how confident our ML model is that `element` is a field of the

       * given type.

       * @param {string} fieldName The Fathom type to check against. This is

       *   conveniently the same as the autocomplete attribute value that means

       *   the same thing.

       * @returns {number} Confidence in range [0, 1]

*/

      function confidence(fieldName) {

        const ruleset = lazy.CreditCardRulesets[fieldName];

        const fnodes = ruleset.against(element).get(fieldName);

        // fnodes is either 0 or 1 item long, since we ran the ruleset

        // against a single element:

        return fnodes.length ? fnodes[0].scoreFor(fieldName) : 0;

      // Bang the element against the ruleset for every type of field:

      const confidences = {};

      lazy.CreditCardRulesets.types.map(fieldName => {

        confidences[fieldName] = confidence(fieldName);

});

      return confidences;

});

},

/**

   * @typedef ElementStrings

   * @type {object}

   * @yields {string} id - element id.

   * @yields {string} name - element name.

   * @yields {Array<string>} labels - extracted labels.

*/

/**

   * Extract all the signature strings of an element.

   * @param {HTMLElement} element

   * @returns {Array<string>}

*/

  _getElementStrings(element) {

    return [element.id, element.name, element.placeholder?.trim()];

},

/**

   * Extract all the label strings associated with an element.

   * @param {HTMLElement} element

   * @returns {ElementStrings}

*/

  _getElementLabelStrings(element) {

    return {

      *[Symbol.iterator]() {

        const labels = lazy.LabelUtils.findLabelElements(element);

        for (let label of labels) {

          yield* lazy.LabelUtils.extractLabelStrings(label);

        const ariaLabels = element.getAttribute("aria-label");

        if (ariaLabels) {

          yield* [ariaLabels];

},

};

},

  // In order to support webkit we need to avoid usage of negative lookbehind due to low support

  // First safari version with support is 16.4 (Release Date: 27th March 2023)

  // https://caniuse.com/js-regexp-lookbehind

  // We can mimic the behaviour of negative lookbehinds by using a named capture group

  // (?<!not)word -> (?<neg>notword)|word

  // TODO: Bug 1829583

  testRegex(regex, string) {

    const matches = string?.matchAll(regex);

    if (!matches) {

      return false;

    const excludeNegativeCaptureGroups = [];

    for (const match of matches) {

      excludeNegativeCaptureGroups.push(

        ...match.filter(m => m !== match?.groups?.neg).filter(Boolean)

);

    return excludeNegativeCaptureGroups?.length > 0;

},

/**

   * Find the first matching field name from a given list of field names

   * that matches an HTML element.

   * The function first tries to match the element against a set of

   * pre-defined regular expression rules. If no match is found, it

   * then checks for label-specific rules, if they exist.

   * Note: For label rules, the keyword is often more general

   * (e.g., "^\\W*address"), hence they are only searched within labels

   * to reduce the occurrence of false positives.

   * @param {HTMLElement} element The element to match.

   * @param {Array<string>} fieldNames An array of field names to compare against.

   * @returns {string|null} The name of the matched field, or null if no match was found.

*/

  _findMatchedFieldName(element, fieldNames) {

    if (!fieldNames.length) {

      return null;

    // Attempt to match the element against the default set of rules

    let matchedFieldName = fieldNames.find(fieldName =>

      this._matchRegexp(element, this.RULES[fieldName])

);

    // If no match is found, and if a label rule exists for the field,

    // attempt to match against the label rules

    if (!matchedFieldName) {

      matchedFieldName = fieldNames.find(fieldName => {

        const regexp = this.LABEL_RULES[fieldName];

        return this._matchRegexp(element, regexp, { attribute: false });

});

    return matchedFieldName;

},

/**

   * Determine whether the regexp can match any of element strings.

   * @param {HTMLElement} element The HTML element to match.

   * @param {RegExp} regexp       The regular expression to match against.

   * @param {object} [options]    Optional parameters for matching.

   * @param {boolean} [options.attribute=true]

   *                              Whether to match against the element's attributes.

   * @param {boolean} [options.label=true]

   *                              Whether to match against the element's labels.

   * @returns {boolean} True if a match is found, otherwise false.

*/

  _matchRegexp(element, regexp, { attribute = true, label = true } = {}) {

    if (!regexp) {

      return false;

    if (attribute) {

      const elemStrings = this._getElementStrings(element);

      if (elemStrings.find(s => this.testRegex(regexp, s?.toLowerCase()))) {

        return true;

    if (label) {

      const elementLabelStrings = this._getElementLabelStrings(element);

      for (const s of elementLabelStrings) {

        if (this.testRegex(regexp, s?.toLowerCase())) {

          return true;

    return false;

},

/**

   * Phone field grammars - first matched grammar will be parsed. Grammars are

   * separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are

   * parsed separately unless they are necessary parts of the match.

   * The following notation is used to describe the patterns:

   * <cc> - country code field.

   * <ac> - area code field.

   * <phone> - phone or prefix.

   * <suffix> - suffix.

   * <ext> - extension.

   * :N means field is limited to N characters, otherwise it is unlimited.

   * (pattern <field>)? means pattern is optional and matched separately.

   * This grammar list from Chromium will be enabled partially once we need to

   * support more cases of Telephone fields.

*/

  PHONE_FIELD_GRAMMARS: [
    // Layout of this table:
    // - Each active entry is a three-element array. Judging from the notation
    //   comment above each active grammar, the second element is the telephone
    //   field name for that position and the third is its character limit
    //   (0 where the notation gives no ":N" limit).
    // - NOTE(review): the first element ("tel") is presumably the field name
    //   the element must already have been detected as; confirm against the
    //   code that consumes this table.
    // - `[null, null, 0]` stands in for Chromium's
    //   {REGEX_SEPARATOR, FIELD_NONE, 0} and ends a grammar.
    // - Lines written as `{REGEX_*, FIELD_*, N}` inside comments are Chromium
    //   grammars that are not enabled here; they are kept for reference.

    // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
    // (Ext: <ext>)?)?
    // {REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
    // {REGEX_PHONE, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PHONE, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
    // (enabled) Four fields in a row, limited to 3, 3, 3 and 4 characters.
    ["tel", "tel-country-code", 3],
    ["tel", "tel-area-code", 3],
    ["tel", "tel-local-prefix", 3],
    ["tel", "tel-local-suffix", 4],
    [null, null, 0],

    // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 3},
    // {REGEX_PHONE, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Area code: <ac>:3 Prefix: <prefix>:3 Suffix: <suffix>:4 (Ext: <ext>)?
    // {REGEX_AREA, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX, FIELD_PHONE, 3},
    // {REGEX_SUFFIX, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX, FIELD_PHONE, 0},
    // {REGEX_SUFFIX, FIELD_SUFFIX, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
    // (enabled) Area code with no limit, then fields limited to 3 and 4
    // characters.
    ["tel", "tel-area-code", 0],
    ["tel", "tel-local-prefix", 3],
    ["tel", "tel-local-suffix", 4],
    [null, null, 0],

    // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> - <phone> (Ext: <ext>)?
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 3},
    // {REGEX_PHONE, FIELD_PHONE, 10},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Ext: <ext>
    // {REGEX_EXTENSION, FIELD_EXTENSION, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <phone> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},
],

};

// Register `FormAutofillHeuristics.CREDIT_CARD_FIELDNAMES` through
// `ChromeUtils.defineLazyGetter`. The callback returns the keys of
// `FormAutofillHeuristics.RULES` that
// `FormAutofillUtils.isCreditCardField` accepts.
ChromeUtils.defineLazyGetter(
  FormAutofillHeuristics,
  "CREDIT_CARD_FIELDNAMES",
  () => {
    const ruleNames = Object.keys(FormAutofillHeuristics.RULES);
    return ruleNames.filter(ruleName =>
      lazy.FormAutofillUtils.isCreditCardField(ruleName)
    );
  }
);

// Register `FormAutofillHeuristics.ADDRESS_FIELDNAMES` through
// `ChromeUtils.defineLazyGetter`. The callback returns the keys of
// `FormAutofillHeuristics.RULES` that `FormAutofillUtils.isAddressField`
// accepts.
ChromeUtils.defineLazyGetter(
  FormAutofillHeuristics,
  "ADDRESS_FIELDNAMES",
  () => {
    const ruleNames = Object.keys(FormAutofillHeuristics.RULES);
    return ruleNames.filter(ruleName =>
      lazy.FormAutofillUtils.isAddressField(ruleName)
    );
  }
);

export default FormAutofillHeuristics;