__init__.py - mozsearch

mozilla-central/tools/lint/fluent-lint/__init__.py

Enable keyboard shortcuts

Source code

File a bug in Developer Infrastructure :: Lint and Formatting

Revision control

Copy as Markdown

Other Tools

# This Source Code Form is subject to the terms of the Mozilla Public

# License, v. 2.0. If a copy of the MPL was not distributed with this

# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import bisect

import os

import re

from html.parser import HTMLParser

import mozpack.path as mozpath

import yaml

from fluent.syntax import ast, parse, visitor

from mozlint import result

from mozlint.pathutils import expand_exclusions

class TextElementHTMLParser(HTMLParser):
    """Collect the plain-text portions of a TextElement value.

    A TextElement may embed HTML tags, and the attributes of those tags can
    contain quotes. Only the character data between tags is of interest, so
    every chunk handed to ``handle_data`` is stored in ``extracted_text``.
    """

    def __init__(self) -> None:
        """Create the parser with an empty list of extracted text chunks."""
        super().__init__()
        # Text chunks in the order the parser reported them.
        self.extracted_text = []

    def handle_data(self, data: str) -> None:
        """Record one chunk of character data reported by the parser."""
        self.extracted_text.append(data)

class Linter(visitor.Visitor):

    """Fluent linter implementation.

    This subclasses the Fluent AST visitor. Methods are called corresponding

    to each type of node in the Fluent AST. It is possible to control

    whether a node is recursed into by calling the generic_visit method on

    the superclass.

    See the documentation here:

    https://www.projectfluent.org/python-fluent/fluent.syntax/stable/usage.html

"""

    def __init__(
        self, path, config, exclusions, contents, offsets_and_lines, brand_names=None
    ):
        """Set up the linter state for a single Fluent file.

        Args:
            path: Path of the file being linted. It is stored in every result
                and compared against the per-rule "files" exclusions.
            config: Linter configuration, forwarded to ``result.from_config``.
            exclusions: Mapping of rule id (e.g. "ID01") to a dict holding
                "files" and "messages" collections of excluded entries.
            contents: Full text of the file being linted.
            offsets_and_lines: List of ``(offset, line)`` tuples used to turn
                span offsets into line/column positions.
            brand_names: Optional list of brand names that must not be
                hard-coded in messages. Defaults to no brand names.
        """
        super().__init__()
        self.path = path
        self.config = config
        self.exclusions = exclusions
        self.contents = contents
        self.offsets_and_lines = offsets_and_lines

        self.results = []
        self.identifier_re = re.compile(r"[a-z0-9-]+")
        self.apostrophe_re = re.compile(r"\w'")
        self.incorrect_apostrophe_re = re.compile(r"\w\u2018\w")
        self.single_quote_re = re.compile(r"'(.+)'")
        self.double_quote_re = re.compile(r"\".+\"")
        self.ellipsis_re = re.compile(r"\.\.\.")

        # A fresh list per instance; a mutable default argument would be
        # shared between every Linter created without brand names.
        self.brand_names = [] if brand_names is None else brand_names
        self.minimum_id_length = 9

        # Id of the message currently being visited, or None when inside a
        # term. Initialised here so that reading it is always safe.
        self.last_message_id = None

        self.state = {
            # The resource comment should be at the top of the page after the license.
            "node_can_be_resource_comment": True,
            # Group comments must be followed by a message. Two group comments are not
            # allowed in a row.
            "can_have_group_comment": True,
            # Comment bound to the current message
            "comment": "",
            # The current group comment
            "group_comment": "",
            # Variables in the current message
            "variables": [],
        }

        attributes = [
            "label",
            "value",
            "accesskey",
            "alt",
            "title",
            "tooltiptext",
            "placeholder",
            "aria-label",
            "aria-description",
            "aria-valuetext",
            "style",
            # For XUL key/command setup.
            "key",
            "keycode",
            # For download filenames:
            "download",
            # Used in the Firefox prefs
            "searchkeywords",
            # Used by search-textbox.js
            "searchbuttonlabel",
            # Used in toolbar customization.
            "toolbarname",
            # Used in moz-message-bar.
            "message",
            # Used in dialogs (should be moved to using fluent IDs though)
            "buttonlabelaccept",
            "buttonaccesskeyaccept",
            "buttonlabelcancel",
            "buttonaccesskeycancel",
            "buttonlabelextra2",
            "buttonaccesskeyextra2",
            # Used in app menu notifications (should be moved to use fluent IDs)
            "buttonlabel",
            "buttonaccesskey",
            "secondarybuttonlabel",
            "secondarybuttonaccesskey",
            # Commonly used in Lit-based web components
            "heading",
            "description",
        ]
        # Attribute names are compared case-insensitively.
        self.known_attribute_list = [a.lower() for a in attributes]

        # Set this to true to debug print the root node's json. This is useful for
        # writing new lint rules, or debugging existing ones.
        self.debug_print_json = False

    def generic_visit(self, node):
        """Visit ``node`` while tracking whether a resource comment is still allowed.

        Once any node other than the root, a span or a plain comment has been
        seen, ``state["node_can_be_resource_comment"]`` becomes False and
        stays that way. When ``debug_print_json`` is set, the JSON form of
        the first node visited is printed once.
        """
        kind = type(node).__name__

        # The root node ("Resource"), empty space ("Span") and plain comments
        # ("Comment") are the only nodes allowed ahead of a resource comment.
        if kind not in ("Resource", "Span", "Comment"):
            self.state["node_can_be_resource_comment"] = False

        if self.debug_print_json:
            import json

            print(json.dumps(node.to_json(), indent=2))
            # Only debug print the root node.
            self.debug_print_json = False

        super().generic_visit(node)

    def visit_Attribute(self, node):
        """Visit only the value of an attribute.

        The attribute identifier comes from the DOM, so it is not visited;
        the value is handed to the parent class's ``generic_visit``.
        """
        attribute_value = node.value
        super().generic_visit(attribute_value)

    def visit_FunctionReference(self, node):
        """Do nothing for function references.

        Function references are deliberately not recursed into: the
        identifiers there are allowed to be free form.
        """
        return None

    def visit_Message(self, node):
        """Visit a message, then check its attributes and variables against its comments.

        Reports VA01 (as a warning) for every attribute that is neither in
        ``known_attribute_list`` nor mentioned as ``.name`` in the message or
        group comment, and VC01 when a variable collected while visiting the
        message is not mentioned as ``$name`` in those comments. The
        per-message comment and variable list are cleared afterwards.
        """
        # There must be at least one message or term between group comments.
        self.state["can_have_group_comment"] = True
        self.last_message_id = node.id.name

        super().generic_visit(node)

        # Message comment followed by group comment; both checks below search
        # this combined text.
        documentation = self.state["comment"] + self.state["group_comment"]

        # Attributes are checked here rather than in visit_Attribute because
        # that method doesn't have access to the message's comment.
        for attribute in node.attributes:
            attribute_name = attribute.id.name
            if attribute_name.lower() in self.known_attribute_list:
                continue
            if f".{attribute_name}" in documentation:
                continue
            self.add_error(
                attribute,
                "VA01",
                "Use attributes designed for localized content directly."
                " If script-based processing is necessary, add a comment"
                f" explaining why. The linter didn't recognize: .{attribute_name}",
                "warning",
            )

        # Check if variables are referenced in comments
        undocumented = [
            name
            for name in self.state["variables"]
            if f"${name}" not in documentation
        ]
        if undocumented:
            self.add_error(
                node,
                "VC01",
                "Messages including variables should have a comment "
                "explaining what will replace the variable. "
                "Missing references: "
                + ", ".join(f"${name}" for name in undocumented),
            )

        # Reset current comment and variable references after reading the
        # message.
        self.state["comment"] = ""
        self.state["variables"] = []

    def visit_Term(self, node):
        """Visit a term and clear the per-entry comment and variable state.

        ``last_message_id`` is set to None while inside a term.
        """
        # A term, like a message, satisfies the rule that something must sit
        # between two group comments.
        self.state["can_have_group_comment"] = True
        self.last_message_id = None

        super().generic_visit(node)

        # Reset current comment and variable references after reading the term.
        self.state.update(comment="", variables=[])

    def visit_MessageReference(self, node):
        """Do nothing for message references.

        Message references are not recursed into: their identifiers are
        either checked elsewhere or are attributes that come from the DOM.
        """
        return None

    def visit_Identifier(self, node):
        """Check an identifier's character set (ID01) and minimum length (ID02).

        Each rule is skipped when the current file or the identifier itself
        is listed in that rule's exclusions.
        """
        name = node.name

        id01_exclusions = self.exclusions["ID01"]
        if (
            self.path not in id01_exclusions["files"]
            and name not in id01_exclusions["messages"]
            and not self.identifier_re.fullmatch(name)
        ):
            self.add_error(
                node,
                "ID01",
                f"Identifiers may only contain lowercase characters and - (ID: {name})",
            )

        # The length test comes first so the exclusions are only consulted
        # for identifiers that are actually too short.
        if (
            len(name) < self.minimum_id_length
            and self.path not in self.exclusions["ID02"]["files"]
            and name not in self.exclusions["ID02"]["messages"]
        ):
            self.add_error(
                node,
                "ID02",
                # The closing parenthesis after the id was missing from this
                # message, leaving "(ID: ..." unbalanced in the report.
                f"Identifiers must be at least {self.minimum_id_length} characters long (ID: {name})",
            )

    def visit_TextElement(self, node):
        """Run the typography and brand-name checks on a text element.

        The element's value is fed through ``TextElementHTMLParser`` so that
        only the text between HTML tags is inspected. For each extracted
        chunk this reports:

        * TE01 - straight apostrophe following a word character
        * TE02 - left single quote (U+2018) used as an apostrophe
        * TE03 - straight single-quoted text
        * TE04 - straight double-quoted text
        * TE05 - three periods instead of an ellipsis character
        * CO01 - hard-coded brand name (only inside messages, and only when
          neither the file nor the message is excluded)
        """
        parser = TextElementHTMLParser()
        parser.feed(node.value)
        # HTMLParser may hold back trailing text (for example text ending in
        # a bare "&") until it is closed; close it so that nothing escapes
        # the checks below.
        parser.close()

        for text in parser.extracted_text:
            # To check for apostrophes, first remove pairs of straight quotes
            # used as delimiters. This works on the extracted text, like every
            # other check, so tag attributes are ignored and the error is
            # reported only for the chunk that actually contains it.
            # NOTE(review): "\1" is the control character U+0001, not a
            # backreference, so the whole quoted span is replaced by it rather
            # than unwrapped - confirm that this is the intended behaviour.
            cleaned_str = self.single_quote_re.sub("\1", text)
            if self.apostrophe_re.search(cleaned_str):
                self.add_error(
                    node,
                    "TE01",
                    "Strings with apostrophes should use foo\u2019s instead of foo's.",
                )
            if self.incorrect_apostrophe_re.search(text):
                self.add_error(
                    node,
                    "TE02",
                    "Strings with apostrophes should use foo\u2019s instead of foo\u2018s.",
                )
            if self.single_quote_re.search(text):
                self.add_error(
                    node,
                    "TE03",
                    "Single-quoted strings should use Unicode \u2018foo\u2019 instead of 'foo'.",
                )
            if self.double_quote_re.search(text):
                self.add_error(
                    node,
                    "TE04",
                    'Double-quoted strings should use Unicode \u201cfoo\u201d instead of "foo".',
                )
            if self.ellipsis_re.search(text):
                self.add_error(
                    node,
                    "TE05",
                    "Strings with an ellipsis should use the Unicode \u2026 character"
                    " instead of three periods",
                )

            # If part of a message, check for brand names
            if (
                self.last_message_id is not None
                and self.path not in self.exclusions["CO01"]["files"]
                and self.last_message_id not in self.exclusions["CO01"]["messages"]
            ):
                found_brands = [
                    brand
                    for brand in self.brand_names
                    if re.search(rf"\b{re.escape(brand)}\b", text)
                ]
                if found_brands:
                    self.add_error(
                        node,
                        "CO01",
                        "Strings should use the corresponding terms instead of"
                        f" hard-coded brand names ({', '.join(found_brands)})",
                    )

    def visit_ResourceComment(self, node):
        """Check placement and spacing of a resource comment ("###").

        Reports RC01 when a resource comment is no longer allowed at this
        point in the file, RC02 when it is not followed by exactly two
        newlines and RC03 when it is not preceded by exactly two newlines.
        At most one error is reported per comment.
        """
        if not self.state["node_can_be_resource_comment"]:
            self.add_error(
                node,
                "RC01",
                "Resource comments (###) should be placed at the top of the file, just "
                "after the license header. There should only be one resource comment "
                "per file.",
            )
            return

        newlines_below = get_newlines_count_after(node.span, self.contents)
        newlines_above = get_newlines_count_before(node.span, self.contents)

        comment_ends_file = node.span.end == len(self.contents) - 1
        if comment_ends_file:
            # This file only contains a resource comment.
            return

        if newlines_below != 2:
            self.add_error(
                node,
                "RC02",
                "Resource comments (###) should be followed by one empty line.",
            )
        elif newlines_above != 2:
            self.add_error(
                node,
                "RC03",
                "Resource comments (###) should have one empty line above them.",
            )

    def visit_SelectExpression(self, node):
        """Visit the variant values and remember a variable used as selector.

        Selector and key identifiers are allowed to be free form, so only
        the value of each variant is visited.
        """
        for variant in node.variants:
            super().generic_visit(variant.value)

        # Only plain variable selectors are recorded; function selectors such
        # as PLATFORM() are skipped.
        selector = node.selector
        if type(selector) is not ast.VariableReference:
            return

        selector_name = selector.id.name
        seen_variables = self.state["variables"]
        if selector_name not in seen_variables:
            seen_variables.append(selector_name)

    def visit_Comment(self, node):
        """Remember the content of a plain comment ("#") as the current comment."""
        self.state.update(comment=node.content)

    def visit_GroupComment(self, node):
        """Store a group comment ("##") and check its placement and spacing.

        Reports GC04 when no message or term has been seen since the previous
        group comment, GC01 for a non-empty group comment at the end of the
        file, GC02 when it is not followed by exactly two newlines and GC03
        when it is not preceded by exactly two newlines. At most one error is
        reported per comment.
        """
        # The content is stored even when the comment is rejected below.
        self.state["group_comment"] = node.content

        if not self.state["can_have_group_comment"]:
            self.add_error(
                node,
                "GC04",
                "Group comments (##) must be followed by at least one message "
                "or term. Make sure that a single group comment with multiple "
                "paragraphs is not separated by whitespace, as it will be "
                "interpreted as two different comments.",
            )
            return

        self.state["can_have_group_comment"] = False

        newlines_below = get_newlines_count_after(node.span, self.contents)
        newlines_above = get_newlines_count_before(node.span, self.contents)

        comment_ends_file = node.span.end == len(self.contents) - 1
        if comment_ends_file:
            # Empty comments are allowed at the end of the file.
            if node.content != "":
                self.add_error(
                    node,
                    "GC01",
                    "Group comments (##) should not be at the end of the file, they should "
                    "always be above a message. Only an empty group comment is allowed at "
                    "the end of a file.",
                )
        elif newlines_below != 2:
            self.add_error(
                node,
                "GC02",
                "Group comments (##) should be followed by one empty line.",
            )
        elif newlines_above != 2:
            self.add_error(
                node,
                "GC03",
                "Group comments (##) should have an empty line before them.",
            )

    def visit_VariableReference(self, node):
        """Record the referenced variable name for the later comment check.

        The identifier itself is allowed to be free form, so it is not
        visited; each name is stored once.
        """
        variable_name = node.id.name
        seen_variables = self.state["variables"]
        if variable_name in seen_variables:
            return
        seen_variables.append(variable_name)

    def add_error(self, node, rule, msg, level=None):
        """Append a lint result for ``node`` to ``self.results``.

        Args:
            node: AST node whose span gives the reported line and column.
            rule: Rule identifier stored in the result.
            msg: Message stored in the result.
            level: Optional level; only passed on when it is truthy.
        """
        column, lineno = self.span_to_line_and_col(node.span)
        fields = dict(
            path=self.path,
            lineno=lineno,
            column=column,
            rule=rule,
            message=msg,
        )
        if level:
            fields["level"] = level
        self.results.append(result.from_config(self.config, **fields))

    def span_to_line_and_col(self, span):
        """Convert the start offset of ``span`` into a ``(column, line)`` tuple.

        ``self.offsets_and_lines`` holds one ``(offset, line)`` tuple per
        entry, sorted by offset. The entry found by bisection supplies the
        line number, and the entry before it supplies the offset the column
        is measured from.

        A span that starts after the last recorded offset (for example in a
        file that has no trailing newline, or no newline at all) is reported
        on the line following the last recorded one instead of raising
        ``IndexError``.
        """
        offsets = self.offsets_and_lines
        i = bisect.bisect_left(offsets, (span.start, 0))

        if i > 0:
            col = span.start - offsets[i - 1][0]
        else:
            # No earlier entry: the span is on the first line, columns are 1-based.
            col = 1 + span.start

        if i < len(offsets):
            line = offsets[i][1]
        elif offsets:
            # Past the final recorded offset: the line after the last one.
            line = offsets[-1][1] + 1
        else:
            # No offsets recorded at all, so everything is on line 1.
            line = 1

        return (col, line)

def get_offsets_and_lines(contents):
    """Map every newline in ``contents`` to its 1-based line number.

    The Fluent AST describes nodes by start and end offsets in the file.
    The returned list of ``(offset, line)`` tuples, one per newline
    character and in file order, lets errors be reported using line and
    column instead.
    """
    return [
        (newline.start(), line_number)
        for line_number, newline in enumerate(re.finditer(r"\n", contents), start=1)
    ]

def get_newlines_count_after(span, contents):
    """Count the consecutive newline characters starting at ``span.end``.

    Scanning stops at the first character that is not a newline or at the
    end of ``contents``. A carriage return met during the scan trips an
    assertion, as this linter does not handle them.
    """
    position = span.end
    limit = len(contents)
    while position < limit:
        char = contents[position]
        assert char != "\r", "This linter does not handle \\r characters."
        if char != "\n":
            break
        position += 1
    # Every step taken past span.end was over a newline.
    return position - span.end

def get_newlines_count_before(span, contents):
    """Count the consecutive newline characters directly before ``span.start``.

    The scan walks backwards from the character preceding the span and stops
    at the first character that is not a newline or at the start of
    ``contents``. A carriage return met during the scan trips an assertion,
    as this linter does not handle them.
    """
    count = 0
    # Stop value -1 so that index 0 is inspected as well; stopping at 0
    # skipped the first character and under-counted leading newlines.
    for i in range(span.start - 1, -1, -1):
        assert contents[i] != "\r", "This linter does not handle \\r characters."
        if contents[i] != "\n":
            break
        count += 1

    return count

def get_exclusions(root):
    """Load the lint exclusions and make their file paths absolute.

    Reads ``tools/lint/fluent-lint/exclusions.yml`` below ``root`` and takes
    the first YAML document in it. For every rule in that document the
    "files" entries are joined onto ``root`` and stored as a set.

    Args:
        root: Root directory the exclusions file and its entries are
            relative to.

    Returns:
        The exclusions mapping, keyed by rule id.
    """
    exclusions_path = mozpath.join(
        root, "tools", "lint", "fluent-lint", "exclusions.yml"
    )
    # Read as UTF-8 explicitly, like the other files this module opens,
    # rather than relying on the platform's default encoding.
    with open(exclusions_path, encoding="utf-8") as f:
        # safe_load_all is lazy, so it has to be consumed while f is open.
        exclusions = list(yaml.safe_load_all(f))[0]

    for error_type in exclusions:
        exclusions[error_type]["files"] = {
            mozpath.join(root, x) for x in exclusions[error_type]["files"]
        }
    return exclusions

def get_branding_list(root, brand_files):
    """Collect brand names from the terms defined in the brand files.

    For every term in each existing brand file, the value of the first text
    element visited after the term starts is taken as a brand name.

    Args:
        root: Root directory the brand file paths are relative to.
        brand_files: Iterable of brand file paths; ``None`` is treated as
            no brand files. Paths that do not exist are skipped.

    Returns:
        A sorted list of the distinct brand names found, so the order is
        the same from one run to the next.
    """

    class MessageExtractor(visitor.Visitor):
        """Visitor that records the first text element of every term."""

        def __init__(self):
            super().__init__()
            self.brands = []
            # Id of the term whose first text element is still pending.
            self.last_message_id = None

        def visit_Term(self, node):
            self.last_message_id = node.id.name
            self.generic_visit(node)

        def visit_TextElement(self, node):
            if self.last_message_id:
                self.brands.append(node.value)
                # Later text elements of the same term are ignored.
                self.last_message_id = None
            self.generic_visit(node)

    extractor = MessageExtractor()

    for brand_path in brand_files or ():
        brand_file = mozpath.join(root, brand_path)
        if os.path.exists(brand_file):
            with open(brand_file, encoding="utf-8") as f:
                messages = parse(f.read())
            extractor.visit(messages)

    return sorted(set(extractor.brands))

def lint(paths, config, fix=None, **lintargs):
    """Lint every Fluent file selected by ``paths`` and return the results.

    Args:
        paths: Paths to lint, expanded with ``expand_exclusions``.
        config: Linter configuration; its "brand-files" entry lists the
            files brand names are collected from.
        fix: Accepted but not used by this function.
        **lintargs: Must contain "root", the root directory used to resolve
            the exclusions file and the brand files.

    Returns:
        A list with the results of all linted files, in file order.
    """
    root = lintargs["root"]
    files = list(expand_exclusions(paths, config, root))
    exclusions = get_exclusions(root)

    # config.get returns None when "brand-files" is absent; fall back to an
    # empty list so that no brand names are collected in that case.
    brand_files = config.get("brand-files") or []
    brand_names = get_branding_list(root, brand_files)

    results = []
    for path in files:
        # Use a context manager so the file handle is closed straight away.
        with open(path, encoding="utf-8") as f:
            contents = f.read()

        linter = Linter(
            path,
            config,
            exclusions,
            contents,
            get_offsets_and_lines(contents),
            brand_names,
        )
        linter.visit(parse(contents))
        results.extend(linter.results)

    return results