Source code

Revision control

Copy as Markdown

Other Tools

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import bisect
import os
import re
from html.parser import HTMLParser
import mozpack.path as mozpath
import yaml
from fluent.syntax import ast, parse, visitor
from mozlint import result
from mozlint.pathutils import expand_exclusions
class TextElementHTMLParser(HTMLParser):
"""HTML Parser for TextElement.
TextElements may contain embedded html tags, which can include
quotes in attributes. We only want to check the actual text.
"""
def __init__(self):
super().__init__()
self.extracted_text = []
def handle_data(self, data):
self.extracted_text.append(data)
class Linter(visitor.Visitor):
"""Fluent linter implementation.
This subclasses the Fluent AST visitor. Methods are called corresponding
to each type of node in the Fluent AST. It is possible to control
whether a node is recursed into by calling the generic_visit method on
the superclass.
See the documentation here:
"""
def __init__(
self, path, config, exclusions, contents, offsets_and_lines, brand_names=[]
):
super().__init__()
self.path = path
self.config = config
self.exclusions = exclusions
self.contents = contents
self.offsets_and_lines = offsets_and_lines
self.results = []
self.identifier_re = re.compile(r"[a-z0-9-]+")
self.apostrophe_re = re.compile(r"\w'")
self.incorrect_apostrophe_re = re.compile(r"\w\u2018\w")
self.single_quote_re = re.compile(r"'(.+)'")
self.double_quote_re = re.compile(r"\".+\"")
self.ellipsis_re = re.compile(r"\.\.\.")
self.brand_names = brand_names
self.minimum_id_length = 9
self.state = {
# The resource comment should be at the top of the page after the license.
"node_can_be_resource_comment": True,
# Group comments must be followed by a message. Two group comments are not
# allowed in a row.
"can_have_group_comment": True,
# Comment bound to the current message
"comment": "",
# The current group comment
"group_comment": "",
# Variables in the current message
"variables": [],
}
attributes = [
"label",
"value",
"accesskey",
"alt",
"title",
"tooltiptext",
"placeholder",
"aria-label",
"aria-description",
"aria-valuetext",
"style",
# For XUL key/command setup.
"key",
"keycode",
# For download filenames:
"download",
# Used in the Firefox prefs
"searchkeywords",
# Used by search-textbox.js
"searchbuttonlabel",
# Used in toolbar customization.
"toolbarname",
# Used in moz-message-bar.
"message",
# Used in dialogs (should be moved to using fluent IDs though)
"buttonlabelaccept",
"buttonaccesskeyaccept",
"buttonlabelcancel",
"buttonaccesskeycancel",
"buttonlabelextra2",
"buttonaccesskeyextra2",
# Used in app menu notifications (should be moved to use fluent IDs)
"buttonlabel",
"buttonaccesskey",
"secondarybuttonlabel",
"secondarybuttonaccesskey",
# Commonly used in Lit-based web components
"heading",
"description",
]
self.known_attribute_list = [a.lower() for a in attributes]
# Set this to true to debug print the root node's json. This is useful for
# writing new lint rules, or debugging existing ones.
self.debug_print_json = False
def generic_visit(self, node):
node_name = type(node).__name__
self.state["node_can_be_resource_comment"] = self.state[
"node_can_be_resource_comment"
] and (
# This is the root node.
node_name == "Resource"
# Empty space is allowed.
or node_name == "Span"
# Comments are allowed
or node_name == "Comment"
)
if self.debug_print_json:
import json
print(json.dumps(node.to_json(), indent=2))
# Only debug print the root node.
self.debug_print_json = False
super(Linter, self).generic_visit(node)
def visit_Attribute(self, node):
# Only visit values for Attribute nodes, the identifier comes from dom.
super().generic_visit(node.value)
def visit_FunctionReference(self, node):
# We don't recurse into function references, the identifiers there are
# allowed to be free form.
pass
def visit_Message(self, node):
# There must be at least one message or term between group comments.
self.state["can_have_group_comment"] = True
self.last_message_id = node.id.name
super().generic_visit(node)
# Do this here instead as visit_Attribute doesn't have access to the
# message's comment.
for attr in node.attributes:
if not attr.id.name.lower() in self.known_attribute_list:
comment = self.state["comment"] + self.state["group_comment"]
if not f".{attr.id.name}" in comment:
self.add_error(
attr,
"VA01",
"Use attributes designed for localized content directly."
" If script-based processing is necessary, add a comment"
f" explaining why. The linter didn't recognize: .{attr.id.name}",
"warning",
)
# Check if variables are referenced in comments
if self.state["variables"]:
comments = self.state["comment"] + self.state["group_comment"]
missing_references = [
v for v in self.state["variables"] if f"${v}" not in comments
]
if missing_references:
self.add_error(
node,
"VC01",
"Messages including variables should have a comment "
"explaining what will replace the variable. "
"Missing references: "
+ ", ".join([f"${m}" for m in missing_references]),
)
# Reset current comment and variable references after reading the
# message.
self.state["comment"] = ""
self.state["variables"] = []
def visit_Term(self, node):
# There must be at least one message or term between group comments.
self.state["can_have_group_comment"] = True
self.last_message_id = None
super().generic_visit(node)
# Reset current comment and variable references after reading the term.
self.state["comment"] = ""
self.state["variables"] = []
def visit_MessageReference(self, node):
# We don't recurse into message references, the identifiers are either
# checked elsewhere or are attributes and come from DOM.
pass
def visit_Identifier(self, node):
if (
self.path not in self.exclusions["ID01"]["files"]
and node.name not in self.exclusions["ID01"]["messages"]
and not self.identifier_re.fullmatch(node.name)
):
self.add_error(
node,
"ID01",
"Identifiers may only contain lowercase characters and -",
)
if (
len(node.name) < self.minimum_id_length
and self.path not in self.exclusions["ID02"]["files"]
and node.name not in self.exclusions["ID02"]["messages"]
):
self.add_error(
node,
"ID02",
f"Identifiers must be at least {self.minimum_id_length} characters long",
)
def visit_TextElement(self, node):
parser = TextElementHTMLParser()
parser.feed(node.value)
for text in parser.extracted_text:
# To check for apostrophes, first remove pairs of straight quotes
# used as delimiters.
cleaned_str = re.sub(self.single_quote_re, "\1", node.value)
if self.apostrophe_re.search(cleaned_str):
self.add_error(
node,
"TE01",
"Strings with apostrophes should use foo\u2019s instead of foo's.",
)
if self.incorrect_apostrophe_re.search(text):
self.add_error(
node,
"TE02",
"Strings with apostrophes should use foo\u2019s instead of foo\u2018s.",
)
if self.single_quote_re.search(text):
self.add_error(
node,
"TE03",
"Single-quoted strings should use Unicode \u2018foo\u2019 instead of 'foo'.",
)
if self.double_quote_re.search(text):
self.add_error(
node,
"TE04",
'Double-quoted strings should use Unicode \u201cfoo\u201d instead of "foo".',
)
if self.ellipsis_re.search(text):
self.add_error(
node,
"TE05",
"Strings with an ellipsis should use the Unicode \u2026 character"
" instead of three periods",
)
# If part of a message, check for brand names
if (
self.last_message_id is not None
and self.path not in self.exclusions["CO01"]["files"]
and self.last_message_id not in self.exclusions["CO01"]["messages"]
):
found_brands = []
for brand in self.brand_names:
if brand in text:
found_brands.append(brand)
if found_brands:
self.add_error(
node,
"CO01",
"Strings should use the corresponding terms instead of"
f" hard-coded brand names ({', '.join(found_brands)})",
)
def visit_ResourceComment(self, node):
# This node is a comment with: "###"
if not self.state["node_can_be_resource_comment"]:
self.add_error(
node,
"RC01",
"Resource comments (###) should be placed at the top of the file, just "
"after the license header. There should only be one resource comment "
"per file.",
)
return
lines_after = get_newlines_count_after(node.span, self.contents)
lines_before = get_newlines_count_before(node.span, self.contents)
if node.span.end == len(self.contents) - 1:
# This file only contains a resource comment.
return
if lines_after != 2:
self.add_error(
node,
"RC02",
"Resource comments (###) should be followed by one empty line.",
)
return
if lines_before != 2:
self.add_error(
node,
"RC03",
"Resource comments (###) should have one empty line above them.",
)
return
def visit_SelectExpression(self, node):
# We only want to visit the variant values, the identifiers in selectors
# and keys are allowed to be free form.
for variant in node.variants:
super().generic_visit(variant.value)
# Store the variable used for the SelectExpression, excluding functions
# like PLATFORM()
if (
type(node.selector) is ast.VariableReference
and node.selector.id.name not in self.state["variables"]
):
self.state["variables"].append(node.selector.id.name)
def visit_Comment(self, node):
# This node is a comment with: "#"
# Store the comment
self.state["comment"] = node.content
def visit_GroupComment(self, node):
# This node is a comment with: "##"
# Store the group comment
self.state["group_comment"] = node.content
if not self.state["can_have_group_comment"]:
self.add_error(
node,
"GC04",
"Group comments (##) must be followed by at least one message "
"or term. Make sure that a single group comment with multiple "
"paragraphs is not separated by whitespace, as it will be "
"interpreted as two different comments.",
)
return
self.state["can_have_group_comment"] = False
lines_after = get_newlines_count_after(node.span, self.contents)
lines_before = get_newlines_count_before(node.span, self.contents)
if node.span.end == len(self.contents) - 1:
# The group comment is the last thing in the file.
if node.content == "":
# Empty comments are allowed at the end of the file.
return
self.add_error(
node,
"GC01",
"Group comments (##) should not be at the end of the file, they should "
"always be above a message. Only an empty group comment is allowed at "
"the end of a file.",
)
return
if lines_after != 2:
self.add_error(
node,
"GC02",
"Group comments (##) should be followed by one empty line.",
)
return
if lines_before != 2:
self.add_error(
node,
"GC03",
"Group comments (##) should have an empty line before them.",
)
return
def visit_VariableReference(self, node):
# Identifiers are allowed to be free form, but need to store them
# for comment checks.
if node.id.name not in self.state["variables"]:
self.state["variables"].append(node.id.name)
def add_error(self, node, rule, msg, level=None):
(col, line) = self.span_to_line_and_col(node.span)
res = {
"path": self.path,
"lineno": line,
"column": col,
"rule": rule,
"message": msg,
}
if level:
res["level"] = level
self.results.append(result.from_config(self.config, **res))
def span_to_line_and_col(self, span):
i = bisect.bisect_left(self.offsets_and_lines, (span.start, 0))
if i > 0:
col = span.start - self.offsets_and_lines[i - 1][0]
else:
col = 1 + span.start
return (col, self.offsets_and_lines[i][1])
def get_offsets_and_lines(contents):
"""Return a list consisting of tuples of (offset, line).
The Fluent AST contains spans of start and end offsets in the file.
This function returns a list of offsets and line numbers so that errors
can be reported using line and column.
"""
line = 1
result = []
for m in re.finditer(r"\n", contents):
result.append((m.start(), line))
line += 1
return result
def get_newlines_count_after(span, contents):
# Determine the number of newlines.
count = 0
for i in range(span.end, len(contents)):
assert contents[i] != "\r", "This linter does not handle \\r characters."
if contents[i] != "\n":
break
count += 1
return count
def get_newlines_count_before(span, contents):
# Determine the range of newline characters.
count = 0
for i in range(span.start - 1, 0, -1):
assert contents[i] != "\r", "This linter does not handle \\r characters."
if contents[i] != "\n":
break
count += 1
return count
def get_exclusions(root):
with open(
mozpath.join(root, "tools", "lint", "fluent-lint", "exclusions.yml")
) as f:
exclusions = list(yaml.safe_load_all(f))[0]
for error_type in exclusions:
exclusions[error_type]["files"] = set(
[mozpath.join(root, x) for x in exclusions[error_type]["files"]]
)
return exclusions
def get_branding_list(root, brand_files):
class MessageExtractor(visitor.Visitor):
def __init__(self):
self.brands = []
self.last_message_id = None
def visit_Term(self, node):
self.last_message_id = node.id.name
self.generic_visit(node)
def visit_TextElement(self, node):
if self.last_message_id:
self.brands += [node.value]
self.last_message_id = None
self.generic_visit(node)
extractor = MessageExtractor()
for brand_path in brand_files:
brand_file = mozpath.join(root, brand_path)
if os.path.exists(brand_file):
with open(brand_file, encoding="utf-8") as f:
messages = parse(f.read())
extractor.visit(messages)
return list(set(extractor.brands))
def lint(paths, config, fix=None, **lintargs):
root = lintargs["root"]
files = list(expand_exclusions(paths, config, root))
exclusions = get_exclusions(root)
brand_files = config.get("brand-files")
brand_names = get_branding_list(root, brand_files)
results = []
for path in files:
contents = open(path, "r", encoding="utf-8").read()
linter = Linter(
path,
config,
exclusions,
contents,
get_offsets_and_lines(contents),
brand_names,
)
linter.visit(parse(contents))
results.extend(linter.results)
return results