Source code
Revision control
Copy as Markdown
Other Tools
# Copyright Mozilla Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from bisect import bisect_right
from itertools import product
from re import finditer
from typing import Tuple, cast

from fluent.syntax import FluentParser, ParseError
from fluent.syntax import ast as ftl
from fluent.syntax.stream import FluentParserStream

from ...model import (
    CatchallKey,
    Comment,
    Entry,
    Expression,
    LinePos,
    Message,
    Metadata,
    Pattern,
    PatternMessage,
    Resource,
    Section,
    SelectMessage,
    VariableRef,
)
from .. import Format
def fluent_parse(
    source: bytes | str | ftl.Resource, *, with_linepos: bool = True
) -> Resource[Message]:
    """
    Parse a .ftl file into a message resource.

    Message and term references are represented by `message` function annotations,
    with term identifiers prefixed with a `-`.

    Attribute values are assigned to the Entry.properties.
    Messages with no value will have an empty pattern assigned as their Entry.value.

    Function names are lower-cased, so e.g. the Fluent `NUMBER` is `number` in the Resource.

    If the first entry of the file is a non-empty standalone comment,
    it is stored as `info` metadata on the resource instead of as a Comment entry.
    No other resource metadata is set here.

    Resource comments are joined into `resource.comment`, separated by a blank line.
    A group comment starts a new section, unless the current section
    is still empty and has no comment, in which case it becomes that section's comment.

    Line positions are only set when `source` is given as bytes or str
    and `with_linepos` is true.

    Raises ValueError if a message or term cannot be converted,
    or if the parsed Fluent resource contains an entry of any other kind (junk).
    """
    if isinstance(source, ftl.Resource):
        fluent_res = source
        lpm = None  # Source is required for line positions
    else:
        source_str = source if isinstance(source, str) else str(source, "utf-8")
        fluent_res = FluentParser(with_spans=with_linepos).parse(source_str)
        lpm = LinePosMapper(source_str) if with_linepos else None

    def comment_linepos(node: ftl.Comment | ftl.GroupComment) -> LinePos | None:
        """Line position of a comment node, or None if positions are unavailable."""
        if lpm and node.span:
            start = node.span.start
            # A comment has no separate key or value, so they all share its start.
            return lpm.get_linepos(start, start, start, node.span.end)
        return None

    entries: list[Entry[Message] | Comment] = []
    section = Section((), entries)
    resource = Resource(Format.fluent, [section])

    fluent_body = fluent_res.body
    if fluent_body and isinstance(fbc := fluent_body[0], ftl.Comment) and fbc.content:
        # A leading standalone comment is kept as metadata, not as an entry.
        resource.meta.append(Metadata("info", fbc.content))
        fluent_body = fluent_body[1:]

    for entry in fluent_body:
        if isinstance(entry, (ftl.Message, ftl.Term)):
            try:
                entries.append(fluent_entry(entry, lpm))
            except Exception as err:
                raise ValueError(f"Error parsing message {entry.id.name}") from err
        elif isinstance(entry, ftl.ResourceComment):
            if entry.content:
                resource.comment = (
                    (resource.comment.rstrip() + "\n\n" + entry.content)
                    if resource.comment
                    else entry.content
                )
        elif isinstance(entry, ftl.GroupComment):
            if entries or section.comment:
                # The current section is in use, so start a new one.
                entries = []
                section = Section((), entries, comment=entry.content or "")
                resource.sections.append(section)
            else:
                section.comment = entry.content or ""
            linepos = comment_linepos(entry)
            if linepos is not None:
                section.linepos = linepos
        elif isinstance(entry, ftl.Comment):
            if entry.content:
                comment = Comment(entry.content)
                linepos = comment_linepos(entry)
                if linepos is not None:
                    comment.linepos = linepos
                entries.append(comment)
        else:  # Junk
            try:
                message = entry.annotations[0].message
            except Exception:
                # Best effort only: fall back to a generic message below.
                message = ""
            if not message:
                message = "Fluent parser error"
            if entries:
                # Point at the closest preceding message in the current section.
                prev_entry = next(
                    (prev for prev in reversed(entries) if isinstance(prev, Entry)),
                    None,
                )
                if prev_entry:
                    message += f" after message {'.'.join(prev_entry.id)}"
                prev_lp = entries[-1].linepos
                if prev_lp:
                    message += f" at line {prev_lp.end}"
            raise ValueError(message)
    return resource
def fluent_parse_entry(
    source: str | ftl.Message | ftl.Term, *, with_linepos: bool = True
) -> Entry[Message]:
    """
    Convert a single Fluent message or term into an Entry.

    `source` is either Fluent source text, which is parsed here,
    or an already parsed `ftl.Message` / `ftl.Term`.

    Message and term references are represented by `message` function annotations,
    with term identifiers prefixed with a `-`.

    Attribute values are assigned to the Entry.properties.
    Messages with no value will have an empty pattern assigned as their Entry.value.

    Function names are lower-cased, so e.g. the Fluent `NUMBER` is `number` in the result.

    Raises ValueError if the source does not yield a Fluent message or term.
    """
    mapper: LinePosMapper | None = None  # Source text is required for line positions
    if isinstance(source, str):
        parser = FluentParser(with_spans=with_linepos)
        parsed = parser.parse_entry(source)
        if with_linepos:
            mapper = LinePosMapper(source)
    else:
        parsed = source

    if isinstance(parsed, (ftl.Message, ftl.Term)):
        return fluent_entry(parsed, mapper)
    raise ValueError("Source is not a Fluent entry")
def fluent_parse_message(source: str) -> Message:
    """
    Convert the source of a Fluent pattern into a Message.

    Function names are lower-cased, so e.g. the Fluent `NUMBER` is `number` in the result.

    Leading whitespace is stripped here; trailing whitespace is expected
    to be dropped by the Fluent pattern parser.

    Raises ValueError if the Fluent parser rejects the pattern.
    """
    # A space is added after each line break before parsing,
    # presumably so that later lines read as indented continuation lines.
    prepared = source.lstrip().replace("\n", "\n ")
    stream = FluentParserStream(prepared)
    fluent_parser = FluentParser(with_spans=False)
    try:
        pattern = fluent_parser.get_pattern(stream, is_block=False)
    except ParseError as err:
        raise ValueError(err.message or "Fluent parser error") from err
    else:
        return message(pattern)
def fluent_entry(
    ftl_entry: ftl.Message | ftl.Term,
    lpm: LinePosMapper | None,
) -> Entry[Message]:
    """
    Convert a parsed Fluent message or term into an Entry.

    The entry id is a one-part tuple holding the Fluent identifier,
    prefixed with `-` for terms.
    A missing value is represented by an empty PatternMessage.
    Attributes are converted to messages and stored in `entry.properties`,
    keyed by attribute name.

    If `lpm` is given and the Fluent nodes have spans, `entry.linepos` is set,
    with its end extended to cover the last attribute.
    """
    name = ftl_entry.id.name
    if isinstance(ftl_entry, ftl.Term):
        name = "-" + name
    value = message(ftl_entry.value) if ftl_entry.value else PatternMessage([])
    comment = (ftl_entry.comment.content or "") if ftl_entry.comment else ""
    entry = Entry(id=(name,), value=value, comment=comment)

    if lpm and ftl_entry.span:
        # The entry starts at its comment, if it has a non-empty one with a span.
        c_span = (
            ftl_entry.comment.span
            if comment and ftl_entry.comment and ftl_entry.comment.span
            else ftl_entry.span
        )
        k_span = ftl_entry.id.span or ftl_entry.span
        v_span = (ftl_entry.value.span if ftl_entry.value else None) or ftl_entry.span
        entry.linepos = lpm.get_linepos(
            c_span.start, k_span.start, v_span.start, v_span.end
        )

    for attr in ftl_entry.attributes:
        prop_msg = message(attr.value)
        if entry.properties is None:
            entry.properties = {attr.id.name: prop_msg}
        else:
            entry.properties[attr.id.name] = prop_msg
        if lpm and entry.linepos and attr.span:
            span = attr.value.span or attr.span
            # span.end is a character offset, while LinePos fields hold
            # line numbers, so it needs to be mapped to a line first.
            entry.linepos.end = lpm._get_line(span.end)
    return entry
def message(ftl_pattern: ftl.Pattern) -> Message:
    """
    Convert a Fluent pattern into a PatternMessage or a SelectMessage.

    If the pattern contains select expressions, they are all lifted to the
    top level: the result is a SelectMessage with one selector per distinct
    selector expression found by find_selectors(), and one variant for each
    combination of their keys that ends up with a non-empty pattern.
    Otherwise, the result is a PatternMessage.
    """
    sel_data = find_selectors(ftl_pattern, [])
    sel_expressions = [sd[0] for sd in sel_data]
    # The variant key currently being visited for each selector,
    # or None when not inside any variant of that selector.
    filter: list[Key | None] = [None] * len(sel_expressions)
    msg_variants: dict[tuple[Key, ...], Pattern]
    # Names of variables used as placeholder arguments, and later of selectors.
    var_names: set[str] = set()
    if sel_expressions:
        # Remove duplicate keys for each selector, keeping their first-seen order.
        key_lists = [list(dict.fromkeys(sd[2])) for sd in sel_data]
        for keys in key_lists:
            # Non-default keys go before default ones, and within each of
            # those groups numeric keys go before the others.
            keys.sort(key=lambda k: (k[2], not k[1]))
        # Start with an empty pattern for every combination of keys.
        msg_variants = {key: [] for key in product(*key_lists)}
    else:
        msg_variants = {(): []}

    def add_pattern(ftl_pattern: ftl.Pattern) -> None:
        """Append the pattern's elements to each variant matching the current filter."""
        el: (
            ftl.TextElement
            | ftl.Placeable
            | ftl.InlineExpression
            | ftl.SelectExpression
        )
        for el in ftl_pattern.elements:
            # Unwrap (possibly nested) placeables to get at their expression.
            while isinstance(el, ftl.Placeable):
                el = el.expression
            if isinstance(el, ftl.SelectExpression):
                # Find which of the message selectors this select belongs to.
                msg_sel = next(sd[0] for sd in sel_data if el.selector in sd[1])
                idx = sel_expressions.index(msg_sel)
                prev_filt = filter[idx]
                for v in el.variants:
                    # Restrict output to this variant's key while visiting it.
                    filter[idx] = variant_key(v)
                    add_pattern(v.value)
                filter[idx] = prev_filt
            else:
                for keys, msg_pattern in msg_variants.items():
                    # A variant matches if each of its keys equals the
                    # corresponding filter entry, where that one is set.
                    if all(
                        (filt is None or key == filt) for key, filt in zip(keys, filter)
                    ):
                        if isinstance(el, ftl.TextElement):
                            # Merge adjacent text into a single string part.
                            if msg_pattern and isinstance(msg_pattern[-1], str):
                                msg_pattern[-1] += el.value
                            else:
                                msg_pattern.append(el.value)
                        else:
                            expr = inline_expression(el)
                            if isinstance(expr.arg, VariableRef):
                                var_names.add(expr.arg.name)
                            msg_pattern.append(expr)

    add_pattern(ftl_pattern)
    if sel_expressions:
        declarations = {}
        selectors = []
        for expr in sel_expressions:
            # Name each selector after its variable, adding a numeric suffix
            # if that name is empty or already in use.
            stem = expr.arg.name if isinstance(expr.arg, VariableRef) else ""
            i = 0
            name = stem
            while name in var_names or name == "":
                i += 1
                name = f"{stem}_{i}"
            declarations[name] = expr
            selectors.append(VariableRef(name))
            var_names.add(name)
        # Key combinations that received no content are left out.
        variants = {
            tuple(map(message_key, keys)): msg_pattern
            for keys, msg_pattern in msg_variants.items()
            if msg_pattern
        }
        return SelectMessage(declarations, tuple(selectors), variants)
    else:
        return PatternMessage(next(iter(msg_variants.values())))
# Internal representation of a Fluent variant key, as built by variant_key().
# The string below names the tuple's fields in order.
Key = Tuple[str, bool, bool]
"(name, is_numeric, is_default)"
def variant_key(v: ftl.Variant) -> Key:
    """
    Build the `(name, is_numeric, is_default)` key of a Fluent variant.

    An identifier key uses its name and is not numeric;
    any other key uses its value and is numeric.
    """
    ftl_key = v.key
    if isinstance(ftl_key, ftl.Identifier):
        return (ftl_key.name, False, v.default)
    return (ftl_key.value, True, v.default)
def message_key(key: Key) -> str | CatchallKey:
    """
    Convert an internal variant key into a message variant key.

    A default key becomes a CatchallKey with the same name;
    any other key is represented by its name only.
    """
    name, _is_numeric, is_default = key
    if is_default:
        return CatchallKey(name)
    return name
def find_selectors(
    pattern: ftl.Pattern,
    result: list[tuple[Expression, list[ftl.InlineExpression], list[Key]]],
) -> list[tuple[Expression, list[ftl.InlineExpression], list[Key]]]:
    """
    Collect the select expressions of a pattern, including nested ones.

    Each item of `result` is a tuple of
    the message selector expression,
    the Fluent selectors that map to it, and
    the keys of all the variants using it (possibly with duplicates).

    Selects with an equal message selector expression share one item.
    `result` is modified in place, and also returned.
    """
    for el in pattern.elements:
        # Unwrap nested placeables such as `{ { $x -> ... } }` in the same way
        # as message() does, so that every select it encounters is found here.
        expression = el
        while isinstance(expression, ftl.Placeable):
            expression = expression.expression
        if not isinstance(expression, ftl.SelectExpression):
            continue

        ftl_sel = expression.selector
        keys = [variant_key(v) for v in expression.variants]
        msg_sel = select_expression(ftl_sel, keys)
        prev = next((x for x in result if x[0] == msg_sel), None)
        if prev:
            _, ftl_list, key_list = prev
            ftl_list.append(ftl_sel)
            key_list += keys
        else:
            result.append((msg_sel, [ftl_sel], keys))
        # Variant values may contain further selects.
        for v in expression.variants:
            find_selectors(v.value, result)
    return result
def select_expression(ftl_sel: ftl.InlineExpression, keys: list[Key]) -> Expression:
    """
    Build the message selector expression for a Fluent selector.

    A variable selector is annotated as `number` if each of its keys is
    numeric or a plural category name, and as `string` otherwise.
    A string literal selector is annotated as `string`.
    Any other selector is converted by inline_expression().
    """
    if isinstance(ftl_sel, ftl.VariableReference):
        plural_categories = ("zero", "one", "two", "few", "many", "other")
        has_other_key = any(
            not is_numeric and key_name not in plural_categories
            for key_name, is_numeric, _ in keys
        )
        function = "string" if has_other_key else "number"
        return Expression(VariableRef(ftl_sel.id.name), function)
    if isinstance(ftl_sel, ftl.StringLiteral):
        return Expression(literal_value(ftl_sel), "string")
    return inline_expression(ftl_sel)
def inline_expression(exp: ftl.InlineExpression) -> Expression:
    """
    Convert a Fluent inline expression into a message Expression.

    - Number literals get a `number` annotation.
    - String literals have no annotation.
    - Message and term references get a `message` annotation,
      with `.attribute` appended to the name and a `-` prefix for terms.
      The named arguments of a term reference become options.
    - Variable references have no annotation.
    - Function references use the lower-cased function name as annotation,
      with at most one positional argument and named arguments as options.

    Raises ValueError for a function with more than one positional argument,
    or with a positional argument that is not a literal or a variable.
    """
    if isinstance(exp, ftl.NumberLiteral):
        return Expression(exp.value, "number")

    if isinstance(exp, ftl.StringLiteral):
        return Expression(exp.parse().get("value") or "")

    if isinstance(exp, ftl.MessageReference):
        ref = exp.id.name
        if exp.attribute is not None:
            ref += "." + exp.attribute.name
        return Expression(ref, "message")

    if isinstance(exp, ftl.TermReference):
        ref = "-" + exp.id.name
        if exp.attribute is not None:
            ref += "." + exp.attribute.name
        term_named = exp.arguments.named if exp.arguments else []
        term_options = {opt.name.name: literal_value(opt.value) for opt in term_named}
        return Expression(ref, "message", term_options)

    if isinstance(exp, ftl.VariableReference):
        return Expression(VariableRef(exp.id.name))

    # Anything else is an ftl.FunctionReference
    func_name = exp.id.name.lower()
    positional = exp.arguments.positional
    if len(positional) > 1:
        raise ValueError(
            f"Functions with more than one positional argument are not supported: {func_name}"
        )
    ftl_arg: ftl.Placeable | ftl.InlineExpression | None = (
        positional[0] if positional else None
    )
    # The argument may be wrapped in any number of placeables.
    while isinstance(ftl_arg, ftl.Placeable):
        ftl_arg = cast(ftl.InlineExpression, ftl_arg.expression)

    arg: str | VariableRef | None
    if not ftl_arg:
        arg = None
    elif isinstance(ftl_arg, (ftl.NumberLiteral, ftl.StringLiteral)):
        arg = literal_value(ftl_arg)
    elif isinstance(ftl_arg, ftl.VariableReference):
        arg = VariableRef(ftl_arg.id.name)
    else:
        raise ValueError(f"Unexpected value: {ftl_arg}")

    options = {opt.name.name: literal_value(opt.value) for opt in exp.arguments.named}
    return Expression(arg, func_name, options)
def literal_value(arg: ftl.NumberLiteral | ftl.StringLiteral) -> str:
    """
    Get the string value of a Fluent literal.

    A number literal is returned as written in its `value`.
    For a string literal, the `value` of its parsed form is returned,
    or an empty string if that is missing or empty.
    """
    if isinstance(arg, ftl.NumberLiteral):
        return arg.value
    return arg.parse().get("value") or ""
class LinePosMapper:
    """
    Map character offsets of a source string to 1-based line numbers.

    The offsets of all newline characters are collected once,
    so that each lookup is a binary search rather than a scan of the source.
    """

    def __init__(self, src: str) -> None:
        self._len = len(src)
        # Offsets of the newline characters, in ascending order.
        self._newlines = [m.start() for m in finditer("\n", src)]

    def _get_line(self, char_idx: int) -> int:
        """
        Get the line number for a character offset.

        The offset of a newline character itself maps to the following line.
        """
        # Treat the end of the string as a newline.
        if not self._newlines and char_idx >= self._len:
            return 2
        # The line number is one more than the count of newlines at or
        # before the offset; as the offsets are sorted, bisect finds that.
        return bisect_right(self._newlines, char_idx) + 1

    def get_linepos(self, start: int, key: int, value: int, end: int) -> LinePos:
        """
        Get the line positions for the given character offsets.

        The lookups for `key` and `value` are skipped when they are
        at the same offset as `start` and `key`, respectively.
        """
        start_line = self._get_line(start)
        key_line = start_line if key == start else self._get_line(key)
        value_line = key_line if value == key else self._get_line(value)
        end_line = self._get_line(end)
        return LinePos(start_line, key_line, value_line, end_line)