Source code
Revision control
Copy as Markdown
Other Tools
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
'''Merge resources across channels.
Merging resources is done over a series of parsed resources, or source
strings.
The nomenclature is that the resources are ordered from newest to oldest.
The generated file structure is taken from the newest file, and then the
next-newest, etc. The values of the returned entities are taken from the
newest to the oldest resource, too.
In merge_resources, there's an option to choose the values from oldest
to newest instead.
'''
from collections import OrderedDict, defaultdict
from codecs import encode
from functools import reduce
from compare_locales import parser as cl
from compare_locales.parser.base import StickyEntry
from compare_locales.compare.utils import AddRemove
class MergeNotSupportedError(ValueError):
    '''Raised when a file cannot be merged because its format is unsupported.'''
def merge_channels(name, resources):
    '''Merge the channel versions of one file and return the encoded result.

    `name` is handed to `cl.getParser` to pick the parser for the file
    format. `resources` is forwarded unchanged to `merge_resources`, which
    expects it ordered from newest to oldest.

    Returns the serialized merged content, encoded with `parser.encoding`.

    Raises MergeNotSupportedError if `cl.getParser` raises UserWarning
    for `name`.
    '''
    try:
        parser = cl.getParser(name)
    except UserWarning as err:
        # Chain explicitly so the traceback reports the parser lookup
        # failure as the direct cause of the merge error.
        raise MergeNotSupportedError(
            f'Unsupported file format ({name}).') from err

    entities = merge_resources(parser, resources)
    return encode(serialize_legacy_resource(entities), parser.encoding)
def merge_resources(parser, resources, keep_newest=True):
    '''Merge parsed or unparsed resources, returning an enumerable of Entities.

    Resources are ordered from newest to oldest in the input. The structure
    of the generated content is taken from the newest resource first, and
    then filled by the next etc.
    Values are also taken from the newest, unless keep_newest is False,
    then values are taken from the oldest first.

    Each resource is either `bytes`, which is read and walked with
    `parser`, or an iterable of already parsed entities.

    If `resources` is empty, an empty enumerable is returned.
    '''

    def get_key_value(entity, counter):
        if isinstance(entity, cl.Comment):
            counter[entity.val] += 1
            # Use the (value, index) tuple as the key. AddRemove will
            # de-duplicate identical comments at the same index.
            return ((entity.val, counter[entity.val]), entity)

        if isinstance(entity, cl.Whitespace):
            # Use the Whitespace instance as the key so that it's always
            # unique. Adjacent whitespace will be folded into the longer
            # one in prune.
            return (entity, entity)

        return (entity.key, entity)

    def parse_resource(resource):
        # The counter dict keeps track of number of identical comments.
        counter = defaultdict(int)
        if isinstance(resource, bytes):
            parser.readContents(resource)
            resource = parser.walk()
        pairs = [get_key_value(entity, counter) for entity in resource]
        return OrderedDict(pairs)

    parsed = map(parse_resource, resources)
    # reduce() without an initial value raises TypeError on an empty
    # iterable, so pull the first parsed resource out by hand.
    # parse_resource always returns an OrderedDict, which makes None a safe
    # "nothing there" marker.
    newest = next(parsed, None)
    if newest is None:
        return OrderedDict().values()

    # The remaining resources are still parsed lazily, one per merge step.
    entities = reduce(
        lambda x, y: merge_two(x, y, keep_newer=keep_newest),
        parsed,
        newest)
    return entities.values()
def merge_two(newer, older, keep_newer=True):
    '''Combine two OrderedDicts of entities into a new OrderedDict.

    Keys are visited in the order AddRemove reports for `newer` (left)
    against `older` (right). For each key the entity comes from `newer`
    when `keep_newer` is true, otherwise from `older`; see
    get_newer_entity and get_older_entity for the fallbacks.
    '''
    pick = get_newer_entity if keep_newer else get_older_entity

    differ = AddRemove()
    differ.set_left(newer.keys())
    differ.set_right(older.keys())

    merged = []
    for _, key in differ:
        entity = pick(newer, older, key)
        if entity is None:
            # A None entity stands for a duplicated comment; drop it.
            continue

        if merged and isinstance(entity, cl.Whitespace):
            previous = merged[-1][1]
            if isinstance(previous, cl.Whitespace):
                # Two whitespace entities in a row collapse into one,
                # keeping whichever has the longer text.
                if len(entity.all) > len(previous.all):
                    merged[-1] = (entity, entity)
                continue

        merged.append((key, entity))

    return OrderedDict(merged)
def get_newer_entity(newer, older, key):
    '''Return the entity for `key`, preferring `newer` over `older`.

    The lookup falls back to `older` only when `newer` has no entry (or a
    None entry) for `key`; the result is None if neither has one.
    '''
    candidate = newer.get(key)
    return older.get(key) if candidate is None else candidate
def get_older_entity(newer, older, key):
    '''Return the entity for `key`, preferring `older` over `newer`.

    The older entity is used unless it is missing (or None) or is a
    StickyEntry; in those cases the lookup goes to `newer` instead.
    '''
    candidate = older.get(key)
    if candidate is not None and not isinstance(candidate, StickyEntry):
        return candidate
    # No usable older version: take whatever the newer resource has.
    return newer.get(key)
def serialize_legacy_resource(entities):
    '''Concatenate the `all` text of every entity, in order, into one string.'''
    chunks = [entity.all for entity in entities]
    return "".join(chunks)