serializer.py - mozsearch

Enable keyboard shortcuts

# This Source Code Form is subject to the terms of the Mozilla Public

# License, v. 2.0. If a copy of the MPL was not distributed with this

# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Serialize string changes.

The serialization logic is based on the cross-channel merge algorithm.

It's taking the file structure for the first file, and localizable entries

from the last.

Input data is the parsed reference as a list of parser.walk(),

the existing localized file, also a list of parser.walk(), and a dictionary

of newly added keys and raw values.

To remove a string from a localization, pass `None` as value for a key.

The marshalling between raw values and entities is done via Entity.unwrap

and Entity.wrap.

To avoid adding English reference strings into the generated file, the

actual entities in the reference are replaced with Placeholders, which

are removed in a final pass over the result of merge_resources. After that,

we also prune whitespace once more.`

"""

from codecs import encode

from functools import reduce

from compare_locales.merge import merge_resources, serialize_legacy_resource

from compare_locales.parser import getParser

from compare_locales.parser.base import (

    Entity,

    PlaceholderEntity,

    Junk,

    Whitespace,

class SerializationNotSupportedError(ValueError):

    pass

def serialize(filename, reference, old_l10n, new_data):

    """Returns a byte string of the serialized content to use.

    Input are a filename to create the right parser, a reference and

    an existing localization, both as the result of parser.walk().

    Finally, new_data is a dictionary of key to raw values to serialize.

    Raises a SerializationNotSupportedError if we don't support the file

    format.

"""

    try:

        parser = getParser(filename)

    except UserWarning:

        raise SerializationNotSupportedError(f"Unsupported file format ({filename}).")

    # create template, whitespace and all

    placeholders = [

        placeholder(entry) for entry in reference if not isinstance(entry, Junk)

    ref_mapping = {entry.key: entry for entry in reference if isinstance(entry, Entity)}

    # strip obsolete strings

    old_l10n = sanitize_old(ref_mapping.keys(), old_l10n, new_data)

    # create new Entities

    # .val can just be "", merge_channels doesn't need that

    new_l10n = []

    for key, new_raw_val in new_data.items():

        if new_raw_val is None or key not in ref_mapping:

            continue

        ref_ent = ref_mapping[key]

        new_l10n.append(ref_ent.wrap(new_raw_val))

    merged = merge_resources(

        parser, [placeholders, old_l10n, new_l10n], keep_newest=False

    pruned = prune_placeholders(merged)

    return encode(serialize_legacy_resource(pruned), parser.encoding)

def sanitize_old(known_keys, old_l10n, new_data):

    """Strip Junk and replace obsolete messages with placeholders.

    If new_data has `None` as a value, strip the existing translation.

    Use placeholders generously, so that we can rely on `prune_placeholders`

    to find their associated comments and remove them, too.

"""

    def should_placeholder(entry):

        # If entry is an Entity, check if it's obsolete

        # or marked to be removed.

        if not isinstance(entry, Entity):

            return False

        if entry.key not in known_keys:

            return True

        return entry.key in new_data and new_data[entry.key] is None

    return [

        placeholder(entry) if should_placeholder(entry) else entry

        for entry in old_l10n

        if not isinstance(entry, Junk)

def placeholder(entry):

    if isinstance(entry, Entity):

        return PlaceholderEntity(entry.key)

    return entry

def prune_placeholders(entries):

    pruned = [entry for entry in entries if not isinstance(entry, PlaceholderEntity)]

    def prune_whitespace(acc, entity):

        if len(acc) and isinstance(entity, Whitespace):

            prev_entity = acc[-1]

            if isinstance(prev_entity, Whitespace):

                # Prefer the longer whitespace.

                if len(entity.all) > len(prev_entity.all):

                    acc[-1] = entity

                return acc

        acc.append(entity)

        return acc

    return reduce(prune_whitespace, pruned, [])