# extract.py - mozsearch

# Enable keyboard shortcuts

# ext/extract.py

# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file>

# This module is part of Mako and is released under

# the MIT License: http://www.opensource.org/licenses/mit-license.php

import re

from mako import compat

from mako import lexer

from mako import parsetree

class MessageExtractor(object):
    """Yield translatable messages found in the code of a Mako template.

    This class reads two attributes from ``self`` that it does not define
    itself, so whoever instantiates it (presumably a subclass) must supply
    them:

    * ``config`` -- a mapping with an ``"encoding"`` entry (may be falsy)
      and a ``"comment-tags"`` entry (a whitespace-separated string of
      translator-comment prefixes).
    * ``process_python(code, code_lineno, translator_strings)`` -- an
      iterable-returning callable; every item it produces is yielded
      unchanged by :meth:`extract_nodes`.
    """

    def process_file(self, fileobj):
        """Lex the template read from ``fileobj`` and yield its messages.

        The whole of ``fileobj.read()`` is handed to ``lexer.Lexer`` using
        ``self.config["encoding"]`` as the input encoding; the children of
        the resulting template node are then passed to
        :meth:`extract_nodes`.
        """
        template_node = lexer.Lexer(
            fileobj.read(), input_encoding=self.config["encoding"]
        ).parse()
        for extracted in self.extract_nodes(template_node.get_children()):
            yield extracted

    def extract_nodes(self, nodes):
        """Yield whatever ``self.process_python`` finds in ``nodes``.

        ``nodes`` is an iterable of ``parsetree`` nodes.  Comment nodes whose
        text starts with one of the configured comment tags open a run of
        translator comments; those comments are passed along with the next
        piece of code, provided they end on the line directly above it.
        Nodes that carry child nodes are processed recursively after their
        own code.
        """
        # Each entry is a (line number, text) pair from _split_comment.
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config["encoding"] or "ascii"
        # A tuple so that it can be handed straight to str.startswith().
        comment_tags = tuple(
            filter(None, re.split(r"\s+", self.config["comment-tags"]))
        )

        for node in nodes:
            child_nodes = None
            if (
                in_translator_comments
                and isinstance(node, parsetree.Text)
                and not node.content.strip()
            ):
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments:
                    # Continuation of a comment run that is already open.
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                    continue
                # Test all tags in one call so that overlapping tags
                # (e.g. "NOTE" and "NOTE:") record the comment only once.
                # An empty tuple never matches.
                if value.startswith(comment_tags):
                    in_translator_comments = True
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                continue

            # Pick the Python source carried by the node, and its children
            # (if any) for the recursive pass at the bottom of the loop.
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the message
            if (
                translator_comments
                and translator_comments[-1][0] < node.lineno - 1
            ):
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments
            ]

            # Text is encoded before being concatenated with the newline
            # below; anything else is passed through as-is (presumably
            # already bytes -- confirm against the parsetree producers).
            if isinstance(code, compat.text_type):
                code = code.encode(input_encoding, "backslashreplace")

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            code = compat.byte_buffer(compat.b("\n") + code)

            for message in self.process_python(
                code, node.lineno - 1, translator_strings
            ):
                yield message
                used_translator_comments = True

            # Comments are dropped only once at least one message has been
            # produced for them; otherwise they stay pending.
            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                for extracted in self.extract_nodes(child_nodes):
                    yield extracted

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line.

        The result is a list of ``(line number, line)`` tuples, one per
        entry of ``comment.splitlines()``, numbered upwards from ``lineno``.
        An empty comment yields an empty list.
        """
        return [
            (lineno + index, line)
            for index, line in enumerate(comment.splitlines())
        ]