yaml.py - mozsearch

mozilla-central/third_party/python/PyYAML/examples/pygments-lexer/yaml.py

Enable keyboard shortcuts

Source code

File a bug in Firefox Build System :: General

Revision control

Copy as Markdown

Other Tools

"""

yaml.py

Lexer for YAML, a human-friendly data serialization language

(http://yaml.org/).

Written by Kirill Simonov <xi@resolvent.net>.

License: Whatever suitable for inclusion into the Pygments package.

"""

from pygments.lexer import (
    ExtendedRegexLexer, LexerContext, include, bygroups)
from pygments.token import (
    Text, Comment, Punctuation, Name, Literal)

__all__ = ['YAMLLexer']

class YAMLLexerContext(LexerContext):
    """Lexer context that also carries the YAML indentation state."""

    def __init__(self, *args, **kwds):
        super(YAMLLexerContext, self).__init__(*args, **kwds)
        # Explicit or detected indentation of the current block scalar;
        # None while it is not known.
        self.block_scalar_indent = None
        # Current indentation level (-1 before any level has been set) and
        # the candidate level saved for the line being scanned.
        self.indent, self.next_indent = -1, 0
        # Enclosing indentation levels, innermost last.
        self.indent_stack = []

def something(TokenClass):
    """Return a callback that emits the match as one token, unless empty.

    For an empty match the callback yields nothing and leaves
    ``context.pos`` untouched.
    """
    def callback(lexer, match, context):
        matched = match.group()
        if matched:
            yield match.start(), TokenClass, matched
            context.pos = match.end()
    return callback

def reset_indent(TokenClass):
    """Return a callback that clears all indentation state and emits a token.

    The indentation stack is emptied, the current and pending levels go back
    to -1 and 0, and any block scalar indentation is forgotten.
    """
    def callback(lexer, match, context):
        # Drop every remembered level before emitting the token.
        context.block_scalar_indent = None
        context.indent_stack = []
        context.indent, context.next_indent = -1, 0
        yield match.start(), TokenClass, match.group()
        context.pos = match.end()
    return callback

def save_indent(TokenClass, start=False):
    """Return a callback that saves a possible indentation level.

    With ``start=True`` the match is the leading whitespace of a line: its
    length becomes ``context.next_indent`` and, if the line is indented less
    than the current level, enclosing levels are popped from
    ``context.indent_stack`` until the current level is no deeper than the
    line.  If the line then falls between two known levels, the part up to
    the surviving level is emitted as ``TokenClass`` and the excess as
    ``TokenClass.Error``.

    With ``start=False`` the match length is added to ``context.next_indent``.
    """
    def callback(lexer, match, context):
        text = match.group()
        extra = ''
        if start:
            context.next_indent = len(text)
            if context.next_indent < context.indent:
                while context.next_indent < context.indent:
                    context.indent = context.indent_stack.pop()
                if context.next_indent > context.indent:
                    # The stack can unwind to the -1 "no level" value; clamp
                    # it so the slices below do not count from the end of
                    # the string and mis-split the whitespace.
                    valid = max(context.indent, 0)
                    extra = text[valid:]
                    text = text[:valid]
        else:
            context.next_indent += len(text)
        if text:
            yield match.start(), TokenClass, text
        if extra:
            yield match.start()+len(text), TokenClass.Error, extra
        context.pos = match.end()
    return callback

def set_indent(TokenClass, implicit=False):
    """Return a callback that commits the previously saved indentation level.

    If the saved level is deeper than the current one, the current level is
    pushed on ``context.indent_stack`` and replaced.  Unless ``implicit`` is
    true, the length of the matched indicator is then added to the saved
    level.
    """
    def callback(lexer, match, context):
        indicator = match.group()
        pending = context.next_indent
        if pending > context.indent:
            context.indent_stack.append(context.indent)
            context.indent = pending
        if not implicit:
            context.next_indent = pending + len(indicator)
        yield match.start(), TokenClass, indicator
        context.pos = match.end()
    return callback

def set_block_scalar_indent(TokenClass):
    """Return a callback that records an explicit block scalar indentation.

    ``context.block_scalar_indent`` is always cleared first.  When the match
    is non-empty and its first group (the indentation digit) is present, the
    indentation becomes the current level (at least 0) plus that digit.
    An empty match yields nothing and does not move ``context.pos``.
    """
    def callback(lexer, match, context):
        header = match.group()
        context.block_scalar_indent = None
        if header:
            digit = match.group(1)
            if digit:
                base = max(context.indent, 0)
                context.block_scalar_indent = base + int(digit)
            yield match.start(), TokenClass, header
            context.pos = match.end()
    return callback

def parse_block_scalar_empty_line(IndentTokenClass, ContentTokenClass):
    """Return a callback that tokenizes a whitespace-only block scalar line.

    When the block scalar indentation is known and the line is longer than
    it, the line is split into an indentation token and a content token.
    Otherwise the whole (non-empty) line is emitted as indentation.
    """
    def callback(lexer, match, context):
        line = match.group()
        limit = context.block_scalar_indent
        if limit is not None and len(line) > limit:
            # Spaces beyond the scalar's indentation belong to its content.
            yield match.start(), IndentTokenClass, line[:limit]
            yield match.start()+limit, ContentTokenClass, line[limit:]
        elif line:
            yield match.start(), IndentTokenClass, line
        context.pos = match.end()
    return callback

def parse_block_scalar_indent(TokenClass):
    """Return a callback that handles leading spaces of a block scalar line.

    If the scalar's indentation is not yet known, a line indented deeper
    than the current level fixes it; a line that is not deeper ends the
    scalar.  If it is known, a line indented less than it ends the scalar.
    Ending the scalar pops two states from ``context.stack`` and emits
    nothing; otherwise the spaces are emitted as ``TokenClass``.
    """
    def callback(lexer, match, context):
        spaces = match.group()
        width = len(spaces)
        if context.block_scalar_indent is None:
            leave = width <= max(context.indent, 0)
            if not leave:
                # First content line: its indentation defines the scalar's.
                context.block_scalar_indent = width
        else:
            leave = width < context.block_scalar_indent
        if leave:
            context.stack.pop()
            context.stack.pop()
            return
        if spaces:
            yield match.start(), TokenClass, spaces
            context.pos = match.end()
    return callback

def parse_plain_scalar_indent(TokenClass):
    """Return a callback that handles leading spaces inside a plain scalar.

    A continuation line must be indented deeper than the current level;
    otherwise two states are popped from ``context.stack`` and nothing is
    emitted.
    """
    def callback(lexer, match, context):
        spaces = match.group()
        if len(spaces) > context.indent:
            # Still inside the scalar: emit the indentation, if any.
            if spaces:
                yield match.start(), TokenClass, spaces
                context.pos = match.end()
        else:
            context.stack.pop()
            context.stack.pop()
    return callback

class YAMLLexer(ExtendedRegexLexer):
    """Lexer for the YAML language.

    The state table in ``tokens`` relies on the callback factories defined
    in this module to track indentation; that state is kept on a
    ``YAMLLexerContext``, which ``get_tokens_unprocessed`` creates when the
    caller does not supply a context.
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    tokens = {

        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Directive),
                'yaml-directive'),
            # the '%TAG' directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Directive),
                'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)',
                reset_indent(Punctuation.Document), 'block-line'),
            # indentation spaces
            (r'[ ]*(?![ \t\n\r\f\v]|$)',
                save_indent(Text.Indent, start=True),
                ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text.Blank),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text.Break, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
                bygroups(Text.Blank, Literal.Version), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
                r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
                bygroups(Text.Blank, Name.Type, Text.Blank, Name.Type),
                'ignored-line'),
        ],

        # block collection indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text.Blank), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text.Indent)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text.Indent), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text.Blank), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text.Blank),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
                something(Literal.Scalar.Plain),
                'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Name.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[0-9A-Za-z_-]+)?'
                r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Name.Type),
            # an anchor
            (r'&[0-9A-Za-z_-]+', Name.Anchor),
            # an alias
            (r'\*[0-9A-Za-z_-]+', Name.Alias),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
                ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', Literal.Scalar.Flow.Quote, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', Literal.Scalar.Flow.Quote, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
                something(Literal.Scalar.Plain),
                'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text.Break),
            # empty line
            (r'^[ ]+$',
                parse_block_scalar_empty_line(Text.Indent,
                    Literal.Scalar.Block)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text.Indent)),
            # line content
            (r'[^\n\r\f\v]+', Literal.Scalar.Block),
        ],

        # the header of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
                set_block_scalar_indent(Punctuation.Indicator),
                'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
                set_block_scalar_indent(Punctuation.Indicator),
                'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Flow),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', Literal.Scalar.Flow.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\']+', Literal.Scalar.Flow),
            # the closing quote
            (r'\'', Literal.Scalar.Flow.Quote, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', Literal.Scalar.Flow.Escape),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
                Literal.Scalar.Flow.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\"\\]+', Literal.Scalar.Flow),
            # the closing quote
            (r'"', Literal.Scalar.Flow.Quote, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Punctuation.Document), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text.Indent), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text.Blank), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text.Blank, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+',
                Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text.Blank), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text.Blank, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v,:?\[\]{}]+', Literal.Scalar.Plain),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        """Tokenize ``text``, creating a YAML-aware context if none is given.

        When ``context`` is None a ``YAMLLexerContext`` starting at position
        0 is built so the indentation callbacks have their state available;
        the call is then delegated to the parent lexer.
        """
        if context is None:
            context = YAMLLexerContext(text, 0)
        return super(YAMLLexer, self).get_tokens_unprocessed(text, context)