rewrites.py - mozsearch

"""Early-pipeline operations that error-check and lower grammars."""

from __future__ import annotations

# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls

import collections

import dataclasses

from dataclasses import dataclass

import typing

from .grammar import (CallMethod, Element, End, ErrorSymbol, Exclude, Grammar,

                      LenientNt, Literal, LookaheadRule, Optional,

                      NoLineTerminatorHere, Nt, NtDef, NtParameter, Production,

                      ReduceExpr, ReduceExprOrAccept, Some, UnicodeCategory,

                      Var, is_concrete_element)

from .ordered import OrderedFrozenSet, OrderedSet

from .runtime import ErrorToken, ErrorTokenClass

# *** Checking for errors *****************************************************

T = typing.TypeVar("T")

def fix(f: typing.Callable[[T], T], start: T) -> T:

    """Compute a fixed point of `f`, the hard way, starting from `start`."""

    prev, current = start, f(start)

    while current != prev:

        prev, current = current, f(current)

    return current

def empty_nt_set(grammar: Grammar) -> typing.Dict[LenientNt, ReduceExprOrAccept]:

    """Determine which nonterminals in `grammar` can produce the empty string.

    Return a dict {nt: expr} that maps each such nonterminal to the expr

    that should be evaluated when reducing the empty string to nt.

    So, for example, if we have a production

        a ::= b? c?  => CallMethod("a", [0, 1])

    then the resulting dictionary will contain the entry

    `("a", CallMethod("a", [None, None]))`.

"""

    empties: typing.Dict[LenientNt, ReduceExprOrAccept] = {}

    def production_is_empty(p: Production) -> bool:

        return all(isinstance(e, LookaheadRule)

                   or isinstance(e, Optional)

                   or (isinstance(e, Nt) and e in empties)

                   or e is NoLineTerminatorHere

                   for e in p.body)

    def evaluate_reducer_with_empty_matches(p: Production) -> ReduceExprOrAccept:

        # partial evaluation of p.reducer

        stack = [e for e in p.body if is_concrete_element(e)]

        Expr = typing.TypeVar("Expr", ReduceExpr, ReduceExprOrAccept)

        def eval(expr: Expr) -> Expr:

            if expr is None:

                return None

            elif isinstance(expr, Some):

                return Some(eval(expr.inner))

            elif isinstance(expr, CallMethod):

                return dataclasses.replace(

                    expr,

                    args=tuple(eval(arg_expr) for arg_expr in expr.args)

            elif isinstance(expr, int):

                e = stack[expr]

                if isinstance(e, Optional):

                    return None

                else:

                    assert isinstance(e, Nt)

                    result = empties[e]

                    assert not isinstance(result, str)

                    return result

            elif expr == 'accept':

                # Hmm, this is not ideal! Maybe 'accept' needs to take an

                # argument so that the normal case is Accept(0) and this case

                # is Accept(eval(expr.args[0])).

                return 'accept'

            else:

                raise TypeError(

                    "internal error: unhandled reduce expression type {!r}"

                    .format(expr))

        return eval(p.reducer)

    done = False

    while not done:

        done = True

        for nt, nt_def in grammar.nonterminals.items():

            if nt not in empties:

                for p in nt_def.rhs_list:

                    if production_is_empty(p):

                        if nt in empties:

                            raise ValueError(

                                "ambiguous grammar: multiple productions for "

                                "{!r} match the empty string"

                                .format(nt))

                        done = False

                        empties[nt] = evaluate_reducer_with_empty_matches(p)

    return empties

def check_cycle_free(grammar: Grammar) -> None:

    """Throw an exception if any nonterminal in `grammar` produces itself

    via a cycle of 1 or more productions.

"""

    empties = empty_nt_set(grammar)

    # OK, first find out which nonterminals directly produce which other

    # nonterminals (after possibly erasing some optional/empty nts).

    direct_produces: typing.Dict[LenientNt, typing.Set[Nt]] = {}

    for orig in grammar.nonterminals:

        direct_produces[orig] = set()

        for source_production in grammar.nonterminals[orig].rhs_list:

            for rhs, _r in expand_optional_symbols_in_rhs(source_production.body, grammar, empties):

                result: typing.List[Nt] = []

                all_possibly_empty_so_far = True

                # If we break out of the following loop, that means it turns

                # out that this production does not produce *any* strings that

                # are just a single nonterminal.

                for e in rhs:

                    if grammar.is_terminal(e):

                        break  # no good, this production contains a terminal

                    elif isinstance(e, Nt):

                        if e in empties:

                            if all_possibly_empty_so_far:

                                result.append(e)

                        else:

                            if not all_possibly_empty_so_far:

                                # Give up - we have 2+ nonterminals that can't

                                # be empty.

                                break

                            all_possibly_empty_so_far = False

                            result = [e]

                    elif isinstance(e, Exclude):

                        if isinstance(e.inner, Nt):

                            result.append(e.inner)

                    elif isinstance(e, LookaheadRule):

                        # Ignore the restriction. We lose a little precision

                        # here, and could report a cycle where there isn't one,

                        # but it's unlikely in real-world grammars.

                        pass

                    elif e is NoLineTerminatorHere:

                        # This doesn't affect the property we're checking.

                        pass

                    elif isinstance(e, Literal):

                        if e.text != "":

                            # This production contains a non-empty character,

                            # therefore it cannot correspond to an empty cycle.

                            break

                    elif isinstance(e, UnicodeCategory):

                        # This production consume a class of character,

                        # therefore it cannot correspond to an empty cycle.

                        break

                    elif isinstance(e, End):

                        # This production consume the End meta-character,

                        # therefore it cannot correspond to an empty cycle,

                        # even if this character is expect to be produced

                        # infinitely once the end is reached.

                        break

                    elif isinstance(e, CallMethod):

                        # This production execute code, but does not consume

                        # any input.

                        pass

                    else:

                        # Optional is not possible because we called

                        # expand_optional_symbols_in_rhs. ErrorSymbol

                        # effectively matches the empty string (though only if

                        # nothing else matches).

                        assert isinstance(e, ErrorSymbol)

                else:

                    # If we get here, we didn't break, so our results are good!

                    # nt can definitely produce all the nonterminals in result.

                    direct_produces[orig] |= set(result)

    def step(

            produces: typing.Dict[LenientNt, typing.Set[Nt]]

    ) -> typing.Dict[LenientNt, typing.Set[Nt]]:

        return {

            orig: dest | set(b for a in dest for b in produces[a])

            for orig, dest in produces.items()

    produces = fix(step, direct_produces)

    for nt in grammar.nonterminals:

        if nt in produces[nt]:

            raise ValueError(

                "invalid grammar: nonterminal {} can produce itself"

                .format(nt))

def check_lookahead_rules(grammar: Grammar) -> None:

    """Check that no LookaheadRule appears at the end of a production (or before

    elements that can produce the empty string).

    If there are any offending lookahead rules, throw a ValueError.

"""

    empties = empty_nt_set(grammar)

    check_cycle_free(grammar)

    for nt in grammar.nonterminals:

        for source_production in grammar.nonterminals[nt].rhs_list:

            body = source_production.body

            for rhs, _r in expand_optional_symbols_in_rhs(body, grammar, empties):

                # XXX BUG: The next if-condition is insufficient, since it

                # fails to detect a lookahead restriction followed by a

                # nonterminal that can match the empty string.

                if rhs and isinstance(rhs[-1], LookaheadRule):

                    raise ValueError(

                        "invalid grammar: lookahead restriction "

                        "at end of production: {}"

                        .format(grammar.production_to_str(nt, body)))

def check_no_line_terminator_here(grammar: Grammar) -> None:

    empties = empty_nt_set(grammar)

    def check(e: Element, nt: LenientNt, body: typing.List[Element]) -> None:

        if grammar.is_terminal(e):

            pass

        elif isinstance(e, Nt):

            if e in empties:

                raise ValueError(

                    "invalid grammar: [no LineTerminator here] cannot appear next to "

                    "a nonterminal that matches the empty string\n"

                    "in production: {}".format(grammar.production_to_str(nt, body)))

        else:

            raise ValueError(

                "invalid grammar: [no LineTerminator here] must appear only "

                "between terminals and/or nonterminals\n"

                "in production: {}".format(grammar.production_to_str(nt, body)))

    for nt in grammar.nonterminals:

        for production in grammar.nonterminals[nt].rhs_list:

            body = production.body

            for i, e in enumerate(body):

                if e is NoLineTerminatorHere:

                    if i == 0 or i == len(body) - 1:

                        raise ValueError(

                            "invalid grammar: [no LineTerminator here] must be between two symbols\n"

                            "in production: {}".format(grammar.production_to_str(nt, body)))

                    check(body[i - 1], nt, body)

                    check(body[i + 1], nt, body)

def expand_parameterized_nonterminals(grammar: Grammar) -> Grammar:

    """Replace parameterized nonterminals with specialized copies.

    For example, a single pair `nt_name: NtDef(params=('A', 'B'), ...)` in

    `grammar.nonterminals` will be replaced with (assuming A and B are boolean

    parameters) up to four pairs, each having an Nt object as the key and an

    NtDef with no parameters as the value.

    `grammar.nonterminals` must have string keys.

    Returns a new copy of `grammar` with Nt keys, whose NtDefs all have

    `nt_def.params == []`.

"""

    todo = collections.deque(grammar.goals())

    new_nonterminals = {}

    def expand(nt: Nt) -> NtDef:

        """Expand grammar.nonterminals[nt](**args).

        Returns the expanded NtDef, which contains no conditional

        productions or Nt objects.

"""

        if nt.args is None:

            args_dict = None

        else:

            args_dict = dict(nt.args)

        def evaluate_arg(arg: NtParameter) -> NtParameter:

            if isinstance(arg, Var):

                return args_dict[arg.name]

            else:

                return arg

        def expand_element(e: Element) -> Element:

            if isinstance(e, Optional):

                return Optional(expand_element(e.inner))

            elif isinstance(e, Exclude):

                return Exclude(expand_element(e.inner), tuple(map(expand_element, e.exclusion_list)))

            elif isinstance(e, Nt):

                args = tuple((name, evaluate_arg(arg))

                             for name, arg in e.args)

                e = Nt(e.name, args)

                if e not in new_nonterminals:

                    todo.append(e)

                return e

            else:

                return e

        def expand_production(p: Production) -> Production:

            return p.copy_with(

                body=[expand_element(e) for e in p.body],

                condition=None)

        def expand_productions(nt_def: NtDef) -> NtDef:

            result = []

            for p in nt_def.rhs_list:

                if p.condition is None:

                    included = True

                else:

                    param, value = p.condition

                    included = (args_dict[param] == value)

                if included:

                    result.append(expand_production(p))

            return NtDef((), result, nt_def.type)

        nt_def = grammar.nonterminals[nt.name]

        assert tuple(name for name, value in nt.args) == nt_def.params

        return expand_productions(nt_def)

    while todo:

        nt = todo.popleft()

        if nt not in new_nonterminals:  # not already expanded

            new_nonterminals[nt] = expand(nt)

    # "type: ignore" because this runs afoul of Python's covariance rules for

    # Mapping; but it conforms to the intended type.

    return grammar.with_nonterminals(new_nonterminals)  # type: ignore

# *** Start sets and follow sets **********************************************

EMPTY = "(empty)"

END = None

TerminalOrEmpty = str

TerminalOrEmptyOrErrorToken = typing.Union[str, ErrorTokenClass]

StartSets = typing.Dict[Nt, OrderedFrozenSet[TerminalOrEmptyOrErrorToken]]

def start_sets(grammar: Grammar) -> StartSets:

    """Compute the start sets for nonterminals in a grammar.

    A nonterminal's start set is the set of tokens that a match for that

    nonterminal may start with, plus EMPTY if it can match the empty string

    and ErrorToken if it can start with an error.

"""

    # How this works: Note that we can replace the words "match" and "start

    # with" in the definition above with more queries about start sets.

    # 1.  A nonterminal's start set contains a terminal `t` if any of its

    #     productions contains either `t` or a nonterminal with `t` in *its*

    #     start set, preceded only by zero or more nonterminals that have EMPTY

    #     in *their* start sets. Plus:

    # 2.  A nonterminal's start set contains EMPTY if any of its productions

    #     consists entirely of nonterminals that have EMPTY in *their* start

    #     sets.

    # This definition is rather circular. We want the smallest collection of

    # start sets satisfying these rules, and we get that by iterating to a

    # fixed point.

    assert all(isinstance(nt, Nt) for nt in grammar.nonterminals)

    start: StartSets

    start = {typing.cast(Nt, nt): OrderedFrozenSet() for nt in grammar.nonterminals}

    done = False

    while not done:

        done = True

        for nt, nt_def in grammar.nonterminals.items():

            assert isinstance(nt, Nt)

            # Compute start set for each `prod` based on `start` so far.

            # Could be incomplete, but we'll ratchet up as we iterate.

            nt_start = OrderedFrozenSet(

                t for p in nt_def.rhs_list for t in seq_start(grammar, start, p.body))

            if nt_start != start[nt]:

                start[nt] = nt_start

                done = False

    return start

def seq_start(

        grammar: Grammar,

        start: StartSets,

        seq: typing.List[Element]

) -> OrderedFrozenSet[TerminalOrEmptyOrErrorToken]:

    """Compute the start set for a sequence of elements."""

    s: OrderedSet[TerminalOrEmptyOrErrorToken] = OrderedSet([EMPTY])

    for i, e in enumerate(seq):

        if EMPTY not in s:  # preceding elements never match the empty string

            break

        s.remove(EMPTY)

        if grammar.is_terminal(e):

            assert isinstance(e, str)

            s.add(e)

        elif isinstance(e, ErrorSymbol):

            s.add(ErrorToken)

        elif isinstance(e, Nt):

            s |= start[e]

        elif e is NoLineTerminatorHere:

            s.add(EMPTY)

        else:

            assert isinstance(e, LookaheadRule)

            future = seq_start(grammar, start, seq[i + 1:])

            if e.positive:

                future &= e.set

            else:

                future -= e.set

            return OrderedFrozenSet(future)

    return OrderedFrozenSet(s)

StartSetCache = typing.List[typing.List[OrderedFrozenSet[TerminalOrEmptyOrErrorToken]]]

def make_start_set_cache(

        grammar: Grammar,

        prods: typing.List[Prod],

        start: StartSets

) -> StartSetCache:

    """Compute start sets for all suffixes of productions in the grammar.

    Returns a list of lists `cache` such that

    `cache[n][i] == seq_start(grammar, start, prods[n][i:])`.

    (The cache is for speed, since seq_start was being called millions of

    times.)

"""

    def suffix_start_list(

            rhs: typing.List[Element]

    ) -> typing.List[OrderedFrozenSet[TerminalOrEmptyOrErrorToken]]:

        sets: typing.List[OrderedFrozenSet[TerminalOrEmptyOrErrorToken]]

        sets = [OrderedFrozenSet([EMPTY])]

        for e in reversed(rhs):

            s: OrderedFrozenSet[TerminalOrEmptyOrErrorToken]

            if grammar.is_terminal(e):

                assert isinstance(e, str)

                s = OrderedFrozenSet([e])

            elif isinstance(e, ErrorSymbol):

                s = OrderedFrozenSet([ErrorToken])

            elif isinstance(e, Nt):

                s = start[e]

                if EMPTY in s:

                    s = OrderedFrozenSet((s - {EMPTY}) | sets[-1])

            elif e is NoLineTerminatorHere:

                s = sets[-1]

            else:

                assert isinstance(e, LookaheadRule)

                if e.positive:

                    s = OrderedFrozenSet(sets[-1] & e.set)

                else:

                    s = OrderedFrozenSet(sets[-1] - e.set)

            assert isinstance(s, OrderedFrozenSet)

            assert s == seq_start(grammar, start, rhs[len(rhs) - len(sets):])

            sets.append(s)

        sets.reverse()

        assert sets == [seq_start(grammar, start, rhs[i:])

                        for i in range(len(rhs) + 1)]

        return sets

    return [suffix_start_list(prod.rhs) for prod in prods]

FollowSet = OrderedSet[typing.Union[TerminalOrEmptyOrErrorToken, None]]

FollowSets = typing.DefaultDict[Nt, FollowSet]

def follow_sets(

        grammar: Grammar,

        prods_with_indexes_by_nt: typing.DefaultDict[

            LenientNt,

            typing.List[typing.Tuple[int, typing.List[Element]]]

],

        start_set_cache: StartSetCache

) -> FollowSets:

    """Compute all follow sets for nonterminals in a grammar.

    The follow set for a nonterminal `A`, as defined in the book, is "the set

    of terminals that can appear immediately to the right of `A` in some

    sentential form"; plus, "If `A` can be the rightmost symbol in some

    sentential form, then $ is in FOLLOW(A)."

    Returns a default-dictionary mapping nts to follow sets.

"""

    # Set of nonterminals already seen, including those we are in the middle of

    # analyzing. The algorithm starts at `goal` and walks all reachable

    # nonterminals, recursively.

    visited = set()

    # The results. By definition, nonterminals that are not reachable from the

    # goal nt have empty follow sets.

    follow: FollowSets = collections.defaultdict(OrderedSet)

    # If `(x, y) in subsumes_relation`, then x can appear at the end of a

    # production of y, and therefore follow[x] should be <= follow[y].

    # (We could maintain that invariant throughout, but at present we

    # brute-force iterate to a fixed point at the end.)

    subsumes_relation: OrderedSet[typing.Tuple[Nt, Nt]]

    subsumes_relation = OrderedSet()

    # `END` is $. It is, of course, in follow[each goal nonterminal]. It gets

    # into other nonterminals' follow sets through the subsumes relation.

    for init_nt in grammar.init_nts:

        assert isinstance(init_nt, Nt)

        follow[init_nt].add(END)

    def visit(nt: Nt) -> None:

        if nt in visited:

            return

        visited.add(nt)

        for prod_index, rhs in prods_with_indexes_by_nt[nt]:

            for i, symbol in enumerate(rhs):

                if isinstance(symbol, Nt):

                    visit(symbol)

                    after = start_set_cache[prod_index][i + 1]

                    if EMPTY in after:

                        after -= {EMPTY}

                        subsumes_relation.add((symbol, nt))

                    follow[symbol] |= after

    for nt in grammar.init_nts:

        assert isinstance(nt, Nt)

        visit(nt)

    # Now iterate to a fixed point on the subsumes relation.

    done = False

    while not done:

        done = True  # optimistically

        for target, source in subsumes_relation:

            if follow[source] - follow[target]:

                follow[target] |= follow[source]

                done = False

    return follow

# *** Lowering ****************************************************************

# At this point, lowered productions start getting farther from the original

# source.  We need to associate them with the original grammar in order to

# produce correct output, so we use Prod values to represent productions.

# -   `nt` is the name of the nonterminal as it appears in the original

#     grammar.

# -   `index` is the index of the source production, within nt's productions,

#     in the original grammar.

# -   `rhs` is the fully lowered/expanded right-hand-side of the production.

# There may be many productions in a grammar that all have the same `nt` and

# `index` because they were all produced from the same source production.

@dataclass

class Prod:

    nt: Nt

    index: int

    rhs: typing.List

    reducer: ReduceExprOrAccept

def expand_optional_symbols_in_rhs(

        rhs: typing.List[Element],

        grammar: Grammar,

        empties: typing.Dict[LenientNt, ReduceExprOrAccept],

        start_index: int = 0

) -> typing.Iterable[typing.Tuple[typing.List[Element], typing.Dict[int, ReduceExpr]]]:

    """Expand a sequence with optional symbols into sequences that have none.

    rhs is a list of symbols, possibly containing optional elements. This

    yields every list that can be made by replacing each optional element

    either with its .inner value, or with nothing.

    Each list is accompanied by the list of the indices of optional elements in

    `rhs` that were dropped.

    For example, `expand_optional_symbols_in_rhs(["if", Optional("else")])`

    yields the two pairs `(["if"], [1])` and `["if", "else"], []`.

"""

    replacement: ReduceExpr

    for i in range(start_index, len(rhs)):

        e = rhs[i]

        if isinstance(e, Optional):

            if isinstance(e.inner, Nt) and e.inner in empties:

                # If this is already possibly-empty in the input grammar, it's an

                # error! The grammar is ambiguous.

                raise ValueError(

                    "ambiguous grammar: {} is ambiguous because {} can match "

                    "the empty string"

                    .format(grammar.element_to_str(e),

                            grammar.element_to_str(e.inner)))

            replacement = None

            break

        elif isinstance(e, Nt) and e in empties:

            empty_expr = empties[e]

            # The replacement can't be 'accept' because that only happens with

            # InitNt nonterminals, which are never used in productions.

            assert not isinstance(empty_expr, str)

            replacement = empty_expr

            break

    else:

        yield rhs[start_index:], {}

        return

    for expanded, r in expand_optional_symbols_in_rhs(rhs, grammar, empties, i + 1):

        e = rhs[i]

        rhs_inner = e.inner if isinstance(e, Optional) else e

        # without rhs[i]

        r2 = r.copy()

        r2[i] = replacement

        yield rhs[start_index:i] + expanded, r2

        # with rhs[i]

        yield rhs[start_index:i] + [rhs_inner] + expanded, r

def expand_all_optional_elements(grammar: Grammar) -> typing.Tuple[

        Grammar,

        typing.List[Prod],

        typing.DefaultDict[LenientNt, typing.List[typing.Tuple[int, typing.List[Element]]]]

]:

    """Expand optional elements in the grammar.

    We replace each production that contains an optional element with two

    productions: one with and one without. Downstream of this step, we can

    ignore the possibility of optional elements.

"""

    expanded_grammar: typing.Dict[LenientNt, NtDef] = {}

    # This was capturing the set of empty production to simplify the work of

    # the previous algorithm which was trying to determine the lookahead.

    # However, with the LR0Generator this is no longer needed as we are

    # generating deliberatly inconsistent parse table states, which are then

    # properly fixed by adding lookahead information where needed, and without

    # bugs!

    # empties = empty_nt_set(grammar)

    empties: typing.Dict[LenientNt, ReduceExprOrAccept] = {}

    # Put all the productions in one big list, so each one has an index. We

    # will use the indices in the action table (as reduce action payloads).

    prods: typing.List[Prod] = []

    prods_with_indexes_by_nt: \

        typing.DefaultDict[LenientNt, typing.List[typing.Tuple[int, typing.List[Element]]]] = \

        collections.defaultdict(list)

    for nt, nt_def in grammar.nonterminals.items():

        assert isinstance(nt, Nt)

        prods_expanded = []

        for prod_index, p in enumerate(nt_def.rhs_list):

            # Aggravatingly, a reduce-expression that's an int is not

            # simply an offset into p.body. It only counts "concrete"

            # elements. Make a mapping for converting reduce-expressions to

            # offsets.

            reduce_expr_to_offset = [

                for i, e in enumerate(p.body)

                if is_concrete_element(e)

            for pair in expand_optional_symbols_in_rhs(p.body, grammar, empties):

                expanded_rhs, removals = pair

                Expr = typing.TypeVar("Expr", ReduceExpr, ReduceExprOrAccept)

                def adjust_reduce_expr(expr: Expr) -> Expr:

                    if isinstance(expr, int):

                        i = reduce_expr_to_offset[expr]

                        if i in removals:

                            return removals[i]

                        was_optional = isinstance(p.body[i], Optional)

                        expr -= sum(1 for r in removals if r < i)

                        if was_optional:

                            return Some(expr)

                        else:

                            return expr

                    elif expr is None:

                        return None

                    elif isinstance(expr, Some):

                        return Some(adjust_reduce_expr(expr.inner))

                    elif isinstance(expr, CallMethod):

                        return dataclasses.replace(

                            expr,

                            args=tuple(adjust_reduce_expr(arg)

                                       for arg in expr.args)

                    elif expr == 'accept':

                        # doesn't need to be adjusted because 'accept' isn't

                        # turned into code downstream.

                        return 'accept'

                    else:

                        raise TypeError(

                            "internal error: unrecognized element {!r}"

                            .format(expr))

                adjusted_reducer = adjust_reduce_expr(p.reducer)

                prods_expanded.append(

                    Production(body=expanded_rhs,

                               reducer=adjusted_reducer))

                prods.append(Prod(nt, prod_index, expanded_rhs,

                                  adjusted_reducer))

                prods_with_indexes_by_nt[nt].append(

                    (len(prods) - 1, expanded_rhs))

        expanded_grammar[nt] = nt_def.with_rhs_list(prods_expanded)

    return (grammar.with_nonterminals(expanded_grammar),

            prods,

            prods_with_indexes_by_nt)

class CanonicalGrammar:

    __slots__ = ["prods", "prods_with_indexes_by_nt", "grammar"]

    prods: typing.List[Prod]

    prods_with_indexes_by_nt: typing.Mapping[

        LenientNt,

        typing.List[typing.Tuple[int, typing.List[Element]]]]

    grammar: Grammar

    def __init__(self, grammar: Grammar) -> None:

        # Step by step, we check the grammar and lower it to a more primitive form.

        grammar = expand_parameterized_nonterminals(grammar)

        check_cycle_free(grammar)

        # check_lookahead_rules(grammar)

        check_no_line_terminator_here(grammar)

        grammar, prods, prods_with_indexes_by_nt = \

            expand_all_optional_elements(grammar)

        self.prods = prods

        self.prods_with_indexes_by_nt = prods_with_indexes_by_nt

        self.grammar = grammar

Source code

Revision control

Copy as Markdown

Other Tools