# state_inline.py - mozsearch

# Enable keyboard shortcuts

from __future__ import annotations

from collections import namedtuple

from dataclasses import dataclass

from typing import TYPE_CHECKING, Any, Literal

from .._compat import DATACLASS_KWARGS

from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace

from ..ruler import StateBase

from ..token import Token

from ..utils import EnvType

if TYPE_CHECKING:

    from markdown_it import MarkdownIt

@dataclass(**DATACLASS_KWARGS)

class Delimiter:

    """Record describing one run of emphasis-like delimiter markers.

    Instances are collected in ``StateInline.delimiters``.
    """

    # Char code of the starting marker (number).

    marker: int

    # Total length of this series of delimiters.

    length: int

    # Position of the token this delimiter corresponds to.

    token: int

    # If this delimiter is matched as a valid opener, `end` will be

    # equal to its position, otherwise it's `-1`.

    end: int

    # Boolean flags that determine if this delimiter could open or close

    # an emphasis.

    open: bool

    close: bool

    # Optional, defaults to None.
    # NOTE(review): nothing in this file sets or reads this field; its
    # meaning should be confirmed against the code that uses Delimiter.

    level: bool | None = None

# Result of ``StateInline.scanDelims``: whether the scanned run of markers
# can open and/or close an emphasis-like sequence, and how many markers
# the run contains.
Scanned = namedtuple(
    "Scanned",
    ("can_open", "can_close", "length"),
)

class StateInline(StateBase):
    """Mutable state used while tokenizing one piece of inline source.

    Holds the source text and the current scan position, the output token
    list together with a ``tokens_meta`` list that ``push`` extends in
    step with it, text that has been accumulated but not yet emitted as a
    token (``pending``), and the bookkeeping for emphasis-like delimiters.
    """

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> None:
        """Set up the state for scanning ``src``.

        :param src: inline source text to scan
        :param md: parser instance, stored as ``self.md``
        :param env: environment object, stored as ``self.env``
        :param outTokens: list that new tokens are appended to; it is used
            directly (not copied), so the caller sees the appended tokens
        """
        self.src = src
        self.env = env
        self.md = md
        self.tokens = outTokens
        # One metadata slot (initially None) per token already in outTokens.
        self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens)

        self.pos = 0
        self.posMax = len(self.src)
        self.level = 0
        self.pending = ""
        self.pendingLevel = 0

        # Stores { start: end } pairs. Useful for backtrack
        # optimization of pairs parse (emphasis, strikes).
        self.cache: dict[int, int] = {}

        # List of emphasis-like delimiters for current tag
        self.delimiters: list[Delimiter] = []

        # Stack of delimiter lists for upper level tags
        self._prev_delimiters: list[list[Delimiter]] = []

        # backticklength => last seen position
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

        # Counter used to disable inline linkify-it execution
        # inside <a> and markdown links
        self.linkLevel = 0

    def __repr__(self) -> str:
        """Return a short debug string with the position and token count."""
        return (
            f"{self.__class__.__name__}"
            f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
        )

    def pushPending(self) -> Token:
        """Emit the accumulated ``pending`` text as a ``text`` token.

        The token takes ``pendingLevel`` as its level, is appended to
        ``self.tokens``, and ``pending`` is reset to the empty string.

        :return: the newly appended token
        """
        token = Token("text", "", 0)
        token.content = self.pending
        token.level = self.pendingLevel
        # NOTE: only `tokens` grows here; no entry is added to `tokens_meta`.
        self.tokens.append(token)
        self.pending = ""
        return token

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Push new token to "stream".

        If pending text exists - flush it as text token

        :param ttype: token type, passed to ``Token``
        :param tag: tag name, passed to ``Token``
        :param nesting: ``1`` for an opening tag, ``-1`` for a closing tag,
            ``0`` otherwise
        :return: the newly appended token
        :raises IndexError: on a closing tag when no delimiter list was
            saved by an earlier opening tag
        """
        if self.pending:
            self.pushPending()

        token = Token(ttype, tag, nesting)
        token_meta = None

        if nesting < 0:
            # closing tag: leave the level *before* stamping the token and
            # restore the delimiter list saved by the matching opening tag
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        token.level = self.level

        if nesting > 0:
            # opening tag: enter the level *after* stamping the token, save
            # the current delimiter list and start a fresh one
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            self.delimiters = []
            # The meta entry shares the list object with `self.delimiters`,
            # so delimiters appended later are visible through it.
            token_meta = {"delimiters": self.delimiters}

        self.pendingLevel = self.level
        self.tokens.append(token)
        self.tokens_meta.append(token_meta)
        return token

    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
        """
        Scan a sequence of emphasis-like markers, and determine whether
        it can start an emphasis sequence or end an emphasis sequence.

         - start - position to scan from (it should point at a valid marker);
         - canSplitWord - determine if these markers can be found inside a word

        Returns ``Scanned(can_open, can_close, length)`` where ``length`` is
        the number of consecutive marker characters starting at ``start``.

        Raises ``IndexError`` if ``start`` is not a valid index into
        ``self.src``.
        """
        pos = start
        maximum = self.posMax
        marker = self.src[start]

        # treat beginning of the line as a whitespace
        lastChar = self.src[start - 1] if start > 0 else " "

        while pos < maximum and self.src[pos] == marker:
            pos += 1

        count = pos - start

        # treat end of the line as a whitespace
        nextChar = self.src[pos] if pos < maximum else " "

        isLastPunctChar = isMdAsciiPunct(ord(lastChar)) or isPunctChar(lastChar)
        isNextPunctChar = isMdAsciiPunct(ord(nextChar)) or isPunctChar(nextChar)

        isLastWhiteSpace = isWhiteSpace(ord(lastChar))
        isNextWhiteSpace = isWhiteSpace(ord(nextChar))

        # Not left-flanking when followed by whitespace, or when followed by
        # punctuation without whitespace/punctuation in front of the run.
        left_flanking = not (
            isNextWhiteSpace
            or (isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar))
        )
        # Mirror image of the rule above for the character before the run.
        right_flanking = not (
            isLastWhiteSpace
            or (isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar))
        )

        if not canSplitWord:
            # A run flanking on both sides (i.e. inside a word) may only
            # open/close when the adjacent character is punctuation.
            can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
            can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
        else:
            can_open = left_flanking
            can_close = right_flanking

        return Scanned(can_open, can_close, count)