# regression.toml
# Path: comm-central/third_party/rust/regex/testdata/regression.toml

# See: https://github.com/rust-lang/regex/issues/48

[[test]]
name = "invalid-regex-no-crash-100"
regex = '(*)'
haystack = ""
matches = []
# This pattern is expected not to compile.
compiles = false

# See: https://github.com/rust-lang/regex/issues/48

[[test]]
name = "invalid-regex-no-crash-200"
regex = '(?:?)'
haystack = ""
matches = []
# This pattern is expected not to compile.
compiles = false

# See: https://github.com/rust-lang/regex/issues/48

[[test]]
name = "invalid-regex-no-crash-300"
regex = '(?)'
haystack = ""
matches = []
# This pattern is expected not to compile.
compiles = false

# See: https://github.com/rust-lang/regex/issues/48

[[test]]
name = "invalid-regex-no-crash-400"
regex = '*'
haystack = ""
matches = []
# This pattern is expected not to compile.
compiles = false

# See: https://github.com/rust-lang/regex/issues/75

[[test]]
name = "unsorted-binary-search-100"
# Lowercase class member against an uppercase haystack.
regex = '(?i-u)[a_]+'
haystack = "A_"
matches = [[0, 2]]

# See: https://github.com/rust-lang/regex/issues/75

[[test]]
name = "unsorted-binary-search-200"
# Uppercase class member against a lowercase haystack.
regex = '(?i-u)[A_]+'
haystack = "a_"
matches = [[0, 2]]

# See: https://github.com/rust-lang/regex/issues/76

[[test]]
name = "unicode-case-lower-nocase-flag"
regex = '(?i)\p{Ll}+'
haystack = "ΛΘΓΔα"
# Offsets are byte offsets: each of the five letters is 2 bytes in UTF-8.
matches = [[0, 10]]

# See: https://github.com/rust-lang/regex/issues/99

[[test]]
name = "negated-char-class-100"
regex = '(?i)[^x]'
haystack = "x"
matches = []

# See: https://github.com/rust-lang/regex/issues/99

[[test]]
name = "negated-char-class-200"
# Same pattern as negated-char-class-100, but with an uppercase haystack.
regex = '(?i)[^x]'
haystack = "X"
matches = []

# See: https://github.com/rust-lang/regex/issues/101

[[test]]
name = "ascii-word-underscore"
regex = '[[:word:]]'
haystack = "_"
matches = [[0, 1]]

# See: https://github.com/rust-lang/regex/issues/129

[[test]]
name = "captures-repeat"
regex = '([a-f]){2}(?P<foo>[x-z])'
haystack = "abx"
# One match; its spans are listed as: overall match, group 1, group 'foo'.
matches = [
  [[0, 3], [1, 2], [2, 3]],
]

# See: https://github.com/rust-lang/regex/issues/153

[[test]]
name = "alt-in-alt-100"
regex = 'ab?|$'
haystack = "az"
matches = [[0, 1], [2, 2]]

# See: https://github.com/rust-lang/regex/issues/153

[[test]]
name = "alt-in-alt-200"
regex = '^(?:.*?)(?:\n|\r\n?|$)'
haystack = "ab\rcd"
matches = [[0, 3]]

# See: https://github.com/rust-lang/regex/issues/169

[[test]]
name = "leftmost-first-prefix"
regex = 'z*azb'
haystack = "azb"
matches = [[0, 3]]

# See: https://github.com/rust-lang/regex/issues/191

[[test]]
name = "many-alternates"
regex = '1|2|3|4|5|6|7|8|9|10|int'
haystack = "int"
matches = [[0, 3]]

# See: https://github.com/rust-lang/regex/issues/204

[[test]]
name = "word-boundary-alone-100"
regex = '\b'
haystack = "Should this (work?)"
# Every expected match is empty (start == end).
matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]]

# See: https://github.com/rust-lang/regex/issues/204

[[test]]
name = "word-boundary-alone-200"
regex = '\b'
haystack = "a b c"
# Every expected match is empty (start == end).
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]

# See: https://github.com/rust-lang/regex/issues/264

[[test]]
name = "word-boundary-ascii-no-capture"
regex = '\B'
# A single code point that occupies 4 bytes in UTF-8.
haystack = "\U00028F3E"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false

# See: https://github.com/rust-lang/regex/issues/264

[[test]]
name = "word-boundary-ascii-capture"
regex = '(?:\B)'
# A single code point that occupies 4 bytes in UTF-8.
haystack = "\U00028F3E"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false

# See: https://github.com/rust-lang/regex/issues/268

[[test]]
name = "partial-anchor"
regex = '^a|b'
haystack = "ba"
matches = [[0, 1]]

# See: https://github.com/rust-lang/regex/issues/271

[[test]]
name = "endl-or-word-boundary"
regex = '(?m:$)|(?-u:\b)'
# A single code point that occupies 4 bytes in UTF-8.
haystack = "\U0006084E"
matches = [[4, 4]]

# See: https://github.com/rust-lang/regex/issues/271

[[test]]
name = "zero-or-end"
regex = '(?i-u:\x00)|$'
# A single code point that occupies 4 bytes in UTF-8.
haystack = "\U000E682F"
matches = [[4, 4]]

# See: https://github.com/rust-lang/regex/issues/271

[[test]]
name = "y-or-endl"
regex = '(?i-u:y)|(?m:$)'
# A single code point that occupies 4 bytes in UTF-8.
haystack = "\U000B4331"
matches = [[4, 4]]

# See: https://github.com/rust-lang/regex/issues/271

[[test]]
name = "word-boundary-start-x"
regex = '(?u:\b)^(?-u:X)'
haystack = "X"
matches = [[0, 1]]

# See: https://github.com/rust-lang/regex/issues/271

[[test]]
name = "word-boundary-ascii-start-x"
regex = '(?-u:\b)^(?-u:X)'
haystack = "X"
matches = [[0, 1]]

# See: https://github.com/rust-lang/regex/issues/271

[[test]]
name = "end-not-word-boundary"
regex = '$\B'
# Two code points of 4 UTF-8 bytes each, so the haystack ends at offset 8.
haystack = "\U0005C124\U000B576C"
matches = [[8, 8]]
unicode = false
utf8 = false

# See: https://github.com/rust-lang/regex/issues/280

[[test]]
name = "partial-anchor-alternate-begin"
regex = '^a|z'
haystack = "yyyyya"
matches = []

# See: https://github.com/rust-lang/regex/issues/280

[[test]]
name = "partial-anchor-alternate-end"
regex = 'a$|z'
haystack = "ayyyyy"
matches = []

# See: https://github.com/rust-lang/regex/issues/289

[[test]]
name = "lits-unambiguous-100"
regex = '(?:ABC|CDA|BC)X'
haystack = "CDAX"
matches = [[0, 4]]

# See: https://github.com/rust-lang/regex/issues/291

[[test]]
name = "lits-unambiguous-200"
regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$'
haystack = "CIMG2341"
# One match; spans are the overall match followed by groups 1-4. An empty
# span ([]) is listed for the group that has no span in this match.
matches = [
  [[0, 8], [0, 4], [], [0, 4], [4, 8]],
]

# See: https://github.com/rust-lang/regex/issues/303

# 2022-09-19: This has now been "properly" fixed in that empty character

# classes are fully supported as something that can never match. This test

# used to be marked as 'compiles = false', but now it works.

[[test]]
name = "negated-full-byte-range"
regex = '[^\x00-\xFF]'
haystack = ""
matches = []
# The pattern compiles, but no match is expected.
compiles = true
unicode = false
utf8 = false

# See: https://github.com/rust-lang/regex/issues/321

[[test]]
name = "strange-anchor-non-complete-prefix"
regex = 'a^{2}'
haystack = ""
matches = []

# See: https://github.com/rust-lang/regex/issues/321

[[test]]
name = "strange-anchor-non-complete-suffix"
regex = '${2}a'
haystack = ""
matches = []

# See: https://github.com/rust-lang/regex/issues/334

# See: https://github.com/rust-lang/regex/issues/557

[[test]]
name = "captures-after-dfa-premature-end-100"
regex = 'a(b*(X|$))?'
haystack = "abcbX"
# One match; only the overall span is set, both groups are listed as [].
matches = [
  [[0, 1], [], []],
]

# See: https://github.com/rust-lang/regex/issues/334

# See: https://github.com/rust-lang/regex/issues/557

[[test]]
name = "captures-after-dfa-premature-end-200"
regex = 'a(bc*(X|$))?'
haystack = "abcbX"
# One match; only the overall span is set, both groups are listed as [].
matches = [
  [[0, 1], [], []],
]

# See: https://github.com/rust-lang/regex/issues/334

# See: https://github.com/rust-lang/regex/issues/557

[[test]]
name = "captures-after-dfa-premature-end-300"
regex = '(aa$)?'
haystack = "aaz"
# Four empty matches, one per offset 0..=3; group 1 is listed as [] in each.
matches = [
  [[0, 0], []],
  [[1, 1], []],
  [[2, 2], []],
  [[3, 3], []],
]

# Plucked from "Why aren’t regular expressions a lingua franca? an empirical

# study on the re-use and portability of regular expressions", The ACM Joint

# European Software Engineering Conference and Symposium on the Foundations of

# Software Engineering (ESEC/FSE), 2019.

# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909

[[test]]
name = "captures-after-dfa-premature-end-400"
regex = '(a)\d*\.?\d+\b'
haystack = "a0.0c"
# One match; spans are the overall match followed by group 1.
matches = [
  [[0, 2], [0, 1]],
]

# See: https://github.com/rust-lang/regex/issues/437

[[test]]
name = "literal-panic"
regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+'
haystack = "test"
matches = []

# See: https://github.com/rust-lang/regex/issues/527

[[test]]
name = "empty-flag-expr"
regex = '(?:(?:(?x)))'
haystack = ""
# A single empty match on the empty haystack.
matches = [[0, 0]]

# See: https://github.com/rust-lang/regex/issues/533

#[[tests]]

#name = "blank-matches-nothing-between-space-and-tab"

#regex = '[[:blank:]]'

#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'

#match = false

#unescape = true

# See: https://github.com/rust-lang/regex/issues/533

#[[tests]]

#name = "blank-matches-nothing-between-space-and-tab-inverted"

#regex = '^[[:^blank:]]+$'

#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'

#match = true

#unescape = true

# See: https://github.com/rust-lang/regex/issues/555

[[test]]
name = "invalid-repetition"
regex = '(?m){1,1}'
haystack = ""
matches = []
# This pattern is expected not to compile.
compiles = false

# See: https://github.com/rust-lang/regex/issues/640

[[test]]
name = "flags-are-unset"
regex = '(?:(?i)foo)|Bar'
haystack = "foo Foo bar Bar"
# The lowercase "bar" at [8, 11] is not among the expected matches.
matches = [[0, 3], [4, 7], [12, 15]]

# Note that 'Ј' is not 'j', but cyrillic Je

# https://en.wikipedia.org/wiki/Je_(Cyrillic)

# See: https://github.com/rust-lang/regex/issues/659

[[test]]
name = "empty-group-with-unicode"
regex = '(?:)Ј01'
haystack = 'zЈ01'
# Byte offsets: 'Ј' takes 2 bytes in UTF-8, so the match ends at 5.
matches = [[1, 5]]

# See: https://github.com/rust-lang/regex/issues/579

[[test]]
name = "word-boundary-weird"
regex = '\b..\b'
haystack = "I have 12, he has 2!"
matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]

# See: https://github.com/rust-lang/regex/issues/579

[[test]]
name = "word-boundary-weird-ascii"
# Same pattern, haystack and spans as word-boundary-weird, with Unicode
# and UTF-8 modes switched off.
regex = '\b..\b'
haystack = "I have 12, he has 2!"
matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
unicode = false
utf8 = false

# See: https://github.com/rust-lang/regex/issues/579

[[test]]
name = "word-boundary-weird-minimal-ascii"
regex = '\b..\b'
haystack = "az,,b"
matches = [[0, 2], [2, 4]]
unicode = false
utf8 = false

# See: https://github.com/BurntSushi/ripgrep/issues/1203

[[test]]
name = "reverse-suffix-100"
regex = '[0-4][0-4][0-4]000'
haystack = "153.230000"
matches = [[4, 10]]

# See: https://github.com/BurntSushi/ripgrep/issues/1203

[[test]]
name = "reverse-suffix-200"
regex = '[0-9][0-9][0-9]000'
# Unlike reverse-suffix-100, the haystack ends with a newline.
haystack = "153.230000\n"
matches = [[4, 10]]

# This is a tricky case for the reverse suffix optimization, because it

# finds the 'foobar' match but the reverse scan must fail to find a match by

# correctly dealing with the word boundary following the 'foobar' literal when

# computing the start state.

# This test exists because I tried to break the following assumption that

# is currently in the code: that if a suffix is found and the reverse scan

# succeeds, then it's guaranteed that there is an overall match. Namely, the

# 'is_match' routine does *not* do another forward scan in this case because of

# this assumption.

[[test]]
name = "reverse-suffix-300"
regex = '\w+foobar\b'
haystack = "xyzfoobarZ"
matches = []
unicode = false
utf8 = false

# See: https://github.com/BurntSushi/ripgrep/issues/1247

[[test]]
name = "stops"
regex = '\bs(?:[ab])'
# Literal string: the backslash sequence is stored verbatim in the TOML value.
haystack = 's\xE4'
matches = []
unescape = true
utf8 = false

# See: https://github.com/BurntSushi/ripgrep/issues/1247

[[test]]
name = "stops-ascii"
regex = '(?-u:\b)s(?:[ab])'
# Literal string: the backslash sequence is stored verbatim in the TOML value.
haystack = 's\xE4'
matches = []
unescape = true
utf8 = false

# See: https://github.com/rust-lang/regex/issues/850

[[test]]
name = "adjacent-line-boundary-100"
regex = '(?m)^(?:[^ ]+?)$'
haystack = "line1\nline2"
# One match per line.
matches = [[0, 5], [6, 11]]

# Continued: same pattern as adjacent-line-boundary-100, with a smaller haystack.

[[test]]
name = "adjacent-line-boundary-200"
regex = '(?m)^(?:[^ ]+?)$'
haystack = "A\nB"
# One match per line.
matches = [[0, 1], [2, 3]]

# There is no issue for this bug.

[[test]]
name = "anchored-prefix-100"
regex = '^a[[:^space:]]'
haystack = "a "
matches = []

# There is no issue for this bug.

[[test]]
name = "anchored-prefix-200"
regex = '^a[[:^space:]]'
haystack = "foo boo a"
matches = []

# There is no issue for this bug.

[[test]]
name = "anchored-prefix-300"
regex = '^-[a-z]'
haystack = "r-f"
matches = []

# Tests that a possible Aho-Corasick optimization works correctly. It only

# kicks in when we have a lot of literals. By "works correctly," we mean that

# leftmost-first match semantics are properly respected. That is, samwise

# should match, not sam.

# There is no issue for this bug.

[[test]]
name = "aho-corasick-100"
regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z'
haystack = "samwise"
# The whole haystack must match, not just the shorter "sam" alternative.
matches = [[0, 7]]

# See: https://github.com/rust-lang/regex/issues/921

[[test]]
name = "interior-anchor-capture"
regex = '(a$)b$'
haystack = 'ab'
matches = []

# I found this bug in the course of adding some of the regexes that Ruff uses

# to rebar. It turns out that the lazy DFA was finding a match that was being

# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack.

# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52

[[test]]
name = "ruff-whitespace-around-keywords"
regex = '^(a|ab)$'
haystack = "ab"
anchored = true
unicode = false
utf8 = true
# One match; spans are the overall match followed by group 1.
matches = [
  [[0, 2], [0, 2]],
]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-0"
regex = '(?:(?-u:\b)|(?u:h))+'
haystack = "h"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-1"
regex = '(?u:\B)'
haystack = "鋸"
unicode = true
utf8 = false
matches = []

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-2"
regex = '(?:(?u:\b)|(?s-u:.))+'
haystack = "oB"
unicode = true
utf8 = false
matches = [[0, 0], [1, 2]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-3"
regex = '(?:(?-u:\B)|(?su:.))+'
# A single code point that occupies 4 bytes in UTF-8.
haystack = "\U000FEF80"
unicode = true
utf8 = false
# With utf8 = false, an empty match is expected at every byte offset.
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-3-utf8"
regex = '(?:(?-u:\B)|(?su:.))+'
# A single code point that occupies 4 bytes in UTF-8.
haystack = "\U000FEF80"
unicode = true
utf8 = true
# With utf8 = true, empty matches are expected only at offsets 0 and 4.
matches = [[0, 0], [4, 4]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-4"
regex = '(?m:$)(?m:^)(?su:.)'
haystack = "\n‣"
unicode = true
utf8 = false
matches = [[0, 1]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-5"
regex = '(?m:$)^(?m:^)'
haystack = "\n"
unicode = true
utf8 = false
matches = [[0, 0]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-6"
regex = '(?P<kp>(?iu:do)(?m:$))*'
haystack = "dodo"
unicode = true
utf8 = false
# Each entry lists the overall span followed by group 'kp'; the group is
# listed as [] in the two empty matches.
matches = [
  [[0, 0], []],
  [[1, 1], []],
  [[2, 4], [2, 4]],
]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-7"
regex = '(?u:\B)'
haystack = "䡁"
unicode = true
utf8 = false
matches = []

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-8"
regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+'
haystack = "0"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-9"
regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)'
haystack = "\n\n"
unicode = true
utf8 = false
# One match; spans are the overall match followed by group 1.
matches = [
  [[1, 2], [1, 2]],
]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-10"
regex = '(?m:$)(?m:$)^(?su:.)'
haystack = "\n\u0081¨\u200a"
unicode = true
utf8 = false
matches = [[0, 1]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-11"
regex = '(?-u:\B)(?m:^)'
haystack = "0\n"
unicode = true
utf8 = false
matches = [[2, 2]]

# From: https://github.com/rust-lang/regex/issues/429

[[test]]
name = "i429-12"
regex = '(?:(?u:\b)|(?-u:.))+'
haystack = "0"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]

# From: https://github.com/rust-lang/regex/issues/969

[[test]]
name = "i969"
regex = 'c.*d\z'
haystack = "ababcd"
# NOTE(review): bounds presumably restricts the search to haystack[4..6];
# confirm against the test harness.
bounds = [4, 6]
search-kind = "earliest"
matches = [[4, 6]]

# I found this during the regex-automata migration. This is the fowler basic

# 154 test, but without anchored = true and without a match limit.

# This test caught a subtle bug in the hybrid reverse DFA search, where it

# would skip over the termination condition if it entered a start state. This

# was a double bug. Firstly, the reverse DFA shouldn't have had start states

# specialized in the first place, and thus it shouldn't have been possible to detect

# that the DFA had entered a start state. The second bug was that the start

# state handling was incorrect by jumping over the termination condition.

[[test]]
name = "fowler-basic154-unanchored"
regex = 'a([bc]*)c*'
haystack = 'abc'
# One match; spans are the overall match followed by group 1.
matches = [
  [[0, 3], [1, 3]],
]

# From: https://github.com/rust-lang/regex/issues/981

# This was never really a problem in the new architecture because the

# regex-automata engines are far more principled about how they deal with

# look-around. (This was one of the many reasons I wanted to re-work the

# original regex crate engines.)

[[test]]
name = "word-boundary-interact-poorly-with-literal-optimizations"
regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))'
haystack = 'ubi-Darwin-x86_64.tar.gz'
# The "win" inside "Darwin" must not be reported.
matches = []

# This was found during fuzz testing of regex. It provoked a panic in the meta

# engine as a result of the reverse suffix optimization. Namely, it hit a case

# where a suffix match was found, a corresponding reverse match was found, but

# the forward search turned up no match. The forward search should always match

# if the suffix and reverse search match.

# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy

# and fully compiled) engines. It was caused by a mishandling of the collection

# of NFA state IDs in the generic determinization code (which is why both types

# of DFA were impacted). Namely, when a fail state was encountered (that's the

# `[^\s\S]` in the pattern below), then it would just stop collecting states.

# But that's not correct since a later state could lead to a match.

[[test]]
name = "impossible-branch"
regex = '.*[^\s\S]A|B'
haystack = "B"
matches = [[0, 1]]

# This was found during fuzz testing in regex-lite. The regex crate never

# suffered from this bug, but it causes regex-lite to incorrectly compile

# captures.

[[test]]
name = "captures-wrong-order"
regex = '(a){0}(a)'
haystack = 'a'
# One match; spans are the overall match, group 1 (listed as []), group 2.
matches = [
  [[0, 1], [], [0, 1]],
]

# This tests a bug in how quit states are handled in the DFA. At some point

# during development, the DFAs were tweaked slightly such that if they hit

# a quit state (which means, they hit a byte that the caller configured should

# stop the search), then it might not return an error necessarily. Namely, if a

# match had already been found, then it would be returned instead of an error.

# But this is actually wrong! Why? Because even though a match had been found,

# it wouldn't be fully correct to return it once a quit state has been seen

# because you can't determine whether the match offset returned is the correct

# greedy/leftmost-first match. Since you can't complete the search as requested

# by the caller, the DFA should just stop and return an error.

# Interestingly, this does seem to produce an unavoidable difference between

# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs.

# The former will stop immediately once a match is known to occur and return

# 'Ok(true)', whereas the latter could find the match but quit with an

# 'Err(..)' first.

# Thankfully, I believe this inconsistency between 'is_match()' and 'find()'

# cannot be observed in the higher level meta regex API because it specifically

# will try another engine that won't fail in the case of a DFA failing.

# This regression happened in the regex crate rewrite, but before anything got

# released.

[[test]]
name = "negated-unicode-word-boundary-dfa-fail"
regex = '\B.*'
haystack = "!\u02D7"
# Byte offsets: '!' is 1 byte and U+02D7 is 2 bytes in UTF-8.
matches = [[0, 3]]

# This failure was found in the *old* regex crate (prior to regex 1.9), but

# I didn't investigate why. My best guess is that it's a literal optimization

# bug. It didn't occur in the rewrite.

[[test]]
name = "missed-match"
regex = 'e..+e.ee>'
haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>'
matches = [[1, 26]]

# This test came from the 'ignore' crate and tripped a bug in how accelerated

# DFA states were handled in an overlapping search.

[[test]]
name = "regex-to-glob"
# Here 'regex' is an array holding a single pattern, not a bare string.
regex = ['(?-u)^path1/[^/]*$']
haystack = "path1/foo"
matches = [[0, 9]]
utf8 = false
match-kind = "all"
search-kind = "overlapping"

# See: https://github.com/rust-lang/regex/issues/1060

[[test]]
name = "reverse-inner-plus-shorter-than-expected"
regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})'
haystack = '102:12:39'
# One match; spans are the overall match followed by groups 1-3.
matches = [
  [[0, 9], [0, 3], [4, 6], [7, 9]],
]

# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex

# to demonstrate the extent of the rot. Sigh.

# See: https://github.com/rust-lang/regex/issues/1060

[[test]]
name = "reverse-inner-short"
regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])'
haystack = '102:12:39'
# One match; spans are the overall match followed by groups 1-3.
matches = [
  [[0, 9], [0, 3], [4, 6], [7, 9]],
]

# This regression test was found via the RegexSet APIs. It triggered a

# particular code path where a regex was compiled with 'All' match semantics

# (to support overlapping search), but got funneled down into a standard

# leftmost search when calling 'is_match'. This is fine on its own, but the

# leftmost search will use a prefilter and that's where this went awry.

# Namely, since 'All' semantics were used, the aho-corasick prefilter was

# incorrectly compiled with 'Standard' semantics. This was wrong because

# 'Standard' immediately attempts to report a match at every position, even if

# that would mean reporting a match past the leftmost match before reporting

# the leftmost match. This breaks the prefilter contract of never having false

# negatives and leads overall to the engine not finding a match.

# See: https://github.com/rust-lang/regex/issues/1070

[[test]]
name = "prefilter-with-aho-corasick-standard-semantics"
regex = '(?m)^ *v [0-9]'
haystack = 'v 0'
# Each entry is an inline table giving an 'id' and the list of 'spans'.
matches = [
  { id = 0, spans = [[0, 3]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = true
utf8 = true

# This tests that the PikeVM and the meta regex agree on a particular regex.

# This test previously failed when the ad hoc engines inside the meta engine

# did not handle quit states correctly. Namely, the Unicode word boundary here

# combined with a non-ASCII codepoint provokes the quit state. The ad hoc

# engines were previously returning a match even after entering the quit state

# if a match had been previously detected, but this is incorrect. The reason

# is that if a quit state is found, then the search must give up *immediately*

# because it prevents the search from finding the "proper" leftmost-first

# match. If it instead returns a match that has been found, it risks reporting

# an improper match, as it did in this case.

# See: https://github.com/rust-lang/regex/issues/1046

[[test]]
name = "non-prefix-literal-quit-state"
regex = '.+\b\n'
haystack = "β77\n"
# Byte offsets: 'β' takes 2 bytes in UTF-8, so the haystack is 5 bytes long.
matches = [[0, 5]]

# This is a regression test for some errant HIR interval set operations that

# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The

# issue here is that the HIR produced from the regex had out-of-order ranges.

# See: https://github.com/rust-lang/regex/issues/1103

# Ref: https://github.com/rust-lang/regex/pull/1051

# Ref: https://github.com/rust-lang/regex/pull/1102

[[test]]
name = "hir-optimization-out-of-order-class"
regex = '^[[:alnum:]./-]+$'
haystack = "a-b"
matches = [[0, 3]]

# This is a regression test for an improper reverse suffix optimization. This

# occurred when I "broadened" the applicability of the optimization to include

# multiple possible literal suffixes instead of only sticking to a non-empty

# longest common suffix. It turns out that, at least given how the reverse

# suffix optimization works, we need to stick to the longest common suffix for

# now.

# See: https://github.com/rust-lang/regex/issues/1110

# See also: https://github.com/astral-sh/ruff/pull/7980

[[test]]
name = "improper-reverse-suffix-optimization"
regex = '(\\N\{[^}]+})|([{}])'
haystack = 'hiya \N{snowman} bye'
# One match; spans are the overall match, group 1, group 2 (listed as []).
matches = [
  [[5, 16], [5, 16], []],
]