Revision control

Copy as Markdown

Other Tools

# This tests that we can switch the line terminator to the NUL byte.
[[test]]
name = "nul"
regex = '(?m)^[a-z]+$'
haystack = '\x00abc\x00'
matches = [[1, 4]]
unescape = true
line-terminator = '\x00'
# This tests that '.' will not match the configured line terminator, but will
# match \n.
[[test]]
name = "dot-changes-with-line-terminator"
regex = '.'
haystack = '\x00\n'
matches = [[1, 2]]
unescape = true
line-terminator = '\x00'
# This tests that when we switch the line terminator, \n is no longer
# recognized as the terminator.
[[test]]
name = "not-line-feed"
regex = '(?m)^[a-z]+$'
haystack = '\nabc\n'
matches = []
unescape = true
line-terminator = '\x00'
# This tests that we can set the line terminator to a non-ASCII byte and have
# it behave as expected.
[[test]]
name = "non-ascii"
regex = '(?m)^[a-z]+$'
haystack = '\xFFabc\xFF'
matches = [[1, 4]]
unescape = true
line-terminator = '\xFF'
utf8 = false
# This tests that we can set the line terminator to a byte corresponding to a
# word character, and things work as expected.
[[test]]
name = "word-byte"
regex = '(?m)^[a-z]+$'
haystack = 'ZabcZ'
matches = [[1, 4]]
unescape = true
line-terminator = 'Z'
# This tests that we can set the line terminator to a byte corresponding to a
# non-word character, and things work as expected.
[[test]]
name = "non-word-byte"
regex = '(?m)^[a-z]+$'
haystack = '%abc%'
matches = [[1, 4]]
unescape = true
line-terminator = '%'
# This combines "set line terminator to a word byte" with a word boundary
# assertion, which should result in no match even though ^/$ matches.
[[test]]
name = "word-boundary"
regex = '(?m)^\b[a-z]+\b$'
haystack = 'ZabcZ'
matches = []
unescape = true
line-terminator = 'Z'
# Like 'word-boundary', but does an anchored search at the point where ^
# matches, but where \b should not.
[[test]]
name = "word-boundary-at"
regex = '(?m)^\b[a-z]+\b$'
haystack = 'ZabcZ'
matches = []
bounds = [1, 4]
anchored = true
unescape = true
line-terminator = 'Z'
# Like 'word-boundary-at', but flips the word boundary to a negation. This
# in particular tests a tricky case in DFA engines, where they must consider
# explicitly that a starting configuration from a custom line terminator may
# also required setting the "is from word byte" flag on a state. Otherwise,
# it's treated as "not from a word byte," which would result in \B not matching
# here when it should.
[[test]]
name = "not-word-boundary-at"
regex = '(?m)^\B[a-z]+\B$'
haystack = 'ZabcZ'
matches = [[1, 4]]
bounds = [1, 4]
anchored = true
unescape = true
line-terminator = 'Z'