# Unicode regex test cases. Each [[test]] table gives a regex, a haystack,
# and the expected matches as [start, end] byte offsets into the haystack.

# Basic Unicode literal support.
# Offsets in `matches` are byte positions: the snowman occupies 3 bytes in
# UTF-8, so a match over it spans [0, 3].
[[test]]
name = "literal1"
regex = '☃'
haystack = "☃"
matches = [[0, 3]]

[[test]]
name = "literal2"
regex = '☃+'
haystack = "☃"
matches = [[0, 3]]

# Same pattern and haystack as literal2, with case-insensitive matching on.
[[test]]
name = "literal3"
regex = '☃+'
haystack = "☃"
matches = [[0, 3]]
case-insensitive = true

# An uppercase delta in the pattern is expected to match a lowercase delta
# (2 bytes) when matching case-insensitively.
[[test]]
name = "literal4"
regex = 'Δ'
haystack = "δ"
matches = [[0, 2]]
case-insensitive = true
# Unicode word boundaries.
# wb-100 and wb-200 require a word boundary (\b) after the digit; wb-300 and
# wb-400 require its negation (\B). The expectations treat δ as a word
# character and the ASCII space as a non-word character.
[[test]]
name = "wb-100"
regex = '\d\b'
haystack = "6δ"
matches = []

[[test]]
name = "wb-200"
regex = '\d\b'
haystack = "6 "
matches = [[0, 1]]

[[test]]
name = "wb-300"
regex = '\d\B'
haystack = "6δ"
matches = [[0, 1]]

[[test]]
name = "wb-400"
regex = '\d\B'
haystack = "6 "
matches = []
# Unicode character class support.
# Covers bracketed classes with non-ASCII members, the one-letter forms
# \pN / \PN / \pL, and the braced forms \p{Lu} / \p{Ll}.
[[test]]
name = "class1"
regex = '[☃Ⅰ]+'
haystack = "☃"
matches = [[0, 3]]

[[test]]
name = "class2"
regex = '\pN'
haystack = "Ⅰ"
matches = [[0, 3]]

# The Roman numerals take 3 bytes each and the ASCII digits 1 byte each.
[[test]]
name = "class3"
regex = '\pN+'
haystack = "Ⅰ1Ⅱ2"
matches = [[0, 8]]

# The negated class stops before the Roman numeral at byte 2.
[[test]]
name = "class4"
regex = '\PN+'
haystack = "abⅠ"
matches = [[0, 2]]

# Same expectation as class4, with the negated class nested in brackets.
[[test]]
name = "class5"
regex = '[\PN]+'
haystack = "abⅠ"
matches = [[0, 2]]

# Double negation: only the Roman numeral at bytes 2..5 is expected to match.
[[test]]
name = "class6"
regex = '[^\PN]+'
haystack = "abⅠ"
matches = [[2, 5]]

# Four uppercase Greek letters (2 bytes each); the trailing α is excluded.
[[test]]
name = "class7"
regex = '\p{Lu}+'
haystack = "ΛΘΓΔα"
matches = [[0, 8]]

# With case-insensitive matching the trailing α is expected to match too.
[[test]]
name = "class8"
regex = '\p{Lu}+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]
case-insensitive = true

[[test]]
name = "class9"
regex = '\pL+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]

# Only the trailing α (bytes 8..10) is expected to match.
[[test]]
name = "class10"
regex = '\p{Ll}+'
haystack = "ΛΘΓΔα"
matches = [[8, 10]]
# Unicode aware "Perl" character classes.
# Each of \w, \d and \s is paired with its negation (\W, \D, \S) on a
# non-ASCII haystack.
[[test]]
name = "perl1"
regex = '\w+'
haystack = "dδd"
matches = [[0, 4]]

# The arrow symbol is expected not to be a word character...
[[test]]
name = "perl2"
regex = '\w+'
haystack = "⥡"
matches = []

# ...so the negated class matches all 3 of its bytes.
[[test]]
name = "perl3"
regex = '\W+'
haystack = "⥡"
matches = [[0, 3]]

# The two non-ASCII digits (3 bytes each) sit between two ASCII digits.
[[test]]
name = "perl4"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 8]]

# A Roman numeral is expected not to match \d.
[[test]]
name = "perl5"
regex = '\d+'
haystack = "Ⅱ"
matches = []

[[test]]
name = "perl6"
regex = '\D+'
haystack = "Ⅱ"
matches = [[0, 3]]
# The haystack is U+1680 OGHAM SPACE MARK, a non-ASCII whitespace character
# that occupies 3 bytes in UTF-8. It is written as an escape so it cannot be
# mistaken for, or silently normalized to, a 1-byte ASCII space, which would
# contradict the expected [0, 3] span.
[[test]]
name = "perl7"
regex = '\s+'
haystack = "\u1680"
matches = [[0, 3]]
# The snowman is expected not to be whitespace, so \s+ finds nothing...
[[test]]
name = "perl8"
regex = '\s+'
haystack = "☃"
matches = []

# ...and \S+ matches all 3 of its bytes.
[[test]]
name = "perl9"
regex = '\S+'
haystack = "☃"
matches = [[0, 3]]
# Specific tests for Unicode general category classes.
# The haystack is U+FF21 FULLWIDTH LATIN CAPITAL LETTER A, a cased letter
# that occupies 3 bytes in UTF-8. It is written as an escape so it cannot be
# normalized to ASCII "A", which is only 1 byte and would contradict the
# expected [0, 3] span.
[[test]]
name = "class-gencat1"
regex = '\p{Cased_Letter}'
haystack = "\uFF21"
matches = [[0, 3]]
[[test]]
name = "class-gencat2"
regex = '\p{Close_Punctuation}'
haystack = "❯"
matches = [[0, 3]]

[[test]]
name = "class-gencat3"
regex = '\p{Connector_Punctuation}'
haystack = "⁀"
matches = [[0, 3]]

# U+009F is written as an escape and takes 2 bytes in UTF-8.
[[test]]
name = "class-gencat4"
regex = '\p{Control}'
haystack = "\u009F"
matches = [[0, 2]]
# The haystack is U+FFE1 FULLWIDTH POUND SIGN, a currency symbol that
# occupies 3 bytes in UTF-8. It is written as an escape so it cannot be
# normalized to U+00A3 POUND SIGN, which is only 2 bytes and would contradict
# the expected [0, 3] span.
[[test]]
name = "class-gencat5"
regex = '\p{Currency_Symbol}'
haystack = "\uFFE1"
matches = [[0, 3]]
# In the general category tests below, each haystack is a single code point,
# so the expected span is that code point's UTF-8 length (2, 3 or 4 bytes).
# Haystacks given as \u / \U escapes are kept in escaped form.
[[test]]
name = "class-gencat6"
regex = '\p{Dash_Punctuation}'
haystack = "〰"
matches = [[0, 3]]

[[test]]
name = "class-gencat7"
regex = '\p{Decimal_Number}'
haystack = "𑓙"
matches = [[0, 4]]

[[test]]
name = "class-gencat8"
regex = '\p{Enclosing_Mark}'
haystack = "\uA672"
matches = [[0, 3]]

[[test]]
name = "class-gencat9"
regex = '\p{Final_Punctuation}'
haystack = "⸡"
matches = [[0, 3]]

[[test]]
name = "class-gencat10"
regex = '\p{Format}'
haystack = "\U000E007F"
matches = [[0, 4]]

[[test]]
name = "class-gencat11"
regex = '\p{Initial_Punctuation}'
haystack = "⸜"
matches = [[0, 3]]

[[test]]
name = "class-gencat12"
regex = '\p{Letter}'
haystack = "Έ"
matches = [[0, 2]]

[[test]]
name = "class-gencat13"
regex = '\p{Letter_Number}'
haystack = "ↂ"
matches = [[0, 3]]

[[test]]
name = "class-gencat14"
regex = '\p{Line_Separator}'
haystack = "\u2028"
matches = [[0, 3]]

[[test]]
name = "class-gencat15"
regex = '\p{Lowercase_Letter}'
haystack = "ϛ"
matches = [[0, 2]]

[[test]]
name = "class-gencat16"
regex = '\p{Mark}'
haystack = "\U000E01EF"
matches = [[0, 4]]

[[test]]
name = "class-gencat17"
regex = '\p{Math}'
haystack = "⋿"
matches = [[0, 3]]

[[test]]
name = "class-gencat18"
regex = '\p{Modifier_Letter}'
haystack = "𖭃"
matches = [[0, 4]]

[[test]]
name = "class-gencat19"
regex = '\p{Modifier_Symbol}'
haystack = "🏿"
matches = [[0, 4]]

[[test]]
name = "class-gencat20"
regex = '\p{Nonspacing_Mark}'
haystack = "\U0001E94A"
matches = [[0, 4]]

[[test]]
name = "class-gencat21"
regex = '\p{Number}'
haystack = "⓿"
matches = [[0, 3]]
# Remaining general category tests. As above, each haystack is a single code
# point and the expected span is its UTF-8 byte length.
[[test]]
name = "class-gencat22"
regex = '\p{Open_Punctuation}'
haystack = "⦅"
matches = [[0, 3]]

[[test]]
name = "class-gencat23"
regex = '\p{Other}'
haystack = "\u0BC9"
matches = [[0, 3]]

[[test]]
name = "class-gencat24"
regex = '\p{Other_Letter}'
haystack = "ꓷ"
matches = [[0, 3]]

[[test]]
name = "class-gencat25"
regex = '\p{Other_Number}'
haystack = "㉏"
matches = [[0, 3]]

[[test]]
name = "class-gencat26"
regex = '\p{Other_Punctuation}'
haystack = "𞥞"
matches = [[0, 4]]

[[test]]
name = "class-gencat27"
regex = '\p{Other_Symbol}'
haystack = "⅌"
matches = [[0, 3]]

[[test]]
name = "class-gencat28"
regex = '\p{Paragraph_Separator}'
haystack = "\u2029"
matches = [[0, 3]]

[[test]]
name = "class-gencat29"
regex = '\p{Private_Use}'
haystack = "\U0010FFFD"
matches = [[0, 4]]

[[test]]
name = "class-gencat30"
regex = '\p{Punctuation}'
haystack = "𑁍"
matches = [[0, 4]]

[[test]]
name = "class-gencat31"
regex = '\p{Separator}'
haystack = "\u3000"
matches = [[0, 3]]

[[test]]
name = "class-gencat32"
regex = '\p{Space_Separator}'
haystack = "\u205F"
matches = [[0, 3]]

[[test]]
name = "class-gencat33"
regex = '\p{Spacing_Mark}'
haystack = "\U00016F7E"
matches = [[0, 4]]

[[test]]
name = "class-gencat34"
regex = '\p{Symbol}'
haystack = "⯈"
matches = [[0, 3]]

[[test]]
name = "class-gencat35"
regex = '\p{Titlecase_Letter}'
haystack = "ῼ"
matches = [[0, 3]]

[[test]]
name = "class-gencat36"
regex = '\p{Unassigned}'
haystack = "\U0010FFFF"
matches = [[0, 4]]

[[test]]
name = "class-gencat37"
regex = '\p{Uppercase_Letter}'
haystack = "Ꝋ"
matches = [[0, 3]]
# Tests for Unicode emoji properties.
# The property name appears both capitalized (Emoji) and lowercase (emoji,
# extendedpictographic); every spelling used here is expected to match.
[[test]]
name = "class-emoji1"
regex = '\p{Emoji}'
haystack = "\u23E9"
matches = [[0, 3]]

[[test]]
name = "class-emoji2"
regex = '\p{emoji}'
haystack = "\U0001F21A"
matches = [[0, 4]]

[[test]]
name = "class-emoji3"
regex = '\p{extendedpictographic}'
haystack = "\U0001FA6E"
matches = [[0, 4]]

[[test]]
name = "class-emoji4"
regex = '\p{extendedpictographic}'
haystack = "\U0001FFFD"
matches = [[0, 4]]
# Tests for Unicode grapheme cluster properties.
# The property is spelled several ways: the long name
# (grapheme_cluster_break=...), the short name (gcb=...), an abbreviated
# value (ri), and a bare value name with no property prefix
# (regionalindicator).
[[test]]
name = "class-gcb1"
regex = '\p{grapheme_cluster_break=prepend}'
haystack = "\U00011D46"
matches = [[0, 4]]

[[test]]
name = "class-gcb2"
regex = '\p{gcb=regional_indicator}'
haystack = "\U0001F1E6"
matches = [[0, 4]]

[[test]]
name = "class-gcb3"
regex = '\p{gcb=ri}'
haystack = "\U0001F1E7"
matches = [[0, 4]]

[[test]]
name = "class-gcb4"
regex = '\p{regionalindicator}'
haystack = "\U0001F1FF"
matches = [[0, 4]]

[[test]]
name = "class-gcb5"
regex = '\p{gcb=lvt}'
haystack = "\uC989"
matches = [[0, 3]]

[[test]]
name = "class-gcb6"
regex = '\p{gcb=zwj}'
haystack = "\u200D"
matches = [[0, 3]]
# Tests for Unicode word boundary properties.
# class-word-break1 and class-word-break2 query the same code point through
# the long (word_break=Hebrew_Letter) and short (wb=hebrewletter) spellings.
[[test]]
name = "class-word-break1"
regex = '\p{word_break=Hebrew_Letter}'
haystack = "\uFB46"
matches = [[0, 3]]

[[test]]
name = "class-word-break2"
regex = '\p{wb=hebrewletter}'
haystack = "\uFB46"
matches = [[0, 3]]

[[test]]
name = "class-word-break3"
regex = '\p{wb=ExtendNumLet}'
haystack = "\uFF3F"
matches = [[0, 3]]

[[test]]
name = "class-word-break4"
regex = '\p{wb=WSegSpace}'
haystack = "\u3000"
matches = [[0, 3]]

[[test]]
name = "class-word-break5"
regex = '\p{wb=numeric}'
haystack = "\U0001E950"
matches = [[0, 4]]
# Tests for Unicode sentence boundary properties.
# class-sentence-break1 and class-sentence-break2 query the same code point
# through the long (sentence_break=Lower) and short (sb=lower) spellings.
[[test]]
name = "class-sentence-break1"
regex = '\p{sentence_break=Lower}'
haystack = "\u0469"
matches = [[0, 2]]

[[test]]
name = "class-sentence-break2"
regex = '\p{sb=lower}'
haystack = "\u0469"
matches = [[0, 2]]

# sb=Close is checked against both a 3-byte and a 4-byte code point.
[[test]]
name = "class-sentence-break3"
regex = '\p{sb=Close}'
haystack = "\uFF60"
matches = [[0, 3]]

[[test]]
name = "class-sentence-break4"
regex = '\p{sb=Close}'
haystack = "\U0001F677"
matches = [[0, 4]]

[[test]]
name = "class-sentence-break5"
regex = '\p{sb=SContinue}'
haystack = "\uFF64"
matches = [[0, 3]]