// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/decode_rust_punycode.h"
#include <cstddef>
#include <cstring>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
using ::testing::AllOf;
using ::testing::Eq;
using ::testing::IsNull;
using ::testing::Pointee;
using ::testing::ResultOf;
using ::testing::StrEq;
class DecodeRustPunycodeTest : public ::testing::Test {
protected:
void FillBufferWithNonzeroBytes() {
// The choice of nonzero value to fill with is arbitrary. The point is just
// to fail tests if DecodeRustPunycode forgets to write the final NUL
// character.
std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_));
}
DecodeRustPunycodeOptions WithAmpleSpace() {
FillBufferWithNonzeroBytes();
DecodeRustPunycodeOptions options;
options.punycode_begin = punycode_.data();
options.punycode_end = punycode_.data() + punycode_.size();
options.out_begin = buffer_storage_;
options.out_end = buffer_storage_ + sizeof(buffer_storage_);
return options;
}
DecodeRustPunycodeOptions WithJustEnoughSpace() {
FillBufferWithNonzeroBytes();
const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1;
DecodeRustPunycodeOptions options;
options.punycode_begin = punycode_.data();
options.punycode_end = punycode_.data() + punycode_.size();
options.out_begin = buffer_storage_ + begin_offset;
options.out_end = buffer_storage_ + sizeof(buffer_storage_);
return options;
}
DecodeRustPunycodeOptions WithOneByteTooFew() {
FillBufferWithNonzeroBytes();
const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size();
DecodeRustPunycodeOptions options;
options.punycode_begin = punycode_.data();
options.punycode_end = punycode_.data() + punycode_.size();
options.out_begin = buffer_storage_ + begin_offset;
options.out_end = buffer_storage_ + sizeof(buffer_storage_);
return options;
}
// Matches a correct return value of DecodeRustPunycode when `golden` is the
// expected plaintext output.
auto PointsToTheNulAfter(const std::string& golden) {
const size_t golden_size = golden.size();
return AllOf(
Pointee(Eq('\0')),
ResultOf("preceding string body",
[golden_size](const char* p) { return p - golden_size; },
StrEq(golden)));
}
std::string punycode_;
std::string plaintext_;
char buffer_storage_[1024];
};
// An empty encoding decodes to an empty string.  The decoder is exercised with
// the whole buffer, with room for exactly the text plus its NUL, and with one
// byte less than that, which must yield a null result.
TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) {
  punycode_ = "";
  plaintext_ = "";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Input consisting only of basic characters followed by the '_' delimiter
// decodes to those characters with the delimiter removed.
TEST_F(DecodeRustPunycodeTest,
       StripsTheTrailingDelimiterFromAPureRunOfBasicChars) {
  punycode_ = "foo_";
  plaintext_ = "foo";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// When the input holds several underscores, only the final one acts as the
// delimiter; the earlier one is kept as part of the decoded text.
TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) {
  punycode_ = "foo_bar_";
  plaintext_ = "foo_bar";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// A leading underscore is accepted as ordinary text as long as a later
// underscore serves as the delimiter.
TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) {
  punycode_ = "_foo_";
  plaintext_ = "_foo";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Here the only underscore is the first byte, so it would have to be the
// delimiter; the decoder is expected to return null even with ample space.
TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) {
  punycode_ = "_foo";

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
// A NUL byte in the middle of the input must cause a null result.  The
// explicit length of 8 keeps the std::string constructor from stopping at the
// embedded NUL.
TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) {
  punycode_ = std::string("foo\0bar_", 8);

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
// ASCII bytes that cannot appear in an identifier must be rejected.  The
// samples are a control character (BEL), two punctuation marks, and DEL.
TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) {
  punycode_ = "foo\007_";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "foo-_";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "foo;_";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "foo\177_";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
// Bytes with the high bit set are not valid in the encoded input, whether or
// not a delimiter follows them.  0x80 and 0xff are tried in both forms.
TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) {
  punycode_ = "\x80";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "\x80_";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "\xff";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "\xff_";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
// The digit 'a' encodes a delta of 0, so the lone input "a" decodes to the
// smallest non-ASCII code point, U+0080, whose UTF-8 form is C2 80.
TEST_F(DecodeRustPunycodeTest, RecognizesU0080) {
  // U+0080 PAD is not an identifier character, but DecodeRustPunycode does not
  // check whether non-ASCII characters could belong to an identifier.
  punycode_ = "a";
  plaintext_ = "\xc2\x80";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// A single digit other than a/A cannot form a complete delta sequence, so each
// of these one-byte inputs must be rejected.
TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) {
  // Because bias = 72 for the first code point, any digit but a/A is nonfinal
  // in one of the first two bytes of a delta sequence.
  punycode_ = "b";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "z";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "0";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "9";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
// The two-digit delta sequence "ba" decodes to U+0081 (UTF-8 C2 81).
TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) {
  punycode_ = "ba";
  plaintext_ = "\xc2\x81";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Two-digit delta sequences ending in 'a' decode to a single code point chosen
// by the first digit: 'c' gives U+0082, 'z' gives U+0099, '0' gives U+009A,
// '1' gives U+009B, and '9' gives U+00A3.  Each case is checked with an output
// range that has room for exactly the text plus its NUL.
TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) {
  punycode_ = "ca";
  plaintext_ = "\xc2\x82";
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "za";
  plaintext_ = "\xc2\x99";
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "0a";
  plaintext_ = "\xc2\x9a";
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "1a";
  plaintext_ = "\xc2\x9b";
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "9a";
  plaintext_ = "£";  // Pound sign, U+00A3
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
}
// A two-digit input whose second digit is not 'a' leaves the delta sequence
// unterminated, so the decoder must return null.
TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) {
  punycode_ = "bb";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "zz";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "00";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());

  punycode_ = "99";
  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
// Three-digit delta sequences of the form "?ba" continue the code-point range
// reached by the two-digit "?a" sequences: "bba" gives U+00A4 and "9ba" gives
// U+00C6.  Each case is checked with an output range that has room for
// exactly the text plus its NUL.
TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) {
  punycode_ = "bba";
  plaintext_ = "¤";  // U+00A4
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "cba";
  plaintext_ = "¥";  // U+00A5
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "zba";
  plaintext_ = "¼";  // U+00BC
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "0ba";
  plaintext_ = "½";  // U+00BD
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "1ba";
  plaintext_ = "¾";  // U+00BE
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));

  punycode_ = "9ba";
  plaintext_ = "Æ";  // U+00C6
  EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
}
// Tests beyond this point use characters allowed in identifiers, so you can
// prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test
// input and run it through another Rust demangler to verify that the
// corresponding golden output is correct.
// A lone delta sequence decodes to a single two-byte UTF-8 character.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) {
  punycode_ = "0ca";
  plaintext_ = "à";  // U+00E0

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The decoded two-byte character (U+00E0) lands in front of the basic
// characters taken from the part of the input before the last underscore.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) {
  punycode_ = "_la_mode_yya";
  plaintext_ = "à_la_mode";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The decoded two-byte character (U+00E0) is inserted between basic
// characters, here between the two adjacent underscores of the input.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) {
  punycode_ = "verre__vin_m4a";
  plaintext_ = "verre_à_vin";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The decoded two-byte character (U+00E0) is appended after all of the basic
// characters.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) {
  punycode_ = "belt_3na";
  plaintext_ = "beltà";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// After "0ca" yields one U+00E0, each further 'a' is a zero delta that adds
// another copy of the same character, for four in total.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) {
  punycode_ = "0caaaa";
  plaintext_ = "àààà";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Five distinct two-byte characters whose code points increase from left to
// right: U+00E3, U+00E9, U+00EF, U+00F4, U+00F9.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) {
  punycode_ = "3camsuz";
  plaintext_ = "ãéïôù";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The same five two-byte characters as the in-order test, but arranged so
// that their code points do not increase from left to right.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) {
  punycode_ = "3caltsx";
  plaintext_ = "ùéôãï";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// A lone delta sequence decodes to a single three-byte UTF-8 character.
TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) {
  punycode_ = "fiq";
  plaintext_ = "中";  // U+4E2D

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// After "fiq" yields one U+4E2D, each further 'a' is a zero delta that adds
// another copy of the same character, for five in total.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) {
  punycode_ = "fiqaaaa";
  plaintext_ = "中中中中中";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Two three-byte characters with the lower code point first: U+4E2D, U+6587.
TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) {
  punycode_ = "fiq228c";
  plaintext_ = "中文";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Two three-byte characters with the higher code point first: U+6587, U+4E2D.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) {
  punycode_ = "fiq128c";
  plaintext_ = "文中";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// A lone delta sequence decodes to a single four-byte UTF-8 character.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) {
  punycode_ = "uy7h";
  plaintext_ = "🂻";  // U+1F0BB

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Combines basic characters with one four-byte character (U+1F0BB).
// NOTE(review): in this input the four-byte character follows the basic
// characters, which does not match the test name -- confirm whether this name
// and that of AcceptsFourByteCharAfterBasicChars were meant to be exchanged.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) {
  punycode_ = "jack__uh63d";
  plaintext_ = "jack_🂻";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The decoded four-byte character (U+1F0BB) is inserted between basic
// characters, here between the two adjacent underscores of the input.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) {
  punycode_ = "jack__of_hearts_ki37n";
  plaintext_ = "jack_🂻_of_hearts";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Combines basic characters with one four-byte character (U+1F0BB).
// NOTE(review): in this input the four-byte character precedes the basic
// characters, which does not match the test name -- confirm whether this name
// and that of AcceptsFourByteCharBeforeBasicChars were meant to be exchanged.
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) {
  punycode_ = "_of_hearts_kz45i";
  plaintext_ = "🂻_of_hearts";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// After "uy7h" yields one U+1F0BB, each further 'a' is a zero delta that adds
// another copy of the same character, for five in total.
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) {
  punycode_ = "uy7haaaa";
  plaintext_ = "🂻🂻🂻🂻🂻";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// Five distinct four-byte characters whose code points increase from left to
// right: U+1F0A6, U+1F0A7, U+1F0AA, U+1F0AD, U+1F0AE.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) {
  punycode_ = "8x7hcjmf";
  plaintext_ = "🂦🂧🂪🂭🂮";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The same five four-byte characters as the in-order test, but arranged so
// that their code points do not increase from left to right.
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) {
  punycode_ = "8x7hcild";
  plaintext_ = "🂮🂦🂭🂪🂧";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// One input that decodes to two-byte, three-byte, and four-byte characters in
// a single string, reusing the out-of-order samples from the earlier tests.
TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) {
  punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak";
  plaintext_ = "ùéôãï中文🂮🂦🂭🂪🂧";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// A long run of nonfinal digits builds up a delta that the decoder is
// expected to refuse, returning null even with ample output space.
TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) {
  punycode_ = "123456789a";

  EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
// Finally, we test on a few prose and poetry snippets as a defense in depth.
// If our artificial short test inputs did not exercise a bug that is tickled by
// patterns typical of real human writing, maybe real human writing will catch
// that.
//
// These test inputs are extracted from texts old enough to be out of copyright
// that probe a variety of ranges of code-point space. All are longer than 32
// code points, so they exercise the carrying of seminibbles from one uint64_t
// to the next higher one in BoundedUtf8LengthSequence.
// The first three lines of the Old English epic _Beowulf_, mostly ASCII with a
// few archaic two-byte letters (æ, þ, ð) interspersed.
TEST_F(DecodeRustPunycodeTest, Beowulf) {
  punycode_ = "hwt_we_gardena_in_geardagum_"
              "eodcyninga_rym_gefrunon_"
              "hu_a_elingas_ellen_fremedon_hxg9c70do9alau";
  plaintext_ = "hwæt_we_gardena_in_geardagum_"
               "þeodcyninga_þrym_gefrunon_"
               "hu_ða_æþelingas_ellen_fremedon";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The whole of ้ๆ
ไบบ่ by the 8th-century Chinese poet ๅญๆตฉ็ถ
// (Meng Haoran), exercising three-byte-character processing.
TEST_F(DecodeRustPunycodeTest, MengHaoran) {
punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6"
"3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta";
plaintext_ = "ๆ
ไบบๅ
ท้้ป" "้ๆ่ณ็ฐๅฎถ"
"็ถ ๆจนๆ้ๅ" "้ๅฑฑ้ญๅคๆ"
"้่ป้ขๅ ดๅ" "ๆ้
่ฉฑๆก้บป"
"ๅพ
ๅฐ้้ฝๆฅ" "้ไพๅฐฑ่่ฑ";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura).
// The text interleaves hiragana with kanji, each of which occupies three
// bytes in UTF-8: a good workout for codecs.
TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) {
  punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0"
              "em23do0op23cc2ff70mb6tae8aq759gja";
  plaintext_ = "瓜食めば"
               "子ども思ほゆ"
               "栗食めば"
               "まして偲はゆ"
               "何処より"
               "来りしものそ"
               "眼交に"
               "もとな懸りて"
               "安眠し寝さぬ";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The first two lines of the Phoenician-language inscription on the sarcophagus
// of Eshmunazar II of Sidon, 6th century BCE.  Phoenician and many other
// archaic scripts are allocated in the Supplementary Multilingual Plane
// (U+10000 through U+1FFFF) and thus exercise four-byte-character processing.
//
// The golden text holds 90 code points in logical (reading) order: Phoenician
// letters from U+10900 through U+10915 plus the number signs U+10916 and
// U+10917.  Editors that apply bidirectional layout may display each literal
// right to left.
TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) {
  punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee"
              "ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g";
  plaintext_ = "𐤁𐤉𐤓𐤇𐤁𐤋𐤁𐤔𐤍𐤕𐤏𐤎𐤓"
               "𐤅𐤀𐤓𐤁𐤏𐤗𐤖𐤖𐤖𐤖𐤋𐤌𐤋𐤊𐤉𐤌𐤋𐤊"
               "𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
               "𐤁𐤍𐤌𐤋𐤊𐤕𐤁𐤍𐤕𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
               "𐤃𐤁𐤓𐤌𐤋𐤊𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊"
               "𐤑𐤃𐤍𐤌𐤋𐤀𐤌𐤓𐤍𐤂𐤆𐤋𐤕";

  ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
              PointsToTheNulAfter(plaintext_));
  ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
              PointsToTheNulAfter(plaintext_));
  EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
} // namespace
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl