Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2
* vim: set ts=8 sts=2 et sw=2 tw=80:
3
* This Source Code Form is subject to the terms of the Mozilla Public
4
* License, v. 2.0. If a copy of the MPL was not distributed with this
5
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include "builtin/String.h"
8
9
#include "mozilla/ArrayUtils.h"
10
#include "mozilla/Attributes.h"
11
#include "mozilla/CheckedInt.h"
12
#include "mozilla/FloatingPoint.h"
13
#include "mozilla/PodOperations.h"
14
#include "mozilla/Range.h"
15
#include "mozilla/TextUtils.h"
16
#include "mozilla/TypeTraits.h"
17
#include "mozilla/Unused.h"
18
19
#include <algorithm>
20
#include <limits>
21
#include <string.h>
22
#include <type_traits>
23
24
#include "jsapi.h"
25
#include "jsnum.h"
26
#include "jstypes.h"
27
28
#include "builtin/Array.h"
29
#include "builtin/Boolean.h"
30
#if ENABLE_INTL_API
31
# include "builtin/intl/CommonFunctions.h"
32
#endif
33
#include "builtin/RegExp.h"
34
#include "jit/InlinableNatives.h"
35
#include "js/Conversions.h"
36
#if !ENABLE_INTL_API
37
# include "js/LocaleSensitive.h"
38
#endif
39
#include "js/PropertySpec.h"
40
#include "js/StableStringChars.h"
41
#include "js/UniquePtr.h"
42
#if ENABLE_INTL_API
43
# include "unicode/uchar.h"
44
# include "unicode/unorm2.h"
45
# include "unicode/ustring.h"
46
# include "unicode/utypes.h"
47
#endif
48
#include "util/StringBuffer.h"
49
#include "util/Unicode.h"
50
#include "vm/BytecodeUtil.h"
51
#include "vm/GlobalObject.h"
52
#include "vm/Interpreter.h"
53
#include "vm/JSAtom.h"
54
#include "vm/JSContext.h"
55
#include "vm/JSObject.h"
56
#include "vm/Opcodes.h"
57
#include "vm/Printer.h"
58
#include "vm/RegExpObject.h"
59
#include "vm/RegExpStatics.h"
60
#include "vm/SelfHosting.h"
61
62
#include "vm/InlineCharBuffer-inl.h"
63
#include "vm/Interpreter-inl.h"
64
#include "vm/StringObject-inl.h"
65
#include "vm/StringType-inl.h"
66
#include "vm/TypeInference-inl.h"
67
68
using namespace js;
69
70
using JS::Symbol;
71
using JS::SymbolCode;
72
73
using mozilla::AsciiAlphanumericToNumber;
74
using mozilla::CheckedInt;
75
using mozilla::IsAsciiHexDigit;
76
using mozilla::IsNaN;
77
using mozilla::IsSame;
78
using mozilla::PodCopy;
79
using mozilla::RangedPtr;
80
81
using JS::AutoCheckCannotGC;
82
using JS::AutoStableStringChars;
83
84
/*
 * Convert argument |argno| of |args| to a linear string.
 *
 * When the caller supplied fewer than |argno + 1| arguments, the "undefined"
 * name from cx->names() is returned instead. Returns nullptr if either the
 * ToString conversion or the linearization fails.
 */
static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
                                         unsigned argno) {
  if (argno < args.length()) {
    JSString* converted = ToString<CanGC>(cx, args[argno]);
    if (!converted) {
      return nullptr;
    }
    return converted->ensureLinear(cx);
  }

  // The argument is absent: behave as if |undefined| had been passed.
  return cx->names().undefined;
}
97
98
/*
99
* Forward declarations for URI encode/decode and helper routines
100
*/
101
static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
102
103
static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
104
105
static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
106
107
static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
108
109
/*
110
* Global string methods
111
*/
112
113
/* ES5 B.2.1 */
114
template <typename CharT>
115
static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
116
InlineCharBuffer<Latin1Char>& newChars,
117
uint32_t* newLengthOut) {
118
// clang-format off
119
static const uint8_t shouldPassThrough[128] = {
120
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
121
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
122
0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
123
1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
124
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
125
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
126
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
127
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
128
};
129
// clang-format on
130
131
/* Take a first pass and see how big the result string will need to be. */
132
uint32_t newLength = length;
133
for (size_t i = 0; i < length; i++) {
134
char16_t ch = chars[i];
135
if (ch < 128 && shouldPassThrough[ch]) {
136
continue;
137
}
138
139
/*
140
* newlength is incremented below by at most 5 and at this point it must
141
* be a valid string length, so this should never overflow uint32_t.
142
*/
143
static_assert(JSString::MAX_LENGTH < UINT32_MAX - 5,
144
"Adding 5 to valid string length should not overflow");
145
146
MOZ_ASSERT(newLength <= JSString::MAX_LENGTH);
147
148
/* The character will be encoded as %XX or %uXXXX. */
149
newLength += (ch < 256) ? 2 : 5;
150
151
if (MOZ_UNLIKELY(newLength > JSString::MAX_LENGTH)) {
152
ReportAllocationOverflow(cx);
153
return false;
154
}
155
}
156
157
if (newLength == length) {
158
*newLengthOut = newLength;
159
return true;
160
}
161
162
if (!newChars.maybeAlloc(cx, newLength)) {
163
return false;
164
}
165
166
static const char digits[] = "0123456789ABCDEF";
167
168
Latin1Char* rawNewChars = newChars.get();
169
size_t i, ni;
170
for (i = 0, ni = 0; i < length; i++) {
171
char16_t ch = chars[i];
172
if (ch < 128 && shouldPassThrough[ch]) {
173
rawNewChars[ni++] = ch;
174
} else if (ch < 256) {
175
rawNewChars[ni++] = '%';
176
rawNewChars[ni++] = digits[ch >> 4];
177
rawNewChars[ni++] = digits[ch & 0xF];
178
} else {
179
rawNewChars[ni++] = '%';
180
rawNewChars[ni++] = 'u';
181
rawNewChars[ni++] = digits[ch >> 12];
182
rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
183
rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
184
rawNewChars[ni++] = digits[ch & 0xF];
185
}
186
}
187
MOZ_ASSERT(ni == newLength);
188
189
*newLengthOut = newLength;
190
return true;
191
}
192
193
/*
 * Native for the global escape() function.
 *
 * Converts the first argument to a linear string, runs Escape() on its
 * characters and returns either the input string itself (when nothing needed
 * escaping) or a new string built from the escaped characters.
 */
static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedLinearString input(cx, ArgToLinearString(cx, args, 0));
  if (!input) {
    return false;
  }

  InlineCharBuffer<Latin1Char> escapedChars;
  uint32_t escapedLength = 0;  // initialize to silence GCC warning
  bool ok;
  {
    // The raw character pointers are only valid while no GC can happen.
    AutoCheckCannotGC nogc;
    ok = input->hasLatin1Chars()
             ? Escape(cx, input->latin1Chars(nogc), input->length(),
                      escapedChars, &escapedLength)
             : Escape(cx, input->twoByteChars(nogc), input->length(),
                      escapedChars, &escapedLength);
  }
  if (!ok) {
    return false;
  }

  // An unchanged length means no character was escaped: return the input.
  if (escapedLength == input->length()) {
    args.rval().setString(input);
    return true;
  }

  JSString* escaped = escapedChars.toString(cx, escapedLength);
  if (!escaped) {
    return false;
  }

  args.rval().setString(escaped);
  return true;
}
231
232
template <typename CharT>
233
static inline bool Unhex4(const RangedPtr<const CharT> chars,
234
char16_t* result) {
235
CharT a = chars[0], b = chars[1], c = chars[2], d = chars[3];
236
237
if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b) && IsAsciiHexDigit(c) &&
238
IsAsciiHexDigit(d))) {
239
return false;
240
}
241
242
char16_t unhex = AsciiAlphanumericToNumber(a);
243
unhex = (unhex << 4) + AsciiAlphanumericToNumber(b);
244
unhex = (unhex << 4) + AsciiAlphanumericToNumber(c);
245
unhex = (unhex << 4) + AsciiAlphanumericToNumber(d);
246
*result = unhex;
247
return true;
248
}
249
250
template <typename CharT>
251
static inline bool Unhex2(const RangedPtr<const CharT> chars,
252
char16_t* result) {
253
CharT a = chars[0], b = chars[1];
254
255
if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b))) {
256
return false;
257
}
258
259
*result = (AsciiAlphanumericToNumber(a) << 4) + AsciiAlphanumericToNumber(b);
260
return true;
261
}
262
263
template <typename CharT>
264
static bool Unescape(StringBuffer& sb,
265
const mozilla::Range<const CharT> chars) {
266
// Step 2.
267
uint32_t length = chars.length();
268
269
/*
270
* Note that the spec algorithm has been optimized to avoid building
271
* a string in the case where no escapes are present.
272
*/
273
bool building = false;
274
275
#define ENSURE_BUILDING \
276
do { \
277
if (!building) { \
278
building = true; \
279
if (!sb.reserve(length)) return false; \
280
sb.infallibleAppend(chars.begin().get(), k); \
281
} \
282
} while (false);
283
284
// Step 4.
285
uint32_t k = 0;
286
287
// Step 5.
288
while (k < length) {
289
// Step 5.a.
290
char16_t c = chars[k];
291
292
// Step 5.b.
293
if (c == '%') {
294
static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
295
"String length is not near UINT32_MAX");
296
297
// Steps 5.b.i-ii.
298
if (k + 6 <= length && chars[k + 1] == 'u') {
299
if (Unhex4(chars.begin() + k + 2, &c)) {
300
ENSURE_BUILDING
301
k += 5;
302
}
303
} else if (k + 3 <= length) {
304
if (Unhex2(chars.begin() + k + 1, &c)) {
305
ENSURE_BUILDING
306
k += 2;
307
}
308
}
309
}
310
311
// Step 5.c.
312
if (building && !sb.append(c)) {
313
return false;
314
}
315
316
// Step 5.d.
317
k += 1;
318
}
319
320
return true;
321
#undef ENSURE_BUILDING
322
}
323
324
// ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
325
// B.2.1.2 unescape ( string )
326
static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
327
CallArgs args = CallArgsFromVp(argc, vp);
328
329
// Step 1.
330
RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
331
if (!str) {
332
return false;
333
}
334
335
// Step 3.
336
JSStringBuilder sb(cx);
337
if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) {
338
return false;
339
}
340
341
// Steps 2, 4-5.
342
if (str->hasLatin1Chars()) {
343
AutoCheckCannotGC nogc;
344
if (!Unescape(sb, str->latin1Range(nogc))) {
345
return false;
346
}
347
} else {
348
AutoCheckCannotGC nogc;
349
if (!Unescape(sb, str->twoByteRange(nogc))) {
350
return false;
351
}
352
}
353
354
// Step 6.
355
JSLinearString* result;
356
if (!sb.empty()) {
357
result = sb.finishString();
358
if (!result) {
359
return false;
360
}
361
} else {
362
result = str;
363
}
364
365
args.rval().setString(result);
366
return true;
367
}
368
369
/*
 * Native for uneval(): sets the return value to ValueToSource() of the first
 * argument (|undefined| when absent). Returns false if ValueToSource fails.
 */
static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (JSString* source = ValueToSource(cx, args.get(0))) {
    args.rval().setString(source);
    return true;
  }

  return false;
}
379
380
/*
 * Function table for the string-related natives declared above: the
 * escape/unescape pair, uneval, and the URI encode/decode functions. Every
 * entry has a declared arity of 1 and is flagged JSPROP_RESOLVING.
 */
static const JSFunctionSpec string_functions[] = {
    // escape / unescape / uneval
    JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
    JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
    JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),

    // URI encoding and decoding
    JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
    JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
    JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,
          JSPROP_RESOLVING),
    JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,
          JSPROP_RESOLVING),

    JS_FS_END};
392
393
static const unsigned STRING_ELEMENT_ATTRS =
394
JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
395
396
/*
 * Enumerate hook for String objects: defines one data element per index of
 * the boxed string, each holding the unit string for that index.
 *
 * Returns false as soon as fetching a unit string or defining an element
 * fails.
 */
static bool str_enumerate(JSContext* cx, HandleObject obj) {
  RootedString boxed(cx, obj->as<StringObject>().unbox());
  js::StaticStrings& staticStrings = cx->staticStrings();

  const size_t length = boxed->length();
  const unsigned attrs = STRING_ELEMENT_ATTRS | JSPROP_RESOLVING;

  RootedValue element(cx);
  for (size_t index = 0; index < length; index++) {
    JSString* unit = staticStrings.getUnitStringForElement(cx, boxed, index);
    if (!unit) {
      return false;
    }

    element.setString(unit);
    if (!DefineDataElement(cx, obj, index, element, attrs)) {
      return false;
    }
  }

  return true;
}
415
416
static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
417
// str_resolve ignores non-integer ids.
418
return JSID_IS_INT(id);
419
}
420
421
static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
422
bool* resolvedp) {
423
if (!JSID_IS_INT(id)) {
424
return true;
425
}
426
427
RootedString str(cx, obj->as<StringObject>().unbox());
428
429
int32_t slot = JSID_TO_INT(id);
430
if ((size_t)slot < str->length()) {
431
JSString* str1 =
432
cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
433
if (!str1) {
434
return false;
435
}
436
RootedValue value(cx, StringValue(str1));
437
if (!DefineDataElement(cx, obj, uint32_t(slot), value,
438
STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
439
return false;
440
}
441
*resolvedp = true;
442
}
443
return true;
444
}
445
446
// Class hooks for String objects: only the enumerate, resolve and mayResolve
// hooks are provided; the remaining listed slots are left null.
static const JSClassOps StringObjectClassOps = {
    nullptr,         /* addProperty */
    nullptr,         /* delProperty */
    str_enumerate,   /* enumerate */
    nullptr,         /* newEnumerate */
    str_resolve,     /* resolve */
    str_mayResolve}; /* mayResolve */
451
452
// JSClass describing String wrapper objects: named js_String_str, with
// StringObject::RESERVED_SLOTS reserved slots, a cached JSProto_String
// prototype, and the hooks from StringObjectClassOps.
const JSClass StringObject::class_ = {
    js_String_str,
    JSCLASS_HAS_CACHED_PROTO(JSProto_String) |
        JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS),
    &StringObjectClassOps};
457
458
/*
459
* Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
460
* from nearly all String.prototype.* functions.
461
*/
462
static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
463
JSContext* cx, HandleValue thisv) {
464
if (!CheckRecursionLimit(cx)) {
465
return nullptr;
466
}
467
468
if (thisv.isString()) {
469
return thisv.toString();
470
}
471
472
if (thisv.isObject()) {
473
RootedObject obj(cx, &thisv.toObject());
474
if (obj->is<StringObject>()) {
475
StringObject* nobj = &obj->as<StringObject>();
476
// We have to make sure that the ToPrimitive call from ToString
477
// would be unobservable.
478
if (HasNoToPrimitiveMethodPure(nobj, cx) &&
479
HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
480
return nobj->unbox();
481
}
482
}
483
} else if (thisv.isNullOrUndefined()) {
484
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
485
JSMSG_CANT_CONVERT_TO,
486
thisv.isNull() ? "null" : "undefined", "object");
487
return nullptr;
488
}
489
490
return ToStringSlow<CanGC>(cx, thisv);
491
}
492
493
// Returns true when |v| is either a primitive string or a String object.
MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
  if (v.isString()) {
    return true;
  }
  return v.isObject() && v.toObject().is<StringObject>();
}
496
497
/*
 * Implementation of toSource for string |this| values: returns the text
 * "(new String(" + QuoteString(str, '"') + "))".
 *
 * |args.thisv()| must satisfy IsString. Returns false when the conversion,
 * the quoting, or building the result string fails.
 */
MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
  MOZ_ASSERT(IsString(args.thisv()));

  JSString* str = ToString<CanGC>(cx, args.thisv());
  if (!str) {
    return false;
  }

  UniqueChars quoted = QuoteString(cx, str, '"');
  if (!quoted) {
    return false;
  }
  const char* quotedChars = quoted.get();

  JSStringBuilder builder(cx);
  if (!builder.append("(new String(")) {
    return false;
  }
  if (!builder.append(quotedChars, strlen(quotedChars))) {
    return false;
  }
  if (!builder.append("))")) {
    return false;
  }

  JSString* source = builder.finishString();
  if (!source) {
    return false;
  }

  args.rval().setString(source);
  return true;
}
523
524
// Native entry point for toSource on strings: dispatches to
// str_toSource_impl through CallNonGenericMethod with IsString as the
// |this| test.
static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
  return CallNonGenericMethod<IsString, str_toSource_impl>(
      cx, CallArgsFromVp(argc, vp));
}
528
529
/*
 * Implementation of toString for string |this| values: returns the primitive
 * string itself, or the string boxed inside a String object.
 *
 * |args.thisv()| must satisfy IsString. Always succeeds.
 */
MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
  MOZ_ASSERT(IsString(args.thisv()));

  JSString* primitive;
  if (args.thisv().isString()) {
    primitive = args.thisv().toString();
  } else {
    primitive = args.thisv().toObject().as<StringObject>().unbox();
  }

  args.rval().setString(primitive);
  return true;
}
538
539
// Native entry point for toString on strings: dispatches to
// str_toString_impl through CallNonGenericMethod with IsString as the
// |this| test.
bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
  return CallNonGenericMethod<IsString, str_toString_impl>(
      cx, CallArgsFromVp(argc, vp));
}
543
544
/*
545
* Java-like string native methods.
546
*/
547
548
/*
 * Return the substring of |str| that starts at |beginInt| and is |lengthInt|
 * code units long. Both values must be non-negative and the range must lie
 * within |str| (asserted).
 *
 * Non-rope strings produce a dependent string. For a rope only its two direct
 * children are inspected: a range that lies entirely inside one child becomes
 * a dependent string of that child, and a range that straddles both becomes a
 * new rope made of one dependent string per child. Returns nullptr when an
 * allocation fails.
 */
JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
                              int32_t lengthInt) {
  MOZ_ASSERT(0 <= beginInt);
  MOZ_ASSERT(0 <= lengthInt);
  MOZ_ASSERT(uint32_t(beginInt) <= str->length());
  MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);

  uint32_t begin = beginInt;
  uint32_t len = lengthInt;

  if (!str->isRope()) {
    return NewDependentString(cx, str, begin, len);
  }

  /*
   * Optimization for one level deep ropes.
   * This is common for the following pattern:
   *
   * while() {
   *   text = text.substr(0, x) + "bla" + text.substr(x)
   *   test.charCodeAt(x + 1)
   * }
   */
  JSRope* rope = &str->asRope();
  const size_t leftLength = rope->leftChild()->length();

  /* Substring is totally in leftChild of rope. */
  if (begin + len <= leftLength) {
    return NewDependentString(cx, rope->leftChild(), begin, len);
  }

  /* Substring is totally in rightChild of rope. */
  if (begin >= leftLength) {
    begin -= leftLength;
    return NewDependentString(cx, rope->rightChild(), begin, len);
  }

  /*
   * Requested substring is partly in the left and partly in the right child.
   * Create a rope of substrings for both children.
   */
  MOZ_ASSERT(begin < leftLength && begin + len > leftLength);

  size_t lhsLength = leftLength - begin;
  size_t rhsLength = begin + len - leftLength;

  // Root the rope: the allocations below can trigger a GC.
  Rooted<JSRope*> ropeRoot(cx, rope);

  RootedString lhs(
      cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
  if (!lhs) {
    return nullptr;
  }

  RootedString rhs(
      cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
  if (!rhs) {
    return nullptr;
  }

  return JSRope::new_<CanGC>(cx, lhs, rhs, len);
}
609
610
/**
611
* U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
612
* depending on its context:
613
* When it's preceded by a cased character and not followed by another cased
614
* character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
615
* Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
616
*
617
* Unicode 9.0, §3.13 Default Case Algorithms
618
*/
619
static char16_t Final_Sigma(const char16_t* chars, size_t length,
620
size_t index) {
621
MOZ_ASSERT(index < length);
622
MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
623
MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
624
unicode::GREEK_SMALL_LETTER_SIGMA);
625
626
#if ENABLE_INTL_API
627
// Tell the analysis the BinaryProperty.contains function pointer called by
628
// u_hasBinaryProperty cannot GC.
629
JS::AutoSuppressGCAnalysis nogc;
630
631
bool precededByCased = false;
632
for (size_t i = index; i > 0;) {
633
char16_t c = chars[--i];
634
uint32_t codePoint = c;
635
if (unicode::IsTrailSurrogate(c) && i > 0) {
636
char16_t lead = chars[i - 1];
637
if (unicode::IsLeadSurrogate(lead)) {
638
codePoint = unicode::UTF16Decode(lead, c);
639
i--;
640
}
641
}
642
643
// Ignore any characters with the property Case_Ignorable.
644
// NB: We need to skip over all Case_Ignorable characters, even when
645
// they also have the Cased binary property.
646
if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) {
647
continue;
648
}
649
650
precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
651
break;
652
}
653
if (!precededByCased) {
654
return unicode::GREEK_SMALL_LETTER_SIGMA;
655
}
656
657
bool followedByCased = false;
658
for (size_t i = index + 1; i < length;) {
659
char16_t c = chars[i++];
660
uint32_t codePoint = c;
661
if (unicode::IsLeadSurrogate(c) && i < length) {
662
char16_t trail = chars[i];
663
if (unicode::IsTrailSurrogate(trail)) {
664
codePoint = unicode::UTF16Decode(c, trail);
665
i++;
666
}
667
}
668
669
// Ignore any characters with the property Case_Ignorable.
670
// NB: We need to skip over all Case_Ignorable characters, even when
671
// they also have the Cased binary property.
672
if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) {
673
continue;
674
}
675
676
followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
677
break;
678
}
679
if (!followedByCased) {
680
return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
681
}
682
#endif
683
684
return unicode::GREEK_SMALL_LETTER_SIGMA;
685
}
686
687
// Latin-1 counterpart of Final_Sigma. It must never be executed, because
// U+03A3 cannot occur in a Latin-1 string; it asserts and returns 0.
static Latin1Char Final_Sigma(const Latin1Char* chars, size_t length,
                              size_t index) {
  MOZ_ASSERT_UNREACHABLE("U+03A3 is not a Latin-1 character");
  return Latin1Char(0);
}
692
693
// If |srcLength == destLength| is true, the destination buffer was allocated
694
// with the same size as the source buffer. When we append characters which
695
// have special casing mappings, we test |srcLength == destLength| to decide
696
// if we need to back out and reallocate a sufficiently large destination
697
// buffer. Otherwise the destination buffer was allocated with the correct
698
// size to hold all lower case mapped characters, i.e.
699
// |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
700
template <typename CharT>
701
static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
702
size_t startIndex, size_t srcLength,
703
size_t destLength) {
704
MOZ_ASSERT(startIndex < srcLength);
705
MOZ_ASSERT(srcLength <= destLength);
706
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), srcLength == destLength);
707
708
size_t j = startIndex;
709
for (size_t i = startIndex; i < srcLength; i++) {
710
char16_t c = srcChars[i];
711
if (!IsSame<CharT, Latin1Char>::value) {
712
if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
713
char16_t trail = srcChars[i + 1];
714
if (unicode::IsTrailSurrogate(trail)) {
715
trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
716
destChars[j++] = c;
717
destChars[j++] = trail;
718
i++;
719
continue;
720
}
721
}
722
723
// Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
724
// lowercases to <U+0069 U+0307>.
725
if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
726
// Return if the output buffer is too small.
727
if (srcLength == destLength) {
728
return i;
729
}
730
731
destChars[j++] = CharT('i');
732
destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
733
continue;
734
}
735
736
// Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
737
// one of two codepoints depending on context.
738
if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
739
destChars[j++] = Final_Sigma(srcChars, srcLength, i);
740
continue;
741
}
742
}
743
744
c = unicode::ToLowerCase(c);
745
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value),
746
c <= JSString::MAX_LATIN1_CHAR);
747
destChars[j++] = c;
748
}
749
750
MOZ_ASSERT(j == destLength);
751
return srcLength;
752
}
753
754
static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
755
size_t length) {
756
size_t lowerLength = length;
757
for (size_t i = startIndex; i < length; i++) {
758
char16_t c = chars[i];
759
760
// U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
761
if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
762
lowerLength += 1;
763
}
764
}
765
return lowerLength;
766
}
767
768
static size_t ToLowerCaseLength(const Latin1Char* chars, size_t startIndex,
769
size_t length) {
770
MOZ_ASSERT_UNREACHABLE("never called for Latin-1 strings");
771
return 0;
772
}
773
774
template <typename CharT>
775
static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) {
776
// Unlike toUpperCase, toLowerCase has the nice invariant that if the
777
// input is a Latin-1 string, the output is also a Latin-1 string.
778
779
InlineCharBuffer<CharT> newChars;
780
781
const size_t length = str->length();
782
size_t resultLength;
783
{
784
AutoCheckCannotGC nogc;
785
const CharT* chars = str->chars<CharT>(nogc);
786
787
// We don't need extra special casing checks in the loop below,
788
// because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
789
// GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
790
MOZ_ASSERT(unicode::ChangesWhenLowerCased(
791
unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
792
"U+0130 has a simple lower case mapping");
793
MOZ_ASSERT(
794
unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
795
"U+03A3 has a simple lower case mapping");
796
797
// One element Latin-1 strings can be directly retrieved from the
798
// static strings cache.
799
if (IsSame<CharT, Latin1Char>::value) {
800
if (length == 1) {
801
char16_t lower = unicode::ToLowerCase(chars[0]);
802
MOZ_ASSERT(lower <= JSString::MAX_LATIN1_CHAR);
803
MOZ_ASSERT(StaticStrings::hasUnit(lower));
804
805
return cx->staticStrings().getUnit(lower);
806
}
807
}
808
809
// Look for the first character that changes when lowercased.
810
size_t i = 0;
811
for (; i < length; i++) {
812
CharT c = chars[i];
813
if (!IsSame<CharT, Latin1Char>::value) {
814
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
815
CharT trail = chars[i + 1];
816
if (unicode::IsTrailSurrogate(trail)) {
817
if (unicode::ChangesWhenLowerCasedNonBMP(c, trail)) {
818
break;
819
}
820
821
i++;
822
continue;
823
}
824
}
825
}
826
if (unicode::ChangesWhenLowerCased(c)) {
827
break;
828
}
829
}
830
831
// If no character needs to change, return the input string.
832
if (i == length) {
833
return str;
834
}
835
836
resultLength = length;
837
if (!newChars.maybeAlloc(cx, resultLength)) {
838
return nullptr;
839
}
840
841
PodCopy(newChars.get(), chars, i);
842
843
size_t readChars =
844
ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
845
if (readChars < length) {
846
MOZ_ASSERT((!IsSame<CharT, Latin1Char>::value),
847
"Latin-1 strings don't have special lower case mappings");
848
resultLength = ToLowerCaseLength(chars, readChars, length);
849
850
if (!newChars.maybeRealloc(cx, length, resultLength)) {
851
return nullptr;
852
}
853
854
MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
855
readChars, length,
856
resultLength));
857
}
858
}
859
860
return newChars.toStringDontDeflate(cx, resultLength);
861
}
862
863
// Linearizes |string| and lower-cases it with the ToLowerCase instantiation
// matching its character encoding. Returns nullptr on failure.
JSString* js::StringToLowerCase(JSContext* cx, HandleString string) {
  JSLinearString* linear = string->ensureLinear(cx);
  if (!linear) {
    return nullptr;
  }

  return linear->hasLatin1Chars() ? ToLowerCase<Latin1Char>(cx, linear)
                                  : ToLowerCase<char16_t>(cx, linear);
}
874
875
// Native toLowerCase for strings: converts |this| to a string with
// ToStringForStringFunction and returns its lower-cased form.
bool js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedString thisString(cx, ToStringForStringFunction(cx, args.thisv()));
  if (!thisString) {
    return false;
  }

  if (JSString* lowered = StringToLowerCase(cx, thisString)) {
    args.rval().setString(lowered);
    return true;
  }

  return false;
}
891
892
#if ENABLE_INTL_API
893
// String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
894
// with core functionality performed by the intrinsic below.
895
896
// Maps the language tag in |str| to the locale name to use for case mapping.
//
// Returns "lt", "tr" or "az" when the tag's language subtag is one of those
// languages, and "" (the ICU root locale) for every other tag. Returns
// nullptr when linearizing |str| fails.
static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
  JSLinearString* locale = str->ensureLinear(cx);
  if (!locale) {
    return nullptr;
  }

  MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");

  // Lithuanian, Turkish, and Azeri have language dependent case mappings.
  static const char languagesWithSpecialCasing[][3] = {"lt", "tr", "az"};

  // All strings in |languagesWithSpecialCasing| are of length two, so we
  // only need to compare the first two characters to find a matching locale.
  // ES2017 Intl, §9.2.2 BestAvailableLocale
  const bool hasTwoLetterLanguage =
      locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-';
  if (!hasTwoLetterLanguage) {
    return "";  // ICU root locale
  }

  const char16_t first = locale->latin1OrTwoByteChar(0);
  const char16_t second = locale->latin1OrTwoByteChar(1);
  for (const auto& language : languagesWithSpecialCasing) {
    if (first == language[0] && second == language[1]) {
      return language;
    }
  }

  return "";  // ICU root locale
}
921
922
// Intrinsic backing the self-hosted String.prototype.toLocaleLowerCase.
//
// Expects exactly two string arguments: the string to convert and a locale.
// When CaseMappingLocale maps the locale to the ICU root locale, the language
// independent StringToLowerCase is used; otherwise ICU's u_strToLower performs
// the conversion with the selected locale.
bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 2);
  MOZ_ASSERT(args[0].isString());
  MOZ_ASSERT(args[1].isString());

  RootedString string(cx, args[0].toString());

  const char* locale = CaseMappingLocale(cx, args[1].toString());
  if (!locale) {
    return false;
  }

  // Call String.prototype.toLowerCase() for language independent casing.
  if (intl::StringsAreEqual(locale, "")) {
    JSString* lowered = StringToLowerCase(cx, string);
    if (!lowered) {
      return false;
    }

    args.rval().setString(lowered);
    return true;
  }

  AutoStableStringChars stableChars(cx);
  if (!stableChars.initTwoByte(cx, string)) {
    return false;
  }
  mozilla::Range<const char16_t> input = stableChars.twoByteRange();

  // Note: maximum case mapping length is three characters, so the result
  // length might be > INT32_MAX. ICU will fail in this case.
  static_assert(JSString::MAX_LENGTH <= INT32_MAX,
                "String length must fit in int32_t for ICU");

  static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;

  // Start with room for at least the whole input.
  Vector<char16_t, INLINE_CAPACITY> buffer(cx);
  if (!buffer.resize(std::max(INLINE_CAPACITY, input.length()))) {
    return false;
  }

  int32_t loweredLength = intl::CallICU(
      cx,
      [&input, locale](UChar* dest, int32_t destCapacity, UErrorCode* status) {
        return u_strToLower(dest, destCapacity, input.begin().get(),
                            input.length(), locale, status);
      },
      buffer);
  if (loweredLength < 0) {
    return false;
  }

  JSString* lowered =
      NewStringCopyN<CanGC>(cx, buffer.begin(), loweredLength);
  if (!lowered) {
    return false;
  }

  args.rval().setString(lowered);
  return true;
}
983
984
#else
985
986
// When the Intl API is not exposed, String.prototype.toLowerCase is implemented
987
// in C++.
988
static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
989
CallArgs args = CallArgsFromVp(argc, vp);
990
991
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
992
if (!str) {
993
return false;
994
}
995
996
/*
997
* Forcefully ignore the first (or any) argument and return toLowerCase(),
998
* ECMA has reserved that argument, presumably for defining the locale.
999
*/
1000
if (cx->runtime()->localeCallbacks &&
1001
cx->runtime()->localeCallbacks->localeToLowerCase) {
1002
RootedValue result(cx);
1003
if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) {
1004
return false;
1005
}
1006
1007
args.rval().set(result);
1008
return true;
1009
}
1010
1011
RootedLinearString linear(cx, str->ensureLinear(cx));
1012
if (!linear) {
1013
return false;
1014
}
1015
1016
JSString* result = StringToLowerCase(cx, linear);
1017
if (!result) {
1018
return false;
1019
}
1020
1021
args.rval().setString(result);
1022
return true;
1023
}
1024
1025
#endif // ENABLE_INTL_API
1026
1027
// Returns true when the Latin-1 code point |charCode| has a special casing
// rule for upper-casing.
static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) {
  // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
  // special casing rules, so detect it inline.
  const bool isSharpS = (unicode::LATIN_SMALL_LETTER_SHARP_S == charCode);
  MOZ_ASSERT(isSharpS ==
             unicode::ChangesWhenUpperCasedSpecialCasing(charCode));

  return isSharpS;
}
1037
1038
static inline bool ToUpperCaseHasSpecialCasing(char16_t charCode) {
1039
return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
1040
}
1041
1042
// Returns the number of characters that the special-cased Latin-1 code point
// |charCode| upper-cases to. Only U+00DF is allowed here (asserted).
static inline size_t ToUpperCaseLengthSpecialCasing(Latin1Char charCode) {
  MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);

  // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
  const size_t sharpSUpperLength = 2;
  return sharpSUpperLength;
}
1048
1049
static inline size_t ToUpperCaseLengthSpecialCasing(char16_t charCode) {
1050
MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
1051
1052
return unicode::LengthUpperCaseSpecialCasing(charCode);
1053
}
1054
1055
static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1056
Latin1Char* elements,
1057
size_t* index) {
1058
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1059
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1060
static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
1061
1062
elements[(*index)++] = 'S';
1063
elements[(*index)++] = 'S';
1064
}
1065
1066
static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1067
char16_t* elements,
1068
size_t* index) {
1069
unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
1070
}
1071
1072
// See ToLowerCaseImpl for an explanation of the parameters.
1073
template <typename DestChar, typename SrcChar>
1074
static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
1075
size_t startIndex, size_t srcLength,
1076
size_t destLength) {
1077
static_assert(IsSame<SrcChar, Latin1Char>::value ||
1078
!IsSame<DestChar, Latin1Char>::value,
1079
"cannot write non-Latin-1 characters into Latin-1 string");
1080
MOZ_ASSERT(startIndex < srcLength);
1081
MOZ_ASSERT(srcLength <= destLength);
1082
1083
size_t j = startIndex;
1084
for (size_t i = startIndex; i < srcLength; i++) {
1085
char16_t c = srcChars[i];
1086
if (!IsSame<DestChar, Latin1Char>::value) {
1087
if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
1088
char16_t trail = srcChars[i + 1];
1089
if (unicode::IsTrailSurrogate(trail)) {
1090
trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
1091
destChars[j++] = c;
1092
destChars[j++] = trail;
1093
i++;
1094
continue;
1095
}
1096
}
1097
}
1098
1099
if (MOZ_UNLIKELY(c > 0x7f &&
1100
ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
1101
// Return if the output buffer is too small.
1102
if (srcLength == destLength) {
1103
return i;
1104
}
1105
1106
ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
1107
continue;
1108
}
1109
1110
c = unicode::ToUpperCase(c);
1111
MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value),
1112
c <= JSString::MAX_LATIN1_CHAR);
1113
destChars[j++] = c;
1114
}
1115
1116
MOZ_ASSERT(j == destLength);
1117
return srcLength;
1118
}
1119
1120
// Explicit instantiation so we don't hit the static_assert from above.
1121
static bool ToUpperCaseImpl(Latin1Char* destChars, const char16_t* srcChars,
1122
size_t startIndex, size_t srcLength,
1123
size_t destLength) {
1124
MOZ_ASSERT_UNREACHABLE(
1125
"cannot write non-Latin-1 characters into Latin-1 string");
1126
return false;
1127
}
1128
1129
// Computes the length of the uppercased form of |chars|: starts from
// |length| and, for every special-cased character in [startIndex, length),
// adds the extra characters its uppercase form needs.
template <typename CharT>
static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
                                size_t length) {
  size_t total = length;
  for (size_t pos = startIndex; pos < length; pos++) {
    const char16_t wide = chars[pos];
    if (wide <= 0x7f) {
      continue;
    }

    const CharT unit = static_cast<CharT>(wide);
    if (!ToUpperCaseHasSpecialCasing(unit)) {
      continue;
    }

    // One character is already accounted for in |length|.
    total += ToUpperCaseLengthSpecialCasing(unit) - 1;
  }
  return total;
}
1142
1143
template <typename DestChar, typename SrcChar>
1144
static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
1145
size_t length) {
1146
static_assert(!IsSame<DestChar, SrcChar>::value,
1147
"PodCopy is used for the same type case");
1148
for (size_t i = 0; i < length; i++) {
1149
destChars[i] = srcChars[i];
1150
}
1151
}
1152
1153
// Same-type overload: copies |length| characters with PodCopy.
template <typename CharT>
static inline void CopyChars(CharT* destChars, const CharT* srcChars,
                             size_t length) {
  PodCopy(destChars, srcChars, length);
}
1158
1159
template <typename DestChar, typename SrcChar>
1160
static inline bool ToUpperCase(JSContext* cx,
1161
InlineCharBuffer<DestChar>& newChars,
1162
const SrcChar* chars, size_t startIndex,
1163
size_t length, size_t* resultLength) {
1164
MOZ_ASSERT(startIndex < length);
1165
1166
*resultLength = length;
1167
if (!newChars.maybeAlloc(cx, length)) {
1168
return false;
1169
}
1170
1171
CopyChars(newChars.get(), chars, startIndex);
1172
1173
size_t readChars =
1174
ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
1175
if (readChars < length) {
1176
size_t actualLength = ToUpperCaseLength(chars, readChars, length);
1177
1178
*resultLength = actualLength;
1179
if (!newChars.maybeRealloc(cx, length, actualLength)) {
1180
return false;
1181
}
1182
1183
MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
1184
length, actualLength));
1185
}
1186
1187
return true;
1188
}
1189
1190
template <typename CharT>
1191
static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) {
1192
using Latin1Buffer = InlineCharBuffer<Latin1Char>;
1193
using TwoByteBuffer = InlineCharBuffer<char16_t>;
1194
1195
mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
1196
const size_t length = str->length();
1197
size_t resultLength;
1198
{
1199
AutoCheckCannotGC nogc;
1200
const CharT* chars = str->chars<CharT>(nogc);
1201
1202
// Most one element Latin-1 strings can be directly retrieved from the
1203
// static strings cache.
1204
if (IsSame<CharT, Latin1Char>::value) {
1205
if (length == 1) {
1206
Latin1Char c = chars[0];
1207
if (c != unicode::MICRO_SIGN &&
1208
c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
1209
c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
1210
char16_t upper = unicode::ToUpperCase(c);
1211
MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
1212
MOZ_ASSERT(StaticStrings::hasUnit(upper));
1213
1214
return cx->staticStrings().getUnit(upper);
1215
}
1216
1217
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
1218
ToUpperCaseHasSpecialCasing(c));
1219
}
1220
}
1221
1222
// Look for the first character that changes when uppercased.
1223
size_t i = 0;
1224
for (; i < length; i++) {
1225
CharT c = chars[i];
1226
if (!IsSame<CharT, Latin1Char>::value) {
1227
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
1228
CharT trail = chars[i + 1];
1229
if (unicode::IsTrailSurrogate(trail)) {
1230
if (unicode::ChangesWhenUpperCasedNonBMP(c, trail)) {
1231
break;
1232
}
1233
1234
i++;
1235
continue;
1236
}
1237
}
1238
}
1239
if (unicode::ChangesWhenUpperCased(c)) {
1240
break;
1241
}
1242
if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c))) {
1243
break;
1244
}
1245
}
1246
1247
// If no character needs to change, return the input string.
1248
if (i == length) {
1249
return str;
1250
}
1251
1252
// The string changes when uppercased, so we must create a new string.
1253
// Can it be Latin-1?
1254
//
1255
// If the original string is Latin-1, it can -- unless the string
1256
// contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
1257
// the only Latin-1 codepoints that don't uppercase within Latin-1.
1258
// Search for those codepoints to decide whether the new string can be
1259
// Latin-1.
1260
// If the original string is a two-byte string, its uppercase form is
1261
// so rarely Latin-1 that we don't even consider creating a new
1262
// Latin-1 string.
1263
bool resultIsLatin1;
1264
if (IsSame<CharT, Latin1Char>::value) {
1265
resultIsLatin1 = true;
1266
for (size_t j = i; j < length; j++) {
1267
Latin1Char c = chars[j];
1268
if (c == unicode::MICRO_SIGN ||
1269
c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
1270
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
1271
resultIsLatin1 = false;
1272
break;
1273
} else {
1274
MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
1275
}
1276
}
1277
} else {
1278
resultIsLatin1 = false;
1279
}
1280
1281
if (resultIsLatin1) {
1282
newChars.construct<Latin1Buffer>();
1283
1284
if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
1285
&resultLength)) {
1286
return nullptr;
1287
}
1288
} else {
1289
newChars.construct<TwoByteBuffer>();
1290
1291
if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1292
&resultLength)) {
1293
return nullptr;
1294
}
1295
}
1296
}
1297
1298
return newChars.constructed<Latin1Buffer>()
1299
? newChars.ref<Latin1Buffer>().toStringDontDeflate(cx,
1300
resultLength)
1301
: newChars.ref<TwoByteBuffer>().toStringDontDeflate(cx,
1302
resultLength);
1303
}
1304
1305
// Uppercases |string|. The string is linearized first, then handed to the
// ToUpperCase instantiation matching its character width. Returns nullptr if
// linearization or the conversion fails.
JSString* js::StringToUpperCase(JSContext* cx, HandleString string) {
  JSLinearString* linear = string->ensureLinear(cx);
  if (!linear) {
    return nullptr;
  }

  if (!linear->hasLatin1Chars()) {
    return ToUpperCase<char16_t>(cx, linear);
  }
  return ToUpperCase<Latin1Char>(cx, linear);
}
1316
1317
// Native for String.prototype.toUpperCase: converts the |this| value to a
// string, uppercases it with StringToUpperCase, and stores the result in the
// return value slot.
bool js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedString thisStr(cx, ToStringForStringFunction(cx, args.thisv()));
  if (!thisStr) {
    return false;
  }

  if (JSString* upper = StringToUpperCase(cx, thisStr)) {
    args.rval().setString(upper);
    return true;
  }
  return false;
}
1333
1334
#if ENABLE_INTL_API
1335
// String.prototype.toLocaleUpperCase is self-hosted when Intl is exposed,
1336
// with core functionality performed by the intrinsic below.
1337
1338
bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1339
CallArgs args = CallArgsFromVp(argc, vp);
1340
MOZ_ASSERT(args.length() == 2);
1341
MOZ_ASSERT(args[0].isString());
1342
MOZ_ASSERT(args[1].isString());
1343
1344
RootedString string(cx, args[0].toString());
1345
1346
const char* locale = CaseMappingLocale(cx, args[1].toString());
1347
if (!locale) {
1348
return false;
1349
}
1350
1351
// Call String.prototype.toUpperCase() for language independent casing.
1352
if (intl::StringsAreEqual(locale, "")) {
1353
JSString* str = js::StringToUpperCase(cx, string);
1354
if (!str) {
1355
return false;
1356
}
1357
1358
args.rval().setString(str);
1359
return true;
1360
}
1361
1362
AutoStableStringChars inputChars(cx);
1363
if (!inputChars.initTwoByte(cx, string)) {
1364
return false;
1365
}
1366
mozilla::Range<const char16_t> input = inputChars.twoByteRange();
1367
1368
// Note: maximum case mapping length is three characters, so the result
1369
// length might be > INT32_MAX. ICU will fail in this case.
1370
static_assert(JSString::MAX_LENGTH <= INT32_MAX,
1371
"String length must fit in int32_t for ICU");
1372
1373
static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1374
1375
Vector<char16_t, INLINE_CAPACITY> chars(cx);
1376
if (!chars.resize(std::max(INLINE_CAPACITY, input.length()))) {
1377
return false;
1378
}
1379
1380
int32_t size = intl::CallICU(
1381
cx,
1382
[&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
1383
return u_strToUpper(chars, size, input.begin().get(), input.length(),
1384
locale, status);
1385
},
1386
chars);
1387
if (size < 0) {
1388
return false;
1389
}
1390
1391
JSString* result = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1392
if (!result) {
1393
return false;
1394
}
1395
1396
args.rval().setString(result);
1397
return true;
1398
}
1399
1400
#else
1401
1402
// When the Intl API is not exposed, String.prototype.toUpperCase is implemented
1403
// in C++.
1404
static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1405
CallArgs args = CallArgsFromVp(argc, vp);
1406
1407
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1408
if (!str) {
1409
return false;
1410
}
1411
1412
/*
1413
* Forcefully ignore the first (or any) argument and return toUpperCase(),
1414
* ECMA has reserved that argument, presumably for defining the locale.
1415
*/
1416
if (cx->runtime()->localeCallbacks &&
1417
cx->runtime()->localeCallbacks->localeToUpperCase) {
1418
RootedValue result(cx);
1419
if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) {
1420
return false;
1421
}
1422
1423
args.rval().set(result);
1424
return true;
1425
}
1426
1427
RootedLinearString linear(cx, str->ensureLinear(cx));
1428
if (!linear) {
1429
return false;
1430
}
1431
1432
JSString* result = StringToUpperCase(cx, linear);
1433
if (!result) {
1434
return false;
1435
}
1436
1437
args.rval().setString(result);
1438
return true;
1439
}
1440
1441
#endif // ENABLE_INTL_API
1442
1443
#if ENABLE_INTL_API
1444
1445
// String.prototype.localeCompare is self-hosted when Intl functionality is
// exposed, and the only intrinsics it requires are provided in the
// implementation of Intl.Collator.
1448
1449
#else
1450
1451
// String.prototype.localeCompare is implemented in C++ (delegating to
1452
// JSLocaleCallbacks) when Intl functionality is not exposed.
1453
static bool str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
1454
CallArgs args = CallArgsFromVp(argc, vp);
1455
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1456
if (!str) {
1457
return false;
1458
}
1459
1460
RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
1461
if (!thatStr) {
1462
return false;
1463
}
1464
1465
if (cx->runtime()->localeCallbacks &&
1466
cx->runtime()->localeCallbacks->localeCompare) {
1467
RootedValue result(cx);
1468
if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
1469
&result)) {
1470
return false;
1471
}
1472
1473
args.rval().set(result);
1474
return true;
1475
}
1476
1477
int32_t result;
1478
if (!CompareStrings(cx, str, thatStr, &result)) {
1479
return false;
1480
}
1481
1482
args.rval().setInt32(result);
1483
return true;
1484
}
1485
1486
#endif // ENABLE_INTL_API
1487
1488
#if ENABLE_INTL_API
1489
1490
// ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
1491
// 21.1.3.12 String.prototype.normalize ( [ form ] )
1492
//
1493
// String.prototype.normalize is only implementable if ICU's normalization
1494
// functionality is available.
1495
static bool str_normalize(JSContext* cx, unsigned argc, Value* vp) {
1496
CallArgs args = CallArgsFromVp(argc, vp);
1497
1498
// Steps 1-2.
1499
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1500
if (!str) {
1501
return false;
1502
}
1503
1504
enum NormalizationForm { NFC, NFD, NFKC, NFKD };
1505
1506
NormalizationForm form;
1507
if (!args.hasDefined(0)) {
1508
// Step 3.
1509
form = NFC;
1510
} else {
1511
// Step 4.
1512
JSLinearString* formStr = ArgToLinearString(cx, args, 0);
1513
if (!formStr) {
1514
return false;
1515
}
1516
1517
// Step 5.
1518
if (EqualStrings(formStr, cx->names().NFC)) {
1519
form = NFC;
1520
} else if (EqualStrings(formStr, cx->names().NFD)) {
1521
form = NFD;
1522
} else if (EqualStrings(formStr, cx->names().NFKC)) {
1523
form = NFKC;
1524
} else if (EqualStrings(formStr, cx->names().NFKD)) {
1525
form = NFKD;
1526
} else {
1527
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1528
JSMSG_INVALID_NORMALIZE_FORM);
1529
return false;
1530
}
1531
}
1532
1533
// Latin-1 strings are already in Normalization Form C.
1534
if (form == NFC && str->hasLatin1Chars()) {
1535
// Step 7.
1536
args.rval().setString(str);
1537
return true;
1538
}
1539
1540
// Step 6.
1541
AutoStableStringChars stableChars(cx);
1542
if (!stableChars.initTwoByte(cx, str)) {
1543
return false;
1544
}
1545
1546
mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
1547
1548
// The unorm2_getXXXInstance() methods return a shared instance which must
1549
// not be deleted.
1550
UErrorCode status = U_ZERO_ERROR;
1551
const UNormalizer2* normalizer;
1552
if (form == NFC) {
1553
normalizer = unorm2_getNFCInstance(&status);
1554
} else if (form == NFD) {
1555
normalizer = unorm2_getNFDInstance(&status);
1556
} else if (form == NFKC) {
1557
normalizer = unorm2_getNFKCInstance(&status);
1558
} else {
1559
MOZ_ASSERT(form == NFKD);
1560
normalizer = unorm2_getNFKDInstance(&status);
1561
}
1562
if (U_FAILURE(status)) {
1563
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1564
JSMSG_INTERNAL_INTL_ERROR);
1565
return false;
1566
}
1567
1568
int32_t spanLengthInt = unorm2_spanQuickCheckYes(
1569
normalizer, srcChars.begin().get(), srcChars.length(), &status);
1570
if (U_FAILURE(status)) {
1571
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1572
JSMSG_INTERNAL_INTL_ERROR);
1573
return false;
1574
}
1575
MOZ_ASSERT(0 <= spanLengthInt && size_t(spanLengthInt) <= srcChars.length());
1576
size_t spanLength = size_t(spanLengthInt);
1577
1578
// Return if the input string is already normalized.
1579
if (spanLength == srcChars.length()) {
1580
// Step 7.
1581
args.rval().setString(str);
1582
return true;
1583
}
1584
1585
static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1586
1587
Vector<char16_t, INLINE_CAPACITY> chars(cx);
1588
if (!chars.resize(std::max(INLINE_CAPACITY, srcChars.length()))) {
1589
return false;
1590
}
1591
1592
// Copy the already normalized prefix.
1593
if (spanLength > 0) {
1594
PodCopy(chars.begin(), srcChars.begin().get(), spanLength);
1595
}
1596
1597
int32_t size = intl::CallICU(
1598
cx,
1599
[normalizer, &srcChars, spanLength](UChar* chars, uint32_t size,
1600
UErrorCode* status) {
1601
mozilla::RangedPtr<const char16_t> remainingStart =
1602
srcChars.begin() + spanLength;
1603
size_t remainingLength = srcChars.length() - spanLength;
1604
1605
return unorm2_normalizeSecondAndAppend(normalizer, chars, spanLength,
1606
size, remainingStart.get(),
1607
remainingLength, status);
1608
},
1609
chars);
1610
if (size < 0) {
1611
return false;
1612
}
1613
1614
JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1615
if (!ns) {
1616
return false;
1617
}
1618
1619
// Step 7.
1620
args.rval().setString(ns);
1621
return true;
1622
}
1623
1624
#endif // ENABLE_INTL_API
1625
1626
// Native for String.prototype.charAt: returns the one-character string at
// the requested index, or the empty string when the index is out of range.
static bool str_charAt(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedString str(cx);
  size_t index = 0;
  bool inRange = false;

  const bool fastPath =
      args.thisv().isString() && args.length() != 0 && args[0].isInt32();
  if (fastPath) {
    // String |this| and int32 index: no conversions needed. A negative index
    // becomes a huge size_t and so fails the bounds check.
    str = args.thisv().toString();
    index = size_t(args[0].toInt32());
    inRange = index < str->length();
  } else {
    str = ToStringForStringFunction(cx, args.thisv());
    if (!str) {
      return false;
    }

    double d = 0.0;
    if (args.length() > 0 && !ToInteger(cx, args[0], &d)) {
      return false;
    }

    inRange = !(d < 0 || str->length() <= d);
    if (inRange) {
      index = size_t(d);
    }
  }

  if (!inRange) {
    args.rval().setString(cx->runtime()->emptyString);
    return true;
  }

  str = cx->staticStrings().getUnitStringForElement(cx, str, index);
  if (!str) {
    return false;
  }
  args.rval().setString(str);
  return true;
}
1665
1666
bool js::str_charCodeAt_impl(JSContext* cx, HandleString string,
1667
HandleValue index, MutableHandleValue res) {
1668
size_t i;
1669
if (index.isInt32()) {
1670
i = index.toInt32();
1671
if (i >= string->length()) {
1672
goto out_of_range;
1673
}
1674
} else {
1675
double d = 0.0;
1676
if (!ToInteger(cx, index, &d)) {
1677
return false;
1678
}
1679
// check whether d is negative as size_t is unsigned
1680
if (d < 0 || string->length() <= d) {
1681
goto out_of_range;
1682
}
1683
i = size_t(d);
1684
}
1685
char16_t c;
1686
if (!string->getChar(cx, i, &c)) {
1687
return false;
1688
}
1689
res.setInt32(c);
1690
return true;
1691
1692
out_of_range:
1693
res.setNaN();
1694
return true;
1695
}
1696
1697
// Native for String.prototype.charCodeAt: obtains the |this| string and the
// index argument (0 when no argument was passed) and defers to
// str_charCodeAt_impl.
bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedString str(cx);
  if (!args.thisv().isString()) {
    str = ToStringForStringFunction(cx, args.thisv());
    if (!str) {
      return false;
    }
  } else {
    str = args.thisv().toString();
  }

  RootedValue index(cx);
  index.setInt32(0);
  if (args.length() != 0) {
    index = args[0];
  }

  return js::str_charCodeAt_impl(cx, str, index, args.rval());
}
1717
1718
/*
 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
 * The patlen argument must be positive and no greater than sBMHPatLenMax.
 *
 * Return the index of pat in text, or -1 if not found.
 */

/* ISO-Latin-1 */
static const uint32_t sBMHCharSetSize = 256;

/* skip table element is uint8_t */
static const uint32_t sBMHPatLenMax = 255;

/* return value if pat is not ISO-Latin-1 */
static const int sBMHBadPattern = -2;
1728
1729
template <typename TextChar, typename PatChar>
1730
static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
1731
const PatChar* pat, uint32_t patLen) {
1732
MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1733
1734
uint8_t skip[sBMHCharSetSize];
1735
for (uint32_t i = 0; i < sBMHCharSetSize; i++) {
1736
skip[i] = uint8_t(patLen);
1737
}
1738
1739
uint32_t patLast = patLen - 1;
1740
for (uint32_t i = 0; i < patLast; i++) {
1741
char16_t c = pat[i];
1742
if (c >= sBMHCharSetSize) {
1743
return sBMHBadPattern;
1744
}
1745
skip[c] = uint8_t(patLast - i);
1746
}
1747
1748
for (uint32_t k = patLast; k < textLen;) {
1749
for (uint32_t i = k, j = patLast;; i--, j--) {
1750
if (text[i] != pat[j]) {
1751
break;
1752
}
1753
if (j == 0) {
1754
return static_cast<int>(i); /* safe: max string size */
1755
}
1756
}
1757
1758
char16_t c = text[k];
1759
k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1760
}
1761
return -1;
1762
}
1763
1764
template <typename TextChar, typename PatChar>
1765
struct MemCmp {
1766
typedef uint32_t Extent;
1767
static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
1768
uint32_t patLen) {
1769
return (patLen - 1) * sizeof(PatChar);
1770
}
1771
static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,