Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include "builtin/String.h"
8
9
#include "mozilla/ArrayUtils.h"
10
#include "mozilla/Attributes.h"
11
#include "mozilla/CheckedInt.h"
12
#include "mozilla/FloatingPoint.h"
13
#include "mozilla/PodOperations.h"
14
#include "mozilla/Range.h"
15
#include "mozilla/TextUtils.h"
16
#include "mozilla/TypeTraits.h"
17
#include "mozilla/Unused.h"
18
19
#include <algorithm>
20
#include <limits>
21
#include <string.h>
22
#include <type_traits>
23
24
#include "jsapi.h"
25
#include "jsnum.h"
26
#include "jstypes.h"
27
28
#include "builtin/Array.h"
29
#include "builtin/Boolean.h"
30
#if JS_HAS_INTL_API
31
# include "builtin/intl/CommonFunctions.h"
32
#endif
33
#include "builtin/RegExp.h"
34
#include "jit/InlinableNatives.h"
35
#include "js/Conversions.h"
36
#if !JS_HAS_INTL_API
37
# include "js/LocaleSensitive.h"
38
#endif
39
#include "js/PropertySpec.h"
40
#include "js/StableStringChars.h"
41
#include "js/UniquePtr.h"
42
#if JS_HAS_INTL_API
43
# include "unicode/uchar.h"
44
# include "unicode/unorm2.h"
45
# include "unicode/ustring.h"
46
# include "unicode/utypes.h"
47
#endif
48
#include "util/StringBuffer.h"
49
#include "util/Unicode.h"
50
#include "vm/BytecodeUtil.h"
51
#include "vm/GlobalObject.h"
52
#include "vm/Interpreter.h"
53
#include "vm/JSAtom.h"
54
#include "vm/JSContext.h"
55
#include "vm/JSObject.h"
56
#include "vm/Opcodes.h"
57
#include "vm/Printer.h"
58
#include "vm/RegExpObject.h"
59
#include "vm/RegExpStatics.h"
60
#include "vm/SelfHosting.h"
61
#include "vm/ToSource.h" // js::ValueToSource
62
63
#include "vm/InlineCharBuffer-inl.h"
64
#include "vm/Interpreter-inl.h"
65
#include "vm/StringObject-inl.h"
66
#include "vm/StringType-inl.h"
67
#include "vm/TypeInference-inl.h"
68
69
using namespace js;
70
71
using JS::Symbol;
72
using JS::SymbolCode;
73
74
using mozilla::AsciiAlphanumericToNumber;
75
using mozilla::CheckedInt;
76
using mozilla::IsAsciiHexDigit;
77
using mozilla::IsNaN;
78
using mozilla::IsSame;
79
using mozilla::PodCopy;
80
using mozilla::RangedPtr;
81
82
using JS::AutoCheckCannotGC;
83
using JS::AutoStableStringChars;
84
85
// Convert argument |argno| of |args| to a linear string.
//
// When the call supplied fewer than |argno + 1| arguments, the cached
// |undefined| name from |cx->names()| is returned without any conversion.
// Otherwise the argument goes through ToString and is then linearized.
// Returns nullptr if either of those steps fails.
static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
                                         unsigned argno) {
  if (args.length() <= argno) {
    return cx->names().undefined;
  }

  if (JSString* converted = ToString<CanGC>(cx, args[argno])) {
    return converted->ensureLinear(cx);
  }

  return nullptr;
}
98
99
/*
100
* Forward declarations for URI encode/decode and helper routines
101
*/
102
static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
103
104
static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
105
106
static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
107
108
static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
109
110
/*
111
* Global string methods
112
*/
113
114
/* ES5 B.2.1 */
115
template <typename CharT>
116
static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
117
InlineCharBuffer<Latin1Char>& newChars,
118
uint32_t* newLengthOut) {
119
// clang-format off
120
static const uint8_t shouldPassThrough[128] = {
121
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
122
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
123
0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
124
1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
125
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
126
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
127
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
128
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
129
};
130
// clang-format on
131
132
/* Take a first pass and see how big the result string will need to be. */
133
uint32_t newLength = length;
134
for (size_t i = 0; i < length; i++) {
135
char16_t ch = chars[i];
136
if (ch < 128 && shouldPassThrough[ch]) {
137
continue;
138
}
139
140
/*
141
* newlength is incremented below by at most 5 and at this point it must
142
* be a valid string length, so this should never overflow uint32_t.
143
*/
144
static_assert(JSString::MAX_LENGTH < UINT32_MAX - 5,
145
"Adding 5 to valid string length should not overflow");
146
147
MOZ_ASSERT(newLength <= JSString::MAX_LENGTH);
148
149
/* The character will be encoded as %XX or %uXXXX. */
150
newLength += (ch < 256) ? 2 : 5;
151
152
if (MOZ_UNLIKELY(newLength > JSString::MAX_LENGTH)) {
153
ReportAllocationOverflow(cx);
154
return false;
155
}
156
}
157
158
if (newLength == length) {
159
*newLengthOut = newLength;
160
return true;
161
}
162
163
if (!newChars.maybeAlloc(cx, newLength)) {
164
return false;
165
}
166
167
static const char digits[] = "0123456789ABCDEF";
168
169
Latin1Char* rawNewChars = newChars.get();
170
size_t i, ni;
171
for (i = 0, ni = 0; i < length; i++) {
172
char16_t ch = chars[i];
173
if (ch < 128 && shouldPassThrough[ch]) {
174
rawNewChars[ni++] = ch;
175
} else if (ch < 256) {
176
rawNewChars[ni++] = '%';
177
rawNewChars[ni++] = digits[ch >> 4];
178
rawNewChars[ni++] = digits[ch & 0xF];
179
} else {
180
rawNewChars[ni++] = '%';
181
rawNewChars[ni++] = 'u';
182
rawNewChars[ni++] = digits[ch >> 12];
183
rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
184
rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
185
rawNewChars[ni++] = digits[ch & 0xF];
186
}
187
}
188
MOZ_ASSERT(ni == newLength);
189
190
*newLengthOut = newLength;
191
return true;
192
}
193
194
// Native implementation of the global escape() function: converts the first
// argument to a linear string, percent-encodes it with Escape(), and returns
// either the input string itself (when nothing was escaped) or a new string.
static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedLinearString input(cx, ArgToLinearString(cx, args, 0));
  if (!input) {
    return false;
  }

  InlineCharBuffer<Latin1Char> escaped;
  uint32_t escapedLength = 0;  // initialize to silence GCC warning
  {
    // The raw character pointers are only used inside this scope.
    AutoCheckCannotGC nogc;
    bool ok = input->hasLatin1Chars()
                  ? Escape(cx, input->latin1Chars(nogc), input->length(),
                           escaped, &escapedLength)
                  : Escape(cx, input->twoByteChars(nogc), input->length(),
                           escaped, &escapedLength);
    if (!ok) {
      return false;
    }
  }

  // An unchanged length means no character needed escaping; hand back the
  // input string instead of building a copy.
  if (escapedLength == input->length()) {
    args.rval().setString(input);
    return true;
  }

  JSString* result = escaped.toString(cx, escapedLength);
  if (!result) {
    return false;
  }

  args.rval().setString(result);
  return true;
}
232
233
template <typename CharT>
234
static inline bool Unhex4(const RangedPtr<const CharT> chars,
235
char16_t* result) {
236
CharT a = chars[0], b = chars[1], c = chars[2], d = chars[3];
237
238
if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b) && IsAsciiHexDigit(c) &&
239
IsAsciiHexDigit(d))) {
240
return false;
241
}
242
243
char16_t unhex = AsciiAlphanumericToNumber(a);
244
unhex = (unhex << 4) + AsciiAlphanumericToNumber(b);
245
unhex = (unhex << 4) + AsciiAlphanumericToNumber(c);
246
unhex = (unhex << 4) + AsciiAlphanumericToNumber(d);
247
*result = unhex;
248
return true;
249
}
250
251
template <typename CharT>
252
static inline bool Unhex2(const RangedPtr<const CharT> chars,
253
char16_t* result) {
254
CharT a = chars[0], b = chars[1];
255
256
if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b))) {
257
return false;
258
}
259
260
*result = (AsciiAlphanumericToNumber(a) << 4) + AsciiAlphanumericToNumber(b);
261
return true;
262
}
263
264
template <typename CharT>
265
static bool Unescape(StringBuffer& sb,
266
const mozilla::Range<const CharT> chars) {
267
// Step 2.
268
uint32_t length = chars.length();
269
270
/*
271
* Note that the spec algorithm has been optimized to avoid building
272
* a string in the case where no escapes are present.
273
*/
274
bool building = false;
275
276
#define ENSURE_BUILDING \
277
do { \
278
if (!building) { \
279
building = true; \
280
if (!sb.reserve(length)) return false; \
281
sb.infallibleAppend(chars.begin().get(), k); \
282
} \
283
} while (false);
284
285
// Step 4.
286
uint32_t k = 0;
287
288
// Step 5.
289
while (k < length) {
290
// Step 5.a.
291
char16_t c = chars[k];
292
293
// Step 5.b.
294
if (c == '%') {
295
static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
296
"String length is not near UINT32_MAX");
297
298
// Steps 5.b.i-ii.
299
if (k + 6 <= length && chars[k + 1] == 'u') {
300
if (Unhex4(chars.begin() + k + 2, &c)) {
301
ENSURE_BUILDING
302
k += 5;
303
}
304
} else if (k + 3 <= length) {
305
if (Unhex2(chars.begin() + k + 1, &c)) {
306
ENSURE_BUILDING
307
k += 2;
308
}
309
}
310
}
311
312
// Step 5.c.
313
if (building && !sb.append(c)) {
314
return false;
315
}
316
317
// Step 5.d.
318
k += 1;
319
}
320
321
return true;
322
#undef ENSURE_BUILDING
323
}
324
325
// ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
326
// B.2.1.2 unescape ( string )
327
static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
328
CallArgs args = CallArgsFromVp(argc, vp);
329
330
// Step 1.
331
RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
332
if (!str) {
333
return false;
334
}
335
336
// Step 3.
337
JSStringBuilder sb(cx);
338
if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) {
339
return false;
340
}
341
342
// Steps 2, 4-5.
343
if (str->hasLatin1Chars()) {
344
AutoCheckCannotGC nogc;
345
if (!Unescape(sb, str->latin1Range(nogc))) {
346
return false;
347
}
348
} else {
349
AutoCheckCannotGC nogc;
350
if (!Unescape(sb, str->twoByteRange(nogc))) {
351
return false;
352
}
353
}
354
355
// Step 6.
356
JSLinearString* result;
357
if (!sb.empty()) {
358
result = sb.finishString();
359
if (!result) {
360
return false;
361
}
362
} else {
363
result = str;
364
}
365
366
args.rval().setString(result);
367
return true;
368
}
369
370
// Native implementation of the global uneval() function: returns the result
// of ValueToSource applied to the first argument (|undefined| if absent).
static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (JSString* source = ValueToSource(cx, args.get(0))) {
    args.rval().setString(source);
    return true;
  }

  return false;
}
380
381
// Table of the string-related natives defined in this file. Every entry is
// declared with one argument and the JSPROP_RESOLVING flag; JS_FS_END
// terminates the list.
static const JSFunctionSpec string_functions[] = {
    // Legacy escaping helpers and uneval.
    JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
    JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
    JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),

    // URI encoding and decoding.
    JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
    JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
    JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,
          JSPROP_RESOLVING),
    JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,
          JSPROP_RESOLVING),

    JS_FS_END,
};
393
394
static const unsigned STRING_ELEMENT_ATTRS =
395
JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
396
397
// Enumerate hook for String objects: defines one data element per index of
// the boxed string, each holding the single-unit string for that index.
// Returns false if creating a unit string or defining an element fails.
static bool str_enumerate(JSContext* cx, HandleObject obj) {
  RootedString boxed(cx, obj->as<StringObject>().unbox());
  js::StaticStrings& staticStrings = cx->staticStrings();

  const unsigned attrs = STRING_ELEMENT_ATTRS | JSPROP_RESOLVING;
  const size_t length = boxed->length();

  RootedValue element(cx);
  for (size_t index = 0; index < length; index++) {
    JSString* unit = staticStrings.getUnitStringForElement(cx, boxed, index);
    if (!unit) {
      return false;
    }

    element.setString(unit);
    if (!DefineDataElement(cx, obj, index, element, attrs)) {
      return false;
    }
  }

  return true;
}
416
417
static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
418
// str_resolve ignores non-integer ids.
419
return JSID_IS_INT(id);
420
}
421
422
static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
423
bool* resolvedp) {
424
if (!JSID_IS_INT(id)) {
425
return true;
426
}
427
428
RootedString str(cx, obj->as<StringObject>().unbox());
429
430
int32_t slot = JSID_TO_INT(id);
431
if ((size_t)slot < str->length()) {
432
JSString* str1 =
433
cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
434
if (!str1) {
435
return false;
436
}
437
RootedValue value(cx, StringValue(str1));
438
if (!DefineDataElement(cx, obj, uint32_t(slot), value,
439
STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
440
return false;
441
}
442
*resolvedp = true;
443
}
444
return true;
445
}
446
447
// Class hooks for String objects. Only the enumerate, resolve and mayResolve
// hooks are provided; every other slot is left null.
static const JSClassOps StringObjectClassOps = {
    /* addProperty  */ nullptr,
    /* delProperty  */ nullptr,
    /* enumerate    */ str_enumerate,
    /* newEnumerate */ nullptr,
    /* resolve      */ str_resolve,
    /* mayResolve   */ str_mayResolve,
    /* finalize     */ nullptr,
    /* call         */ nullptr,
    /* hasInstance  */ nullptr,
    /* construct    */ nullptr,
    /* trace        */ nullptr,
};
460
461
// JSClass descriptor for String objects: class name, flags (reserved slot
// count and cached String prototype), class hooks and class spec.
const JSClass StringObject::class_ = {
    js_String_str,
    JSCLASS_HAS_CACHED_PROTO(JSProto_String) |
        JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS),
    &StringObjectClassOps,
    &StringObject::classSpec_,
};
466
467
/*
468
* Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
469
* from nearly all String.prototype.* functions.
470
*/
471
static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
472
JSContext* cx, HandleValue thisv) {
473
if (!CheckRecursionLimit(cx)) {
474
return nullptr;
475
}
476
477
if (thisv.isString()) {
478
return thisv.toString();
479
}
480
481
if (thisv.isObject()) {
482
RootedObject obj(cx, &thisv.toObject());
483
if (obj->is<StringObject>()) {
484
StringObject* nobj = &obj->as<StringObject>();
485
// We have to make sure that the ToPrimitive call from ToString
486
// would be unobservable.
487
if (HasNoToPrimitiveMethodPure(nobj, cx) &&
488
HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
489
return nobj->unbox();
490
}
491
}
492
} else if (thisv.isNullOrUndefined()) {
493
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
494
JSMSG_CANT_CONVERT_TO,
495
thisv.isNull() ? "null" : "undefined", "object");
496
return nullptr;
497
}
498
499
return ToStringSlow<CanGC>(cx, thisv);
500
}
501
502
// True if |v| is a primitive string or a String object.
MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
  if (v.isString()) {
    return true;
  }
  return v.isObject() && v.toObject().is<StringObject>();
}
505
506
// Implementation of String.prototype.toSource for a |this| value that has
// already passed the IsString check: produces |(new String("..."))| where
// the quoted text comes from QuoteString.
MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
  MOZ_ASSERT(IsString(args.thisv()));

  JSString* str = ToString<CanGC>(cx, args.thisv());
  if (!str) {
    return false;
  }

  UniqueChars quoted = QuoteString(cx, str, '"');
  if (!quoted) {
    return false;
  }
  const char* quotedChars = quoted.get();

  JSStringBuilder sb(cx);
  bool appended = sb.append("(new String(") &&
                  sb.append(quotedChars, strlen(quotedChars)) &&
                  sb.append("))");
  if (!appended) {
    return false;
  }

  JSString* source = sb.finishString();
  if (!source) {
    return false;
  }

  args.rval().setString(source);
  return true;
}
532
533
// Native entry point for String.prototype.toSource. Dispatches through
// CallNonGenericMethod with IsString as the |this| check.
static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
  const CallArgs callArgs = CallArgsFromVp(argc, vp);
  return CallNonGenericMethod<IsString, str_toSource_impl>(cx, callArgs);
}
537
538
// Implementation of String.prototype.toString for a |this| value that has
// already passed the IsString check: returns the primitive string, unboxing
// it first when |this| is a String object.
MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
  MOZ_ASSERT(IsString(args.thisv()));

  JSString* primitive;
  if (args.thisv().isString()) {
    primitive = args.thisv().toString();
  } else {
    primitive = args.thisv().toObject().as<StringObject>().unbox();
  }

  args.rval().setString(primitive);
  return true;
}
547
548
// Native entry point for String.prototype.toString. Dispatches through
// CallNonGenericMethod with IsString as the |this| check.
bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
  const CallArgs callArgs = CallArgsFromVp(argc, vp);
  return CallNonGenericMethod<IsString, str_toString_impl>(cx, callArgs);
}
552
553
/*
554
* Java-like string native methods.
555
*/
556
557
// Return the substring of |str| that starts at |beginInt| and spans
// |lengthInt| code units. Both values must be non-negative and the range
// must lie inside |str| (asserted). Returns nullptr if an allocation fails.
JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
                              int32_t lengthInt) {
  MOZ_ASSERT(0 <= beginInt);
  MOZ_ASSERT(0 <= lengthInt);
  MOZ_ASSERT(uint32_t(beginInt) <= str->length());
  MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);

  uint32_t begin = beginInt;
  uint32_t len = lengthInt;

  // Non-rope strings: a dependent string over |str| is all that is needed.
  if (!str->isRope()) {
    return NewDependentString(cx, str, begin, len);
  }

  /*
   * Optimization for one level deep ropes.
   * This is common for the following pattern:
   *
   * while() {
   *   text = text.substr(0, x) + "bla" + text.substr(x)
   *   text.charCodeAt(x + 1)
   * }
   */
  JSRope* rope = &str->asRope();
  const size_t leftLength = rope->leftChild()->length();

  /* The requested range lies entirely within the left child. */
  if (begin + len <= leftLength) {
    return NewDependentString(cx, rope->leftChild(), begin, len);
  }

  /* The requested range lies entirely within the right child. */
  if (begin >= leftLength) {
    begin -= leftLength;
    return NewDependentString(cx, rope->rightChild(), begin, len);
  }

  /*
   * The requested range straddles both children: build one dependent string
   * per child and join the two pieces in a new rope.
   */
  MOZ_ASSERT(begin < leftLength && begin + len > leftLength);

  size_t lhsLength = leftLength - begin;
  size_t rhsLength = begin + len - leftLength;

  // Root the rope before the allocations below and re-read each child
  // through the rooted handle.
  Rooted<JSRope*> ropeRoot(cx, rope);

  RootedString lhs(
      cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
  if (!lhs) {
    return nullptr;
  }

  RootedString rhs(
      cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
  if (!rhs) {
    return nullptr;
  }

  return JSRope::new_<CanGC>(cx, lhs, rhs, len);
}
618
619
/**
620
* U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
621
* depending on its context:
622
* When it's preceded by a cased character and not followed by another cased
623
* character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
624
* Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
625
*
626
* Unicode 9.0, §3.13 Default Case Algorithms
627
*/
628
static char16_t Final_Sigma(const char16_t* chars, size_t length,
629
size_t index) {
630
MOZ_ASSERT(index < length);
631
MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
632
MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
633
unicode::GREEK_SMALL_LETTER_SIGMA);
634
635
#if JS_HAS_INTL_API
636
// Tell the analysis the BinaryProperty.contains function pointer called by
637
// u_hasBinaryProperty cannot GC.
638
JS::AutoSuppressGCAnalysis nogc;
639
640
bool precededByCased = false;
641
for (size_t i = index; i > 0;) {
642
char16_t c = chars[--i];
643
uint32_t codePoint = c;
644
if (unicode::IsTrailSurrogate(c) && i > 0) {
645
char16_t lead = chars[i - 1];
646
if (unicode::IsLeadSurrogate(lead)) {
647
codePoint = unicode::UTF16Decode(lead, c);
648
i--;
649
}
650
}
651
652
// Ignore any characters with the property Case_Ignorable.
653
// NB: We need to skip over all Case_Ignorable characters, even when
654
// they also have the Cased binary property.
655
if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) {
656
continue;
657
}
658
659
precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
660
break;
661
}
662
if (!precededByCased) {
663
return unicode::GREEK_SMALL_LETTER_SIGMA;
664
}
665
666
bool followedByCased = false;
667
for (size_t i = index + 1; i < length;) {
668
char16_t c = chars[i++];
669
uint32_t codePoint = c;
670
if (unicode::IsLeadSurrogate(c) && i < length) {
671
char16_t trail = chars[i];
672
if (unicode::IsTrailSurrogate(trail)) {
673
codePoint = unicode::UTF16Decode(c, trail);
674
i++;
675
}
676
}
677
678
// Ignore any characters with the property Case_Ignorable.
679
// NB: We need to skip over all Case_Ignorable characters, even when
680
// they also have the Cased binary property.
681
if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) {
682
continue;
683
}
684
685
followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
686
break;
687
}
688
if (!followedByCased) {
689
return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
690
}
691
#endif
692
693
return unicode::GREEK_SMALL_LETTER_SIGMA;
694
}
695
696
// If |srcLength == destLength| is true, the destination buffer was allocated
697
// with the same size as the source buffer. When we append characters which
698
// have special casing mappings, we test |srcLength == destLength| to decide
699
// if we need to back out and reallocate a sufficiently large destination
700
// buffer. Otherwise the destination buffer was allocated with the correct
701
// size to hold all lower case mapped characters, i.e.
702
// |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
703
template <typename CharT>
704
static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
705
size_t startIndex, size_t srcLength,
706
size_t destLength) {
707
MOZ_ASSERT(startIndex < srcLength);
708
MOZ_ASSERT(srcLength <= destLength);
709
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), srcLength == destLength);
710
711
size_t j = startIndex;
712
for (size_t i = startIndex; i < srcLength; i++) {
713
char16_t c = srcChars[i];
714
if constexpr (!IsSame<CharT, Latin1Char>::value) {
715
if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
716
char16_t trail = srcChars[i + 1];
717
if (unicode::IsTrailSurrogate(trail)) {
718
trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
719
destChars[j++] = c;
720
destChars[j++] = trail;
721
i++;
722
continue;
723
}
724
}
725
726
// Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
727
// lowercases to <U+0069 U+0307>.
728
if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
729
// Return if the output buffer is too small.
730
if (srcLength == destLength) {
731
return i;
732
}
733
734
destChars[j++] = CharT('i');
735
destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
736
continue;
737
}
738
739
// Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
740
// one of two codepoints depending on context.
741
if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
742
destChars[j++] = Final_Sigma(srcChars, srcLength, i);
743
continue;
744
}
745
}
746
747
c = unicode::ToLowerCase(c);
748
MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value),
749
c <= JSString::MAX_LATIN1_CHAR);
750
destChars[j++] = c;
751
}
752
753
MOZ_ASSERT(j == destLength);
754
return srcLength;
755
}
756
757
static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
758
size_t length) {
759
size_t lowerLength = length;
760
for (size_t i = startIndex; i < length; i++) {
761
char16_t c = chars[i];
762
763
// U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
764
if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
765
lowerLength += 1;
766
}
767
}
768
return lowerLength;
769
}
770
771
template <typename CharT>
772
static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) {
773
// Unlike toUpperCase, toLowerCase has the nice invariant that if the
774
// input is a Latin-1 string, the output is also a Latin-1 string.
775
776
InlineCharBuffer<CharT> newChars;
777
778
const size_t length = str->length();
779
size_t resultLength;
780
{
781
AutoCheckCannotGC nogc;
782
const CharT* chars = str->chars<CharT>(nogc);
783
784
// We don't need extra special casing checks in the loop below,
785
// because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
786
// GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
787
MOZ_ASSERT(unicode::ChangesWhenLowerCased(
788
unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
789
"U+0130 has a simple lower case mapping");
790
MOZ_ASSERT(
791
unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
792
"U+03A3 has a simple lower case mapping");
793
794
// One element Latin-1 strings can be directly retrieved from the
795
// static strings cache.
796
if constexpr (IsSame<CharT, Latin1Char>::value) {
797
if (length == 1) {
798
char16_t lower = unicode::ToLowerCase(chars[0]);
799
MOZ_ASSERT(lower <= JSString::MAX_LATIN1_CHAR);
800
MOZ_ASSERT(StaticStrings::hasUnit(lower));
801
802
return cx->staticStrings().getUnit(lower);
803
}
804
}
805
806
// Look for the first character that changes when lowercased.
807
size_t i = 0;
808
for (; i < length; i++) {
809
CharT c = chars[i];
810
if constexpr (!IsSame<CharT, Latin1Char>::value) {
811
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
812
CharT trail = chars[i + 1];
813
if (unicode::IsTrailSurrogate(trail)) {
814
if (unicode::ChangesWhenLowerCasedNonBMP(c, trail)) {
815
break;
816
}
817
818
i++;
819
continue;
820
}
821
}
822
}
823
if (unicode::ChangesWhenLowerCased(c)) {
824
break;
825
}
826
}
827
828
// If no character needs to change, return the input string.
829
if (i == length) {
830
return str;
831
}
832
833
resultLength = length;
834
if (!newChars.maybeAlloc(cx, resultLength)) {
835
return nullptr;
836
}
837
838
PodCopy(newChars.get(), chars, i);
839
840
size_t readChars =
841
ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
842
if constexpr (!IsSame<CharT, Latin1Char>::value) {
843
if (readChars < length) {
844
resultLength = ToLowerCaseLength(chars, readChars, length);
845
846
if (!newChars.maybeRealloc(cx, length, resultLength)) {
847
return nullptr;
848
}
849
850
MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
851
readChars, length,
852
resultLength));
853
}
854
} else {
855
MOZ_ASSERT(readChars == length,
856
"Latin-1 strings don't have special lower case mappings");
857
}
858
}
859
860
return newChars.toStringDontDeflate(cx, resultLength);
861
}
862
863
// Lower-case |string|: linearize it, then dispatch to the ToLowerCase
// instantiation matching its character type. Returns nullptr on failure.
JSString* js::StringToLowerCase(JSContext* cx, HandleString string) {
  JSLinearString* linear = string->ensureLinear(cx);
  if (!linear) {
    return nullptr;
  }

  return linear->hasLatin1Chars() ? ToLowerCase<Latin1Char>(cx, linear)
                                  : ToLowerCase<char16_t>(cx, linear);
}
874
875
// Native entry point for String.prototype.toLowerCase: converts |this| to a
// string and returns its lower-cased form.
bool js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedString thisString(cx, ToStringForStringFunction(cx, args.thisv()));
  if (!thisString) {
    return false;
  }

  if (JSString* lowered = StringToLowerCase(cx, thisString)) {
    args.rval().setString(lowered);
    return true;
  }

  return false;
}
891
892
#if JS_HAS_INTL_API
893
// String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
894
// with core functionality performed by the intrinsic below.
895
896
// Map the language tag in |str| to the locale name used for case mapping.
//
// Returns "lt", "tr" or "az" (pointers into a static table) when the tag's
// language subtag is one of those, the empty string (ICU root locale) for
// every other tag, and nullptr if linearizing |str| fails.
static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
  JSLinearString* locale = str->ensureLinear(cx);
  if (!locale) {
    return nullptr;
  }

  MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");

  // Lithuanian, Turkish, and Azeri have language dependent case mappings.
  static const char languagesWithSpecialCasing[][3] = {"lt", "tr", "az"};

  // All strings in |languagesWithSpecialCasing| are of length two, so only
  // tags whose language subtag is exactly two characters long can match:
  // either the whole tag has length two, or a '-' follows the first two
  // characters.
  // ES2017 Intl, §9.2.2 BestAvailableLocale
  bool hasTwoLetterLanguage =
      locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-';
  if (!hasTwoLetterLanguage) {
    return "";  // ICU root locale
  }

  const char16_t first = locale->latin1OrTwoByteChar(0);
  const char16_t second = locale->latin1OrTwoByteChar(1);
  for (const auto& language : languagesWithSpecialCasing) {
    if (first == language[0] && second == language[1]) {
      return language;
    }
  }

  return "";  // ICU root locale
}
921
922
// Intrinsic called with exactly two string arguments: the string to
// lower-case and a locale.
//
// Locales without language dependent case mappings are delegated to
// StringToLowerCase; the remaining ones are handled by ICU's u_strToLower.
bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 2);
  MOZ_ASSERT(args[0].isString());
  MOZ_ASSERT(args[1].isString());

  RootedString string(cx, args[0].toString());

  const char* locale = CaseMappingLocale(cx, args[1].toString());
  if (!locale) {
    return false;
  }

  // Call String.prototype.toLowerCase() for language independent casing.
  if (intl::StringsAreEqual(locale, "")) {
    JSString* lowered = StringToLowerCase(cx, string);
    if (!lowered) {
      return false;
    }

    args.rval().setString(lowered);
    return true;
  }

  AutoStableStringChars stableChars(cx);
  if (!stableChars.initTwoByte(cx, string)) {
    return false;
  }
  mozilla::Range<const char16_t> input = stableChars.twoByteRange();

  // Note: maximum case mapping length is three characters, so the result
  // length might be > INT32_MAX. ICU will fail in this case.
  static_assert(JSString::MAX_LENGTH <= INT32_MAX,
                "String length must fit in int32_t for ICU");

  static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;

  // Start with a buffer at least as large as the input.
  Vector<char16_t, INLINE_CAPACITY> buffer(cx);
  if (!buffer.resize(std::max(INLINE_CAPACITY, input.length()))) {
    return false;
  }

  int32_t loweredLength = intl::CallICU(
      cx,
      [&input, locale](UChar* dest, int32_t destCapacity, UErrorCode* status) {
        return u_strToLower(dest, destCapacity, input.begin().get(),
                            input.length(), locale, status);
      },
      buffer);
  if (loweredLength < 0) {
    return false;
  }

  JSString* result = NewStringCopyN<CanGC>(cx, buffer.begin(), loweredLength);
  if (!result) {
    return false;
  }

  args.rval().setString(result);
  return true;
}
983
984
#else
985
986
// When the Intl API is not exposed, String.prototype.toLowerCase is implemented
987
// in C++.
988
static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
989
CallArgs args = CallArgsFromVp(argc, vp);
990
991
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
992
if (!str) {
993
return false;
994
}
995
996
/*
997
* Forcefully ignore the first (or any) argument and return toLowerCase(),
998
* ECMA has reserved that argument, presumably for defining the locale.
999
*/
1000
if (cx->runtime()->localeCallbacks &&
1001
cx->runtime()->localeCallbacks->localeToLowerCase) {
1002
RootedValue result(cx);
1003
if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) {
1004
return false;
1005
}
1006
1007
args.rval().set(result);
1008
return true;
1009
}
1010
1011
RootedLinearString linear(cx, str->ensureLinear(cx));
1012
if (!linear) {
1013
return false;
1014
}
1015
1016
JSString* result = StringToLowerCase(cx, linear);
1017
if (!result) {
1018
return false;
1019
}
1020
1021
args.rval().setString(result);
1022
return true;
1023
}
1024
1025
#endif // JS_HAS_INTL_API
1026
1027
// Latin-1 overload: true if |charCode| has a special upper case mapping.
static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) {
  // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
  // special casing rules, so a direct comparison replaces the generic lookup.
  const bool isSharpS = (charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
  MOZ_ASSERT(isSharpS ==
             unicode::ChangesWhenUpperCasedSpecialCasing(charCode));

  return isSharpS;
}
1037
1038
static inline bool ToUpperCaseHasSpecialCasing(char16_t charCode) {
1039
return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
1040
}
1041
1042
// Latin-1 overload: number of characters in the special upper case mapping
// of |charCode|, which must be U+00DF LATIN SMALL LETTER SHARP S.
static inline size_t ToUpperCaseLengthSpecialCasing(Latin1Char charCode) {
  MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);

  // U+00DF is uppercased to two 'S'.
  const size_t sharpSUpperLength = 2;
  return sharpSUpperLength;
}
1048
1049
static inline size_t ToUpperCaseLengthSpecialCasing(char16_t charCode) {
1050
MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
1051
1052
return unicode::LengthUpperCaseSpecialCasing(charCode);
1053
}
1054
1055
static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1056
Latin1Char* elements,
1057
size_t* index) {
1058
// U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1059
MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1060
static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
1061
1062
elements[(*index)++] = 'S';
1063
elements[(*index)++] = 'S';
1064
}
1065
1066
static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1067
char16_t* elements,
1068
size_t* index) {
1069
unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
1070
}
1071
1072
// See ToLowerCaseImpl for an explanation of the parameters.
1073
template <typename DestChar, typename SrcChar>
1074
static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
1075
size_t startIndex, size_t srcLength,
1076
size_t destLength) {
1077
static_assert(IsSame<SrcChar, Latin1Char>::value ||
1078
!IsSame<DestChar, Latin1Char>::value,
1079
"cannot write non-Latin-1 characters into Latin-1 string");
1080
MOZ_ASSERT(startIndex < srcLength);
1081
MOZ_ASSERT(srcLength <= destLength);
1082
1083
size_t j = startIndex;
1084
for (size_t i = startIndex; i < srcLength; i++) {
1085
char16_t c = srcChars[i];
1086
if constexpr (!IsSame<DestChar, Latin1Char>::value) {
1087
if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
1088
char16_t trail = srcChars[i + 1];
1089
if (unicode::IsTrailSurrogate(trail)) {
1090
trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
1091
destChars[j++] = c;
1092
destChars[j++] = trail;
1093
i++;
1094
continue;
1095
}
1096
}
1097
}
1098
1099
if (MOZ_UNLIKELY(c > 0x7f &&
1100
ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
1101
// Return if the output buffer is too small.
1102
if (srcLength == destLength) {
1103
return i;
1104
}
1105
1106
ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
1107
continue;
1108
}
1109
1110
c = unicode::ToUpperCase(c);
1111
MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value),
1112
c <= JSString::MAX_LATIN1_CHAR);
1113
destChars[j++] = c;
1114
}
1115
1116
MOZ_ASSERT(j == destLength);
1117
return srcLength;
1118
}
1119
1120
// Computes the length of the upper-cased form of chars[0..length), taking
// into account special-casing expansions found in chars[startIndex..length).
// Characters before |startIndex| are counted as one code unit each.
template <typename CharT>
static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
                                size_t length) {
  // Number of code units added on top of the one-to-one mapping.
  size_t growth = 0;
  for (size_t pos = startIndex; pos < length; pos++) {
    const CharT unit = chars[pos];
    if (unit > 0x7f && ToUpperCaseHasSpecialCasing(unit)) {
      growth += ToUpperCaseLengthSpecialCasing(unit) - 1;
    }
  }
  return length + growth;
}
1133
1134
template <typename DestChar, typename SrcChar>
1135
static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
1136
size_t length) {
1137
static_assert(!IsSame<DestChar, SrcChar>::value,
1138
"PodCopy is used for the same type case");
1139
for (size_t i = 0; i < length; i++) {
1140
destChars[i] = srcChars[i];
1141
}
1142
}
1143
1144
// Same-type overload: hands the copy of |length| characters from |srcChars|
// to |destChars| over to PodCopy.
template <typename CharT>
static inline void CopyChars(CharT* destChars, const CharT* srcChars,
                             size_t length) {
  PodCopy(destChars, srcChars, length);
}
1149
1150
template <typename DestChar, typename SrcChar>
1151
static inline bool ToUpperCase(JSContext* cx,
1152
InlineCharBuffer<DestChar>& newChars,
1153
const SrcChar* chars, size_t startIndex,
1154
size_t length, size_t* resultLength) {
1155
MOZ_ASSERT(startIndex < length);
1156
1157
*resultLength = length;
1158
if (!newChars.maybeAlloc(cx, length)) {
1159
return false;
1160
}
1161
1162
CopyChars(newChars.get(), chars, startIndex);
1163
1164
size_t readChars =
1165
ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
1166
if (readChars < length) {
1167
size_t actualLength = ToUpperCaseLength(chars, readChars, length);
1168
1169
*resultLength = actualLength;
1170
if (!newChars.maybeRealloc(cx, length, actualLength)) {
1171
return false;
1172
}
1173
1174
MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
1175
length, actualLength));
1176
}
1177
1178
return true;
1179
}
1180
1181
template <typename CharT>
1182
static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) {
1183
using Latin1Buffer = InlineCharBuffer<Latin1Char>;
1184
using TwoByteBuffer = InlineCharBuffer<char16_t>;
1185
1186
mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
1187
const size_t length = str->length();
1188
size_t resultLength;
1189
{
1190
AutoCheckCannotGC nogc;
1191
const CharT* chars = str->chars<CharT>(nogc);
1192
1193
// Most one element Latin-1 strings can be directly retrieved from the
1194
// static strings cache.
1195
if constexpr (IsSame<CharT, Latin1Char>::value) {
1196
if (length == 1) {
1197
Latin1Char c = chars[0];
1198
if (c != unicode::MICRO_SIGN &&
1199
c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
1200
c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
1201
char16_t upper = unicode::ToUpperCase(c);
1202
MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
1203
MOZ_ASSERT(StaticStrings::hasUnit(upper));
1204
1205
return cx->staticStrings().getUnit(upper);
1206
}
1207
1208
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
1209
ToUpperCaseHasSpecialCasing(c));
1210
}
1211
}
1212
1213
// Look for the first character that changes when uppercased.
1214
size_t i = 0;
1215
for (; i < length; i++) {
1216
CharT c = chars[i];
1217
if constexpr (!IsSame<CharT, Latin1Char>::value) {
1218
if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
1219
CharT trail = chars[i + 1];
1220
if (unicode::IsTrailSurrogate(trail)) {
1221
if (unicode::ChangesWhenUpperCasedNonBMP(c, trail)) {
1222
break;
1223
}
1224
1225
i++;
1226
continue;
1227
}
1228
}
1229
}
1230
if (unicode::ChangesWhenUpperCased(c)) {
1231
break;
1232
}
1233
if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c))) {
1234
break;
1235
}
1236
}
1237
1238
// If no character needs to change, return the input string.
1239
if (i == length) {
1240
return str;
1241
}
1242
1243
// The string changes when uppercased, so we must create a new string.
1244
// Can it be Latin-1?
1245
//
1246
// If the original string is Latin-1, it can -- unless the string
1247
// contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
1248
// the only Latin-1 codepoints that don't uppercase within Latin-1.
1249
// Search for those codepoints to decide whether the new string can be
1250
// Latin-1.
1251
// If the original string is a two-byte string, its uppercase form is
1252
// so rarely Latin-1 that we don't even consider creating a new
1253
// Latin-1 string.
1254
if constexpr (IsSame<CharT, Latin1Char>::value) {
1255
bool resultIsLatin1 = true;
1256
for (size_t j = i; j < length; j++) {
1257
Latin1Char c = chars[j];
1258
if (c == unicode::MICRO_SIGN ||
1259
c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
1260
MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
1261
resultIsLatin1 = false;
1262
break;
1263
} else {
1264
MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
1265
}
1266
}
1267
1268
if (resultIsLatin1) {
1269
newChars.construct<Latin1Buffer>();
1270
1271
if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
1272
&resultLength)) {
1273
return nullptr;
1274
}
1275
} else {
1276
newChars.construct<TwoByteBuffer>();
1277
1278
if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1279
&resultLength)) {
1280
return nullptr;
1281
}
1282
}
1283
} else {
1284
newChars.construct<TwoByteBuffer>();
1285
1286
if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1287
&resultLength)) {
1288
return nullptr;
1289
}
1290
}
1291
}
1292
1293
return newChars.constructed<Latin1Buffer>()
1294
? newChars.ref<Latin1Buffer>().toStringDontDeflate(cx,
1295
resultLength)
1296
: newChars.ref<TwoByteBuffer>().toStringDontDeflate(cx,
1297
resultLength);
1298
}
1299
1300
// Upper-cases |string|. The string is linearized first and then dispatched to
// the ToUpperCase instantiation matching its character storage. Returns
// nullptr if linearization or the conversion fails.
JSString* js::StringToUpperCase(JSContext* cx, HandleString string) {
  JSLinearString* linear = string->ensureLinear(cx);
  if (!linear) {
    return nullptr;
  }

  return linear->hasLatin1Chars() ? ToUpperCase<Latin1Char>(cx, linear)
                                  : ToUpperCase<char16_t>(cx, linear);
}
1311
1312
// Native entry point that converts the |this| value to a string and stores
// its upper-cased form in the return value. Returns false when either the
// string conversion or the upper-casing fails.
bool js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedString thisStr(cx, ToStringForStringFunction(cx, args.thisv()));
  if (!thisStr) {
    return false;
  }

  JSString* upper = StringToUpperCase(cx, thisStr);
  if (!upper) {
    return false;
  }

  args.rval().setString(upper);
  return true;
}
1328
1329
#if JS_HAS_INTL_API
1330
// String.prototype.toLocaleUpperCase is self-hosted when Intl is exposed,
1331
// with core functionality performed by the intrinsic below.
1332
1333
// Intrinsic taking exactly two string arguments: the string to convert and a
// locale string. Stores the locale-sensitive upper case form of the first
// argument in the return value. Returns false on failure.
bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 2);
  MOZ_ASSERT(args[0].isString());
  MOZ_ASSERT(args[1].isString());

  RootedString source(cx, args[0].toString());

  const char* localeTag = CaseMappingLocale(cx, args[1].toString());
  if (!localeTag) {
    return false;
  }

  // An empty locale means language independent casing, which is exactly what
  // String.prototype.toUpperCase() does.
  if (intl::StringsAreEqual(localeTag, "")) {
    JSString* upper = js::StringToUpperCase(cx, source);
    if (!upper) {
      return false;
    }

    args.rval().setString(upper);
    return true;
  }

  AutoStableStringChars stableChars(cx);
  if (!stableChars.initTwoByte(cx, source)) {
    return false;
  }
  mozilla::Range<const char16_t> sourceRange = stableChars.twoByteRange();

  // Note: a single character can map to up to three characters, so the
  // result length might exceed INT32_MAX. ICU will fail in this case.
  static_assert(JSString::MAX_LENGTH <= INT32_MAX,
                "String length must fit in int32_t for ICU");

  static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;

  // Start with room for at least the input length.
  Vector<char16_t, INLINE_CAPACITY> buffer(cx);
  if (!buffer.resize(std::max(INLINE_CAPACITY, sourceRange.length()))) {
    return false;
  }

  int32_t written = intl::CallICU(
      cx,
      [&sourceRange, localeTag](UChar* dest, int32_t destCapacity,
                                UErrorCode* status) {
        return u_strToUpper(dest, destCapacity, sourceRange.begin().get(),
                            sourceRange.length(), localeTag, status);
      },
      buffer);
  if (written < 0) {
    return false;
  }

  JSString* result = NewStringCopyN<CanGC>(cx, buffer.begin(), written);
  if (!result) {
    return false;
  }

  args.rval().setString(result);
  return true;
}
1394
1395
#else
1396
1397
// When the Intl API is not exposed, String.prototype.toUpperCase is implemented
1398
// in C++.
1399
static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1400
CallArgs args = CallArgsFromVp(argc, vp);
1401
1402
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1403
if (!str) {
1404
return false;
1405
}
1406
1407
/*
1408
* Forcefully ignore the first (or any) argument and return toUpperCase(),
1409
* ECMA has reserved that argument, presumably for defining the locale.
1410
*/
1411
if (cx->runtime()->localeCallbacks &&
1412
cx->runtime()->localeCallbacks->localeToUpperCase) {
1413
RootedValue result(cx);
1414
if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) {
1415
return false;
1416
}
1417
1418
args.rval().set(result);
1419
return true;
1420
}
1421
1422
RootedLinearString linear(cx, str->ensureLinear(cx));
1423
if (!linear) {
1424
return false;
1425
}
1426
1427
JSString* result = StringToUpperCase(cx, linear);
1428
if (!result) {
1429
return false;
1430
}
1431
1432
args.rval().setString(result);
1433
return true;
1434
}
1435
1436
#endif // JS_HAS_INTL_API
1437
1438
#if JS_HAS_INTL_API
1439
1440
// String.prototype.localeCompare is self-hosted when Intl functionality is
1441
// exposed, and the only intrinsics it requires are provided in the
1442
// implementation of Intl.Collator.
1443
1444
#else
1445
1446
// String.prototype.localeCompare is implemented in C++ (delegating to
1447
// JSLocaleCallbacks) when Intl functionality is not exposed.
1448
static bool str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
1449
CallArgs args = CallArgsFromVp(argc, vp);
1450
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1451
if (!str) {
1452
return false;
1453
}
1454
1455
RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
1456
if (!thatStr) {
1457
return false;
1458
}
1459
1460
if (cx->runtime()->localeCallbacks &&
1461
cx->runtime()->localeCallbacks->localeCompare) {
1462
RootedValue result(cx);
1463
if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
1464
&result)) {
1465
return false;
1466
}
1467
1468
args.rval().set(result);
1469
return true;
1470
}
1471
1472
int32_t result;
1473
if (!CompareStrings(cx, str, thatStr, &result)) {
1474
return false;
1475
}
1476
1477
args.rval().setInt32(result);
1478
return true;
1479
}
1480
1481
#endif // JS_HAS_INTL_API
1482
1483
#if JS_HAS_INTL_API
1484
1485
// ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
1486
// 21.1.3.12 String.prototype.normalize ( [ form ] )
1487
//
1488
// String.prototype.normalize is only implementable if ICU's normalization
1489
// functionality is available.
1490
static bool str_normalize(JSContext* cx, unsigned argc, Value* vp) {
1491
CallArgs args = CallArgsFromVp(argc, vp);
1492
1493
// Steps 1-2.
1494
RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1495
if (!str) {
1496
return false;
1497
}
1498
1499
enum NormalizationForm { NFC, NFD, NFKC, NFKD };
1500
1501
NormalizationForm form;
1502
if (!args.hasDefined(0)) {
1503
// Step 3.
1504
form = NFC;
1505
} else {
1506
// Step 4.
1507
JSLinearString* formStr = ArgToLinearString(cx, args, 0);
1508
if (!formStr) {
1509
return false;
1510
}
1511
1512
// Step 5.
1513
if (EqualStrings(formStr, cx->names().NFC)) {
1514
form = NFC;
1515
} else if (EqualStrings(formStr, cx->names().NFD)) {
1516
form = NFD;
1517
} else if (EqualStrings(formStr, cx->names().NFKC)) {
1518
form = NFKC;
1519
} else if (EqualStrings(formStr, cx->names().NFKD)) {
1520
form = NFKD;
1521
} else {
1522
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1523
JSMSG_INVALID_NORMALIZE_FORM);
1524
return false;
1525
}
1526
}
1527
1528
// Latin-1 strings are already in Normalization Form C.
1529
if (form == NFC && str->hasLatin1Chars()) {
1530
// Step 7.
1531
args.rval().setString(str);
1532
return true;
1533
}
1534
1535
// Step 6.
1536
AutoStableStringChars stableChars(cx);
1537
if (!stableChars.initTwoByte(cx, str)) {
1538
return false;
1539
}
1540
1541
mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
1542
1543
// The unorm2_getXXXInstance() methods return a shared instance which must
1544
// not be deleted.
1545
UErrorCode status = U_ZERO_ERROR;
1546
const UNormalizer2* normalizer;
1547
if (form == NFC) {
1548
normalizer = unorm2_getNFCInstance(&status);
1549
} else if (form == NFD) {
1550
normalizer = unorm2_getNFDInstance(&status);
1551
} else if (form == NFKC) {
1552
normalizer = unorm2_getNFKCInstance(&status);
1553
} else {
1554
MOZ_ASSERT(form == NFKD);
1555
normalizer = unorm2_getNFKDInstance(&status);
1556
}
1557
if (U_FAILURE(status)) {
1558
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1559
JSMSG_INTERNAL_INTL_ERROR);
1560
return false;
1561
}
1562
1563
int32_t spanLengthInt = unorm2_spanQuickCheckYes(
1564
normalizer, srcChars.begin().get(), srcChars.length(), &status);
1565
if (U_FAILURE(status)) {
1566
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1567
JSMSG_INTERNAL_INTL_ERROR);
1568
return false;
1569
}
1570
MOZ_ASSERT(0 <= spanLengthInt && size_t(spanLengthInt) <= srcChars.length());
1571
size_t spanLength = size_t(spanLengthInt);
1572
1573
// Return if the input string is already normalized.
1574
if (spanLength == srcChars.length()) {
1575
// Step 7.
1576
args.rval().setString(str);
1577
return true;
1578
}
1579
1580
static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1581
1582
Vector<char16_t, INLINE_CAPACITY> chars(cx);
1583
if (!chars.resize(std::max(INLINE_CAPACITY, srcChars.length()))) {
1584
return false;
1585
}
1586
1587
// Copy the already normalized prefix.
1588
if (spanLength > 0) {
1589
PodCopy(chars.begin(), srcChars.begin().get(), spanLength);
1590
}
1591
1592
int32_t size = intl::CallICU(
1593
cx,
1594
[normalizer, &srcChars, spanLength](UChar* chars, uint32_t size,
1595
UErrorCode* status) {
1596
mozilla::RangedPtr<const char16_t> remainingStart =
1597
srcChars.begin() + spanLength;
1598
size_t remainingLength = srcChars.length() - spanLength;
1599
1600
return unorm2_normalizeSecondAndAppend(normalizer, chars, spanLength,
1601
size, remainingStart.get(),
1602
remainingLength, status);
1603
},
1604
chars);
1605
if (size < 0) {
1606
return false;
1607
}
1608
1609
JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1610
if (!ns) {
1611
return false;
1612
}
1613
1614
// Step 7.
1615
args.rval().setString(ns);
1616
return true;
1617
}
1618
1619
#endif // JS_HAS_INTL_API
1620
1621
// Native entry point that stores in the return value the one-character string
// at the requested index of |this|, or the empty string when the index is out
// of range.
static bool str_charAt(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  // Shared result for every out-of-range index.
  auto returnEmptyString = [cx, &args]() {
    args.rval().setString(cx->runtime()->emptyString);
    return true;
  };

  RootedString str(cx);
  size_t index;

  const bool stringThisAndInt32Index =
      args.thisv().isString() && args.length() != 0 && args[0].isInt32();
  if (stringThisAndInt32Index) {
    // Fast path: no conversions needed. A negative int32 becomes a huge
    // size_t here and is rejected by the bounds check.
    str = args.thisv().toString();
    index = size_t(args[0].toInt32());
    if (index >= str->length()) {
      return returnEmptyString();
    }
  } else {
    str = ToStringForStringFunction(cx, args.thisv());
    if (!str) {
      return false;
    }

    // With no argument the position stays at 0.
    double position = 0.0;
    if (args.length() > 0 && !ToInteger(cx, args[0], &position)) {
      return false;
    }

    if (position < 0 || str->length() <= position) {
      return returnEmptyString();
    }
    index = size_t(position);
  }

  JSString* unit = cx->staticStrings().getUnitStringForElement(cx, str, index);
  if (!unit) {
    return false;
  }
  args.rval().setString(unit);
  return true;
}
1660
1661
bool js::str_charCodeAt_impl(JSContext* cx, HandleString string,
1662
HandleValue index, MutableHandleValue res) {
1663
size_t i;
1664
if (index.isInt32()) {
1665
i = index.toInt32();
1666
if (i >= string->length()) {
1667
goto out_of_range;
1668
}
1669
} else {
1670
double d = 0.0;
1671
if (!ToInteger(cx, index, &d)) {
1672
return false;
1673
}
1674
// check whether d is negative as size_t is unsigned
1675
if (d < 0 || string->length() <= d) {
1676
goto out_of_range;
1677
}
1678
i = size_t(d);
1679
}
1680
char16_t c;
1681
if (!string->getChar(cx, i, &c)) {
1682
return false;
1683
}
1684
res.setInt32(c);
1685
return true;
1686
1687
out_of_range:
1688
res.setNaN();
1689
return true;
1690
}
1691
1692
// Native entry point for charCodeAt: resolves the |this| string and the index
// argument (int32 0 when no argument was passed) and forwards to
// js::str_charCodeAt_impl.
bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RootedString thisStr(cx);
  if (!args.thisv().isString()) {
    thisStr = ToStringForStringFunction(cx, args.thisv());
    if (!thisStr) {
      return false;
    }
  } else {
    // A string |this| is used directly without conversion.
    thisStr = args.thisv().toString();
  }

  RootedValue position(cx);
  if (args.length() == 0) {
    position.setInt32(0);
  } else {
    position = args[0];
  }

  return js::str_charCodeAt_impl(cx, thisStr, position, args.rval());
}
1712
1713
/*
1714
* Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1715
* The patlen argument must be positive and no greater than sBMHPatLenMax.
1716
*
1717
* Return the index of pat in text, or -1 if not found.
1718
*/
1719
static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1720
static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
1721
static const int sBMHBadPattern =
1722
-2; /* return value if pat is not ISO-Latin-1 */
1723
1724
template <typename TextChar, typename PatChar>
1725
static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
1726
const PatChar* pat, uint32_t patLen) {
1727
MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1728
1729
uint8_t skip[sBMHCharSetSize];
1730
for (uint32_t i = 0; i < sBMHCharSetSize; i++) {
1731
skip[i] = uint8_t(patLen);
1732
}
1733
1734
uint32_t patLast = patLen - 1;
1735
for (uint32_t i = 0; i < patLast; i++) {
1736
char16_t c = pat[i];
1737
if (c >= sBMHCharSetSize) {
1738
return sBMHBadPattern;
1739
}
1740
skip[c] = uint8_t(patLast - i);
1741
}
1742
1743
for (uint32_t k = patLast; k < textLen;) {
1744
for (uint32_t i = k, j = patLast;; i--, j--) {
1745
if (text[i] != pat[j]) {
1746
break;
1747
}
1748
if (j == 0) {
1749
return static_cast<int>(i); /* safe: max string size */
1750
}
1751
}
1752
1753
char16_t c = text[k];
1754
k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1755
}
1756
return -1;
1757
}
1758
1759
template <typename TextChar, typename PatChar>
1760
struct MemCmp {
1761
typedef uint32_t Extent;
1762
static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
1763
uint32_t patLen) {
1764
return (patLen - 1) * sizeof(PatChar);
1765
}
1766
static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1767
Extent extent) {
1768
MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1769
return memcmp(p, t, extent) == 0;
1770
}
1771
};
1772
1773
template <typename TextChar, typename PatChar>
1774
struct ManualCmp {
1775
typedef const PatChar* Extent;
1776
static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat,
1777
uint32_t patLen) {
1778
return pat + patLen;
1779
}