Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2
* vim: set ts=8 sts=2 et sw=2 tw=80:
3
* This Source Code Form is subject to the terms of the Mozilla Public
4
* License, v. 2.0. If a copy of the MPL was not distributed with this
5
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
// JS lexical scanner.
8
9
#include "frontend/TokenStream.h"
10
11
#include "mozilla/ArrayUtils.h"
12
#include "mozilla/Attributes.h"
13
#include "mozilla/IntegerTypeTraits.h"
14
#include "mozilla/Likely.h"
15
#include "mozilla/Maybe.h"
16
#include "mozilla/MemoryChecking.h"
17
#include "mozilla/ScopeExit.h"
18
#include "mozilla/Span.h"
19
#include "mozilla/TemplateLib.h"
20
#include "mozilla/TextUtils.h"
21
#include "mozilla/Utf8.h"
22
23
#include <algorithm>
#include <limits>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <type_traits>
#include <utility>
30
31
#include "jsexn.h"
32
#include "jsnum.h"
33
34
#include "frontend/BytecodeCompiler.h"
35
#include "frontend/Parser.h"
36
#include "frontend/ReservedWords.h"
37
#include "js/CharacterEncoding.h"
38
#include "js/RegExpFlags.h" // JS::RegExpFlags
39
#include "js/UniquePtr.h"
40
#include "util/StringBuffer.h"
41
#include "util/Unicode.h"
42
#include "vm/FrameIter.h" // js::{,NonBuiltin}FrameIter
43
#include "vm/HelperThreads.h"
44
#include "vm/JSAtom.h"
45
#include "vm/JSContext.h"
46
#include "vm/Realm.h"
47
48
using mozilla::ArrayLength;
49
using mozilla::AsciiAlphanumericToNumber;
50
using mozilla::AssertedCast;
51
using mozilla::DecodeOneUtf8CodePoint;
52
using mozilla::IsAscii;
53
using mozilla::IsAsciiAlpha;
54
using mozilla::IsAsciiDigit;
55
using mozilla::IsAsciiHexDigit;
56
using mozilla::IsTrailingUnit;
57
using mozilla::MakeScopeExit;
58
using mozilla::MakeSpan;
59
using mozilla::Maybe;
60
using mozilla::PointerRangeSize;
61
using mozilla::Utf8Unit;
62
63
using JS::ReadOnlyCompileOptions;
64
using JS::RegExpFlag;
65
using JS::RegExpFlags;
66
67
// There's some very preliminary support for private fields in this file. It's
68
// disabled in all builds, for now.
69
//#define JS_PRIVATE_FIELDS 1
70
71
struct ReservedWordInfo {
72
const char* chars; // C string with reserved word text
73
js::frontend::TokenKind tokentype;
74
};
75
76
// Table of every reserved word, expanded from
// FOR_EACH_JAVASCRIPT_RESERVED_WORD.  Each entry pairs the word's
// |js_<word>_str| string with its |js::frontend| token kind.  Entries are
// looked up by index from FindReservedWord.
static const ReservedWordInfo reservedWords[] = {
#define RESERVED_WORD_INFO(word, name, type) \
  {js_##word##_str, js::frontend::type},
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_INFO)
#undef RESERVED_WORD_INFO
};
82
83
// Returns a ReservedWordInfo for the specified characters, or nullptr if the
// string is not a reserved word.
//
// |s| points at |length| code units; |length| must be nonzero.
//
// The matching logic itself lives in the included
// "frontend/ReservedWordsGenerated.h", which is written in terms of the JSRW_*
// macros defined below.  Each macro either reads the input (JSRW_LENGTH,
// JSRW_AT) or stores a |reservedWords| index in |i| and jumps to one of the
// three labels at the end of this function.
// NOTE(review): the generated header isn't visible here; it is assumed to
// always end in one of the JSRW_GOT_MATCH/JSRW_TEST_GUESS/JSRW_NO_MATCH jumps.
template <typename CharT>
static const ReservedWordInfo* FindReservedWord(const CharT* s, size_t length) {
  MOZ_ASSERT(length != 0);

  size_t i;
  const ReservedWordInfo* rw;
  const char* chars;

#define JSRW_LENGTH() length
#define JSRW_AT(column) s[column]
#define JSRW_GOT_MATCH(index) \
  i = (index);                \
  goto got_match;
#define JSRW_TEST_GUESS(index) \
  i = (index);                 \
  goto test_guess;
#define JSRW_NO_MATCH() goto no_match;
#include "frontend/ReservedWordsGenerated.h"
#undef JSRW_NO_MATCH
#undef JSRW_TEST_GUESS
#undef JSRW_GOT_MATCH
#undef JSRW_AT
#undef JSRW_LENGTH

got_match:
  // The generated code has fully identified the word.
  return &reservedWords[i];

test_guess:
  // The generated code only narrowed the candidates down to entry |i|: compare
  // all |length| units of the input against that entry's spelling.
  rw = &reservedWords[i];
  chars = rw->chars;
  do {
    if (*s++ != static_cast<unsigned char>(*chars++)) {
      goto no_match;
    }
  } while (--length != 0);
  return rw;

no_match:
  return nullptr;
}
125
126
template <>
127
MOZ_ALWAYS_INLINE const ReservedWordInfo* FindReservedWord<Utf8Unit>(
128
const Utf8Unit* units, size_t length) {
129
return FindReservedWord(Utf8AsUnsignedChars(units), length);
130
}
131
132
// Looks up |str| in the reserved-word table, handling both Latin-1 and
// two-byte string storage.
//
// |*visibility| is always written: Private when JS_PRIVATE_FIELDS is defined
// and the string starts with '#' (in which case nullptr is returned without
// consulting the table), Public otherwise.  When JS_PRIVATE_FIELDS is not
// defined, a leading '#' is asserted never to occur.
//
// Returns the matching table entry, or nullptr if |str| is not a reserved
// word.
static const ReservedWordInfo* FindReservedWord(
    JSLinearString* str, js::frontend::NameVisibility* visibility) {
  JS::AutoCheckCannotGC nogc;
  if (str->hasLatin1Chars()) {
    const JS::Latin1Char* chars = str->latin1Chars(nogc);
    size_t length = str->length();
#ifdef JS_PRIVATE_FIELDS
    if (length > 0 && chars[0] == '#') {
      *visibility = js::frontend::NameVisibility::Private;
      return nullptr;
    }
#else
    MOZ_ASSERT_IF(length > 0, chars[0] != '#');
#endif
    *visibility = js::frontend::NameVisibility::Public;
    return FindReservedWord(chars, length);
  }

  const char16_t* chars = str->twoByteChars(nogc);
  size_t length = str->length();
#ifdef JS_PRIVATE_FIELDS
  if (length > 0 && chars[0] == '#') {
    *visibility = js::frontend::NameVisibility::Private;
    return nullptr;
  }
#else
  MOZ_ASSERT_IF(length > 0, chars[0] != '#');
#endif
  *visibility = js::frontend::NameVisibility::Public;
  return FindReservedWord(chars, length);
}
163
164
static uint32_t GetSingleCodePoint(const char16_t** p, const char16_t* end) {
165
using namespace js;
166
167
uint32_t codePoint;
168
if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(**p)) && *p + 1 < end) {
169
char16_t lead = **p;
170
char16_t maybeTrail = *(*p + 1);
171
if (unicode::IsTrailSurrogate(maybeTrail)) {
172
*p += 2;
173
return unicode::UTF16Decode(lead, maybeTrail);
174
}
175
}
176
177
codePoint = **p;
178
(*p)++;
179
return codePoint;
180
}
181
182
// Returns true iff |c| is the ASCII digit '0' or '1'.  The value is compared
// as unsigned so that negative values of a signed character type never match.
template <typename CharT>
static constexpr bool IsAsciiBinary(CharT c) {
  using UnsignedCharT = std::make_unsigned_t<CharT>;
  auto uc = static_cast<UnsignedCharT>(c);
  return uc == '0' || uc == '1';
}
188
189
// Returns true iff |c| is an ASCII octal digit, i.e. in the range '0'..'7'.
// The value is compared as unsigned so that negative values of a signed
// character type never match.
template <typename CharT>
static constexpr bool IsAsciiOctal(CharT c) {
  using UnsignedCharT = std::make_unsigned_t<CharT>;
  auto uc = static_cast<UnsignedCharT>(c);
  return '0' <= uc && uc <= '7';
}
195
196
// Converts an ASCII octal digit to its numeric value by subtracting '0'.
// No validation is performed: the result is only meaningful when |c| is in
// the range '0'..'7'.
template <typename CharT>
static constexpr uint8_t AsciiOctalToNumber(CharT c) {
  using UnsignedCharT = std::make_unsigned_t<CharT>;
  auto uc = static_cast<UnsignedCharT>(c);
  // The subtraction is performed in a promoted type; narrow explicitly.
  return static_cast<uint8_t>(uc - '0');
}
202
203
namespace js {
204
205
namespace frontend {
206
207
// Returns whether the characters of |str| pass the IsIdentifier check,
// dispatching on the string's character storage (Latin-1 or two-byte).
// |str| must be non-null.
bool IsIdentifier(JSLinearString* str) {
  JS::AutoCheckCannotGC nogc;
  MOZ_ASSERT(str);
  if (str->hasLatin1Chars()) {
    return IsIdentifier(str->latin1Chars(nogc), str->length());
  }
  return IsIdentifier(str->twoByteChars(nogc), str->length());
}
215
216
// Returns whether the characters of |str| pass the
// IsIdentifierNameOrPrivateName check, dispatching on the string's character
// storage (Latin-1 or two-byte).  |str| must be non-null.
bool IsIdentifierNameOrPrivateName(JSLinearString* str) {
  JS::AutoCheckCannotGC nogc;
  MOZ_ASSERT(str);
  if (str->hasLatin1Chars()) {
    return IsIdentifierNameOrPrivateName(str->latin1Chars(nogc), str->length());
  }
  return IsIdentifierNameOrPrivateName(str->twoByteChars(nogc), str->length());
}
224
225
bool IsIdentifier(const Latin1Char* chars, size_t length) {
226
if (length == 0) {
227
return false;
228
}
229
230
if (!unicode::IsIdentifierStart(char16_t(*chars))) {
231
return false;
232
}
233
234
const Latin1Char* end = chars + length;
235
while (++chars != end) {
236
if (!unicode::IsIdentifierPart(char16_t(*chars))) {
237
return false;
238
}
239
}
240
241
return true;
242
}
243
244
bool IsIdentifierNameOrPrivateName(const Latin1Char* chars, size_t length) {
245
if (length == 0) {
246
return false;
247
}
248
249
if (char16_t(*chars) == '#') {
250
#ifdef JS_PRIVATE_FIELDS
251
++chars;
252
--length;
253
#else
254
return false;
255
#endif
256
}
257
258
return IsIdentifier(chars, length);
259
}
260
261
bool IsIdentifier(const char16_t* chars, size_t length) {
262
if (length == 0) {
263
return false;
264
}
265
266
const char16_t* p = chars;
267
const char16_t* end = chars + length;
268
uint32_t codePoint;
269
270
codePoint = GetSingleCodePoint(&p, end);
271
if (!unicode::IsIdentifierStart(codePoint)) {
272
return false;
273
}
274
275
while (p < end) {
276
codePoint = GetSingleCodePoint(&p, end);
277
if (!unicode::IsIdentifierPart(codePoint)) {
278
return false;
279
}
280
}
281
282
return true;
283
}
284
285
bool IsIdentifierNameOrPrivateName(const char16_t* chars, size_t length) {
286
if (length == 0) {
287
return false;
288
}
289
290
const char16_t* p = chars;
291
const char16_t* end = chars + length;
292
uint32_t codePoint;
293
294
codePoint = GetSingleCodePoint(&p, end);
295
if (codePoint == '#') {
296
#ifdef JS_PRIVATE_FIELDS
297
if (length == 1) {
298
return false;
299
}
300
301
codePoint = GetSingleCodePoint(&p, end);
302
#else
303
return false;
304
#endif
305
}
306
307
if (!unicode::IsIdentifierStart(codePoint)) {
308
return false;
309
}
310
311
while (p < end) {
312
codePoint = GetSingleCodePoint(&p, end);
313
if (!unicode::IsIdentifierPart(codePoint)) {
314
return false;
315
}
316
}
317
318
return true;
319
}
320
321
// Returns true iff |str| is found in the reserved-word table and its token
// kind satisfies TokenKindIsKeyword.
bool IsKeyword(JSLinearString* str) {
  // Required by FindReservedWord's signature; the value isn't used here.
  NameVisibility visibility;
  if (const ReservedWordInfo* rw = FindReservedWord(str, &visibility)) {
    return TokenKindIsKeyword(rw->tokentype);
  }

  return false;
}
329
330
// Returns the token kind for |str|: the reserved word's kind if |str| is in
// the reserved-word table, otherwise TokenKind::PrivateName or TokenKind::Name
// according to the visibility FindReservedWord reported.
TokenKind ReservedWordTokenKind(PropertyName* str) {
  NameVisibility visibility;
  if (const ReservedWordInfo* rw = FindReservedWord(str, &visibility)) {
    return rw->tokentype;
  }

  return visibility == NameVisibility::Private ? TokenKind::PrivateName
                                               : TokenKind::Name;
}
339
340
// Returns the C string spelling of the reserved word named by |str|, or
// nullptr if |str| is not a reserved word.
const char* ReservedWordToCharZ(PropertyName* str) {
  // Required by FindReservedWord's signature; the value isn't used here.
  NameVisibility visibility;
  if (const ReservedWordInfo* rw = FindReservedWord(str, &visibility)) {
    return ReservedWordToCharZ(rw->tokentype);
  }

  return nullptr;
}
348
349
// Returns the C string spelling of the reserved word whose token kind is
// |tt|.  |tt| must not be TokenKind::Name.  Any kind not listed in
// FOR_EACH_JAVASCRIPT_RESERVED_WORD trips a debug assertion and yields
// nullptr.
const char* ReservedWordToCharZ(TokenKind tt) {
  MOZ_ASSERT(tt != TokenKind::Name);
  switch (tt) {
#define EMIT_CASE(word, name, type) \
  case type:                        \
    return js_##word##_str;
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
#undef EMIT_CASE
    default:
      MOZ_ASSERT_UNREACHABLE("Not a reserved word PropertyName.");
  }
  return nullptr;
}
362
363
// Returns the |cx->names()| entry for the reserved word whose token kind is
// |tt|.  |tt| must not be TokenKind::Name.  Any kind not listed in
// FOR_EACH_JAVASCRIPT_RESERVED_WORD trips a debug assertion and yields
// nullptr.
PropertyName* TokenStreamAnyChars::reservedWordToPropertyName(
    TokenKind tt) const {
  MOZ_ASSERT(tt != TokenKind::Name);
  switch (tt) {
#define EMIT_CASE(word, name, type) \
  case type:                        \
    return cx->names().name;
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
#undef EMIT_CASE
    default:
      MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
  }
  return nullptr;
}
377
378
// Initializes the line-start table with two entries: the offset at which the
// first line begins (|initialOffset|) followed by the MAX_PTR sentinel.
// |initialLineNumber| is recorded as the number of that first line.
SourceCoords::SourceCoords(JSContext* cx, uint32_t initialLineNumber,
                           uint32_t initialOffset)
    : lineStartOffsets_(cx), initialLineNum_(initialLineNumber), lastIndex_(0) {
  // This is actually necessary!  Removing it causes compile errors on
  // GCC and clang.  You could try declaring this:
  //
  //   const uint32_t SourceCoords::MAX_PTR;
  //
  // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.
  //
  uint32_t maxPtr = MAX_PTR;

  // The first line begins at buffer offset |initialOffset|.  MAX_PTR is the
  // sentinel.  The appends cannot fail because |lineStartOffsets_| has
  // statically-allocated elements.
  MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
  MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
  lineStartOffsets_.infallibleAppend(initialOffset);
  lineStartOffsets_.infallibleAppend(maxPtr);
}
398
399
// Records that line |lineNum| starts at |lineStartOffset|.
//
// If the line maps to the current sentinel slot, a new sentinel is appended
// and the old slot is overwritten with |lineStartOffset|.  Otherwise the line
// is already recorded and nothing changes.
//
// Returns false only if growing the table fails; per the static_assert below,
// the allocation policy is relied upon to have reported that error.
MOZ_ALWAYS_INLINE bool SourceCoords::add(uint32_t lineNum,
                                         uint32_t lineStartOffset) {
  uint32_t index = indexFromLineNumber(lineNum);
  uint32_t sentinelIndex = lineStartOffsets_.length() - 1;

  MOZ_ASSERT(lineStartOffsets_[0] <= lineStartOffset);
  MOZ_ASSERT(lineStartOffsets_[sentinelIndex] == MAX_PTR);

  if (index == sentinelIndex) {
    // We haven't seen this newline before.  Update lineStartOffsets_
    // only if lineStartOffsets_.append succeeds, to keep sentinel.
    // Otherwise return false to tell TokenStream about OOM.
    uint32_t maxPtr = MAX_PTR;
    if (!lineStartOffsets_.append(maxPtr)) {
      static_assert(mozilla::IsSame<decltype(lineStartOffsets_.allocPolicy()),
                                    TempAllocPolicy&>::value,
                    "this function's caller depends on it reporting an "
                    "error on failure, as TempAllocPolicy ensures");
      return false;
    }

    lineStartOffsets_[index] = lineStartOffset;
  } else {
    // We have seen this newline before (and ungot it).  Do nothing (other
    // than checking it hasn't mysteriously changed).
    // This path can be executed after hitting OOM, so check index.
    MOZ_ASSERT_IF(index < sentinelIndex,
                  lineStartOffsets_[index] == lineStartOffset);
  }
  return true;
}
430
431
// Extends this table with any line starts that |other| has recorded beyond
// what this table already contains.  Both tables must start at the same
// offset and end with the MAX_PTR sentinel.
//
// Returns false if an append fails, true otherwise (including when there is
// nothing to copy).
MOZ_ALWAYS_INLINE bool SourceCoords::fill(const SourceCoords& other) {
  MOZ_ASSERT(lineStartOffsets_[0] == other.lineStartOffsets_[0]);
  MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
  MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);

  if (lineStartOffsets_.length() >= other.lineStartOffsets_.length()) {
    return true;
  }

  // Overwrite our sentinel with |other|'s entry at that index, then append the
  // rest of |other|'s entries; the last one copied is |other|'s sentinel.
  uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
  lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];

  for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length();
       i++) {
    if (!lineStartOffsets_.append(other.lineStartOffsets_[i])) {
      return false;
    }
  }
  return true;
}
451
452
// Returns the index |i| into |lineStartOffsets_| of the line containing
// |offset|, i.e. such that
// |lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1]|.
//
// The previous result is cached in |lastIndex_| and tried first (along with
// the next two lines) before falling back to a binary search.
// NOTE(review): |lastIndex_| is assigned in this const method, so it is
// presumably declared |mutable| in the class definition.
MOZ_ALWAYS_INLINE uint32_t
SourceCoords::indexFromOffset(uint32_t offset) const {
  uint32_t iMin, iMax, iMid;

  if (lineStartOffsets_[lastIndex_] <= offset) {
    // If we reach here, offset is on a line the same as or higher than
    // last time.  Check first for the +0, +1, +2 cases, because they
    // typically cover 85--98% of cases.
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is same as last time
    }

    // If we reach here, there must be at least one more entry (plus the
    // sentinel).  Try it.
    lastIndex_++;
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is one higher than last time
    }

    // The same logic applies here.
    lastIndex_++;
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is two higher than last time
    }

    // No luck.  Oh well, we have a better-than-default starting point for
    // the binary search.
    iMin = lastIndex_ + 1;
    MOZ_ASSERT(iMin <
               lineStartOffsets_.length() - 1);  // -1 due to the sentinel

  } else {
    iMin = 0;
  }

  // This is a binary search with deferred detection of equality, which was
  // marginally faster in this case than a standard binary search.
  // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
  // want one before that.
  iMax = lineStartOffsets_.length() - 2;
  while (iMax > iMin) {
    iMid = iMin + (iMax - iMin) / 2;
    if (offset >= lineStartOffsets_[iMid + 1]) {
      iMin = iMid + 1;  // offset is above lineStartOffsets_[iMid]
    } else {
      iMax = iMid;  // offset is below or within lineStartOffsets_[iMid]
    }
  }

  MOZ_ASSERT(iMax == iMin);
  MOZ_ASSERT(lineStartOffsets_[iMin] <= offset);
  MOZ_ASSERT(offset < lineStartOffsets_[iMin + 1]);

  lastIndex_ = iMin;
  return iMin;
}
508
509
// Builds a LineToken from the line index containing |offset| and |offset|
// itself.
SourceCoords::LineToken SourceCoords::lineToken(uint32_t offset) const {
  return LineToken(indexFromOffset(offset), offset);
}
512
513
// Initializes the character-type-independent token stream state from
// |options| (filename, starting line number, source offset, muted-errors
// flag) and marks the token kinds recorded in |isExprEnding|.
TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx,
                                         const ReadOnlyCompileOptions& options,
                                         StrictModeGetter* smg)
    : cx(cx),
      options_(options),
      strictModeGetter_(smg),
      filename_(options.filename()),
      longLineColumnInfo_(cx),
      srcCoords(cx, options.lineno, options.scriptSourceOffset),
      lineno(options.lineno),
      mutedErrors(options.mutedErrors()) {
  // |isExprEnding| was initially zeroed: overwrite the true entries here.
  isExprEnding[size_t(TokenKind::Comma)] = true;
  isExprEnding[size_t(TokenKind::Semi)] = true;
  isExprEnding[size_t(TokenKind::Colon)] = true;
  isExprEnding[size_t(TokenKind::RightParen)] = true;
  isExprEnding[size_t(TokenKind::RightBracket)] = true;
  isExprEnding[size_t(TokenKind::RightCurly)] = true;
}
532
533
template <typename Unit>
534
TokenStreamCharsBase<Unit>::TokenStreamCharsBase(JSContext* cx,
535
const Unit* units,
536
size_t length,
537
size_t startOffset)
538
: TokenStreamCharsShared(cx), sourceUnits(units, length, startOffset) {}
539
540
template <>
541
MOZ_MUST_USE bool TokenStreamCharsBase<char16_t>::
542
fillCharBufferFromSourceNormalizingAsciiLineBreaks(const char16_t* cur,
543
const char16_t* end) {
544
MOZ_ASSERT(this->charBuffer.length() == 0);
545
546
while (cur < end) {
547
char16_t ch = *cur++;
548
if (ch == '\r') {
549
ch = '\n';
550
if (cur < end && *cur == '\n') {
551
cur++;
552
}
553
}
554
555
if (!this->charBuffer.append(ch)) {
556
return false;
557
}
558
}
559
560
MOZ_ASSERT(cur == end);
561
return true;
562
}
563
564
template <>
565
MOZ_MUST_USE bool TokenStreamCharsBase<Utf8Unit>::
566
fillCharBufferFromSourceNormalizingAsciiLineBreaks(const Utf8Unit* cur,
567
const Utf8Unit* end) {
568
MOZ_ASSERT(this->charBuffer.length() == 0);
569
570
while (cur < end) {
571
Utf8Unit unit = *cur++;
572
if (MOZ_LIKELY(IsAscii(unit))) {
573
char16_t ch = unit.toUint8();
574
if (ch == '\r') {
575
ch = '\n';
576
if (cur < end && *cur == Utf8Unit('\n')) {
577
cur++;
578
}
579
}
580
581
if (!this->charBuffer.append(ch)) {
582
return false;
583
}
584
585
continue;
586
}
587
588
Maybe<char32_t> ch = DecodeOneUtf8CodePoint(unit, &cur, end);
589
MOZ_ASSERT(ch.isSome(),
590
"provided source text should already have been validated");
591
592
if (!appendCodePointToCharBuffer(ch.value())) {
593
return false;
594
}
595
}
596
597
MOZ_ASSERT(cur == end);
598
return true;
599
}
600
601
template <typename Unit, class AnyCharsAccess>
602
TokenStreamSpecific<Unit, AnyCharsAccess>::TokenStreamSpecific(
603
JSContext* cx, const ReadOnlyCompileOptions& options, const Unit* units,
604
size_t length)
605
: TokenStreamChars<Unit, AnyCharsAccess>(cx, units, length,
606
options.scriptSourceOffset) {}
607
608
bool TokenStreamAnyChars::checkOptions() {
609
// Constrain starting columns to half of the range of a signed 32-bit value,
610
// to avoid overflow.
611
if (options().column >= std::numeric_limits<int32_t>::max() / 2 + 1) {
612
reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
613
return false;
614
}
615
616
return true;
617
}
618
619
void TokenStreamAnyChars::reportErrorNoOffset(unsigned errorNumber, ...) {
620
va_list args;
621
va_start(args, errorNumber);
622
623
reportErrorNoOffsetVA(errorNumber, &args);
624
625
va_end(args);
626
}
627
628
void TokenStreamAnyChars::reportErrorNoOffsetVA(unsigned errorNumber,
629
va_list* args) {
630
ErrorMetadata metadata;
631
computeErrorMetadataNoOffset(&metadata);
632
633
ReportCompileError(cx, std::move(metadata), nullptr, JSREPORT_ERROR,
634
errorNumber, args);
635
}
636
637
// Use the fastest available getc: prefer the non-locking variants when the
// build configuration says they exist, falling back to standard getc.
#if defined(HAVE_GETC_UNLOCKED)
#  define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
#  define fast_getc _getc_nolock
#else
#  define fast_getc getc
#endif
645
646
// Advances line bookkeeping past an end-of-line: saves the current line base
// in |prevLinebase|, makes |lineStartOffset| the new line base, increments
// the line number, and records the new line start in |srcCoords|.
//
// Returns the result of |srcCoords.add|.
MOZ_MUST_USE MOZ_ALWAYS_INLINE bool
TokenStreamAnyChars::internalUpdateLineInfoForEOL(uint32_t lineStartOffset) {
  prevLinebase = linebase;
  linebase = lineStartOffset;
  lineno++;
  return srcCoords.add(lineno, linebase);
}
653
654
#ifdef DEBUG
655
656
template <>
657
inline void SourceUnits<char16_t>::assertNextCodePoint(
658
const PeekedCodePoint<char16_t>& peeked) {
659
char32_t c = peeked.codePoint();
660
if (c < unicode::NonBMPMin) {
661
MOZ_ASSERT(peeked.lengthInUnits() == 1);
662
MOZ_ASSERT(ptr[0] == c);
663
} else {
664
MOZ_ASSERT(peeked.lengthInUnits() == 2);
665
char16_t lead, trail;
666
unicode::UTF16Encode(c, &lead, &trail);
667
MOZ_ASSERT(ptr[0] == lead);
668
MOZ_ASSERT(ptr[1] == trail);
669
}
670
}
671
672
template <>
673
inline void SourceUnits<Utf8Unit>::assertNextCodePoint(
674
const PeekedCodePoint<Utf8Unit>& peeked) {
675
char32_t c = peeked.codePoint();
676
677
// This is all roughly indulgence of paranoia only for assertions, so the
678
// reimplementation of UTF-8 encoding a code point is (we think) a virtue.
679
uint8_t expectedUnits[4] = {};
680
if (c < 0x80) {
681
expectedUnits[0] = AssertedCast<uint8_t>(c);
682
} else if (c < 0x800) {
683
expectedUnits[0] = 0b1100'0000 | (c >> 6);
684
expectedUnits[1] = 0b1000'0000 | (c & 0b11'1111);
685
} else if (c < 0x10000) {
686
expectedUnits[0] = 0b1110'0000 | (c >> 12);
687
expectedUnits[1] = 0b1000'0000 | ((c >> 6) & 0b11'1111);
688
expectedUnits[2] = 0b1000'0000 | (c & 0b11'1111);
689
} else {
690
expectedUnits[0] = 0b1111'0000 | (c >> 18);
691
expectedUnits[1] = 0b1000'0000 | ((c >> 12) & 0b11'1111);
692
expectedUnits[2] = 0b1000'0000 | ((c >> 6) & 0b11'1111);
693
expectedUnits[3] = 0b1000'0000 | (c & 0b11'1111);
694
}
695
696
MOZ_ASSERT(peeked.lengthInUnits() <= 4);
697
for (uint8_t i = 0; i < peeked.lengthInUnits(); i++) {
698
MOZ_ASSERT(expectedUnits[i] == ptr[i].toUint8());
699
}
700
}
701
702
#endif // DEBUG
703
704
// UTF-8 overload.  If |*ptr| points into the middle of a code point, moves it
// backward to that code point's first unit.  A pointer equal to |limit| is
// left untouched.
//
// There is no lower bound check: the caller must guarantee that a
// non-trailing unit precedes |*ptr|, as it does in validly-encoded text.
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const Utf8Unit** ptr, const Utf8Unit* limit) {
  MOZ_ASSERT(*ptr <= limit);

  // |limit| is a code point boundary.
  if (MOZ_UNLIKELY(*ptr == limit)) {
    return;
  }

  // Otherwise rewind past trailing units to the start of the code point.
#ifdef DEBUG
  size_t retracted = 0;
#endif
  while (MOZ_UNLIKELY(IsTrailingUnit((*ptr)[0]))) {
    --*ptr;
#ifdef DEBUG
    retracted++;
#endif
  }

  MOZ_ASSERT(retracted < 4,
             "the longest UTF-8 code point is four units, so this should never "
             "retract more than three units");
}
728
729
// UTF-16 overload.  If |*ptr| points at the trail half of a surrogate pair,
// moves it back one unit to the lead surrogate.  A pointer equal to |limit|,
// or one at a trail surrogate not preceded by a lead surrogate, is left
// untouched.
//
// |(*ptr)[-1]| is read when |*ptr| is at a trail surrogate, so the caller
// must guarantee a preceding unit exists in that case.
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const char16_t** ptr, const char16_t* limit) {
  MOZ_ASSERT(*ptr <= limit);

  // |limit| is a code point boundary.
  if (MOZ_UNLIKELY(*ptr == limit)) {
    return;
  }

  // Otherwise the pointer must be retracted by one iff it splits a two-unit
  // code point.
  if (MOZ_UNLIKELY(unicode::IsTrailSurrogate((*ptr)[0]))) {
    // Outside test suites testing garbage WTF-16, it's basically guaranteed
    // here that |(*ptr)[-1] (*ptr)[0]| is a surrogate pair.
    if (MOZ_LIKELY(unicode::IsLeadSurrogate((*ptr)[-1]))) {
      --*ptr;
    }
  }
}
748
749
template <typename Unit>
750
uint32_t TokenStreamAnyChars::computePartialColumn(
751
const LineToken lineToken, const uint32_t offset,
752
const SourceUnits<Unit>& sourceUnits) const {
753
lineToken.assertConsistentOffset(offset);
754
755
const uint32_t line = lineNumber(lineToken);
756
const uint32_t start = srcCoords.lineStart(lineToken);
757
758
// Reset the previous offset/column cache for this line, if the previous
759
// lookup wasn't on this line.
760
if (line != lineOfLastColumnComputation_) {
761
lineOfLastColumnComputation_ = line;
762
lastChunkVectorForLine_ = nullptr;
763
lastOffsetOfComputedColumn_ = start;
764
lastComputedColumn_ = 0;
765
}
766
767
// Compute and return the final column number from a partial offset/column,
768
// using the last-cached offset/column if they're more optimal.
769
auto ColumnFromPartial = [this, offset, &sourceUnits](uint32_t partialOffset,
770
uint32_t partialCols,
771
UnitsType unitsType) {
772
MOZ_ASSERT(partialOffset <= offset);
773
774
// If the last lookup on this line was closer to |offset|, use it.
775
if (partialOffset < this->lastOffsetOfComputedColumn_ &&
776
this->lastOffsetOfComputedColumn_ <= offset) {
777
partialOffset = this->lastOffsetOfComputedColumn_;
778
partialCols = this->lastComputedColumn_;
779
}
780
781
const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
782
const Unit* end = sourceUnits.codeUnitPtrAt(offset);
783
784
size_t offsetDelta = AssertedCast<uint32_t>(PointerRangeSize(begin, end));
785
partialOffset += offsetDelta;
786
787
if (unitsType == UnitsType::GuaranteedSingleUnit) {
788
MOZ_ASSERT(unicode::CountCodePoints(begin, end) == offsetDelta,
789
"guaranteed-single-units also guarantee pointer distance "
790
"equals code point count");
791
partialCols += offsetDelta;
792
} else {
793
partialCols +=
794
AssertedCast<uint32_t>(unicode::CountCodePoints(begin, end));
795
}
796
797
this->lastOffsetOfComputedColumn_ = partialOffset;
798
this->lastComputedColumn_ = partialCols;
799
return partialCols;
800
};
801
802
const uint32_t offsetInLine = offset - start;
803
804
// We won't add an entry to |longLineColumnInfo_| for lines where the maximum
805
// column has offset less than this value. The most common (non-minified)
806
// long line length is likely 80ch, maybe 100ch, so we use that, rounded up to
807
// the next power of two for efficient division/multiplication below.
808
constexpr uint32_t ColumnChunkLength = mozilla::tl::RoundUpPow2<100>::value;
809
810
// The index within any associated |Vector<ChunkInfo>| of |offset|'s chunk.
811
const uint32_t chunkIndex = offsetInLine / ColumnChunkLength;
812
if (chunkIndex == 0) {
813
// We don't know from an |offset| in the zeroth chunk that this line is even
814
// long. First-chunk info is mostly useless, anyway -- we have |start|
815
// already. So if we have *easy* access to that zeroth chunk, use it --
816
// otherwise just count pessimally. (This will still benefit from caching
817
// the last column/offset for computations for successive offsets, so it's
818
// not *always* worst-case.)
819
UnitsType unitsType;
820
if (lastChunkVectorForLine_ && lastChunkVectorForLine_->length() > 0) {
821
MOZ_ASSERT((*lastChunkVectorForLine_)[0].column() == 0);
822
unitsType = (*lastChunkVectorForLine_)[0].unitsType();
823
} else {
824
unitsType = UnitsType::PossiblyMultiUnit;
825
}
826
827
return ColumnFromPartial(start, 0, unitsType);
828
}
829
830
// If this line has no chunk vector yet, insert one in the hash map. (The
831
// required index is allocated and filled further down.)
832
if (!lastChunkVectorForLine_) {
833
auto ptr = longLineColumnInfo_.lookupForAdd(line);
834
if (!ptr) {
835
// This could rehash and invalidate a cached vector pointer, but the outer
836
// condition means we don't have a cached pointer.
837
if (!longLineColumnInfo_.add(ptr, line, Vector<ChunkInfo>(cx))) {
838
// In case of OOM, just count columns from the start of the line.
839
cx->recoverFromOutOfMemory();
840
return ColumnFromPartial(start, 0, UnitsType::PossiblyMultiUnit);
841
}
842
}
843
844
// Note that adding elements to this vector won't invalidate this pointer.
845
lastChunkVectorForLine_ = &ptr->value();
846
}
847
848
const Unit* const limit = sourceUnits.codeUnitPtrAt(offset);
849
850
auto RetractedOffsetOfChunk = [
851
#ifdef DEBUG
852
this,
853
#endif
854
start, limit,
855
&sourceUnits](uint32_t index) {
856
MOZ_ASSERT(index < this->lastChunkVectorForLine_->length());
857
858
uint32_t naiveOffset = start + index * ColumnChunkLength;
859
const Unit* naivePtr = sourceUnits.codeUnitPtrAt(naiveOffset);
860
861
const Unit* actualPtr = naivePtr;
862
RetractPointerToCodePointBoundary(&actualPtr, limit);
863
864
#ifdef DEBUG
865
if ((*this->lastChunkVectorForLine_)[index].unitsType() ==
866
UnitsType::GuaranteedSingleUnit) {
867
MOZ_ASSERT(naivePtr == actualPtr, "miscomputed unitsType value");
868
}
869
#endif
870
871
return naiveOffset - PointerRangeSize(actualPtr, naivePtr);
872
};
873
874
uint32_t partialOffset;
875
uint32_t partialColumn;
876
UnitsType unitsType;
877
878
auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
879
if (chunkIndex < entriesLen) {
880
// We've computed the chunk |offset| resides in. Compute the column number
881
// from the chunk.
882
partialOffset = RetractedOffsetOfChunk(chunkIndex);
883
partialColumn = (*lastChunkVectorForLine_)[chunkIndex].column();
884
885
// This is exact if |chunkIndex| isn't the last chunk.
886
unitsType = (*lastChunkVectorForLine_)[chunkIndex].unitsType();
887
888
// Otherwise the last chunk is pessimistically assumed to contain multi-unit
889
// code points because we haven't fully examined its contents yet -- they
890
// may not have been tokenized yet, they could contain encoding errors, or
891
// they might not even exist.
892
MOZ_ASSERT_IF(chunkIndex == entriesLen - 1,
893
(*lastChunkVectorForLine_)[chunkIndex].unitsType() ==
894
UnitsType::PossiblyMultiUnit);
895
} else {
896
// Extend the vector from its last entry or the start of the line. (This is
897
// also a suitable partial start point if we must recover from OOM.)
898
if (entriesLen > 0) {
899
partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
900
partialColumn = (*lastChunkVectorForLine_)[entriesLen - 1].column();
901
} else {
902
partialOffset = start;
903
partialColumn = 0;
904
}
905
906
if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
907
// As earlier, just start from the greatest offset/column in case of OOM.
908
cx->recoverFromOutOfMemory();
909
return ColumnFromPartial(partialOffset, partialColumn,
910
UnitsType::PossiblyMultiUnit);
911
}
912
913
// OOM is no longer possible now. \o/
914
915
// The vector always begins with the column of the line start, i.e. zero,
916
// with chunk units pessimally assumed not single-unit.
917
if (entriesLen == 0) {
918
lastChunkVectorForLine_->infallibleAppend(
919
ChunkInfo(0, UnitsType::PossiblyMultiUnit));
920
entriesLen++;
921
}
922
923
do {
924
const Unit* const begin = sourceUnits.codeUnitPtrAt(partialOffset);
925
const Unit* chunkLimit = sourceUnits.codeUnitPtrAt(
926
start + std::min(entriesLen++ * ColumnChunkLength, offsetInLine));
927
928
MOZ_ASSERT(begin < chunkLimit);
929
MOZ_ASSERT(chunkLimit <= limit);
930
931
static_assert(
932
ColumnChunkLength > SourceUnitTraits<Unit>::maxUnitsLength - 1,
933
"any retraction below is assumed to never underflow to the "
934
"preceding chunk, even for the longest code point");
935
936
// Prior tokenizing ensured that [begin, limit) is validly encoded, and
937
// |begin < chunkLimit|, so any retraction here can't underflow.
938
RetractPointerToCodePointBoundary(&chunkLimit, limit);
939
940
MOZ_ASSERT(begin < chunkLimit);
941
MOZ_ASSERT(chunkLimit <= limit);
942
943
size_t numUnits = PointerRangeSize(begin, chunkLimit);
944
size_t numCodePoints = unicode::CountCodePoints(begin, chunkLimit);
945
946
// If this chunk (which will become non-final at the end of the loop) is
947
// all single-unit code points, annotate the chunk accordingly.
948
if (numUnits == numCodePoints) {
949
lastChunkVectorForLine_->back().guaranteeSingleUnits();
950
}
951
952
partialOffset += numUnits;
953
partialColumn += numCodePoints;
954
955
lastChunkVectorForLine_->infallibleEmplaceBack(
956
partialColumn, UnitsType::PossiblyMultiUnit);
957
} while (entriesLen < chunkIndex + 1);
958
959
// We're at a spot in the current final chunk, and final chunks never have
960
// complete units information, so be pessimistic.
961
unitsType = UnitsType::PossiblyMultiUnit;
962
}
963
964
return ColumnFromPartial(partialOffset, partialColumn, unitsType);
965
}
966
967
template <typename Unit, class AnyCharsAccess>
968
uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
969
LineToken lineToken, uint32_t offset) const {
970
lineToken.assertConsistentOffset(offset);
971
972
const TokenStreamAnyChars& anyChars = anyCharsAccess();
973
974
uint32_t partialCols =
975
anyChars.computePartialColumn(lineToken, offset, this->sourceUnits);
976
977
return (lineToken.isFirstLine() ? anyChars.options_.column : 0) + partialCols;
978
}
979
980
template <typename Unit, class AnyCharsAccess>
981
void GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeLineAndColumn(
982
uint32_t offset, uint32_t* line, uint32_t* column) const {
983
const TokenStreamAnyChars& anyChars = anyCharsAccess();
984
985
auto lineToken = anyChars.lineToken(offset);
986
*line = anyChars.lineNumber(lineToken);
987
*column = computeColumn(lineToken, offset);
988
}
989
990
template <class AnyCharsAccess>
991
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::internalEncodingError(
992
uint8_t relevantUnits, unsigned errorNumber, ...) {
993
va_list args;
994
va_start(args, errorNumber);
995
996
do {
997
size_t offset = this->sourceUnits.offset();
998
999
ErrorMetadata err;
1000
1001
TokenStreamAnyChars& anyChars = anyCharsAccess();
1002
1003
bool canAddLineOfContext = fillExceptingContext(&err, offset);
1004
if (canAddLineOfContext) {
1005
if (!internalComputeLineOfContext(&err, offset)) {
1006
break;
1007
}
1008
1009
// As this is an encoding error, the computed window-end must be
1010
// identical to the location of the error -- any further on and the
1011
// window would contain invalid Unicode.
1012
MOZ_ASSERT_IF(err.lineOfContext != nullptr,
1013
err.lineLength == err.tokenOffset);
1014
}
1015
1016
auto notes = MakeUnique<JSErrorNotes>();
1017
if (!notes) {
1018
ReportOutOfMemory(anyChars.cx);
1019
break;
1020
}
1021
1022
// The largest encoding of a UTF-8 code point is 4 units. (Encoding an
1023
// obsolete 5- or 6-byte code point will complain only about a bad lead
1024
// code unit.)
1025
constexpr size_t MaxWidth = sizeof("0xHH 0xHH 0xHH 0xHH");
1026
1027
MOZ_ASSERT(relevantUnits > 0);
1028
1029
char badUnitsStr[MaxWidth];
1030
char* ptr = badUnitsStr;
1031
while (relevantUnits > 0) {
1032
byteToString(this->sourceUnits.getCodeUnit().toUint8(), ptr);
1033
ptr[4] = ' ';
1034
1035
ptr += 5;
1036
relevantUnits--;
1037
}
1038
1039
ptr[-1] = '\0';
1040
1041
uint32_t line, column;
1042
computeLineAndColumn(offset, &line, &column);
1043
1044
if (!notes->addNoteASCII(anyChars.cx, anyChars.getFilename(), 0, line,
1045
column, GetErrorMessage, nullptr,
1046
JSMSG_BAD_CODE_UNITS, badUnitsStr)) {
1047
break;
1048
}
1049
1050
ReportCompileError(anyChars.cx, std::move(err), std::move(notes),
1051
JSREPORT_ERROR, errorNumber, &args);
1052
} while (false);
1053
1054
va_end(args);
1055
}
1056
1057
template <class AnyCharsAccess>
1058
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badLeadUnit(
1059
Utf8Unit lead) {
1060
uint8_t leadValue = lead.toUint8();
1061
1062
char leadByteStr[5];
1063
byteToTerminatedString(leadValue, leadByteStr);
1064
1065
internalEncodingError(1, JSMSG_BAD_LEADING_UTF8_UNIT, leadByteStr);
1066
}
1067
1068
template <class AnyCharsAccess>
1069
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::notEnoughUnits(
1070
Utf8Unit lead, uint8_t remaining, uint8_t required) {
1071
uint8_t leadValue = lead.toUint8();
1072
1073
MOZ_ASSERT(required == 2 || required == 3 || required == 4);
1074
MOZ_ASSERT(remaining < 4);
1075
MOZ_ASSERT(remaining < required);
1076
1077
char leadByteStr[5];
1078
byteToTerminatedString(leadValue, leadByteStr);
1079
1080
// |toHexChar| produces the desired decimal numbers for values < 4.
1081
const char expectedStr[] = {toHexChar(required - 1), '\0'};
1082
const char actualStr[] = {toHexChar(remaining - 1), '\0'};
1083
1084
internalEncodingError(remaining, JSMSG_NOT_ENOUGH_CODE_UNITS, leadByteStr,
1085
expectedStr, required == 2 ? "" : "s", actualStr,
1086
remaining == 2 ? " was" : "s were");
1087
}
1088
1089
template <class AnyCharsAccess>
1090
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badTrailingUnit(
1091
uint8_t unitsObserved) {
1092
Utf8Unit badUnit =
1093
this->sourceUnits.addressOfNextCodeUnit()[unitsObserved - 1];
1094
1095
char badByteStr[5];
1096
byteToTerminatedString(badUnit.toUint8(), badByteStr);
1097
1098
internalEncodingError(unitsObserved, JSMSG_BAD_TRAILING_UTF8_UNIT,
1099
badByteStr);
1100
}
1101
1102
template <class AnyCharsAccess>
1103
MOZ_COLD void
1104
TokenStreamChars<Utf8Unit, AnyCharsAccess>::badStructurallyValidCodePoint(
1105
uint32_t codePoint, uint8_t codePointLength, const char* reason) {
1106
// Construct a string like "0x203D" (including null terminator) to include
1107
// in the error message. Write the string end-to-start from end to start
1108
// of an adequately sized |char| array, shifting least significant nibbles
1109
// off the number and writing the corresponding hex digits until done, then
1110
// prefixing with "0x". |codePointStr| points at the incrementally
1111
// computed string, within |codePointCharsArray|'s bounds.
1112
1113
// 0x1F'FFFF is the maximum value that can fit in 3+6+6+6 unconstrained
1114
// bits in a four-byte UTF-8 code unit sequence.
1115
constexpr size_t MaxHexSize = sizeof(
1116
"0x1F"
1117
"FFFF"); // including '\0'
1118
char codePointCharsArray[MaxHexSize];
1119
1120
char* codePointStr = codePointCharsArray + ArrayLength(codePointCharsArray);
1121
*--codePointStr = '\0';
1122
1123
// Note that by do-while looping here rather than while-looping, this
1124
// writes a '0' when |codePoint == 0|.
1125
do {
1126
MOZ_ASSERT(codePointCharsArray < codePointStr);
1127
*--codePointStr = toHexChar(codePoint & 0xF);
1128
codePoint >>= 4;
1129
} while (codePoint);
1130
1131
MOZ_ASSERT(codePointCharsArray + 2 <= codePointStr);
1132
*--codePointStr = 'x';
1133
*--codePointStr = '0';
1134
1135
internalEncodingError(codePointLength, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
1136
codePointStr, reason);
1137
}
1138
1139
template <class AnyCharsAccess>
1140
MOZ_MUST_USE bool
1141
TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePointDontNormalize(
1142
Utf8Unit lead, char32_t* codePoint) {
1143
auto onBadLeadUnit = [this, &lead]() { this->badLeadUnit(lead); };
1144
1145
auto onNotEnoughUnits = [this, &lead](uint8_t remaining, uint8_t required) {
1146
this->notEnoughUnits(lead, remaining, required);
1147
};
1148
1149
auto onBadTrailingUnit = [this](uint8_t unitsObserved) {
1150
this->badTrailingUnit(unitsObserved);
1151
};
1152
1153
auto onBadCodePoint = [this](char32_t badCodePoint, uint8_t unitsObserved) {
1154
this->badCodePoint(badCodePoint, unitsObserved);
1155
};
1156
1157
auto onNotShortestForm = [this](char32_t badCodePoint,
1158
uint8_t unitsObserved) {
1159
this->notShortestForm(badCodePoint, unitsObserved);
1160
};
1161
1162
// If a valid code point is decoded, this function call consumes its code
1163
// units. If not, it ungets the lead code unit and invokes the right error
1164
// handler, so on failure we must immediately return false.
1165
SourceUnitsIterator iter(this->sourceUnits);
1166
Maybe<char32_t> maybeCodePoint = DecodeOneUtf8CodePointInline(
1167
lead, &iter, SourceUnitsEnd(), onBadLeadUnit, onNotEnoughUnits,
1168
onBadTrailingUnit, onBadCodePoint, onNotShortestForm);
1169
if (maybeCodePoint.isNothing()) {
1170
return false;
1171
}
1172
1173
*codePoint = maybeCodePoint.value();
1174
return true;
1175
}
1176
1177
template <class AnyCharsAccess>
1178
bool TokenStreamChars<char16_t, AnyCharsAccess>::getNonAsciiCodePoint(
1179
int32_t lead, int32_t* codePoint) {
1180
MOZ_ASSERT(lead != EOF);
1181
MOZ_ASSERT(!isAsciiCodePoint(lead),
1182
"ASCII code unit/point must be handled separately");
1183
MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
1184
"getNonAsciiCodePoint called incorrectly");
1185
1186
// The code point is usually |lead|: overwrite later if needed.
1187
*codePoint = lead;
1188
1189
// ECMAScript specifically requires that unpaired UTF-16 surrogates be
1190
// treated as the corresponding code point and not as an error. See
1192
// Thus this function does not consider any sequence of 16-bit numbers to
1193
// be intrinsically in error.
1194
1195
// Dispense with single-unit code points and lone trailing surrogates.
1196
if (MOZ_LIKELY(!unicode::IsLeadSurrogate(lead))) {
1197
if (MOZ_UNLIKELY(lead == unicode::LINE_SEPARATOR ||
1198
lead == unicode::PARA_SEPARATOR)) {
1199
if (!updateLineInfoForEOL()) {
1200
#ifdef DEBUG
1201
*codePoint = EOF; // sentinel value to hopefully cause errors
1202
#endif
1203
MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
1204
return false;
1205
}
1206
1207
*codePoint = '\n';
1208
} else {
1209
MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(*codePoint)));
1210
}
1211
1212
return true;
1213
}
1214
1215
// Also handle a lead surrogate not paired with a trailing surrogate.
1216
if (MOZ_UNLIKELY(
1217
this->sourceUnits.atEnd() ||
1218
!unicode::IsTrailSurrogate(this->sourceUnits.peekCodeUnit()))) {
1219
MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(*codePoint)));
1220
return true;
1221
}
1222
1223
// Otherwise we have a multi-unit code point.
1224
*codePoint = unicode::UTF16Decode(lead, this->sourceUnits.getCodeUnit());
1225
MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(*codePoint)));
1226
return true;
1227
}
1228
1229
template <typename Unit, class AnyCharsAccess>
1230
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getCodePoint(int32_t* cp) {
1231
int32_t unit = getCodeUnit();
1232
if (unit == EOF) {
1233
MOZ_ASSERT(anyCharsAccess().flags.isEOF,
1234
"flags.isEOF should have been set by getCodeUnit()");
1235
*cp = EOF;
1236
return true;
1237
}
1238
1239
if (isAsciiCodePoint(unit)) {
1240
return getFullAsciiCodePoint(unit, cp);
1241
}
1242
1243
return getNonAsciiCodePoint(unit, cp);
1244
}
1245
1246
template <class AnyCharsAccess>
1247
bool TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePoint(
1248
int32_t unit, int32_t* codePoint) {
1249
MOZ_ASSERT(unit != EOF);
1250
MOZ_ASSERT(!isAsciiCodePoint(unit),
1251
"ASCII code unit/point must be handled separately");
1252
1253
Utf8Unit lead = Utf8Unit(static_cast<unsigned char>(unit));
1254
MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
1255
"getNonAsciiCodePoint called incorrectly");
1256
1257
auto onBadLeadUnit = [this, &lead]() { this->badLeadUnit(lead); };
1258
1259
auto onNotEnoughUnits = [this, &lead](uint_fast8_t remaining,
1260
uint_fast8_t required) {
1261
this->notEnoughUnits(lead, remaining, required);
1262
};
1263
1264
auto onBadTrailingUnit = [this](uint_fast8_t unitsObserved) {
1265
this->badTrailingUnit(unitsObserved);
1266
};
1267
1268
auto onBadCodePoint = [this](char32_t badCodePoint,
1269
uint_fast8_t unitsObserved) {
1270
this->badCodePoint(badCodePoint, unitsObserved);
1271
};
1272
1273
auto onNotShortestForm = [this](char32_t badCodePoint,
1274
uint_fast8_t unitsObserved) {
1275
this->notShortestForm(badCodePoint, unitsObserved);
1276
};
1277
1278
// This consumes the full, valid code point or ungets |lead| and calls the
1279
// appropriate error functor on failure.
1280
SourceUnitsIterator iter(this->sourceUnits);
1281
Maybe<char32_t> maybeCodePoint = DecodeOneUtf8CodePoint(
1282
lead, &iter, SourceUnitsEnd(), onBadLeadUnit, onNotEnoughUnits,
1283
onBadTrailingUnit, onBadCodePoint, onNotShortestForm);
1284
if (maybeCodePoint.isNothing()) {
1285
return false;
1286
}
1287
1288
char32_t cp = maybeCodePoint.value();
1289
if (MOZ_UNLIKELY(cp == unicode::LINE_SEPARATOR ||
1290
cp == unicode::PARA_SEPARATOR)) {
1291
if (!updateLineInfoForEOL()) {
1292
#ifdef DEBUG
1293
*codePoint = EOF; // sentinel value to hopefully cause errors
1294
#endif
1295
MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
1296
return false;
1297
}
1298
1299
*codePoint = '\n';
1300
} else {
1301
MOZ_ASSERT(!IsLineTerminator(cp));
1302
*codePoint = AssertedCast<int32_t>(cp);
1303
}
1304
1305
return true;
1306
}
1307
1308
template <>
1309
size_t SourceUnits<char16_t>::findWindowStart(size_t offset) const {
1310
// This is JS's understanding of UTF-16 that allows lone surrogates, so
1311
// we have to exclude lone surrogates from [windowStart, offset) ourselves.
1312
1313
const char16_t* const earliestPossibleStart = codeUnitPtrAt(startOffset_);
1314
1315
const char16_t* const initial = codeUnitPtrAt(offset);
1316
const char16_t* p = initial;
1317
1318
auto HalfWindowSize = [&p, &initial]() {
1319
return PointerRangeSize(p, initial);
1320
};
1321
1322
while (true) {
1323
MOZ_ASSERT(earliestPossibleStart <= p);
1324
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
1325
if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
1326
break;
1327
}
1328
1329
char16_t c = p[-1];
1330
1331
// This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
1332
// string and template literals. These code points do affect line and
1333
// column coordinates, even as they encode their literal values.
1334
if (IsLineTerminator(c)) {
1335
break;
1336
}
1337
1338
// Don't allow invalid UTF-16 in pre-context. (Current users don't
1339
// require this, and this behavior isn't currently imposed on
1340
// pre-context, but these facts might change someday.)
1341
1342
if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
1343
break;
1344
}
1345
1346
// Optimistically include the code unit, reverting below if needed.
1347
p--;
1348
1349
// If it's not a surrogate at all, keep going.
1350
if (MOZ_LIKELY(!unicode::IsTrailSurrogate(c))) {
1351
continue;
1352
}
1353
1354
// Stop if we don't have a usable surrogate pair.
1355
if (HalfWindowSize() >= WindowRadius ||
1356
p <= earliestPossibleStart || // trail surrogate at low end
1357
!unicode::IsLeadSurrogate(p[-1])) // no paired lead surrogate
1358
{
1359
p++;
1360
break;
1361
}
1362
1363
p--;
1364
}
1365
1366
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
1367
return offset - HalfWindowSize();
1368
}
1369
1370
template <>
1371
size_t SourceUnits<Utf8Unit>::findWindowStart(size_t offset) const {
1372
// |offset| must be the location of the error or somewhere before it, so we
1373
// know preceding data is valid UTF-8.
1374
1375
const Utf8Unit* const earliestPossibleStart = codeUnitPtrAt(startOffset_);
1376
1377
const Utf8Unit* const initial = codeUnitPtrAt(offset);
1378
const Utf8Unit* p = initial;
1379
1380
auto HalfWindowSize = [&p, &initial]() {
1381
return PointerRangeSize(p, initial);
1382
};
1383
1384
while (true) {
1385
MOZ_ASSERT(earliestPossibleStart <= p);
1386
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
1387
if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
1388
break;
1389
}
1390
1391
// Peek backward for a line break, and only decrement if there is none.
1392
uint8_t prev = p[-1].toUint8();
1393
1394
// First check for the ASCII LineTerminators.
1395
if (prev == '\r' || prev == '\n') {
1396
break;
1397
}
1398
1399
// Now check for the non-ASCII LineTerminators U+2028 LINE SEPARATOR
1400
// (0xE2 0x80 0xA8) and U+2029 PARAGRAPH (0xE2 0x80 0xA9). If there
1401
// aren't three code units available, some comparison here will fail
1402
// before we'd underflow.
1403
if (MOZ_UNLIKELY((prev == 0xA8 || prev == 0xA9) &&
1404
p[-2].toUint8() == 0x80 && p[-3].toUint8() == 0xE2)) {
1405
break;
1406
}
1407
1408
// Rewind over the non-LineTerminator. This can't underflow
1409
// |earliestPossibleStart| because it begins a code point.
1410
while (IsTrailingUnit(*--p)) {
1411
continue;
1412
}
1413
1414
MOZ_ASSERT(earliestPossibleStart <= p);
1415
1416
// But if we underflowed |WindowRadius|, adjust forward and stop.
1417
if (HalfWindowSize() > WindowRadius) {
1418
static_assert(WindowRadius > 3,
1419
"skipping over non-lead code units below must not "
1420
"advance past |offset|");
1421
1422
while (IsTrailingUnit(*++p)) {
1423
continue;
1424
}
1425
1426
MOZ_ASSERT(HalfWindowSize() < WindowRadius);
1427
break;
1428
}
1429
}
1430
1431
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
1432
return offset - HalfWindowSize();
1433
}
1434
1435
template <>
1436
size_t SourceUnits<char16_t>::findWindowEnd(size_t offset) const {
1437
const char16_t* const initial = codeUnitPtrAt(offset);
1438
const char16_t* p = initial;
1439
1440
auto HalfWindowSize = [&initial, &p]() {
1441
return PointerRangeSize(initial, p);
1442
};
1443
1444
while (true) {
1445
MOZ_ASSERT(p <= limit_);
1446
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
1447
if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
1448
break;
1449
}
1450
1451
char16_t c = *p;
1452
1453
// This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
1454
// string and template literals. These code points do affect line and
1455
// column coordinates, even as they encode their literal values.
1456
if (IsLineTerminator(c)) {
1457
break;
1458
}
1459
1460
// Don't allow invalid UTF-16 in post-context. (Current users don't
1461
// require this, and this behavior isn't currently imposed on
1462
// pre-context, but these facts might change someday.)
1463
1464
if (MOZ_UNLIKELY(unicode::IsTrailSurrogate(c))) {
1465
break;
1466
}
1467
1468
// Optimistically consume the code unit, ungetting it below if needed.
1469
p++;
1470
1471
// If it's not a surrogate at all, keep going.
1472
if (MOZ_LIKELY(!unicode::IsLeadSurrogate(c))) {
1473
continue;
1474
}
1475
1476
// Retract if the lead surrogate would stand alone at the end of the
1477
// window.
1478
if (HalfWindowSize() >= WindowRadius || // split pair
1479
p >= limit_ || // half-pair at end of source
1480
!unicode::IsTrailSurrogate(*p)) // no paired trail surrogate
1481
{
1482
p--;
1483
break;
1484
}
1485
1486
p++;
1487
}
1488
1489
return offset + HalfWindowSize();
1490
}
1491
1492
template <>
1493
size_t SourceUnits<Utf8Unit>::findWindowEnd(size_t offset) const {
1494
const Utf8Unit* const initial = codeUnitPtrAt(offset);
1495
const Utf8Unit* p = initial;
1496
1497
auto HalfWindowSize = [&initial, &p]() {
1498
return PointerRangeSize(initial, p);
1499
};
1500
1501
while (true) {
1502
MOZ_ASSERT(p <= limit_);
1503
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
1504
if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
1505
break;
1506
}
1507
1508
// A non-encoding error might be followed by an encoding error within
1509
// |maxEnd|, so we must validate as we go to not include invalid UTF-8
1510
// in the computed window. What joy!
1511
1512
Utf8Unit lead = *p;
1513
if (mozilla::IsAscii(lead)) {
1514
if (IsSingleUnitLineTerminator(lead)) {
1515
break;
1516
}
1517
1518
p++;
1519
continue;
1520
}
1521
1522
PeekedCodePoint<Utf8Unit> peeked = PeekCodePoint(p, limit_);
1523
if (peeked.isNone()) {
1524
break; // encoding error
1525
}
1526
1527
char32_t c = peeked.codePoint();
1528
if (MOZ_UNLIKELY(c == unicode::LINE_SEPARATOR ||
1529
c == unicode::PARA_SEPARATOR)) {
1530
break;
1531
}
1532
1533
MOZ_ASSERT(!IsLineTerminator(c));
1534
1535
uint8_t len = peeked.lengthInUnits();
1536
if (HalfWindowSize() + len > WindowRadius) {
1537
break;
1538
}
1539
1540
p += len;
1541
}
1542
1543
MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
1544
return offset + HalfWindowSize();
1545
}
1546
1547
template <typename Unit, class AnyCharsAccess>
1548
bool TokenStreamSpecific<Unit, AnyCharsAccess>::advance(size_t position) {
1549
const Unit* end = this->sourceUnits.codeUnitPtrAt(position);
1550
while (this->sourceUnits.addressOfNextCodeUnit() < end) {
1551
int32_t c;
1552
if (!getCodePoint(&c)) {
1553
return false;
1554
}
1555
}
1556
1557
TokenStreamAnyChars& anyChars = anyCharsAccess();
1558
Token* cur = const_cast<Token*>(&anyChars.currentToken());
1559
cur->pos.begin = this->sourceUnits.offset();
1560
cur->pos.end = cur->pos.begin;
1561
MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type));
1562
anyChars.lookahead = 0;
1563
return true;
1564
}
1565
1566
template <typename Unit, class AnyCharsAccess>
1567
void TokenStreamSpecific<Unit, AnyCharsAccess>::seek(const Position& pos) {
1568
TokenStreamAnyChars& anyChars = anyCharsAccess();
1569
1570
this->sourceUnits.setAddressOfNextCodeUnit(pos.buf,
1571
/* allowPoisoned = */ true);
1572
anyChars.flags = pos.flags;
1573
anyChars.lineno = pos.lineno;
1574
anyChars.linebase = pos.linebase;
1575
anyChars.prevLinebase = pos.prevLinebase;
1576
anyChars.lookahead = pos.lookahead;
1577
1578
anyChars.tokens[anyChars.cursor()] = pos.currentToken;
1579
for (unsigned i = 0; i < anyChars.lookahead; i++) {
1580
anyChars.tokens[anyChars.aheadCursor(1 + i)] = pos.lookaheadTokens[i];
1581
}
1582
}
1583
1584
template <typename Unit, class AnyCharsAccess>
1585
bool TokenStreamSpecific<Unit, AnyCharsAccess>::seek(
1586
const Position& pos, const TokenStreamAnyChars& other) {
1587
if (!anyCharsAccess().srcCoords.fill(other.srcCoords)) {
1588
return false;
1589
}
1590
1591
seek(pos);
1592
return true;
1593
}
1594
1595
// Fill |err| for an error that has no source offset: the muted flag and
// filename come from this stream, and line and column are both zero.  No
// line of context is supplied.
void TokenStreamAnyChars::computeErrorMetadataNoOffset(ErrorMetadata* err) {
  err->filename = filename_;
  err->isMuted = mutedErrors;
  err->columnNumber = 0;
  err->lineNumber = 0;

  MOZ_ASSERT(err->lineOfContext == nullptr);
}
1603
1604
bool TokenStreamAnyChars::fillExceptingContext(ErrorMetadata* err,
1605
uint32_t offset) {
1606
err->isMuted = mutedErrors;
1607
1608
// If this TokenStreamAnyChars doesn't have location information, try to
1609
// get it from the caller.
1610
if (!filename_ && !cx->isHelperThreadContext()) {
1611
NonBuiltinFrameIter iter(cx, FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
1612
cx->realm()->principals());
1613
if (!iter.done() && iter.filename()) {
1614
err->filename = iter.filename();
1615
err->lineNumber = iter.computeLine(&err->columnNumber);
1616
return false;
1617
}
1618
}
1619
1620
// Otherwise use this TokenStreamAnyChars's location information.
1621
err->filename = filename_;
1622
return true;
1623
}
1624
1625
template <typename Unit, class AnyCharsAccess>
1626
bool TokenStreamSpecific<Unit, AnyCharsAccess>::hasTokenizationStarted() const {
1627
const TokenStreamAnyChars& anyChars = anyCharsAccess();
1628
return anyChars.isCurrentTokenType(TokenKind::Eof) && !anyChars.isEOF();
1629
}
1630
1631
template <>
1632
inline void SourceUnits<char16_t>::computeWindowOffsetAndLength(
1633
const char16_t* encodedWindow, size_t encodedTokenOffset,
1634
size_t* utf16TokenOffset, size_t encodedWindowLength,
1635
size_t* utf16WindowLength) {
1636
MOZ_ASSERT_UNREACHABLE("shouldn't need to recompute for UTF-16");
1637
}
1638
1639
template <>
1640
inline void SourceUnits<Utf8Unit>::computeWindowOffsetAndLength(
1641
const Utf8Unit* encodedWindow, size_t encodedTokenOffset,
1642
size_t* utf16TokenOffset, size_t encodedWindowLength,
1643
size_t* utf16WindowLength) {
1644
MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
1645
"token offset must be within the window, and the two lambda "
1646
"calls below presume this ordering of values");
1647
1648
const Utf8Unit* const encodedWindowEnd = encodedWindow + encodedWindowLength;
1649
1650
size_t i = 0;
1651
auto ComputeUtf16Count = [&i, &encodedWindow](const Utf8Unit* limit) {
1652
while (encodedWindow < limit) {
1653
Utf8Unit lead = *encodedWindow++;
1654
if (MOZ_LIKELY(IsAscii(lead))) {
1655
// ASCII contributes a single UTF-16 code unit.
1656
i++;
1657
continue;
1658
}
1659
1660
Maybe<char32_t> cp = DecodeOneUtf8CodePoint(lead, &encodedWindow, limit);
1661
MOZ_ASSERT(cp.isSome(),
1662
"computed window should only contain valid UTF-8");
1663
1664
i += unicode::IsSupplementary(cp.value()) ? 2 : 1;
1665
}
1666
1667
return i;
1668
};
1669
1670
// Compute the token offset from |i == 0| and the initial |encodedWindow|.
1671
const Utf8Unit* token = encodedWindow + encodedTokenOffset;
1672
MOZ_ASSERT(token <= encodedWindowEnd);
1673
*utf16TokenOffset = ComputeUtf16Count(token);
1674
1675
// Compute the window length, picking up from |i| and |encodedWindow| that,
1676
// in general, were modified just above.
1677
*utf16WindowLength = ComputeUtf16Count(encodedWindowEnd);
1678
}
1679
1680
template <typename Unit>
1681
bool TokenStreamCharsBase<Unit>::addLineOfContext(ErrorMetadata* err,
1682
uint32_t offset) {
1683
// Rename the variable to make meaning clearer: an offset into source units
1684
// in Unit encoding.
1685
size_t encodedOffset = offset;
1686
1687
// These are also offsets into source units in Unit encoding.
1688
size_t encodedWindowStart = sourceUnits.findWindowStart(encodedOffset);
1689
size_t encodedWindowEnd = sourceUnits.findWindowEnd(encodedOffset);
1690
1691
size_t encodedWindowLength = encodedWindowEnd - encodedWindowStart;
1692
MOZ_ASSERT(encodedWindowLength <= SourceUnits::WindowRadius * 2);
1693
1694
// Don't add a useless "line" of context when the window ends up empty
1695
// because of an invalid encoding at the start of a line.
1696
if (encodedWindowLength == 0) {
1697
MOZ_ASSERT(err->lineOfContext == nullptr,
1698
"ErrorMetadata::lineOfContext must be null so we don't "
1699
"have to set the lineLength/tokenOffset fields");
1700
return true;
1701
}
1702
1703
// We might have hit an error while processing some source code feature
1704
// that's accumulating text into |this->charBuffer| -- e.g. we could be
1705
// halfway into a regular expression literal, then encounter invalid UTF-8.
1706
// Thus we must clear |this->charBuffer| of prior work.
1707
this->charBuffer.clear();
1708
1709
const Unit* encodedWindow = sourceUnits.codeUnitPtrAt(encodedWindowStart);
1710
if (!fillCharBufferFromSourceNormalizingAsciiLineBreaks(
1711
encodedWindow, encodedWindow + encodedWindowLength)) {
1712
return false;
1713
}
1714
1715
size_t utf16WindowLength = this->charBuffer.length();
1716
1717
// The windowed string is null-terminated.
1718
if (!this->charBuffer.append('\0')) {
1719
return false;
1720
}
1721
1722
err->lineOfContext.reset(this->charBuffer.extractOrCopyRawBuffer());
1723
if (!err->lineOfContext) {
1724
return false;
1725
}
1726
1727
size_t encodedTokenOffset = encodedOffset - encodedWindowStart;
1728
1729
MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
1730
"token offset must be inside the window");
1731
1732
// The length in UTF-8 code units of a code point is always greater than or
1733
// equal to the same code point's length in UTF-16 code points. ASCII code
1734
// points are 1 unit in either encoding. Code points in [U+0080, U+10000)
1735
// are 2-3 UTF-8 code units to 1 UTF-16 code unit. And code points in
1736
// [U+10000, U+10FFFF] are 4 UTF-8 code units to 2 UTF-16 code units.
1737
//
1738
// Therefore, if encoded window length equals the length in UTF-16 (this is
1739
// always the case for Unit=char16_t), the UTF-16 offsets are exactly the
1740
// encoded offsets. Otherwise we must convert offset/length from UTF-8 to
1741
// UTF-16.
1742
if (std::is_same<Unit, char16_t>::value) {
1743
MOZ_ASSERT(utf16WindowLength == encodedWindowLength,
1744
"UTF-16 to UTF-16 shouldn't change window length");
1745
err->tokenOffset = encodedTokenOffset;
1746
err->lineLength = encodedWindowLength;
1747
} else {
1748
MOZ_ASSERT((std::is_same<Unit, Utf8Unit>::value),
1749
"should only see UTF-8 here");
1750
1751
bool simple = utf16WindowLength == encodedWindowLength;
1752
#ifdef DEBUG
1753
auto isAscii = [](Unit u) { return IsAscii(u); };
1754
MOZ_ASSERT(std::all_of(encodedWindow, encodedWindow + encodedWindowLength,
1755
isAscii) == simple,
1756
"equal window lengths in UTF-8 should correspond only to "
1757
"wholly-ASCII text");
1758
#endif
1759
if (simple) {
1760
err->tokenOffset = encodedTokenOffset;
1761
err->lineLength = encodedWindowLength;
1762
} else {
1763
sourceUnits.computeWindowOffsetAndLength(
1764
encodedWindow, encodedTokenOffset, &err->tokenOffset,
1765
encodedWindowLength, &err->lineLength);
1766
}
1767
}
1768
1769
return true;
1770
}
1771
1772
template <typename Unit, class AnyCharsAccess>
1773
bool TokenStreamSpecific<Unit, AnyCharsAccess>::computeErrorMetadata(
1774
ErrorMetadata* err, const ErrorOffset& errorOffset) {
1775
if (errorOffset.is<NoOffset>()) {
1776
anyCharsAccess().computeErrorMetadataNoOffset(err);
1777
return true;
1778
}
1779
1780
uint32_t offset;
1781
if (errorOffset.is<uint32_t>()) {
1782
offset = errorOffset.as<uint32_t>();
1783
} else {
1784
offset = this->sourceUnits.offset();
1785
}
1786
1787
// This function's return value isn't a success/failure indication: it
1788
// returns true if this TokenStream can be used to provide a line of
1789
// context.
1790
if (fillExceptingContext(err, offset)) {
1791
// Add a line of context from this TokenStream to help with debugging.
1792
return internalComputeLineOfContext(err, offset);
1793
}
1794
1795
// We can't fill in any more here.
1796
return true;
1797
}
1798
1799
// We have encountered a '\': check for a Unicode escape sequence after it.
1800
// Return the length of the escape sequence and the encoded code point (by
1801
// value) if we found a Unicode escape sequence, and skip all code units
1802
// involed. Otherwise, return 0 and don't advance along the buffer.
1803
template <typename Unit, class AnyCharsAccess>
1804
uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscape(
1805
uint32_t* codePoint) {
1806
MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
1807
1808
int32_t unit = getCodeUnit();
1809
if (unit != 'u') {
1810
// NOTE: |unit| may be EOF here.
1811
ungetCodeUnit(unit);
1812
MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
1813
return 0;
1814
}
1815
1816
char16_t v;
1817
unit = getCodeUnit();
1818
if (IsAsciiHexDigit(unit) && this->sourceUnits.matchHexDigits(3, &v)) {
1819
*codePoint = (AsciiAlphanumericToNumber(unit) << 12) | v;
1820
return 5;
1821
}
1822
1823
if (unit == '{') {
1824
return matchExtendedUnicodeEscape(codePoint);
1825
}
1826
1827
// NOTE: |unit| may be EOF here, so this ungets either one or two units.
1828
ungetCodeUnit(unit);
1829
ungetCodeUnit('u');
1830
MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
1831
return 0;
1832
}
1833
1834
template <typename Unit, class AnyCharsAccess>
1835
uint32_t
1836
GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchExtendedUnicodeEscape(
1837
uint32_t* codePoint) {
1838
MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('{'));
1839
1840
int32_t unit = getCodeUnit();
1841
1842
// Skip leading zeroes.
1843
uint32_t leadingZeroes = 0;