TokenStream.cpp - mozsearch

mozilla-central/js/src/frontend/TokenStream.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-

 * vim: set ts=8 sts=2 et sw=2 tw=80:

 * This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// JS lexical scanner.

#include "frontend/TokenStream.h"

#include "mozilla/ArrayUtils.h"

#include "mozilla/Attributes.h"

#include "mozilla/Likely.h"

#include "mozilla/Maybe.h"

#include "mozilla/MemoryChecking.h"

#include "mozilla/ScopeExit.h"

#include "mozilla/Span.h"

#include "mozilla/TemplateLib.h"

#include "mozilla/TextUtils.h"

#include "mozilla/Utf8.h"

#include <algorithm>

#include <iterator>

#include <limits>

#include <stdarg.h>

#include <stdint.h>

#include <stdio.h>

#include <type_traits>

#include <utility>

#include "jsnum.h"

#include "frontend/FrontendContext.h"

#include "frontend/Parser.h"

#include "frontend/ParserAtom.h"

#include "frontend/ReservedWords.h"

#include "js/CharacterEncoding.h"  // JS::ConstUTF8CharsZ

#include "js/ColumnNumber.h"  // JS::LimitedColumnNumberOneOrigin, JS::ColumnNumberOneOrigin, JS::TaggedColumnNumberOneOrigin

#include "js/ErrorReport.h"   // JSErrorBase

#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*

#include "js/Printf.h"                // JS_smprintf

#include "js/RegExpFlags.h"           // JS::RegExpFlags

#include "js/UniquePtr.h"

#include "util/Text.h"

#include "util/Unicode.h"

#include "vm/FrameIter.h"  // js::{,NonBuiltin}FrameIter

#include "vm/JSContext.h"

#include "vm/Realm.h"

using mozilla::AsciiAlphanumericToNumber;

using mozilla::AssertedCast;

using mozilla::DecodeOneUtf8CodePoint;

using mozilla::IsAscii;

using mozilla::IsAsciiAlpha;

using mozilla::IsAsciiDigit;

using mozilla::IsAsciiHexDigit;

using mozilla::IsTrailingUnit;

using mozilla::MakeScopeExit;

using mozilla::Maybe;

using mozilla::PointerRangeSize;

using mozilla::Span;

using mozilla::Utf8Unit;

using JS::ReadOnlyCompileOptions;

using JS::RegExpFlag;

using JS::RegExpFlags;

// One row of the reserved-word table: a word's spelling paired with the token
// kind recorded for it.  Field order matters because the table below is built
// with positional aggregate initialisers.
struct ReservedWordInfo {
  const char* chars;  // C string with reserved word text
  js::frontend::TokenKind tokentype;  // token kind recorded for this word
};

// Table of reserved words: one {text, token kind} entry is generated for each
// word enumerated by FOR_EACH_JAVASCRIPT_RESERVED_WORD, in that macro's order.
static const ReservedWordInfo reservedWords[] = {
#define RESERVED_WORD_INFO(word, name, type) {#word, js::frontend::type},
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_INFO)
#undef RESERVED_WORD_INFO
};

// One enumerator per reserved word, generated from the same macro list (and
// therefore in the same order) as |reservedWords|, so that an enumerator's
// value can be used as an index into that table.
enum class ReservedWordsIndex : size_t {
#define ENTRY_(_1, NAME, _3) NAME,
  FOR_EACH_JAVASCRIPT_RESERVED_WORD(ENTRY_)
#undef ENTRY_
};

// Returns a ReservedWordInfo for the specified characters, or nullptr if the
// string is not a reserved word.
//
// |s| points at |length| code units; |length| must be nonzero.  The lookup
// itself is driven by the included generated header, which is parameterised
// through the JSRW_* macros defined here and ends by jumping to one of the
// three labels below:
//   - got_match:  |i| indexes the matching |reservedWords| entry.
//   - test_guess: |i| indexes a candidate whose text must still be compared
//                 against |s| unit by unit.
//   - no_match:   |s| is not a reserved word.
template <typename CharT>
static const ReservedWordInfo* FindReservedWord(const CharT* s, size_t length) {
  MOZ_ASSERT(length != 0);

  size_t i;
  const ReservedWordInfo* rw;
  const char* chars;

#define JSRW_LENGTH() length
#define JSRW_AT(column) s[column]
#define JSRW_GOT_MATCH(index) \
  i = (index);                \
  goto got_match;
#define JSRW_TEST_GUESS(index) \
  i = (index);                 \
  goto test_guess;
#define JSRW_NO_MATCH() goto no_match;
#include "frontend/ReservedWordsGenerated.h"
#undef JSRW_NO_MATCH
#undef JSRW_TEST_GUESS
#undef JSRW_GOT_MATCH
#undef JSRW_AT
#undef JSRW_LENGTH

got_match:
  return &reservedWords[i];

test_guess:
  rw = &reservedWords[i];
  chars = rw->chars;
  // Compare all |length| units of |s| against the candidate's text.  The
  // do-while form relies on the |length != 0| precondition asserted above.
  do {
    if (*s++ != static_cast<unsigned char>(*chars++)) {
      goto no_match;
    }
  } while (--length != 0);
  return rw;

no_match:
  return nullptr;
}

// Utf8Unit specialization: views the units as unsigned chars and forwards to
// the generic overload above.
template <>
MOZ_ALWAYS_INLINE const ReservedWordInfo* FindReservedWord<Utf8Unit>(
    const Utf8Unit* units, size_t length) {
  return FindReservedWord(Utf8AsUnsignedChars(units), length);
}

// Returns the |reservedWords| entry for |atom| if its raw data equals the
// well-known raw data of one of the reserved words, or nullptr otherwise.
static const ReservedWordInfo* FindReservedWord(
    const js::frontend::TaggedParserAtomIndex atom) {
  switch (atom.rawData()) {
    // One case per reserved word; ReservedWordsIndex::NAME selects the
    // corresponding table row.
#define CASE_(_1, NAME, _3)                                           \
  case js::frontend::TaggedParserAtomIndex::WellKnownRawData::NAME(): \
    return &reservedWords[size_t(ReservedWordsIndex::NAME)];
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(CASE_)
#undef CASE_
  }

  return nullptr;
}

// Returns true iff |c| is the ASCII digit '0' or '1'.  The value is compared
// in its unsigned form so that negative inputs of signed character types
// cannot compare equal to a digit.
template <typename CharT>
static constexpr bool IsAsciiBinary(CharT c) {
  using UnsignedCharT = std::make_unsigned_t<CharT>;
  auto uc = static_cast<UnsignedCharT>(c);
  return uc == '0' || uc == '1';
}

// Returns true iff |c| is an ASCII octal digit ('0' through '7').  The value
// is compared in its unsigned form so that negative inputs of signed character
// types fall outside the range.
template <typename CharT>
static constexpr bool IsAsciiOctal(CharT c) {
  using UnsignedCharT = std::make_unsigned_t<CharT>;
  auto uc = static_cast<UnsignedCharT>(c);
  return '0' <= uc && uc <= '7';
}

// Converts an ASCII octal digit to its numeric value by subtracting '0'.
// The function does not check that |c| is an octal digit; for any other input
// the result is simply the low eight bits of |c - '0'|.
template <typename CharT>
static constexpr uint8_t AsciiOctalToNumber(CharT c) {
  using UnsignedCharT = std::make_unsigned_t<CharT>;
  auto uc = static_cast<UnsignedCharT>(c);
  // The subtraction happens at (at least) int width; make the narrowing to
  // the return type explicit.
  return static_cast<uint8_t>(uc - '0');
}

namespace js {

namespace frontend {

// Returns true iff |atom| is a reserved word whose token kind satisfies
// TokenKindIsKeyword.  Atoms that are not reserved words yield false.
bool IsKeyword(TaggedParserAtomIndex atom) {
  if (const ReservedWordInfo* rw = FindReservedWord(atom)) {
    return TokenKindIsKeyword(rw->tokentype);
  }

  return false;
}

// Returns the token kind recorded for the reserved word |name|, or
// TokenKind::Limit if |name| is not a reserved word.
TokenKind ReservedWordTokenKind(TaggedParserAtomIndex name) {
  if (const ReservedWordInfo* rw = FindReservedWord(name)) {
    return rw->tokentype;
  }

  return TokenKind::Limit;
}

// Returns the spelling of the reserved word |name| as a C string (obtained
// via the TokenKind overload), or nullptr if |name| is not a reserved word.
const char* ReservedWordToCharZ(TaggedParserAtomIndex name) {
  if (const ReservedWordInfo* rw = FindReservedWord(name)) {
    return ReservedWordToCharZ(rw->tokentype);
  }

  return nullptr;
}

// Returns the spelling of the reserved word whose token kind is |tt|.
// |tt| must not be TokenKind::Name.  Passing a kind that is not a reserved
// word trips a debug assertion and otherwise returns nullptr.
const char* ReservedWordToCharZ(TokenKind tt) {
  MOZ_ASSERT(tt != TokenKind::Name);
  switch (tt) {
    // One case per reserved word, returning the word's stringified text.
#define EMIT_CASE(word, name, type) \
  case type:                        \
    return #word;
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
#undef EMIT_CASE
    default:
      MOZ_ASSERT_UNREACHABLE("Not a reserved word PropertyName.");
  }
  return nullptr;
}

// Maps the token kind of a reserved word to the well-known atom for that
// word.  |tt| must not be TokenKind::Name.  Passing a kind that is not a
// reserved word trips a debug assertion and otherwise returns the null index.
TaggedParserAtomIndex TokenStreamAnyChars::reservedWordToPropertyName(
    TokenKind tt) const {
  MOZ_ASSERT(tt != TokenKind::Name);
  switch (tt) {
    // One case per reserved word, returning its well-known atom index.
#define EMIT_CASE(word, name, type) \
  case type:                        \
    return TaggedParserAtomIndex::WellKnown::name();
    FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
#undef EMIT_CASE
    default:
      MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
  }
  return TaggedParserAtomIndex::null();
}

// Initialises the line-start table with two entries: |initialOffset| (the
// start of the first line) followed by the MAX_PTR sentinel.
// |initialLineNumber| is stored as the number of the first line, and the
// lookup cache |lastIndex_| starts at 0.
SourceCoords::SourceCoords(FrontendContext* fc, uint32_t initialLineNumber,
                           uint32_t initialOffset)
    : lineStartOffsets_(fc), initialLineNum_(initialLineNumber), lastIndex_(0) {
  // This is actually necessary!  Removing it causes compile errors on
  // GCC and clang.  You could try declaring this:
  //
  //   const uint32_t SourceCoords::MAX_PTR;
  //
  // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.
  //
  uint32_t maxPtr = MAX_PTR;

  // The first line begins at buffer offset |initialOffset|.  MAX_PTR is the
  // sentinel.  The appends cannot fail because |lineStartOffsets_| has
  // statically-allocated elements.
  MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
  MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
  lineStartOffsets_.infallibleAppend(initialOffset);
  lineStartOffsets_.infallibleAppend(maxPtr);
}

// Records that line |lineNum| starts at |lineStartOffset|.
//
// If the line maps to the sentinel slot it has not been seen before: a new
// sentinel is appended and the old sentinel slot is overwritten with the
// offset.  Otherwise the line is already recorded and nothing is stored.
//
// Returns false only if appending the new sentinel fails, in which case the
// table is left unchanged (the existing sentinel stays in place).
MOZ_ALWAYS_INLINE bool SourceCoords::add(uint32_t lineNum,
                                         uint32_t lineStartOffset) {
  uint32_t index = indexFromLineNumber(lineNum);
  uint32_t sentinelIndex = lineStartOffsets_.length() - 1;

  MOZ_ASSERT(lineStartOffsets_[0] <= lineStartOffset);
  MOZ_ASSERT(lineStartOffsets_[sentinelIndex] == MAX_PTR);

  if (index == sentinelIndex) {
    // We haven't seen this newline before.  Update lineStartOffsets_
    // only if lineStartOffsets_.append succeeds, to keep sentinel.
    // Otherwise return false to tell TokenStream about OOM.
    uint32_t maxPtr = MAX_PTR;
    if (!lineStartOffsets_.append(maxPtr)) {
      static_assert(std::is_same_v<decltype(lineStartOffsets_.allocPolicy()),
                                   TempAllocPolicy&>,
                    "this function's caller depends on it reporting an "
                    "error on failure, as TempAllocPolicy ensures");
      return false;
    }

    lineStartOffsets_[index] = lineStartOffset;
  } else {
    // We have seen this newline before (and ungot it).  Do nothing (other
    // than checking it hasn't mysteriously changed).
    // This path can be executed after hitting OOM, so check index.
    MOZ_ASSERT_IF(index < sentinelIndex,
                  lineStartOffsets_[index] == lineStartOffset);
  }
  return true;
}

// Extends this table with the line-start offsets that |other| has recorded
// beyond what this table already holds.  Both tables must begin at the same
// offset and end with the MAX_PTR sentinel.
//
// Does nothing if this table is already at least as long as |other|'s.
// Returns false if an append fails, true otherwise.
MOZ_ALWAYS_INLINE bool SourceCoords::fill(const SourceCoords& other) {
  MOZ_ASSERT(lineStartOffsets_[0] == other.lineStartOffsets_[0]);
  MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
  MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);

  if (lineStartOffsets_.length() >= other.lineStartOffsets_.length()) {
    return true;
  }

  // Overwrite our sentinel with |other|'s entry at that position, then append
  // the rest of |other|'s entries; its last entry is the sentinel, so ours is
  // re-established by the final append.
  uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
  lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];

  for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length();
       i++) {
    if (!lineStartOffsets_.append(other.lineStartOffsets_[i])) {
      return false;
    }
  }
  return true;
}

// Returns the index |i| into |lineStartOffsets_| such that
// |lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1]|.
//
// The index found by the previous call is cached in |lastIndex_| and used as
// a starting point: the same index and the next two are tried directly before
// falling back to a binary search.  The cache is updated on every path.
MOZ_ALWAYS_INLINE uint32_t
SourceCoords::indexFromOffset(uint32_t offset) const {
  uint32_t iMin, iMax, iMid;

  if (lineStartOffsets_[lastIndex_] <= offset) {
    // If we reach here, offset is on a line the same as or higher than
    // last time.  Check first for the +0, +1, +2 cases, because they
    // typically cover 85--98% of cases.
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is same as last time
    }

    // If we reach here, there must be at least one more entry (plus the
    // sentinel).  Try it.
    lastIndex_++;
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is one higher than last time
    }

    // The same logic applies here.
    lastIndex_++;
    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
      return lastIndex_;  // index is two higher than last time
    }

    // No luck.  Oh well, we have a better-than-default starting point for
    // the binary search.
    iMin = lastIndex_ + 1;
    MOZ_ASSERT(iMin <
               lineStartOffsets_.length() - 1);  // -1 due to the sentinel

  } else {
    iMin = 0;
  }

  // This is a binary search with deferred detection of equality, which was
  // marginally faster in this case than a standard binary search.
  // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
  // want one before that.
  iMax = lineStartOffsets_.length() - 2;
  while (iMax > iMin) {
    iMid = iMin + (iMax - iMin) / 2;
    if (offset >= lineStartOffsets_[iMid + 1]) {
      iMin = iMid + 1;  // offset is above lineStartOffsets_[iMid]
    } else {
      iMax = iMid;  // offset is below or within lineStartOffsets_[iMid]
    }
  }

  MOZ_ASSERT(iMax == iMin);
  MOZ_ASSERT(lineStartOffsets_[iMin] <= offset);
  MOZ_ASSERT(offset < lineStartOffsets_[iMin + 1]);

  lastIndex_ = iMin;
  return iMin;
}

// Builds a LineToken from the line-table index that contains |offset|,
// together with |offset| itself.
SourceCoords::LineToken SourceCoords::lineToken(uint32_t offset) const {
  return LineToken(indexFromOffset(offset), offset);
}

// Initialises the character-type-independent tokenizer state from |options|:
// the filename, the starting line number, the source-coordinate table (seeded
// with the starting line and script source offset) and the muted-errors flag.
// It also marks the token kinds that can end an expression.
TokenStreamAnyChars::TokenStreamAnyChars(FrontendContext* fc,
                                         const ReadOnlyCompileOptions& options,
                                         StrictModeGetter* smg)
    : fc(fc),
      options_(options),
      strictModeGetter_(smg),
      filename_(options.filename()),
      longLineColumnInfo_(fc),
      srcCoords(fc, options.lineno, options.scriptSourceOffset),
      lineno(options.lineno),
      mutedErrors(options.mutedErrors()) {
  // |isExprEnding| was initially zeroed: overwrite the true entries here.
  isExprEnding[size_t(TokenKind::Comma)] = true;
  isExprEnding[size_t(TokenKind::Semi)] = true;
  isExprEnding[size_t(TokenKind::Colon)] = true;
  isExprEnding[size_t(TokenKind::RightParen)] = true;
  isExprEnding[size_t(TokenKind::RightBracket)] = true;
  isExprEnding[size_t(TokenKind::RightCurly)] = true;
}

// Forwards |fc| and |parserAtoms| to the TokenStreamCharsShared base and
// constructs |sourceUnits| from the |length| units at |units|, with
// |startOffset| passed through as its starting offset.
template <typename Unit>
TokenStreamCharsBase<Unit>::TokenStreamCharsBase(FrontendContext* fc,
                                                 ParserAtomsTable* parserAtoms,
                                                 const Unit* units,
                                                 size_t length,
                                                 size_t startOffset)
    : TokenStreamCharsShared(fc, parserAtoms),
      sourceUnits(units, length, startOffset) {}

// Copies the UTF-16 code units in [cur, end) into |charBuffer|, which must be
// empty, rewriting every "\r\n" pair and every lone '\r' as a single '\n'.
// All other units are copied unchanged.
//
// Returns false if appending to |charBuffer| fails, true otherwise.
bool FillCharBufferFromSourceNormalizingAsciiLineBreaks(CharBuffer& charBuffer,
                                                        const char16_t* cur,
                                                        const char16_t* end) {
  MOZ_ASSERT(charBuffer.length() == 0);

  while (cur < end) {
    char16_t ch = *cur++;
    if (ch == '\r') {
      ch = '\n';
      // Swallow the '\n' of a "\r\n" pair so only one '\n' is emitted.
      if (cur < end && *cur == '\n') {
        cur++;
      }
    }

    if (!charBuffer.append(ch)) {
      return false;
    }
  }

  MOZ_ASSERT(cur == end);
  return true;
}

// Decodes the UTF-8 code units in [cur, end) into |charBuffer|, which must be
// empty, rewriting every "\r\n" pair and every lone '\r' as a single '\n'.
// ASCII units are appended directly; any other lead unit is decoded to a code
// point, which is then appended via AppendCodePointToCharBuffer.
//
// The input is asserted to decode successfully, i.e. it is expected to have
// been validated already.  Returns false if an append fails, true otherwise.
bool FillCharBufferFromSourceNormalizingAsciiLineBreaks(CharBuffer& charBuffer,
                                                        const Utf8Unit* cur,
                                                        const Utf8Unit* end) {
  MOZ_ASSERT(charBuffer.length() == 0);

  while (cur < end) {
    Utf8Unit unit = *cur++;
    if (MOZ_LIKELY(IsAscii(unit))) {
      char16_t ch = unit.toUint8();
      if (ch == '\r') {
        ch = '\n';
        // Swallow the '\n' of a "\r\n" pair so only one '\n' is emitted.
        if (cur < end && *cur == Utf8Unit('\n')) {
          cur++;
        }
      }

      if (!charBuffer.append(ch)) {
        return false;
      }

      continue;
    }

    // Non-ASCII lead unit: decode the full code point, advancing |cur| past
    // its remaining units.
    Maybe<char32_t> ch = DecodeOneUtf8CodePoint(unit, &cur, end);
    MOZ_ASSERT(ch.isSome(),
               "provided source text should already have been validated");

    if (!AppendCodePointToCharBuffer(charBuffer, ch.value())) {
      return false;
    }
  }

  MOZ_ASSERT(cur == end);
  return true;
}

// Forwards everything to the TokenStreamChars base, using
// |options.scriptSourceOffset| as the starting offset of the source units.
template <typename Unit, class AnyCharsAccess>
TokenStreamSpecific<Unit, AnyCharsAccess>::TokenStreamSpecific(
    FrontendContext* fc, ParserAtomsTable* parserAtoms,
    const ReadOnlyCompileOptions& options, const Unit* units, size_t length)
    : TokenStreamChars<Unit, AnyCharsAccess>(fc, parserAtoms, units, length,
                                             options.scriptSourceOffset) {}

// Validates the compile options held by this token stream.  Reports
// JSMSG_BAD_COLUMN_NUMBER and returns false if the starting column exceeds
// JS::LimitedColumnNumberOneOrigin::Limit; returns true otherwise.
bool TokenStreamAnyChars::checkOptions() {
  // Constrain starting columns to where they will saturate.
  if (options().column.oneOriginValue() >
      JS::LimitedColumnNumberOneOrigin::Limit) {
    reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
    return false;
  }

  return true;
}

// Variadic convenience wrapper: packages the arguments following
// |errorNumber| into a va_list and forwards to reportErrorNoOffsetVA.
void TokenStreamAnyChars::reportErrorNoOffset(unsigned errorNumber, ...) const {
  va_list args;
  va_start(args, errorNumber);

  reportErrorNoOffsetVA(errorNumber, &args);

  va_end(args);
}

// Reports compile error |errorNumber|, formatted with |args|, using error
// metadata computed by computeErrorMetadataNoOffset and no error notes.
void TokenStreamAnyChars::reportErrorNoOffsetVA(unsigned errorNumber,
                                                va_list* args) const {
  ErrorMetadata metadata;
  computeErrorMetadataNoOffset(&metadata);

  ReportCompileErrorLatin1VA(fc, std::move(metadata), nullptr, errorNumber,
                             args);
}

// Advances the line bookkeeping past an end-of-line: the previous line base
// is remembered, |lineStartOffset| becomes the current line base, and the
// line number is incremented.
//
// Returns false after reporting JSMSG_BAD_LINE_NUMBER if the line number
// wrapped around to zero, or if recording the new line in |srcCoords| fails.
[[nodiscard]] MOZ_ALWAYS_INLINE bool
TokenStreamAnyChars::internalUpdateLineInfoForEOL(uint32_t lineStartOffset) {
  prevLinebase = linebase;
  linebase = lineStartOffset;
  lineno++;

  // On overflow, report error.
  if (MOZ_UNLIKELY(!lineno)) {
    reportErrorNoOffset(JSMSG_BAD_LINE_NUMBER);
    return false;
  }

  return srcCoords.add(lineno, linebase);
}

#ifdef DEBUG

// Debug-only check that the code units at |ptr| are the UTF-16 encoding of
// the code point described by |peeked|: one unit equal to the code point when
// it is below unicode::NonBMPMin, otherwise a lead/trail surrogate pair.
template <>
inline void SourceUnits<char16_t>::assertNextCodePoint(
    const PeekedCodePoint<char16_t>& peeked) {
  char32_t c = peeked.codePoint();
  if (c < unicode::NonBMPMin) {
    MOZ_ASSERT(peeked.lengthInUnits() == 1);
    MOZ_ASSERT(ptr[0] == c);
  } else {
    MOZ_ASSERT(peeked.lengthInUnits() == 2);
    char16_t lead, trail;
    unicode::UTF16Encode(c, &lead, &trail);
    MOZ_ASSERT(ptr[0] == lead);
    MOZ_ASSERT(ptr[1] == trail);
  }
}

// Debug-only check that the code units at |ptr| are the UTF-8 encoding of the
// code point described by |peeked|.  The expected encoding is recomputed here
// independently and compared unit by unit over |peeked.lengthInUnits()| units.
template <>
inline void SourceUnits<Utf8Unit>::assertNextCodePoint(
    const PeekedCodePoint<Utf8Unit>& peeked) {
  char32_t c = peeked.codePoint();

  // This is all roughly indulgence of paranoia only for assertions, so the
  // reimplementation of UTF-8 encoding a code point is (we think) a virtue.
  uint8_t expectedUnits[4] = {};
  if (c < 0x80) {
    // One unit: 0xxxxxxx.
    expectedUnits[0] = AssertedCast<uint8_t>(c);
  } else if (c < 0x800) {
    // Two units: 110xxxxx 10xxxxxx.
    expectedUnits[0] = 0b1100'0000 | (c >> 6);
    expectedUnits[1] = 0b1000'0000 | (c & 0b11'1111);
  } else if (c < 0x10000) {
    // Three units: 1110xxxx 10xxxxxx 10xxxxxx.
    expectedUnits[0] = 0b1110'0000 | (c >> 12);
    expectedUnits[1] = 0b1000'0000 | ((c >> 6) & 0b11'1111);
    expectedUnits[2] = 0b1000'0000 | (c & 0b11'1111);
  } else {
    // Four units: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    expectedUnits[0] = 0b1111'0000 | (c >> 18);
    expectedUnits[1] = 0b1000'0000 | ((c >> 12) & 0b11'1111);
    expectedUnits[2] = 0b1000'0000 | ((c >> 6) & 0b11'1111);
    expectedUnits[3] = 0b1000'0000 | (c & 0b11'1111);
  }

  MOZ_ASSERT(peeked.lengthInUnits() <= 4);
  for (uint8_t i = 0; i < peeked.lengthInUnits(); i++) {
    MOZ_ASSERT(expectedUnits[i] == ptr[i].toUint8());
  }
}

#endif  // DEBUG

// Moves |*ptr| backward, if necessary, until it no longer points at a UTF-8
// trailing unit, i.e. until it points at the start of a code point.  |*ptr|
// must not exceed |limit|; a pointer equal to |limit| is left unchanged
// because |limit| is itself a code point boundary.
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const Utf8Unit** ptr, const Utf8Unit* limit) {
  MOZ_ASSERT(*ptr <= limit);

  // |limit| is a code point boundary.
  if (MOZ_UNLIKELY(*ptr == limit)) {
    return;
  }

  // Otherwise rewind past trailing units to the start of the code point.
#ifdef DEBUG
  size_t retracted = 0;
#endif
  while (MOZ_UNLIKELY(IsTrailingUnit((*ptr)[0]))) {
    --*ptr;
#ifdef DEBUG
    retracted++;
#endif
  }

  MOZ_ASSERT(retracted < 4,
             "the longest UTF-8 code point is four units, so this should never "
             "retract more than three units");
}

// Moves |*ptr| back by one unit if it points at the trail surrogate of a
// lead/trail pair, so that it points at the start of the code point.  |*ptr|
// must not exceed |limit|; a pointer equal to |limit| is left unchanged
// because |limit| is itself a code point boundary.  A trail surrogate not
// preceded by a lead surrogate is left where it is.
static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
    const char16_t** ptr, const char16_t* limit) {
  MOZ_ASSERT(*ptr <= limit);

  // |limit| is a code point boundary.
  if (MOZ_UNLIKELY(*ptr == limit)) {
    return;
  }

  // Otherwise the pointer must be retracted by one iff it splits a two-unit
  // code point.
  if (MOZ_UNLIKELY(unicode::IsTrailSurrogate((*ptr)[0]))) {
    // Outside test suites testing garbage WTF-16, it's basically guaranteed
    // here that |(*ptr)[-1] (*ptr)[0]| is a surrogate pair.
    if (MOZ_LIKELY(unicode::IsLeadSurrogate((*ptr)[-1]))) {
      --*ptr;
    }
  }
}

// Computes the column offset of |offset| relative to the start of the line
// identified by |lineToken|.
//
// For char16_t source the answer is just the distance in code units from the
// line start.  For any other unit type the work is delegated to
// computeColumnOffsetForUTF8.
template <typename Unit>
JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffset(
    const LineToken lineToken, const uint32_t offset,
    const SourceUnits<Unit>& sourceUnits) const {
  lineToken.assertConsistentOffset(offset);

  const uint32_t start = srcCoords.lineStart(lineToken);
  const uint32_t offsetInLine = offset - start;

  if constexpr (std::is_same_v<Unit, char16_t>) {
    // Column offset is in UTF-16 code units.
    return JS::ColumnNumberUnsignedOffset(offsetInLine);
  }

  return computeColumnOffsetForUTF8(lineToken, offset, start, offsetInLine,
                                    sourceUnits);
}

// Computes the column offset, counted in UTF-16 code units, of |offset| on
// the line identified by |lineToken|.  |start| is the offset of the line's
// first unit and |offsetInLine| is |offset - start|.
//
// Two caches keep repeated lookups cheap:
//   - the offset/column pair of the most recent lookup on the current line
//     (|lastOffsetOfComputedColumn_| / |lastComputedColumnOffset_|), and
//   - for lines that reach past the first chunk, a per-line vector of
//     ChunkInfo in |longLineColumnInfo_|, holding the column offset at the
//     start of each ColumnChunkLength-unit chunk and whether the chunk is
//     known to consist only of single-unit code points.
//
// If memory for the chunk cache cannot be allocated, the OOM is recovered
// from and the column is counted from the best starting point available.
template <typename Unit>
JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffsetForUTF8(
    const LineToken lineToken, const uint32_t offset, const uint32_t start,
    const uint32_t offsetInLine, const SourceUnits<Unit>& sourceUnits) const {
  const uint32_t line = lineNumber(lineToken);

  // Reset the previous offset/column number offset cache for this line, if the
  // previous lookup wasn't on this line.
  if (line != lineOfLastColumnComputation_) {
    lineOfLastColumnComputation_ = line;
    lastChunkVectorForLine_ = nullptr;
    lastOffsetOfComputedColumn_ = start;
    lastComputedColumnOffset_ = JS::ColumnNumberUnsignedOffset::zero();
  }

  // Compute and return the final column number offset from a partially
  // calculated offset/column number offset, using the last-cached
  // offset/column number offset if they're more optimal.
  auto OffsetFromPartial =
      [this, offset, &sourceUnits](
          uint32_t partialOffset,
          JS::ColumnNumberUnsignedOffset partialColumnOffset,
          UnitsType unitsType) {
        MOZ_ASSERT(partialOffset <= offset);

        // If the last lookup on this line was closer to |offset|, use it.
        if (partialOffset < this->lastOffsetOfComputedColumn_ &&
            this->lastOffsetOfComputedColumn_ <= offset) {
          partialOffset = this->lastOffsetOfComputedColumn_;
          partialColumnOffset = this->lastComputedColumnOffset_;
        }

        const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
        const Unit* end = sourceUnits.codeUnitPtrAt(offset);

        size_t offsetDelta =
            AssertedCast<uint32_t>(PointerRangeSize(begin, end));
        partialOffset += offsetDelta;

        if (unitsType == UnitsType::GuaranteedSingleUnit) {
          MOZ_ASSERT(unicode::CountUTF16CodeUnits(begin, end) == offsetDelta,
                     "guaranteed-single-units also guarantee pointer distance "
                     "equals UTF-16 code unit count");
          partialColumnOffset += JS::ColumnNumberUnsignedOffset(offsetDelta);
        } else {
          partialColumnOffset += JS::ColumnNumberUnsignedOffset(
              AssertedCast<uint32_t>(unicode::CountUTF16CodeUnits(begin, end)));
        }

        // Remember this result for the next lookup on this line.
        this->lastOffsetOfComputedColumn_ = partialOffset;
        this->lastComputedColumnOffset_ = partialColumnOffset;
        return partialColumnOffset;
      };

  // We won't add an entry to |longLineColumnInfo_| for lines where the maximum
  // column has offset less than this value.  The most common (non-minified)
  // long line length is likely 80ch, maybe 100ch, so we use that, rounded up to
  // the next power of two for efficient division/multiplication below.
  constexpr uint32_t ColumnChunkLength = mozilla::tl::RoundUpPow2<100>::value;

  // The index within any associated |Vector<ChunkInfo>| of |offset|'s chunk.
  const uint32_t chunkIndex = offsetInLine / ColumnChunkLength;
  if (chunkIndex == 0) {
    // We don't know from an |offset| in the zeroth chunk that this line is even
    // long.  First-chunk info is mostly useless, anyway -- we have |start|
    // already.  So if we have *easy* access to that zeroth chunk, use it --
    // otherwise just count pessimally.  (This will still benefit from caching
    // the last column/offset for computations for successive offsets, so it's
    // not *always* worst-case.)
    UnitsType unitsType;
    if (lastChunkVectorForLine_ && lastChunkVectorForLine_->length() > 0) {
      MOZ_ASSERT((*lastChunkVectorForLine_)[0].columnOffset() ==
                 JS::ColumnNumberUnsignedOffset::zero());
      unitsType = (*lastChunkVectorForLine_)[0].unitsType();
    } else {
      unitsType = UnitsType::PossiblyMultiUnit;
    }

    return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(),
                             unitsType);
  }

  // If this line has no chunk vector yet, insert one in the hash map.  (The
  // required index is allocated and filled further down.)
  if (!lastChunkVectorForLine_) {
    auto ptr = longLineColumnInfo_.lookupForAdd(line);
    if (!ptr) {
      // This could rehash and invalidate a cached vector pointer, but the outer
      // condition means we don't have a cached pointer.
      if (!longLineColumnInfo_.add(ptr, line, Vector<ChunkInfo>(fc))) {
        // In case of OOM, just count columns from the start of the line.
        fc->recoverFromOutOfMemory();
        return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(),
                                 UnitsType::PossiblyMultiUnit);
      }
    }

    // Note that adding elements to this vector won't invalidate this pointer.
    lastChunkVectorForLine_ = &ptr->value();
  }

  const Unit* const limit = sourceUnits.codeUnitPtrAt(offset);

  // Returns the offset at which chunk |index| really begins: the naive
  // |start + index * ColumnChunkLength|, pulled back to a code point boundary
  // if that position falls inside a multi-unit code point.
  auto RetractedOffsetOfChunk = [
#ifdef DEBUG
                                    this,
#endif
                                    start, limit,
                                    &sourceUnits](uint32_t index) {
    MOZ_ASSERT(index < this->lastChunkVectorForLine_->length());

    uint32_t naiveOffset = start + index * ColumnChunkLength;
    const Unit* naivePtr = sourceUnits.codeUnitPtrAt(naiveOffset);

    const Unit* actualPtr = naivePtr;
    RetractPointerToCodePointBoundary(&actualPtr, limit);

#ifdef DEBUG
    if ((*this->lastChunkVectorForLine_)[index].unitsType() ==
        UnitsType::GuaranteedSingleUnit) {
      MOZ_ASSERT(naivePtr == actualPtr, "miscomputed unitsType value");
    }
#endif

    return naiveOffset - PointerRangeSize(actualPtr, naivePtr);
  };

  uint32_t partialOffset;
  JS::ColumnNumberUnsignedOffset partialColumnOffset;
  UnitsType unitsType;

  auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
  if (chunkIndex < entriesLen) {
    // We've computed the chunk |offset| resides in.  Compute the column number
    // from the chunk.
    partialOffset = RetractedOffsetOfChunk(chunkIndex);
    partialColumnOffset = (*lastChunkVectorForLine_)[chunkIndex].columnOffset();

    // This is exact if |chunkIndex| isn't the last chunk.
    unitsType = (*lastChunkVectorForLine_)[chunkIndex].unitsType();

    // Otherwise the last chunk is pessimistically assumed to contain multi-unit
    // code points because we haven't fully examined its contents yet -- they
    // may not have been tokenized yet, they could contain encoding errors, or
    // they might not even exist.
    MOZ_ASSERT_IF(chunkIndex == entriesLen - 1,
                  (*lastChunkVectorForLine_)[chunkIndex].unitsType() ==
                      UnitsType::PossiblyMultiUnit);
  } else {
    // Extend the vector from its last entry or the start of the line.  (This is
    // also a suitable partial start point if we must recover from OOM.)
    if (entriesLen > 0) {
      partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
      partialColumnOffset =
          (*lastChunkVectorForLine_)[entriesLen - 1].columnOffset();
    } else {
      partialOffset = start;
      partialColumnOffset = JS::ColumnNumberUnsignedOffset::zero();
    }

    if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
      // As earlier, just start from the greatest offset/column in case of OOM.
      fc->recoverFromOutOfMemory();
      return OffsetFromPartial(partialOffset, partialColumnOffset,
                               UnitsType::PossiblyMultiUnit);
    }

    // OOM is no longer possible now.  \o/

    // The vector always begins with the column of the line start, i.e. zero,
    // with chunk units pessimally assumed not single-unit.
    if (entriesLen == 0) {
      lastChunkVectorForLine_->infallibleAppend(
          ChunkInfo(JS::ColumnNumberUnsignedOffset::zero(),
                    UnitsType::PossiblyMultiUnit));
      entriesLen++;
    }

    // Each iteration measures the current final chunk, finalises its
    // single-unit annotation, and appends the entry for the chunk after it.
    // Note that |entriesLen| is post-incremented inside the |std::min| call.
    do {
      const Unit* const begin = sourceUnits.codeUnitPtrAt(partialOffset);
      const Unit* chunkLimit = sourceUnits.codeUnitPtrAt(
          start + std::min(entriesLen++ * ColumnChunkLength, offsetInLine));

      MOZ_ASSERT(begin < chunkLimit);
      MOZ_ASSERT(chunkLimit <= limit);

      static_assert(
          ColumnChunkLength > SourceUnitTraits<Unit>::maxUnitsLength - 1,
          "any retraction below is assumed to never underflow to the "
          "preceding chunk, even for the longest code point");

      // Prior tokenizing ensured that [begin, limit) is validly encoded, and
      // |begin < chunkLimit|, so any retraction here can't underflow.
      RetractPointerToCodePointBoundary(&chunkLimit, limit);

      MOZ_ASSERT(begin < chunkLimit);
      MOZ_ASSERT(chunkLimit <= limit);

      size_t numUnits = PointerRangeSize(begin, chunkLimit);
      size_t numUTF16CodeUnits =
          unicode::CountUTF16CodeUnits(begin, chunkLimit);

      // If this chunk (which will become non-final at the end of the loop) is
      // all single-unit code points, annotate the chunk accordingly.
      if (numUnits == numUTF16CodeUnits) {
        lastChunkVectorForLine_->back().guaranteeSingleUnits();
      }

      partialOffset += numUnits;
      partialColumnOffset += JS::ColumnNumberUnsignedOffset(numUTF16CodeUnits);

      lastChunkVectorForLine_->infallibleEmplaceBack(
          partialColumnOffset, UnitsType::PossiblyMultiUnit);
    } while (entriesLen < chunkIndex + 1);

    // We're at a spot in the current final chunk, and final chunks never have
    // complete units information, so be pessimistic.
    unitsType = UnitsType::PossiblyMultiUnit;
  }

  return OffsetFromPartial(partialOffset, partialColumnOffset, unitsType);
}

// Computes the one-origin, limit-bounded column number of |offset| on the
// line identified by |lineToken|.
//
// On any line other than the first, the column is the default one-origin
// column plus the column offset within the line.  On the first line the
// starting column from the compile options is added instead; if one plus the
// column offset already exceeds the limit, the limit is returned directly.
template <typename Unit, class AnyCharsAccess>
JS::LimitedColumnNumberOneOrigin
GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
    LineToken lineToken, uint32_t offset) const {
  lineToken.assertConsistentOffset(offset);

  const TokenStreamAnyChars& anyChars = anyCharsAccess();

  JS::ColumnNumberUnsignedOffset columnOffset =
      anyChars.computeColumnOffset(lineToken, offset, this->sourceUnits);

  if (!lineToken.isFirstLine()) {
    return JS::LimitedColumnNumberOneOrigin::fromUnlimited(
        JS::ColumnNumberOneOrigin() + columnOffset);
  }

  if (1 + columnOffset.value() > JS::LimitedColumnNumberOneOrigin::Limit) {
    return JS::LimitedColumnNumberOneOrigin::limit();
  }

  return JS::LimitedColumnNumberOneOrigin::fromUnlimited(
      (anyChars.options_.column + columnOffset).oneOriginValue());
}

// Writes the line number and column number of |offset| to |*line| and
// |*column| respectively.
template <typename Unit, class AnyCharsAccess>
void GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeLineAndColumn(
    uint32_t offset, uint32_t* line,
    JS::LimitedColumnNumberOneOrigin* column) const {
  const TokenStreamAnyChars& anyChars = anyCharsAccess();

  auto lineToken = anyChars.lineToken(offset);
  *line = anyChars.lineNumber(lineToken);
  *column = computeColumn(lineToken, offset);
}

// Reports the UTF-8 encoding error |errorNumber| (formatted with the variadic
// arguments) at the current source offset.  A JSMSG_BAD_CODE_UNITS note is
// attached that lists the next |relevantUnits| code units as "0xHH" values
// separated by spaces.
//
// |relevantUnits| must be between 1 and 4: the formatting buffer has room for
// exactly four units.  Note that the units are consumed from |sourceUnits|
// via getCodeUnit() while they are being formatted.
//
// The body is a do/while(false) so that every early exit (failure to compute
// context, OOM allocating or adding the note) still reaches va_end.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::internalEncodingError(
    uint8_t relevantUnits, unsigned errorNumber, ...) {
  va_list args;
  va_start(args, errorNumber);

  do {
    size_t offset = this->sourceUnits.offset();

    ErrorMetadata err;

    TokenStreamAnyChars& anyChars = anyCharsAccess();

    bool canAddLineOfContext = fillExceptingContext(&err, offset);
    if (canAddLineOfContext) {
      if (!internalComputeLineOfContext(&err, offset)) {
        break;
      }

      // As this is an encoding error, the computed window-end must be
      // identical to the location of the error -- any further on and the
      // window would contain invalid Unicode.
      MOZ_ASSERT_IF(err.lineOfContext != nullptr,
                    err.lineLength == err.tokenOffset);
    }

    auto notes = MakeUnique<JSErrorNotes>();
    if (!notes) {
      ReportOutOfMemory(anyChars.fc);
      break;
    }

    // The largest encoding of a UTF-8 code point is 4 units.  (Encoding an
    // obsolete 5- or 6-byte code point will complain only about a bad lead
    // code unit.)
    constexpr size_t MaxWidth = sizeof("0xHH 0xHH 0xHH 0xHH");

    MOZ_ASSERT(relevantUnits > 0);
    MOZ_ASSERT(relevantUnits <= 4,
               "badUnitsStr only has room for four formatted units");

    char badUnitsStr[MaxWidth];
    char* ptr = badUnitsStr;
    // Each unit occupies five chars: four for "0xHH" plus a separator.
    while (relevantUnits > 0) {
      byteToString(this->sourceUnits.getCodeUnit().toUint8(), ptr);
      ptr[4] = ' ';

      ptr += 5;
      relevantUnits--;
    }

    // Replace the final separator with the terminator.
    ptr[-1] = '\0';

    uint32_t line;
    JS::LimitedColumnNumberOneOrigin column;
    computeLineAndColumn(offset, &line, &column);

    if (!notes->addNoteASCII(anyChars.fc, anyChars.getFilename().c_str(), 0,
                             line, JS::ColumnNumberOneOrigin(column),
                             GetErrorMessage, nullptr, JSMSG_BAD_CODE_UNITS,
                             badUnitsStr)) {
      break;
    }

    ReportCompileErrorLatin1VA(anyChars.fc, std::move(err), std::move(notes),
                               errorNumber, &args);
  } while (false);

  va_end(args);
}

// Report JSMSG_BAD_LEADING_UTF8_UNIT for |lead|.
//
// The lead byte is rendered into a five-char, null-terminated buffer by
// |byteToTerminatedString| and passed as the message argument.  Exactly one
// code unit is reported as relevant to the error.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badLeadUnit(
    Utf8Unit lead) {
  uint8_t leadValue = lead.toUint8();

  char leadByteStr[5];
  byteToTerminatedString(leadValue, leadByteStr);

  internalEncodingError(1, JSMSG_BAD_LEADING_UTF8_UNIT, leadByteStr);
}

// Report JSMSG_NOT_ENOUGH_CODE_UNITS for a code point introduced by |lead|.
//
// |required| must be 2, 3, or 4 and |remaining| must be smaller than both 4
// and |required| (all asserted).  The message arguments are, in order: the
// lead byte as text, |required - 1| as a digit, a plural suffix for it,
// |remaining - 1| as a digit, and a verb/plural suffix for it.  |remaining|
// is also the number of code units reported as relevant to the error.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::notEnoughUnits(
    Utf8Unit lead, uint8_t remaining, uint8_t required) {
  uint8_t leadValue = lead.toUint8();

  MOZ_ASSERT(required == 2 || required == 3 || required == 4);
  MOZ_ASSERT(remaining < 4);
  MOZ_ASSERT(remaining < required);

  char leadByteStr[5];
  byteToTerminatedString(leadValue, leadByteStr);

  // |toHexChar| produces the desired decimal numbers for values < 4.
  const char expectedStr[] = {toHexChar(required - 1), '\0'};
  const char actualStr[] = {toHexChar(remaining - 1), '\0'};

  internalEncodingError(remaining, JSMSG_NOT_ENOUGH_CODE_UNITS, leadByteStr,
                        expectedStr, required == 2 ? "" : "s", actualStr,
                        remaining == 2 ? " was" : "s were");
}

// Report JSMSG_BAD_TRAILING_UTF8_UNIT.
//
// The offending unit is the one at index |unitsObserved - 1| counting from
// the address of the next code unit in |sourceUnits|.  It is rendered as
// text for the message, and |unitsObserved| code units are reported as
// relevant to the error.
template <class AnyCharsAccess>
MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badTrailingUnit(
    uint8_t unitsObserved) {
  Utf8Unit badUnit =
      this->sourceUnits.addressOfNextCodeUnit()[unitsObserved - 1];

  char badByteStr[5];
  byteToTerminatedString(badUnit.toUint8(), badByteStr);

  internalEncodingError(unitsObserved, JSMSG_BAD_TRAILING_UTF8_UNIT,
                        badByteStr);
}

// Report JSMSG_FORBIDDEN_UTF8_CODE_POINT for |codePoint|, which was encoded
// in |codePointLength| code units.  The message receives the code point
// formatted as hexadecimal text (e.g. "0x203D") followed by |reason|.
template <class AnyCharsAccess>
MOZ_COLD void
TokenStreamChars<Utf8Unit, AnyCharsAccess>::badStructurallyValidCodePoint(
    char32_t codePoint, uint8_t codePointLength, const char* reason) {
  // Construct a string like "0x203D" (including null terminator) to include
  // in the error message.  Write the string backwards, from the end toward
  // the start of an adequately sized |char| array, shifting least
  // significant nibbles off the number and writing the corresponding hex
  // digits until done, then prefixing with "0x".  |codePointStr| points at
  // the incrementally computed string, within |codePointCharsArray|'s bounds.

  // 0x1F'FFFF is the maximum value that can fit in 3+6+6+6 unconstrained
  // bits in a four-byte UTF-8 code unit sequence.
  constexpr size_t MaxHexSize = sizeof(
      "0x1F"
      "FFFF");  // including '\0'
  char codePointCharsArray[MaxHexSize];

  char* codePointStr = std::end(codePointCharsArray);
  *--codePointStr = '\0';

  // Note that by do-while looping here rather than while-looping, this
  // writes a '0' when |codePoint == 0|.
  do {
    MOZ_ASSERT(codePointCharsArray < codePointStr);
    *--codePointStr = toHexChar(codePoint & 0xF);
    codePoint >>= 4;
  } while (codePoint);

  // Room for the two-character "0x" prefix must remain.
  MOZ_ASSERT(codePointCharsArray + 2 <= codePointStr);
  *--codePointStr = 'x';
  *--codePointStr = '0';

  internalEncodingError(codePointLength, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
                        codePointStr, reason);
}

// Decode the UTF-8 code point whose first code unit is |lead|, storing the
// decoded value unchanged in |*codePoint| on success.
//
// Returns false if decoding failed; in that case one of the error handlers
// below has already been invoked by the decoder and |*codePoint| is not
// written.
template <class AnyCharsAccess>
[[nodiscard]] bool
TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePointDontNormalize(
    Utf8Unit lead, char32_t* codePoint) {
  // Each handler forwards to the matching reporting member function.
  auto onBadLeadUnit = [this, &lead]() { this->badLeadUnit(lead); };

  auto onNotEnoughUnits = [this, &lead](uint8_t remaining, uint8_t required) {
    this->notEnoughUnits(lead, remaining, required);
  };

  auto onBadTrailingUnit = [this](uint8_t unitsObserved) {
    this->badTrailingUnit(unitsObserved);
  };

  auto onBadCodePoint = [this](char32_t badCodePoint, uint8_t unitsObserved) {
    this->badCodePoint(badCodePoint, unitsObserved);
  };

  auto onNotShortestForm = [this](char32_t badCodePoint,
                                  uint8_t unitsObserved) {
    this->notShortestForm(badCodePoint, unitsObserved);
  };

  // If a valid code point is decoded, this function call consumes its code
  // units.  If not, it ungets the lead code unit and invokes the right error
  // handler, so on failure we must immediately return false.
  SourceUnitsIterator iter(this->sourceUnits);
  Maybe<char32_t> maybeCodePoint = DecodeOneUtf8CodePointInline(
      lead, &iter, SourceUnitsEnd(), onBadLeadUnit, onNotEnoughUnits,
      onBadTrailingUnit, onBadCodePoint, onNotShortestForm);
  if (maybeCodePoint.isNothing()) {
    return false;
  }

  *codePoint = maybeCodePoint.value();
  return true;
}

// Compute the code point that begins with the already-consumed, non-ASCII
// UTF-16 code unit |lead| and store it in |*codePoint|.
//
// U+2028 and U+2029 are reported as '\n' after |updateLineInfoForEOL()|
// succeeds.  A lead surrogate followed by a trail surrogate consumes the
// trail unit and yields the combined code point.  Every other unit,
// including an unpaired surrogate, is returned as-is.
//
// Returns false only when |updateLineInfoForEOL()| fails; |*codePoint| is
// then deliberately left without a usable value.
template <class AnyCharsAccess>
bool TokenStreamChars<char16_t, AnyCharsAccess>::getNonAsciiCodePoint(
    int32_t lead, char32_t* codePoint) {
  MOZ_ASSERT(lead != EOF);
  MOZ_ASSERT(!isAsciiCodePoint(lead),
             "ASCII code unit/point must be handled separately");
  MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
             "getNonAsciiCodePoint called incorrectly");

  // The code point is usually |lead|: overwrite later if needed.
  *codePoint = AssertedCast<char32_t>(lead);

  // ECMAScript specifically requires that unpaired UTF-16 surrogates be
  // treated as the corresponding code point and not as an error.  See
  // <https://tc39.github.io/ecma262/#sec-ecmascript-language-types-string-type>.
  // Thus this function does not consider any sequence of 16-bit numbers to
  // be intrinsically in error.

  // Dispense with single-unit code points and lone trailing surrogates.
  if (MOZ_LIKELY(!unicode::IsLeadSurrogate(lead))) {
    if (MOZ_UNLIKELY(lead == unicode::LINE_SEPARATOR ||
                     lead == unicode::PARA_SEPARATOR)) {
      if (!updateLineInfoForEOL()) {
#ifdef DEBUG
        // Assign to a sentinel value to hopefully cause errors.
        *codePoint = std::numeric_limits<char32_t>::max();
#endif
        MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
        return false;
      }

      *codePoint = '\n';
    } else {
      MOZ_ASSERT(!IsLineTerminator(*codePoint));
    }

    return true;
  }

  // Also handle a lead surrogate not paired with a trailing surrogate.
  if (MOZ_UNLIKELY(
          this->sourceUnits.atEnd() ||
          !unicode::IsTrailSurrogate(this->sourceUnits.peekCodeUnit()))) {
    MOZ_ASSERT(!IsLineTerminator(*codePoint));
    return true;
  }

  // Otherwise we have a multi-unit code point.
  *codePoint = unicode::UTF16Decode(lead, this->sourceUnits.getCodeUnit());
  MOZ_ASSERT(!IsLineTerminator(*codePoint));
  return true;
}

// Decode the UTF-8 code point that begins with the already-consumed,
// non-ASCII code unit |unit| and store it in |*codePoint|.
//
// U+2028 and U+2029 are reported as '\n' after |updateLineInfoForEOL()|
// succeeds; any other decoded code point is stored unchanged.
//
// Returns false if decoding fails (an error handler has then been invoked)
// or if |updateLineInfoForEOL()| fails; in the latter case |*codePoint| is
// deliberately left without a usable value.
template <class AnyCharsAccess>
bool TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePoint(
    int32_t unit, char32_t* codePoint) {
  MOZ_ASSERT(unit != EOF);
  MOZ_ASSERT(!isAsciiCodePoint(unit),
             "ASCII code unit/point must be handled separately");

  Utf8Unit lead = Utf8Unit(static_cast<unsigned char>(unit));
  MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
             "getNonAsciiCodePoint called incorrectly");

  // Each handler forwards to the matching reporting member function.
  auto onBadLeadUnit = [this, &lead]() { this->badLeadUnit(lead); };

  auto onNotEnoughUnits = [this, &lead](uint_fast8_t remaining,
                                        uint_fast8_t required) {
    this->notEnoughUnits(lead, remaining, required);
  };

  auto onBadTrailingUnit = [this](uint_fast8_t unitsObserved) {
    this->badTrailingUnit(unitsObserved);
  };

  auto onBadCodePoint = [this](char32_t badCodePoint,
                               uint_fast8_t unitsObserved) {
    this->badCodePoint(badCodePoint, unitsObserved);
  };

  auto onNotShortestForm = [this](char32_t badCodePoint,
                                  uint_fast8_t unitsObserved) {
    this->notShortestForm(badCodePoint, unitsObserved);
  };

  // This consumes the full, valid code point or ungets |lead| and calls the
  // appropriate error functor on failure.
  SourceUnitsIterator iter(this->sourceUnits);
  Maybe<char32_t> maybeCodePoint = DecodeOneUtf8CodePoint(
      lead, &iter, SourceUnitsEnd(), onBadLeadUnit, onNotEnoughUnits,
      onBadTrailingUnit, onBadCodePoint, onNotShortestForm);
  if (maybeCodePoint.isNothing()) {
    return false;
  }

  char32_t cp = maybeCodePoint.value();
  if (MOZ_UNLIKELY(cp == unicode::LINE_SEPARATOR ||
                   cp == unicode::PARA_SEPARATOR)) {
    if (!updateLineInfoForEOL()) {
#ifdef DEBUG
      // Assign to a sentinel value to hopefully cause errors.
      *codePoint = std::numeric_limits<char32_t>::max();
#endif
      MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
      return false;
    }

    *codePoint = '\n';
  } else {
    MOZ_ASSERT(!IsLineTerminator(cp));
    *codePoint = cp;
  }

  return true;
}

// Compute the offset at which the context window preceding |offset| starts.
//
// Walks backward from |offset| for at most |WindowRadius| code units, never
// before |startOffset_|, stopping early at a line terminator or at a
// surrogate that does not form a complete pair inside the window.
template <>
size_t SourceUnits<char16_t>::findWindowStart(size_t offset) const {
  // This is JS's understanding of UTF-16 that allows lone surrogates, so
  // we have to exclude lone surrogates from [windowStart, offset) ourselves.

  const char16_t* const earliestPossibleStart = codeUnitPtrAt(startOffset_);

  const char16_t* const initial = codeUnitPtrAt(offset);
  const char16_t* p = initial;

  // Number of code units currently between |p| and |initial|.
  auto HalfWindowSize = [&p, &initial]() {
    return PointerRangeSize(p, initial);
  };

  while (true) {
    MOZ_ASSERT(earliestPossibleStart <= p);
    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
    if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
      break;
    }

    char16_t c = p[-1];

    // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
    // string and template literals.  These code points do affect line and
    // column coordinates, even as they encode their literal values.
    if (IsLineTerminator(c)) {
      break;
    }

    // Don't allow invalid UTF-16 in pre-context.  (Current users don't
    // require this, and this behavior isn't currently imposed on
    // pre-context, but these facts might change someday.)

    if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
      break;
    }

    // Optimistically include the code unit, reverting below if needed.
    p--;

    // If it's not a surrogate at all, keep going.
    if (MOZ_LIKELY(!unicode::IsTrailSurrogate(c))) {
      continue;
    }

    // Stop if we don't have a usable surrogate pair.
    if (HalfWindowSize() >= WindowRadius ||
        p <= earliestPossibleStart ||      // trail surrogate at low end
        !unicode::IsLeadSurrogate(p[-1]))  // no paired lead surrogate
    {
      p++;
      break;
    }

    // Include the paired lead surrogate as well.
    p--;
  }

  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
  return offset - HalfWindowSize();
}

// Compute the offset at which the context window preceding |offset| starts.
//
// Walks backward from |offset| one whole code point at a time, for at most
// |WindowRadius| code units and never before |startOffset_|, stopping early
// at a line terminator.
template <>
size_t SourceUnits<Utf8Unit>::findWindowStart(size_t offset) const {
  // |offset| must be the location of the error or somewhere before it, so we
  // know preceding data is valid UTF-8.

  const Utf8Unit* const earliestPossibleStart = codeUnitPtrAt(startOffset_);

  const Utf8Unit* const initial = codeUnitPtrAt(offset);
  const Utf8Unit* p = initial;

  // Number of code units currently between |p| and |initial|.
  auto HalfWindowSize = [&p, &initial]() {
    return PointerRangeSize(p, initial);
  };

  while (true) {
    MOZ_ASSERT(earliestPossibleStart <= p);
    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
    if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
      break;
    }

    // Peek backward for a line break, and only decrement if there is none.
    uint8_t prev = p[-1].toUint8();

    // First check for the ASCII LineTerminators.
    if (prev == '\r' || prev == '\n') {
      break;
    }

    // Now check for the non-ASCII LineTerminators U+2028 LINE SEPARATOR
    // (0xE2 0x80 0xA8) and U+2029 PARAGRAPH SEPARATOR (0xE2 0x80 0xA9).  If
    // there aren't three code units available, some comparison here will
    // fail before we'd underflow.
    if (MOZ_UNLIKELY((prev == 0xA8 || prev == 0xA9) &&
                     p[-2].toUint8() == 0x80 && p[-3].toUint8() == 0xE2)) {
      break;
    }

    // Rewind over the non-LineTerminator.  This can't underflow
    // |earliestPossibleStart| because it begins a code point.
    while (IsTrailingUnit(*--p)) {
      continue;
    }

    MOZ_ASSERT(earliestPossibleStart <= p);

    // But if we underflowed |WindowRadius|, adjust forward and stop.
    if (HalfWindowSize() > WindowRadius) {
      static_assert(WindowRadius > 3,
                    "skipping over non-lead code units below must not "
                    "advance past |offset|");

      while (IsTrailingUnit(*++p)) {
        continue;
      }

      MOZ_ASSERT(HalfWindowSize() < WindowRadius);
      break;
    }
  }

  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
  return offset - HalfWindowSize();
}

// Compute the offset at which the context window following |offset| ends.
//
// Walks forward from |offset| for at most |WindowRadius| code units, never
// past |limit_|, stopping early at a line terminator or at a surrogate that
// does not form a complete pair inside the window.
template <>
size_t SourceUnits<char16_t>::findWindowEnd(size_t offset) const {
  const char16_t* const initial = codeUnitPtrAt(offset);
  const char16_t* p = initial;

  // Number of code units currently between |initial| and |p|.
  auto HalfWindowSize = [&initial, &p]() {
    return PointerRangeSize(initial, p);
  };

  while (true) {
    MOZ_ASSERT(p <= limit_);
    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
    if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
      break;
    }

    char16_t c = *p;

    // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
    // string and template literals.  These code points do affect line and
    // column coordinates, even as they encode their literal values.
    if (IsLineTerminator(c)) {
      break;
    }

    // Don't allow invalid UTF-16 in post-context.  (Current users don't
    // require this, and this behavior isn't currently imposed on
    // post-context, but these facts might change someday.)

    if (MOZ_UNLIKELY(unicode::IsTrailSurrogate(c))) {
      break;
    }

    // Optimistically consume the code unit, ungetting it below if needed.
    p++;

    // If it's not a surrogate at all, keep going.
    if (MOZ_LIKELY(!unicode::IsLeadSurrogate(c))) {
      continue;
    }

    // Retract if the lead surrogate would stand alone at the end of the
    // window.
    if (HalfWindowSize() >= WindowRadius ||  // split pair
        p >= limit_ ||                       // half-pair at end of source
        !unicode::IsTrailSurrogate(*p))      // no paired trail surrogate
    {
      p--;
      break;
    }

    // Include the paired trail surrogate as well.
    p++;
  }

  return offset + HalfWindowSize();
}

// Compute the offset at which the context window following |offset| ends.
//
// Walks forward from |offset| one whole code point at a time, for at most
// |WindowRadius| code units and never past |limit_|, stopping early at a
// line terminator, at an encoding error, or before a code point that would
// not fit entirely within the window.
template <>
size_t SourceUnits<Utf8Unit>::findWindowEnd(size_t offset) const {
  const Utf8Unit* const initial = codeUnitPtrAt(offset);
  const Utf8Unit* p = initial;

  // Number of code units currently between |initial| and |p|.
  auto HalfWindowSize = [&initial, &p]() {
    return PointerRangeSize(initial, p);
  };

  while (true) {
    MOZ_ASSERT(p <= limit_);
    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
    if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
      break;
    }

    // A non-encoding error might be followed by an encoding error within
    // the window, so we must validate as we go to not include invalid UTF-8
    // in the computed window.  What joy!

    Utf8Unit lead = *p;
    if (mozilla::IsAscii(lead)) {
      if (IsSingleUnitLineTerminator(lead)) {
        break;
      }

      p++;
      continue;
    }

    PeekedCodePoint<Utf8Unit> peeked = PeekCodePoint(p, limit_);
    if (peeked.isNone()) {
      break;  // encoding error
    }

    char32_t c = peeked.codePoint();
    if (MOZ_UNLIKELY(c == unicode::LINE_SEPARATOR ||
                     c == unicode::PARA_SEPARATOR)) {
      break;
    }

    MOZ_ASSERT(!IsLineTerminator(c));

    // Don't split a multi-unit code point across the window boundary.
    uint8_t len = peeked.lengthInUnits();
    if (HalfWindowSize() + len > WindowRadius) {
      break;
    }

    p += len;
  }

  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
  return offset + HalfWindowSize();
}

// Consume code points until the next code unit is at or beyond |position|,
// then reset the current token to an empty span at the resulting offset and
// clear any lookahead.
//
// Returns false as soon as |getCodePoint()| fails; the current token is
// left untouched in that case.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::advance(size_t position) {
  const Unit* end = this->sourceUnits.codeUnitPtrAt(position);
  while (this->sourceUnits.addressOfNextCodeUnit() < end) {
    if (!getCodePoint()) {
      return false;
    }
  }

  TokenStreamAnyChars& anyChars = anyCharsAccess();
  Token* cur = const_cast<Token*>(&anyChars.currentToken());
  cur->pos.begin = this->sourceUnits.offset();
  cur->pos.end = cur->pos.begin;
#ifdef DEBUG
  // Store an out-of-range kind in debug builds before marking the field
  // undefined below.
  cur->type = TokenKind::Limit;
#endif
  MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type));
  anyChars.lookahead = 0;
  return true;
}

// Restore this token stream to the state recorded in |pos|: the next-unit
// address, flags, line bookkeeping, the current token, and every saved
// lookahead token.
template <typename Unit, class AnyCharsAccess>
void TokenStreamSpecific<Unit, AnyCharsAccess>::seekTo(const Position& pos) {
  TokenStreamAnyChars& anyChars = anyCharsAccess();

  this->sourceUnits.setAddressOfNextCodeUnit(pos.buf,
                                             /* allowPoisoned = */ true);
  anyChars.flags = pos.flags;
  anyChars.lineno = pos.lineno;
  anyChars.linebase = pos.linebase;
  anyChars.prevLinebase = pos.prevLinebase;
  anyChars.lookahead = pos.lookahead;

  anyChars.tokens[anyChars.cursor()] = pos.currentToken;
  for (unsigned i = 0; i < anyChars.lookahead; i++) {
    anyChars.tokens[anyChars.aheadCursor(1 + i)] = pos.lookaheadTokens[i];
  }
}

// Fill this stream's source coordinates from |other|'s, then seek to |pos|.
//
// Returns false, without seeking, if filling the coordinates fails.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::seekTo(
    const Position& pos, const TokenStreamAnyChars& other) {
  if (!anyCharsAccess().srcCoords.fill(other.srcCoords)) {
    return false;
  }

  seekTo(pos);
  return true;
}

// Fill |err| for an error that has no source offset: copy the muted flag and
// filename, set the line number to 0 and the column to its default value.
// No line of context is provided; |err->lineOfContext| must already be null.
void TokenStreamAnyChars::computeErrorMetadataNoOffset(
    ErrorMetadata* err) const {
  err->isMuted = mutedErrors;
  err->filename = filename_;
  err->lineNumber = 0;
  err->columnNumber = JS::ColumnNumberOneOrigin();

  MOZ_ASSERT(err->lineOfContext == nullptr);
}

// Fill the muted flag and filename of |err|, leaving the line of context to
// the caller.
//
// The return value is not a success/failure indication.  It is true when
// this object's own filename was used, and false when the filename, line
// and column were instead taken from the innermost non-builtin frame on the
// current JSContext, which happens only if |filename_| is null and such a
// frame with a filename exists.  |offset| is not used here.
bool TokenStreamAnyChars::fillExceptingContext(ErrorMetadata* err,
                                               uint32_t offset) const {
  err->isMuted = mutedErrors;

  // If this TokenStreamAnyChars doesn't have location information, try to
  // get it from the caller.
  if (!filename_) {
    JSContext* maybeCx = context()->maybeCurrentJSContext();
    if (maybeCx) {
      NonBuiltinFrameIter iter(maybeCx,
                               FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
                               maybeCx->realm()->principals());
      if (!iter.done() && iter.filename()) {
        err->filename = JS::ConstUTF8CharsZ(iter.filename());
        JS::TaggedColumnNumberOneOrigin columnNumber;
        err->lineNumber = iter.computeLine(&columnNumber);
        err->columnNumber =
            JS::ColumnNumberOneOrigin(columnNumber.oneOriginValue());
        return false;
      }
    }
  }

  // Otherwise use this TokenStreamAnyChars's location information.
  err->filename = filename_;
  return true;
}

// UTF-16 source needs no offset/length conversion, so this specialization
// must never be called; it only asserts.
template <>
inline void SourceUnits<char16_t>::computeWindowOffsetAndLength(
    const char16_t* encodedWindow, size_t encodedTokenOffset,
    size_t* utf16TokenOffset, size_t encodedWindowLength,
    size_t* utf16WindowLength) const {
  MOZ_ASSERT_UNREACHABLE("shouldn't need to recompute for UTF-16");
}

// Convert a token offset and window length measured in UTF-8 code units
// into the corresponding counts of UTF-16 code units.
//
// |encodedWindow| points at the first unit of the window, which must hold
// only valid UTF-8.  |encodedTokenOffset| must not exceed
// |encodedWindowLength|.  Results are written to |*utf16TokenOffset| and
// |*utf16WindowLength|.
template <>
inline void SourceUnits<Utf8Unit>::computeWindowOffsetAndLength(
    const Utf8Unit* encodedWindow, size_t encodedTokenOffset,
    size_t* utf16TokenOffset, size_t encodedWindowLength,
    size_t* utf16WindowLength) const {
  MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
             "token offset must be within the window, and the two lambda "
             "calls below presume this ordering of values");

  const Utf8Unit* const encodedWindowEnd = encodedWindow + encodedWindowLength;

  // Running UTF-16 unit count.  Both |i| and |encodedWindow| are captured by
  // reference so the second lambda call resumes where the first stopped.
  size_t i = 0;
  auto ComputeUtf16Count = [&i, &encodedWindow](const Utf8Unit* limit) {
    while (encodedWindow < limit) {
      Utf8Unit lead = *encodedWindow++;
      if (MOZ_LIKELY(IsAscii(lead))) {
        // ASCII contributes a single UTF-16 code unit.
        i++;
        continue;
      }

      Maybe<char32_t> cp = DecodeOneUtf8CodePoint(lead, &encodedWindow, limit);
      MOZ_ASSERT(cp.isSome(),
                 "computed window should only contain valid UTF-8");

      // Supplementary code points take a surrogate pair in UTF-16.
      i += unicode::IsSupplementary(cp.value()) ? 2 : 1;
    }

    return i;
  };

  // Compute the token offset from |i == 0| and the initial |encodedWindow|.
  const Utf8Unit* token = encodedWindow + encodedTokenOffset;
  MOZ_ASSERT(token <= encodedWindowEnd);
  *utf16TokenOffset = ComputeUtf16Count(token);

  // Compute the window length, picking up from |i| and |encodedWindow| that,
  // in general, were modified just above.
  *utf16WindowLength = ComputeUtf16Count(encodedWindowEnd);
}

// Attach to |err| a null-terminated UTF-16 copy of the source text around
// |offset|, together with the token's offset within that text and the
// text's length (both in UTF-16 code units).
//
// Returns true without touching |err| when the window is empty.  Returns
// false if copying the window, appending the terminator, or extracting the
// buffer fails.
template <typename Unit>
bool TokenStreamCharsBase<Unit>::addLineOfContext(ErrorMetadata* err,
                                                  uint32_t offset) const {
  // Rename the variable to make meaning clearer: an offset into source units
  // in Unit encoding.
  size_t encodedOffset = offset;

  // These are also offsets into source units in Unit encoding.
  size_t encodedWindowStart = sourceUnits.findWindowStart(encodedOffset);
  size_t encodedWindowEnd = sourceUnits.findWindowEnd(encodedOffset);

  size_t encodedWindowLength = encodedWindowEnd - encodedWindowStart;
  MOZ_ASSERT(encodedWindowLength <= SourceUnits::WindowRadius * 2);

  // Don't add a useless "line" of context when the window ends up empty
  // because of an invalid encoding at the start of a line.
  if (encodedWindowLength == 0) {
    MOZ_ASSERT(err->lineOfContext == nullptr,
               "ErrorMetadata::lineOfContext must be null so we don't "
               "have to set the lineLength/tokenOffset fields");
    return true;
  }

  CharBuffer lineOfContext(fc);

  const Unit* encodedWindow = sourceUnits.codeUnitPtrAt(encodedWindowStart);
  if (!FillCharBufferFromSourceNormalizingAsciiLineBreaks(
          lineOfContext, encodedWindow, encodedWindow + encodedWindowLength)) {
    return false;
  }

  // Capture the length before the terminator is appended.
  size_t utf16WindowLength = lineOfContext.length();

  // The windowed string is null-terminated.
  if (!lineOfContext.append('\0')) {
    return false;
  }

  err->lineOfContext.reset(lineOfContext.extractOrCopyRawBuffer());
  if (!err->lineOfContext) {
    return false;
  }

  size_t encodedTokenOffset = encodedOffset - encodedWindowStart;

  MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
             "token offset must be inside the window");

  // The length in UTF-8 code units of a code point is always greater than or
  // equal to the same code point's length in UTF-16 code units.  ASCII code
  // points are 1 unit in either encoding.  Code points in [U+0080, U+10000)
  // are 2-3 UTF-8 code units to 1 UTF-16 code unit.  And code points in
  // [U+10000, U+10FFFF] are 4 UTF-8 code units to 2 UTF-16 code units.
  //
  // Therefore, if encoded window length equals the length in UTF-16 (this is
  // always the case for Unit=char16_t), the UTF-16 offsets are exactly the
  // encoded offsets.  Otherwise we must convert offset/length from UTF-8 to
  // UTF-16.
  if constexpr (std::is_same_v<Unit, char16_t>) {
    MOZ_ASSERT(utf16WindowLength == encodedWindowLength,
               "UTF-16 to UTF-16 shouldn't change window length");
    err->tokenOffset = encodedTokenOffset;
    err->lineLength = encodedWindowLength;
  } else {
    static_assert(std::is_same_v<Unit, Utf8Unit>, "should only see UTF-8 here");

    bool simple = utf16WindowLength == encodedWindowLength;
#ifdef DEBUG
    auto isAscii = [](Unit u) { return IsAscii(u); };
    MOZ_ASSERT(std::all_of(encodedWindow, encodedWindow + encodedWindowLength,
                           isAscii) == simple,
               "equal window lengths in UTF-8 should correspond only to "
               "wholly-ASCII text");
#endif
    if (simple) {
      err->tokenOffset = encodedTokenOffset;
      err->lineLength = encodedWindowLength;
    } else {
      sourceUnits.computeWindowOffsetAndLength(
          encodedWindow, encodedTokenOffset, &err->tokenOffset,
          encodedWindowLength, &err->lineLength);
    }
  }

  return true;
}

// Fill |err| for an error located by |errorOffset|.
//
// |errorOffset| holds |NoOffset|, an explicit uint32_t offset, or otherwise
// designates the current offset of |sourceUnits|.  Returns false only if
// computing the line of context fails.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::computeErrorMetadata(
    ErrorMetadata* err, const ErrorOffset& errorOffset) const {
  if (errorOffset.is<NoOffset>()) {
    anyCharsAccess().computeErrorMetadataNoOffset(err);
    return true;
  }

  uint32_t offset;
  if (errorOffset.is<uint32_t>()) {
    offset = errorOffset.as<uint32_t>();
  } else {
    offset = this->sourceUnits.offset();
  }

  // This function's return value isn't a success/failure indication: it
  // returns true if this TokenStream can be used to provide a line of
  // context.
  if (fillExceptingContext(err, offset)) {
    // Add a line of context from this TokenStream to help with debugging.
    return internalComputeLineOfContext(err, offset);
  }

  // We can't fill in any more here.
  return true;
}

// Report JSMSG_ILLEGAL_CHARACTER for |cp|, displayed in "U+XXXX" form (at
// least four uppercase hex digits).  Reports out-of-memory instead if the
// display string cannot be allocated.
template <typename Unit, class AnyCharsAccess>
void TokenStreamSpecific<Unit, AnyCharsAccess>::reportIllegalCharacter(
    int32_t cp) {
  UniqueChars display = JS_smprintf("U+%04X", cp);
  if (!display) {
    ReportOutOfMemory(anyCharsAccess().fc);
    return;
  }
  error(JSMSG_ILLEGAL_CHARACTER, display.get());
}

// We have encountered a '\': check for a Unicode escape sequence after it.
// If one is found, store the encoded code point in |*codePoint|, skip all
// code units involved, and return the number of code units consumed after
// the '\' (5 for the "uXXXX" form).  Otherwise, return 0 and don't advance
// along the buffer.
template <typename Unit, class AnyCharsAccess>
uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscape(
    char32_t* codePoint) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));

  int32_t unit = getCodeUnit();
  if (unit != 'u') {
    // NOTE: |unit| may be EOF here.
    ungetCodeUnit(unit);
    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
    return 0;
  }

  char16_t v;
  unit = getCodeUnit();
  if (IsAsciiHexDigit(unit) && this->sourceUnits.matchHexDigits(3, &v)) {
    // |unit| is the most significant of four hex digits; |v| holds the
    // remaining three.
    *codePoint = (AsciiAlphanumericToNumber(unit) << 12) | v;
    return 5;
  }

  if (unit == '{') {
    return matchExtendedUnicodeEscape(codePoint);
  }

  // NOTE: |unit| may be EOF here, so this ungets either one or two units.
  ungetCodeUnit(unit);
  ungetCodeUnit('u');
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return 0;
}

// Match the remainder of a "\u{...}" escape; the '{' has just been consumed.
//
// On success, store the code point in |*codePoint| and return the number of
// code units consumed after the '\'.  The escape is accepted only if it is
// closed by '}', contains at least one hex digit, and its value does not
// exceed |unicode::NonBMPMax|.  On failure, unskip everything back to just
// after the '\' and return 0.
template <typename Unit, class AnyCharsAccess>
uint32_t
GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchExtendedUnicodeEscape(
    char32_t* codePoint) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('{'));

  int32_t unit = getCodeUnit();

  // Skip leading zeroes.
  uint32_t leadingZeroes = 0;
  while (unit == '0') {
    leadingZeroes++;
    unit = getCodeUnit();
  }

  // Accumulate at most six significant hex digits.
  size_t i = 0;
  uint32_t code = 0;
  while (IsAsciiHexDigit(unit) && i < 6) {
    code = (code << 4) | AsciiAlphanumericToNumber(unit);
    unit = getCodeUnit();
    i++;
  }

  uint32_t gotten =
      2 +                  // 'u{'
      leadingZeroes + i +  // significant hexdigits
      (unit != EOF);       // subtract a get if it didn't contribute to length

  if (unit == '}' && (leadingZeroes > 0 || i > 0) &&
      code <= unicode::NonBMPMax) {
    *codePoint = code;
    return gotten;
  }

  this->sourceUnits.unskipCodeUnits(gotten);
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return 0;
}

// Match a Unicode escape after a '\' whose code point is an identifier
// start.  On success, return the escape's length as computed by
// |matchUnicodeEscape| and leave the code point in |*codePoint|.  Otherwise
// return 0 with the stream positioned just after the '\'.
template <typename Unit, class AnyCharsAccess>
uint32_t
GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscapeIdStart(
    char32_t* codePoint) {
  uint32_t length = matchUnicodeEscape(codePoint);
  if (MOZ_LIKELY(length > 0)) {
    if (MOZ_LIKELY(unicode::IsIdentifierStart(*codePoint))) {
      return length;
    }

    // A well-formed escape, but not an identifier start: put it back.
    this->sourceUnits.unskipCodeUnits(length);
  }

  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return 0;
}

// Match a Unicode escape after a '\' whose code point is an identifier
// part.  On success, return true and leave the code point in |*codePoint|.
// Otherwise return false with the stream positioned just after the '\'.
template <typename Unit, class AnyCharsAccess>
bool GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscapeIdent(
    char32_t* codePoint) {
  uint32_t length = matchUnicodeEscape(codePoint);
  if (MOZ_LIKELY(length > 0)) {
    if (MOZ_LIKELY(unicode::IsIdentifierPart(*codePoint))) {
      return true;
    }

    // A well-formed escape, but not an identifier part: put it back.
    this->sourceUnits.unskipCodeUnits(length);
  }

  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
  return false;
}

// Consume one identifier-start code point, given either literally or as a
// Unicode escape, and record in |*sawEscape| which form was seen.
//
// Returns false after reporting an error: JSMSG_BAD_ESCAPE for a '\' that
// does not begin a valid identifier-start escape, JSMSG_MISSING_PRIVATE_NAME
// for EOF or any other non-identifier-start input.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] bool
TokenStreamSpecific<Unit, AnyCharsAccess>::matchIdentifierStart(
    IdentifierEscapes* sawEscape) {
  int32_t unit = getCodeUnit();
  if (unit == EOF) {
    error(JSMSG_MISSING_PRIVATE_NAME);
    return false;
  }

  if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
    if (unicode::IsIdentifierStart(char16_t(unit))) {
      *sawEscape = IdentifierEscapes::None;
      return true;
    }

    if (unit == '\\') {
      char32_t codePoint;
      uint32_t escapeLength = matchUnicodeEscapeIdStart(&codePoint);
      if (escapeLength != 0) {
        *sawEscape = IdentifierEscapes::SawUnicodeEscape;
        return true;
      }

      // We could point "into" a mistyped escape, e.g. for "\u{41H}" we
      // could point at the 'H'.  But we don't do that now, so the code
      // unit after the '\' isn't necessarily bad, so just point at the
      // start of the actually-invalid escape.
      ungetCodeUnit('\\');
      error(JSMSG_BAD_ESCAPE);
      return false;
    }
  }

  // Unget the lead code unit before peeking at the full code point.
  ungetCodeUnit(unit);

  PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
  if (!peeked.isNone() && unicode::IsIdentifierStart(peeked.codePoint())) {
    this->sourceUnits.consumeKnownCodePoint(peeked);

    *sawEscape = IdentifierEscapes::None;
    return true;
  }

  error(JSMSG_MISSING_PRIVATE_NAME);
  return false;
}

// Try to match a sourceURL directive and then a sourceMappingURL directive
// at the current position.  The second is attempted only if the first did
// not fail.  Calls |badToken()| and returns false if either fails.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDirectives(
    bool isMultiline, bool shouldWarnDeprecated) {
  // Match directive comments used in debugging, such as "//# sourceURL" and
  // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
  //
  // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
  // line comments containing a source mapping URL inside a multiline
  // comment. To avoid potentially expensive lookahead and backtracking, we
  // only check for this case if we encounter a '#' code unit.

  bool res = getDisplayURL(isMultiline, shouldWarnDeprecated) &&
             getSourceMappingURL(isMultiline, shouldWarnDeprecated);
  if (!res) {
    badToken();
  }

  return res;
}

// Copy the contents of |charBuffer| into a newly allocated, null-terminated
// char16_t array and store it in |*destination|.
//
// Returns false if the allocation fails; |*destination| is then null.
[[nodiscard]] bool TokenStreamCharsShared::copyCharBufferTo(
    UniquePtr<char16_t[], JS::FreePolicy>* destination) {
  size_t length = charBuffer.length();

  // One extra element for the terminator.
  *destination = fc->getAllocator()->make_pod_array<char16_t>(length + 1);
  if (!*destination) {
    return false;
  }

  std::copy(charBuffer.begin(), charBuffer.end(), destination->get());
  (*destination)[length] = '\0';
  return true;
}

// If the upcoming code units spell |directive| (of |directiveLength| ASCII
// units), consume it and the value that follows, and store a copy of that
// value in |*destination|.
//
// The value extends to the next whitespace, EOF, or encoding error, or to a
// "*/" terminator when |isMultiline| is true.  If |shouldWarnDeprecated| is
// true, a JSMSG_DEPRECATED_PRAGMA warning naming |errorMsgPragma| is issued
// first.
//
// Returns true when the directive is absent or its value is empty, leaving
// |*destination| untouched.  Returns false if the warning fails, if
// appending to the buffer fails, or if copying the value fails.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDirective(
    bool isMultiline, bool shouldWarnDeprecated, const char* directive,
    uint8_t directiveLength, const char* errorMsgPragma,
    UniquePtr<char16_t[], JS::FreePolicy>* destination) {
  // Stop if we don't find |directive|.  (Note that |directive| must be
  // ASCII, so there are no tricky encoding issues to consider in matching
  // UTF-8/16-agnostically.)
  if (!this->sourceUnits.matchCodeUnits(directive, directiveLength)) {
    return true;
  }

  if (shouldWarnDeprecated) {
    if (!warning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) {
      return false;
    }
  }

  this->charBuffer.clear();

  do {
    int32_t unit = peekCodeUnit();
    if (unit == EOF) {
      break;
    }

    if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
      if (unicode::IsSpace(AssertedCast<Latin1Char>(unit))) {
        break;
      }

      consumeKnownCodeUnit(unit);

      // Debugging directives can occur in both single- and multi-line
      // comments. If we're currently inside a multi-line comment, we
      // also must recognize multi-line comment terminators.
      if (isMultiline && unit == '*' && peekCodeUnit() == '/') {
        ungetCodeUnit('*');
        break;
      }

      if (!this->charBuffer.append(unit)) {
        return false;
      }

      continue;
    }

    // This ignores encoding errors: subsequent caller-side code to
    // handle the remaining source text in the comment will do so.
    PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
    if (peeked.isNone() || unicode::IsSpace(peeked.codePoint())) {
      break;
    }

    MOZ_ASSERT(!IsLineTerminator(peeked.codePoint()),
               "!IsSpace must imply !IsLineTerminator or else we'll fail to "
               "maintain line-info/flags for EOL");
    this->sourceUnits.consumeKnownCodePoint(peeked);

    if (!AppendCodePointToCharBuffer(this->charBuffer, peeked.codePoint())) {
      return false;
    }
  } while (true);

  if (this->charBuffer.empty()) {
    // The directive's URL was missing, but comments can contain anything,
    // so it isn't an error.
    return true;
  }

  return copyCharBufferTo(destination);
}

// Look for a "sourceURL" debugging directive at the current position inside a
// comment and, when one is found, store its URL in |displayURL_|.
//
// |isMultiline| and |shouldWarnDeprecated| are forwarded unchanged to
// |getDirective|, whose result is returned.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDisplayURL(
    bool isMultiline, bool shouldWarnDeprecated) {
  // Recognized comment shapes are "//# sourceURL=<url>" and
  // "/\* //# sourceURL=<url> *\/".
  //
  // The source text spells this "sourceURL", but internally the value is
  // called the "displayURL": it is the name the developer would like the
  // source to be shown under, as opposed to the source's actual URL.
  //
  // The leading space is part of the text that has to match.
  static constexpr char kDirective[] = " sourceURL=";
  constexpr uint8_t kDirectiveLength = js_strlen(kDirective);

  auto* const destination = &anyCharsAccess().displayURL_;
  return getDirective(isMultiline, shouldWarnDeprecated, kDirective,
                      kDirectiveLength, "sourceURL", destination);
}

// Look for a "sourceMappingURL" debugging directive at the current position
// inside a comment and, when one is found, store its URL in |sourceMapURL_|.
//
// |isMultiline| and |shouldWarnDeprecated| are forwarded unchanged to
// |getDirective|, whose result is returned.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getSourceMappingURL(
    bool isMultiline, bool shouldWarnDeprecated) {
  // Recognized comment shapes are "//# sourceMappingURL=<url>" and
  // "/\* //# sourceMappingURL=<url> *\/".
  //
  // The leading space is part of the text that has to match.
  static constexpr char kDirective[] = " sourceMappingURL=";
  constexpr uint8_t kDirectiveLength = js_strlen(kDirective);

  auto* const destination = &anyCharsAccess().sourceMapURL_;
  return getDirective(isMultiline, shouldWarnDeprecated, kDirective,
                      kDirectiveLength, "sourceMappingURL", destination);
}

// Allocate a Token of the given |kind| whose position runs from |start| to the
// current source offset.  |kind| is also written to |*out|, and the current
// line is flagged as dirty.  Returns the freshly allocated token.
template <typename Unit, class AnyCharsAccess>
MOZ_ALWAYS_INLINE Token*
GeneralTokenStreamChars<Unit, AnyCharsAccess>::newTokenInternal(
    TokenKind kind, TokenStart start, TokenKind* out) {
  MOZ_ASSERT(kind < TokenKind::Limit);
  MOZ_ASSERT(kind != TokenKind::Eol,
             "TokenKind::Eol should never be used in an actual Token, only "
             "returned by peekTokenSameLine()");

  TokenStreamAnyChars& shared = anyCharsAccess();
  shared.flags.isDirtyLine = true;

  Token* const tok = shared.allocateToken();
  tok->type = kind;
  *out = kind;

  tok->pos = TokenPos(start.offset(), this->sourceUnits.offset());
  MOZ_ASSERT(tok->pos.begin <= tok->pos.end);

  // |tok->modifier| is deliberately not touched here: |newToken()| sets it,
  // so optimized non-debug code never does work to pass along a modifier
  // argument that would go unused.
  return tok;
}

// Record that tokenizing failed.  Always returns false, so error paths can
// simply write |return badToken();|.
template <typename Unit, class AnyCharsAccess>
MOZ_COLD bool GeneralTokenStreamChars<Unit, AnyCharsAccess>::badToken() {
  // No token was produced, so |flags.isDirtyLine| is intentionally left
  // untouched; only the error flag is raised.
  TokenStreamAnyChars& shared = anyCharsAccess();
  shared.flags.hadError = true;

  // Poison sourceUnits to establish an invariant: after an erroneous token
  // has been seen, sourceUnits is never consulted again.  That holds because
  // the parser reacts to the illegal token by aborting parsing immediately.
  this->sourceUnits.poisonInDebug();

  return false;
}

// Append |codePoint| to |charBuffer| as UTF-16: one code unit, or two when the
// encoding needs a pair.  Returns false as soon as an append fails.
bool AppendCodePointToCharBuffer(CharBuffer& charBuffer, char32_t codePoint) {
  MOZ_ASSERT(codePoint <= unicode::NonBMPMax,
             "should only be processing code points validly decoded from UTF-8 "
             "or WTF-16 source text (surrogate code points permitted)");

  char16_t encoded[2];
  unsigned encodedLength = 0;
  unicode::UTF16Encode(codePoint, encoded, &encodedLength);

  MOZ_ASSERT(encodedLength == 1 || encodedLength == 2,
             "UTF-16 code points are only encoded in one or two units");

  // The first unit is always present.
  if (!charBuffer.append(encoded[0])) {
    return false;
  }

  // The second unit exists only for two-unit encodings.
  if (encodedLength != 1) {
    return charBuffer.append(encoded[1]);
  }
  return true;
}

// Re-scan the identifier that begins at |identStart| and copy it into
// |charBuffer|, with Unicode escapes replaced by the code points they denote.
// The read position of |sourceUnits| is the same on exit as on entry.
// Returns false if appending fails or a non-ASCII code point can't be read.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::putIdentInCharBuffer(
    const Unit* identStart) {
  // Rewind to the start of the identifier for the duration of this call.  The
  // scope-exit guard puts the read position back however we leave.
  const Unit* const savedPosition = this->sourceUnits.addressOfNextCodeUnit();
  this->sourceUnits.setAddressOfNextCodeUnit(identStart);
  auto rewindOnExit = MakeScopeExit([this, savedPosition]() {
    this->sourceUnits.setAddressOfNextCodeUnit(savedPosition);
  });

  this->charBuffer.clear();

  while (true) {
    const int32_t unit = getCodeUnit();
    if (unit == EOF) {
      return true;
    }

    char32_t codePoint;
    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
      // |rewindOnExit| undoes every get performed here, and this function
      // doesn't update line/column info.
      if (!getNonAsciiCodePointDontNormalize(toUnit(unit), &codePoint)) {
        return false;
      }

      if (!unicode::IsIdentifierPart(codePoint)) {
        return true;
      }
    } else if (unicode::IsIdentifierPart(char16_t(unit)) || unit == '#') {
      // Plain ASCII identifier unit (or '#'): copy it over unchanged.
      if (!this->charBuffer.append(unit)) {
        return false;
      }
      continue;
    } else if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint)) {
      // Anything other than a valid Unicode escape ends the identifier.
      return true;
    }

    // |codePoint| is either a decoded escape or a non-ASCII IdentifierPart.
    if (!AppendCodePointToCharBuffer(this->charBuffer, codePoint)) {
      return false;
    }
  }
}

// Finish scanning an identifier whose first code point has already been
// consumed, then create the matching token: a reserved-word token, a private
// name token, or an ordinary name token.
//
// |identStart| points at the identifier's first code unit, and |escaping|
// says whether a Unicode escape has been seen so far.  Every return path
// other than the successful ones runs |badToken()|.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::identifierName(
    TokenStart start, const Unit* identStart, IdentifierEscapes escaping,
    Modifier modifier, NameVisibility visibility, TokenKind* out) {
  // Armed now and released only on the success paths below.
  auto markBadTokenOnFailure = MakeScopeExit([this]() { this->badToken(); });

  // The caller consumed an initial code point in order to *know* this is an
  // identifier, so it is fine if this loop consumes nothing further.
  for (;;) {
    const int32_t unit = peekCodeUnit();
    if (unit == EOF) {
      break;
    }

    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
      // Encoding errors are ignored here: caller-side code that processes
      // the source text following the IdentifierName will deal with them.
      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
      if (peeked.isNone() || !unicode::IsIdentifierPart(peeked.codePoint())) {
        break;
      }

      MOZ_ASSERT(!IsLineTerminator(peeked.codePoint()),
                 "IdentifierPart must guarantee !IsLineTerminator or "
                 "else we'll fail to maintain line-info/flags for EOL");

      this->sourceUnits.consumeKnownCodePoint(peeked);
      continue;
    }

    consumeKnownCodeUnit(unit);
    if (MOZ_LIKELY(unicode::IsIdentifierPart(static_cast<char16_t>(unit)))) {
      continue;
    }

    // Only a Unicode escape can still belong to the identifier; anything
    // else is handed back and ends the scan.
    char32_t escapedCodePoint;
    if (unit != '\\' || !matchUnicodeEscapeIdent(&escapedCodePoint)) {
      ungetCodeUnit(unit);
      break;
    }

    escaping = IdentifierEscapes::SawUnicodeEscape;
  }

  TaggedParserAtomIndex atom;
  if (MOZ_LIKELY(escaping != IdentifierEscapes::SawUnicodeEscape)) {
    // With no escapes present the identifier can be taken straight from
    // sourceUnits.
    const size_t length =
        this->sourceUnits.addressOfNextCodeUnit() - identStart;

    // Private identifiers begin with '#', so they can never be reserved
    // words; only public names are looked up.
    if (visibility == NameVisibility::Public) {
      // An escape-free reserved word becomes a reserved-word token.
      const ReservedWordInfo* reserved = FindReservedWord(identStart, length);
      if (reserved) {
        markBadTokenOnFailure.release();
        newSimpleToken(reserved->tokentype, start, modifier, out);
        return true;
      }
    }

    atom = atomizeSourceChars(Span(identStart, length));
  } else {
    // An identifier containing Unicode escapes has to be converted into the
    // char buffer before it can be atomized.
    if (!putIdentInCharBuffer(identStart)) {
      return false;
    }

    atom = drainCharBufferIntoAtom();
  }

  if (!atom) {
    return false;
  }

  markBadTokenOnFailure.release();

  if (visibility == NameVisibility::Private) {
    newPrivateNameToken(atom, start, modifier, out);
  } else {
    newNameToken(atom, start, modifier, out);
  }
  return true;
}

// Coarse classification of an ASCII code unit by the kind of token it can
// begin.  These are the values stored in the firstCharKinds[] table.
enum FirstCharKind {

  // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid

  // token that cannot also be a prefix of a longer token.  E.g. ';' has the

  // OneChar kind, but '+' does not, because '++' and '+=' are valid longer

  // tokens that begin with '+'.

  //

  // The few token kinds satisfying these properties cover roughly 35--45%

  // of the tokens seen in practice.

  //

  // We represent the 'OneChar' kind with any value in the range

  // [OneChar_Min, OneChar_Max], i.e. any value below TokenKind::Limit.  This

  // representation lets us associate each one-char token char16_t with a

  // TokenKind and thus avoid a subsequent char16_t-to-TokenKind conversion.

  OneChar_Min = 0,

  OneChar_Max = size_t(TokenKind::Limit) - 1,

  // The remaining kinds are numbered from TokenKind::Limit upward, so none of
  // them can be mistaken for a OneChar value.
  Space = size_t(TokenKind::Limit),

  Ident,

  Dec,

  String,

  EOL,

  ZeroDigit,

  Other,

  // Alias for the largest kind value.
  LastCharKind = Other

};

// How each ASCII code unit is classified in firstCharKinds[] below:
//
// OneChar: 40,  41,  44,  58,  59,  91,  93,  123, 125, 126:
//          '(', ')', ',', ':', ';', '[', ']', '{', '}', '~'
// Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
// String:  34, 39, 96: '"', '\'', '`'
// Dec:     49..57: '1'..'9'
// ZeroDigit:  48: '0'
// Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
// EOL:     10, 13: '\n', '\r'
// Other:   every remaining code unit, including 46 '.', 61 '=' and 43 '+'
//          (FirstCharKind has no dedicated Dot, Equals or Plus kinds).
//

// Short aliases for the one-char TokenKinds, and for |Other|, so that every
// entry in the table below fits the same column width.  They are #undef'd
// again right after the table.
#define T_COMMA size_t(TokenKind::Comma)

#define T_COLON size_t(TokenKind::Colon)

#define T_BITNOT size_t(TokenKind::BitNot)

#define T_LP size_t(TokenKind::LeftParen)

#define T_RP size_t(TokenKind::RightParen)

#define T_SEMI size_t(TokenKind::Semi)

#define T_LB size_t(TokenKind::LeftBracket)

#define T_RB size_t(TokenKind::RightBracket)

#define T_LC size_t(TokenKind::LeftCurly)

#define T_RC size_t(TokenKind::RightCurly)

#define _______ Other

// FirstCharKind of each ASCII code unit, indexed by the code unit's value
// (0..127).  Each row covers ten consecutive values; the row label gives the
// value of its first column.
static const uint8_t firstCharKinds[] = {

    // clang-format off

/*         0        1        2        3        4        5        6        7        8        9    */

/*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,

/*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,

/*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,

/*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,

/*  40+ */    T_LP,    T_RP, _______, _______, T_COMMA, _______, _______, _______,ZeroDigit,    Dec,

/*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec, T_COLON,  T_SEMI,

/*  60+ */ _______, _______, _______, _______, _______,   Ident,   Ident,   Ident,   Ident,   Ident,

/*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,

/*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,

/*  90+ */   Ident,    T_LB, _______,    T_RB, _______,   Ident,  String,   Ident,   Ident,   Ident,

/* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,

/* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,

/* 120+ */   Ident,   Ident,   Ident,    T_LC, _______,    T_RC,T_BITNOT, _______

    // clang-format on

};

// The aliases were only needed to lay out the table.
#undef T_COMMA

#undef T_COLON

#undef T_BITNOT

#undef T_LP

#undef T_RP

#undef T_SEMI

#undef T_LB

#undef T_RB

#undef T_LC

#undef T_RC

#undef _______

// Every FirstCharKind value has to be representable in one table element.
static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),

              "Elements of firstCharKinds[] are too small");

// UTF-16 flavor: consume code units up to, but not including, the first one
// for which IsLineTerminator is true, or up to the end of the source.
template <>
void SourceUnits<char16_t>::consumeRestOfSingleLineComment() {
  for (;;) {
    if (MOZ_UNLIKELY(atEnd())) {
      return;
    }

    const char16_t next = peekCodeUnit();
    if (IsLineTerminator(next)) {
      // The terminator is left for the caller.
      return;
    }

    consumeKnownCodeUnit(next);
  }
}

// UTF-8 flavor: consume source text up to, but not including, the first line
// terminator.  Scanning also stops at the end of the source, and just before
// any code point that cannot be peeked.
template <>
void SourceUnits<Utf8Unit>::consumeRestOfSingleLineComment() {
  while (MOZ_LIKELY(!atEnd())) {
    const Utf8Unit lead = peekCodeUnit();
    if (IsSingleUnitLineTerminator(lead)) {
      return;
    }

    if (MOZ_UNLIKELY(!IsAscii(lead))) {
      // Multi-unit code point: peek at all of it before deciding anything.
      PeekedCodePoint<Utf8Unit> peeked = peekCodePoint();
      if (peeked.isNone()) {
        return;
      }

      const char32_t cp = peeked.codePoint();
      const bool isSeparator =
          cp == unicode::LINE_SEPARATOR || cp == unicode::PARA_SEPARATOR;
      if (MOZ_UNLIKELY(isSeparator)) {
        return;
      }

      consumeKnownCodePoint(peeked);
    } else {
      consumeKnownCodeUnit(lead);
    }
  }
}

// Read one code unit.  If |isIntegerUnit| accepts it, continue with
// |matchIntegerAfterFirstDigit| and return its result.  Otherwise store the
// unit that was read in |*nextUnit| (it is not put back) and return true.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] MOZ_ALWAYS_INLINE bool
TokenStreamSpecific<Unit, AnyCharsAccess>::matchInteger(
    IsIntegerUnit isIntegerUnit, int32_t* nextUnit) {
  const int32_t first = getCodeUnit();
  if (isIntegerUnit(first)) {
    return matchIntegerAfterFirstDigit(isIntegerUnit, nextUnit);
  }

  *nextUnit = first;
  return true;
}

// Consume the rest of a run of digits (as judged by |isIntegerUnit|) in which
// single '_' separators may appear between digits.
//
// On success, the first unit that is neither a digit nor a valid separator is
// stored in |*nextUnit| (consumed, not put back) and true is returned.  An
// error is reported and false returned when a '_' is followed by another '_'
// or by anything that isn't a digit.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] MOZ_ALWAYS_INLINE bool
TokenStreamSpecific<Unit, AnyCharsAccess>::matchIntegerAfterFirstDigit(
    IsIntegerUnit isIntegerUnit, int32_t* nextUnit) {
  int32_t unit;
  for (;;) {
    unit = getCodeUnit();
    if (isIntegerUnit(unit)) {
      continue;
    }

    if (unit != '_') {
      break;
    }

    // A separator has to be followed directly by another digit.
    unit = getCodeUnit();
    if (isIntegerUnit(unit)) {
      continue;
    }

    const bool adjacentUnderscores = unit == '_';
    ungetCodeUnit(unit);
    if (adjacentUnderscores) {
      error(JSMSG_NUMBER_MULTIPLE_ADJACENT_UNDERSCORES);
    } else {
      // Also hand back the dangling separator before reporting it.
      ungetCodeUnit('_');
      error(JSMSG_NUMBER_END_WITH_UNDERSCORE);
    }
    return false;
  }

  *nextUnit = unit;
  return true;
}

// Scan a decimal numeric literal and create its token.
//
// |unit| is the code unit most recently consumed by the caller and
// |numStart| points at the first code unit of the literal.  Handles plain
// integers, an optional fractional part, an optional exponent, and the 'n'
// BigInt suffix.  Every return path other than success runs |badToken()|.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::decimalNumber(
    int32_t unit, TokenStart start, const Unit* numStart, Modifier modifier,
    TokenKind* out) {
  // Armed now and released only just before the token is created.
  auto markBadTokenOnFailure = MakeScopeExit([this]() { this->badToken(); });

  // Integral component.
  if (IsAsciiDigit(unit) &&
      !matchIntegerAfterFirstDigit(IsAsciiDigit, &unit)) {
    return false;
  }

  // Numbers contain no escapes, so their value can be computed directly from
  // the text in |sourceUnits|.
  double dval;
  bool isBigInt = false;
  DecimalPoint decimalPoint = NoDecimal;

  if (unit == 'n') {
    isBigInt = true;
    unit = peekCodeUnit();
  } else if (unit == '.' || unit == 'e' || unit == 'E') {
    // Optional decimal dot and fractional component.
    if (unit == '.') {
      decimalPoint = HasDecimal;
      if (!matchInteger(IsAsciiDigit, &unit)) {
        return false;
      }
    }

    // Optional exponential notation.
    if (unit == 'e' || unit == 'E') {
      unit = getCodeUnit();
      if (unit == '+' || unit == '-') {
        unit = getCodeUnit();
      }

      // An exponent needs at least one digit.
      if (!IsAsciiDigit(unit)) {
        ungetCodeUnit(unit);
        error(JSMSG_MISSING_EXPONENT);
        return false;
      }

      // Remaining exponent digits.
      if (!matchIntegerAfterFirstDigit(IsAsciiDigit, &unit)) {
        return false;
      }
    }

    ungetCodeUnit(unit);

    if (!GetDecimal(numStart, this->sourceUnits.addressOfNextCodeUnit(),
                    &dval)) {
      ReportOutOfMemory(this->fc);
      return false;
    }
  } else {
    // NOTE: |unit| may be EOF here.
    ungetCodeUnit(unit);

    // Most numbers are pure decimal integers with neither a fractional
    // component nor exponential notation, so they get an optimized path.
    if (!GetDecimalInteger(numStart, this->sourceUnits.addressOfNextCodeUnit(),
                           &dval)) {
      ReportOutOfMemory(this->fc);
      return false;
    }
  }

  // A number followed by IdentifierStart is an error.  (This is the only
  // place in ECMAScript where a token boundary is inadequate to properly
  // separate two tokens, necessitating this unaesthetic lookahead.)
  if (unit != EOF) {
    bool identifierStartFollows = false;
    if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
      identifierStartFollows = unicode::IsIdentifierStart(char16_t(unit));
    } else {
      // Encoding errors are ignored here: caller-side code that processes
      // the source text after the number will deal with them.
      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
      identifierStartFollows =
          !peeked.isNone() && unicode::IsIdentifierStart(peeked.codePoint());
    }

    if (identifierStartFollows) {
      error(JSMSG_IDSTART_AFTER_NUMBER);
      return false;
    }
  }

  markBadTokenOnFailure.release();

  if (isBigInt) {
    return bigIntLiteral(start, modifier, out);
  }

  newNumberToken(dval, decimalPoint, start, modifier, out);
  return true;
}

// Scan the remainder of a regular expression literal whose opening '/' has
// already been consumed: first the body, which is collected in |charBuffer|,
// then the flags.  On success a RegExp token is created and true is returned;
// every failure path returns through |badToken()|.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::regexpLiteral(
    TokenStart start, TokenKind* out) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('/'));
  this->charBuffer.clear();

  // Decode the non-ASCII code point that starts with |lead| and append it to
  // the body.  A LINE SEPARATOR or PARAGRAPH SEPARATOR terminates the literal
  // early, which is reported as an error.
  auto appendNonAscii = [this](int32_t lead) {
    MOZ_ASSERT(lead != EOF);
    MOZ_ASSERT(!this->isAsciiCodePoint(lead));

    char32_t codePoint;
    if (!this->getNonAsciiCodePointDontNormalize(this->toUnit(lead),
                                                 &codePoint)) {
      return false;
    }

    const bool isSeparator = codePoint == unicode::LINE_SEPARATOR ||
                             codePoint == unicode::PARA_SEPARATOR;
    if (MOZ_UNLIKELY(isSeparator)) {
      this->sourceUnits.ungetLineOrParagraphSeparator();
      this->error(JSMSG_UNTERMINATED_REGEXP);
      return false;
    }

    return AppendCodePointToCharBuffer(this->charBuffer, codePoint);
  };

  // Put |culprit| back, report the literal as unterminated, and produce the
  // bad-token result.
  auto failUnterminated = [this](int32_t culprit) {
    this->ungetCodeUnit(culprit);
    this->error(JSMSG_UNTERMINATED_REGEXP);
    return this->badToken();
  };

  // Body: everything up to the closing '/'.
  bool inCharClass = false;
  for (;;) {
    int32_t unit = getCodeUnit();
    if (unit == EOF) {
      return failUnterminated(unit);
    }

    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
      if (!appendNonAscii(unit)) {
        return badToken();
      }
      continue;
    }

    if (unit == '\\') {
      // Keep the backslash, then look at the unit it escapes.
      if (!this->charBuffer.append(unit)) {
        return badToken();
      }

      unit = getCodeUnit();
      if (unit == EOF) {
        return failUnterminated(unit);
      }

      // The code below this if-chain only handles ASCII code points, so a
      // non-ASCII escaped code point is dealt with here and the rest of the
      // iteration is skipped.
      if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
        if (!appendNonAscii(unit)) {
          return badToken();
        }
        continue;
      }
    } else if (unit == '[') {
      inCharClass = true;
    } else if (unit == ']') {
      inCharClass = false;
    } else if (unit == '/' && !inCharClass) {
      // For IE compat, an unescaped '/' is allowed inside a character class,
      // so only a '/' outside one ends the body.
      break;
    }

    // NOTE: Non-ASCII LineTerminators were already handled by the
    //       appendNonAscii calls above.
    if (unit == '\r' || unit == '\n') {
      return failUnterminated(unit);
    }

    MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(unit)));
    if (!this->charBuffer.append(unit)) {
      return badToken();
    }
  }

  // Flags: ASCII letters immediately following the closing '/'.
  int32_t unit;
  RegExpFlags reflags = RegExpFlag::NoFlags;
  for (;;) {
    unit = getCodeUnit();

    uint8_t flag;
    bool recognized = true;
    switch (unit) {
      case 'd':
        flag = RegExpFlag::HasIndices;
        break;
      case 'g':
        flag = RegExpFlag::Global;
        break;
      case 'i':
        flag = RegExpFlag::IgnoreCase;
        break;
      case 'm':
        flag = RegExpFlag::Multiline;
        break;
      case 's':
        flag = RegExpFlag::DotAll;
        break;
      case 'u':
        flag = RegExpFlag::Unicode;
        break;
      case 'v':
        flag = RegExpFlag::UnicodeSets;
        break;
      case 'y':
        flag = RegExpFlag::Sticky;
        break;
      default:
        flag = RegExpFlag::NoFlags;
        recognized = false;
        break;
    }

    // Anything that isn't an ASCII letter ends the flags.  An unrecognized
    // letter keeps |flag| at NoFlags and is rejected just below.
    if (!recognized && !IsAsciiAlpha(unit)) {
      break;
    }

    // Reject repeated flags, unknown flags, and combining /u with /v (the two
    // are mutually exclusive).
    if ((reflags & flag) || flag == RegExpFlag::NoFlags ||
        ((reflags & RegExpFlag::Unicode) && (flag & RegExpFlag::UnicodeSets)) ||
        ((reflags & RegExpFlag::UnicodeSets) && (flag & RegExpFlag::Unicode))) {
      ungetCodeUnit(unit);
      char buf[2] = {char(unit), '\0'};
      error(JSMSG_BAD_REGEXP_FLAG, buf);
      return badToken();
    }

    reflags |= flag;
  }

  // The unit that ended the flags is not part of the literal.
  ungetCodeUnit(unit);

  newRegExpToken(reflags, start, out);
  return true;
}

// Create a BigInt token for the literal running from |start| to the current
// offset, whose final code unit is the 'n' suffix.  The literal's text, minus
// that suffix and minus any '_' separators, is copied into |charBuffer|.
// Returns false if appending to the buffer fails.
//
// NOTE(review): unlike the failure paths in |regexpLiteral|, a failed append
// here returns without going through |badToken()| -- confirm this is intended.
template <typename Unit, class AnyCharsAccess>
[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::bigIntLiteral(
    TokenStart start, Modifier modifier, TokenKind* out) {
  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == toUnit('n'));
  MOZ_ASSERT(this->sourceUnits.offset() > start.offset());

  uint32_t length = this->sourceUnits.offset() - start.offset();
  MOZ_ASSERT(length >= 2);

  this->charBuffer.clear();

  mozilla::Range<const Unit> literal(
      this->sourceUnits.codeUnitPtrAt(start.offset()), length);

  // Everything except the trailing 'n'.
  for (uint32_t i = 0; i < length - 1; i++) {
    const int32_t unit = CodeUnitValue(literal[i]);

    // The text may begin with a 0[bBoOxX] prefix, followed by binary, octal,
    // decimal, or hex digits.  The caller has already validated all of that,
    // since the "n" marking a BigInt only appears at the very end.
    MOZ_ASSERT(isAsciiCodePoint(unit));

    // Separators are dropped; every other unit is copied.
    if (unit != '_') {
      if (!AppendCodePointToCharBuffer(this->charBuffer, unit)) {
        return false;
      }
    }
  }

  newBigIntToken(start, modifier, out);
  return true;
}

// If the source begins with "#!", consume the rest of that line as a
// HashbangComment.  Must be called with |sourceUnits| at the very start.
template <typename Unit, class AnyCharsAccess>
void GeneralTokenStreamChars<Unit,
                             AnyCharsAccess>::consumeOptionalHashbangComment() {
  MOZ_ASSERT(this->sourceUnits.atStart(),
             "HashBangComment can only appear immediately at the start of a "
             "Script or Module");

  // HashbangComment ::
  //   #!  SingleLineCommentChars_opt

  if (matchCodeUnit('#')) {
    if (matchCodeUnit('!')) {
      // This leaves a concluding LineTerminator unconsumed and stops just
      // before any encoding error.  The subsequent |getToken| call reaches
      // |getTokenInternal|, which handles both possibilities.
      this->sourceUnits.consumeRestOfSingleLineComment();
    } else {
      // '#' without a following '!' at the start of a Script or Module is an
      // error, but the normal parsing code reports it just fine once the '#'
      // has been put back.
      ungetCodeUnit('#');
    }
  }

  // No leading '#': a HashbangComment is optional, so there is nothing to do.
}

template <typename Unit, class AnyCharsAccess>

[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::getTokenInternal(

    TokenKind* const ttp, const Modifier modifier) {

  // Assume we'll fail: success cases will overwrite this.

#ifdef DEBUG

  *ttp = TokenKind::Limit;

#endif

  MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));

  // This loop runs more than once only when whitespace or comments are

  // encountered.

  do {

    int32_t unit = peekCodeUnit();

    if (MOZ_UNLIKELY(unit == EOF)) {

      MOZ_ASSERT(this->sourceUnits.atEnd());

      anyCharsAccess().flags.isEOF = true;

      TokenStart start(this->sourceUnits, 0);

      newSimpleToken(TokenKind::Eof, start, modifier, ttp);

      return true;

    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {

      // Non-ASCII code points can only be identifiers or whitespace.  It would

      // be nice to compute these *after* discarding whitespace, but IN A WORLD

      // where |unicode::IsSpace| requires consuming a variable number of code

      // units, it's easier to assume it's an identifier and maybe do a little

      // wasted work, than to unget and compute and reget if whitespace.

      TokenStart start(this->sourceUnits, 0);

      const Unit* identStart = this->sourceUnits.addressOfNextCodeUnit();

      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();

      if (peeked.isNone()) {

        MOZ_ALWAYS_FALSE(getCodePoint());

        return badToken();

      char32_t cp = peeked.codePoint();

      if (unicode::IsSpace(cp)) {

        this->sourceUnits.consumeKnownCodePoint(peeked);

        if (IsLineTerminator(cp)) {

          if (!updateLineInfoForEOL()) {

            return badToken();

          anyCharsAccess().updateFlagsForEOL();

        continue;

      static_assert(isAsciiCodePoint('$'),

                    "IdentifierStart contains '$', but as "

                    "!IsUnicodeIDStart('$'), ensure that '$' is never "

                    "handled here");

      static_assert(isAsciiCodePoint('_'),

                    "IdentifierStart contains '_', but as "

                    "!IsUnicodeIDStart('_'), ensure that '_' is never "

                    "handled here");

      if (MOZ_LIKELY(unicode::IsUnicodeIDStart(cp))) {

        this->sourceUnits.consumeKnownCodePoint(peeked);

        MOZ_ASSERT(!IsLineTerminator(cp),

                   "IdentifierStart must guarantee !IsLineTerminator "

                   "or else we'll fail to maintain line-info/flags "

                   "for EOL here");

        return identifierName(start, identStart, IdentifierEscapes::None,

                              modifier, NameVisibility::Public, ttp);

      reportIllegalCharacter(cp);

      return badToken();

    }  // !isAsciiCodePoint(unit)

    consumeKnownCodeUnit(unit);

    // Get the token kind, based on the first char.  The ordering of c1kind

    // comparison is based on the frequency of tokens in real code:

    // Parsemark (which represents typical JS code on the web) and the

    // Unreal demo (which represents asm.js code).

//

    //                  Parsemark   Unreal

    //  OneChar         32.9%       39.7%

    //  Space           25.0%        0.6%

    //  Ident           19.2%       36.4%

    //  Dec              7.2%        5.1%

    //  String           7.9%        0.0%

    //  EOL              1.7%        0.0%

    //  ZeroDigit        0.4%        4.9%

    //  Other            5.7%       13.3%

//

    // The ordering is based mostly on Parsemark frequencies, with Unreal

    // frequencies used to break close categories (e.g. |Dec| and

    // |String|).  |Other| is biggish, but no other token kind is common

    // enough for it to be worth adding extra values to FirstCharKind.

    FirstCharKind c1kind = FirstCharKind(firstCharKinds[unit]);

    // Look for an unambiguous single-char token.

//

    if (c1kind <= OneChar_Max) {

      TokenStart start(this->sourceUnits, -1);

      newSimpleToken(TokenKind(c1kind), start, modifier, ttp);

      return true;

    // Skip over non-EOL whitespace chars.

//

    if (c1kind == Space) {

      continue;

    // Look for an identifier.

//

    if (c1kind == Ident) {

      TokenStart start(this->sourceUnits, -1);

      return identifierName(

          start, this->sourceUnits.addressOfNextCodeUnit() - 1,

          IdentifierEscapes::None, modifier, NameVisibility::Public, ttp);

    // Look for a decimal number.

//

    if (c1kind == Dec) {

      TokenStart start(this->sourceUnits, -1);

      const Unit* numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

      return decimalNumber(unit, start, numStart, modifier, ttp);

    // Look for a string or a template string.

//

    if (c1kind == String) {

      return getStringOrTemplateToken(static_cast<char>(unit), modifier, ttp);

    // Skip over EOL chars, updating line state along the way.

//

    if (c1kind == EOL) {

      if (unit == '\r') {

        matchLineTerminator('\n');

      if (!updateLineInfoForEOL()) {

        return badToken();

      anyCharsAccess().updateFlagsForEOL();

      continue;

    // From a '0', look for a hexadecimal, binary, octal, or "noctal" (a

    // number starting with '0' that contains '8' or '9' and is treated as

    // decimal) number.

//

    if (c1kind == ZeroDigit) {

      TokenStart start(this->sourceUnits, -1);

      int radix;

      bool isBigInt = false;

      const Unit* numStart;

      unit = getCodeUnit();

      if (unit == 'x' || unit == 'X') {

        radix = 16;

        unit = getCodeUnit();

        if (!IsAsciiHexDigit(unit)) {

          // NOTE: |unit| may be EOF here.

          ungetCodeUnit(unit);

          error(JSMSG_MISSING_HEXDIGITS);

          return badToken();

        // one past the '0x'

        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        if (!matchIntegerAfterFirstDigit(IsAsciiHexDigit, &unit)) {

          return badToken();

      } else if (unit == 'b' || unit == 'B') {

        radix = 2;

        unit = getCodeUnit();

        if (!IsAsciiBinary(unit)) {

          // NOTE: |unit| may be EOF here.

          ungetCodeUnit(unit);

          error(JSMSG_MISSING_BINARY_DIGITS);

          return badToken();

        // one past the '0b'

        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        if (!matchIntegerAfterFirstDigit(IsAsciiBinary, &unit)) {

          return badToken();

      } else if (unit == 'o' || unit == 'O') {

        radix = 8;

        unit = getCodeUnit();

        if (!IsAsciiOctal(unit)) {

          // NOTE: |unit| may be EOF here.

          ungetCodeUnit(unit);

          error(JSMSG_MISSING_OCTAL_DIGITS);

          return badToken();

        // one past the '0o'

        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        if (!matchIntegerAfterFirstDigit(IsAsciiOctal, &unit)) {

          return badToken();

      } else if (IsAsciiDigit(unit)) {

        // Reject octal literals that appear in strict mode code.

        if (!strictModeError(JSMSG_DEPRECATED_OCTAL_LITERAL)) {

          return badToken();

        // The above test doesn't catch a few edge cases; see

        // |GeneralParser::maybeParseDirective|.  Record the violation so that

        // that function can handle them.

        anyCharsAccess().setSawDeprecatedOctalLiteral();

        radix = 8;

        // one past the '0'

        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        bool nonOctalDecimalIntegerLiteral = false;

        do {

          if (unit >= '8') {

            nonOctalDecimalIntegerLiteral = true;

          unit = getCodeUnit();

        } while (IsAsciiDigit(unit));

        if (unit == '_') {

          ungetCodeUnit(unit);

          error(JSMSG_SEPARATOR_IN_ZERO_PREFIXED_NUMBER);

          return badToken();

        if (unit == 'n') {

          ungetCodeUnit(unit);

          error(JSMSG_BIGINT_INVALID_SYNTAX);

          return badToken();

        if (nonOctalDecimalIntegerLiteral) {

          // Use the decimal scanner for the rest of the number.

          return decimalNumber(unit, start, numStart, modifier, ttp);

      } else if (unit == '_') {

        // Give a more explicit error message when '_' is used after '0'.

        ungetCodeUnit(unit);

        error(JSMSG_SEPARATOR_IN_ZERO_PREFIXED_NUMBER);

        return badToken();

      } else {

        // '0' not followed by [XxBbOo0-9_];  scan as a decimal number.

        ungetCodeUnit(unit);

        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;  // The '0'.

        return decimalNumber('0', start, numStart, modifier, ttp);

      if (unit == 'n') {

        isBigInt = true;

        unit = peekCodeUnit();

      } else {

        ungetCodeUnit(unit);

      // Error if an identifier-start code point appears immediately

      // after the number.  Somewhat surprisingly, if we don't check

      // here, we'll never check at all.

      if (MOZ_LIKELY(isAsciiCodePoint(unit))) {

        if (unicode::IsIdentifierStart(char16_t(unit))) {

          error(JSMSG_IDSTART_AFTER_NUMBER);

          return badToken();

      } else if (MOZ_LIKELY(unit != EOF)) {

        // This ignores encoding errors: subsequent caller-side code to

        // handle source text after the number will do so.

        PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();

        if (!peeked.isNone() &&

            unicode::IsIdentifierStart(peeked.codePoint())) {

          error(JSMSG_IDSTART_AFTER_NUMBER);

          return badToken();

      if (isBigInt) {

        return bigIntLiteral(start, modifier, ttp);

      double dval;

      if (!GetFullInteger(numStart, this->sourceUnits.addressOfNextCodeUnit(),

                          radix, IntegerSeparatorHandling::SkipUnderscore,

                          &dval)) {

        ReportOutOfMemory(this->fc);

        return badToken();

      newNumberToken(dval, NoDecimal, start, modifier, ttp);

      return true;

    MOZ_ASSERT(c1kind == Other);

    // This handles everything else.  Simple tokens distinguished solely by

    // TokenKind should set |simpleKind| and break, to share simple-token

    // creation code for all such tokens.  All other tokens must be handled

    // by returning (or by continuing from the loop enclosing this).

//

    TokenStart start(this->sourceUnits, -1);

    TokenKind simpleKind;

#ifdef DEBUG

    simpleKind = TokenKind::Limit;  // sentinel value for code after switch

#endif

    // The block a ways above eliminated all non-ASCII, so cast to the

    // smallest type possible to assist the C++ compiler.

    switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit)))) {

      case '.':

        if (IsAsciiDigit(peekCodeUnit())) {

          return decimalNumber('.', start,

                               this->sourceUnits.addressOfNextCodeUnit() - 1,

                               modifier, ttp);

        unit = getCodeUnit();

        if (unit == '.') {

          if (matchCodeUnit('.')) {

            simpleKind = TokenKind::TripleDot;

            break;

        // NOTE: |unit| may be EOF here.  A stray '.' at EOF would be an

        //       error, but subsequent code will handle it.

        ungetCodeUnit(unit);

        simpleKind = TokenKind::Dot;

        break;

      case '#': {

#ifdef ENABLE_RECORD_TUPLE

        if (matchCodeUnit('{')) {

          simpleKind = TokenKind::HashCurly;

          break;

        if (matchCodeUnit('[')) {

          simpleKind = TokenKind::HashBracket;

          break;

#endif

        TokenStart start(this->sourceUnits, -1);

        const Unit* identStart = this->sourceUnits.addressOfNextCodeUnit() - 1;

        IdentifierEscapes sawEscape;

        if (!matchIdentifierStart(&sawEscape)) {

          return badToken();

        return identifierName(start, identStart, sawEscape, modifier,

                              NameVisibility::Private, ttp);

      case '=':

        if (matchCodeUnit('=')) {

          simpleKind = matchCodeUnit('=') ? TokenKind::StrictEq : TokenKind::Eq;

        } else if (matchCodeUnit('>')) {

          simpleKind = TokenKind::Arrow;

        } else {

          simpleKind = TokenKind::Assign;

        break;

      case '+':

        if (matchCodeUnit('+')) {

          simpleKind = TokenKind::Inc;

        } else {

          simpleKind =

              matchCodeUnit('=') ? TokenKind::AddAssign : TokenKind::Add;

        break;

      case '\\': {

        char32_t codePoint;

        if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&codePoint)) {

          return identifierName(

              start,

              this->sourceUnits.addressOfNextCodeUnit() - escapeLength - 1,

              IdentifierEscapes::SawUnicodeEscape, modifier,

              NameVisibility::Public, ttp);

        // We could point "into" a mistyped escape, e.g. for "\u{41H}" we

        // could point at the 'H'.  But we don't do that now, so the code

        // unit after the '\' isn't necessarily bad, so just point at the

        // start of the actually-invalid escape.

        ungetCodeUnit('\\');

        error(JSMSG_BAD_ESCAPE);

        return badToken();

      case '|':

        if (matchCodeUnit('|')) {

          simpleKind = matchCodeUnit('=') ? TokenKind::OrAssign : TokenKind::Or;

        } else {

          simpleKind =

              matchCodeUnit('=') ? TokenKind::BitOrAssign : TokenKind::BitOr;

        break;

      case '^':

        simpleKind =

            matchCodeUnit('=') ? TokenKind::BitXorAssign : TokenKind::BitXor;

        break;

      case '&':

        if (matchCodeUnit('&')) {

          simpleKind =

              matchCodeUnit('=') ? TokenKind::AndAssign : TokenKind::And;

        } else {

          simpleKind =

              matchCodeUnit('=') ? TokenKind::BitAndAssign : TokenKind::BitAnd;

        break;

      case '?':

        if (matchCodeUnit('.')) {

          unit = getCodeUnit();

          if (IsAsciiDigit(unit)) {

            // If the "?." is immediately followed by a digit, as in
            // `<...> ?.5 <...>`, then the '?' is treated as a ternary (Hook)
            // rather than as an optional chain; both units are put back.

            simpleKind = TokenKind::Hook;

            ungetCodeUnit(unit);

            ungetCodeUnit('.');

          } else {

            ungetCodeUnit(unit);

            simpleKind = TokenKind::OptionalChain;

        } else if (matchCodeUnit('?')) {

          simpleKind = matchCodeUnit('=') ? TokenKind::CoalesceAssign

                                          : TokenKind::Coalesce;

        } else {

          simpleKind = TokenKind::Hook;

        break;

      case '!':

        if (matchCodeUnit('=')) {

          simpleKind = matchCodeUnit('=') ? TokenKind::StrictNe : TokenKind::Ne;

        } else {

          simpleKind = TokenKind::Not;

        break;

      case '<':

        if (anyCharsAccess().options().allowHTMLComments) {

          // Treat HTML begin-comment as comment-till-end-of-line.

          if (matchCodeUnit('!')) {

            if (matchCodeUnit('-')) {

              if (matchCodeUnit('-')) {

                this->sourceUnits.consumeRestOfSingleLineComment();

                continue;

              ungetCodeUnit('-');

            ungetCodeUnit('!');

        if (matchCodeUnit('<')) {

          simpleKind =

              matchCodeUnit('=') ? TokenKind::LshAssign : TokenKind::Lsh;

        } else {

          simpleKind = matchCodeUnit('=') ? TokenKind::Le : TokenKind::Lt;

        break;

      case '>':

        if (matchCodeUnit('>')) {

          if (matchCodeUnit('>')) {

            simpleKind =

                matchCodeUnit('=') ? TokenKind::UrshAssign : TokenKind::Ursh;

          } else {

            simpleKind =

                matchCodeUnit('=') ? TokenKind::RshAssign : TokenKind::Rsh;

        } else {

          simpleKind = matchCodeUnit('=') ? TokenKind::Ge : TokenKind::Gt;

        break;

      case '*':

        if (matchCodeUnit('*')) {

          simpleKind =

              matchCodeUnit('=') ? TokenKind::PowAssign : TokenKind::Pow;

        } else {

          simpleKind =

              matchCodeUnit('=') ? TokenKind::MulAssign : TokenKind::Mul;

        break;

      case '/':

        // Look for a single-line comment.

        if (matchCodeUnit('/')) {

          unit = getCodeUnit();

          if (unit == '@' || unit == '#') {

            bool shouldWarn = unit == '@';

            if (!getDirectives(false, shouldWarn)) {

              return false;

          } else {

            // NOTE: |unit| may be EOF here.

            ungetCodeUnit(unit);

          this->sourceUnits.consumeRestOfSingleLineComment();

          continue;

        // Look for a multi-line comment.

        if (matchCodeUnit('*')) {

          TokenStreamAnyChars& anyChars = anyCharsAccess();

          unsigned linenoBefore = anyChars.lineno;

          do {

            int32_t unit = getCodeUnit();

            if (unit == EOF) {

              error(JSMSG_UNTERMINATED_COMMENT);

              return badToken();

            if (unit == '*' && matchCodeUnit('/')) {

              break;

            if (unit == '@' || unit == '#') {

              bool shouldWarn = unit == '@';

              if (!getDirectives(true, shouldWarn)) {

                return badToken();

            } else if (MOZ_LIKELY(isAsciiCodePoint(unit))) {

              if (!getFullAsciiCodePoint(unit)) {

                return badToken();

            } else {

              char32_t codePoint;

              if (!getNonAsciiCodePoint(unit, &codePoint)) {

                return badToken();

          } while (true);

          if (linenoBefore != anyChars.lineno) {

            anyChars.updateFlagsForEOL();

          continue;

        // Look for a regexp.

        if (modifier == SlashIsRegExp) {

          return regexpLiteral(start, ttp);

        simpleKind = matchCodeUnit('=') ? TokenKind::DivAssign : TokenKind::Div;

        break;

      case '%':

        simpleKind = matchCodeUnit('=') ? TokenKind::ModAssign : TokenKind::Mod;

        break;

      case '-':

        if (matchCodeUnit('-')) {

          if (anyCharsAccess().options().allowHTMLComments &&

              !anyCharsAccess().flags.isDirtyLine) {

            if (matchCodeUnit('>')) {

              this->sourceUnits.consumeRestOfSingleLineComment();

              continue;

          simpleKind = TokenKind::Dec;

        } else {

          simpleKind =

              matchCodeUnit('=') ? TokenKind::SubAssign : TokenKind::Sub;

        break;

#ifdef ENABLE_DECORATORS

      case '@':

        simpleKind = TokenKind::At;

        break;

#endif

      default:

        // We consumed a bad ASCII code point/unit.  Put it back so the

        // error location is the bad code point.

        ungetCodeUnit(unit);

        reportIllegalCharacter(unit);

        return badToken();

    }  // switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit))))

    MOZ_ASSERT(simpleKind != TokenKind::Limit,

               "switch-statement should have set |simpleKind| before "

               "breaking");

    newSimpleToken(simpleKind, start, modifier, ttp);

    return true;

  } while (true);

// Scan the remainder of a string literal or template literal chunk and
// produce its token.
//
// |untilChar| is the delimiter that ends the literal and must be one of
// ', ", or ` (asserted).  A backtick selects template-literal rules:
//   * ASCII line breaks are allowed and contribute '\n';
//   * "${" ends the chunk and yields TokenKind::TemplateHead;
//   * invalid escapes are recorded via |setInvalidTemplateEscape| rather than
//     reported, leaving the decision to the parser.
//
// The token starts one code unit before the current position (see |start|),
// presumably at the already-consumed opening delimiter.
//
// On success the decoded characters are atomized, a String / TemplateHead /
// NoSubsTemplate token is created through |newAtomToken| with |out| as the
// destination, and true is returned.  On every other exit path false is
// returned and the |noteBadToken| scope-exit runs |badToken()|.
template <typename Unit, class AnyCharsAccess>
bool TokenStreamSpecific<Unit, AnyCharsAccess>::getStringOrTemplateToken(
    char untilChar, Modifier modifier, TokenKind* out) {
  MOZ_ASSERT(untilChar == '\'' || untilChar == '"' || untilChar == '`',
             "unexpected string/template literal delimiter");

  bool parsingTemplate = (untilChar == '`');
  bool templateHead = false;

  TokenStart start(this->sourceUnits, -1);
  this->charBuffer.clear();

  // Run the bad-token code for every path out of this function except the
  // one success-case.
  auto noteBadToken = MakeScopeExit([this]() { this->badToken(); });

  auto ReportPrematureEndOfLiteral = [this, untilChar](unsigned errnum) {
    // Unicode separators aren't end-of-line in template or (as of
    // recently) string literals, so this assertion doesn't allow them.
    MOZ_ASSERT(this->sourceUnits.atEnd() ||
                   this->sourceUnits.peekCodeUnit() == Unit('\r') ||
                   this->sourceUnits.peekCodeUnit() == Unit('\n'),
               "must be parked at EOF or EOL to call this function");

    // The various errors reported here include language like "in a ''
    // literal" or similar, with '' being '', "", or `` as appropriate.
    const char delimiters[] = {untilChar, untilChar, '\0'};

    this->error(errnum, delimiters);
    return;
  };

  // We need to detect any of these chars:  " or ', \n (or its
  // equivalents), \\, EOF.  Because we detect EOL sequences here and
  // put them back immediately, we can use getCodeUnit().
  int32_t unit;
  while ((unit = getCodeUnit()) != untilChar) {
    if (unit == EOF) {
      ReportPrematureEndOfLiteral(JSMSG_EOF_BEFORE_END_OF_LITERAL);
      return false;
    }

    // Non-ASCII code points are always directly appended -- even
    // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR that are
    // ordinarily LineTerminatorSequences.  (They contribute their literal
    // values to template and [as of recently] string literals, but they're
    // line terminators when computing line/column coordinates.)  Handle
    // the non-ASCII case early for readability.
    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
      char32_t cp;
      if (!getNonAsciiCodePointDontNormalize(toUnit(unit), &cp)) {
        return false;
      }

      if (MOZ_UNLIKELY(cp == unicode::LINE_SEPARATOR ||
                       cp == unicode::PARA_SEPARATOR)) {
        if (!updateLineInfoForEOL()) {
          return false;
        }

        anyCharsAccess().updateFlagsForEOL();
      } else {
        MOZ_ASSERT(!IsLineTerminator(cp));
      }

      if (!AppendCodePointToCharBuffer(this->charBuffer, cp)) {
        return false;
      }

      continue;
    }

    if (unit == '\\') {
      // When parsing templates, we don't immediately report errors for
      // invalid escapes; these are handled by the parser.  We don't
      // append to charBuffer in those cases because it won't be read.
      unit = getCodeUnit();
      if (unit == EOF) {
        ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
        return false;
      }

      // Non-ASCII |unit| isn't handled by code after this, so dedicate
      // an unlikely special-case to it and then continue.
      if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
        char32_t codePoint;
        if (!getNonAsciiCodePoint(unit, &codePoint)) {
          return false;
        }

        // If we consumed U+2028 LINE SEPARATOR or U+2029 PARAGRAPH
        // SEPARATOR, they'll be normalized to '\n'.  '\' followed by
        // LineContinuation represents no code points, so don't append
        // in this case.
        if (codePoint != '\n') {
          if (!AppendCodePointToCharBuffer(this->charBuffer, codePoint)) {
            return false;
          }
        }

        continue;
      }

      // The block above eliminated all non-ASCII, so cast to the
      // smallest type possible to assist the C++ compiler.
      //
      // Within this switch, |break| means "append |unit| to charBuffer",
      // while |continue| resumes the enclosing while-loop without appending.
      switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit)))) {
        case 'b':
          unit = '\b';
          break;
        case 'f':
          unit = '\f';
          break;
        case 'n':
          unit = '\n';
          break;
        case 'r':
          unit = '\r';
          break;
        case 't':
          unit = '\t';
          break;
        case 'v':
          unit = '\v';
          break;

        case '\r':
          matchLineTerminator('\n');
          [[fallthrough]];
        case '\n': {
          // LineContinuation represents no code points.  We're manually
          // consuming a LineTerminatorSequence, so we must manually
          // update line/column info.
          if (!updateLineInfoForEOL()) {
            return false;
          }

          continue;
        }

        // Unicode character specification.
        case 'u': {
          int32_t c2 = getCodeUnit();
          if (c2 == EOF) {
            ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
            return false;
          }

          // First handle a delimited Unicode escape, e.g. \u{1F4A9}.
          if (c2 == '{') {
            // Offset of the escape's leading '\': we have consumed "\u{".
            uint32_t start = this->sourceUnits.offset() - 3;
            uint32_t code = 0;
            bool first = true;
            bool valid = true;
            do {
              int32_t u3 = getCodeUnit();
              if (u3 == EOF) {
                if (parsingTemplate) {
                  TokenStreamAnyChars& anyChars = anyCharsAccess();
                  anyChars.setInvalidTemplateEscape(start,
                                                    InvalidEscapeType::Unicode);
                  valid = false;
                  break;
                }
                reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
                return false;
              }
              if (u3 == '}') {
                // "\u{}" (no digits at all) is invalid.
                if (first) {
                  if (parsingTemplate) {
                    TokenStreamAnyChars& anyChars = anyCharsAccess();
                    anyChars.setInvalidTemplateEscape(
                        start, InvalidEscapeType::Unicode);
                    valid = false;
                    break;
                  }
                  reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
                  return false;
                }
                break;
              }

              // Beware: |u3| may be a non-ASCII code point here; if
              // so it'll pass into this |if|-block.
              if (!IsAsciiHexDigit(u3)) {
                if (parsingTemplate) {
                  // We put the code unit back so that we read it
                  // on the next pass, which matters if it was
                  // '`' or '\'.
                  ungetCodeUnit(u3);

                  TokenStreamAnyChars& anyChars = anyCharsAccess();
                  anyChars.setInvalidTemplateEscape(start,
                                                    InvalidEscapeType::Unicode);
                  valid = false;
                  break;
                }
                reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
                return false;
              }

              code = (code << 4) | AsciiAlphanumericToNumber(u3);
              if (code > unicode::NonBMPMax) {
                if (parsingTemplate) {
                  TokenStreamAnyChars& anyChars = anyCharsAccess();
                  anyChars.setInvalidTemplateEscape(
                      start + 3, InvalidEscapeType::UnicodeOverflow);
                  valid = false;
                  break;
                }
                reportInvalidEscapeError(start + 3,
                                         InvalidEscapeType::UnicodeOverflow);
                return false;
              }

              first = false;
            } while (true);

            if (!valid) {
              continue;
            }

            MOZ_ASSERT(code <= unicode::NonBMPMax);
            if (!AppendCodePointToCharBuffer(this->charBuffer, code)) {
              return false;
            }

            continue;
          }  // end of delimited Unicode escape handling

          // Otherwise it must be a fixed-length \uXXXX Unicode escape.
          // If it isn't, this is usually an error -- but if this is a
          // template literal, we must defer error reporting because
          // malformed escapes are okay in *tagged* template literals.
          char16_t v;
          if (IsAsciiHexDigit(c2) && this->sourceUnits.matchHexDigits(3, &v)) {
            unit = (AsciiAlphanumericToNumber(c2) << 12) | v;
          } else {
            // Beware: |c2| may not be an ASCII code point here!
            ungetCodeUnit(c2);
            uint32_t start = this->sourceUnits.offset() - 2;
            if (parsingTemplate) {
              TokenStreamAnyChars& anyChars = anyCharsAccess();
              anyChars.setInvalidTemplateEscape(start,
                                                InvalidEscapeType::Unicode);
              continue;
            }
            reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
            return false;
          }
          break;
        }  // case 'u'

        // Hexadecimal character specification.
        case 'x': {
          char16_t v;
          if (this->sourceUnits.matchHexDigits(2, &v)) {
            unit = v;
          } else {
            uint32_t start = this->sourceUnits.offset() - 2;
            if (parsingTemplate) {
              TokenStreamAnyChars& anyChars = anyCharsAccess();
              anyChars.setInvalidTemplateEscape(start,
                                                InvalidEscapeType::Hexadecimal);
              continue;
            }
            reportInvalidEscapeError(start, InvalidEscapeType::Hexadecimal);
            return false;
          }
          break;
        }  // case 'x'

        default: {
          if (!IsAsciiOctal(unit)) {
            // \8 or \9 in an untagged template literal is a syntax error,
            // reported in GeneralParser::noSubstitutionUntaggedTemplate.
            //
            // Tagged template literals, however, may contain \8 and \9.  The
            // "cooked" representation of such a part will be |undefined|, and
            // the "raw" representation will contain the literal characters.
            //
            //   function f(parts) {
            //     assertEq(parts[0], undefined);
            //     assertEq(parts.raw[0], "\\8");
            //     return "composed";
            //   }
            //   assertEq(f`\8`, "composed");
            if (unit == '8' || unit == '9') {
              TokenStreamAnyChars& anyChars = anyCharsAccess();
              if (parsingTemplate) {
                anyChars.setInvalidTemplateEscape(
                    this->sourceUnits.offset() - 2,
                    InvalidEscapeType::EightOrNine);
                continue;
              }

              // \8 and \9 are forbidden in string literals in strict mode code.
              if (!strictModeError(JSMSG_DEPRECATED_EIGHT_OR_NINE_ESCAPE)) {
                return false;
              }

              // The above test doesn't catch a few edge cases; see
              // |GeneralParser::maybeParseDirective|.  Record the violation so
              // that that function can handle them.
              anyChars.setSawDeprecatedEightOrNineEscape();
            }

            // Any other escaped ASCII character stands for itself.
            break;
          }

          // Octal character specification.
          int32_t val = AsciiOctalToNumber(unit);

          unit = peekCodeUnit();
          if (MOZ_UNLIKELY(unit == EOF)) {
            ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
            return false;
          }

          // Strict mode code allows only \0 followed by a non-digit.
          if (val != 0 || IsAsciiDigit(unit)) {
            TokenStreamAnyChars& anyChars = anyCharsAccess();
            if (parsingTemplate) {
              anyChars.setInvalidTemplateEscape(this->sourceUnits.offset() - 2,
                                                InvalidEscapeType::Octal);
              continue;
            }

            if (!strictModeError(JSMSG_DEPRECATED_OCTAL_ESCAPE)) {
              return false;
            }

            // The above test doesn't catch a few edge cases; see
            // |GeneralParser::maybeParseDirective|.  Record the violation so
            // that that function can handle them.
            anyChars.setSawDeprecatedOctalEscape();
          }

          if (IsAsciiOctal(unit)) {
            val = 8 * val + AsciiOctalToNumber(unit);
            consumeKnownCodeUnit(unit);

            unit = peekCodeUnit();
            if (MOZ_UNLIKELY(unit == EOF)) {
              ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
              return false;
            }

            if (IsAsciiOctal(unit)) {
              // Only take a third digit if the value still fits in 0xFF.
              int32_t save = val;
              val = 8 * val + AsciiOctalToNumber(unit);
              if (val <= 0xFF) {
                consumeKnownCodeUnit(unit);
              } else {
                val = save;
              }
            }
          }

          unit = char16_t(val);
          break;
        }  // default
      }  // switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit))))

      if (!this->charBuffer.append(unit)) {
        return false;
      }

      continue;
    }  // (unit == '\\')

    if (unit == '\r' || unit == '\n') {
      if (!parsingTemplate) {
        // String literals don't allow ASCII line breaks.
        ungetCodeUnit(unit);
        ReportPrematureEndOfLiteral(JSMSG_EOL_BEFORE_END_OF_STRING);
        return false;
      }

      // Within a template, "\r" and "\r\n" both contribute a single '\n'.
      if (unit == '\r') {
        unit = '\n';
        matchLineTerminator('\n');
      }

      if (!updateLineInfoForEOL()) {
        return false;
      }

      anyCharsAccess().updateFlagsForEOL();
    } else if (parsingTemplate && unit == '$' && matchCodeUnit('{')) {
      templateHead = true;
      break;
    }

    if (!this->charBuffer.append(unit)) {
      return false;
    }
  }

  TaggedParserAtomIndex atom = drainCharBufferIntoAtom();
  if (!atom) {
    return false;
  }

  noteBadToken.release();

  MOZ_ASSERT_IF(!parsingTemplate, !templateHead);

  TokenKind kind = !parsingTemplate ? TokenKind::String
                   : templateHead   ? TokenKind::TemplateHead
                                    : TokenKind::NoSubsTemplate;
  newAtomToken(kind, atom, start, modifier, out);
  return true;
}

// Map |tt| to the description string paired with it in FOR_EACH_TOKEN_KIND.
//
// TokenKind::Limit is not expected here: it trips MOZ_ASSERT_UNREACHABLE and
// then falls out of the switch to return "<bad TokenKind>".
const char* TokenKindToDesc(TokenKind tt) {
  switch (tt) {
    // The continuation backslashes must be directly followed by the next
    // line of the macro body; a blank line in between ends the definition.
#define EMIT_CASE(name, desc) \
  case TokenKind::name:       \
    return desc;
    FOR_EACH_TOKEN_KIND(EMIT_CASE)
#undef EMIT_CASE
    case TokenKind::Limit:
      MOZ_ASSERT_UNREACHABLE("TokenKind::Limit should not be passed.");
      break;
  }

  return "<bad TokenKind>";
}

#ifdef DEBUG
// DEBUG-only: map |tt| to the spelling of its enumerator, e.g.
// "TokenKind::Name" for TokenKind::Name.  The |desc| column of
// FOR_EACH_TOKEN_KIND is ignored.  TokenKind::Limit yields
// "<bad TokenKind>".
const char* TokenKindToString(TokenKind tt) {
  switch (tt) {
    // The continuation backslashes must be directly followed by the next
    // line of the macro body; a blank line in between ends the definition.
#  define EMIT_CASE(name, desc) \
    case TokenKind::name:       \
      return "TokenKind::" #name;
    FOR_EACH_TOKEN_KIND(EMIT_CASE)
#  undef EMIT_CASE
    case TokenKind::Limit:
      break;
  }

  return "<bad TokenKind>";
}
#endif

// Explicit instantiation definitions of the token stream class templates.
//
// TokenStreamCharsBase is instantiated for both code unit types (Utf8Unit
// and char16_t).  GeneralTokenStreamChars, TokenStreamChars and
// TokenStreamSpecific are each instantiated for:
//   * char16_t with TokenStreamAnyCharsAccess, and
//   * every combination of {Utf8Unit, char16_t} with ParserAnyCharsAccess
//     over a GeneralParser using {FullParseHandler, SyntaxParseHandler}.
//
// NOTE(review): presumably these are needed because the member definitions
// live in this .cpp rather than in the header -- confirm before removing any.
template class TokenStreamCharsBase<Utf8Unit>;

template class TokenStreamCharsBase<char16_t>;

template class GeneralTokenStreamChars<char16_t, TokenStreamAnyCharsAccess>;

template class TokenStreamChars<char16_t, TokenStreamAnyCharsAccess>;

template class TokenStreamSpecific<char16_t, TokenStreamAnyCharsAccess>;

// GeneralTokenStreamChars, parser-backed access.
template class GeneralTokenStreamChars<

    Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;

template class GeneralTokenStreamChars<

    Utf8Unit,

    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;

template class GeneralTokenStreamChars<

    char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;

template class GeneralTokenStreamChars<

    char16_t,

    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;

// TokenStreamChars, parser-backed access.
template class TokenStreamChars<

    Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;

template class TokenStreamChars<

    Utf8Unit,

    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;

template class TokenStreamChars<

    char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;

template class TokenStreamChars<

    char16_t,

    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;

// TokenStreamSpecific, parser-backed access.
template class TokenStreamSpecific<

    Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;

template class TokenStreamSpecific<

    Utf8Unit,

    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;

template class TokenStreamSpecific<

    char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;

template class TokenStreamSpecific<

    char16_t,

    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;

}  // namespace frontend

}  // namespace js