Source code
Revision control
Copy as Markdown
Other Tools
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "vm/StringType-inl.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/Latin1.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/PodOperations.h"
#include "mozilla/RangedPtr.h"
#include "mozilla/StringBuffer.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include "mozilla/Vector.h"
#include <algorithm> // std::{all_of,copy_n,enable_if,is_const,move}
#include <iterator> // std::size
#include <type_traits> // std::is_same, std::is_unsigned
#include "jsfriendapi.h"
#include "jsnum.h"
#include "builtin/Boolean.h"
#ifdef ENABLE_RECORD_TUPLE
# include "builtin/RecordObject.h"
#endif
#include "gc/AllocKind.h"
#include "gc/MaybeRooted.h"
#include "gc/Nursery.h"
#include "js/CharacterEncoding.h"
#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
#include "js/Printer.h" // js::GenericPrinter
#include "js/PropertyAndElement.h" // JS_DefineElement
#include "js/SourceText.h" // JS::SourceText
#include "js/StableStringChars.h"
#include "js/UbiNode.h"
#include "util/Identifier.h" // js::IsIdentifierNameOrPrivateName
#include "util/Unicode.h"
#include "vm/GeckoProfiler.h"
#include "vm/JSONPrinter.h" // js::JSONPrinter
#include "vm/StaticStrings.h"
#include "vm/ToSource.h" // js::ValueToSource
#include "gc/Marking-inl.h"
#include "vm/GeckoProfiler-inl.h"
#ifdef ENABLE_RECORD_TUPLE
# include "vm/RecordType.h"
# include "vm/TupleType.h"
#endif
using namespace js;
using mozilla::AsWritableChars;
using mozilla::ConvertLatin1toUtf16;
using mozilla::IsAsciiDigit;
using mozilla::IsUtf16Latin1;
using mozilla::LossyConvertUtf16toLatin1;
using mozilla::PodCopy;
using mozilla::RangedPtr;
using mozilla::RoundUpPow2;
using mozilla::Span;
using JS::AutoCheckCannotGC;
using JS::AutoStableStringChars;
using UniqueLatin1Chars = UniquePtr<Latin1Char[], JS::FreePolicy>;
// Measure the malloc'ed character storage attributable to this string, not
// counting the string cell itself. Storage that will be counted through a
// different string (rope leaves, a dependent string's base), inline chars and
// chars this string does not own all contribute zero.
size_t JSString::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
  // Ropes: nothing to add here; the children's chars are counted when the
  // leaf strings themselves are measured.
  if (isRope()) {
    return 0;
  }

  MOZ_ASSERT(isLinear());

  // Dependent strings: the chars are counted when the base string is
  // measured.
  if (isDependent()) {
    return 0;
  }

  // External strings: defer to the embedding's callbacks.
  if (isExternal()) {
    // The callback isn't supposed to cause GC.
    JS::AutoSuppressGCAnalysis nogc;
    JSExternalString& ext = asExternal();
    return ext.hasLatin1Chars()
               ? ext.callbacks()->sizeOfBuffer(ext.latin1Chars(), mallocSizeOf)
               : ext.callbacks()->sizeOfBuffer(ext.twoByteChars(),
                                               mallocSizeOf);
  }

  // JSInlineString, JSFatInlineString, js::ThinInlineAtom, js::FatInlineAtom:
  // the chars live inside the cell.
  if (isInline()) {
    return 0;
  }

  JSLinearString& linear = asLinear();

  // StringBuffer-backed chars are measured through the buffer.
  if (hasStringBuffer()) {
    return linear.stringBuffer()->SizeOfIncludingThisIfUnshared(mallocSizeOf);
  }

  // Chars in the nursery are owned by the nursery.
  if (!ownsMallocedChars()) {
    return 0;
  }

  // Everything else: measure the malloc'ed chars directly.
  if (linear.hasLatin1Chars()) {
    return mallocSizeOf(linear.rawLatin1Chars());
  }
  return mallocSizeOf(linear.rawTwoByteChars());
}
// Compute the size reported for a JSString node: the size of its cell type,
// plus the nursery cell header when the string is in the nursery, plus
// whatever sizeOfExcludingThis() measures.
JS::ubi::Node::Size JS::ubi::Concrete<JSString>::size(
    mozilla::MallocSizeOf mallocSizeOf) const {
  JSString& str = get();

  // Pick the cell size matching the string's concrete layout.
  size_t cellSize;
  if (!str.isAtom()) {
    cellSize = str.isFatInline() ? sizeof(JSFatInlineString) : sizeof(JSString);
  } else if (!str.isInline()) {
    cellSize = sizeof(js::NormalAtom);
  } else if (str.isFatInline()) {
    cellSize = sizeof(js::FatInlineAtom);
  } else {
    cellSize = sizeof(js::ThinInlineAtom);
  }

  const size_t nurseryHeader =
      IsInsideNursery(&str) ? Nursery::nurseryCellHeaderSize() : 0;

  return cellSize + nurseryHeader + str.sizeOfExcludingThis(mallocSizeOf);
}
// Definition of the static concreteTypeName member for the JSString
// specialization of JS::ubi::Concrete; the name is the UTF-16 string
// "JSString".
const char16_t JS::ubi::Concrete<JSString>::concreteTypeName[] = u"JSString";
// Encode as much of this string as fits into |buffer| as UTF-8.
//
// Ropes are traversed iteratively, left to right, using an explicit stack of
// right children. A lead surrogate at the end of one two-byte leaf is held
// back so it can be paired with a trail surrogate at the start of the next
// leaf; a lead surrogate that turns out to be unpaired is written as U+FFFD
// (EF BF BD).
//
// Returns Nothing() if the traversal stack could not be grown (OOM).
// Otherwise returns Some((read, written)): the number of code units consumed
// from the string and the number of bytes written to |buffer|.
mozilla::Maybe<std::tuple<size_t, size_t>> JSString::encodeUTF8Partial(
    const JS::AutoRequireNoGC& nogc, mozilla::Span<char> buffer) const {
  mozilla::Vector<const JSString*, 16, SystemAllocPolicy> pendingNodes;
  const JSString* node = this;
  char16_t pendingLeadSurrogate = 0;  // U+0000 means no pending lead surrogate
  size_t totalRead = 0;
  size_t totalWritten = 0;

  // Snapshot of the progress made so far, in the shape the caller expects.
  auto progress = [&]() {
    return mozilla::Some(std::make_tuple(totalRead, totalWritten));
  };

  // Write U+FFFD in place of the pending (unpaired) lead surrogate and clear
  // it. Returns false, without touching any state, when fewer than three
  // bytes of output space remain.
  auto flushUnpairedSurrogate = [&]() -> bool {
    if (buffer.Length() < 3) {
      return false;
    }
    buffer[0] = '\xEF';
    buffer[1] = '\xBF';
    buffer[2] = '\xBD';
    buffer = buffer.From(3);
    totalRead += 1;  // pendingLeadSurrogate
    totalWritten += 3;
    pendingLeadSurrogate = 0;
    return true;
  };

  for (;;) {
    if (node->isRope()) {
      JSRope& rope = node->asRope();
      if (!pendingNodes.append(rope.rightChild())) {
        // OOM
        return mozilla::Nothing();
      }
      node = rope.leftChild();
      continue;
    }

    JSLinearString& linear = node->asLinear();
    if (MOZ_LIKELY(linear.hasLatin1Chars())) {
      // A Latin1 leaf cannot supply a trail surrogate, so any pending lead
      // surrogate is unpaired.
      if (MOZ_UNLIKELY(pendingLeadSurrogate) && !flushUnpairedSurrogate()) {
        return progress();
      }

      auto src = mozilla::AsChars(
          mozilla::Span(linear.latin1Chars(nogc), linear.length()));
      const auto [read, written] =
          mozilla::ConvertLatin1toUtf8Partial(src, buffer);
      buffer = buffer.From(written);
      totalRead += read;
      totalWritten += written;
      if (read < src.Length()) {
        return progress();
      }
    } else {
      auto src = mozilla::Span(linear.twoByteChars(nogc), linear.length());

      if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
        const char16_t first = src.IsEmpty() ? char16_t(0) : src[0];
        if (unicode::IsTrailSurrogate(first)) {
          // Got a surrogate pair: emit the four-byte UTF-8 sequence.
          if (buffer.Length() < 4) {
            return progress();
          }
          const uint32_t astral =
              unicode::UTF16Decode(pendingLeadSurrogate, first);
          buffer[0] = char(0b1111'0000 | (astral >> 18));
          buffer[1] = char(0b1000'0000 | ((astral >> 12) & 0b11'1111));
          buffer[2] = char(0b1000'0000 | ((astral >> 6) & 0b11'1111));
          buffer[3] = char(0b1000'0000 | (astral & 0b11'1111));
          src = src.From(1);
          buffer = buffer.From(4);
          totalRead += 2;  // both pendingLeadSurrogate and first!
          totalWritten += 4;
          pendingLeadSurrogate = 0;
        } else if (!flushUnpairedSurrogate()) {
          // Unpaired surrogate, and no room for the replacement character.
          return progress();
        }
      }

      if (!src.IsEmpty()) {
        // Hold back a trailing lead surrogate; it may pair with the next leaf.
        const char16_t last = src[src.Length() - 1];
        if (unicode::IsLeadSurrogate(last)) {
          src = src.To(src.Length() - 1);
          pendingLeadSurrogate = last;
        } else {
          MOZ_ASSERT(!pendingLeadSurrogate);
        }

        const auto [read, written] =
            mozilla::ConvertUtf16toUtf8Partial(src, buffer);
        buffer = buffer.From(written);
        totalRead += read;
        totalWritten += written;
        if (read < src.Length()) {
          return progress();
        }
      }
    }

    if (pendingNodes.empty()) {
      break;
    }
    node = pendingNodes.popCopy();
  }

  if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
    // The string ended on a lead surrogate. Whether or not the replacement
    // character fits, the totals are final after this attempt.
    (void)flushUnpairedSurrogate();
  }
  return progress();
}
#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
// Print |n| characters starting at |s| to |out| without surrounding quotes.
// Quote characters, backslash, CR, LF and TAB are backslash-escaped, other
// printable ASCII is emitted verbatim, and everything else is written as a
// \xNN (<= 0xFF) or \uNNNN escape.
/* static */
template <typename CharT>
void JSString::dumpCharsNoQuote(const CharT* s, size_t n,
                                js::GenericPrinter& out) {
  for (size_t idx = 0; idx < n; idx++) {
    const char16_t c = s[idx];
    switch (c) {
      case '"':
        out.put("\\\"");
        break;
      case '\'':
        out.put("\\'");
        break;
      case '`':
        out.put("\\`");
        break;
      case '\\':
        out.put("\\\\");
        break;
      case '\r':
        out.put("\\r");
        break;
      case '\n':
        out.put("\\n");
        break;
      case '\t':
        out.put("\\t");
        break;
      default:
        if (c >= 32 && c < 127) {
          // Printable ASCII.
          out.putChar(static_cast<char>(c));
        } else if (c <= 255) {
          out.printf("\\x%02x", unsigned(c));
        } else {
          out.printf("\\u%04x", unsigned(c));
        }
        break;
    }
  }
}

// Explicit instantiations for both character widths.
/* static */
template void JSString::dumpCharsNoQuote(const Latin1Char* s, size_t n,
                                         js::GenericPrinter& out);
/* static */
template void JSString::dumpCharsNoQuote(const char16_t* s, size_t n,
                                         js::GenericPrinter& out);
void JSString::dump() const {
js::Fprinter out(stderr);
dump(out);
}
void JSString::dump(js::GenericPrinter& out) const {
js::JSONPrinter json(out);
dump(json);
out.put("\n");
}
void JSString::dump(js::JSONPrinter& json) const {
json.beginObject();
dumpFields(json);
json.endObject();
}
// Return the class name used in dumps for |s|'s representation. Atoms are
// checked first; linear strings are then classified as dependent, external,
// extensible, inline (fat or thin) or plain linear; anything else is a rope
// or, failing that, a generic "JSString".
const char* RepresentationToString(const JSString* s) {
  if (s->isAtom()) {
    return "JSAtom";
  }

  if (!s->isLinear()) {
    return s->isRope() ? "JSRope" : "JSString";
  }

  if (s->isDependent()) {
    return "JSDependentString";
  }
  if (s->isExternal()) {
    return "JSExternalString";
  }
  if (s->isExtensible()) {
    return "JSExtensibleString";
  }
  if (!s->isInline()) {
    return "JSLinearString";
  }
  return s->isFatInline() ? "JSFatInlineString" : "JSThinInlineString";
}
// Walk the single-bit flags from js::Bit(3) up to (but excluding) js::Bit(17)
// that are set in |flags| and report each one.
//
// |known| is called with a label for a bit this function can name; |unknown|
// is called with the raw bit value otherwise. Several bit positions are shared
// between different kinds of string (see the static_asserts below), so the
// label chosen for those bits depends on what kind of string |str| is.
//
// Labels for bits that are named unconditionally match the name of the
// corresponding JSString constant, so dump output can be searched for
// directly in the source.
template <typename KnownF, typename UnknownF>
void ForEachStringFlag(const JSString* str, uint32_t flags, KnownF known,
                       UnknownF unknown) {
  for (uint32_t i = js::Bit(3); i < js::Bit(17); i = i << 1) {
    if (!(flags & i)) {
      continue;
    }
    switch (i) {
      case JSString::ATOM_BIT:
        known("ATOM_BIT");
        break;
      case JSString::LINEAR_BIT:
        known("LINEAR_BIT");
        break;
      case JSString::DEPENDENT_BIT:
        known("DEPENDENT_BIT");
        break;
      case JSString::INLINE_CHARS_BIT:
        // Use the constant's own name, like every other unconditional case.
        known("INLINE_CHARS_BIT");
        break;
      case JSString::LINEAR_IS_EXTENSIBLE_BIT:
        // Shared bit: "fat" for inline strings, "extensible" for non-atom
        // linear strings; any other combination is reported as unknown.
        static_assert(JSString::LINEAR_IS_EXTENSIBLE_BIT ==
                      JSString::INLINE_IS_FAT_BIT);
        if (str->isLinear()) {
          if (str->isInline()) {
            known("FAT");
          } else if (!str->isAtom()) {
            known("EXTENSIBLE");
          } else {
            unknown(i);
          }
        } else {
          unknown(i);
        }
        break;
      case JSString::LINEAR_IS_EXTERNAL_BIT:
        // Shared bit: "permanent" for atoms, "external" for other linear
        // strings.
        static_assert(JSString::LINEAR_IS_EXTERNAL_BIT ==
                      JSString::ATOM_IS_PERMANENT_BIT);
        if (str->isAtom()) {
          known("PERMANENT");
        } else if (str->isLinear()) {
          known("EXTERNAL");
        } else {
          unknown(i);
        }
        break;
      case JSString::LATIN1_CHARS_BIT:
        known("LATIN1_CHARS_BIT");
        break;
      case JSString::HAS_STRING_BUFFER_BIT:
        known("HAS_STRING_BUFFER_BIT");
        break;
      case JSString::ATOM_IS_INDEX_BIT:
        // Labelled differently for atoms and non-atoms.
        if (str->isAtom()) {
          known("ATOM_IS_INDEX_BIT");
        } else {
          known("ATOM_REF_BIT");
        }
        break;
      case JSString::INDEX_VALUE_BIT:
        known("INDEX_VALUE_BIT");
        break;
      case JSString::IN_STRING_TO_ATOM_CACHE:
        known("IN_STRING_TO_ATOM_CACHE");
        break;
      case JSString::FLATTEN_VISIT_RIGHT:
        // Labelled differently for ropes and non-ropes.
        if (str->isRope()) {
          known("FLATTEN_VISIT_RIGHT");
        } else {
          known("DEPENDED_ON_BIT");
        }
        break;
      case JSString::FLATTEN_FINISH_NODE:
        // Shared bit: labelled by whether |str| is a rope, an atom, or
        // neither.
        static_assert(JSString::FLATTEN_FINISH_NODE ==
                      JSString::PINNED_ATOM_BIT);
        if (str->isRope()) {
          known("FLATTEN_FINISH_NODE");
        } else if (str->isAtom()) {
          known("PINNED_ATOM_BIT");
        } else {
          known("NON_DEDUP_BIT");
        }
        break;
      default:
        unknown(i);
        break;
    }
  }
}
// Emit this string's properties on |json|: first the common fields, then the
// character fields. The caller is responsible for opening and closing the
// enclosing JSON object.
void JSString::dumpFields(js::JSONPrinter& json) const {
dumpCommonFields(json);
dumpCharsFields(json);
}
// Emit the properties shared by every dump format: the typed address, the
// list of set flags, the index value (only when one is present), whether the
// cell is tenured, and the length.
void JSString::dumpCommonFields(js::JSONPrinter& json) const {
  json.formatProperty("address", "(%s*)0x%p", RepresentationToString(this),
                      this);

  // Callbacks used to print each flag as an element of the inline list.
  auto printKnownFlag = [&json](const char* name) { json.value("%s", name); };
  auto printUnknownFlag = [&json](uint32_t bits) {
    json.value("Unknown(%08x)", bits);
  };

  json.beginInlineListProperty("flags");
  ForEachStringFlag(this, flags(), printKnownFlag, printUnknownFlag);
  json.endInlineList();

  if (hasIndexValue()) {
    json.property("indexValue", getIndexValue());
  }

  json.boolProperty("isTenured", isTenured());
  json.property("length", length());
}
// Emit the string's contents as the "value" property. Linear strings also get
// a "chars" property holding the typed address of their character storage.
void JSString::dumpCharsFields(js::JSONPrinter& json) const {
  if (!isLinear()) {
    // No contiguous storage to point at; just print the characters.
    js::GenericPrinter& valueOut = json.beginStringProperty("value");
    dumpCharsNoQuote(valueOut);
    json.endStringProperty();
    return;
  }

  const JSLinearString& linear = asLinear();
  AutoCheckCannotGC nogc;

  if (hasLatin1Chars()) {
    const Latin1Char* latin1 = linear.latin1Chars(nogc);
    json.formatProperty("chars", "(JS::Latin1Char*)0x%p", latin1);

    js::GenericPrinter& valueOut = json.beginStringProperty("value");
    dumpCharsNoQuote(latin1, length(), valueOut);
    json.endStringProperty();
    return;
  }

  const char16_t* twoByte = linear.twoByteChars(nogc);
  json.formatProperty("chars", "(char16_t*)0x%p", twoByte);

  js::GenericPrinter& valueOut = json.beginStringProperty("value");
  dumpCharsNoQuote(twoByte, length(), valueOut);
  json.endStringProperty();
}
void JSString::dumpRepresentation() const {
js::Fprinter out(stderr);
dumpRepresentation(out);
}
void JSString::dumpRepresentation(js::GenericPrinter& out) const {
js::JSONPrinter json(out);
dumpRepresentation(json);
out.put("\n");
}
void JSString::dumpRepresentation(js::JSONPrinter& json) const {
json.beginObject();
dumpRepresentationFields(json);
json.endObject();
}
// Emit the common fields, then the fields specific to this string's
// representation, then (for everything except ropes) the character fields.
void JSString::dumpRepresentationFields(js::JSONPrinter& json) const {
  dumpCommonFields(json);

  if (isAtom()) {
    asAtom().dumpOwnRepresentationFields(json);
    dumpCharsFields(json);
    return;
  }

  if (isLinear()) {
    asLinear().dumpOwnRepresentationFields(json);

    // At most one of the more specific linear kinds adds its own fields.
    if (isDependent()) {
      asDependent().dumpOwnRepresentationFields(json);
    } else if (isExternal()) {
      asExternal().dumpOwnRepresentationFields(json);
    } else if (isExtensible()) {
      asExtensible().dumpOwnRepresentationFields(json);
    } else if (isInline()) {
      asInline().dumpOwnRepresentationFields(json);
    }

    dumpCharsFields(json);
    return;
  }

  if (isRope()) {
    asRope().dumpOwnRepresentationFields(json);
    // Rope already shows the chars.
    return;
  }

  dumpCharsFields(json);
}
// Print the escaped characters in single quotes, followed by " @ " and the
// string's typed address.
void JSString::dumpStringContent(js::GenericPrinter& out) const {
dumpCharsSingleQuote(out);
out.printf(" @ (%s*)0x%p", RepresentationToString(this), this);
}
// Print the escaped characters with no surrounding quotes.
void JSString::dumpPropertyName(js::GenericPrinter& out) const {
dumpCharsNoQuote(out);
}
// Print the escaped characters wrapped in double quotes.
void JSString::dumpChars(js::GenericPrinter& out) const {
out.putChar('"');
dumpCharsNoQuote(out);
out.putChar('"');
}
// Print the escaped characters wrapped in single quotes.
void JSString::dumpCharsSingleQuote(js::GenericPrinter& out) const {
out.putChar('\'');
dumpCharsNoQuote(out);
out.putChar('\'');
}
// Print this string's escaped characters with no surrounding quotes. Linear
// strings are printed directly from their character storage; ropes are
// printed by recursing into the left child and then the right child.
void JSString::dumpCharsNoQuote(js::GenericPrinter& out) const {
  if (!isLinear()) {
    if (isRope()) {
      JSRope& rope = asRope();
      rope.leftChild()->dumpCharsNoQuote(out);
      rope.rightChild()->dumpCharsNoQuote(out);
    }
    return;
  }

  const JSLinearString& linear = asLinear();
  AutoCheckCannotGC nogc;
  if (hasLatin1Chars()) {
    dumpCharsNoQuote(linear.latin1Chars(nogc), length(), out);
    return;
  }
  dumpCharsNoQuote(linear.twoByteChars(nogc), length(), out);
}
// Compare this string with the ASCII C string |s|. The string is linearized
// first (with no context); if that fails a message is written to stderr and
// false is returned.
bool JSString::equals(const char* s) {
  if (JSLinearString* linear = ensureLinear(nullptr)) {
    return StringEqualsAscii(linear, s);
  }

  // This is DEBUG-only code.
  fprintf(stderr, "OOM in JSString::equals!\n");
  return false;
}
#endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
// Turn this linear string into an extensible string with the given capacity
// and return it as a JSExtensibleString.
//
// The asserts require that the string is not dependent, inline, an atom or
// external, and that |capacity| is at least the current length.
JSExtensibleString& JSLinearString::makeExtensible(size_t capacity) {
MOZ_ASSERT(!isDependent());
MOZ_ASSERT(!isInline());
MOZ_ASSERT(!isAtom());
MOZ_ASSERT(!isExternal());
MOZ_ASSERT(capacity >= length());
// Remove the StringContents accounting using allocSize() as evaluated before
// the flags and capacity change, then add it back using allocSize() as
// evaluated afterwards. The order of these four statements matters.
js::RemoveCellMemory(this, allocSize(), js::MemoryUse::StringContents);
setLengthAndFlags(length(), flags() | EXTENSIBLE_FLAGS);
d.s.u3.capacity = capacity;
js::AddCellMemory(this, allocSize(), js::MemoryUse::StringContents);
return asExtensible();
}
// Allocate the character buffer that will hold the flattened contents of
// |str|, which needs room for |length| characters.
//
// On success returns true and sets:
//   *chars           - the start of the character storage;
//   *capacity        - the number of characters the storage can hold (at
//                      least |length|, at most JSString::MAX_LENGTH);
//   *hasStringBuffer - whether the storage is a mozilla::StringBuffer (true)
//                      or a plain arena allocation (false).
// Ownership of the storage passes to the caller. When |str| is not tenured,
// the storage is also registered with |nursery|.
//
// Returns false if allocation or nursery registration fails; the storage
// allocated here is released by its owning smart pointer in that case.
template <typename CharT>
static MOZ_ALWAYS_INLINE bool AllocCharsForFlatten(Nursery& nursery,
JSString* str, size_t length,
CharT** chars,
size_t* capacity,
bool* hasStringBuffer) {
/*
* Grow by 12.5% if the buffer is very large. Otherwise, round up to the
* next power of 2. This is similar to what we do with arrays; see
* JSObject::ensureDenseArrayElements.
*/
auto calcCapacity = [](size_t length, size_t maxCapacity) {
static const size_t DOUBLING_MAX = 1024 * 1024;
if (length > DOUBLING_MAX) {
return std::min<size_t>(maxCapacity, length + (length / 8));
}
size_t capacity = RoundUpPow2(length);
MOZ_ASSERT(capacity <= maxCapacity);
return capacity;
};
// Short strings: use a plain arena allocation instead of a StringBuffer.
if (length < JSString::MIN_BYTES_FOR_BUFFER / sizeof(CharT)) {
*capacity = calcCapacity(length, JSString::MAX_LENGTH);
MOZ_ASSERT(length <= *capacity);
MOZ_ASSERT(*capacity <= JSString::MAX_LENGTH);
auto buffer = str->zone()->make_pod_arena_array<CharT>(
js::StringBufferArena, *capacity);
if (!buffer) {
return false;
}
if (!str->isTenured()) {
// |buffer| is still held by its smart pointer here, so returning false
// releases it.
if (!nursery.registerMallocedBuffer(buffer.get(),
*capacity * sizeof(CharT))) {
return false;
}
}
*chars = buffer.release();
*hasStringBuffer = false;
return true;
}
using mozilla::StringBuffer;
static_assert(StringBuffer::IsValidLength<CharT>(JSString::MAX_LENGTH),
"JSString length must be valid for StringBuffer");
// Include extra space for the header and the null-terminator before
// calculating the capacity. This ensures we make good use of jemalloc's
// bucket sizes. For example, for a Latin1 string with length 2000 we want to
// get a capacity of 2039 (chars). With the StringBuffer header (8 bytes) and
// the null-terminator this results in an allocation of 2048 bytes.
//
// Note: the null-terminator will not be included in the extensible string's
// capacity field.
static_assert(sizeof(StringBuffer) % sizeof(CharT) == 0);
static constexpr size_t ExtraChars = sizeof(StringBuffer) / sizeof(CharT) + 1;
size_t fullCapacity =
calcCapacity(length + ExtraChars, JSString::MAX_LENGTH + ExtraChars);
*capacity = fullCapacity - ExtraChars;
MOZ_ASSERT(length <= *capacity);
MOZ_ASSERT(*capacity <= JSString::MAX_LENGTH);
// The "+ 1" makes room for the null-terminator.
RefPtr<StringBuffer> buffer = StringBuffer::Alloc(
(*capacity + 1) * sizeof(CharT), mozilla::Some(js::StringBufferArena));
if (!buffer) {
return false;
}
if (!str->isTenured()) {
auto* linear = static_cast<JSLinearString*>(str); // True when we're done.
if (!nursery.addExtensibleStringBuffer(linear, buffer)) {
return false;
}
}
// Transfer ownership to the caller, where the buffer will be used for the
// extensible string.
// Note: the null-terminator will be stored in flattenInternal.
StringBuffer* buf;
buffer.forget(&buf);
*chars = static_cast<CharT*>(buf->Data());
*hasStringBuffer = true;
return true;
}
// Copy this rope's characters into a newly allocated Latin1 buffer in
// |destArenaId|; forwards to copyCharsInternal<Latin1Char>.
UniqueLatin1Chars JSRope::copyLatin1Chars(JSContext* maybecx,
arena_id_t destArenaId) const {
return copyCharsInternal<Latin1Char>(maybecx, destArenaId);
}
// Copy this rope's characters into a newly allocated two-byte buffer in
// |destArenaId|; forwards to copyCharsInternal<char16_t>.
UniqueTwoByteChars JSRope::copyTwoByteChars(JSContext* maybecx,
arena_id_t destArenaId) const {
return copyCharsInternal<char16_t>(maybecx, destArenaId);
}
// Allocate storage for |length| characters of a new string.
//
// When |heap| is the default heap and the zone allocates strings in the
// nursery, a nursery buffer is tried first. If that is not possible the chars
// are malloc'ed: as a plain arena array for short strings, or as a
// null-terminated mozilla::StringBuffer otherwise. The returned OwnedChars
// records which kind of allocation was made; it is empty on failure.
template <typename CharT>
static MOZ_ALWAYS_INLINE JSString::OwnedChars<CharT> AllocChars(JSContext* cx,
                                                                size_t length,
                                                                gc::Heap heap) {
  const bool tryNursery =
      heap == gc::Heap::Default && cx->zone()->allocNurseryStrings();
  if (tryNursery) {
    MOZ_ASSERT(cx->nursery().isEnabled());
    void* nurseryChars = cx->nursery().tryAllocateNurseryBuffer(
        cx->zone(), length * sizeof(CharT), js::StringBufferArena);
    if (nurseryChars) {
      using Kind = typename JSString::OwnedChars<CharT>::Kind;
      return {static_cast<CharT*>(nurseryChars), length, Kind::Nursery};
    }
    // Otherwise fall through to a malloc allocation.
  }

  static_assert(JSString::MIN_BYTES_FOR_BUFFER % sizeof(CharT) == 0);
  constexpr size_t minBufferChars =
      JSString::MIN_BYTES_FOR_BUFFER / sizeof(CharT);

  // Short strings use a plain arena array.
  if (length < minBufferChars) {
    auto mallocChars =
        cx->make_pod_arena_array<CharT>(js::StringBufferArena, length);
    if (!mallocChars) {
      return {};
    }
    return {std::move(mallocChars), length};
  }

  using mozilla::StringBuffer;

  if (MOZ_UNLIKELY(!StringBuffer::IsValidLength<CharT>(length))) {
    ReportOversizedAllocation(cx, JSMSG_ALLOC_OVERFLOW);
    return {};
  }

  // Note: StringBuffers must be null-terminated, hence the extra character.
  RefPtr<StringBuffer> stringBuffer = StringBuffer::Alloc(
      (length + 1) * sizeof(CharT), mozilla::Some(js::StringBufferArena));
  if (!stringBuffer) {
    ReportOutOfMemory(cx);
    return {};
  }

  CharT* data = static_cast<CharT*>(stringBuffer->Data());
  data[length] = '\0';
  return {std::move(stringBuffer), length};
}
// Like AllocChars but for atom characters: the context's zone must be the
// atoms zone and |length| must already be known to be valid. Does not leave
// an exception pending on OOM; an empty OwnedChars is returned instead.
template <typename CharT>
JSString::OwnedChars<CharT> js::AllocAtomCharsValidLength(JSContext* cx,
                                                          size_t length) {
  MOZ_ASSERT(cx->zone()->isAtomsZone());
  MOZ_ASSERT(JSAtom::validateLength(cx, length));
  MOZ_ASSERT(mozilla::StringBuffer::IsValidLength<CharT>(length));

  static_assert(JSString::MIN_BYTES_FOR_BUFFER % sizeof(CharT) == 0);
  constexpr size_t minBufferChars =
      JSString::MIN_BYTES_FOR_BUFFER / sizeof(CharT);

  if (length >= minBufferChars) {
    // Note: StringBuffers must be null-terminated, hence the extra character.
    RefPtr<mozilla::StringBuffer> stringBuffer = mozilla::StringBuffer::Alloc(
        (length + 1) * sizeof(CharT), mozilla::Some(js::StringBufferArena));
    if (!stringBuffer) {
      return {};
    }
    CharT* data = static_cast<CharT*>(stringBuffer->Data());
    data[length] = '\0';
    return {std::move(stringBuffer), length};
  }

  // Short atoms use a plain arena array.
  auto mallocChars =
      cx->make_pod_arena_array<CharT>(js::StringBufferArena, length);
  if (!mallocChars) {
    cx->recoverFromOutOfMemory();
    return {};
  }
  return {std::move(mallocChars), length};
}

// Explicit instantiations for both character widths.
template JSString::OwnedChars<Latin1Char> js::AllocAtomCharsValidLength(
    JSContext* cx, size_t length);
template JSString::OwnedChars<char16_t> js::AllocAtomCharsValidLength(
    JSContext* cx, size_t length);
// Copy this rope's characters into a newly allocated contiguous buffer in
// |destArenaId| without modifying the rope.
//
// |maybecx| may be null. When it is non-null, the allocation goes through the
// context and a failure to grow the traversal stack is reported on it.
// Returns nullptr on failure.
template <typename CharT>
UniquePtr<CharT[], JS::FreePolicy> JSRope::copyCharsInternal(
    JSContext* maybecx, arena_id_t destArenaId) const {
  // Left-leaning ropes are far more common than right-leaning ropes, so
  // traverse right node first and fill the buffer from the back; this keeps
  // the explicit stack of pending left children short in the common case.
  const size_t total = length();

  CharT* raw = maybecx ? maybecx->pod_arena_malloc<CharT>(destArenaId, total)
                       : js_pod_arena_malloc<CharT>(destArenaId, total);
  UniquePtr<CharT[], JS::FreePolicy> out(raw);
  if (!out) {
    return nullptr;
  }

  Vector<const JSString*, 8, SystemAllocPolicy> pendingLeft;
  const JSString* node = this;
  CharT* cursor = out.get() + total;

  for (;;) {
    // Descend along right children, remembering each left sibling.
    while (node->isRope()) {
      JSRope& rope = node->asRope();
      if (!pendingLeft.append(rope.leftChild())) {
        if (maybecx) {
          ReportOutOfMemory(maybecx);
        }
        return nullptr;
      }
      node = rope.rightChild();
    }

    // |node| is a leaf: its chars go immediately before what was written so
    // far.
    cursor -= node->length();
    CopyChars(cursor, node->asLinear());

    if (pendingLeft.empty()) {
      break;
    }
    node = pendingLeft.popCopy();
  }

  MOZ_ASSERT(cursor == out.get());
  return out;
}
// Fold |len| characters starting at |chars| into the running hash |*hash|.
template <typename CharT>
void AddStringToHash(uint32_t* hash, const CharT* chars, size_t len) {
  // It's tempting to use |HashString| here, but that differs slightly from
  // the existing implementation for non-ropes. The result must look as if one
  // contiguous run of chars had been hashed, so accumulate char by char
  // instead of hashing the substring and folding that hash in.
  uint32_t running = *hash;
  for (size_t idx = 0; idx < len; idx++) {
    running = mozilla::AddToHash(running, chars[idx]);
  }
  *hash = running;
}
void AddStringToHash(uint32_t* hash, const JSString* str) {
AutoCheckCannotGC nogc;
const auto& s = str->asLinear();
if (s.hasLatin1Chars()) {
AddStringToHash(hash, s.latin1Chars(nogc), s.length());
} else {
AddStringToHash(hash, s.twoByteChars(nogc), s.length());
}
}
bool JSRope::hash(uint32_t* outHash) const {
Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
const JSString* str = this;
*outHash = 0;
while (true) {
if (str->isRope()) {
if (!nodeStack.append(str->asRope().rightChild())) {
return false;
}
str = str->asRope().leftChild();
} else {
AddStringToHash(outHash, str);
if (nodeStack.empty()) {
break;
}
str = nodeStack.popCopy();
}
}
return true;
}
#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
// Emit the rope-specific fields: the representation of each child, nested as
// the "leftChild" and "rightChild" object properties.
void JSRope::dumpOwnRepresentationFields(js::JSONPrinter& json) const {
json.beginObjectProperty("leftChild");
leftChild()->dumpRepresentationFields(json);
json.endObject();
json.beginObjectProperty("rightChild");
rightChild()->dumpRepresentationFields(json);
json.endObject();
}
#endif
namespace js {

// Copy |str|'s characters to |dest| as two-byte chars, inflating them when
// the string stores Latin1.
template <>
void CopyChars(char16_t* dest, const JSLinearString& str) {
  AutoCheckCannotGC nogc;
  const size_t len = str.length();

  if (!str.hasTwoByteChars()) {
    CopyAndInflateChars(dest, str.latin1Chars(nogc), len);
    return;
  }
  PodCopy(dest, str.twoByteChars(nogc), len);
}

// Copy |str|'s characters to |dest| as Latin1 chars, deflating them when the
// string stores two-byte chars.
template <>
void CopyChars(Latin1Char* dest, const JSLinearString& str) {
  AutoCheckCannotGC nogc;
  const size_t len = str.length();

  if (str.hasLatin1Chars()) {
    PodCopy(dest, str.latin1Chars(nogc), len);
    return;
  }

  /*
   * When we flatten a TwoByte rope, we turn child ropes (including Latin1
   * ropes) into TwoByte dependent strings. If one of these strings is also
   * part of another Latin1 rope tree, we can have a Latin1 rope with a
   * TwoByte descendant and we end up here when we flatten it. Although the
   * chars are stored as TwoByte, we know they must be in the Latin1 range,
   * so we can safely deflate here.
   */
  auto twoByteSrc = Span(str.twoByteChars(nogc), len);
  MOZ_ASSERT(IsUtf16Latin1(twoByteSrc));
  LossyConvertUtf16toLatin1(twoByteSrc, AsWritableChars(Span(dest, len)));
}

} /* namespace js */
template <typename CharT>
static constexpr uint32_t StringFlagsForCharType(uint32_t baseFlags) {
if constexpr (std::is_same_v<CharT, char16_t>) {
return baseFlags;
}
return baseFlags | JSString::LATIN1_CHARS_BIT;
}
// Update the nursery's list of buffers associated with nursery cells when the
// character buffer |chars| (of |size| bytes) is moved from string |from| to
// string |to|, depending on whether those strings are in the nursery or not.
//
// Returns false if a fallible registration fails. Every fallible step runs
// before any bookkeeping is removed, so on failure |from| keeps whatever
// registration it had and the caller can bail out without leaving a buffer
// untracked.
static bool UpdateNurseryBuffersOnTransfer(js::Nursery& nursery,
                                           JSExtensibleString* from,
                                           JSString* to, void* chars,
                                           size_t size) {
  if (from->hasStringBuffer()) {
    // addExtensibleStringBuffer can fail, so call it before
    // removeExtensibleStringBuffer (whose result is not checked). Doing it the
    // other way round would, on failure, leave |from| still owning the buffer
    // but no longer registered with the nursery.
    if (!to->isTenured()) {
      auto* linear = static_cast<JSLinearString*>(to);  // True when we're done.
      if (!nursery.addExtensibleStringBuffer(linear, from->stringBuffer())) {
        return false;
      }
    }
    if (!from->isTenured()) {
      nursery.removeExtensibleStringBuffer(from);
    }
    return true;
  }

  if (from->isTenured() && !to->isTenured()) {
    // Tenured leftmost child is giving its chars buffer to the
    // nursery-allocated root node.
    if (!nursery.registerMallocedBuffer(chars, size)) {
      return false;
    }
  } else if (!from->isTenured() && to->isTenured()) {
    // Leftmost child is giving its nursery-held chars buffer to a
    // tenured string.
    nursery.removeMallocedBuffer(chars, size);
  }

  // Both tenured, or both in the nursery: nothing to update.
  return true;
}
// Decide whether flattening can write into the leftmost child's own buffer.
// This requires an extensible string whose StringBuffer (if it has one) is
// not read-only, whose capacity covers |wholeLength|, and whose character
// width matches |hasTwoByteChars|.
static bool CanReuseLeftmostBuffer(JSString* leftmostChild, size_t wholeLength,
                                   bool hasTwoByteChars) {
  if (!leftmostChild->isExtensible()) {
    return false;
  }

  JSExtensibleString& ext = leftmostChild->asExtensible();

  // Don't mutate the StringBuffer if there are other references to it,
  // possibly on other threads.
  const bool bufferIsShared =
      ext.hasStringBuffer() && ext.stringBuffer()->IsReadonly();
  if (bufferIsShared) {
    return false;
  }

  if (ext.capacity() < wholeLength) {
    return false;
  }
  return ext.hasTwoByteChars() == hasTwoByteChars;
}
// Flatten this rope into a linear string. |maybecx| may be null; when it is
// non-null a profiler entry covers the operation and a failure is reported on
// the context as out-of-memory. Returns nullptr on failure.
JSLinearString* JSRope::flatten(JSContext* maybecx) {
  mozilla::Maybe<AutoGeckoProfilerEntry> profilerEntry;
  if (maybecx) {
    profilerEntry.emplace(maybecx, "JSRope::flatten");
  }

  JSLinearString* flat = flattenInternal();
  if (flat) {
    return flat;
  }

  if (maybecx) {
    ReportOutOfMemory(maybecx);
  }
  return nullptr;
}
// Select the flattening variant according to whether this string's zone
// currently needs an incremental barrier.
JSLinearString* JSRope::flattenInternal() {
  return zone()->needsIncrementalBarrier()
             ? flattenInternal<WithIncrementalBarrier>()
             : flattenInternal<NoBarrier>();
}
// Select the flattening variant according to this rope's character width.
template <JSRope::UsingBarrier usingBarrier>
JSLinearString* JSRope::flattenInternal() {
  return hasTwoByteChars() ? flattenInternal<usingBarrier, char16_t>(this)
                           : flattenInternal<usingBarrier, Latin1Char>(this);
}
template <JSRope::UsingBarrier usingBarrier, typename CharT>
/* static */
JSLinearString* JSRope::flattenInternal(JSRope* root) {
/*
* Consider the DAG of JSRopes rooted at |root|, with non-JSRopes as
* its leaves. Mutate the root JSRope into a JSExtensibleString containing
* the full flattened text that the root represents, and mutate all other
* JSRopes in the interior of the DAG into JSDependentStrings that refer to
* this new JSExtensibleString.
*
* If the leftmost leaf of our DAG is a JSExtensibleString, consider
* stealing its buffer for use in our new root, and transforming it into a
* JSDependentString too. Do not mutate any of the other leaves.
*
* Perform a depth-first dag traversal, splatting each node's characters
* into a contiguous buffer. Visit each rope node three times:
* 1. record position in the buffer and recurse into left child;
* 2. recurse into the right child;
* 3. transform the node into a dependent string.
* To avoid maintaining a stack, tree nodes are mutated to indicate how many
* times they have been visited. Since ropes can be dags, a node may be
* encountered multiple times during traversal. However, step 3 above leaves
* a valid dependent string, so everything works out.
*
* While ropes avoid all sorts of quadratic cases with string concatenation,
* they can't help when ropes are immediately flattened. One idiomatic case
* that we'd like to keep linear (and has traditionally been linear in SM
* and other JS engines) is:
*
* while (...) {
* s += ...
* s.flatten
* }
*
* Two behaviors accomplish this:
*
* - When the leftmost non-rope in the DAG we're flattening is a
* JSExtensibleString with sufficient capacity to hold the entire
* flattened string, we just flatten the DAG into its buffer. Then, when
* we transform the root of the DAG from a JSRope into a
* JSExtensibleString, we steal that buffer, and change the victim from a
* JSExtensibleString to a JSDependentString. In this case, the left-hand
* side of the string never needs to be copied.
*
* - Otherwise, we round up the total flattened size and create a fresh
* JSExtensibleString with that much capacity. If this in turn becomes the
* leftmost leaf of a subsequent flatten, we will hopefully be able to
* fill it, as in the case above.
*
* Note that, even though the code for creating JSDependentStrings avoids
* creating dependents of dependents, we can create that situation here: the
* JSExtensibleStrings we transform into JSDependentStrings might have
* JSDependentStrings pointing to them already. Stealing the buffer doesn't
* change its address, only its owning JSExtensibleString, so all chars()
* pointers in the JSDependentStrings are still valid.
*
* This chain of dependent strings could be problematic if the base string
* moves, either because it was initially allocated in the nursery or it
* gets deduplicated, because you might have a dependent ->
* tenured dependent -> nursery base string, and the store buffer would
* only capture the latter edge. Prevent this case from happening by
* marking the root as nondeduplicatable if the extensible string
* optimization applied.
*/
const size_t wholeLength = root->length();
size_t wholeCapacity;
CharT* wholeChars;
uint32_t newRootFlags = 0;
AutoCheckCannotGC nogc;
Nursery& nursery = root->runtimeFromMainThread()->gc.nursery();
/* Find the left most string, containing the first string. */
JSRope* leftmostRope = root;
while (leftmostRope->leftChild()->isRope()) {
leftmostRope = &leftmostRope->leftChild()->asRope();
}
JSString* leftmostChild = leftmostRope->leftChild();
bool reuseLeftmostBuffer = CanReuseLeftmostBuffer(
leftmostChild, wholeLength, std::is_same_v<CharT, char16_t>);
bool hasStringBuffer = false;
if (reuseLeftmostBuffer) {
JSExtensibleString& left = leftmostChild->asExtensible();
wholeCapacity = left.capacity();
wholeChars = const_cast<CharT*>(left.nonInlineChars<CharT>(nogc));
hasStringBuffer = left.hasStringBuffer();
// Nursery::registerMallocedBuffer is fallible, so attempt it first before
// doing anything irreversible.
if (!UpdateNurseryBuffersOnTransfer(nursery, &left, root, wholeChars,
wholeCapacity * sizeof(CharT))) {
return nullptr;
}
} else {
// If we can't reuse the leftmost child's buffer, allocate a new one.
if (!AllocCharsForFlatten(nursery, root, wholeLength, &wholeChars,
&wholeCapacity, &hasStringBuffer)) {
return nullptr;
}
}
JSRope* str = root;
CharT* pos = wholeChars;
JSRope* parent = nullptr;
uint32_t parentFlag = 0;
first_visit_node: {
MOZ_ASSERT_IF(str != root, parent && parentFlag);
MOZ_ASSERT(!str->asRope().isBeingFlattened());
ropeBarrierDuringFlattening<usingBarrier>(str);
JSString& left = *str->d.s.u2.left;
str->d.s.u2.parent = parent;
str->setFlagBit(parentFlag);
parent = nullptr;
parentFlag = 0;
if (left.isRope()) {
/* Return to this node when 'left' done, then goto visit_right_child. */
parent = str;
parentFlag = FLATTEN_VISIT_RIGHT;
str = &left.asRope();
goto first_visit_node;
}
if (!(reuseLeftmostBuffer && pos == wholeChars)) {
CopyChars(pos, left.asLinear());
}
pos += left.length();
}
visit_right_child: {
JSString& right = *str->d.s.u3.right;
if (right.isRope()) {
/* Return to this node when 'right' done, then goto finish_node. */
parent = str;
parentFlag = FLATTEN_FINISH_NODE;
str = &right.asRope();
goto first_visit_node;
}
CopyChars(pos, right.asLinear());
pos += right.length();
}
finish_node: {
if (str == root) {
goto finish_root;
}
MOZ_ASSERT(pos >= wholeChars);
CharT* chars = pos - str->length();
JSRope* strParent = str->d.s.u2.parent;
str->setNonInlineChars(chars, /* usesStringBuffer = */ false);
MOZ_ASSERT(str->asRope().isBeingFlattened());
mozilla::DebugOnly<bool> visitRight = str->flags() & FLATTEN_VISIT_RIGHT;
bool finishNode = str->flags() & FLATTEN_FINISH_NODE;
MOZ_ASSERT(visitRight != finishNode);
// This also clears the flags related to flattening.
str->setLengthAndFlags(str->length(),
StringFlagsForCharType<CharT>(INIT_DEPENDENT_FLAGS));
str->d.s.u3.base =
reinterpret_cast<JSLinearString*>(root); /* will be true on exit */
newRootFlags |= DEPENDED_ON_BIT;
// Every interior (rope) node in the rope's tree will be visited during
// the traversal and post-barriered here, so earlier additions of
// dependent.base -> root pointers are handled by this barrier as well.
//
// The only time post-barriers need do anything is when the root is in
// the nursery. Note that the root was a rope but will be an extensible
// string when we return, so it will not point to any strings and need
// not be barriered.
if (str->isTenured() && !root->isTenured()) {
root->storeBuffer()->putWholeCell(str);
}
str = strParent;
if (finishNode) {
goto finish_node;
}
MOZ_ASSERT(visitRight);
goto visit_right_child;
}
finish_root:
// We traversed all the way back up to the root so we're finished.
MOZ_ASSERT(str == root);
MOZ_ASSERT(pos == wholeChars + wholeLength);
uint32_t flags = StringFlagsForCharType<CharT>(EXTENSIBLE_FLAGS);
if (hasStringBuffer) {
flags |= HAS_STRING_BUFFER_BIT;
wholeChars[wholeLength] = '\0';
}
root->setLengthAndFlags(wholeLength, flags);
root->setNonInlineChars(wholeChars, hasStringBuffer);
root->d.s.u3.capacity = wholeCapacity;
AddCellMemory(root, root->asLinear().allocSize(), MemoryUse::StringContents);
if (reuseLeftmostBuffer) {
// Remove memory association for left node we're about to make into a
// dependent string.
JSString& left = *leftmostChild;
RemoveCellMemory(&left, left.allocSize(), MemoryUse::StringContents);
// Inherit NON_DEDUP_BIT from the leftmost string.
newRootFlags |= left.flags() & NON_DEDUP_BIT;
// Set root's DEPENDED_ON_BIT because the leftmost string is now a
// dependent.
newRootFlags |= DEPENDED_ON_BIT;
uint32_t flags = INIT_DEPENDENT_FLAGS;
if (left.inStringToAtomCache()) {
flags |= IN_STRING_TO_ATOM_CACHE;
}
// If left was depended on, we need to make sure we preserve that. Even
// though the string that depended on left's buffer will now depend on
// root's buffer, if left is the only edge to root, replacing left with an
// atom ref would break that edge and allow root's buffer to be freed.
if (left.isDependedOn()) {
flags |= DEPENDED_ON_BIT;
}
left.setLengthAndFlags(left.length(), StringFlagsForCharType<CharT>(flags));
left.d.s.u3.base = &root->asLinear();
if (left.isTenured() && !root->isTenured()) {
// leftmost child -> root is a tenured -> nursery edge. Put the leftmost
// child in the store buffer and prevent the root's chars from moving or
// being freed (because the leftmost child may have a tenured dependent
// string that cannot be updated.)
root->storeBuffer()->putWholeCell(&left);
newRootFlags |= NON_DEDUP_BIT;
}
}
root->setHeaderFlagBit(newRootFlags);
return &root->asLinear();
}
// Pre-write-barrier both children of |rope| while a flatten is in progress.
//
// The rope must not yet be marked as being flattened when this is called.
// When the |usingBarrier| template argument is false the function compiles
// down to just the assertion.
template <JSRope::UsingBarrier usingBarrier>
/* static */
inline void JSRope::ropeBarrierDuringFlattening(JSRope* rope) {
  MOZ_ASSERT(!rope->isBeingFlattened());

  if constexpr (usingBarrier) {
    // Barrier the left child first, then the right one.
    auto* leftKid = rope->leftChild();
    gc::PreWriteBarrierDuringFlattening(leftKid);

    auto* rightKid = rope->rightChild();
    gc::PreWriteBarrierDuringFlattening(rightKid);
  }
}
// Linearize |string| via JSString::ensureLinear and return the result.
//
// Returns nullptr when linearization fails. If |allowGC| is false, the
// failure is additionally cleared with cx->recoverFromOutOfMemory() so that
// no exception is reported to the caller; it only sees the null result.
template <AllowGC allowGC>
static JSLinearString* EnsureLinear(
    JSContext* cx,
    typename MaybeRooted<JSString*, allowGC>::HandleType string) {
  if (JSLinearString* flat = string->ensureLinear(cx)) {
    return flat;
  }

  // Linearization failed. Without GC we must not report an exception, so
  // recover and hand back nullptr silently.
  if (!allowGC) {
    cx->recoverFromOutOfMemory();
  }
  return nullptr;
}
template <AllowGC allowGC>
JSString* js::ConcatStrings(
JSContext* cx, typename MaybeRooted<JSString*, allowGC>::HandleType left,
typename MaybeRooted<JSString*, allowGC>::HandleType right, gc::Heap heap) {
MOZ_ASSERT_IF(!left->isAtom(), cx->isInsideCurrentZone(left));
MOZ_ASSERT_IF(!right->isAtom(), cx->isInsideCurrentZone(right));
size_t leftLen = left->length();
if (leftLen == 0) {
return right;
}
size_t rightLen = right->length();
if (rightLen == 0) {
return left;
}
size_t wholeLength = leftLen + rightLen;
if (MOZ_UNLIKELY(wholeLength > JSString::MAX_LENGTH)) {
// Don't report an exception if GC is not allowed, just return nullptr.
if (allowGC) {
js::ReportOversizedAllocation(cx, JSMSG_ALLOC_OVERFLOW);