Source code
Revision control
Copy as Markdown
Other Tools
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
#include "nsExpatDriver.h"
#include "mozilla/fallible.h"
#include "nsCOMPtr.h"
#include "CParserContext.h"
#include "nsIExpatSink.h"
#include "nsIContentSink.h"
#include "nsIDocShell.h"
#include "nsParserMsgUtils.h"
#include "nsIURL.h"
#include "nsIUnicharInputStream.h"
#include "nsIProtocolHandler.h"
#include "nsNetUtil.h"
#include "nsString.h"
#include "nsTextFormatter.h"
#include "nsDirectoryServiceDefs.h"
#include "nsCRT.h"
#include "nsIConsoleService.h"
#include "nsIScriptError.h"
#include "nsIScriptGlobalObject.h"
#include "nsIContentPolicy.h"
#include "nsComponentManagerUtils.h"
#include "nsContentPolicyUtils.h"
#include "nsError.h"
#include "nsXPCOMCIDInternal.h"
#include "nsUnicharInputStream.h"
#include "nsContentUtils.h"
#include "mozilla/Array.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/BasePrincipal.h"
#include "mozilla/IntegerTypeTraits.h"
#include "mozilla/NullPrincipal.h"
#include "mozilla/Telemetry.h"
#include "mozilla/glean/GleanMetrics.h"
#include "nsThreadUtils.h"
#include "mozilla/ClearOnShutdown.h"
#include "mozilla/RLBoxUtils.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Logging.h"
using mozilla::fallible;
using mozilla::LogLevel;
using mozilla::MakeStringSpan;
using mozilla::Maybe;
using mozilla::Unused;
using mozilla::dom::Document;
// We only pass chunks of length sMaxChunkLength to Expat in the RLBOX sandbox.
// The RLBOX sandbox has a limited amount of memory, and we have to account for
// other memory use by Expat (including the buffering it does).
// Note that sMaxChunkLength is in number of characters.
#ifdef DEBUG
// On debug builds we set a much lower limit (1kB) to try to hit boundary
// conditions more frequently.
static const uint32_t sMaxChunkLength = 1024 / sizeof(char16_t);
#else
static const uint32_t sMaxChunkLength = (128 * 1024) / sizeof(char16_t);
#endif
#define kExpatSeparatorChar 0xFFFF
static const char16_t kUTF16[] = {'U', 'T', 'F', '-', '1', '6', '\0'};
static mozilla::LazyLogModule gExpatDriverLog("expatdriver");
// Use the same maximum tree depth as Chromium (see
static const uint16_t sMaxXMLTreeDepth = 5000;
/***************************** RLBOX HELPERS ********************************/
// Helpers for calling sandboxed expat functions in handlers
#define RLBOX_EXPAT_SAFE_CALL(foo, verifier, ...) \
aSandbox.invoke_sandbox_function(foo, self->mExpatParser, ##__VA_ARGS__) \
.copy_and_verify(verifier)
#define RLBOX_EXPAT_SAFE_MCALL(foo, verifier, ...) \
Sandbox() \
->invoke_sandbox_function(foo, mExpatParser, ##__VA_ARGS__) \
.copy_and_verify(verifier)
#define RLBOX_EXPAT_CALL(foo, ...) \
aSandbox.invoke_sandbox_function(foo, self->mExpatParser, ##__VA_ARGS__)
#define RLBOX_EXPAT_MCALL(foo, ...) \
Sandbox()->invoke_sandbox_function(foo, mExpatParser, ##__VA_ARGS__)
#define RLBOX_SAFE_PRINT "Value used only for printing"
#define MOZ_RELEASE_ASSERT_TAINTED(cond, ...) \
MOZ_RELEASE_ASSERT((cond).unverified_safe_because("Sanity check"), \
##__VA_ARGS__)
/* safe_unverified is used whenever it's safe to not use a validator */
template <typename T>
static T safe_unverified(T val) {
return val;
}
/* status_verifier is a type validator for XML_Status */
inline enum XML_Status status_verifier(enum XML_Status s) {
MOZ_RELEASE_ASSERT(s >= XML_STATUS_ERROR && s <= XML_STATUS_SUSPENDED,
"unexpected status code");
return s;
}
/* error_verifier is a type validator for XML_Error */
inline enum XML_Error error_verifier(enum XML_Error code) {
MOZ_RELEASE_ASSERT(
code >= XML_ERROR_NONE && code <= XML_ERROR_INVALID_ARGUMENT,
"unexpected XML error code");
return code;
}
/* We use unverified_xml_string to just expose sandbox expat strings to Firefox
* without any validation. On 64-bit we have guard pages at the sandbox
* boundary; on 32-bit we don't and a string could be used to read beyond the
* sandbox boundary. In our attacker model this is okay (the attacker can just
* Spectre).
*
* Nevertheless, we should try to add strings validators to the consumer code
* of expat whenever we have some semantics. At the very lest we should make
*/
static const XML_Char* unverified_xml_string(uintptr_t ptr) {
return reinterpret_cast<const XML_Char*>(ptr);
}
/* The TransferBuffer class is used to copy (or directly expose in the
* noop-sandbox case) buffers into the expat sandbox (and automatically
* when out of scope).
*/
template <typename T>
using TransferBuffer =
mozilla::RLBoxTransferBufferToSandbox<T, rlbox_expat_sandbox_type>;
/*************************** END RLBOX HELPERS ******************************/
/***************************** EXPAT CALL BACKS ******************************/
// The callback handlers that get called from the expat parser.
static void Driver_HandleXMLDeclaration(
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> /* aUserData */,
tainted_expat<const XML_Char*> aVersion,
tainted_expat<const XML_Char*> aEncoding, tainted_expat<int> aStandalone) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
int standalone = aStandalone.copy_and_verify([&](auto a) {
// Standalone argument can be -1, 0, or 1 (see
// /parser/expat/lib/expat.h#185)
MOZ_RELEASE_ASSERT(a >= -1 && a <= 1, "Unexpected standalone parameter");
return a;
});
const auto* version = aVersion.copy_and_verify_address(unverified_xml_string);
const auto* encoding =
aEncoding.copy_and_verify_address(unverified_xml_string);
driver->HandleXMLDeclaration(version, encoding, standalone);
}
static void Driver_HandleCharacterData(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> /* aUserData */,
tainted_expat<const XML_Char*> aData,
tainted_expat<int> aLength) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
// aData is not null terminated; even with bad length we will not span beyond
// sandbox boundary
uint32_t length =
static_cast<uint32_t>(aLength.copy_and_verify(safe_unverified<int>));
const auto* data = aData.unverified_safe_pointer_because(
length, "Only care that the data is within sandbox boundary.");
driver->HandleCharacterData(data, length);
}
static void Driver_HandleComment(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> /* aUserData */,
tainted_expat<const XML_Char*> aName) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
const auto* name = aName.copy_and_verify_address(unverified_xml_string);
driver->HandleComment(name);
}
static void Driver_HandleProcessingInstruction(
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> /* aUserData */,
tainted_expat<const XML_Char*> aTarget,
tainted_expat<const XML_Char*> aData) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
const auto* target = aTarget.copy_and_verify_address(unverified_xml_string);
const auto* data = aData.copy_and_verify_address(unverified_xml_string);
driver->HandleProcessingInstruction(target, data);
}
static void Driver_HandleDefault(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> /* aUserData */,
tainted_expat<const XML_Char*> aData,
tainted_expat<int> aLength) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
// aData is not null terminated; even with bad length we will not span
// beyond sandbox boundary
uint32_t length =
static_cast<uint32_t>(aLength.copy_and_verify(safe_unverified<int>));
const auto* data = aData.unverified_safe_pointer_because(
length, "Only care that the data is within sandbox boundary.");
driver->HandleDefault(data, length);
}
static void Driver_HandleStartCdataSection(
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> /* aUserData */) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
driver->HandleStartCdataSection();
}
static void Driver_HandleEndCdataSection(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> /* aUserData */) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
driver->HandleEndCdataSection();
}
static void Driver_HandleStartDoctypeDecl(
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> /* aUserData */,
tainted_expat<const XML_Char*> aDoctypeName,
tainted_expat<const XML_Char*> aSysid,
tainted_expat<const XML_Char*> aPubid,
tainted_expat<int> aHasInternalSubset) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
const auto* doctypeName =
aDoctypeName.copy_and_verify_address(unverified_xml_string);
const auto* sysid = aSysid.copy_and_verify_address(unverified_xml_string);
const auto* pubid = aPubid.copy_and_verify_address(unverified_xml_string);
bool hasInternalSubset =
!!(aHasInternalSubset.copy_and_verify(safe_unverified<int>));
driver->HandleStartDoctypeDecl(doctypeName, sysid, pubid, hasInternalSubset);
}
static void Driver_HandleEndDoctypeDecl(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> /* aUserData */) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
driver->HandleEndDoctypeDecl();
}
static tainted_expat<int> Driver_HandleExternalEntityRef(
rlbox_sandbox_expat& aSandbox, tainted_expat<XML_Parser> /* aParser */,
tainted_expat<const XML_Char*> aOpenEntityNames,
tainted_expat<const XML_Char*> aBase,
tainted_expat<const XML_Char*> aSystemId,
tainted_expat<const XML_Char*> aPublicId) {
nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(driver);
const auto* openEntityNames =
aOpenEntityNames.copy_and_verify_address(unverified_xml_string);
const auto* base = aBase.copy_and_verify_address(unverified_xml_string);
const auto* systemId =
aSystemId.copy_and_verify_address(unverified_xml_string);
const auto* publicId =
aPublicId.copy_and_verify_address(unverified_xml_string);
return driver->HandleExternalEntityRef(openEntityNames, base, systemId,
publicId);
}
/***************************** END CALL BACKS ********************************/
/***************************** CATALOG UTILS *********************************/
// MathML 2.0" DTD to the the lightweight customized version that Mozilla uses.
// Since Mozilla is not validating, no need to fetch a *huge* file at each
// click.
// Catalogs.
struct nsCatalogData {
const char* mPublicID;
const char* mLocalDTD;
const char* mAgentSheet;
};
// The order of this table is guestimated to be in the optimum order
static const nsCatalogData kCatalogTable[] = {
{"-//W3C//DTD XHTML 1.0 Transitional//EN", "htmlmathml-f.ent", nullptr},
{"-//W3C//DTD XHTML 1.1//EN", "htmlmathml-f.ent", nullptr},
{"-//W3C//DTD XHTML 1.0 Strict//EN", "htmlmathml-f.ent", nullptr},
{"-//W3C//DTD XHTML 1.0 Frameset//EN", "htmlmathml-f.ent", nullptr},
{"-//W3C//DTD XHTML Basic 1.0//EN", "htmlmathml-f.ent", nullptr},
{"-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", "htmlmathml-f.ent", nullptr},
{"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN",
"htmlmathml-f.ent", nullptr},
{"-//W3C//DTD MathML 2.0//EN", "htmlmathml-f.ent", nullptr},
{"-//WAPFORUM//DTD XHTML Mobile 1.0//EN", "htmlmathml-f.ent", nullptr},
{nullptr, nullptr, nullptr}};
static const nsCatalogData* LookupCatalogData(const char16_t* aPublicID) {
nsDependentString publicID(aPublicID);
// linear search for now since the number of entries is going to
// code anyway
const nsCatalogData* data = kCatalogTable;
while (data->mPublicID) {
if (publicID.EqualsASCII(data->mPublicID)) {
return data;
}
++data;
}
return nullptr;
}
// This function provides a resource URI to a local DTD
// If aCatalogData is provided, it is used to remap the
// DTD instead of taking the filename from the URI. aDTD
// may be null in some cases that are relying on
// aCatalogData working for them.
static void GetLocalDTDURI(const nsCatalogData* aCatalogData, nsIURI* aDTD,
nsIURI** aResult) {
nsAutoCString fileName;
if (aCatalogData) {
// remap the DTD to a known local DTD
fileName.Assign(aCatalogData->mLocalDTD);
}
if (fileName.IsEmpty()) {
// Try to see if the user has installed the DTD file -- we extract the
// filename.ext of the DTD here. Hence, for any DTD for which we have
// no predefined mapping, users just have to copy the DTD file to our
// special DTD directory and it will be picked.
nsCOMPtr<nsIURL> dtdURL = do_QueryInterface(aDTD);
if (!dtdURL) {
// Not a URL with a filename, or maybe it was null. Either way, nothing
// else we can do here.
return;
}
dtdURL->GetFileName(fileName);
if (fileName.IsEmpty()) {
return;
}
}
respath += fileName;
NS_NewURI(aResult, respath);
}
/***************************** END CATALOG UTILS *****************************/
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsExpatDriver)
NS_INTERFACE_MAP_ENTRY(nsIDTD)
NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsExpatDriver)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsExpatDriver)
NS_IMPL_CYCLE_COLLECTION(nsExpatDriver, mSink)
nsExpatDriver::nsExpatDriver()
: mExpatParser(nullptr),
mInCData(false),
mInInternalSubset(false),
mInExternalDTD(false),
mMadeFinalCallToExpat(false),
mInParser(false),
mInternalState(NS_OK),
mExpatBuffered(0),
mTagDepth(0),
mCatalogData(nullptr),
mInnerWindowID(0) {}
nsExpatDriver::~nsExpatDriver() { Destroy(); }
void nsExpatDriver::Destroy() {
if (mSandboxPoolData) {
SandboxData()->DetachDriver();
if (mExpatParser) {
RLBOX_EXPAT_MCALL(MOZ_XML_ParserFree);
}
}
mSandboxPoolData.reset();
mURIs.Clear();
mExpatParser = nullptr;
}
// The AllocAttrs class is used to speed up copying attributes from the
// sandboxed expat by fast allocating attributes on the stack and only falling
// back to malloc when we need to allocate lots of attributes.
class MOZ_STACK_CLASS AllocAttrs {
#define NUM_STACK_SLOTS 16
public:
const char16_t** Init(size_t size) {
if (size <= NUM_STACK_SLOTS) {
return mInlineArr;
}
mHeapPtr = mozilla::MakeUnique<const char16_t*[]>(size);
return mHeapPtr.get();
}
private:
const char16_t* mInlineArr[NUM_STACK_SLOTS];
mozilla::UniquePtr<const char16_t*[]> mHeapPtr;
#undef NUM_STACK_SLOTS
};
/* static */
void nsExpatDriver::HandleStartElement(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> /* aUserData */,
tainted_expat<const char16_t*> aName,
tainted_expat<const char16_t**> aAttrs) {
nsExpatDriver* self = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(self && self->mSink);
const auto* name = aName.copy_and_verify_address(unverified_xml_string);
// Calculate the total number of elements in aAttrs.
// XML_GetSpecifiedAttributeCount will only give us the number of specified
// attrs (twice that number, actually), so we have to check for default
// attrs ourselves.
tainted_expat<int> count =
RLBOX_EXPAT_CALL(MOZ_XML_GetSpecifiedAttributeCount);
MOZ_RELEASE_ASSERT_TAINTED(count >= 0, "Unexpected attribute count");
tainted_expat<uint64_t> attrArrayLengthTainted;
for (attrArrayLengthTainted = rlbox::sandbox_static_cast<uint64_t>(count);
(aAttrs[attrArrayLengthTainted] != nullptr)
.unverified_safe_because("Bad length is checked later");
attrArrayLengthTainted += 2) {
// Just looping till we find out what the length is
}
uint32_t attrArrayLength =
attrArrayLengthTainted.copy_and_verify([&](uint64_t value) {
// A malicious length could result in an overflow when we allocate
// aAttrs and then access elements of the array.
MOZ_RELEASE_ASSERT(value < UINT32_MAX, "Overflow attempt");
return value;
});
// Copy tainted aAttrs from sandbox
AllocAttrs allocAttrs;
const char16_t** attrs = allocAttrs.Init(attrArrayLength + 1);
if (NS_WARN_IF(!aAttrs || !attrs)) {
self->MaybeStopParser(NS_ERROR_OUT_OF_MEMORY);
return;
}
for (uint32_t i = 0; i < attrArrayLength; i++) {
attrs[i] = aAttrs[i].copy_and_verify_address(unverified_xml_string);
}
attrs[attrArrayLength] = nullptr;
if (self->mSink) {
// We store the tagdepth in a PRUint16, so make sure the limit fits in a
// PRUint16.
static_assert(
sMaxXMLTreeDepth <=
std::numeric_limits<decltype(nsExpatDriver::mTagDepth)>::max());
if (++self->mTagDepth > sMaxXMLTreeDepth) {
self->MaybeStopParser(NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP);
return;
}
nsresult rv = self->mSink->HandleStartElement(
name, attrs, attrArrayLength,
RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetCurrentLineNumber,
safe_unverified<XML_Size>),
RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetCurrentColumnNumber,
safe_unverified<XML_Size>));
self->MaybeStopParser(rv);
}
}
/* static */
void nsExpatDriver::HandleStartElementForSystemPrincipal(
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
tainted_expat<const char16_t*> aName,
tainted_expat<const char16_t**> aAttrs) {
nsExpatDriver* self = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(self);
if (!RLBOX_EXPAT_SAFE_CALL(MOZ_XML_ProcessingEntityValue,
safe_unverified<XML_Bool>)) {
HandleStartElement(aSandbox, aUserData, aName, aAttrs);
} else {
nsCOMPtr<Document> doc =
do_QueryInterface(self->mOriginalSink->GetTarget());
// Adjust the column number so that it is one based rather than zero
// based.
tainted_expat<XML_Size> colNumber =
RLBOX_EXPAT_CALL(MOZ_XML_GetCurrentColumnNumber) + 1;
tainted_expat<XML_Size> lineNumber =
RLBOX_EXPAT_CALL(MOZ_XML_GetCurrentLineNumber);
int32_t nameSpaceID;
RefPtr<nsAtom> prefix, localName;
const auto* name = aName.copy_and_verify_address(unverified_xml_string);
nsContentUtils::SplitExpatName(name, getter_AddRefs(prefix),
getter_AddRefs(localName), &nameSpaceID);
nsAutoString error;
error.AppendLiteral("Ignoring element <");
if (prefix) {
error.Append(prefix->GetUTF16String());
error.Append(':');
}
error.Append(localName->GetUTF16String());
error.AppendLiteral("> created from entity value.");
nsContentUtils::ReportToConsoleNonLocalized(
error, nsIScriptError::warningFlag, "XML Document"_ns, doc,
mozilla::SourceLocation(
doc->GetDocumentURI(),
lineNumber.unverified_safe_because(RLBOX_SAFE_PRINT),
colNumber.unverified_safe_because(RLBOX_SAFE_PRINT)));
}
}
/* static */
void nsExpatDriver::HandleEndElement(rlbox_sandbox_expat& aSandbox,
tainted_expat<void*> aUserData,
tainted_expat<const char16_t*> aName) {
nsExpatDriver* self = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(self);
const auto* name = aName.copy_and_verify_address(unverified_xml_string);
NS_ASSERTION(self->mSink, "content sink not found!");
NS_ASSERTION(self->mInternalState != NS_ERROR_HTMLPARSER_BLOCK,
"Shouldn't block from HandleStartElement.");
if (self->mSink && self->mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
nsresult rv = self->mSink->HandleEndElement(name);
--self->mTagDepth;
self->MaybeStopParser(rv);
}
}
/* static */
void nsExpatDriver::HandleEndElementForSystemPrincipal(
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
tainted_expat<const char16_t*> aName) {
nsExpatDriver* self = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
MOZ_ASSERT(self);
if (!RLBOX_EXPAT_SAFE_CALL(MOZ_XML_ProcessingEntityValue,
safe_unverified<XML_Bool>)) {
HandleEndElement(aSandbox, aUserData, aName);
}
}
nsresult nsExpatDriver::HandleCharacterData(const char16_t* aValue,
const uint32_t aLength) {
NS_ASSERTION(mSink, "content sink not found!");
if (mInCData) {
if (!mCDataText.Append(aValue, aLength, fallible)) {
MaybeStopParser(NS_ERROR_OUT_OF_MEMORY);
}
} else if (mSink) {
nsresult rv = mSink->HandleCharacterData(aValue, aLength);
MaybeStopParser(rv);
}
return NS_OK;
}
nsresult nsExpatDriver::HandleComment(const char16_t* aValue) {
NS_ASSERTION(mSink, "content sink not found!");
if (mInExternalDTD) {
// Ignore comments from external DTDs
return NS_OK;
}
if (mInInternalSubset) {
mInternalSubset.AppendLiteral("<!--");
mInternalSubset.Append(aValue);
mInternalSubset.AppendLiteral("-->");
} else if (mSink) {
nsresult rv = mSink->HandleComment(aValue);
MaybeStopParser(rv);
}
return NS_OK;
}
nsresult nsExpatDriver::HandleProcessingInstruction(const char16_t* aTarget,
const char16_t* aData) {
NS_ASSERTION(mSink, "content sink not found!");
if (mInExternalDTD) {
// Ignore PIs in external DTDs for now. Eventually we want to
// pass them to the sink in a way that doesn't put them in the DOM
return NS_OK;
}
if (mInInternalSubset) {
mInternalSubset.AppendLiteral("<?");
mInternalSubset.Append(aTarget);
mInternalSubset.Append(' ');
mInternalSubset.Append(aData);
mInternalSubset.AppendLiteral("?>");
} else if (mSink) {
nsresult rv = mSink->HandleProcessingInstruction(aTarget, aData);
MaybeStopParser(rv);
}
return NS_OK;
}
nsresult nsExpatDriver::HandleXMLDeclaration(const char16_t* aVersion,
const char16_t* aEncoding,
int32_t aStandalone) {
if (mSink) {
nsresult rv = mSink->HandleXMLDeclaration(aVersion, aEncoding, aStandalone);
MaybeStopParser(rv);
}
return NS_OK;
}
nsresult nsExpatDriver::HandleDefault(const char16_t* aValue,
const uint32_t aLength) {
NS_ASSERTION(mSink, "content sink not found!");
if (mInExternalDTD) {
// Ignore newlines in external DTDs
return NS_OK;
}
if (mInInternalSubset) {
mInternalSubset.Append(aValue, aLength);
} else if (mSink) {
uint32_t i;
nsresult rv = mInternalState;
for (i = 0; i < aLength && NS_SUCCEEDED(rv); ++i) {
if (aValue[i] == '\n' || aValue[i] == '\r') {
rv = mSink->HandleCharacterData(&aValue[i], 1);
}
}
MaybeStopParser(rv);
}
return NS_OK;
}
nsresult nsExpatDriver::HandleStartCdataSection() {
mInCData = true;
return NS_OK;
}
nsresult nsExpatDriver::HandleEndCdataSection() {
NS_ASSERTION(mSink, "content sink not found!");
mInCData = false;
if (mSink) {
nsresult rv =
mSink->HandleCDataSection(mCDataText.get(), mCDataText.Length());
MaybeStopParser(rv);
}
mCDataText.Truncate();
return NS_OK;
}
nsresult nsExpatDriver::HandleStartDoctypeDecl(const char16_t* aDoctypeName,
const char16_t* aSysid,
const char16_t* aPubid,
bool aHasInternalSubset) {
mDoctypeName = aDoctypeName;
mSystemID = aSysid;
mPublicID = aPubid;
if (aHasInternalSubset) {
// Consuming a huge internal subset translates to numerous
// allocations. In an effort to avoid too many allocations
// setting mInternalSubset's capacity to be 1K ( just a guesstimate! ).
mInInternalSubset = true;
mInternalSubset.SetCapacity(1024);
} else {
// Distinguish missing internal subset from an empty one
mInternalSubset.SetIsVoid(true);
}
return NS_OK;
}
nsresult nsExpatDriver::HandleEndDoctypeDecl() {
NS_ASSERTION(mSink, "content sink not found!");
mInInternalSubset = false;
if (mSink) {
// let the sink know any additional knowledge that we have about the
// agent sheets needed to layout the XML vocabulary of the document)
nsCOMPtr<nsIURI> data;
#if 0
if (mCatalogData && mCatalogData->mAgentSheet) {
NS_NewURI(getter_AddRefs(data), mCatalogData->mAgentSheet);
}
#endif
// The unused support for "catalog style sheets" was removed. It doesn't
MOZ_ASSERT(!mCatalogData || !mCatalogData->mAgentSheet,
"Need to add back support for catalog style sheets");
// Note: mInternalSubset already doesn't include the [] around it.
nsresult rv = mSink->HandleDoctypeDecl(mInternalSubset, mDoctypeName,
mSystemID, mPublicID, data);
MaybeStopParser(rv);
}
mInternalSubset.Truncate();
return NS_OK;
}
// Wrapper class for passing the sandbox data and parser as a closure to
// ExternalDTDStreamReaderFunc.
class RLBoxExpatClosure {
public:
RLBoxExpatClosure(RLBoxExpatSandboxData* aSbxData,
tainted_expat<XML_Parser> aExpatParser)
: mSbxData(aSbxData), mExpatParser(aExpatParser) {};
inline rlbox_sandbox_expat* Sandbox() const { return mSbxData->Sandbox(); };
inline tainted_expat<XML_Parser> Parser() const { return mExpatParser; };
private:
RLBoxExpatSandboxData* mSbxData;
tainted_expat<XML_Parser> mExpatParser;
};
static nsresult ExternalDTDStreamReaderFunc(nsIUnicharInputStream* aIn,
void* aClosure,
const char16_t* aFromSegment,
uint32_t aToOffset, uint32_t aCount,
uint32_t* aWriteCount) {
MOZ_ASSERT(aClosure && aFromSegment && aWriteCount);
*aWriteCount = 0;
// Get sandbox and parser
auto* closure = reinterpret_cast<RLBoxExpatClosure*>(aClosure);
MOZ_ASSERT(closure);
// Transfer segment into the sandbox
auto fromSegment =
TransferBuffer<char16_t>(closure->Sandbox(), aFromSegment, aCount);
NS_ENSURE_TRUE(*fromSegment, NS_ERROR_OUT_OF_MEMORY);
// Pass the buffer to expat for parsing.
if (closure->Sandbox()
->invoke_sandbox_function(
MOZ_XML_Parse, closure->Parser(),
rlbox::sandbox_reinterpret_cast<const char*>(*fromSegment),
aCount * sizeof(char16_t), 0)
.copy_and_verify(status_verifier) == XML_STATUS_OK) {
*aWriteCount = aCount;
return NS_OK;
}
return NS_ERROR_FAILURE;
}
int nsExpatDriver::HandleExternalEntityRef(const char16_t* openEntityNames,
const char16_t* base,
const char16_t* systemId,
const char16_t* publicId) {
if (mInInternalSubset && !mInExternalDTD && openEntityNames) {
mInternalSubset.Append(char16_t('%'));
mInternalSubset.Append(nsDependentString(openEntityNames));
mInternalSubset.Append(char16_t(';'));
}
nsCOMPtr<nsIURI> baseURI = GetBaseURI(base);
NS_ENSURE_TRUE(baseURI, 1);
// Load the external entity into a buffer.
nsCOMPtr<nsIInputStream> in;
nsCOMPtr<nsIURI> absURI;
nsresult rv = OpenInputStreamFromExternalDTD(
publicId, systemId, baseURI, getter_AddRefs(in), getter_AddRefs(absURI));
if (NS_FAILED(rv)) {
#ifdef DEBUG
nsCString message("Failed to open external DTD: publicId \"");
AppendUTF16toUTF8(MakeStringSpan(publicId), message);
message += "\" systemId \"";
AppendUTF16toUTF8(MakeStringSpan(systemId), message);
message += "\" base \"";
message.Append(baseURI->GetSpecOrDefault());
message += "\" URL \"";
if (absURI) {
message.Append(absURI->GetSpecOrDefault());
}
message += "\"";
NS_WARNING(message.get());
#endif
return 1;
}
nsCOMPtr<nsIUnicharInputStream> uniIn;
rv = NS_NewUnicharInputStream(in, getter_AddRefs(uniIn));
NS_ENSURE_SUCCESS(rv, 1);
int result = 1;
if (uniIn) {
auto utf16 = TransferBuffer<char16_t>(
Sandbox(), kUTF16, nsCharTraits<char16_t>::length(kUTF16) + 1);
NS_ENSURE_TRUE(*utf16, 1);
tainted_expat<XML_Parser> entParser;
entParser =
RLBOX_EXPAT_MCALL(MOZ_XML_ExternalEntityParserCreate, nullptr, *utf16);
if (entParser) {
auto baseURI = GetExpatBaseURI(absURI);
auto url = TransferBuffer<XML_Char>(Sandbox(), &baseURI[0],
ArrayLength(baseURI));
NS_ENSURE_TRUE(*url, 1);
Sandbox()->invoke_sandbox_function(MOZ_XML_SetBase, entParser, *url);
mInExternalDTD = true;
bool inParser = mInParser; // Save in-parser status
mInParser = true;
RLBoxExpatClosure closure(SandboxData(), entParser);
uint32_t totalRead;
do {
rv = uniIn->ReadSegments(ExternalDTDStreamReaderFunc, &closure,
uint32_t(-1), &totalRead);
} while (NS_SUCCEEDED(rv) && totalRead > 0);
result =
Sandbox()
->invoke_sandbox_function(MOZ_XML_Parse, entParser, nullptr, 0, 1)
.copy_and_verify(status_verifier);
mInParser = inParser; // Restore in-parser status
mInExternalDTD = false;
Sandbox()->invoke_sandbox_function(MOZ_XML_ParserFree, entParser);
}
}
return result;
}
nsresult nsExpatDriver::OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
const char16_t* aURLStr,
nsIURI* aBaseURI,
nsIInputStream** aStream,
nsIURI** aAbsURI) {
nsCOMPtr<nsIURI> uri;
nsresult rv = NS_NewURI(getter_AddRefs(uri), NS_ConvertUTF16toUTF8(aURLStr),
nullptr, aBaseURI);
// Even if the URI is malformed (most likely because we have a
// non-hierarchical base URI and a relative DTD URI, with the latter
// being the normal XHTML DTD case), we can try to see whether we
// have catalog data for aFPIStr.
if (NS_WARN_IF(NS_FAILED(rv) && rv != NS_ERROR_MALFORMED_URI)) {
return rv;
}
// make sure the URI, if we have one, is allowed to be loaded in sync
bool isUIResource = false;
if (uri) {
rv = NS_URIChainHasFlags(uri, nsIProtocolHandler::URI_IS_UI_RESOURCE,
&isUIResource);
NS_ENSURE_SUCCESS(rv, rv);
}
nsCOMPtr<nsIURI> localURI;
if (!isUIResource) {
// Check to see if we can map the DTD to a known local DTD, or if a DTD
// file of the same name exists in the special DTD directory
if (aFPIStr) {
// see if the Formal Public Identifier (FPI) maps to a catalog entry
mCatalogData = LookupCatalogData(aFPIStr);
GetLocalDTDURI(mCatalogData, uri, getter_AddRefs(localURI));
}
if (!localURI) {
return NS_ERROR_NOT_IMPLEMENTED;
}
}
nsCOMPtr<nsIChannel> channel;
if (localURI) {
localURI.swap(uri);
rv = NS_NewChannel(getter_AddRefs(channel), uri,
nsContentUtils::GetSystemPrincipal(),
nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_SEC_CONTEXT_IS_NULL,
nsIContentPolicy::TYPE_DTD);
NS_ENSURE_SUCCESS(rv, rv);
} else {
NS_ASSERTION(
mSink == nsCOMPtr<nsIExpatSink>(do_QueryInterface(mOriginalSink)),
"In nsExpatDriver::OpenInputStreamFromExternalDTD: "
"mOriginalSink not the same object as mSink?");
nsContentPolicyType policyType = nsIContentPolicy::TYPE_INTERNAL_DTD;
if (mOriginalSink) {
nsCOMPtr<Document> doc;
doc = do_QueryInterface(mOriginalSink->GetTarget());
if (doc) {
if (doc->SkipDTDSecurityChecks()) {
policyType = nsIContentPolicy::TYPE_INTERNAL_FORCE_ALLOWED_DTD;
}
rv = NS_NewChannel(
getter_AddRefs(channel), uri, doc,
nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_INHERITS_SEC_CONTEXT |
nsILoadInfo::SEC_ALLOW_CHROME,
policyType);
NS_ENSURE_SUCCESS(rv, rv);
}
}
if (!channel) {
nsCOMPtr<nsIPrincipal> nullPrincipal =
mozilla::NullPrincipal::CreateWithoutOriginAttributes();
rv = NS_NewChannel(
getter_AddRefs(channel), uri, nullPrincipal,
nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_INHERITS_SEC_CONTEXT |
nsILoadInfo::SEC_ALLOW_CHROME,
policyType);
NS_ENSURE_SUCCESS(rv, rv);
}
}
uri.forget(aAbsURI);
channel->SetContentType("application/xml"_ns);
return channel->Open(aStream);
}
static nsresult CreateErrorText(const char16_t* aDescription,
const char16_t* aSourceURL,
tainted_expat<XML_Size> aLineNumber,
tainted_expat<XML_Size> aColNumber,
nsString& aErrorString, bool spoofEnglish) {
aErrorString.Truncate();
nsAutoString msg;
nsresult rv = nsParserMsgUtils::GetLocalizedStringByName(
spoofEnglish ? XMLPARSER_PROPERTIES_en_US : XMLPARSER_PROPERTIES,
"XMLParsingError", msg);
NS_ENSURE_SUCCESS(rv, rv);
// XML Parsing Error: %1$S\nLocation: %2$S\nLine Number %3$u, Column %4$u:
nsTextFormatter::ssprintf(
aErrorString, msg.get(), aDescription, aSourceURL,
aLineNumber.unverified_safe_because(RLBOX_SAFE_PRINT),
aColNumber.unverified_safe_because(RLBOX_SAFE_PRINT));
return NS_OK;
}
static nsresult AppendErrorPointer(tainted_expat<XML_Size> aColNumber,
const char16_t* aSourceLine,
size_t aSourceLineLength,
nsString& aSourceString) {
aSourceString.Append(char16_t('\n'));
MOZ_RELEASE_ASSERT_TAINTED(aColNumber != static_cast<XML_Size>(0),
"Unexpected value of column");
// Last character will be '^'.
XML_Size last =
(aColNumber - 1).copy_and_verify([&](XML_Size val) -> XML_Size {
if (val > aSourceLineLength) {
// Unexpected value of last column, just return a safe value
return 0;
}
return val;
});
XML_Size i;
uint32_t minuses = 0;
for (i = 0; i < last; ++i) {
if (aSourceLine[i] == '\t') {
// Since this uses |white-space: pre;| a tab stop equals 8 spaces.
uint32_t add = 8 - (minuses % 8);
aSourceString.AppendASCII("--------", add);
minuses += add;
} else {
aSourceString.Append(char16_t('-'));
++minuses;
}
}
aSourceString.Append(char16_t('^'));
return NS_OK;
}
nsresult nsExpatDriver::HandleError() {
int32_t code =
RLBOX_EXPAT_MCALL(MOZ_XML_GetErrorCode).copy_and_verify(error_verifier);
// Map Expat error code to an error string
// XXX Deal with error returns.
nsAutoString description;
nsCOMPtr<Document> doc;
if (mOriginalSink) {
doc = do_QueryInterface(mOriginalSink->GetTarget());
}
bool spoofEnglish =
nsContentUtils::SpoofLocaleEnglish() && (!doc || !doc->AllowsL10n());
nsParserMsgUtils::GetLocalizedStringByID(
spoofEnglish ? XMLPARSER_PROPERTIES_en_US : XMLPARSER_PROPERTIES, code,
description);
if (code == XML_ERROR_TAG_MISMATCH) {
/**
* Expat can send the following:
* localName
* namespaceURI<separator>localName
* namespaceURI<separator>localName<separator>prefix
*
* and we use 0xFFFF for the <separator>.
*
*/
const char16_t* mismatch =
RLBOX_EXPAT_MCALL(MOZ_XML_GetMismatchedTag)
.copy_and_verify_address(unverified_xml_string);
const char16_t* uriEnd = nullptr;
const char16_t* nameEnd = nullptr;
const char16_t* pos;
for (pos = mismatch; *pos; ++pos) {
if (*pos == kExpatSeparatorChar) {
if (uriEnd) {
nameEnd = pos;
} else {
uriEnd = pos;
}
}
}