Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsConverterInputStream.h"
#include "nsIInputStream.h"
#include "nsReadLine.h"
#include "nsStreamUtils.h"
#include <algorithm>
#include <tuple>
using namespace mozilla;
// Buffer size that Init() falls back to, for both the byte buffer and the
// UTF-16 output buffer, when the caller passes a non-positive aBufferSize.
#define CONVERTER_BUFFER_SIZE 8192
// Interface list for nsConverterInputStream: the converter input stream
// interface, the unichar input stream interface and the unichar line input
// stream interface.
NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
nsIUnicharInputStream, nsIUnicharLineInputStream)
// Prepares this stream to decode the bytes of aStream into UTF-16.
//
// @param aStream          the byte stream to decode; stored in mInput on
//                         success.
// @param aCharset         encoding label; a null pointer selects "UTF-8".
// @param aBufferSize      size of the byte buffer; a value <= 0 selects
//                         CONVERTER_BUFFER_SIZE for both buffers.
// @param aReplacementChar when zero, decoding errors are treated as fatal;
//                         any non-zero value enables replacement decoding.
// @return NS_ERROR_UCONV_NOCONV if no encoding matches the label,
//         NS_ERROR_OUT_OF_MEMORY if a buffer size cannot be computed or a
//         buffer cannot be allocated, NS_OK otherwise.
NS_IMETHODIMP
nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset,
                             int32_t aBufferSize, char16_t aReplacementChar) {
  nsAutoCString label;
  if (aCharset) {
    label = aCharset;
  } else {
    label.AssignLiteral("UTF-8");
  }

  auto encoding = Encoding::ForLabelNoReplacement(label);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }

  // Historical note: the earlier implementation auto-switched only between
  // the two UTF-16 variants, and only when it was initialized with an
  // endianness-unspecific label.
  mConverter = encoding->NewDecoder();

  // Start from the default and override it when the caller asked for a
  // specific size.
  size_t outputBufferSize = CONVERTER_BUFFER_SIZE;
  if (aBufferSize > 0) {
    // NetUtil.sys.mjs assumes that a buffer size equal to the input size
    // means the whole stream is handled by a single readString. That does
    // not hold with encoding_rs, which may want room for a surrogate pair,
    // so the output buffer is sized from the decoder's own worst case.
    CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize);
    if (!needed.isValid()) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    outputBufferSize = needed.value();
  } else {
    aBufferSize = CONVERTER_BUFFER_SIZE;
  }

  // Allocate the byte buffer first; the UTF-16 buffer is only attempted when
  // the byte buffer succeeded.
  if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (!mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  mInput = aStream;
  mErrorsAreFatal = !aReplacementChar;
  return NS_OK;
}
// Closes the underlying input stream, if there is one, and drops the line
// buffer, the input, the converter and both data buffers.
//
// @return the result of closing the underlying stream, or NS_OK when no
//         stream was attached.
NS_IMETHODIMP
nsConverterInputStream::Close() {
  nsresult closeResult = NS_OK;
  if (mInput) {
    closeResult = mInput->Close();
  }

  mLineBuffer = nullptr;
  mInput = nullptr;
  mConverter = nullptr;
  mByteData.Clear();
  mUnicharData.Clear();

  return closeResult;
}
// Copies up to aCount buffered UTF-16 code units into aBuf, refilling the
// internal buffer first when it is empty.
//
// @param aBuf       destination for the code units.
// @param aCount     maximum number of code units to copy.
// @param aReadCount receives the number of code units copied; 0 when the
//                   refill produced nothing.
// @return mLastErrorCode when the refill produced nothing, NS_OK otherwise.
NS_IMETHODIMP
nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount,
                             uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");

  uint32_t available = mUnicharDataLength - mUnicharDataOffset;
  if (available == 0) {
    // Nothing buffered: decode more input.
    available = Fill(&mLastErrorCode);
    if (available == 0) {
      *aReadCount = 0;
      return mLastErrorCode;
    }
  }

  const uint32_t toCopy = std::min(available, aCount);
  const char16_t* from = mUnicharData.Elements() + mUnicharDataOffset;
  memcpy(aBuf, from, toCopy * sizeof(char16_t));

  mUnicharDataOffset += toCopy;
  *aReadCount = toCopy;
  return NS_OK;
}
// Hands buffered UTF-16 code units to aWriter, refilling the internal buffer
// first when it is empty.
//
// @param aWriter    callback that consumes code units; it is called
//                   repeatedly until everything offered has been consumed,
//                   it fails, or it stops making progress.
// @param aClosure   opaque pointer passed through to aWriter.
// @param aCount     maximum number of code units to offer.
// @param aReadCount receives the total number of code units consumed.
// @return mLastErrorCode when the refill produced nothing, NS_OK otherwise.
//         Failures reported by aWriter are never propagated.
NS_IMETHODIMP
nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
                                     void* aClosure, uint32_t aCount,
                                     uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset;
  if (0 == codeUnitsToWrite) {
    // Fill the unichar buffer
    codeUnitsToWrite = Fill(&mLastErrorCode);
    if (codeUnitsToWrite == 0) {
      *aReadCount = 0;
      return mLastErrorCode;
    }
  }
  if (codeUnitsToWrite > aCount) {
    codeUnitsToWrite = aCount;
  }

  uint32_t totalCodeUnitsWritten = 0;
  while (codeUnitsToWrite) {
    // Reset on every pass so a writer that never stores to the out-param is
    // seen as having consumed nothing, not as an indeterminate value.
    uint32_t codeUnitsWritten = 0;
    nsresult rv =
        aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
                totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten);
    if (NS_FAILED(rv)) {
      // don't propagate errors to the caller
      break;
    }
    if (codeUnitsWritten == 0) {
      // A writer that succeeds without consuming anything would otherwise
      // keep this loop spinning forever.
      break;
    }

    // Never trust the writer to report more than it was offered: that would
    // underflow codeUnitsToWrite and move the offset past the buffered data.
    NS_ASSERTION(codeUnitsWritten <= codeUnitsToWrite,
                 "writer consumed more than it was offered");
    codeUnitsWritten = std::min(codeUnitsWritten, codeUnitsToWrite);

    codeUnitsToWrite -= codeUnitsWritten;
    totalCodeUnitsWritten += codeUnitsWritten;
    mUnicharDataOffset += codeUnitsWritten;
  }

  *aReadCount = totalCodeUnitsWritten;
  return NS_OK;
}
// Assigns up to aCount buffered UTF-16 code units to aString, refilling the
// internal buffer first when it is empty.
//
// @param aCount     maximum number of code units to hand out.
// @param aString    overwritten with the code units that were read.
// @param aReadCount receives the number of code units assigned; 0 when the
//                   refill produced nothing (aString is left untouched then).
// @return mLastErrorCode when the refill produced nothing, NS_OK otherwise.
NS_IMETHODIMP
nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
                                   uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");

  uint32_t pending = mUnicharDataLength - mUnicharDataOffset;
  if (pending == 0) {
    // Buffer is drained: decode the next chunk of input.
    pending = Fill(&mLastErrorCode);
    if (pending == 0) {
      *aReadCount = 0;
      return mLastErrorCode;
    }
  }

  const uint32_t length = pending > aCount ? aCount : pending;
  aString.Assign(mUnicharData.Elements() + mUnicharDataOffset, length);

  mUnicharDataOffset += length;
  *aReadCount = length;
  return NS_OK;
}
// Refills mUnicharData by reading bytes from mInput and decoding them with
// mConverter.
//
// The unichar offset and fill level are reset, then input is read and decoded
// in a loop until at least one UTF-16 code unit has been produced, the
// converter has been dropped (end of stream), or an error occurs.
//
// Every caller in this file passes &mLastErrorCode as aErrorCode, so the
// value stored here is also what the "failed last time" check below sees on
// the next call.
//
// @param aErrorCode receives NS_BASE_STREAM_CLOSED when mInput is null, the
//        previously recorded failure when mLastErrorCode is a failure, the
//        failure returned by NS_FillArray, NS_ERROR_UDEC_ILLEGALINPUT on a
//        decode error, and NS_OK otherwise.
// @return the number of UTF-16 code units now available in mUnicharData;
//         0 on error or at end of stream.
uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
if (!mInput) {
// We already closed the stream!
*aErrorCode = NS_BASE_STREAM_CLOSED;
return 0;
}
if (NS_FAILED(mLastErrorCode)) {
// We failed to completely convert last time, and error-recovery
// is disabled. We will fare no better this time, so...
*aErrorCode = mLastErrorCode;
return 0;
}
// mUnicharData.Length() is the buffer length, not the fill status.
// mUnicharDataLength reflects the current fill status.
mUnicharDataLength = 0;
// Whenever we convert, mUnicharData is logically empty.
mUnicharDataOffset = 0;
// Continue trying to read from the source stream until we successfully decode
// a character or encounter an error, as returning `0` here implies that the
// stream is complete.
//
// If the converter has been cleared, we've fully consumed the stream, and
// want to report EOF.
while (mUnicharDataLength == 0 && mConverter) {
// We assume a many to one conversion and are using equal sizes for
// the two buffers. However if an error happens at the very start
// of a byte buffer we may end up in a situation where n bytes lead
// to n+1 unicode chars. Thus we need to keep track of the leftover
// bytes as we convert.
//
// NOTE(review): Init() sizes the output buffer with MaxUTF16BufferLength()
// when the caller supplies a positive buffer size, so "equal sizes" only
// matches the default-size path. Confirm and update this comment.
uint32_t nb;
*aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
if (NS_FAILED(*aErrorCode)) {
return 0;
}
NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
"mByteData is lying to us somewhere");
// If `NS_FillArray` failed to read any new bytes, this is the last read,
// and we're at the end of the stream.
bool last = (nb == 0);
// Now convert as much of the byte buffer to unicode as possible
auto src = AsBytes(Span(mByteData));
auto dst = Span(mUnicharData);
// Truncation from size_t to uint32_t below is OK, because the sizes
// are bounded by the lengths of mByteData and mUnicharData.
uint32_t result;
size_t read;
size_t written;
// Fatal-error mode uses the decoder entry point that reports malformed
// input through `result`; otherwise the replacing entry point is used and
// its fourth tuple element is ignored.
if (mErrorsAreFatal) {
std::tie(result, read, written) =
mConverter->DecodeToUTF16WithoutReplacement(src, dst, last);
} else {
std::tie(result, read, written, std::ignore) =
mConverter->DecodeToUTF16(src, dst, last);
}
// Bytes the decoder did not consume are carried over to the next
// NS_FillArray call through mLeftOverBytes.
mLeftOverBytes = mByteData.Length() - read;
mUnicharDataLength = written;
// Clear `mConverter` if we reached the end of the stream, as we can't
// call methods on it anymore. This will also signal EOF to the caller
// through the loop condition.
if (last) {
MOZ_ASSERT(mLeftOverBytes == 0,
"Failed to read all bytes on the last pass?");
mConverter = nullptr;
}
// If we got a decode error, we're done.
//
// NOTE(review): mUnicharDataLength was already set to `written` above, so
// code units decoded before the error stay recorded as buffered even though
// this call returns 0. Confirm that callers are expected to see them on a
// later read.
if (result != kInputEmpty && result != kOutputFull) {
MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
*aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
return 0;
}
}
*aErrorCode = NS_OK;
return mUnicharDataLength;
}
// Reads one line from this stream into aLine by delegating to NS_ReadLine.
// The line buffer is created on first use and kept for later calls.
//
// @param aLine   receives the line, as filled in by NS_ReadLine.
// @param aResult out-flag forwarded to NS_ReadLine.
// @return whatever NS_ReadLine returns.
NS_IMETHODIMP
nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) {
  if (!mLineBuffer) {
    mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>();
  }

  nsLineBuffer<char16_t>* lineBuffer = mLineBuffer.get();
  return NS_ReadLine(this, lineBuffer, aLine, aResult);
}