Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set sw=2 ts=2 et tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
* License, v. 2.0. If a copy of the MPL was not distributed with this
5
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include "nsHtml5StreamParser.h"
8
9
#include "mozilla/DebugOnly.h"
10
#include "mozilla/Encoding.h"
11
#include "nsContentUtils.h"
12
#include "nsCyrillicDetector.h"
13
#include "nsHtml5Tokenizer.h"
14
#include "nsIHttpChannel.h"
15
#include "nsHtml5Parser.h"
16
#include "nsHtml5TreeBuilder.h"
17
#include "nsHtml5AtomTable.h"
18
#include "nsHtml5Module.h"
19
#include "nsHtml5StreamParserPtr.h"
20
#include "nsIDocShell.h"
21
#include "nsIScriptError.h"
22
#include "mozilla/Preferences.h"
23
#include "mozilla/SystemGroup.h"
24
#include "mozilla/StaticPrefs_intl.h"
25
#include "mozilla/StaticPrefs_html5.h"
26
#include "mozilla/UniquePtrExtensions.h"
27
#include "nsHtml5Highlighter.h"
28
#include "expat_config.h"
29
#include "expat.h"
30
#include "nsINestedURI.h"
31
#include "nsCharsetSource.h"
32
#include "nsIThreadRetargetableRequest.h"
33
#include "nsPrintfCString.h"
34
#include "nsNetUtil.h"
35
#include "nsXULAppAPI.h"
36
#include "mozilla/SchedulerGroup.h"
37
#include "nsJSEnvironment.h"
38
#include "mozilla/dom/Document.h"
39
#include "mozilla/dom/DebuggerUtilsBinding.h"
40
41
using namespace mozilla;
42
using namespace mozilla::dom;
43
44
/*
45
* Note that nsHtml5StreamParser implements cycle collecting AddRef and
46
* Release. Therefore, nsHtml5StreamParser must never be refcounted from
47
* the parser thread!
48
*
49
* To work around this limitation, runnables posted by the main thread to the
50
* parser thread hold their reference to the stream parser in an
51
* nsHtml5StreamParserPtr. Upon creation, nsHtml5StreamParserPtr addrefs the
52
* object it holds
53
* just like a regular nsRefPtr. This is OK, since the creation of the
54
* runnable and the nsHtml5StreamParserPtr happens on the main thread.
55
*
56
* When the runnable is done on the parser thread, the destructor of
57
* nsHtml5StreamParserPtr runs there. It doesn't call Release on the held object
58
* directly. Instead, it posts another runnable back to the main thread where
59
* that runnable calls Release on the wrapped object.
60
*
61
* When posting runnables in the other direction, the runnables have to be
62
* created on the main thread when nsHtml5StreamParser is instantiated and
63
* held for the lifetime of the nsHtml5StreamParser. This works, because the
64
* same runnable can be dispatched multiple times and currently runnables
65
* posted from the parser thread to main thread don't need to wrap any
66
* runnable-specific data. (In the other direction, the runnables most notably
67
* wrap the byte data of the stream.)
68
*/
69
// AddRef/Release for nsHtml5StreamParser use the cycle-collecting
// implementations.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser)

// Interface table: the only interface listed here is
// nsICharsetDetectionObserver, followed by the cycle collection segue.
NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser)
  NS_INTERFACE_TABLE(nsHtml5StreamParser, nsICharsetDetectionObserver)
  NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser)
NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser)

// Unlink: drop the flush timer first, then release the observer, request and
// owner. The two flusher runnables and the executor are cleared by plain
// assignment rather than through the UNLINK macro.
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser)
  tmp->DropTimer();
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner)
  tmp->mExecutorFlusher = nullptr;
  tmp->mLoadFlusher = nullptr;
  tmp->mExecutor = nullptr;
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet)
NS_IMPL_CYCLE_COLLECTION_UNLINK_END

// Traverse: besides the directly held members, report the edges that are
// held indirectly. Each flusher runnable wraps a strong reference to the
// executor, and mChardet refers back to this object as its observer, so those
// edges are noted by hand below.
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner)
  // hack: count the strongly owned edge wrapped in the runnable
  if (tmp->mExecutorFlusher) {
    NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor");
    cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor));
  }
  // hack: count the strongly owned edge wrapped in the runnable
  if (tmp->mLoadFlusher) {
    NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor");
    cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor));
  }
  // hack: count self if held by mChardet
  if (tmp->mChardet) {
    NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver");
    cb.NoteXPCOMChild(static_cast<nsICharsetDetectionObserver*>(tmp));
  }
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
110
111
class nsHtml5ExecutorFlusher : public Runnable {
112
private:
113
RefPtr<nsHtml5TreeOpExecutor> mExecutor;
114
115
public:
116
explicit nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor)
117
: Runnable("nsHtml5ExecutorFlusher"), mExecutor(aExecutor) {}
118
NS_IMETHOD Run() override {
119
if (!mExecutor->isInList()) {
120
Document* doc = mExecutor->GetDocument();
121
if (XRE_IsContentProcess() &&
122
nsContentUtils::
123
HighPriorityEventPendingForTopLevelDocumentBeforeContentfulPaint(
124
doc)) {
125
// Possible early paint pending, reuse the runnable and try to
126
// call RunFlushLoop later.
127
nsCOMPtr<nsIRunnable> flusher = this;
128
if (NS_SUCCEEDED(
129
doc->Dispatch(TaskCategory::Network, flusher.forget()))) {
130
PROFILER_ADD_MARKER("HighPrio blocking parser flushing(1)", DOM);
131
return NS_OK;
132
}
133
}
134
mExecutor->RunFlushLoop();
135
}
136
return NS_OK;
137
}
138
};
139
140
class nsHtml5LoadFlusher : public Runnable {
141
private:
142
RefPtr<nsHtml5TreeOpExecutor> mExecutor;
143
144
public:
145
explicit nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor)
146
: Runnable("nsHtml5LoadFlusher"), mExecutor(aExecutor) {}
147
NS_IMETHOD Run() override {
148
mExecutor->FlushSpeculativeLoads();
149
return NS_OK;
150
}
151
};
152
153
/**
 * Constructs the stream parser. Asserted to run on the main thread.
 *
 * @param aExecutor tree op executor; its stage is handed to the tree builder
 *                  and it is wrapped by both flusher runnables.
 * @param aOwner    the owning nsHtml5Parser.
 * @param aMode     parser mode; decides which stages the tree builder gets
 *                  and whether a view-source highlighter is installed.
 */
nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
                                         nsHtml5Parser* aOwner,
                                         eParserMode aMode)
    : mSniffingLength(0),
      mBomState(eBomState::BOM_SNIFFING_NOT_STARTED),
      mCharsetSource(kCharsetUninitialized),
      mEncoding(WINDOWS_1252_ENCODING),
      mFeedChardet(true),
      mReparseForbidden(false),
      mLastBuffer(nullptr),  // Will be filled when starting
      mExecutor(aExecutor),
      mTreeBuilder(new nsHtml5TreeBuilder(
          (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML)
              ? nullptr
              : mExecutor->GetStage(),
          aMode == NORMAL ? mExecutor->GetStage() : nullptr)),
      mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML)),
      mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex"),
      mOwner(aOwner),
      mLastWasCR(false),
      mStreamState(eHtml5StreamState::STREAM_NOT_STARTED),
      mSpeculating(false),
      mAtEOF(false),
      mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex"),
      mSpeculationFailureCount(0),
      mLocalFileBytesBuffered(0),
      mTerminated(false),
      mInterrupted(false),
      mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex"),
      mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()),
      mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)),
      mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)),
      mJapaneseDetector(mozilla::JapaneseDetector::Create(
          StaticPrefs::intl_charset_detector_iso2022jp_allowed())),
      mInitialEncodingWasFromParentFrame(false),
      mHasHadErrors(false),
      mDecodingLocalFileAsUTF8(false),
      mFlushTimer(NS_NewTimer(mEventTarget)),
      mFlushTimerMutex("nsHtml5StreamParser mFlushTimerMutex"),
      mFlushTimerArmed(false),
      mFlushTimerEverFired(false),
      mMode(aMode),
      mSkipContentSniffing(false) {
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
#ifdef DEBUG
  mAtomTable.SetPermittedLookupEventTarget(mEventTarget);
#endif
  mTokenizer->setInterner(&mAtomTable);
  mTokenizer->setEncodingDeclarationHandler(this);

  // View-source modes share one highlighter between tokenizer and tree
  // builder; only the tokenizer owns it.
  const bool viewSource =
      aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML;
  if (viewSource) {
    auto* sourceHighlighter = new nsHtml5Highlighter(mExecutor->GetStage());
    mTokenizer->EnableViewSource(sourceHighlighter);    // takes ownership
    mTreeBuilder->EnableViewSource(sourceHighlighter);  // doesn't own
  }

  // Chardet instantiation adapted from File.
  // Chardet is initialized here even if it turns out to be useless
  // to make the chardet refcount its observer (nsHtml5StreamParser)
  // on the main thread.
  nsAutoCString detectorPref;
  Preferences::GetLocalizedCString("intl.charset.detector", detectorPref);
  // Only the two magic strings for Russian and Ukrainian are recognized; an
  // empty or unknown value matches neither and leaves mChardet unset.
  if (detectorPref.EqualsLiteral("ruprob")) {
    mChardet = new nsRUProbDetector();
  } else if (detectorPref.EqualsLiteral("ukprob")) {
    mChardet = new nsUKProbDetector();
  }
  if (mChardet) {
    (void)mChardet->Init(this);
  }

  // There's a zeroing operator new for everything else
}
230
231
// Destructor. Asserted to run on the main thread. Ends the tokenizer; in
// DEBUG builds it additionally checks (under mFlushTimerMutex) that the flush
// timer has already been dropped and then explicitly nulls out the listed
// members.
nsHtml5StreamParser::~nsHtml5StreamParser() {
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
  mTokenizer->end();
#ifdef DEBUG
  {
    // Scope the lock to the assertion only.
    mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
    MOZ_ASSERT(!mFlushTimer, "Flush timer was not dropped before dtor!");
  }
  mRequest = nullptr;
  mObserver = nullptr;
  mUnicodeDecoder = nullptr;
  mSniffingBuffer = nullptr;
  mMetaScanner = nullptr;
  mFirstBuffer = nullptr;
  mExecutor = nullptr;
  mTreeBuilder = nullptr;
  mTokenizer = nullptr;
  mOwner = nullptr;
#endif
}
251
252
/**
 * Queries the current request for nsIChannel.
 *
 * @param aChannel out-param receiving the channel.
 * @return NS_ERROR_NOT_AVAILABLE when there is no request, otherwise the
 *         result of the QueryInterface call. Asserted to be main-thread only.
 */
nsresult nsHtml5StreamParser::GetChannel(nsIChannel** aChannel) {
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
  if (!mRequest) {
    return NS_ERROR_NOT_AVAILABLE;
  }
  return CallQueryInterface(mRequest, aChannel);
}
257
258
/**
 * nsICharsetDetectionObserver callback. Asserted to run on the parser thread.
 *
 * Only eBestAnswer / eSureAnswer confidences are acted upon, and only when
 * the reported label resolves to an encoding. Depending on whether a decoder
 * already exists, the detected encoding is either adopted, confirmed, or a
 * charset switch is requested from the tree builder.
 *
 * @param aCharset label reported by the detector.
 * @param aConf    confidence of the report.
 * @return always NS_OK.
 */
NS_IMETHODIMP
nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  if (aConf != eBestAnswer && aConf != eSureAnswer) {
    return NS_OK;
  }

  mFeedChardet = false;  // just in case
  auto detected = Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
  if (!detected) {
    return NS_OK;
  }

  if (!HasDecoder()) {
    // Got a confident answer from the sniffing buffer. That code will
    // take care of setting up the decoder.
    mEncoding = WrapNotNull(detected);
    mCharsetSource = kCharsetFromAutoDetection;
    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
    return NS_OK;
  }

  if (mEncoding == detected) {
    // The detector agrees with the decoder already in use; just upgrade the
    // source of the decision.
    MOZ_ASSERT(mCharsetSource < kCharsetFromAutoDetection,
               "Why are we running chardet at all?");
    mCharsetSource = kCharsetFromAutoDetection;
    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
    return NS_OK;
  }

  // We've already committed to a decoder. Request a reload from the
  // docshell.
  mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(detected),
                                     kCharsetFromAutoDetection, 0);
  FlushTreeOpsAndDisarmTimer();
  Interrupt();
  return NS_OK;
}
292
293
void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
294
bool aLast) {
295
const Encoding* detected = mJapaneseDetector->Feed(aBuffer, aLast);
296
if (!detected) {
297
return;
298
}
299
mFeedChardet = false;
300
if (mDecodingLocalFileAsUTF8 && detected != ISO_2022_JP_ENCODING) {
301
return;
302
}
303
int32_t source = kCharsetFromAutoDetection;
304
if (mCharsetSource == kCharsetFromParentForced ||
305
mCharsetSource == kCharsetFromUserForced) {
306
source = kCharsetFromUserForcedAutoDetection;
307
}
308
if (detected == mEncoding) {
309
MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?");
310
mCharsetSource = source;
311
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
312
} else if (HasDecoder()) {
313
// We've already committed to a decoder. Request a reload from the
314
// docshell.
315
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(detected), source, 0);
316
FlushTreeOpsAndDisarmTimer();
317
Interrupt();
318
} else {
319
// Got a confident answer from the sniffing buffer. That code will
320
// take care of setting up the decoder.
321
mEncoding = WrapNotNull(detected);
322
mCharsetSource = source;
323
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
324
}
325
}
326
327
void nsHtml5StreamParser::FeedDetector(Span<const uint8_t> aBuffer,
328
bool aLast) {
329
if (mEncoding->IsJapaneseLegacy()) {
330
FeedJapaneseDetector(aBuffer, aLast);
331
} else if (mEncoding == WINDOWS_1251_ENCODING && mChardet &&
332
!mDecodingLocalFileAsUTF8) {
333
if (!aBuffer.IsEmpty()) {
334
bool dontFeed = false;
335
mozilla::Unused << mChardet->DoIt((const char*)aBuffer.Elements(),
336
aBuffer.Length(), &dontFeed);
337
if (dontFeed) {
338
mFeedChardet = false;
339
}
340
}
341
if (aLast) {
342
mozilla::Unused << mChardet->Done();
343
}
344
} else {
345
mFeedChardet = false;
346
}
347
}
348
349
/**
 * Records the title to show for view-source and, when the docshell is watched
 * by devtools, remembers the URI plus a freshly generated UUID so that content
 * can later be reported to devtools.
 *
 * For a view-source: URI the inner URI is used for the title. data: URIs are
 * abbreviated to "data:" followed by an ellipsis. When no usable URI or spec
 * can be obtained the title is a lone ellipsis.
 *
 * @param aURL the document URI; may be null, in which case the title is left
 *             untouched.
 */
void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) {
  MOZ_ASSERT(NS_IsMainThread());

  nsIDocShell* docshell = mExecutor->GetDocument()->GetDocShell();
  if (docshell && docshell->GetWatchedByDevtools()) {
    mURIToSendToDevtools = aURL;

    nsID uuid;
    nsresult rv = nsContentUtils::GenerateUUIDInPlace(uuid);
    if (NS_SUCCEEDED(rv)) {
      char buffer[NSID_LENGTH];
      uuid.ToProvidedString(buffer);
      mUUIDForDevtools = NS_ConvertASCIItoUTF16(buffer);
    }
  }

  if (!aURL) {
    return;
  }

  nsCOMPtr<nsIURI> temp;
  if (aURL->SchemeIs("view-source")) {
    // Guard the QI: a failed QueryInterface would otherwise be dereferenced.
    nsCOMPtr<nsINestedURI> nested = do_QueryInterface(aURL);
    if (nested) {
      nested->GetInnerURI(getter_AddRefs(temp));
    }
  } else {
    temp = aURL;
  }

  if (!temp) {
    // No inner URI could be obtained. Use the same fallback as a failed
    // GetSpec() instead of dereferencing null. The three bytes are UTF-8 for
    // an ellipsis.
    mViewSourceTitle.AssignLiteral("\xE2\x80\xA6");
    return;
  }

  if (temp->SchemeIs("data")) {
    // Avoid showing potentially huge data: URLs. The three last bytes are
    // UTF-8 for an ellipsis.
    mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6");
  } else {
    nsresult rv = temp->GetSpec(mViewSourceTitle);
    if (NS_FAILED(rv)) {
      mViewSourceTitle.AssignLiteral("\xE2\x80\xA6");
    }
  }
}
385
386
/**
 * Creates the decoder for the encoding decided so far, then writes the
 * buffered sniffing bytes (if any) followed by the current segment through
 * WriteStreamBytes(). Asserted to run on the parser thread.
 *
 * @param aFromSegment the bytes of the current segment.
 * @return the first failure from WriteStreamBytes(), or the result of writing
 *         aFromSegment.
 */
nsresult
nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
    Span<const uint8_t> aFromSegment) {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");

  const bool decodeLocalFileAsUtf8 =
      mDecodingLocalFileAsUTF8 && mCharsetSource <= kCharsetFromFileURLGuess;
  if (decodeLocalFileAsUtf8) {
    MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
    mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
  } else {
    // Stop feeding the detector once the source is at least as strong as
    // auto-detection, except for forced overrides.
    const bool forcedOverride = mCharsetSource == kCharsetFromUserForced ||
                                mCharsetSource == kCharsetFromParentForced;
    if (!forcedOverride && mCharsetSource >= kCharsetFromAutoDetection) {
      mFeedChardet = false;
    }
    mDecodingLocalFileAsUTF8 = false;
    mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
  }

  if (mSniffingBuffer) {
    // Replay what was buffered during sniffing before the current segment.
    nsresult rv =
        WriteStreamBytes(MakeSpan(mSniffingBuffer.get(), mSniffingLength));
    NS_ENSURE_SUCCESS(rv, rv);
    mSniffingBuffer = nullptr;
  }
  mMetaScanner = nullptr;
  return WriteStreamBytes(aFromSegment);
}
411
412
/**
 * Commits to the encoding indicated by a byte order mark. Asserted to run on
 * the parser thread.
 *
 * Records the encoding with source kCharsetFromByteOrderMark, creates a
 * decoder without BOM handling, informs the tree builder, turns detector
 * feeding off, discards the sniffing buffer and meta scanner, and marks BOM
 * sniffing as over.
 *
 * @param aEncoding the encoding implied by the BOM.
 * @return always NS_OK.
 */
nsresult nsHtml5StreamParser::SetupDecodingFromBom(
    NotNull<const Encoding*> aEncoding) {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");

  // Record the decision.
  mEncoding = aEncoding;
  mCharsetSource = kCharsetFromByteOrderMark;
  mDecodingLocalFileAsUTF8 = false;
  mFeedChardet = false;

  // Act on it.
  mUnicodeDecoder = aEncoding->NewDecoderWithoutBOMHandling();
  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);

  // Sniffing state is no longer needed.
  mSniffingBuffer = nullptr;
  mMetaScanner = nullptr;
  mBomState = BOM_SNIFFING_OVER;
  return NS_OK;
}
426
427
void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(
428
Span<const uint8_t> aFromSegment) {
429
// Avoid underspecified heuristic craziness for XHR
430
if (mMode == LOAD_AS_DATA) {
431
return;
432
}
433
// Make sure there's enough data. Require room for "<title></title>"
434
if (mSniffingLength + aFromSegment.Length() < 30) {
435
return;
436
}
437
// even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1
438
bool byteZero[2] = {false, false};
439
bool byteNonZero[2] = {false, false};
440
uint32_t i = 0;
441
if (mSniffingBuffer) {
442
for (; i < mSniffingLength; ++i) {
443
if (mSniffingBuffer[i]) {
444
if (byteNonZero[1 - (i % 2)]) {
445
return;
446
}
447
byteNonZero[i % 2] = true;
448
} else {
449
if (byteZero[1 - (i % 2)]) {
450
return;
451
}
452
byteZero[i % 2] = true;
453
}
454
}
455
}
456
for (size_t j = 0; j < aFromSegment.Length(); ++j) {
457
if (aFromSegment[j]) {
458
if (byteNonZero[1 - ((i + j) % 2)]) {
459
return;
460
}
461
byteNonZero[(i + j) % 2] = true;
462
} else {
463
if (byteZero[1 - ((i + j) % 2)]) {
464
return;
465
}
466
byteZero[(i + j) % 2] = true;
467
}
468
}
469
470
if (byteNonZero[0]) {
471
mEncoding = UTF_16LE_ENCODING;
472
} else {
473
mEncoding = UTF_16BE_ENCODING;
474
}
475
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
476
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
477
mFeedChardet = false;
478
mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
479
}
480
481
void nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) {
482
if (aEncoding) {
483
nsDependentString utf16(aEncoding);
484
nsAutoCString utf8;
485
CopyUTF16toUTF8(utf16, utf8);
486
auto encoding = PreferredForInternalEncodingDecl(utf8);
487
if (encoding) {
488
mEncoding = WrapNotNull(encoding);
489
mCharsetSource = kCharsetFromMetaTag; // closest for XML
490
return;
491
}
492
// else the page declared an encoding Gecko doesn't support and we'd
493
// end up defaulting to UTF-8 anyway. Might as well fall through here
494
// right away and let the encoding be set to UTF-8 which we'd default to
495
// anyway.
496
}
497
mEncoding = UTF_8_ENCODING; // XML defaults to UTF-8 without a BOM
498
mCharsetSource = kCharsetFromMetaTag; // means confident
499
}
500
501
// A separate user data struct is used instead of passing the
502
// nsHtml5StreamParser instance as user data in order to avoid including
503
// expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts.
504
// Using a separate user data struct also avoids bloating nsHtml5StreamParser
505
// by one pointer.
506
struct UserData {
507
XML_Parser mExpat;
508
nsHtml5StreamParser* mStreamParser;
509
};
510
511
// Using no-namespace handler callbacks to avoid including expat.h in
512
// nsHtml5StreamParser.h, since doing so would cause naming conclicts.
513
static void HandleXMLDeclaration(void* aUserData, const XML_Char* aVersion,
514
const XML_Char* aEncoding, int aStandalone) {
515
UserData* ud = static_cast<UserData*>(aUserData);
516
ud->mStreamParser->SetEncodingFromExpat(
517
reinterpret_cast<const char16_t*>(aEncoding));
518
XML_StopParser(ud->mExpat, false);
519
}
520
521
static void HandleStartElement(void* aUserData, const XML_Char* aName,
522
const XML_Char** aAtts) {
523
UserData* ud = static_cast<UserData*>(aUserData);
524
XML_StopParser(ud->mExpat, false);
525
}
526
527
static void HandleEndElement(void* aUserData, const XML_Char* aName) {
528
UserData* ud = static_cast<UserData*>(aUserData);
529
XML_StopParser(ud->mExpat, false);
530
}
531
532
static void HandleComment(void* aUserData, const XML_Char* aName) {
533
UserData* ud = static_cast<UserData*>(aUserData);
534
XML_StopParser(ud->mExpat, false);
535
}
536
537
static void HandleProcessingInstruction(void* aUserData,
538
const XML_Char* aTarget,
539
const XML_Char* aData) {
540
UserData* ud = static_cast<UserData*>(aUserData);
541
XML_StopParser(ud->mExpat, false);
542
}
543
544
void nsHtml5StreamParser::FinalizeSniffingWithDetector(
545
Span<const uint8_t> aFromSegment, uint32_t aCountToSniffingLimit,
546
bool aEof) {
547
if (mSniffingBuffer) {
548
FeedDetector(MakeSpan(mSniffingBuffer.get(), mSniffingLength), false);
549
}
550
if (mFeedChardet && !aFromSegment.IsEmpty()) {
551
// Avoid buffer boundary-dependent behavior when
552
// reparsing is forbidden. If reparse is forbidden,
553
// act as if we only saw the first 1024 bytes.
554
// When reparsing isn't forbidden, buffer boundaries
555
// can have an effect on whether the page is loaded
556
// once or twice. :-(
557
FeedDetector(mReparseForbidden ? aFromSegment.To(aCountToSniffingLimit)
558
: aFromSegment,
559
false);
560
}
561
if (mFeedChardet && aEof &&
562
(!mReparseForbidden || aCountToSniffingLimit == aFromSegment.Length())) {
563
// Don't signal EOF if reparse is forbidden and we didn't pass all input
564
// to the detector above.
565
mFeedChardet = false;
566
FeedDetector(Span<const uint8_t>(), true);
567
}
568
}
569
570
/**
 * Makes the final encoding decision once sniffing cannot continue and starts
 * decoding. Asserted to run on the parser thread.
 *
 * In VIEW_SOURCE_XML mode expat is used to read the encoding from the XML
 * declaration, defaulting to UTF-8. In the other modes the BOM-less UTF-16
 * heuristic and the detector are consulted, followed by fallbacks.
 *
 * @param aFromSegment          bytes of the current segment.
 * @param aCountToSniffingLimit number of bytes of aFromSegment that fall
 *                              within the sniffing limit.
 * @param aEof                  whether the stream has ended.
 * @return the result of
 *         SetupDecodingAndWriteSniffingBufferAndCurrentSegment().
 */
nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
                                               uint32_t aCountToSniffingLimit,
                                               bool aEof) {
  MOZ_ASSERT(IsParserThread(), "Wrong thread!");
  MOZ_ASSERT(mCharsetSource < kCharsetFromUserForcedAutoDetection,
             "Should not finalize sniffing with strong decision already made.");

  if (mMode == VIEW_SOURCE_XML) {
    static const XML_Memory_Handling_Suite memsuite = {
        (void* (*)(size_t))moz_xmalloc, (void* (*)(void*, size_t))moz_xrealloc,
        free};

    static const char16_t kExpatSeparator[] = {0xFFFF, '\0'};

    static const char16_t kISO88591[] = {'I', 'S', 'O', '-', '8', '8',
                                         '5', '9', '-', '1', '\0'};

    // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML
    // documents MUST begin with a BOM. We don't support EBCDIC and such.
    // Thus, at this point, what we have is garbage or something encoded using
    // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes
    // without throwing errors when bytes have the most significant bit set
    // and without triggering expat's unknown encoding code paths. This is
    // enough to be able to use expat to parse the XML declaration in order
    // to extract the encoding name from it.
    XML_Parser expat =
        XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator);
    UserData callbackData{expat, this};
    XML_SetXmlDeclHandler(expat, HandleXMLDeclaration);
    XML_SetElementHandler(expat, HandleStartElement, HandleEndElement);
    XML_SetCommentHandler(expat, HandleComment);
    XML_SetProcessingInstructionHandler(expat, HandleProcessingInstruction);
    XML_SetUserData(expat, static_cast<void*>(&callbackData));

    // aFromSegment points to the data obtained from the current network
    // event. mSniffingBuffer (if it exists) contains the data obtained before
    // the current event. Thus, mSniffingLength bytes of mSniffingBuffer
    // followed by aCountToSniffingLimit bytes from aFromSegment are the
    // first 1024 bytes of the file (or the file as a whole if the file is
    // 1024 bytes long or shorter). Thus, we parse both buffers, but if the
    // first call succeeds already, we skip parsing the second buffer.
    bool firstParseOk = true;
    if (mSniffingBuffer) {
      const XML_Status bufferedStatus = XML_Parse(
          expat, reinterpret_cast<const char*>(mSniffingBuffer.get()),
          mSniffingLength, false);
      firstParseOk = (bufferedStatus == XML_STATUS_OK);
    }
    if (firstParseOk && mCharsetSource < kCharsetFromMetaTag) {
      mozilla::Unused << XML_Parse(
          expat, reinterpret_cast<const char*>(aFromSegment.Elements()),
          aCountToSniffingLimit, false);
    }
    XML_ParserFree(expat);

    if (mCharsetSource < kCharsetFromMetaTag) {
      // Failed to get an encoding from the XML declaration. XML defaults
      // confidently to UTF-8 in this case.
      // It is also possible that the document has an XML declaration that is
      // longer than 1024 bytes, but that case is not worth worrying about.
      mEncoding = UTF_8_ENCODING;
      mCharsetSource = kCharsetFromMetaTag;  // means confident
    }

    return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
  }

  // meta scan failed.
  if (mCharsetSource < kCharsetFromMetaPrescan && !mSkipContentSniffing) {
    // Check for BOMless UTF-16 with Basic
    // Latin content for compat with IE. See bug 631751.
    SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
  }

  // The charset may have been set by now; if the detector is still wanted,
  // give it a try. Its callback may change the charset as well.
  if (mFeedChardet) {
    FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, aEof);
  }

  if (mCharsetSource == kCharsetUninitialized) {
    // Hopefully this case is never needed, but dealing with it anyway
    mEncoding = WINDOWS_1252_ENCODING;
    mCharsetSource = kCharsetFromFallback;
    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  } else if (mCharsetSource == kCharsetFromFallback && mMode == LOAD_AS_DATA) {
    NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
    NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
    NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8");
    // Now mark charset source as non-weak to signal that we have a decision
    mCharsetSource = kCharsetFromDocTypeDefault;
    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  }

  return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
664
665
/**
 * Examines the leading bytes of the stream to decide on an encoding.
 * Asserted to run on the parser thread.
 *
 * Steps:
 *  1. BOM sniffing, possibly continuing across segments via mBomState. A BOM
 *     commits to its encoding and the rest of the segment is written out.
 *  2. With no BOM and a channel-supplied charset, decoding starts at once.
 *  3. In NORMAL, VIEW_SOURCE_HTML and LOAD_AS_DATA modes a <meta> prescan is
 *     run. A hit starts decoding, honoring user/parent overrides unless the
 *     meta-declared encoding is XSS-dangerous.
 *  4. When the sniffing limit is reached without a decision, sniffing is
 *     finalized; otherwise the segment is appended to the sniffing buffer.
 *
 * @param aFromSegment bytes of the current segment.
 * @return NS_OK, NS_ERROR_OUT_OF_MEMORY if the sniffing buffer cannot be
 *         allocated, the tree builder's broken status, or whatever the
 *         decoding setup / write path returns.
 */
nsresult nsHtml5StreamParser::SniffStreamBytes(
    Span<const uint8_t> aFromSegment) {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  nsresult rv = NS_OK;

  // mEncoding and mCharsetSource potentially have come from channel or higher
  // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
  // If we don't find a BOM, the previously set values of mEncoding and
  // mCharsetSource are not modified by the BOM sniffing here.
  for (uint32_t i = 0;
       i < aFromSegment.Length() && mBomState != BOM_SNIFFING_OVER; i++) {
    switch (mBomState) {
      case BOM_SNIFFING_NOT_STARTED:
        NS_ASSERTION(i == 0, "Bad BOM sniffing state.");
        switch (aFromSegment[0]) {
          case 0xEF:
            mBomState = SEEN_UTF_8_FIRST_BYTE;
            break;
          case 0xFF:
            mBomState = SEEN_UTF_16_LE_FIRST_BYTE;
            break;
          case 0xFE:
            mBomState = SEEN_UTF_16_BE_FIRST_BYTE;
            break;
          default:
            mBomState = BOM_SNIFFING_OVER;
            break;
        }
        break;
      case SEEN_UTF_16_LE_FIRST_BYTE:
        if (aFromSegment[i] == 0xFE) {
          rv = SetupDecodingFromBom(UTF_16LE_ENCODING);
          NS_ENSURE_SUCCESS(rv, rv);
          return WriteStreamBytes(aFromSegment.From(i + 1));
        }
        mBomState = BOM_SNIFFING_OVER;
        break;
      case SEEN_UTF_16_BE_FIRST_BYTE:
        if (aFromSegment[i] == 0xFF) {
          rv = SetupDecodingFromBom(UTF_16BE_ENCODING);
          NS_ENSURE_SUCCESS(rv, rv);
          return WriteStreamBytes(aFromSegment.From(i + 1));
        }
        mBomState = BOM_SNIFFING_OVER;
        break;
      case SEEN_UTF_8_FIRST_BYTE:
        if (aFromSegment[i] == 0xBB) {
          mBomState = SEEN_UTF_8_SECOND_BYTE;
        } else {
          mBomState = BOM_SNIFFING_OVER;
        }
        break;
      case SEEN_UTF_8_SECOND_BYTE:
        if (aFromSegment[i] == 0xBF) {
          rv = SetupDecodingFromBom(UTF_8_ENCODING);
          NS_ENSURE_SUCCESS(rv, rv);
          return WriteStreamBytes(aFromSegment.From(i + 1));
        }
        mBomState = BOM_SNIFFING_OVER;
        break;
      default:
        mBomState = BOM_SNIFFING_OVER;
        break;
    }
  }
  // if we get here, there either was no BOM or the BOM sniffing isn't complete
  // yet

  MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark,
             "Should not come here if BOM was found.");
  MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent,
             "kCharsetFromOtherComponent is for XSLT.");

  if (mBomState == BOM_SNIFFING_OVER && mCharsetSource == kCharsetFromChannel) {
    // There was no BOM and the charset came from channel. mEncoding
    // still contains the charset from the channel as set by an
    // earlier call to SetDocumentCharset(), since we didn't find a BOM and
    // overwrite mEncoding. (Note that if the user has overridden the charset,
    // we don't come here but check <meta> for XSS-dangerous charsets first.)
    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
    return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
  }

  // The <meta> prescan only applies to these modes.
  const bool prescanMeta = mMode == NORMAL || mMode == VIEW_SOURCE_HTML ||
                           mMode == LOAD_AS_DATA;

  if (!mMetaScanner && prescanMeta) {
    mMetaScanner = new nsHtml5MetaScanner(mTreeBuilder);
  }

  if (mSniffingLength + aFromSegment.Length() >= SNIFFING_BUFFER_SIZE) {
    // this is the last buffer
    uint32_t countToSniffingLimit = SNIFFING_BUFFER_SIZE - mSniffingLength;
    if (prescanMeta) {
      nsHtml5ByteReadable readable(
          aFromSegment.Elements(),
          aFromSegment.Elements() + countToSniffingLimit);
      auto encoding = mMetaScanner->sniff(&readable);
      // Due to the way nsHtml5Portability reports OOM, ask the tree builder
      rv = mTreeBuilder->IsBroken();
      if (NS_FAILED(rv)) {
        MarkAsBroken(rv);
        return rv;
      }
      if (encoding) {
        // meta scan successful; honor overrides unless meta is XSS-dangerous
        if ((mCharsetSource == kCharsetFromParentForced ||
             mCharsetSource == kCharsetFromUserForced) &&
            (encoding->IsAsciiCompatible() ||
             encoding == ISO_2022_JP_ENCODING)) {
          // Honor override
          if (mEncoding->IsJapaneseLegacy()) {
            mFeedChardet = true;
            FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
                                         false);
          }
          return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
              aFromSegment);
        }
        mEncoding = WrapNotNull(encoding);
        mCharsetSource = kCharsetFromMetaPrescan;
        mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
        return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
            aFromSegment);
      }
    }
    if (mCharsetSource == kCharsetFromParentForced ||
        mCharsetSource == kCharsetFromUserForced) {
      // meta not found, honor override
      if (mEncoding->IsJapaneseLegacy()) {
        mFeedChardet = true;
        FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit, false);
      }
      return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
    }
    return FinalizeSniffing(aFromSegment, countToSniffingLimit, false);
  }

  // not the last buffer
  if (prescanMeta) {
    nsHtml5ByteReadable readable(
        aFromSegment.Elements(),
        aFromSegment.Elements() + aFromSegment.Length());
    auto encoding = mMetaScanner->sniff(&readable);
    // Due to the way nsHtml5Portability reports OOM, ask the tree builder
    rv = mTreeBuilder->IsBroken();
    if (NS_FAILED(rv)) {
      MarkAsBroken(rv);
      return rv;
    }
    if (encoding) {
      // meta scan successful; honor overrides unless meta is XSS-dangerous
      if ((mCharsetSource == kCharsetFromParentForced ||
           mCharsetSource == kCharsetFromUserForced) &&
          (encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
        // Honor override
        return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
            aFromSegment);
      }
      mEncoding = WrapNotNull(encoding);
      mCharsetSource = kCharsetFromMetaPrescan;
      mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
      return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
    }
  }

  // No decision yet: stash the segment so it can be replayed once the
  // encoding is known. The size check above means it fits below
  // SNIFFING_BUFFER_SIZE.
  if (!mSniffingBuffer) {
    mSniffingBuffer = MakeUniqueFallible<uint8_t[]>(SNIFFING_BUFFER_SIZE);
    if (!mSniffingBuffer) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
  }
  memcpy(&mSniffingBuffer[mSniffingLength], aFromSegment.Elements(),
         aFromSegment.Length());
  mSniffingLength += aFromSegment.Length();
  return NS_OK;
}
841
842
class AddContentRunnable : public Runnable {
843
public:
844
AddContentRunnable(const nsAString& aParserID, nsIURI* aURI,
845
Span<const char16_t> aData, bool aComplete)
846
: Runnable("AddContent") {
847
nsAutoCString spec;
848
aURI->GetSpec(spec);
849
mData.mUri.Construct(NS_ConvertUTF8toUTF16(spec));
850
mData.mParserID.Construct(aParserID);
851
mData.mContents.Construct(aData.Elements(), aData.Length());
852
mData.mComplete.Construct(aComplete);
853
}
854
855
NS_IMETHOD Run() override {
856
nsAutoString json;
857
if (!mData.ToJSON(json)) {
858
return NS_ERROR_FAILURE;
859
}
860
861
nsCOMPtr<nsIObserverService> obsService = services::GetObserverService();
862
if (obsService) {
863
obsService->NotifyObservers(nullptr, "devtools-html-content",
864
PromiseFlatString(json).get());
865
}
866
867
return NS_OK;
868
}
869
870
HTMLContent mData;
871
};
872
873
// Forwards freshly decoded text to the main thread for devtools. Does nothing
// when no devtools URI is set.
inline void nsHtml5StreamParser::OnNewContent(Span<const char16_t> aData) {
  if (!mURIToSendToDevtools) {
    return;
  }
  NS_DispatchToMainThread(
      new AddContentRunnable(mUUIDForDevtools, mURIToSendToDevtools, aData,
                             /* aComplete */ false));
}
880
881
inline void nsHtml5StreamParser::OnContentComplete() {
882
if (mURIToSendToDevtools) {
883
NS_DispatchToMainThread(new AddContentRunnable(
884
mUUIDForDevtools, mURIToSendToDevtools, Span<const char16_t>(),
885
/* aComplete */ true));
886
mURIToSendToDevtools = nullptr;
887
}
888
}
889
890
// Decodes aFromSegment with mUnicodeDecoder and appends the resulting UTF-16
// to the tail of mLastBuffer, chaining a new READ_BUFFER_SIZE buffer whenever
// the decoder reports that the output is full.
//
// Returns NS_OK on success (including when a local-file re-decode was
// triggered), NS_ERROR_NULL_POINTER if mLastBuffer is unexpectedly null, or
// NS_ERROR_OUT_OF_MEMORY if a new buffer cannot be allocated. The parser is
// marked as broken before either error is returned.
nsresult nsHtml5StreamParser::WriteStreamBytes(
891
Span<const uint8_t> aFromSegment) {
892
NS_ASSERTION(IsParserThread(), "Wrong thread!");
893
// mLastBuffer should always point to a buffer of the size
894
// READ_BUFFER_SIZE.
895
if (!mLastBuffer) {
896
NS_WARNING("mLastBuffer should not be null!");
897
MarkAsBroken(NS_ERROR_NULL_POINTER);
898
return NS_ERROR_NULL_POINTER;
899
}
900
// Running count of consumed input bytes; only used by the assertion below.
size_t totalRead = 0;
901
// Remaining, not yet decoded part of the input.
auto src = aFromSegment;
902
for (;;) {
903
// Unused space at the end of the current last buffer.
auto dst = mLastBuffer->TailAsSpan(READ_BUFFER_SIZE);
904
uint32_t result;
905
size_t read;
906
size_t written;
907
bool hadErrors;
908
// The final argument is false: more input may follow this segment.
Tie(result, read, written, hadErrors) =
909
mUnicodeDecoder->DecodeToUTF16(src, dst, false);
910
// While a local file is still being tentatively decoded as UTF-8, the
// text is not reported here; CommitLocalFileToUTF8() reports it later.
if (!mDecodingLocalFileAsUTF8) {
911
OnNewContent(dst.To(written));
912
}
913
// Only the first decoding error is acted upon.
if (hadErrors && !mHasHadErrors) {
914
if (mDecodingLocalFileAsUTF8) {
915
// The tentative UTF-8 decode failed: start over with mEncoding.
ReDecodeLocalFile();
916
return NS_OK;
917
}
918
mHasHadErrors = true;
919
if (mEncoding == UTF_8_ENCODING) {
920
mTreeBuilder->TryToEnableEncodingMenu();
921
}
922
}
923
src = src.From(read);
924
totalRead += read;
925
mLastBuffer->AdvanceEnd(written);
926
if (result == kOutputFull) {
927
// Output space exhausted: chain a fresh buffer and keep decoding.
RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
928
nsHtml5OwningUTF16Buffer::FalliblyCreate(READ_BUFFER_SIZE);
929
if (!newBuf) {
930
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
931
return NS_ERROR_OUT_OF_MEMORY;
932
}
933
mLastBuffer = (mLastBuffer->next = newBuf.forget());
934
} else {
935
MOZ_ASSERT(totalRead == aFromSegment.Length(),
936
"The Unicode decoder consumed the wrong number of bytes.");
937
// Once exactly LOCAL_FILE_UTF_8_BUFFER_SIZE bytes have been buffered
// without hitting the error path above, commit to UTF-8.
if (mDecodingLocalFileAsUTF8 &&
938
mLocalFileBytesBuffered == LOCAL_FILE_UTF_8_BUFFER_SIZE) {
939
CommitLocalFileToUTF8();
940
}
941
return NS_OK;
942
}
943
}
944
}
945
946
void nsHtml5StreamParser::ReDecodeLocalFile() {
947
MOZ_ASSERT(mDecodingLocalFileAsUTF8);
948
mDecodingLocalFileAsUTF8 = false;
949
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
950
mHasHadErrors = false;
951
952
// We need the detector to start with fresh state.
953
// Turn off ISO-2022-JP detection, because if this doc was
954
// ISO-2022-JP, it would have already been detected.
955
mJapaneseDetector = mozilla::JapaneseDetector::Create(false);
956
mFeedChardet = true;
957
958
// Throw away previous decoded data
959
mLastBuffer = mFirstBuffer;
960
mLastBuffer->next = nullptr;
961
mLastBuffer->setStart(0);
962
mLastBuffer->setEnd(0);
963
964
// Decode again
965
for (auto&& buffer : mBufferedLocalFileData) {
966
DoDataAvailable(buffer);
967
}
968
}
969
970
void nsHtml5StreamParser::CommitLocalFileToUTF8() {
971
MOZ_ASSERT(mDecodingLocalFileAsUTF8);
972
mDecodingLocalFileAsUTF8 = false;
973
mFeedChardet = false;
974
mEncoding = UTF_8_ENCODING;
975
mCharsetSource = kCharsetFromFileURLGuess;
976
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
977
978
nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer;
979
while (buffer) {
980
Span<const char16_t> data(buffer->getBuffer() + buffer->getStart(),
981
buffer->getLength());
982
OnNewContent(data);
983
buffer = buffer->next;
984
}
985
}
986
987
class MaybeRunCollector : public Runnable {
988
public:
989
explicit MaybeRunCollector(nsIDocShell* aDocShell)
990
: Runnable("MaybeRunCollector"), mDocShell(aDocShell) {}
991
992
NS_IMETHOD Run() override {
993
nsJSContext::MaybeRunNextCollectorSlice(mDocShell,
994
JS::GCReason::HTML_PARSER);
995
return NS_OK;
996
}
997
998
nsCOMPtr<nsIDocShell> mDocShell;
999
};
1000
1001
// Main-thread start-of-stream handler. Records the request, starts the
// tokenizer, tree builder and executor, allocates the first decode buffer,
// decides whether reparsing and encoding detection are permitted, and tries
// to retarget data delivery to the parser thread (mEventTarget).
//
// Returns the failure code of WillBuildModel() if it fails, the result of
// mExecutor->MarkAsBroken() if the first buffer cannot be allocated, and
// NS_OK otherwise.
nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
1002
MOZ_RELEASE_ASSERT(STREAM_NOT_STARTED == mStreamState,
1003
"Got OnStartRequest when the stream had already started.");
1004
MOZ_ASSERT(
1005
!mExecutor->HasStarted(),
1006
"Got OnStartRequest at the wrong stage in the executor life cycle.");
1007
NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
1008
if (mObserver) {
1009
mObserver->OnStartRequest(aRequest);
1010
}
1011
mRequest = aRequest;
1012
// NOTE(review): myChannel is dereferenced without a null check; this
// presumably relies on aRequest always being an nsIChannel -- confirm.
nsCOMPtr<nsIChannel> myChannel(do_QueryInterface(aRequest));
1013
nsCOMPtr<nsILoadInfo> loadInfo = myChannel->LoadInfo();
1014
mSkipContentSniffing = loadInfo->GetSkipContentSniffing();
1015
1016
if (mSkipContentSniffing) {
1017
mFeedChardet = false;
1018
}
1019
1020
mStreamState = STREAM_BEING_READ;
1021
1022
if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
1023
mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
1024
}
1025
1026
// For View Source, the parser should run with scripts "enabled" if a normal
1027
// load would have scripts enabled.
1028
bool scriptingEnabled =
1029
mMode == LOAD_AS_DATA ? false : mExecutor->IsScriptEnabled();
1030
mOwner->StartTokenizer(scriptingEnabled);
1031
1032
MOZ_ASSERT(!mDecodingLocalFileAsUTF8);
1033
bool isSrcdoc = false;
1034
nsCOMPtr<nsIChannel> channel;
1035
nsresult rv = GetChannel(getter_AddRefs(channel));
1036
if (NS_SUCCEEDED(rv)) {
1037
isSrcdoc = NS_IsSrcdocChannel(channel);
1038
// For non-srcdoc loads whose charset source is at most
// kCharsetFromFileURLGuess: resource: original URIs are treated as
// UTF-8, and loads whose innermost URI uses the file: scheme are
// tentatively decoded as UTF-8.
if (!isSrcdoc && mCharsetSource <= kCharsetFromFileURLGuess) {
1039
nsCOMPtr<nsIURI> originalURI;
1040
rv = channel->GetOriginalURI(getter_AddRefs(originalURI));
1041
if (NS_SUCCEEDED(rv)) {
1042
if (originalURI->SchemeIs("resource")) {
1043
mCharsetSource = kCharsetFromBuiltIn;
1044
mEncoding = UTF_8_ENCODING;
1045
} else {
1046
nsCOMPtr<nsIURI> currentURI;
1047
rv = channel->GetURI(getter_AddRefs(currentURI));
1048
if (NS_SUCCEEDED(rv)) {
1049
nsCOMPtr<nsIURI> innermost = NS_GetInnermostURI(currentURI);
1050
mDecodingLocalFileAsUTF8 = innermost->SchemeIs("file");
1051
}
1052
}
1053
}
1054
}
1055
}
1056
mTreeBuilder->setIsSrcdocDocument(isSrcdoc);
1057
mTreeBuilder->setScriptingEnabled(scriptingEnabled);
1058
// Script execution is only allowed in NORMAL mode with scripting enabled.
mTreeBuilder->SetPreventScriptExecution(
1059
!((mMode == NORMAL) && scriptingEnabled));
1060
mTokenizer->start();
1061
mExecutor->Start();
1062
mExecutor->StartReadingFromStage();
1063
1064
if (mMode == PLAIN_TEXT) {
1065
mTreeBuilder->StartPlainText();
1066
mTokenizer->StartPlainText();
1067
} else if (mMode == VIEW_SOURCE_PLAIN) {
1068
nsAutoString viewSourceTitle;
1069
CopyUTF8toUTF16(mViewSourceTitle, viewSourceTitle);
1070
mTreeBuilder->EnsureBufferSpace(viewSourceTitle.Length());
1071
mTreeBuilder->StartPlainTextViewSource(viewSourceTitle);
1072
mTokenizer->StartPlainText();
1073
}
1074
1075
/*
1076
* If you move the following line, be very careful not to cause
1077
* WillBuildModel to be called before the document has had its
1078
* script global object set.
1079
*/
1080
rv = mExecutor->WillBuildModel(eDTDMode_unknown);
1081
NS_ENSURE_SUCCESS(rv, rv);
1082
1083
RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
1084
nsHtml5OwningUTF16Buffer::FalliblyCreate(READ_BUFFER_SIZE);
1085
if (!newBuf) {
1086
// marks this stream parser as terminated,
1087
// which prevents entry to code paths that
1088
// would use mFirstBuffer or mLastBuffer.
1089
return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
1090
}
1091
MOZ_ASSERT(!mFirstBuffer, "How come we have the first buffer set?");
1092
MOZ_ASSERT(!mLastBuffer, "How come we have the last buffer set?");
1093
mFirstBuffer = mLastBuffer = newBuf;
1094
1095
rv = NS_OK;
1096
1097
// The line below means that the encoding can end up being wrong if
1098
// a view-source URL is loaded without having the encoding hint from a
1099
// previous normal load in the history.
1100
mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT);
1101
1102
mDocGroup = mExecutor->GetDocument()->GetDocGroup();
1103
1104
nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv));
1105
if (NS_SUCCEEDED(rv)) {
1106
// Non-HTTP channels are bogus enough that we let them work with unlabeled
1107
// runnables for now. Asserting for HTTP channels only.
1108
MOZ_ASSERT(mDocGroup || mMode == LOAD_AS_DATA,
1109
"How come the doc group is still null?");
1110
1111
nsAutoCString method;
1112
Unused << httpChannel->GetRequestMethod(method);
1113
// XXX does Necko have a way to renavigate POST, etc. without hitting
1114
// the network?
1115
if (!method.EqualsLiteral("GET")) {
1116
// This is the old Gecko behavior but the HTML5 spec disagrees.
1117
// Don't reparse on POST.
1118
mReparseForbidden = true;
1119
mFeedChardet = false; // can't restart anyway
1120
}
1121
}
1122
1123
// Attempt to retarget delivery of data (via OnDataAvailable) to the parser
1124
// thread, rather than through the main thread.
1125
nsCOMPtr<nsIThreadRetargetableRequest> threadRetargetableRequest =
1126
do_QueryInterface(mRequest, &rv);
1127
if (threadRetargetableRequest) {
1128
rv = threadRetargetableRequest->RetargetDeliveryTo(mEventTarget);
1129
if (NS_SUCCEEDED(rv)) {
1130
// Parser thread should be now ready to get data from necko and parse it
1131
// and main thread might have a chance to process a collector slice.
1132
// We need to do this asynchronously so that necko may continue processing
1133
// the request.
1134
nsCOMPtr<nsIRunnable> runnable =
1135
new MaybeRunCollector(mExecutor->GetDocument()->GetDocShell());
1136
mozilla::SystemGroup::Dispatch(mozilla::TaskCategory::GarbageCollection,
1137
runnable.forget());
1138
}
1139
}
1140
1141
// rv is a failure here either when the request does not support
// nsIThreadRetargetableRequest or when RetargetDeliveryTo() failed. This is
// only warned about; the function still carries on.
if (NS_FAILED(rv)) {
1142
NS_WARNING("Failed to retarget HTML data delivery to the parser thread.");
1143
}
1144
1145
if (mCharsetSource == kCharsetFromParentFrame) {
1146
// Remember this in case chardet overwrites mCharsetSource
1147
mInitialEncodingWasFromParentFrame = true;
1148
}
1149
1150
// Charset sources at or above kCharsetFromAutoDetection turn detection off,
// except for the two forced-override sources.
if (mCharsetSource >= kCharsetFromAutoDetection &&
1151
!(mCharsetSource == kCharsetFromParentForced ||
1152
mCharsetSource == kCharsetFromUserForced)) {
1153
mFeedChardet = false;
1154
}
1155
1156
if (mCharsetSource < kCharsetFromUtf8OnlyMime) {
1157
// we aren't ready to commit to an encoding yet
1158
// leave converter uninstantiated for now
1159
return NS_OK;
1160
}
1161
1162
// We are loading JSON/WebVTT/etc. into a browsing context.
1163
// There's no need to remove the BOM manually here, because
1164
// the UTF-8 decoder removes it.
1165
mReparseForbidden = true;
1166
mFeedChardet = false;
1167
1168
// Instantiate the converter here to avoid BOM sniffing.
1169
mDecodingLocalFileAsUTF8 = false;
1170
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
1171
return NS_OK;
1172
}
1173
1174
// Reports whether the listener chain supports off-main-thread delivery.
// Without an observer the answer is NS_OK. Otherwise the result is the
// observer's own CheckListenerChain() if it implements
// nsIThreadRetargetableStreamListener, or the QueryInterface result if not.
nsresult nsHtml5StreamParser::CheckListenerChain() {
  NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!");
  if (!mObserver) {
    return NS_OK;
  }

  nsresult queryResult;
  nsCOMPtr<nsIThreadRetargetableStreamListener> retargetableObserver =
      do_QueryInterface(mObserver, &queryResult);
  if (NS_FAILED(queryResult) || !retargetableObserver) {
    return queryResult;
  }
  return retargetableObserver->CheckListenerChain();
}
1187
1188
// Parser-thread end-of-stream handler. Finalizes encoding sniffing if no
// decoder exists yet, flushes the detector and the decoder (signalling end of
// input), marks the stream as ended and parses whatever data remains.
// OnContentComplete() runs on every exit path via the scope-exit guard.
// The caller must hold mTokenizerMutex.
void nsHtml5StreamParser::DoStopRequest() {
1189
NS_ASSERTION(IsParserThread(), "Wrong thread!");
1190
MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState,
1191
"Stream ended without being open.");
1192
mTokenizerMutex.AssertCurrentThreadOwns();
1193
1194
// Runs OnContentComplete() whenever this function returns.
auto guard = MakeScopeExit([&] { OnContentComplete(); });
1195
1196
if (IsTerminated()) {
1197
return;
1198
}
1199
1200
// No decoder yet means sniffing never finished; finish it now with an
// empty final segment.
if (!mUnicodeDecoder) {
1201
nsresult rv;
1202
Span<const uint8_t> empty;
1203
if (NS_FAILED(rv = FinalizeSniffing(empty, 0, true))) {
1204
MarkAsBroken(rv);
1205
return;
1206
}
1207
}
1208
if (mFeedChardet) {
1209
mFeedChardet = false;
1210
// Empty input with the last-buffer flag set.
FeedDetector(Span<uint8_t>(), true);
1211
}
1212
1213
MOZ_ASSERT(mUnicodeDecoder,
1214
"Should have a decoder after finalizing sniffing.");
1215
1216
// mLastBuffer should always point to a buffer of the size
1217
// READ_BUFFER_SIZE.
1218
if (!mLastBuffer) {
1219
NS_WARNING("mLastBuffer should not be null!");
1220
MarkAsBroken(NS_ERROR_NULL_POINTER);
1221
return;
1222
}
1223
1224
Span<uint8_t> src; // empty span
1225
// Flush the decoder: decode an empty input with the "last" flag set until
// the decoder no longer reports a full output buffer.
for (;;) {
1226
auto dst = mLastBuffer->TailAsSpan(READ_BUFFER_SIZE);
1227
uint32_t result;
1228
size_t read;
1229
size_t written;
1230
bool hadErrors;
1231
Tie(result, read, written, hadErrors) =
1232
mUnicodeDecoder->DecodeToUTF16(src, dst, true);
1233
if (!mDecodingLocalFileAsUTF8) {
1234
OnNewContent(dst.To(written));
1235
}
1236
// Only the first decoding error is acted upon.
if (hadErrors && !mHasHadErrors) {
1237
if (mDecodingLocalFileAsUTF8) {
1238
// The tentative UTF-8 decode failed at the very end: re-decode the
// buffered bytes, then run the stop handling again from the top.
ReDecodeLocalFile();
1239
DoStopRequest();
1240
return;
1241
}
1242
mHasHadErrors = true;
1243
if (mEncoding == UTF_8_ENCODING) {
1244
mTreeBuilder->TryToEnableEncodingMenu();
1245
}
1246
}
1247
MOZ_ASSERT(read == 0, "How come an empty span was read form?");
1248
mLastBuffer->AdvanceEnd(written);
1249
if (result == kOutputFull) {
1250
RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
1251
nsHtml5OwningUTF16Buffer::FalliblyCreate(READ_BUFFER_SIZE);
1252
if (!newBuf) {
1253
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
1254
return;
1255
}
1256
mLastBuffer = (mLastBuffer->next = newBuf.forget());
1257
} else {
1258
// The stream ended while still in tentative UTF-8 mode (asserted to be
// before the buffering threshold was reached): commit to UTF-8 now.
if (mDecodingLocalFileAsUTF8) {
1259
MOZ_ASSERT(mLocalFileBytesBuffered < LOCAL_FILE_UTF_8_BUFFER_SIZE);
1260
CommitLocalFileToUTF8();
1261
}
1262
break;
1263
}
1264
}
1265
1266
mStreamState = STREAM_ENDED;
1267
1268
if (IsTerminatedOrInterrupted()) {
1269
return;
1270
}
1271
1272
ParseAvailableData();
1273
}
1274
1275
class nsHtml5RequestStopper : public Runnable {
1276
private:
1277
nsHtml5StreamParserPtr mStreamParser;
1278
1279
public:
1280
explicit nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser)
1281
: Runnable("nsHtml5RequestStopper"), mStreamParser(aStreamParser) {}
1282
NS_IMETHOD Run() override {
1283
mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
1284
mStreamParser->DoStopRequest();
1285
return NS_OK;
1286
}
1287
};
1288
1289
// Main-thread end-of-stream notification. Forwards the notification to the
// observer, if any, and dispatches the actual stop handling to mEventTarget.
// Always returns NS_OK; a failed dispatch is only warned about.
nsresult nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest,
                                            nsresult status) {
  NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream.");
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");

  if (mObserver) {
    mObserver->OnStopRequest(aRequest, status);
  }

  nsCOMPtr<nsIRunnable> stopRunnable = new nsHtml5RequestStopper(this);
  const nsresult dispatchResult =
      mEventTarget->Dispatch(stopRunnable, nsIThread::DISPATCH_NORMAL);
  if (NS_FAILED(dispatchResult)) {
    NS_WARNING("Dispatching StopRequest event failed.");
  }
  return NS_OK;
}
1302
1303
// Entry point for data that arrives as an owned buffer. Outside tentative
// local-file UTF-8 mode it simply forwards to DoDataAvailable(). In that mode
// the raw bytes are additionally retained in mBufferedLocalFileData (up to
// LOCAL_FILE_UTF_8_BUFFER_SIZE bytes in total) so they can be re-decoded, and
// a buffer crossing the threshold is split so that one DoDataAvailable() call
// ends exactly at the threshold.
void nsHtml5StreamParser::DoDataAvailableBuffer(
1304
mozilla::Buffer<uint8_t>&& aBuffer) {
1305
if (MOZ_LIKELY(!mDecodingLocalFileAsUTF8)) {
1306
DoDataAvailable(aBuffer);
1307
return;
1308
}
1309
// Overflow-checked sum of the already buffered bytes and the new bytes.
CheckedInt<size_t> bufferedPlusLength(aBuffer.Length());
1310
bufferedPlusLength += mLocalFileBytesBuffered;
1311
if (!bufferedPlusLength.isValid()) {
1312
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
1313
return;
1314
}
1315
// Ensure that WriteStreamBytes() sees a buffer ending
1316
// exactly at LOCAL_FILE_UTF_8_BUFFER_SIZE
1317
// if we are about to cross the threshold. This way,
1318
// Necko buffer boundaries don't affect user-visible
1319
// behavior.
1320
if (bufferedPlusLength.value() <= LOCAL_FILE_UTF_8_BUFFER_SIZE) {
1321
// Truncation OK, because we just checked the range.
1322
mLocalFileBytesBuffered = bufferedPlusLength.value();
1323
// The buffer is moved into the array first; the decode call then reads
// it from the array, since aBuffer has been moved from.
mBufferedLocalFileData.AppendElement(std::move(aBuffer));
1324
DoDataAvailable(mBufferedLocalFileData.LastElement());
1325
} else {
1326
// Truncation OK, because the constant is small enough.
1327
size_t overBoundary =
1328
bufferedPlusLength.value() - LOCAL_FILE_UTF_8_BUFFER_SIZE;
1329
MOZ_RELEASE_ASSERT(overBoundary < aBuffer.Length());
1330
size_t untilBoundary = aBuffer.Length() - overBoundary;
1331
// head: bytes up to the threshold; tail: bytes past it. Both are views
// into aBuffer, which is not moved from on this path.
auto span = aBuffer.AsSpan();
1332
auto head = span.To(untilBoundary);
1333
auto tail = span.From(untilBoundary);
1334
MOZ_RELEASE_ASSERT(mLocalFileBytesBuffered + untilBoundary ==
1335
LOCAL_FILE_UTF_8_BUFFER_SIZE);
1336
// We make a theoretically useless copy here, because avoiding
1337
// the copy adds too much complexity.
1338
Maybe<Buffer<uint8_t>> maybe = Buffer<uint8_t>::CopyFrom(head);
1339
if (maybe.isNothing()) {
1340
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
1341
return;
1342
}
1343
mLocalFileBytesBuffered = LOCAL_FILE_UTF_8_BUFFER_SIZE;
1344
mBufferedLocalFileData.AppendElement(std::move(*maybe));
1345
1346
DoDataAvailable(head);
1347
// Re-decode may have happened here.
1348
DoDataAvailable(tail);
1349
}
1350
// Do this clean-up here to avoid use-after-free when
1351
// DoDataAvailable is passed a span pointing into an
1352
// element of mBufferedLocalFileData.
1353
if (!mDecodingLocalFileAsUTF8) {
1354
mBufferedLocalFileData.Clear();
1355
}
1356
}
1357
1358
// Processes one chunk of raw stream bytes on the parser thread. The caller
// must hold mTokenizerMutex.
//
// With a decoder in place, the bytes are (optionally) fed to the encoding
// detector and then decoded via WriteStreamBytes(); otherwise they go to
// SniffStreamBytes(). Any failure marks the parser as broken. Unless the
// parser is terminated/interrupted or still tentatively decoding a local file
// as UTF-8, the decoded data is then parsed and, if no flush timer is armed
// and the parser is not speculating, a one-shot flush timer is armed.
void nsHtml5StreamParser::DoDataAvailable(Span<const uint8_t> aBuffer) {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState,
                     "DoDataAvailable called when stream not open.");
  mTokenizerMutex.AssertCurrentThreadOwns();

  if (IsTerminated()) {
    return;
  }

  nsresult rv;
  if (HasDecoder()) {
    if (mFeedChardet) {
      FeedDetector(aBuffer, false);
    }
    rv = WriteStreamBytes(aBuffer);
  } else {
    rv = SniffStreamBytes(aBuffer);
  }
  if (NS_FAILED(rv)) {
    MarkAsBroken(rv);
    return;
  }

  if (IsTerminatedOrInterrupted()) {
    return;
  }

  // While tentatively decoding a local file as UTF-8, nothing is parsed yet
  // (ParseAvailableData() asserts that this mode is off).
  if (mDecodingLocalFileAsUTF8) {
    return;
  }

  ParseAvailableData();

  if (mFlushTimerArmed || mSpeculating) {
    return;
  }

  {
    mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
    // Use the initial delay until the timer has fired once, and the
    // subsequent delay afterwards. (The two branches used to be swapped, so
    // the first flush waited for the "subsequent" delay and all later ones
    // for the "initial" delay.)
    mFlushTimer->InitWithNamedFuncCallback(
        nsHtml5StreamParser::TimerCallback, static_cast<void*>(this),
        mFlushTimerEverFired ? StaticPrefs::html5_flushtimer_subsequentdelay()
                             : StaticPrefs::html5_flushtimer_initialdelay(),
        nsITimer::TYPE_ONE_SHOT, "nsHtml5StreamParser::DoDataAvailable");
  }
  mFlushTimerArmed = true;
}
1406
1407
class nsHtml5DataAvailable : public Runnable {
1408
private:
1409
nsHtml5StreamParserPtr mStreamParser;
1410
Buffer<uint8_t> mData;
1411
1412
public:
1413
nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser,
1414
Buffer<uint8_t>&& aData)
1415
: Runnable("nsHtml5DataAvailable"),
1416
mStreamParser(aStreamParser),
1417
mData(std::move(aData)) {}
1418
NS_IMETHOD Run() override {
1419
mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
1420
mStreamParser->DoDataAvailableBuffer(std::move(mData));
1421
return NS_OK;
1422
}
1423
};
1424
1425
// Receives aLength bytes of stream data from aInStream. Three paths exist:
//  - on the main thread, the bytes are copied into a buffer and dispatched
//    to mEventTarget as an nsHtml5DataAvailable runnable;
//  - off the main thread in tentative local-file UTF-8 mode, the bytes are
//    copied into a buffer and passed to DoDataAvailableBuffer();
//  - otherwise the stream's segments are processed in place through
//    CopySegmentsToParser().
// Returns the executor's broken status if it is already broken, an
// out-of-memory error if a buffer cannot be allocated, or the result of
// reading from the stream.
nsresult nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
1426
nsIInputStream* aInStream,
1427
uint64_t aSourceOffset,
1428
uint32_t aLength) {
1429
nsresult rv;
1430
if (NS_FAILED(rv = mExecutor->IsBroken())) {
1431
return rv;
1432
}
1433
1434
MOZ_ASSERT(mRequest == aRequest, "Got data on wrong stream.");
1435
uint32_t totalRead;
1436
// Main thread to parser thread dispatch requires copying to buffer first.
1437
if (MOZ_UNLIKELY(NS_IsMainThread())) {
1438
Maybe<Buffer<uint8_t>> maybe = Buffer<uint8_t>::Alloc(aLength);
1439
if (maybe.isNothing()) {
1440
return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
1441
}
1442
Buffer<uint8_t> data(std::move(*maybe));
1443
rv = aInStream->Read(reinterpret_cast<char*>(data.Elements()),
1444
data.Length(), &totalRead);
1445
NS_ENSURE_SUCCESS(rv, rv);
1446
MOZ_ASSERT(totalRead == aLength);
1447
1448
nsCOMPtr<nsIRunnable> dataAvailable =
1449
new nsHtml5DataAvailable(this, std::move(data));
1450
// A failed dispatch is only warned about; rv still holds the (successful)
// result of the read.
if (NS_FAILED(mEventTarget->Dispatch(dataAvailable,
1451
nsIThread::DISPATCH_NORMAL))) {
1452
NS_WARNING("Dispatching DataAvailable event failed.");
1453
}
1454
return rv;
1455
}
1456
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
1457
// The Do* methods called below assert that the tokenizer mutex is held.
mozilla::MutexAutoLock autoLock(mTokenizerMutex);
1458
1459
if (MOZ_UNLIKELY(mDecodingLocalFileAsUTF8)) {
1460
// It's a bit sad to potentially buffer the first 1024
1461
// bytes in two places, but it's a lot simpler than trying
1462
// to optimize out that copy. It only happens for local files
1463
// and not for the http(s) content anyway.
1464
Maybe<Buffer<uint8_t>> maybe = Buffer<uint8_t>::Alloc(aLength);
1465
if (maybe.isNothing()) {
1466
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
1467
return NS_ERROR_OUT_OF_MEMORY;
1468
}
1469
Buffer<uint8_t> data(std::move(*maybe));
1470
rv = aInStream->Read(reinterpret_cast<char*>(data.Elements()),
1471
data.Length(), &totalRead);
1472
NS_ENSURE_SUCCESS(rv, rv);
1473
MOZ_ASSERT(totalRead == aLength);
1474
DoDataAvailableBuffer(std::move(data));
1475
return rv;
1476
}
1477
// Read directly from response buffer.
1478
rv = aInStream->ReadSegments(CopySegmentsToParser, this, aLength, &totalRead);
1479
NS_ENSURE_SUCCESS(rv, rv);
1480
MOZ_ASSERT(totalRead == aLength);
1481
return rv;
1482
}
1483
1484
/* static */
1485
nsresult nsHtml5StreamParser::CopySegmentsToParser(
1486
nsIInputStream* aInStream, void* aClosure, const char* aFromSegment,
1487
uint32_t aToOffset, uint32_t aCount, uint32_t* aWriteCount) {
1488
nsHtml5StreamParser* parser = static_cast<nsHtml5StreamParser*>(aClosure);
1489
1490
parser->DoDataAvailable(AsBytes(MakeSpan(aFromSegment, aCount)));
1491
// Assume DoDataAvailable consumed all available bytes.
1492
*aWriteCount = aCount;
1493
return NS_OK;
1494
}
1495
1496
// Maps the label of an internal encoding declaration to the encoding that
// should actually be switched to.
//
// Returns nullptr when no switch is needed: either the label is unknown, or
// the resulting encoding equals the current one (in which case the parser
// becomes confident about the encoding and stops feeding the detector).
// UTF-16BE/LE declarations are mapped to UTF-8 and x-user-defined to
// windows-1252, each with a charset complaint.
const Encoding* nsHtml5StreamParser::PreferredForInternalEncodingDecl(
    const nsACString& aEncoding) {
  const Encoding* declared = Encoding::ForLabel(aEncoding);
  if (!declared) {
    // the encoding name is bogus
    mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported", true,
                                            mTokenizer->getLineNumber());
    return nullptr;
  }

  if (declared == UTF_16BE_ENCODING || declared == UTF_16LE_ENCODING) {
    mTreeBuilder->MaybeComplainAboutCharset("EncMetaUtf16", true,
                                            mTokenizer->getLineNumber());
    declared = UTF_8_ENCODING;
  } else if (declared == X_USER_DEFINED_ENCODING) {
    // WebKit/Blink hack for Indian and Armenian legacy sites
    mTreeBuilder->MaybeComplainAboutCharset("EncMetaUserDefined", true,
                                            mTokenizer->getLineNumber());
    declared = WINDOWS_1252_ENCODING;
  }

  if (declared == mEncoding) {
    if (mCharsetSource < kCharsetFromMetaPrescan) {
      // The declaration came too late to be picked up by the prescan.
      const char* lateMetaMessage = mInitialEncodingWasFromParentFrame
                                        ? "EncLateMetaFrame"
                                        : "EncLateMeta";
      mTreeBuilder->MaybeComplainAboutCharset(lateMetaMessage, false,
                                              mTokenizer->getLineNumber());
    }
    mCharsetSource = kCharsetFromMetaTag;  // become confident
    mFeedChardet = false;  // don't feed chardet when confident
    return nullptr;
  }

  return declared;
}
1536
1537
// Handles an internal encoding declaration seen during tokenization.
// Returns true when a charset switch has been requested from the tree builder
// and the parser interrupted; false when the declaration is ignored (already
// confident, no switch needed, or reparsing forbidden).
//
// This code needs to stay in sync with nsHtml5MetaScanner::tryCharset.
// Unfortunately, the trickery with member fields there leads to some
// copy-paste reuse. :-(
bool nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");

  // kCharsetFromMetaTag is the threshold that corresponds to "confident" in
  // the HTML5 spec.
  if (mCharsetSource >= kCharsetFromMetaTag) {
    return false;
  }

  // Deliberately not an Auto string: it is used to hold an nsStringBuffer*.
  nsString label16;
  aEncoding.ToString(label16);
  nsAutoCString label8;
  CopyUTF16toUTF8(label16, label8);

  auto switchTarget = PreferredForInternalEncodingDecl(label8);
  if (!switchTarget) {
    return false;
  }

  // The mReparseForbidden check comes after the call to
  // PreferredForInternalEncodingDecl so that, if that method calls
  // MaybeComplainAboutCharset, its complaint wins over the one below.
  if (mReparseForbidden) {
    mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaTooLate", true,
                                            mTokenizer->getLineNumber());
    return false;  // not reparsing even if we wanted to
  }

  // Keep chardet from asking for yet another restart after this one.
  mFeedChardet = false;
  mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(switchTarget),
                                     kCharsetFromMetaTag,
                                     mTokenizer->getLineNumber());
  FlushTreeOpsAndDisarmTimer();
  Interrupt();
  // The tree op executor will make the stream parser terminate if the charset
  // switch request is accepted, or uninterrupt it if the request failed. If
  // the restart request fails, we don't bother trying to make chardet resume;
  // chardet-requested restarts would presumably fail as well.
  return true;
}
1581
1582
void nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer() {
1583
NS_ASSERTION(IsParserThread(), "Wrong thread!");
1584
if (mFlushTimerArmed) {
1585
// avoid calling Cancel if the flush timer isn't armed to avoid acquiring
1586
// a mutex
1587
{
1588
mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
1589
mFlushTimer->Cancel();
1590
}
1591
mFlushTimerArmed = false;
1592
}
1593
if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
1594
mTokenizer->FlushViewSource();
1595
}
1596
mTreeBuilder->Flush();
1597
nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher);
1598
if (NS_FAILED(DispatchToMain(runnable.forget()))) {
1599
NS_WARNING("failed to dispatch executor flush event");
1600
}
1601
}
1602
1603
// Tokenizes the decoded data queued in the buffer chain starting at
// mFirstBuffer. Runs on the parser thread with mTokenizerMutex held and must
// not be called in tentative local-file UTF-8 mode.
//
// The loop ends when the data is exhausted (either waiting for more data or
// performing end-of-file processing once the stream has ended), when the
// parser is terminated or interrupted, or when the tree builder or an
// allocation reports failure (the parser is then marked as broken).
void nsHtml5StreamParser::ParseAvailableData() {
1604
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
1605
mTokenizerMutex.AssertCurrentThreadOwns();
1606
MOZ_ASSERT(!mDecodingLocalFileAsUTF8);
1607
1608
if (IsTerminatedOrInterrupted()) {
1609
return;
1610
}
1611
1612
if (mSpeculating && !IsSpeculationEnabled()) {
1613
return;
1614
}
1615
1616
for (;;) {
1617
if (!mFirstBuffer->hasMore()) {
1618
// The current buffer is used up. If it is also the last buffer, what
// happens next depends on whether the stream has ended.
if (mFirstBuffer == mLastBuffer) {
1619
switch (mStreamState) {
1620
case STREAM_BEING_READ:
1621
// never release the last buffer.
1622
if (!mSpeculating) {
1623
// reuse buffer space if not speculating
1624
mFirstBuffer->setStart(0);
1625
mFirstBuffer->setEnd(0);
1626
}
1627
mTreeBuilder->FlushLoads();
1628
{
1629
// Dispatch this runnable unconditionally, because the loads
1630
// that need flushing may have been flushed earlier even if the
1631
// flush right above here did nothing.
1632
nsCOMPtr<nsIRunnable> runnable(mLoadFlusher);
1633
if (NS_FAILED(DispatchToMain(runnable.forget()))) {
1634
NS_WARNING("failed to dispatch load flush event");
1635
}
1636
}
1637
return; // no more data for now but expecting more
1638
case STREAM_ENDED:
1639
// End-of-file processing happens only once; mAtEOF records that.
if (mAtEOF) {
1640
return;
1641
}
1642
mAtEOF = true;
1643
// Below the "confident" threshold: complain about the missing
// encoding declaration, with a message depending on the mode.
if (mCharsetSource < kCharsetFromMetaTag) {
1644
if (mInitialEncodingWasFromParentFrame) {
1645
// Unfortunately, this check doesn't take effect for
1646
// cross-origin frames, so cross-origin ad frames that have
1647
// no text and only an image or a Flash embed get the more
1648
// severe message from the next if block. The message is
1649
// technically accurate, though.
1650
mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationFrame",
1651
false, 0);
1652
} else if (mMode == NORMAL) {
1653
mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclaration",
1654
true, 0);
1655
} else if (mMode == PLAIN_TEXT) {
1656
mTreeBuilder->MaybeComplainAboutCharset("EncNoDeclarationPlain",
1657
true, 0);
1658
}
1659
}
1660
// The tree builder's broken state is checked both before and after
// the tokenizer's eof() call.
if (NS_SUCCEEDED(mTreeBuilder->IsBroken())) {
1661
mTokenizer->eof();
1662
nsresult rv;
1663
if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
1664
MarkAsBroken(rv);
1665
} else {
1666
mTreeBuilder->StreamEnded();
1667
if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
1668
mTokenizer->EndViewSource();
1669
}
1670
}
1671
}
1672
FlushTreeOpsAndDisarmTimer();
1673
return; // no more data and not expecting more
1674
default:
1675
MOZ_ASSERT_UNREACHABLE("It should be impossible to reach this.");
1676
return;
1677
}
1678
}
1679
// Not the last buffer: advance to the next one in the chain.
mFirstBuffer = mFirstBuffer->next;
1680
continue;
1681
}
1682
1683
// now we have a non-empty buffer
1684
mFirstBuffer->adjust(mLastWasCR);
1685
mLastWasCR = false;
1686
// adjust() may have left the buffer without remaining data, hence the
// second hasMore() check.
if (mFirstBuffer->hasMore()) {
1687
if (!mTokenizer->EnsureBufferSpace(mFirstBuffer->getLength())) {
1688
MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
1689
return;
1690
}
1691
mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
1692
nsresult rv;
1693
if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
1694
MarkAsBroken(rv);
1695
return;
1696
}
1697
// At this point, internalEncodingDeclaration() may have called
1698
// Terminate, but that never happens together with script.
1699
// Can't assert that here, though, because it's possible that the main
1700
// thread has called Terminate() while this thread was parsing.
1701
if (mTreeBuilder->HasScript()) {
1702
// HasScript() cannot return true if the tree builder is preventing
1703
// script execution.
1704
MOZ_ASSERT(mMode == NORMAL);
1705
mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
1706
// Start a speculation: remember the current buffer position, line
// number and a tree builder snapshot, and make the speculation the
// tree builder's op sink from here on.
nsHtml5Speculation* speculation = new nsHtml5Speculation(
1707
mFirstBuffer, mFirstBuffer->getStart(), mTokenizer->getLineNumber(),
1708
mTreeBuilder->newSnapshot());
1709
mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(),
1710
speculation->GetStartLineNumber());
1711
FlushTreeOpsAndDisarmTimer();
1712
mTreeBuilder->SetOpSink(speculation);
1713
mSpeculations.AppendElement(speculation); // adopts the pointer
1714
mSpeculating = true;
1715
}
1716
if (IsTerminatedOrInterrupted()) {
1717
return;
1718
}
1719
}
1720
}
1721
}
1722
1723
class nsHtml5StreamParserContinuation : public Runnable {
1724
private:
1725
nsHtml5StreamParserPtr mStreamParser;
1726
1727
public:
1728
explicit nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser)
1729
: Runnable("nsHtml5StreamParserContinuation"),
1730
mStreamParser(aStreamParser) {}
1731
NS_IMETHOD Run() override {
1732
mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
1733
mStreamParser->Uninterrupt();
1734
mStreamParser->ParseAvailableData();
1735
return NS_OK;
1736
}
1737
};
1738
1739
void nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer,
1740
nsHtml5TreeBuilder* aTreeBuilder,
1741
bool aLastWasCR) {
1742
NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
1743
NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML),
1744
"ContinueAfterScripts called in view source mode!");
1745
if (NS_FAILED(mExecutor->IsBroken())) {
1746
return;
1747
}
1748
#ifdef DEBUG
1749
mExecutor->AssertStageEmpty();
1750
#endif
1751
bool speculationFailed = false;
1752
{
1753
mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
1754
if (mSpeculations.IsEmpty()) {
1755
MOZ_ASSERT_UNREACHABLE(
1756
"ContinueAfterScripts called without "
1757
"speculations.");
1758
return;
1759
}
1760
1761
nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
1762
if (aLastWasCR || !aTokenizer->isInDataState() ||
1763
!aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) {
1764
speculationFailed = true;
1765
// We've got a failed speculation :-(
1766
MaybeDisableFutureSpeculation();
1767
Interrupt(); // Make the parser thread release the tokenizer mutex sooner
1768
// now fall out of the speculationAutoLock into the tokenizerAutoLock
1769
// block
1770
} else {
1771
// We've got a successful speculation!
1772
if (mSpeculations.Length() > 1) {
1773
// the first speculation isn't the current speculation, so there's
1774
// no need to bother the parser thread.
1775
speculation->FlushToSink(mExecutor);
1776
NS_ASSERTION(!mExecutor->IsScriptExecuting(),
1777
"ParseUntilBlocked() was supposed to ensure we don't come "
1778
"here when scripts are executing.");
1779
NS_ASSERTION(
1780
mExecutor->IsInFlushLoop(),
1781
"How are we here if "
1782
"RunFlushLoop() didn't call ParseUntilBlocked() which is the "
1783
"only caller of this method?");
1784
mSpeculations.RemoveElementAt(0);
1785
return;
1786
}
1787
// else
1788
Interrupt(); // Make the parser thread release the tokenizer mutex sooner
1789
1790
// now fall through
1791
// the first speculation is the current speculation. Need to
1792
// release the speculation mutex and acquire the tokenizer
1793
// mutex. (Just acquiring the other mutex here would deadlock)
1794
}
1795
}
1796
{
1797
mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex);
1798
#ifdef DEBUG
1799
{
1800
mAtomTable.SetPermittedLookupEventTarget(
1801
GetMainThreadSerialEventTarget());
1802
}
1803
#endif
1804
// In principle, the speculation mutex should be acquired here,
1805
// but there's no point, because the parser thread only acquires it
1806
// when it has also acquired the tokenizer mutex and we are already
1807
// holding the tokenizer mutex.
1808
if (speculationFailed) {
1809
// Rewind the stream
1810
mAtEOF = false;
1811
nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
1812
mFirstBuffer = speculation->GetBuffer();
1813
mFirstBuffer->setStart(speculation->GetStart());
1814
mTokenizer->setLineNumber(speculation->GetStartLineNumber());
1815
1816
nsContentUtils::ReportToConsole(
1817
nsIScriptError::warningFlag, NS_LITERAL_CSTRING("DOM Events"),
1818
mExecutor->GetDocument(), nsContentUtils::eDOM_PROPERTIES,
1819
"SpeculationFailed", nsTArray<nsString>(), nullptr, EmptyString(),
1820
speculation->GetStartLineNumber());
1821
1822
nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next;
1823
while (buffer) {
1824
buffer->setStart(0);
1825
buffer = buffer->next;
1826
}
1827
1828
mSpeculations.Clear(); // potentially a huge number of destructors
1829
// run here synchronously on the main thread...
1830
1831
mTreeBuilder->flushCharacters(); // empty the pending buffer
1832
mTreeBuilder->ClearOps(); // now get rid of the failed ops
1833
1834
mTreeBuilder->SetOpSink(mExecutor->GetStage());
1835
mExecutor->StartReadingFromStage();
1836
mSpeculating = false;
1837
1838
// Copy state over
1839
mLastWasCR = aLastWasCR;
1840
mTokenizer->loadState(aTokenizer);
1841
mTreeBuilder->loadState(aTreeBuilder);
1842
} else {
1843
// We've got a successful speculation and at least a moment ago it was
1844
// the current speculation
1845
mSpeculations.ElementAt(0)->