Source code

Revision control

Other Tools

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include "builtin/RegExp.h"
8
9
#include "mozilla/Casting.h"
10
#include "mozilla/CheckedInt.h"
11
#include "mozilla/TextUtils.h"
12
13
#include "frontend/TokenStream.h"
14
#ifndef ENABLE_NEW_REGEXP
15
# include "irregexp/RegExpParser.h"
16
#endif
17
#include "jit/InlinableNatives.h"
18
#include "js/PropertySpec.h"
19
#include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags
20
#ifdef ENABLE_NEW_REGEXP
21
# include "new-regexp/RegExpAPI.h"
22
#endif
23
#include "util/StringBuffer.h"
24
#include "util/Unicode.h"
25
#include "vm/JSContext.h"
26
#include "vm/RegExpStatics.h"
27
#include "vm/SelfHosting.h"
28
29
#include "vm/EnvironmentObject-inl.h"
30
#include "vm/JSObject-inl.h"
31
#include "vm/NativeObject-inl.h"
32
#include "vm/ObjectOperations-inl.h"
33
34
using namespace js;
35
36
using mozilla::AssertedCast;
37
using mozilla::CheckedInt;
38
using mozilla::IsAsciiDigit;
39
40
using JS::CompileOptions;
41
using JS::RegExpFlag;
42
using JS::RegExpFlags;
43
44
/*
45
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
46
* steps 3, 16-25.
47
*/
48
bool js::CreateRegExpMatchResult(JSContext* cx, HandleString input,
49
const MatchPairs& matches,
50
MutableHandleValue rval) {
51
MOZ_ASSERT(input);
52
53
/*
54
* Create the (slow) result array for a match.
55
*
56
* Array contents:
57
* 0: matched string
58
* 1..pairCount-1: paren matches
59
* input: input string
60
* index: start index for the match
61
*/
62
63
// Get the templateObject that defines the shape and type of the output
64
// object.
65
JSObject* templateObject =
66
cx->realm()->regExps.getOrCreateMatchResultTemplateObject(cx);
67
if (!templateObject) {
68
return false;
69
}
70
71
size_t numPairs = matches.length();
72
MOZ_ASSERT(numPairs > 0);
73
74
// Step 17.
75
RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(
76
cx, numPairs, templateObject));
77
if (!arr) {
78
return false;
79
}
80
81
// Steps 22-24.
82
// Store a Value for each pair.
83
for (size_t i = 0; i < numPairs; i++) {
84
const MatchPair& pair = matches[i];
85
86
if (pair.isUndefined()) {
87
MOZ_ASSERT(i != 0); // Since we had a match, first pair must be present.
88
arr->setDenseInitializedLength(i + 1);
89
arr->initDenseElement(i, UndefinedValue());
90
} else {
91
JSLinearString* str =
92
NewDependentString(cx, input, pair.start, pair.length());
93
if (!str) {
94
return false;
95
}
96
arr->setDenseInitializedLength(i + 1);
97
arr->initDenseElement(i, StringValue(str));
98
}
99
}
100
101
// Step 20 (reordered).
102
// Set the |index| property.
103
arr->setSlot(RegExpRealm::MatchResultObjectIndexSlot,
104
Int32Value(matches[0].start));
105
106
// Step 21 (reordered).
107
// Set the |input| property.
108
arr->setSlot(RegExpRealm::MatchResultObjectInputSlot, StringValue(input));
109
110
#ifdef DEBUG
111
RootedValue test(cx);
112
RootedId id(cx, NameToId(cx->names().index));
113
if (!NativeGetProperty(cx, arr, id, &test)) {
114
return false;
115
}
116
MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectIndexSlot));
117
id = NameToId(cx->names().input);
118
if (!NativeGetProperty(cx, arr, id, &test)) {
119
return false;
120
}
121
MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectInputSlot));
122
#endif
123
124
// Step 25.
125
rval.setObject(*arr);
126
return true;
127
}
128
129
static int32_t CreateRegExpSearchResult(const MatchPairs& matches) {
130
/* Fit the start and limit of match into a int32_t. */
131
uint32_t position = matches[0].start;
132
uint32_t lastIndex = matches[0].limit;
133
MOZ_ASSERT(position < 0x8000);
134
MOZ_ASSERT(lastIndex < 0x8000);
135
return position | (lastIndex << 15);
136
}
137
138
/*
139
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
140
* steps 3, 9-14, except 12.a.i, 12.c.i.1.
141
*/
142
static RegExpRunStatus ExecuteRegExpImpl(JSContext* cx, RegExpStatics* res,
143
MutableHandleRegExpShared re,
144
HandleLinearString input,
145
size_t searchIndex,
146
VectorMatchPairs* matches,
147
size_t* endIndex) {
148
RegExpRunStatus status =
149
RegExpShared::execute(cx, re, input, searchIndex, matches, endIndex);
150
151
/* Out of spec: Update RegExpStatics. */
152
if (status == RegExpRunStatus_Success && res) {
153
if (matches) {
154
if (!res->updateFromMatchPairs(cx, input, *matches)) {
155
return RegExpRunStatus_Error;
156
}
157
} else {
158
res->updateLazily(cx, input, re, searchIndex);
159
}
160
}
161
return status;
162
}
163
164
/* Legacy ExecuteRegExp behavior is baked into the JSAPI. */
165
bool js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res,
166
Handle<RegExpObject*> reobj,
167
HandleLinearString input, size_t* lastIndex,
168
bool test, MutableHandleValue rval) {
169
RootedRegExpShared shared(cx, RegExpObject::getShared(cx, reobj));
170
if (!shared) {
171
return false;
172
}
173
174
VectorMatchPairs matches;
175
176
RegExpRunStatus status =
177
ExecuteRegExpImpl(cx, res, &shared, input, *lastIndex, &matches, nullptr);
178
if (status == RegExpRunStatus_Error) {
179
return false;
180
}
181
182
if (status == RegExpRunStatus_Success_NotFound) {
183
/* ExecuteRegExp() previously returned an array or null. */
184
rval.setNull();
185
return true;
186
}
187
188
*lastIndex = matches[0].limit;
189
190
if (test) {
191
/* Forbid an array, as an optimization. */
192
rval.setBoolean(true);
193
return true;
194
}
195
196
return CreateRegExpMatchResult(cx, input, matches, rval);
197
}
198
199
static bool CheckPatternSyntaxSlow(JSContext* cx, HandleAtom pattern,
200
RegExpFlags flags) {
201
LifoAllocScope allocScope(&cx->tempLifoAlloc());
202
CompileOptions options(cx);
203
frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
204
#ifdef ENABLE_NEW_REGEXP
205
return irregexp::CheckPatternSyntax(cx, dummyTokenStream, pattern, flags);
206
#else
207
return irregexp::ParsePatternSyntax(dummyTokenStream, allocScope.alloc(),
208
pattern, flags.unicode());
209
#endif
210
}
211
212
// Validate |pattern|/|flags| and return the matching RegExpShared, or nullptr
// on failure. An existing RegExpShared for the same pattern and flags lets us
// skip the much slower CheckPatternSyntaxSlow call.
static RegExpShared* CheckPatternSyntax(JSContext* cx, HandleAtom pattern,
                                        RegExpFlags flags) {
  RegExpShared* cached = cx->zone()->regExps().maybeGet(pattern, flags);
  if (!cached) {
    if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
      return nullptr;
    }

    // Allocate and return a new RegExpShared so the next lookup takes the
    // fast path.
    return cx->zone()->regExps().get(cx, pattern, flags);
  }

#ifdef DEBUG
  // Assert the pattern is valid: the slow check may only fail here because
  // of OOM or over-recursion.
  if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
    MOZ_ASSERT(cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed());
    return nullptr;
  }
#endif
  return cached;
}
236
237
/*
238
* ES 2016 draft Mar 25, 2016 21.2.3.2.2.
239
*
240
* Steps 14-15 set |obj|'s "lastIndex" property to zero. Some of
241
* RegExpInitialize's callers have a fresh RegExp not yet exposed to script:
242
* in these cases zeroing "lastIndex" is infallible. But others have a RegExp
243
* whose "lastIndex" property might have been made non-writable: here, zeroing
244
* "lastIndex" can fail. We efficiently solve this problem by completely
245
* removing "lastIndex" zeroing from the provided function.
246
*
247
* CALLERS MUST HANDLE "lastIndex" ZEROING THEMSELVES!
248
*
249
* Because this function only ever returns a user-provided |obj| in the spec,
250
* we omit it and just return the usual success/failure.
251
*/
252
static bool RegExpInitializeIgnoringLastIndex(JSContext* cx,
253
Handle<RegExpObject*> obj,
254
HandleValue patternValue,
255
HandleValue flagsValue) {
256
RootedAtom pattern(cx);
257
if (patternValue.isUndefined()) {
258
/* Step 1. */
259
pattern = cx->names().empty;
260
} else {
261
/* Step 2. */
262
pattern = ToAtom<CanGC>(cx, patternValue);
263
if (!pattern) {
264
return false;
265
}
266
}
267
268
/* Step 3. */
269
RegExpFlags flags = RegExpFlag::NoFlags;
270
if (!flagsValue.isUndefined()) {
271
/* Step 4. */
272
RootedString flagStr(cx, ToString<CanGC>(cx, flagsValue));
273
if (!flagStr) {
274
return false;
275
}
276
277
/* Step 5. */
278
if (!ParseRegExpFlags(cx, flagStr, &flags)) {
279
return false;
280
}
281
}
282
283
/* Steps 7-8. */
284
RegExpShared* shared = CheckPatternSyntax(cx, pattern, flags);
285
if (!shared) {
286
return false;
287
}
288
289
/* Steps 9-12. */
290
obj->initIgnoringLastIndex(pattern, flags);
291
292
obj->setShared(*shared);
293
294
return true;
295
}
296
297
/* ES 2016 draft Mar 25, 2016 21.2.3.2.3. */
298
bool js::RegExpCreate(JSContext* cx, HandleValue patternValue,
299
HandleValue flagsValue, MutableHandleValue rval) {
300
/* Step 1. */
301
Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject));
302
if (!regexp) {
303
return false;
304
}
305
306
/* Step 2. */
307
if (!RegExpInitializeIgnoringLastIndex(cx, regexp, patternValue,
308
flagsValue)) {
309
return false;
310
}
311
regexp->zeroLastIndex(cx);
312
313
rval.setObject(*regexp);
314
return true;
315
}
316
317
// True iff |v| is an object that is a RegExpObject.
MOZ_ALWAYS_INLINE bool IsRegExpObject(HandleValue v) {
  if (!v.isObject()) {
    return false;
  }
  return v.toObject().is<RegExpObject>();
}
320
321
/* ES6 draft rc3 7.2.8. */
322
bool js::IsRegExp(JSContext* cx, HandleValue value, bool* result) {
323
/* Step 1. */
324
if (!value.isObject()) {
325
*result = false;
326
return true;
327
}
328
RootedObject obj(cx, &value.toObject());
329
330
/* Steps 2-3. */
331
RootedValue isRegExp(cx);
332
RootedId matchId(cx, SYMBOL_TO_JSID(cx->wellKnownSymbols().match));
333
if (!GetProperty(cx, obj, obj, matchId, &isRegExp)) {
334
return false;
335
}
336
337
/* Step 4. */
338
if (!isRegExp.isUndefined()) {
339
*result = ToBoolean(isRegExp);
340
return true;
341
}
342
343
/* Steps 5-6. */
344
ESClass cls;
345
if (!GetClassOfValue(cx, value, &cls)) {
346
return false;
347
}
348
349
*result = cls == ESClass::RegExp;
350
return true;
351
}
352
353
/* ES6 B.2.5.1. */
354
MOZ_ALWAYS_INLINE bool regexp_compile_impl(JSContext* cx,
355
const CallArgs& args) {
356
MOZ_ASSERT(IsRegExpObject(args.thisv()));
357
358
Rooted<RegExpObject*> regexp(cx, &args.thisv().toObject().as<RegExpObject>());
359
360
// Step 3.
361
RootedValue patternValue(cx, args.get(0));
362
ESClass cls;
363
if (!GetClassOfValue(cx, patternValue, &cls)) {
364
return false;
365
}
366
if (cls == ESClass::RegExp) {
367
// Step 3a.
368
if (args.hasDefined(1)) {
369
JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
370
JSMSG_NEWREGEXP_FLAGGED);
371
return false;
372
}
373
374
// Beware! |patternObj| might be a proxy into another compartment, so
375
// don't assume |patternObj.is<RegExpObject>()|. For the same reason,
376
// don't reuse the RegExpShared below.
377
RootedObject patternObj(cx, &patternValue.toObject());
378
379
RootedAtom sourceAtom(cx);
380
RegExpFlags flags = RegExpFlag::NoFlags;
381
{
382
// Step 3b.
383
RegExpShared* shared = RegExpToShared(cx, patternObj);
384
if (!shared) {
385
return false;
386
}
387
388
sourceAtom = shared->getSource();
389
flags = shared->getFlags();
390
}
391
392
// Step 5, minus lastIndex zeroing.
393
regexp->initIgnoringLastIndex(sourceAtom, flags);
394
} else {
395
// Step 4.
396
RootedValue P(cx, patternValue);
397
RootedValue F(cx, args.get(1));
398
399
// Step 5, minus lastIndex zeroing.
400
if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
401
return false;
402
}
403
}
404
405
// The final niggling bit of step 5.
406
//
407
// |regexp| is user-exposed, but if its "lastIndex" property hasn't been
408
// made non-writable, we can still use a fast path to zero it.
409
if (regexp->lookupPure(cx->names().lastIndex)->writable()) {
410
regexp->zeroLastIndex(cx);
411
} else {
412
RootedValue zero(cx, Int32Value(0));
413
if (!SetProperty(cx, regexp, cx->names().lastIndex, zero)) {
414
return false;
415
}
416
}
417
418
args.rval().setObject(*regexp);
419
return true;
420
}
421
422
// Native entry point for "compile": dispatches to regexp_compile_impl via
// CallNonGenericMethod, guarded by IsRegExpObject.
static bool regexp_compile(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs callArgs = CallArgsFromVp(argc, vp);

  /* Steps 1-2. */
  return CallNonGenericMethod<IsRegExpObject, regexp_compile_impl>(cx,
                                                                   callArgs);
}
428
429
/*
430
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1.
431
*/
432
bool js::regexp_construct(JSContext* cx, unsigned argc, Value* vp) {
433
CallArgs args = CallArgsFromVp(argc, vp);
434
435
// Steps 1.
436
bool patternIsRegExp;
437
if (!IsRegExp(cx, args.get(0), &patternIsRegExp)) {
438
return false;
439
}
440
441
// We can delay step 3 and step 4a until later, during
442
// GetPrototypeFromBuiltinConstructor calls. Accessing the new.target
443
// and the callee from the stack is unobservable.
444
if (!args.isConstructing()) {
445
// Step 3.b.
446
if (patternIsRegExp && !args.hasDefined(1)) {
447
RootedObject patternObj(cx, &args[0].toObject());
448
449
// Step 3.b.i.
450
RootedValue patternConstructor(cx);
451
if (!GetProperty(cx, patternObj, patternObj, cx->names().constructor,
452
&patternConstructor)) {
453
return false;
454
}
455
456
// Step 3.b.ii.
457
if (patternConstructor.isObject() &&
458
patternConstructor.toObject() == args.callee()) {
459
args.rval().set(args[0]);
460
return true;
461
}
462
}
463
}
464
465
RootedValue patternValue(cx, args.get(0));
466
467
// Step 4.
468
ESClass cls;
469
if (!GetClassOfValue(cx, patternValue, &cls)) {
470
return false;
471
}
472
if (cls == ESClass::RegExp) {
473
// Beware! |patternObj| might be a proxy into another compartment, so
474
// don't assume |patternObj.is<RegExpObject>()|.
475
RootedObject patternObj(cx, &patternValue.toObject());
476
477
RootedAtom sourceAtom(cx);
478
RegExpFlags flags;
479
RootedRegExpShared shared(cx);
480
{
481
// Step 4.a.
482
shared = RegExpToShared(cx, patternObj);
483
if (!shared) {
484
return false;
485
}
486
sourceAtom = shared->getSource();
487
488
// Step 4.b.
489
// Get original flags in all cases, to compare with passed flags.
490
flags = shared->getFlags();
491
492
// If the RegExpShared is in another Zone, don't reuse it.
493
if (cx->zone() != shared->zone()) {
494
shared = nullptr;
495
}
496
}
497
498
// Step 7.
499
RootedObject proto(cx);
500
if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
501
return false;
502
}
503
504
Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
505
if (!regexp) {
506
return false;
507
}
508
509
// Step 8.
510
if (args.hasDefined(1)) {
511
// Step 4.c / 21.2.3.2.2 RegExpInitialize step 4.
512
RegExpFlags flagsArg = RegExpFlag::NoFlags;
513
RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
514
if (!flagStr) {
515
return false;
516
}
517
if (!ParseRegExpFlags(cx, flagStr, &flagsArg)) {
518
return false;
519
}
520
521
// Don't reuse the RegExpShared if we have different flags.
522
if (flags != flagsArg) {
523
shared = nullptr;
524
}
525
526
if (!flags.unicode() && flagsArg.unicode()) {
527
// Have to check syntax again when adding 'u' flag.
528
529
// ES 2017 draft rev 9b49a888e9dfe2667008a01b2754c3662059ae56
530
// 21.2.3.2.2 step 7.
531
shared = CheckPatternSyntax(cx, sourceAtom, flagsArg);
532
if (!shared) {
533
return false;
534
}
535
}
536
flags = flagsArg;
537
}
538
539
regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
540
541
if (shared) {
542
regexp->setShared(*shared);
543
}
544
545
args.rval().setObject(*regexp);
546
return true;
547
}
548
549
RootedValue P(cx);
550
RootedValue F(cx);
551
552
// Step 5.
553
if (patternIsRegExp) {
554
RootedObject patternObj(cx, &patternValue.toObject());
555
556
// Step 5.a.
557
if (!GetProperty(cx, patternObj, patternObj, cx->names().source, &P)) {
558
return false;
559
}
560
561
// Step 5.b.
562
F = args.get(1);
563
if (F.isUndefined()) {
564
if (!GetProperty(cx, patternObj, patternObj, cx->names().flags, &F)) {
565
return false;
566
}
567
}
568
} else {
569
// Steps 6.a-b.
570
P = patternValue;
571
F = args.get(1);
572
}
573
574
// Step 7.
575
RootedObject proto(cx);
576
if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
577
return false;
578
}
579
580
Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
581
if (!regexp) {
582
return false;
583
}
584
585
// Step 8.
586
if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
587
return false;
588
}
589
regexp->zeroLastIndex(cx);
590
591
args.rval().setObject(*regexp);
592
return true;
593
}
594
595
/*
596
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1
597
* steps 4, 7-8.
598
*/
599
bool js::regexp_construct_raw_flags(JSContext* cx, unsigned argc, Value* vp) {
600
CallArgs args = CallArgsFromVp(argc, vp);
601
MOZ_ASSERT(args.length() == 2);
602
MOZ_ASSERT(!args.isConstructing());
603
604
// Step 4.a.
605
RootedAtom sourceAtom(cx, AtomizeString(cx, args[0].toString()));
606
if (!sourceAtom) {
607
return false;
608
}
609
610
// Step 4.c.
611
RegExpFlags flags = AssertedCast<uint8_t>(int32_t(args[1].toNumber()));
612
613
// Step 7.
614
RegExpObject* regexp = RegExpAlloc(cx, GenericObject);
615
if (!regexp) {
616
return false;
617
}
618
619
// Step 8.
620
regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
621
args.rval().setObject(*regexp);
622
return true;
623
}
624
625
// True iff |v| is the object returned by the current global's
// maybeGetRegExpPrototype().
MOZ_ALWAYS_INLINE bool IsRegExpPrototype(HandleValue v, JSContext* cx) {
  if (!v.isObject()) {
    return false;
  }
  return cx->global()->maybeGetRegExpPrototype() == &v.toObject();
}
629
630
// ES 2017 draft 21.2.5.4.
631
MOZ_ALWAYS_INLINE bool regexp_global_impl(JSContext* cx, const CallArgs& args) {
632
MOZ_ASSERT(IsRegExpObject(args.thisv()));
633
634
// Steps 4-6.
635
RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
636
args.rval().setBoolean(reObj->global());
637
return true;
638
}
639
640
// Native getter for "global". Yields undefined when |this| is the RegExp
// prototype object; otherwise defers to regexp_global_impl.
bool js::regexp_global(JSContext* cx, unsigned argc, JS::Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (!IsRegExpPrototype(args.thisv(), cx)) {
    // Steps 1-3.
    return CallNonGenericMethod<IsRegExpObject, regexp_global_impl>(cx, args);
  }

  // Step 3.a.
  args.rval().setUndefined();
  return true;
}
652
653
// ES 2017 draft 21.2.5.5.
654
MOZ_ALWAYS_INLINE bool regexp_ignoreCase_impl(JSContext* cx,
655
const CallArgs& args) {
656
MOZ_ASSERT(IsRegExpObject(args.thisv()));
657
658
// Steps 4-6.
659
RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
660
args.rval().setBoolean(reObj->ignoreCase());
661
return true;
662
}
663
664
// Native getter for "ignoreCase". Yields undefined when |this| is the RegExp
// prototype object; otherwise defers to regexp_ignoreCase_impl.
bool js::regexp_ignoreCase(JSContext* cx, unsigned argc, JS::Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (!IsRegExpPrototype(args.thisv(), cx)) {
    // Steps 1-3.
    return CallNonGenericMethod<IsRegExpObject, regexp_ignoreCase_impl>(cx,
                                                                        args);
  }

  // Step 3.a.
  args.rval().setUndefined();
  return true;
}
676
677
// ES 2017 draft 21.2.5.7.
678
MOZ_ALWAYS_INLINE bool regexp_multiline_impl(JSContext* cx,
679
const CallArgs& args) {
680
MOZ_ASSERT(IsRegExpObject(args.thisv()));
681
682
// Steps 4-6.
683
RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
684
args.rval().setBoolean(reObj->multiline());
685
return true;
686
}
687
688
// Native getter for "multiline". Yields undefined when |this| is the RegExp
// prototype object; otherwise defers to regexp_multiline_impl.
bool js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (!IsRegExpPrototype(args.thisv(), cx)) {
    // Steps 1-3.
    return CallNonGenericMethod<IsRegExpObject, regexp_multiline_impl>(cx,
                                                                       args);
  }

  // Step 3.a.
  args.rval().setUndefined();
  return true;
}
700
701
// ES 2017 draft 21.2.5.10.
702
MOZ_ALWAYS_INLINE bool regexp_source_impl(JSContext* cx, const CallArgs& args) {
703
MOZ_ASSERT(IsRegExpObject(args.thisv()));
704
705
// Step 5.
706
RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
707
RootedAtom src(cx, reObj->getSource());
708
if (!src) {
709
return false;
710
}
711
712
// Step 7.
713
JSString* str = EscapeRegExpPattern(cx, src);
714
if (!str) {
715
return false;
716
}
717
718
args.rval().setString(str);
719
return true;
720
}
721
722
// Native getter for "source". Yields the |emptyRegExp| name when |this| is
// the RegExp prototype object; otherwise defers to regexp_source_impl.
static bool regexp_source(JSContext* cx, unsigned argc, JS::Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (!IsRegExpPrototype(args.thisv(), cx)) {
    // Steps 1-4.
    return CallNonGenericMethod<IsRegExpObject, regexp_source_impl>(cx, args);
  }

  // Step 3.a.
  args.rval().setString(cx->names().emptyRegExp);
  return true;
}
734
735
// ES 2020 draft 21.2.5.3.
736
MOZ_ALWAYS_INLINE bool regexp_dotAll_impl(JSContext* cx, const CallArgs& args) {
737
MOZ_ASSERT(IsRegExpObject(args.thisv()));
738
739
// Steps 4-6.
740
RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
741
args.rval().setBoolean(reObj->dotAll());
742
return true;
743
}
744
745
// Native getter for "dotAll". Yields undefined when |this| is the RegExp
// prototype object; otherwise defers to regexp_dotAll_impl.
bool js::regexp_dotAll(JSContext* cx, unsigned argc, JS::Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (!IsRegExpPrototype(args.thisv(), cx)) {
    // Steps 1-3.
    return CallNonGenericMethod<IsRegExpObject, regexp_dotAll_impl>(cx, args);
  }

  // Step 3.a.
  args.rval().setUndefined();
  return true;
}
757
758
// ES 2017 draft 21.2.5.12.
759
MOZ_ALWAYS_INLINE bool regexp_sticky_impl(JSContext* cx, const CallArgs& args) {
760
MOZ_ASSERT(IsRegExpObject(args.thisv()));
761
762
// Steps 4-6.
763
RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
764
args.rval().setBoolean(reObj->sticky());
765
return true;
766
}
767
768
// Native getter for "sticky". Yields undefined when |this| is the RegExp
// prototype object; otherwise defers to regexp_sticky_impl.
bool js::regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (!IsRegExpPrototype(args.thisv(), cx)) {
    // Steps 1-3.
    return CallNonGenericMethod<IsRegExpObject, regexp_sticky_impl>(cx, args);
  }

  // Step 3.a.
  args.rval().setUndefined();
  return true;
}
780
781
// ES 2017 draft 21.2.5.15.
782
MOZ_ALWAYS_INLINE bool regexp_unicode_impl(JSContext* cx,
783
const CallArgs& args) {
784
MOZ_ASSERT(IsRegExpObject(args.thisv()));
785
786
// Steps 4-6.
787
RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
788
args.rval().setBoolean(reObj->unicode());
789
return true;
790
}
791
792
// Native getter for "unicode". Yields undefined when |this| is the RegExp
// prototype object; otherwise defers to regexp_unicode_impl.
bool js::regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  if (!IsRegExpPrototype(args.thisv(), cx)) {
    // Steps 1-3.
    return CallNonGenericMethod<IsRegExpObject, regexp_unicode_impl>(cx, args);
  }

  // Step 3.a.
  args.rval().setUndefined();
  return true;
}
804
805
// Accessor property table for RegExp objects: one self-hosted getter
// ("flags") plus the native flag/source getters defined in this file.
// "dotAll" is only present when ENABLE_NEW_REGEXP is defined.
const JSPropertySpec js::regexp_properties[] = {
    // Self-hosted.
    JS_SELF_HOSTED_GET("flags", "$RegExpFlagsGetter", 0),

    // Native getters.
    JS_PSG("global", regexp_global, 0),
    JS_PSG("ignoreCase", regexp_ignoreCase, 0),
    JS_PSG("multiline", regexp_multiline, 0),
#ifdef ENABLE_NEW_REGEXP
    JS_PSG("dotAll", regexp_dotAll, 0),
#endif
    JS_PSG("source", regexp_source, 0),
    JS_PSG("sticky", regexp_sticky, 0),
    JS_PSG("unicode", regexp_unicode, 0),

    JS_PS_END};
817
818
// Method table for RegExp objects. "compile" is the only native entry; all
// others, including the symbol-keyed methods, name self-hosted functions.
const JSFunctionSpec js::regexp_methods[] = {
    // String conversion.
    JS_SELF_HOSTED_FN(js_toSource_str, "$RegExpToString", 0, 0),
    JS_SELF_HOSTED_FN(js_toString_str, "$RegExpToString", 0, 0),

    // Named methods.
    JS_FN("compile", regexp_compile, 2, 0),
    JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1, 0),
    JS_SELF_HOSTED_FN("test", "RegExpTest", 1, 0),

    // Well-known-symbol methods.
    JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0),
    JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2, 0),
    JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2, 0),

    JS_FS_END};
830
831
// Getter body for the numbered paren statics: fetch paren |parenNum| from
// |res| into the return value and replace an undefined result with the empty
// string. Expects |cx|, |res| and |args| to be in scope at the expansion
// site; the caller supplies the trailing semicolon.
#define STATIC_PAREN_GETTER_CODE(parenNum)               \
  if (!res->createParen(cx, parenNum, args.rval())) {    \
    return false;                                        \
  }                                                      \
  if (args.rval().isUndefined()) {                       \
    args.rval().setString(cx->runtime()->emptyString);   \
  }                                                      \
  return true
836
837
/*
 * RegExp static properties.
 *
 * RegExp class static properties and their Perl counterparts:
 *
 *  RegExp.input                $_
 *  RegExp.lastMatch            $&
 *  RegExp.lastParen            $+
 *  RegExp.leftContext          $`
 *  RegExp.rightContext         $'
 */

// Define a native getter |name| that fetches the current global's
// RegExpStatics into |res| (failing if unavailable) and then runs |code|.
// |code| is responsible for returning from the function.
#define DEFINE_STATIC_GETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    CallArgs args = CallArgsFromVp(argc, vp);                              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) {                                                            \
      return false;                                                        \
    }                                                                      \
    code;                                                                  \
  }
856
857
// Getters backed directly by a RegExpStatics create* method.
DEFINE_STATIC_GETTER(static_input_getter,
                     return res->createPendingInput(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastMatch_getter,
                     return res->createLastMatch(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastParen_getter,
                     return res->createLastParen(cx, args.rval()))
DEFINE_STATIC_GETTER(static_leftContext_getter,
                     return res->createLeftContext(cx, args.rval()))
DEFINE_STATIC_GETTER(static_rightContext_getter,
                     return res->createRightContext(cx, args.rval()))

// Getters for parens 1 through 9.
DEFINE_STATIC_GETTER(static_paren1_getter, STATIC_PAREN_GETTER_CODE(1))
DEFINE_STATIC_GETTER(static_paren2_getter, STATIC_PAREN_GETTER_CODE(2))
DEFINE_STATIC_GETTER(static_paren3_getter, STATIC_PAREN_GETTER_CODE(3))
DEFINE_STATIC_GETTER(static_paren4_getter, STATIC_PAREN_GETTER_CODE(4))
DEFINE_STATIC_GETTER(static_paren5_getter, STATIC_PAREN_GETTER_CODE(5))
DEFINE_STATIC_GETTER(static_paren6_getter, STATIC_PAREN_GETTER_CODE(6))
DEFINE_STATIC_GETTER(static_paren7_getter, STATIC_PAREN_GETTER_CODE(7))
DEFINE_STATIC_GETTER(static_paren8_getter, STATIC_PAREN_GETTER_CODE(8))
DEFINE_STATIC_GETTER(static_paren9_getter, STATIC_PAREN_GETTER_CODE(9))
877
878
// Define a native setter |name| that fetches the current global's
// RegExpStatics into |res| (failing if unavailable), runs |code| and then
// reports success. Note that no CallArgs are set up for |code|.
#define DEFINE_STATIC_SETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) {                                                            \
      return false;                                                        \
    }                                                                      \
    code;                                                                  \
    return true;                                                           \
  }
885
886
// Native setter for the "input" static: converts the first argument to a
// string, records it as the pending input on the current global's
// RegExpStatics, and returns that string.
static bool static_input_setter(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  RegExpStatics* statics = GlobalObject::getRegExpStatics(cx, cx->global());
  if (!statics) {
    return false;
  }

  RootedString pending(cx, ToString<CanGC>(cx, args.get(0)));
  if (!pending) {
    return false;
  }

  statics->setPendingInput(pending);
  args.rval().setString(pending);
  return true;
}
902
903
// Static RegExp property table. The long names and $1..$9 are permanent and
// enumerable; the Perl-style short aliases are permanent only. "input"/"$_"
// are the only entries with a setter.
const JSPropertySpec js::regexp_static_props[] = {
    // Long names.
    JS_PSGS("input", static_input_getter, static_input_setter,
            JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastMatch", static_lastMatch_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastParen", static_lastParen_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("leftContext", static_leftContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("rightContext", static_rightContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),

    // Numbered parens.
    JS_PSG("$1", static_paren1_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$2", static_paren2_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$3", static_paren3_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$4", static_paren4_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$5", static_paren5_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$6", static_paren6_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$7", static_paren7_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$8", static_paren8_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$9", static_paren9_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),

    // Perl-style short aliases.
    JS_PSGS("$_", static_input_getter, static_input_setter, JSPROP_PERMANENT),
    JS_PSG("$&", static_lastMatch_getter, JSPROP_PERMANENT),
    JS_PSG("$+", static_lastParen_getter, JSPROP_PERMANENT),
    JS_PSG("$`", static_leftContext_getter, JSPROP_PERMANENT),
    JS_PSG("$'", static_rightContext_getter, JSPROP_PERMANENT),

    // Self-hosted @@species getter.
    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),

    JS_PS_END};
930
931
template <typename CharT>
932
static bool IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,
933
size_t index) {
934
JS::AutoCheckCannotGC nogc;
935
MOZ_ASSERT(index > 0 && index < input->length());
936
const CharT* inputChars = input->chars<CharT>(nogc);
937
938
return unicode::IsTrailSurrogate(inputChars[index]) &&
939
unicode::IsLeadSurrogate(inputChars[index - 1]);
940
}
941
942
static bool IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,
943
int32_t index) {
944
if (index <= 0 || size_t(index) >= input->length()) {
945
return false;
946
}
947
948
return input->hasLatin1Chars()
949
? IsTrailSurrogateWithLeadSurrogateImpl<Latin1Char>(input, index)
950
: IsTrailSurrogateWithLeadSurrogateImpl<char16_t>(input, index);
951
}
952
953
/*
954
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
955
* steps 3, 9-14, except 12.a.i, 12.c.i.1.
956
*/
957
static RegExpRunStatus ExecuteRegExp(JSContext* cx, HandleObject regexp,
958
HandleString string, int32_t lastIndex,
959
VectorMatchPairs* matches,
960
size_t* endIndex) {
961
/*
962
* WARNING: Despite the presence of spec step comment numbers, this
963
* algorithm isn't consistent with any ES6 version, draft or
964
* otherwise. YOU HAVE BEEN WARNED.
965
*/
966
967
/* Steps 1-2 performed by the caller. */
968
Handle<RegExpObject*> reobj = regexp.as<RegExpObject>();
969
970
RootedRegExpShared re(cx, RegExpObject::getShared(cx, reobj));
971
if (!re) {
972
return RegExpRunStatus_Error;
973
}
974
975
RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
976
if (!res) {
977
return RegExpRunStatus_Error;
978
}
979
980
RootedLinearString input(cx, string->ensureLinear(cx));
981
if (!input) {
982
return RegExpRunStatus_Error;
983
}
984
985
/* Handled by caller */
986
MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());
987
988
/* Steps 4-8 performed by the caller. */
989
990
/* Step 10. */
991
if (reobj->unicode()) {
992
/*
993
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
994
* 21.2.2.2 step 2.
995
* Let listIndex be the index into Input of the character that was
996
* obtained from element index of str.
997
*
998
* In the spec, pattern match is performed with decoded Unicode code
999
* points, but our implementation performs it with UTF-16 encoded
1000
* string. In step 2, we should decrement lastIndex (index) if it
1001
* points the trail surrogate that has corresponding lead surrogate.
1002
*
1003
* var r = /\uD83D\uDC38/ug;
1004
* r.lastIndex = 1;
1005
* var str = "\uD83D\uDC38";
1006
* var result = r.exec(str); // pattern match starts from index 0
1007
* print(result.index); // prints 0
1008
*
1009
* Note: this doesn't match the current spec text and result in
1010
* different values for `result.index` under certain conditions.
1011
* However, the spec will change to match our implementation's
1013
*/
1014
if (IsTrailSurrogateWithLeadSurrogate(input, lastIndex)) {
1015
lastIndex--;
1016
}
1017
}
1018
1019
/* Steps 3, 11-14, except 12.a.i, 12.c.i.1. */
1020
RegExpRunStatus status =
1021
ExecuteRegExpImpl(cx, res, &re, input, lastIndex, matches, endIndex);
1022
if (status == RegExpRunStatus_Error) {
1023
return RegExpRunStatus_Error;
1024
}
1025
1026
/* Steps 12.a.i, 12.c.i.i, 15 are done by Self-hosted function. */
1027
1028
return status;
1029
}
1030
1031
/*
1032
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1033
* steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1034
*/
1035
static bool RegExpMatcherImpl(JSContext* cx, HandleObject regexp,
1036
HandleString string, int32_t lastIndex,
1037
MutableHandleValue rval) {
1038
/* Execute regular expression and gather matches. */
1039
VectorMatchPairs matches;
1040
1041
/* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1042
RegExpRunStatus status =
1043
ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
1044
if (status == RegExpRunStatus_Error) {
1045
return false;
1046
}
1047
1048
/* Steps 12.a, 12.c. */
1049
if (status == RegExpRunStatus_Success_NotFound) {
1050
rval.setNull();
1051
return true;
1052
}
1053
1054
/* Steps 16-25 */
1055
return CreateRegExpMatchResult(cx, string, matches, rval);
1056
}
1057
1058
/*
1059
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1060
* steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1061
*/
1062
bool js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp) {
1063
CallArgs args = CallArgsFromVp(argc, vp);
1064
MOZ_ASSERT(args.length() == 3);
1065
MOZ_ASSERT(IsRegExpObject(args[0]));
1066
MOZ_ASSERT(args[1].isString());
1067
MOZ_ASSERT(args[2].isNumber());
1068
1069
RootedObject regexp(cx, &args[0].toObject());
1070
RootedString string(cx, args[1].toString());
1071
1072
int32_t lastIndex;
1073
MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1074
1075
/* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
1076
return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
1077
}
1078
1079
/*
1080
* Separate interface for use by IonMonkey.
1081
* This code cannot re-enter Ion code.
1082
*/
1083
bool js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp,
1084
HandleString input, int32_t maybeLastIndex,
1085
MatchPairs* maybeMatches, MutableHandleValue output) {
1086
// The MatchPairs will always be passed in, but RegExp execution was
1087
// successful only if the pairs have actually been filled in.
1088
if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
1089
return CreateRegExpMatchResult(cx, input, *maybeMatches, output);
1090
}
1091
1092
// |maybeLastIndex| only contains a valid value when the RegExp execution
1093
// was not successful.
1094
MOZ_ASSERT(maybeLastIndex >= 0);
1095
return RegExpMatcherImpl(cx, regexp, input, maybeLastIndex, output);
1096
}
1097
1098
/*
1099
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1100
* steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1101
* This code is inlined in CodeGenerator.cpp generateRegExpSearcherStub,
1102
* changes to this code need to get reflected in there too.
1103
*/
1104
static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
1105
HandleString string, int32_t lastIndex,
1106
int32_t* result) {
1107
/* Execute regular expression and gather matches. */
1108
VectorMatchPairs matches;
1109
1110
/* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1111
RegExpRunStatus status =
1112
ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
1113
if (status == RegExpRunStatus_Error) {
1114
return false;
1115
}
1116
1117
/* Steps 12.a, 12.c. */
1118
if (status == RegExpRunStatus_Success_NotFound) {
1119
*result = -1;
1120
return true;
1121
}
1122
1123
/* Steps 16-25 */
1124
*result = CreateRegExpSearchResult(matches);
1125
return true;
1126
}
1127
1128
/*
1129
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1130
* steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1131
*/
1132
bool js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp) {
1133
CallArgs args = CallArgsFromVp(argc, vp);
1134
MOZ_ASSERT(args.length() == 3);
1135
MOZ_ASSERT(IsRegExpObject(args[0]));
1136
MOZ_ASSERT(args[1].isString());
1137
MOZ_ASSERT(args[2].isNumber());
1138
1139
RootedObject regexp(cx, &args[0].toObject());
1140
RootedString string(cx, args[1].toString());
1141
1142
int32_t lastIndex;
1143
MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1144
1145
/* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
1146
int32_t result = 0;
1147
if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result)) {
1148
return false;
1149
}
1150
1151
args.rval().setInt32(result);
1152
return true;
1153
}
1154
1155
/*
1156
* Separate interface for use by IonMonkey.
1157
* This code cannot re-enter Ion code.
1158
*/
1159
bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp,
1160
HandleString input, int32_t lastIndex,
1161
MatchPairs* maybeMatches, int32_t* result) {
1162
MOZ_ASSERT(lastIndex >= 0);
1163
1164
// The MatchPairs will always be passed in, but RegExp execution was
1165
// successful only if the pairs have actually been filled in.
1166
if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
1167
*result = CreateRegExpSearchResult(*maybeMatches);
1168
return true;
1169
}
1170
return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
1171
}
1172
1173
/*
1174
* ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1175
* steps 3, 9-14, except 12.a.i, 12.c.i.1.
1176
*/
1177
bool js::RegExpTester(JSContext* cx, unsigned argc, Value* vp) {
1178
CallArgs args = CallArgsFromVp(argc, vp);
1179
MOZ_ASSERT(args.length() == 3);
1180
MOZ_ASSERT(IsRegExpObject(args[0]));
1181
MOZ_ASSERT(args[1].isString());
1182
MOZ_ASSERT(args[2].isNumber());
1183
1184
RootedObject regexp(cx, &args[0].toObject());
1185
RootedString string(cx, args[1].toString());
1186
1187
int32_t lastIndex;
1188
MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1189
1190
/* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1191
size_t endIndex = 0;
1192
RegExpRunStatus status =
1193
ExecuteRegExp(cx, regexp, string, lastIndex, nullptr, &endIndex);
1194
1195
if (status == RegExpRunStatus_Error) {
1196
return false;
1197
}
1198
1199
if (status == RegExpRunStatus_Success) {
1200
MOZ_ASSERT(endIndex <= INT32_MAX);
1201
args.rval().setInt32(int32_t(endIndex));
1202
} else {
1203
args.rval().setInt32(-1);
1204
}
1205
return true;
1206
}
1207
1208
/*
1209
* Separate interface for use by IonMonkey.
1210
* This code cannot re-enter Ion code.
1211
*/
1212
bool js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
1213
int32_t lastIndex, int32_t* endIndex) {
1214
MOZ_ASSERT(lastIndex >= 0);
1215
1216
size_t endIndexTmp = 0;
1217
RegExpRunStatus status =
1218
ExecuteRegExp(cx, regexp, input, lastIndex, nullptr, &endIndexTmp);
1219
1220
if (status == RegExpRunStatus_Success) {
1221
MOZ_ASSERT(endIndexTmp <= INT32_MAX);
1222
*endIndex = int32_t(endIndexTmp);
1223
return true;
1224
}
1225
if (status == RegExpRunStatus_Success_NotFound) {
1226
*endIndex = -1;
1227
return true;
1228
}
1229
1230
return false;
1231
}
1232
1233
// Capture values handed to the substitution helpers in this file.
// RegExpGetSubstitution fills it so that every entry is either undefined or a
// linear string value.
using CapturesVector = GCVector<Value, 4>;
1234
1235
struct JSSubString {
1236
JSLinearString* base = nullptr;
1237
size_t offset = 0;
1238
size_t length = 0;
1239
1240
JSSubString() = default;
1241
1242
void initEmpty(JSLinearString* base) {
1243
this->base = base;
1244
offset = length = 0;
1245
}
1246
void init(JSLinearString* base, size_t offset, size_t length) {
1247
this->base = base;
1248
this->offset = offset;
1249
this->length = length;
1250
}
1251
};
1252
1253
static void GetParen(JSLinearString* matched, const JS::Value& capture,
1254
JSSubString* out) {
1255
if (capture.isUndefined()) {
1256
out->initEmpty(matched);
1257
return;
1258
}
1259
JSLinearString& captureLinear = capture.toString()->asLinear();
1260
out->init(&captureLinear, 0, captureLinear.length());
1261
}
1262
1263
template <typename CharT>
1264
static bool InterpretDollar(JSLinearString* matched, JSLinearString* string,
1265
size_t position, size_t tailPos,
1266
Handle<CapturesVector> captures,
1267
JSLinearString* replacement,
1268
const CharT* replacementBegin,
1269
const CharT* currentDollar,
1270
const CharT* replacementEnd, JSSubString* out,
1271
size_t* skip) {
1272
MOZ_ASSERT(*currentDollar == '$');
1273
1274
/* If there is only a dollar, bail now. */
1275
if (currentDollar + 1 >= replacementEnd) {
1276
return false;
1277
}
1278
1279
/* ES 2016 draft Mar 25, 2016 Table 46. */
1280
char16_t c = currentDollar[1];
1281
if (IsAsciiDigit(c)) {
1282
/* $n, $nn */
1283
unsigned num = AsciiDigitToNumber(c);
1284
if (num > captures.length()) {
1285
// The result is implementation-defined, do not substitute.
1286
return false;
1287
}
1288
1289
const CharT* currentChar = currentDollar + 2;
1290
if (currentChar < replacementEnd) {
1291
c = *currentChar;
1292
if (IsAsciiDigit(c)) {
1293
unsigned tmpNum = 10 * num + AsciiDigitToNumber(c);
1294
// If num > captures.length(), the result is implementation-defined.
1295
// Consume next character only if num <= captures.length().
1296
if (tmpNum <= captures.length()) {
1297
currentChar++;
1298
num = tmpNum;
1299
}
1300
}
1301
}
1302
1303
if (num == 0) {
1304
// The result is implementation-defined.
1305
// Do not substitute.
1306
return false;
1307
}
1308
1309
*skip = currentChar - currentDollar;
1310
1311
MOZ_ASSERT(num <= captures.length());
1312
1313
GetParen(matched, captures[num - 1], out);
1314
return true;
1315
}
1316
1317
*skip = 2;
1318
switch (c) {
1319
default:
1320
return false;
1321
case '$':
1322
out->init(replacement, currentDollar - replacementBegin, 1);
1323
break;
1324
case '&':
1325
out->init(matched, 0, matched->length());
1326
break;
1327
case '+':
1328
// SpiderMonkey extension
1329
if (captures.length() == 0) {
1330
out->initEmpty(matched);
1331
} else {
1332
GetParen(matched, captures[captures.length() - 1], out);
1333
}
1334
break;
1335
case '`':
1336
out->init(string, 0, position);
1337
break;
1338
case '\'':
1339
out->init(string, tailPos, string->length() - tailPos);
1340
break;
1341
}
1342
return true;
1343
}
1344
1345
template <typename CharT>
1346
static bool FindReplaceLengthString(JSContext* cx, HandleLinearString matched,
1347
HandleLinearString string, size_t position,
1348
size_t tailPos,
1349
Handle<CapturesVector> captures,
1350
HandleLinearString replacement,
1351
size_t firstDollarIndex, size_t* sizep) {
1352
CheckedInt<uint32_t> replen = replacement->length();
1353
1354
JS::AutoCheckCannotGC nogc;
1355
MOZ_ASSERT(firstDollarIndex < replacement->length());
1356
const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1357
const CharT* currentDollar = replacementBegin + firstDollarIndex;
1358
const CharT* replacementEnd = replacementBegin + replacement->length();
1359
do {
1360
JSSubString sub;
1361
size_t skip;
1362
if (InterpretDollar(matched, string, position, tailPos, captures,
1363
replacement, replacementBegin, currentDollar,
1364
replacementEnd, &sub, &skip)) {
1365
if (sub.length > skip) {
1366
replen += sub.length - skip;
1367
} else {
1368
replen -= skip - sub.length;
1369
}
1370
currentDollar += skip;
1371
} else {
1372
currentDollar++;
1373
}
1374
1375
currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1376
} while (currentDollar);
1377
1378
if (!replen.isValid()) {
1379
ReportAllocationOverflow(cx);
1380
return false;
1381
}
1382
1383
*sizep = replen.value();
1384
return true;
1385
}
1386
1387
static bool FindReplaceLength(JSContext* cx, HandleLinearString matched,
1388
HandleLinearString string, size_t position,
1389
size_t tailPos, Handle<CapturesVector> captures,
1390
HandleLinearString replacement,
1391
size_t firstDollarIndex, size_t* sizep) {
1392
return replacement->hasLatin1Chars()
1393
? FindReplaceLengthString<Latin1Char>(
1394
cx, matched, string, position, tailPos, captures,
1395
replacement, firstDollarIndex, sizep)
1396
: FindReplaceLengthString<char16_t>(cx, matched, string, position,
1397
tailPos, captures, replacement,
1398
firstDollarIndex, sizep);
1399
}
1400
1401
/*
1402
* Precondition: |sb| already has necessary growth space reserved (as
1403
* derived from FindReplaceLength), and has been inflated to TwoByte if
1404
* necessary.
1405
*/
1406
template <typename CharT>
1407
static void DoReplace(HandleLinearString matched, HandleLinearString string,
1408
size_t position, size_t tailPos,
1409
Handle<CapturesVector> captures,
1410
HandleLinearString replacement, size_t firstDollarIndex,
1411
StringBuffer& sb) {
1412
JS::AutoCheckCannotGC nogc;
1413
const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1414
const CharT* currentChar = replacementBegin;
1415
1416
MOZ_ASSERT(firstDollarIndex < replacement->length());
1417
const CharT* currentDollar = replacementBegin + firstDollarIndex;
1418
const CharT* replacementEnd = replacementBegin + replacement->length();
1419
do {
1420
/* Move one of the constant portions of the replacement value. */
1421
size_t len = currentDollar - currentChar;
1422
sb.infallibleAppend(currentChar, len);
1423
currentChar = currentDollar;
1424
1425
JSSubString sub;
1426
size_t skip;
1427
if (InterpretDollar(matched, string, position, tailPos, captures,
1428
replacement, replacementBegin, currentDollar,
1429
replacementEnd, &sub, &skip)) {
1430
sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
1431
currentChar += skip;
1432
currentDollar += skip;
1433
} else {
1434
currentDollar++;
1435
}
1436
1437
currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1438
} while (currentDollar);
1439
sb.infallibleAppend(currentChar,
1440
replacement->length() - (currentChar - replacementBegin));
1441
}
1442
1443
static bool NeedTwoBytes(HandleLinearString string,
1444
HandleLinearString replacement,
1445
HandleLinearString matched,
1446
Handle<CapturesVector> captures) {
1447
if (string->hasTwoByteChars()) {
1448
return true;
1449
}
1450
if (replacement->hasTwoByteChars()) {
1451
return true;
1452
}
1453
if (matched->hasTwoByteChars()) {
1454
return true;
1455
}
1456
1457
for (size_t i = 0, len = captures.length(); i < len; i++) {
1458
const Value& capture = captures[i];
1459
if (capture.isUndefined()) {
1460
continue;
1461
}
1462
if (capture.toString()->hasTwoByteChars()) {
1463
return true;
1464
}
1465
}
1466
1467
return false;
1468
}
1469
1470
/* ES 2016 draft Mar 25, 2016 21.1.3.14.1. */
1471
bool js::RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult,
1472
HandleLinearString string, size_t position,
1473
HandleLinearString replacement,
1474
size_t firstDollarIndex,
1475
MutableHandleValue rval) {
1476
MOZ_ASSERT(firstDollarIndex < replacement->length());
1477
1478
// Step 1 (skipped).
1479
1480
// Step 10 (reordered).
1481
uint32_t matchResultLength = matchResult->length();
1482
MOZ_ASSERT(matchResultLength > 0);
1483
MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength());
1484
1485
const Value& matchedValue = matchResult->getDenseElement(0);
1486
RootedLinearString matched(cx, matchedValue.toString()->ensureLinear(cx));
1487
if (!matched) {
1488
return false;
1489
}
1490
1491
// Step 2.
1492
size_t matchLength = matched->length();
1493
1494
// Steps 3-5 (skipped).
1495
1496
// Step 6.
1497
MOZ_ASSERT(position <= string->length());
1498
1499
uint32_t nCaptures = matchResultLength - 1;
1500
Rooted<CapturesVector> captures(cx, CapturesVector(cx));
1501
if (!captures.reserve(nCaptures)) {
1502
return false;
1503
}
1504
1505
// Step 7.
1506
for (uint32_t i = 1; i <= nCaptures; i++) {
1507
const Value& capture = matchResult->getDenseElement(i);
1508
1509
if (capture.isUndefined()) {
1510
captures.infallibleAppend(capture);
1511
continue;
1512
}
1513
1514
JSLinearString* captureLinear = capture.toString()->ensureLinear(cx);
1515
if (!captureLinear) {
1516
return false;
1517
}
1518
captures.infallibleAppend(StringValue(captureLinear));
1519
}
1520
1521
// Step 8 (skipped).
1522
1523
// Step 9.
1524
CheckedInt<uint32_t> checkedTailPos(0);
1525
checkedTailPos += position;
1526
checkedTailPos += matchLength;
1527
if (!checkedTailPos.isValid()) {
1528
ReportAllocationOverflow(cx);
1529
return false;
1530
}
1531
uint32_t tailPos = checkedTailPos.value();
1532
1533
// Step 11.
1534
size_t reserveLength;
1535
if (!FindReplaceLength(cx, matched, string, position, tailPos, captures,
1536
replacement, firstDollarIndex, &reserveLength)) {
1537
return false;
1538
}
1539
1540
JSStringBuilder result(cx);
1541
if (NeedTwoBytes(string, replacement, matched, captures)) {
1542
if (!result.ensureTwoByteChars()) {
1543
return false;
1544
}
1545
}
1546
1547
if (!result.reserve(reserveLength)) {
1548
return false;
1549
}
1550
1551
if (replacement->hasLatin1Chars()) {
1552
DoReplace<Latin1Char>(matched, string, position, tailPos, captures,
1553
replacement, firstDollarIndex, result);
1554
} else {
1555
DoReplace<char16_t>(matched, string, position, tailPos, captures,
1556
replacement, firstDollarIndex, result);
1557
}
1558
1559
// Step 12.
1560
JSString* resultString = result.finishString();
1561
if (!resultString) {
1562
return false;
1563
}
1564
1565
rval.setString(resultString);
1566
return true;
1567
}
1568
1569
// Native entry point taking one string argument. Returns the index of the
// first '$' in it, or -1 when there is none.
bool js::GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);
  JSString* text = args[0].toString();

  // Should be handled in different path.
  MOZ_ASSERT(text->length() != 0);

  int32_t dollarAt = -1;
  if (GetFirstDollarIndexRaw(cx, text, &dollarAt)) {
    args.rval().setInt32(dollarAt);
    return true;
  }

  return false;
}
1585
1586
template <typename TextChar>
1587
static MOZ_ALWAYS_INLINE int GetFirstDollarIndexImpl(const TextChar* text,
1588
uint32_t textLen) {
1589
const TextChar* end = text + textLen;
1590
for (const TextChar* c = text; c != end; ++c) {
1591
if (*c == '$') {
1592
return c - text;
1593
}
1594
}
1595
return -1;
1596
}
1597
1598
// Return the index of the first '$' in the linear string |text|, or -1 when
// there is none, scanning whichever character representation |text| uses.
int32_t js::GetFirstDollarIndexRawFlat(JSLinearString* text) {
  const uint32_t length = text->length();

  JS::AutoCheckCannotGC nogc;
  return text->hasLatin1Chars()
             ? GetFirstDollarIndexImpl(text->latin1Chars(nogc), length)
             : GetFirstDollarIndexImpl(text->twoByteChars(nogc), length);
}
1608
1609
// Linearize |str| and store the index of its first '$' (or -1) in |*index|.
// Returns false when linearization fails.
bool js::GetFirstDollarIndexRaw(JSContext* cx, JSString* str, int32_t* index) {
  if (JSLinearString* linear = str->ensureLinear(cx)) {
    *index = GetFirstDollarIndexRawFlat(linear);
    return true;
  }

  return false;
}
1618
1619
// Native wrapper around RegExpPrototypeOptimizableRaw: takes the prototype
// object as its only argument and returns the boolean answer.
bool js::RegExpPrototypeOptimizable(JSContext* cx, unsigned argc, Value* vp) {
  // This can only be called from self-hosted code.
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);

  const bool optimizable =
      RegExpPrototypeOptimizableRaw(cx, &args[0].toObject());
  args.rval().setBoolean(optimizable);
  return true;
}
1628
1629
bool js::RegExpPrototypeOptimizableRaw(JSContext* cx, JSObject* proto) {
1630
AutoUnsafeCallWithABI unsafe;
1631
AutoAssertNoPendingException aanpe(cx);
1632
if (!proto->isNative()) {
1633
return false;
1634
}
1635
1636
NativeObject* nproto = static_cast<NativeObject*>(proto);
1637
1638
Shape* shape = cx->realm()->regExps.getOptimizableRegExpPrototypeShape();
1639
if (shape == nproto->lastProperty()) {
1640
return true;
1641
}
1642
1643
JSFunction* flagsGetter;
1644
if (!GetOwnGetterPure(cx, proto, NameToId(cx->names().flags), &flagsGetter)) {
1645
return false;
1646
}
1647
1648
if (!flagsGetter) {
1649
return false;
1650
}
1651
1652
if (!IsSelfHostedFunctionWithName(flagsGetter,
1653
cx->names().RegExpFlagsGetter)) {
1654
return false;
1655
}
1656
1657
JSNative globalGetter;
1658
if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().global),
1659
&globalGetter)) {
1660
return false;
1661
}
1662
1663
if (globalGetter != regexp_global) {
1664
return false;
1665
}
1666
1667
JSNative ignoreCaseGetter;
1668
if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().ignoreCase),
1669
&ignoreCaseGetter)) {
1670
return false;
1671
}
1672
1673
if (ignoreCaseGetter != regexp_ignoreCase) {
1674
return false;
1675
}
1676
1677
JSNative multilineGetter;
1678
if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().multiline),
1679
&multilineGetter)) {
1680
return false;
1681
}
1682
1683
if (multilineGetter != regexp_multiline) {
1684
return false;
1685
}
1686
1687
JSNative stickyGetter;
1688
if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().sticky),
1689
&stickyGetter)) {
1690
return false;
1691
}
1692
1693
if (stickyGetter != regexp_sticky) {
1694
return false;
1695
}
1696
1697
JSNative unicodeGetter;
1698
if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().unicode),
1699
&unicodeGetter)) {
1700
return false;
1701
}
1702
1703
if (unicodeGetter != regexp_unicode) {
1704
return false;
1705
}
1706
1707
#ifdef ENABLE_NEW_REGEXP
1708
JSNative dotAllGetter;
1709
if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().dotAll),
1710
&dotAllGetter)) {
1711
return false;
1712
}
1713
1714
if (dotAllGetter != regexp_dotAll) {
1715
return false;
1716
}
1717
#endif
1718
1719
// Check if @@match, @@search, and exec are own data properties,
1720
// those values should be tested in selfhosted JS.
1721
bool has = false;
1722
if (!HasOwnDataPropertyPure(
1723
cx, proto, SYMBOL_TO_JSID(cx->wellKnownSymbols().match), &has)) {
1724