/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Structured representation of Unicode locale IDs used with Intl functions. */
8
9
#ifndef builtin_intl_LanguageTag_h
10
#define builtin_intl_LanguageTag_h
11
12
#include "mozilla/Assertions.h"
13
#include "mozilla/Span.h"
14
#include "mozilla/TextUtils.h"
15
#include "mozilla/TypedEnumBits.h"
16
#include "mozilla/Variant.h"
17
18
#include <algorithm>
19
#include <stddef.h>
20
#include <stdint.h>
21
#include <string.h>
22
#include <utility>
23
24
#include "js/AllocPolicy.h"
25
#include "js/GCAPI.h"
26
#include "js/Result.h"
27
#include "js/RootingAPI.h"
28
#include "js/Utility.h"
29
#include "js/Vector.h"
30
31
// Forward declarations of types this header only refers to through pointers
// (or handles to pointers).
struct JSContext;
class JSLinearString;
class JSString;
class JSTracer;
35
36
namespace js {
37
38
namespace intl {
39
40
/**
41
* Return true if |language| is a valid language subtag.
42
*/
43
template <typename CharT>
44
bool IsStructurallyValidLanguageTag(mozilla::Span<const CharT> language);
45
46
/**
47
* Return true if |script| is a valid script subtag.
48
*/
49
template <typename CharT>
50
bool IsStructurallyValidScriptTag(mozilla::Span<const CharT> script);
51
52
/**
53
* Return true if |region| is a valid region subtag.
54
*/
55
template <typename CharT>
56
bool IsStructurallyValidRegionTag(mozilla::Span<const CharT> region);
57
58
#ifdef DEBUG
// The following validators are only declared in DEBUG builds. Within this
// header the only one referenced is |IsStructurallyValidPrivateUseTag|, and
// that reference sits inside a MOZ_ASSERT.

/**
 * Return true if |variant| is a valid variant subtag.
 */
bool IsStructurallyValidVariantTag(mozilla::Span<const char> variant);

/**
 * Return true if |extension| is a valid Unicode extension subtag.
 */
bool IsStructurallyValidUnicodeExtensionTag(
    mozilla::Span<const char> extension);

/**
 * Return true if |privateUse| is a valid private-use subtag.
 */
bool IsStructurallyValidPrivateUseTag(mozilla::Span<const char> privateUse);

#endif
76
77
template <typename CharT>
78
char AsciiToLowerCase(CharT c) {
79
MOZ_ASSERT(mozilla::IsAscii(c));
80
return mozilla::IsAsciiUppercaseAlpha(c) ? (c + 0x20) : c;
81
}
82
83
template <typename CharT>
84
char AsciiToUpperCase(CharT c) {
85
MOZ_ASSERT(mozilla::IsAscii(c));
86
return mozilla::IsAsciiLowercaseAlpha(c) ? (c - 0x20) : c;
87
}
88
89
template <typename CharT>
90
void AsciiToLowerCase(CharT* chars, size_t length, char* dest) {
91
// Tell the analysis the |std::transform| function can't GC.
92
JS::AutoSuppressGCAnalysis nogc;
93
94
char (&fn)(CharT) = AsciiToLowerCase;
95
std::transform(chars, chars + length, dest, fn);
96
}
97
98
template <typename CharT>
99
void AsciiToUpperCase(CharT* chars, size_t length, char* dest) {
100
// Tell the analysis the |std::transform| function can't GC.
101
JS::AutoSuppressGCAnalysis nogc;
102
103
char (&fn)(CharT) = AsciiToUpperCase;
104
std::transform(chars, chars + length, dest, fn);
105
}
106
107
/**
 * Title-case |length| ASCII characters starting at |chars| and write the
 * result to |dest|: the first character is upper-cased, all following
 * characters are lower-cased. Does nothing when |length| is zero.
 */
template <typename CharT>
void AsciiToTitleCase(CharT* chars, size_t length, char* dest) {
  if (length == 0) {
    return;
  }

  AsciiToUpperCase(chars, 1, dest);
  AsciiToLowerCase(chars + 1, length - 1, dest + 1);
}
114
115
// Constants for language subtag lengths. Each constant is the maximum number of
// characters of the corresponding subtag; the grammar production it is derived
// from is quoted above it.
namespace LanguageTagLimits {

// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
static constexpr size_t LanguageLength = 8;

// unicode_script_subtag = alpha{4} ;
static constexpr size_t ScriptLength = 4;

// unicode_region_subtag = (alpha{2} | digit{3}) ;
static constexpr size_t RegionLength = 3;
static constexpr size_t AlphaRegionLength = 2;
static constexpr size_t DigitRegionLength = 3;

// A region buffer sized by |RegionLength| must be able to hold either form.
static_assert(RegionLength >= AlphaRegionLength &&
                  RegionLength >= DigitRegionLength,
              "RegionLength must cover both region subtag forms");

// key = alphanum alpha ;
static constexpr size_t UnicodeKeyLength = 2;

// tkey = alpha digit ;
static constexpr size_t TransformKeyLength = 2;

}  // namespace LanguageTagLimits
136
137
// Fixed size language subtag which is stored inline in LanguageTag.
138
template <size_t Length>
139
class LanguageTagSubtag final {
140
uint8_t length_ = 0;
141
char chars_[Length] = {}; // zero initialize
142
143
public:
144
LanguageTagSubtag() = default;
145
146
LanguageTagSubtag(const LanguageTagSubtag&) = delete;
147
LanguageTagSubtag& operator=(const LanguageTagSubtag&) = delete;
148
149
size_t length() const { return length_; }
150
bool missing() const { return length_ == 0; }
151
bool present() const { return length_ > 0; }
152
153
mozilla::Span<const char> span() const { return {chars_, length_}; }
154
155
template <typename CharT>
156
void set(mozilla::Span<const CharT> str) {
157
MOZ_ASSERT(str.size() <= Length);
158
std::copy_n(str.data(), str.size(), chars_);
159
length_ = str.size();
160
}
161
162
// The toXYZCase() methods are using |Length| instead of |length()|, because
163
// current compilers (tested GCC and Clang) can't infer the maximum string
164
// length - even when using hints like |std::min| - and instead are emitting
165
// SIMD optimized code. Using a fixed sized length avoids emitting the SIMD
166
// code. (Emitting SIMD code doesn't make sense here, because the SIMD code
167
// only kicks in for long strings.) A fixed length will additionally ensure
168
// the compiler unrolls the loop in the case conversion code.
169
170
void toLowerCase() { AsciiToLowerCase(chars_, Length, chars_); }
171
172
void toUpperCase() { AsciiToUpperCase(chars_, Length, chars_); }
173
174
void toTitleCase() { AsciiToTitleCase(chars_, Length, chars_); }
175
176
template <size_t N>
177
bool equalTo(const char (&str)[N]) const {
178
static_assert(N - 1 <= Length,
179
"subtag literals must not exceed the maximum subtag length");
180
181
return length_ == N - 1 && memcmp(chars_, str, N - 1) == 0;
182
}
183
};
184
185
// Inline subtag storage types, each sized by the matching maximum length from
// |LanguageTagLimits|.
using LanguageSubtag = LanguageTagSubtag<LanguageTagLimits::LanguageLength>;
using ScriptSubtag = LanguageTagSubtag<LanguageTagLimits::ScriptLength>;
using RegionSubtag = LanguageTagSubtag<LanguageTagLimits::RegionLength>;
188
189
/**
190
* Object representing a language tag.
191
*
192
* All subtags are already in canonicalized case.
193
*/
194
class MOZ_STACK_CLASS LanguageTag final {
195
LanguageSubtag language_ = {};
196
ScriptSubtag script_ = {};
197
RegionSubtag region_ = {};
198
199
using VariantsVector = Vector<JS::UniqueChars, 2>;
200
using ExtensionsVector = Vector<JS::UniqueChars, 2>;
201
202
VariantsVector variants_;
203
ExtensionsVector extensions_;
204
JS::UniqueChars privateuse_ = nullptr;
205
206
friend class LanguageTagParser;
207
208
public:
209
// Flag to request canonicalized Unicode extensions.
210
enum class UnicodeExtensionCanonicalForm : bool { No, Yes };
211
212
private:
213
bool canonicalizeUnicodeExtension(
214
JSContext* cx, JS::UniqueChars& unicodeExtension,
215
UnicodeExtensionCanonicalForm canonicalForm);
216
217
bool canonicalizeTransformExtension(JSContext* cx,
218
JS::UniqueChars& transformExtension);
219
220
public:
221
static bool languageMapping(LanguageSubtag& language);
222
static bool complexLanguageMapping(const LanguageSubtag& language);
223
224
private:
225
static bool regionMapping(RegionSubtag& region);
226
static bool complexRegionMapping(const RegionSubtag& region);
227
228
void performComplexLanguageMappings();
229
void performComplexRegionMappings();
230
231
MOZ_MUST_USE bool updateGrandfatheredMappings(JSContext* cx);
232
233
static const char* replaceUnicodeExtensionType(
234
mozilla::Span<const char> key, mozilla::Span<const char> type);
235
236
public:
237
explicit LanguageTag(JSContext* cx) : variants_(cx), extensions_(cx) {}
238
239
LanguageTag(const LanguageTag&) = delete;
240
LanguageTag& operator=(const LanguageTag&) = delete;
241
242
const LanguageSubtag& language() const { return language_; }
243
const ScriptSubtag& script() const { return script_; }
244
const RegionSubtag& region() const { return region_; }
245
const auto& variants() const { return variants_; }
246
const auto& extensions() const { return extensions_; }
247
const char* privateuse() const { return privateuse_.get(); }
248
249
/**
250
* Return the Unicode extension subtag or nullptr if not present.
251
*/
252
const char* unicodeExtension() const;
253
254
private:
255
ptrdiff_t unicodeExtensionIndex() const;
256
257
public:
258
/**
259
* Set the language subtag. The input must be a valid language subtag.
260
*/
261
template <size_t N>
262
void setLanguage(const char (&language)[N]) {
263
mozilla::Span<const char> span(language, N - 1);
264
MOZ_ASSERT(IsStructurallyValidLanguageTag(span));
265
language_.set(span);
266
}
267
268
/**
269
* Set the language subtag. The input must be a valid language subtag.
270
*/
271
void setLanguage(const LanguageSubtag& language) {
272
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.span()));
273
language_.set(language.span());
274
}
275
276
/**
277
* Set the script subtag. The input must be a valid script subtag.
278
*/
279
template <size_t N>
280
void setScript(const char (&script)[N]) {
281
mozilla::Span<const char> span(script, N - 1);
282
MOZ_ASSERT(IsStructurallyValidScriptTag(span));
283
script_.set(span);
284
}
285
286
/**
287
* Set the script subtag. The input must be a valid script subtag or the empty
288
* string.
289
*/
290
void setScript(const ScriptSubtag& script) {
291
MOZ_ASSERT(script.missing() || IsStructurallyValidScriptTag(script.span()));
292
script_.set(script.span());
293
}
294
295
/**
296
* Set the region subtag. The input must be a valid region subtag.
297
*/
298
template <size_t N>
299
void setRegion(const char (&region)[N]) {
300
mozilla::Span<const char> span(region, N - 1);
301
MOZ_ASSERT(IsStructurallyValidRegionTag(span));
302
region_.set(span);
303
}
304
305
/**
306
* Set the region subtag. The input must be a valid region subtag or the empty
307
* empty string.
308
*/
309
void setRegion(const RegionSubtag& region) {
310
MOZ_ASSERT(region.missing() || IsStructurallyValidRegionTag(region.span()));
311
region_.set(region.span());
312
}
313
314
/**
315
* Removes all variant subtags.
316
*/
317
void clearVariants() { variants_.clearAndFree(); }
318
319
/**
320
* Set the Unicode extension subtag. The input must be a valid Unicode
321
* extension subtag.
322
*/
323
bool setUnicodeExtension(JS::UniqueChars extension);
324
325
/**
326
* Remove any Unicode extension subtag if present.
327
*/
328
void clearUnicodeExtension();
329
330
/**
331
* Set the private-use subtag. The input must be a valid private-use subtag
332
* or nullptr.
333
*/
334
void setPrivateuse(JS::UniqueChars privateuse) {
335
MOZ_ASSERT(!privateuse ||
336
IsStructurallyValidPrivateUseTag(
337
{privateuse.get(), strlen(privateuse.get())}));
338
privateuse_ = std::move(privateuse);
339
}
340
341
/**
342
* Canonicalize the base-name subtags, that means the language, script,
343
* region, and variant subtags.
344
*/
345
bool canonicalizeBaseName(JSContext* cx);
346
347
/**
348
* Canonicalize all extension subtags.
349
*/
350
bool canonicalizeExtensions(JSContext* cx,
351
UnicodeExtensionCanonicalForm canonicalForm);
352
353
/**
354
* Canonicalizes the given structurally valid Unicode BCP 47 locale
355
* identifier, including regularized case of subtags. For example, the
356
* language tag Zh-haNS-bu-variant2-Variant1-u-ca-chinese-t-Zh-laTN-x-PRIVATE,
357
* where
358
*
359
* Zh ; 2*3ALPHA
360
* -haNS ; ["-" script]
361
* -bu ; ["-" region]
362
* -variant2 ; *("-" variant)
363
* -Variant1
364
* -u-ca-chinese ; *("-" extension)
365
* -t-Zh-laTN
366
* -x-PRIVATE ; ["-" privateuse]
367
*
368
* becomes zh-Hans-MM-variant1-variant2-t-zh-latn-u-ca-chinese-x-private
369
*
370
* UTS 35 specifies two different canonicalization algorithms. There's one to
371
* canonicalize BCP 47 language tags and other one to canonicalize Unicode
372
* locale identifiers. The latter one wasn't present when ECMA-402 was changed
373
* to use Unicode BCP 47 locale identifiers instead of BCP 47 language tags,
374
* so ECMA-402 currently only uses the former to canonicalize Unicode BCP 47
375
* locale identifiers.
376
*
377
* Spec: ECMAScript Internationalization API Specification, 6.2.3.
378
* Spec:
381
*/
382
bool canonicalize(JSContext* cx,
383
UnicodeExtensionCanonicalForm canonicalForm) {
384
return canonicalizeBaseName(cx) &&
385
canonicalizeExtensions(cx, canonicalForm);
386
}
387
388
/**
389
* Return the string representation of this language tag.
390
*/
391
JSString* toString(JSContext* cx) const;
392
393
/**
394
* Return the string representation of this language tag as a null-terminated
395
* C-string.
396
*/
397
JS::UniqueChars toStringZ(JSContext* cx) const;
398
399
/**
400
* Add likely-subtags to the language tag.
401
*
403
*/
404
bool addLikelySubtags(JSContext* cx);
405
406
/**
407
* Remove likely-subtags from the language tag.
408
*
410
*/
411
bool removeLikelySubtags(JSContext* cx);
412
};
413
414
/**
415
* Parser for Unicode BCP 47 locale identifiers.
416
*
418
*/
419
class MOZ_STACK_CLASS LanguageTagParser final {
420
public:
421
// Exposed as |public| for |MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS|.
422
enum class TokenKind : uint8_t {
423
None = 0b000,
424
Alpha = 0b001,
425
Digit = 0b010,
426
AlphaDigit = 0b011,
427
Error = 0b100
428
};
429
430
private:
431
class Token final {
432
size_t index_;
433
size_t length_;
434
TokenKind kind_;
435
436
public:
437
Token(TokenKind kind, size_t index, size_t length)
438
: index_(index), length_(length), kind_(kind) {}
439
440
TokenKind kind() const { return kind_; }
441
size_t index() const { return index_; }
442
size_t length() const { return length_; }
443
444
bool isError() const { return kind_ == TokenKind::Error; }
445
bool isNone() const { return kind_ == TokenKind::None; }
446
bool isAlpha() const { return kind_ == TokenKind::Alpha; }
447
bool isDigit() const { return kind_ == TokenKind::Digit; }
448
bool isAlphaDigit() const { return kind_ == TokenKind::AlphaDigit; }
449
};
450
451
using LocaleChars = mozilla::Variant<const JS::Latin1Char*, const char16_t*>;
452
453
const LocaleChars& locale_;
454
size_t length_;
455
size_t index_ = 0;
456
457
LanguageTagParser(const LocaleChars& locale, size_t length)
458
: locale_(locale), length_(length) {}
459
460
char16_t charAtUnchecked(size_t index) const {
461
if (locale_.is<const JS::Latin1Char*>()) {
462
return locale_.as<const JS::Latin1Char*>()[index];
463
}
464
return locale_.as<const char16_t*>()[index];
465
}
466
467
char charAt(size_t index) const {
468
char16_t c = charAtUnchecked(index);
469
MOZ_ASSERT(mozilla::IsAscii(c));
470
return c;
471
}
472
473
// Copy the token characters into |subtag|.
474
template <size_t N>
475
void copyChars(const Token& tok, LanguageTagSubtag<N>& subtag) const {
476
size_t index = tok.index();
477
size_t length = tok.length();
478
if (locale_.is<const JS::Latin1Char*>()) {
479
using T = const JS::Latin1Char;
480
subtag.set(mozilla::MakeSpan(locale_.as<T*>() + index, length));
481
} else {
482
using T = const char16_t;
483
subtag.set(mozilla::MakeSpan(locale_.as<T*>() + index, length));
484
}
485
}
486
487
// Create a string copy of |length| characters starting at |index|.
488
JS::UniqueChars chars(JSContext* cx, size_t index, size_t length) const;
489
490
// Create a string copy of the token characters.
491
JS::UniqueChars chars(JSContext* cx, const Token& tok) const {
492
return chars(cx, tok.index(), tok.length());
493
}
494
495
JS::UniqueChars extension(JSContext* cx, const Token& start,
496
const Token& end) const {
497
MOZ_ASSERT(start.index() < end.index());
498
499
size_t length = end.index() - 1 - start.index();
500
return chars(cx, start.index(), length);
501
}
502
503
Token nextToken();
504
505
// unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
506
//
507
// Four character language subtags are not allowed in Unicode BCP 47 locale
508
// identifiers. Also see the comparison to Unicode CLDR locale identifiers in
510
bool isLanguage(const Token& tok) const {
511
return tok.isAlpha() && ((2 <= tok.length() && tok.length() <= 3) ||
512
(5 <= tok.length() && tok.length() <= 8));
513
}
514
515
// unicode_script_subtag = alpha{4} ;
516
bool isScript(const Token& tok) const {
517
return tok.isAlpha() && tok.length() == 4;
518
}
519
520
// unicode_region_subtag = (alpha{2} | digit{3}) ;
521
bool isRegion(const Token& tok) const {
522
return (tok.isAlpha() && tok.length() == 2) ||
523
(tok.isDigit() && tok.length() == 3);
524
}
525
526
// unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
527
bool isVariant(const Token& tok) const {
528
return (5 <= tok.length() && tok.length() <= 8) ||
529
(tok.length() == 4 && mozilla::IsAsciiDigit(charAt(tok.index())));
530
}
531
532
// Returns the code unit of the first character at the given singleton token.
533
// Always returns the lower case form of an alphabetical character.
534
char singletonKey(const Token& tok) const {
535
MOZ_ASSERT(tok.length() == 1);
536
return AsciiToLowerCase(charAt(tok.index()));
537
}
538
539
// extensions = unicode_locale_extensions |
540
// transformed_extensions |
541
// other_extensions ;
542
//
543
// unicode_locale_extensions = sep [uU] ((sep keyword)+ |
544
// (sep attribute)+ (sep keyword)*) ;
545
//
546
// transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) |
547
// (sep tfield)+) ;
548
//
549
// other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
550
bool isExtensionStart(const Token& tok) const {
551
return tok.length() == 1 && singletonKey(tok) != 'x';
552
}
553
554
// other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
555
bool isOtherExtensionPart(const Token& tok) const {
556
return 2 <= tok.length() && tok.length() <= 8;
557
}
558
559
// unicode_locale_extensions = sep [uU] ((sep keyword)+ |
560
// (sep attribute)+ (sep keyword)*) ;
561
// keyword = key (sep type)? ;
562
bool isUnicodeExtensionPart(const Token& tok) const {
563
return isUnicodeExtensionKey(tok) || isUnicodeExtensionType(tok) ||
564
isUnicodeExtensionAttribute(tok);
565
}
566
567
// attribute = alphanum{3,8} ;
568
bool isUnicodeExtensionAttribute(const Token& tok) const {
569
return 3 <= tok.length() && tok.length() <= 8;
570
}
571
572
// key = alphanum alpha ;
573
bool isUnicodeExtensionKey(const Token& tok) const {
574
return tok.length() == 2 && mozilla::IsAsciiAlpha(charAt(tok.index() + 1));
575
}
576
577
// type = alphanum{3,8} (sep alphanum{3,8})* ;
578
bool isUnicodeExtensionType(const Token& tok) const {
579
return 3 <= tok.length() && tok.length() <= 8;
580
}
581
582
// tkey = alpha digit ;
583
bool isTransformExtensionKey(const Token& tok) const {
584
return tok.length() == 2 && mozilla::IsAsciiAlpha(charAt(tok.index())) &&
585
mozilla::IsAsciiDigit(charAt(tok.index() + 1));
586
}
587
588
// tvalue = (sep alphanum{3,8})+ ;
589
bool isTransformExtensionPart(const Token& tok) const {
590
return 3 <= tok.length() && tok.length() <= 8;
591
}
592
593
// pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
594
bool isPrivateUseStart(const Token& tok) const {
595
return tok.length() == 1 && singletonKey(tok) == 'x';
596
}
597
598
// pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
599
bool isPrivateUsePart(const Token& tok) const {
600
return 1 <= tok.length() && tok.length() <= 8;
601
}
602
603
// Helper function for use in |parseBaseName| and
604
// |parseTlangInTransformExtension|. Do not use this directly!
605
static JS::Result<bool> internalParseBaseName(JSContext* cx,
606
LanguageTagParser& ts,
607
LanguageTag& tag, Token& tok);
608
609
// Parse the `unicode_language_id` production, i.e. the
610
// language/script/region/variants portion of a language tag, into |tag|.
611
// |tok| must be the current token.
612
static JS::Result<bool> parseBaseName(JSContext* cx, LanguageTagParser& ts,
613
LanguageTag& tag, Token& tok) {
614
return internalParseBaseName(cx, ts, tag, tok);
615
}
616
617
// Parse the `tlang` production within a parsed 't' transform extension.
618
// The precise requirements for "previously parsed" are:
619
//
620
// * the input begins from current token |tok| with a valid `tlang`
621
// * the `tlang` is wholly lowercase (*not* canonical case)
622
// * variant subtags in the `tlang` may contain duplicates and be
623
// unordered
624
//
625
// Return an error on internal failure. Otherwise, return a success value. If
626
// there was no `tlang`, then |tag.language().missing()|. But if there was a
627
// `tlang`, then |tag| is filled with subtags exactly as they appeared in the
628
// parse input.
629
static JS::Result<JS::Ok> parseTlangInTransformExtension(
630
JSContext* cx, LanguageTagParser& ts, LanguageTag& tag, Token& tok) {
631
MOZ_ASSERT(ts.isLanguage(tok));
632
return internalParseBaseName(cx, ts, tag, tok).map([](bool parsed) {
633
MOZ_ASSERT(parsed);
634
return JS::Ok();
635
});
636
}
637
638
friend class LanguageTag;
639
640
class Range final {
641
size_t begin_;
642
size_t length_;
643
644
public:
645
Range(size_t begin, size_t length) : begin_(begin), length_(length) {}
646
647
template <typename T>
648
T* begin(T* ptr) const {
649
return ptr + begin_;
650
}
651
652
size_t length() const { return length_; }
653
};
654
655
using TFieldVector = js::Vector<Range, 8>;
656
using AttributesVector = js::Vector<Range, 8>;
657
using KeywordsVector = js::Vector<Range, 8>;
658
659
// Parse |extension|, which must be a validated, fully lowercase
660
// `transformed_extensions` subtag, and fill |tag| and |fields| from the
661
// `tlang` and `tfield` components. Data in |tag| is lowercase, consistent
662
// with |extension|.
663
static JS::Result<bool> parseTransformExtension(
664
JSContext* cx, mozilla::Span<const char> extension, LanguageTag& tag,
665
TFieldVector& fields);
666
667
// Parse |extension|, which must be a validated, fully lowercase
668
// `unicode_locale_extensions` subtag, and fill |attributes| and |keywords|
669
// from the `attribute` and `keyword` components.
670
static JS::Result<bool> parseUnicodeExtension(
671
JSContext* cx, mozilla::Span<const char> extension,
672
AttributesVector& attributes, KeywordsVector& keywords);
673
674
static JS::Result<bool> tryParse(JSContext* cx, LocaleChars& localeChars,
675
size_t localeLength, LanguageTag& tag);
676
677
public:
678
// Parse the input string as a language tag. Reports an error to the context
679
// if the input can't be parsed completely.
680
static bool parse(JSContext* cx, JSLinearString* locale, LanguageTag& tag);
681
682
// Parse the input string as a language tag. Reports an error to the context
683
// if the input can't be parsed completely.
684
static bool parse(JSContext* cx, mozilla::Span<const char> locale,
685
LanguageTag& tag);
686
687
// Parse the input string as a language tag. Returns Ok(true) if the input
688
// could be completely parsed, Ok(false) if the input couldn't be parsed,
689
// or Err() in case of internal error.
690
static JS::Result<bool> tryParse(JSContext* cx, JSLinearString* locale,
691
LanguageTag& tag);
692
693
// Parse the input string as a language tag. Returns Ok(true) if the input
694
// could be completely parsed, Ok(false) if the input couldn't be parsed,
695
// or Err() in case of internal error.
696
static JS::Result<bool> tryParse(JSContext* cx,
697
mozilla::Span<const char> locale,
698
LanguageTag& tag);
699
700
// Parse the input string as the base-name parts (language, script, region,
701
// variants) of a language tag. Ignores any trailing characters.
702
static bool parseBaseName(JSContext* cx, mozilla::Span<const char> locale,
703
LanguageTag& tag);
704
705
// Return true iff |extension| can be parsed as a Unicode extension subtag.
706
static bool canParseUnicodeExtension(mozilla::Span<const char> extension);
707
708
// Return true iff |unicodeType| can be parsed as a Unicode extension type.
709
static bool canParseUnicodeExtensionType(JSLinearString* unicodeType);
710
};
711
712
// Provide bitwise operators for |LanguageTagParser::TokenKind|, whose
// enumerators are defined as bit patterns (|AlphaDigit| is |Alpha | Digit|).
MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(LanguageTagParser::TokenKind)
713
714
/**
715
* Parse a string as a standalone |language| tag. If |str| is a standalone
716
* language tag, store it in |result| and return true. Otherwise return false.
717
*/
718
MOZ_MUST_USE bool ParseStandaloneLanguageTag(JS::Handle<JSLinearString*> str,
719
LanguageSubtag& result);
720
721
/**
722
* Parse a string as a standalone |script| tag. If |str| is a standalone script
723
* tag, store it in |result| and return true. Otherwise return false.
724
*/
725
MOZ_MUST_USE bool ParseStandaloneScriptTag(JS::Handle<JSLinearString*> str,
726
ScriptSubtag& result);
727
728
/**
729
* Parse a string as a standalone |region| tag. If |str| is a standalone region
730
* tag, store it in |result| and return true. Otherwise return false.
731
*/
732
MOZ_MUST_USE bool ParseStandaloneRegionTag(JS::Handle<JSLinearString*> str,
733
RegionSubtag& result);
734
735
/**
736
* Parse a string as an ISO-639 language code. Return |nullptr| in the result if
737
* the input could not be parsed or the canonical form of the resulting language
738
* tag contains more than a single language subtag.
739
*/
740
JS::Result<JSString*> ParseStandaloneISO639LanguageTag(
741
JSContext* cx, JS::Handle<JSLinearString*> str);
742
743
class UnicodeExtensionKeyword final {
744
char key_[LanguageTagLimits::UnicodeKeyLength];
745
JSLinearString* type_;
746
747
public:
748
using UnicodeKey = const char (&)[LanguageTagLimits::UnicodeKeyLength + 1];
749
using UnicodeKeySpan =
750
mozilla::Span<const char, LanguageTagLimits::UnicodeKeyLength>;
751
752
UnicodeExtensionKeyword(UnicodeKey key, JSLinearString* type)
753
: key_{key[0], key[1]}, type_(type) {}
754
755
UnicodeKeySpan key() const { return {key_, sizeof(key_)}; }
756
JSLinearString* type() const { return type_; }
757
758
void trace(JSTracer* trc);
759
};
760
761
// Apply the Unicode extension |keywords| to |tag|. Only the declaration is
// visible in this header; the return value must be checked (MOZ_MUST_USE).
extern MOZ_MUST_USE bool ApplyUnicodeExtensionToTag(
    JSContext* cx, LanguageTag& tag,
    JS::HandleVector<UnicodeExtensionKeyword> keywords);
764
765
} // namespace intl
766
767
} // namespace js
768
769
#endif /* builtin_intl_LanguageTag_h */