/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef builtin_intl_SharedIntlData_h
8
#define builtin_intl_SharedIntlData_h
9
10
#include "mozilla/MemoryReporting.h"
11
#include "mozilla/UniquePtr.h"
12
13
#include <stddef.h>
14
15
#include "js/AllocPolicy.h"
16
#include "js/CharacterEncoding.h"
17
#include "js/GCAPI.h"
18
#include "js/GCHashTable.h"
19
#include "js/RootingAPI.h"
20
#include "js/Utility.h"
21
#include "vm/StringType.h"
22
23
// Declares |UDateTimePatternGenerator| as an alias for |void*|, so the
// declarations below can name the type.
// NOTE(review): presumably this stands in for ICU's own type so that this
// header does not need to include ICU headers -- confirm.
using UDateTimePatternGenerator = void*;
24
25
namespace js {
26
27
namespace intl {
28
29
class DateTimePatternGeneratorDeleter {
30
public:
31
void operator()(UDateTimePatternGenerator* ptr);
32
};
33
34
/**
35
* Stores Intl data which can be shared across compartments (but not contexts).
36
*
37
* Used for data which is expensive when computed repeatedly or is not
38
* available through ICU.
39
*/
40
class SharedIntlData {
41
struct LinearStringLookup {
42
union {
43
const JS::Latin1Char* latin1Chars;
44
const char16_t* twoByteChars;
45
};
46
bool isLatin1;
47
size_t length;
48
JS::AutoCheckCannotGC nogc;
49
HashNumber hash = 0;
50
51
explicit LinearStringLookup(JSLinearString* string)
52
: isLatin1(string->hasLatin1Chars()), length(string->length()) {
53
if (isLatin1) {
54
latin1Chars = string->latin1Chars(nogc);
55
} else {
56
twoByteChars = string->twoByteChars(nogc);
57
}
58
}
59
60
LinearStringLookup(const char* chars, size_t length)
61
: isLatin1(true), length(length) {
62
latin1Chars = reinterpret_cast<const JS::Latin1Char*>(chars);
63
}
64
};
65
66
private:
67
/**
68
* Information tracking the set of the supported time zone names, derived
69
* from the IANA time zone database <https://www.iana.org/time-zones>.
70
*
71
* There are two kinds of IANA time zone names: Zone and Link (denoted as
72
* such in database source files). Zone names are the canonical, preferred
73
* name for a time zone, e.g. Asia/Kolkata. Link names simply refer to
74
* target Zone names for their meaning, e.g. Asia/Calcutta targets
75
* Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a
76
* sense of deprecation: some Link names also exist partly for convenience,
77
* e.g. UTC and GMT as Link names targeting the Zone name Etc/UTC.
78
*
79
* Two data sources determine the time zone names we support: those ICU
80
* supports and IANA's zone information.
81
*
82
* Unfortunately the names ICU and IANA support, and their Link
83
* relationships from name to target, aren't identical, so we can't simply
84
* implicitly trust ICU's name handling. We must perform various
85
* preprocessing of user-provided zone names and post-processing of
86
* ICU-provided zone names to implement ECMA-402's IANA-consistent behavior.
87
*
90
*/
91
92
using TimeZoneName = JSAtom*;
93
94
struct TimeZoneHasher {
95
struct Lookup : LinearStringLookup {
96
explicit Lookup(JSLinearString* timeZone);
97
};
98
99
static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
100
static bool match(TimeZoneName key, const Lookup& lookup);
101
};
102
103
using TimeZoneSet =
104
GCHashSet<TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
105
using TimeZoneMap =
106
GCHashMap<TimeZoneName, TimeZoneName, TimeZoneHasher, SystemAllocPolicy>;
107
108
/**
109
* As a threshold matter, available time zones are those time zones ICU
110
* supports, via ucal_openTimeZones. But ICU supports additional non-IANA
111
* time zones described in intl/icu/source/tools/tzcode/icuzones (listed in
112
* IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards
113
* compatibility purposes. This set consists of ICU's supported time zones,
114
* minus all backwards-compatibility time zones.
115
*/
116
TimeZoneSet availableTimeZones;
117
118
/**
119
* IANA treats some time zone names as Zones, that ICU instead treats as
120
* Links. For example, IANA considers "America/Indiana/Indianapolis" to be
121
* a Zone and "America/Fort_Wayne" a Link that targets it, but ICU
122
* considers the former a Link that targets "America/Indianapolis" (which
123
* IANA treats as a Link).
124
*
125
* ECMA-402 requires that we respect IANA data, so if we're asked to
126
* canonicalize a time zone name in this set, we must *not* return ICU's
127
* canonicalization.
128
*/
129
TimeZoneSet ianaZonesTreatedAsLinksByICU;
130
131
/**
132
* IANA treats some time zone names as Links to one target, that ICU
133
* instead treats as either Zones, or Links to different targets. An
134
* example of the former is "Asia/Calcutta, which IANA assigns the target
135
* "Asia/Kolkata" but ICU considers its own Zone. An example of the latter
136
* is "America/Virgin", which IANA assigns the target
137
* "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas".
138
*
139
* ECMA-402 requires that we respect IANA data, so if we're asked to
140
* canonicalize a time zone name that's a key in this map, we *must* return
141
* the corresponding value and *must not* return ICU's canonicalization.
142
*/
143
TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU;
144
145
bool timeZoneDataInitialized = false;
146
147
/**
148
* Precomputes the available time zone names, because it's too expensive to
149
* call ucal_openTimeZones() repeatedly.
150
*/
151
bool ensureTimeZones(JSContext* cx);
152
153
public:
154
/**
155
* Returns the validated time zone name in |result|. If the input time zone
156
* isn't a valid IANA time zone name, |result| remains unchanged.
157
*/
158
bool validateTimeZoneName(JSContext* cx, JS::Handle<JSString*> timeZone,
159
JS::MutableHandle<JSAtom*> result);
160
161
/**
162
* Returns the canonical time zone name in |result|. If no canonical name
163
* was found, |result| remains unchanged.
164
*
165
* This method only handles time zones which are canonicalized differently
166
* by ICU when compared to IANA.
167
*/
168
bool tryCanonicalizeTimeZoneConsistentWithIANA(
169
JSContext* cx, JS::Handle<JSString*> timeZone,
170
JS::MutableHandle<JSAtom*> result);
171
172
private:
173
using Locale = JSAtom*;
174
175
struct LocaleHasher {
176
struct Lookup : LinearStringLookup {
177
explicit Lookup(JSLinearString* locale);
178
Lookup(const char* chars, size_t length);
179
};
180
181
static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; }
182
static bool match(Locale key, const Lookup& lookup);
183
};
184
185
using LocaleSet = GCHashSet<Locale, LocaleHasher, SystemAllocPolicy>;
186
187
// Set of supported locales for all Intl service constructors except Collator,
188
// which uses its own set.
189
//
190
// UDateFormat:
191
// udat_[count,get]Available() return the same results as their
192
// uloc_[count,get]Available() counterparts.
193
//
194
// UNumberFormatter:
195
// unum_[count,get]Available() return the same results as their
196
// uloc_[count,get]Available() counterparts.
197
//
198
// UListFormatter, UPluralRules, and URelativeDateTimeFormatter:
199
// We're going to use ULocale availableLocales as per ICU recommendation:
201
LocaleSet supportedLocales;
202
203
// ucol_[count,get]Available() return different results compared to
204
// uloc_[count,get]Available(), we can't use |supportedLocales| here.
205
LocaleSet collatorSupportedLocales;
206
207
bool supportedLocalesInitialized = false;
208
209
// CountAvailable and GetAvailable describe the signatures used for ICU API
210
// to determine available locales for various functionality.
211
using CountAvailable = int32_t (*)();
212
using GetAvailable = const char* (*)(int32_t localeIndex);
213
214
static bool getAvailableLocales(JSContext* cx, LocaleSet& locales,
215
CountAvailable countAvailable,
216
GetAvailable getAvailable);
217
218
/**
219
* Precomputes the available locales sets.
220
*/
221
bool ensureSupportedLocales(JSContext* cx);
222
223
public:
224
enum class SupportedLocaleKind {
225
Collator,
226
DateTimeFormat,
227
ListFormat,
228
NumberFormat,
229
PluralRules,
230
RelativeTimeFormat
231
};
232
233
/**
234
* Sets |supported| to true if |locale| is supported by the requested Intl
235
* service constructor. Otherwise sets |supported| to false.
236
*/
237
MOZ_MUST_USE bool isSupportedLocale(JSContext* cx, SupportedLocaleKind kind,
238
JS::Handle<JSString*> locale,
239
bool* supported);
240
241
private:
242
/**
243
* The case first parameter (BCP47 key "kf") allows to switch the order of
244
* upper- and lower-case characters. ICU doesn't directly provide an API
245
* to query the default case first value of a given locale, but instead
246
* requires to instantiate a collator object and then query the case first
247
* attribute (UCOL_CASE_FIRST).
248
* To avoid instantiating an additional collator object whenever we need
249
* to retrieve the default case first value of a specific locale, we
250
* compute the default case first value for every supported locale only
251
* once and then keep a list of all locales which don't use the default
252
* case first setting.
253
* There is almost no difference between lower-case first and when case
254
* first is disabled (UCOL_LOWER_FIRST resp. UCOL_OFF), so we only need to
255
* track locales which use upper-case first as their default setting.
256
*
257
* Instantiating collator objects for each available locale is slow
258
* (bug 1527879), therefore we're hardcoding the two locales using upper-case
259
* first ("da" (Danish) and "mt" (Maltese)) and only assert in debug-mode
260
* these two locales match the upper-case first locales returned by ICU. A
261
* system-ICU may support a different set of locales, therefore we're always
262
* calling into ICU to find the upper-case first locales in that case.
263
*/
264
265
#if DEBUG || MOZ_SYSTEM_ICU
266
LocaleSet upperCaseFirstLocales;
267
268
bool upperCaseFirstInitialized = false;
269
270
/**
271
* Precomputes the available locales which use upper-case first sorting.
272
*/
273
bool ensureUpperCaseFirstLocales(JSContext* cx);
274
#endif
275
276
public:
277
/**
278
* Sets |isUpperFirst| to true if |locale| sorts upper-case characters
279
* before lower-case characters.
280
*/
281
bool isUpperCaseFirst(JSContext* cx, JS::Handle<JSString*> locale,
282
bool* isUpperFirst);
283
284
private:
285
using UniqueUDateTimePatternGenerator =
286
mozilla::UniquePtr<UDateTimePatternGenerator,
287
DateTimePatternGeneratorDeleter>;
288
289
UniqueUDateTimePatternGenerator dateTimePatternGenerator;
290
JS::UniqueChars dateTimePatternGeneratorLocale;
291
292
public:
293
/**
294
* Wrapper around |udatpg_open| to return a possibly cached generator
295
* instance. The returned pointer must not be closed via |udatpg_close|.
296
*/
297
UDateTimePatternGenerator* getDateTimePatternGenerator(JSContext* cx,
298
const char* locale);
299
300
public:
301
void destroyInstance();
302
303
void trace(JSTracer* trc);
304
305
size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
306
};
307
308
} // namespace intl
309
310
} // namespace js
311
312
#endif /* builtin_intl_SharedIntlData_h */