Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
* License, v. 2.0. If a copy of the MPL was not distributed with this
4
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
// as does this
7
#include "nsICharsetConverterManager.h"
8
#include "mozilla/dom/FallbackEncoding.h"
9
#include "nsIServiceManager.h"
10
11
#include "nsISupports.h"
12
#include "nsIPrefBranch.h"
13
#include "nsIPrefService.h"
14
#include "nsIMimeConverter.h"
15
#include "nsMsgUtils.h"
16
#include "nsMsgI18N.h"
17
#include "nsMsgMimeCID.h"
18
#include "nsILineInputStream.h"
19
#include "nsMimeTypes.h"
20
#include "nsString.h"
21
#include "prmem.h"
22
#include "plstr.h"
23
#include "nsUTF8Utils.h"
24
#include "nsNetUtil.h"
25
#include "nsCRTGlue.h"
26
#include "nsComponentManagerUtils.h"
27
#include "nsUnicharUtils.h"
28
#include "nsIFileStreams.h"
29
#include "../../intl/nsUTF7ToUnicode.h"
30
#include "../../intl/nsMUTF7ToUnicode.h"
31
#include "../../intl/nsUnicodeToMUTF7.h"
32
33
//
34
// International functions necessary for composition
35
//
36
37
// Encodes the UTF-16 text in inString into the charset named by aCharset and
// stores the resulting bytes in outString.
//
// Returns:
//   NS_OK                    - empty input (outString is truncated), or a
//                              successful encode.
//   NS_ERROR_UCONV_NOCONV    - aCharset is not a known label, or it names
//                              UTF-16LE / UTF-16BE.
//   NS_ERROR_UENC_NOMAPPING  - the encoder reported NS_OK_HAD_REPLACEMENTS and
//                              aReportUencNoMapping is true. When the flag is
//                              false that case is reported as NS_OK instead.
//   Any other status produced by Encode() is passed through unchanged.
nsresult nsMsgI18NConvertFromUnicode(const nsACString& aCharset,
                                     const nsAString& inString,
                                     nsACString& outString,
                                     bool aReportUencNoMapping) {
  // Nothing to encode: hand back an empty string.
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }

  auto target = mozilla::Encoding::ForLabelNoReplacement(aCharset);
  if (!target) {
    return NS_ERROR_UCONV_NOCONV;
  }
  // We shouldn't ever ship anything in these encodings.
  if (target == UTF_16LE_ENCODING || target == UTF_16BE_ENCODING) {
    return NS_ERROR_UCONV_NOCONV;
  }

  nsresult status;
  const mozilla::Encoding* usedEncoding;
  mozilla::Tie(status, usedEncoding) = target->Encode(inString, outString);
  // The encoding that was actually used is of no interest to callers.
  mozilla::Unused << usedEncoding;

  if (status != NS_OK_HAD_REPLACEMENTS) {
    return status;
  }
  return aReportUencNoMapping ? NS_ERROR_UENC_NOMAPPING : NS_OK;
}
65
66
// Decodes the bytes in inString, interpreted in the charset named by aCharset,
// into UTF-16 and stores the result in outString.
//
// Handling, in order:
//   - empty input: outString is truncated, NS_OK.
//   - empty charset: the bytes are widened with CopyASCIItoUTF16.
//   - "UTF-8" (case-insensitive): decoded with BOM removal.
//   - otherwise the charset is resolved through the charset converter
//     manager's alias lookup; "UTF-7" goes to CopyUTF7toUTF16 and everything
//     else is decoded without BOM handling and without replacement.
//
// Returns NS_ERROR_UCONV_NOCONV when the resolved charset is not a known
// label; service/alias lookup failures are returned as-is.
nsresult nsMsgI18NConvertToUnicode(const nsACString& aCharset,
                                   const nsACString& inString,
                                   nsAString& outString) {
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }

  if (aCharset.IsEmpty()) {
    // Despite its name, it also works for Latin-1.
    CopyASCIItoUTF16(inString, outString);
    return NS_OK;
  }

  const bool isUtf8 =
      aCharset.Equals("UTF-8", nsCaseInsensitiveCStringComparator());
  if (isUtf8) {
    return UTF_8_ENCODING->DecodeWithBOMRemoval(inString, outString);
  }

  // Look up Thunderbird's special aliases from charsetalias.properties.
  nsresult status;
  nsCOMPtr<nsICharsetConverterManager> converterManager =
      do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &status);
  NS_ENSURE_SUCCESS(status, status);

  nsCString resolvedCharset;
  status = converterManager->GetCharsetAlias(
      PromiseFlatCString(aCharset).get(), resolvedCharset);
  NS_ENSURE_SUCCESS(status, status);

  // Special treatment for decoding UTF-7 since it's not handled by
  // encoding_rs.
  if (resolvedCharset.Equals("UTF-7", nsCaseInsensitiveCStringComparator())) {
    return CopyUTF7toUTF16(inString, outString);
  }

  auto decoder = mozilla::Encoding::ForLabelNoReplacement(resolvedCharset);
  if (!decoder) {
    return NS_ERROR_UCONV_NOCONV;
  }
  return decoder->DecodeWithoutBOMHandlingAndWithoutReplacement(inString,
                                                                outString);
}
104
105
// This is used to decode UTF-7. No support for encoding in UTF-7.
106
nsresult CopyUTF7toUTF16(const nsACString& aSrc, nsAString& aDest) {
107
// UTF-7 encoding size cannot be larger than the size in UTF-16.
108
nsUTF7ToUnicode converter;
109
int32_t inLen = aSrc.Length();
110
int32_t outLen = inLen;
111
aDest.SetLength(outLen);
112
converter.ConvertNoBuff(aSrc.BeginReading(), &inLen, aDest.BeginWriting(),
113
&outLen);
114
MOZ_ASSERT(inLen == (int32_t)aSrc.Length(),
115
"UTF-7 should not produce a longer output");
116
aDest.SetLength(outLen);
117
return NS_OK;
118
}
119
120
// Encodes the UTF-16 text in aSrc with nsUnicodeToMUTF7 (modified UTF-7) and
// stores the resulting bytes in aDest. aDest is cleared first. Always returns
// NS_OK.
//
// The conversion runs in chunks through a small fixed-size scratch buffer:
// each pass converts as much input as fits, appends the produced bytes to
// aDest and advances past the consumed input. A final FinishNoBuff() call
// flushes whatever the converter still holds.
nsresult CopyUTF16toMUTF7(const nsAString& aSrc, nsACString& aDest) {
#define IMAP_UTF7_BUF_LENGTH 100
  nsUnicodeToMUTF7 converter;
  // The scratch buffer is deliberately an automatic (per-call) array. A
  // function-static buffer would be shared by every caller, so two threads
  // converting at the same time would overwrite each other's output.
  char buffer[IMAP_UTF7_BUF_LENGTH];
  const char16_t* in = aSrc.BeginReading();
  int32_t inLen = aSrc.Length();
  int32_t outLen;
  aDest.Truncate();
  while (inLen > 0) {
    // In: capacity of the scratch buffer. Out: number of bytes produced.
    outLen = IMAP_UTF7_BUF_LENGTH;
    // In: code units available. Out: code units consumed.
    int32_t remaining = inLen;
    converter.ConvertNoBuffNoErr(in, &remaining, buffer, &outLen);
    aDest.Append(buffer, outLen);
    in += remaining;
    inLen -= remaining;
  }
  // Flush any pending output held by the converter.
  outLen = IMAP_UTF7_BUF_LENGTH;
  converter.FinishNoBuff(buffer, &outLen);
  if (outLen > 0) aDest.Append(buffer, outLen);
  return NS_OK;
}
141
142
// Decodes the bytes in aSrc with nsMUTF7ToUnicode (modified UTF-7) into
// UTF-16 in aDest. Always returns NS_OK.
nsresult CopyMUTF7toUTF16(const nsACString& aSrc, nsAString& aDest) {
  nsMUTF7ToUnicode decoder;

  // UTF-7 encoding size cannot be larger than the size in UTF-16, so an
  // output buffer with as many code units as there are input bytes suffices.
  int32_t srcLength = aSrc.Length();
  int32_t dstLength = srcLength;
  aDest.SetLength(dstLength);

  // On return srcLength/dstLength hold the amounts consumed/produced.
  decoder.ConvertNoBuff(aSrc.BeginReading(), &srcLength, aDest.BeginWriting(),
                        &dstLength);
  MOZ_ASSERT(srcLength == static_cast<int32_t>(aSrc.Length()),
             "UTF-7 should not produce a longer output");

  // Trim the destination down to what was actually written.
  aDest.SetLength(dstLength);
  return NS_OK;
}
155
156
// Charset used by the file system.
157
const nsACString& nsMsgI18NFileSystemCharset() {
158
/* Get a charset used for the file. */
159
static nsAutoCString fileSystemCharset;
160
161
if (fileSystemCharset.IsEmpty())
162
mozilla::dom::FallbackEncoding::FromLocale()->Name(fileSystemCharset);
163
164
return fileSystemCharset;
165
}
166
167
// Charset used by the text file.
168
void nsMsgI18NTextFileCharset(nsACString& aCharset) {
169
mozilla::dom::FallbackEncoding::FromLocale()->Name(aCharset);
170
}
171
172
// MIME encoder, output string should be freed by PR_FREE
173
// XXX : fix callers later to avoid allocation and copy
174
char* nsMsgI18NEncodeMimePartIIStr(const char* header, bool structured,
175
const char* charset, int32_t fieldnamelen,
176
bool usemime) {
177
// No MIME, convert to the outgoing mail charset.
178
if (!usemime) {
179
nsAutoCString convertedStr;
180
if (NS_SUCCEEDED(nsMsgI18NConvertFromUnicode(
181
charset ? nsDependentCString(charset) : EmptyCString(),
182
NS_ConvertUTF8toUTF16(header), convertedStr)))
183
return PL_strdup(convertedStr.get());
184
else
185
return PL_strdup(header);
186
}
187
188
nsAutoCString encodedString;
189
nsresult res;
190
nsCOMPtr<nsIMimeConverter> converter =
191
do_GetService(NS_MIME_CONVERTER_CONTRACTID, &res);
192
if (NS_SUCCEEDED(res) && nullptr != converter) {
193
res = converter->EncodeMimePartIIStr_UTF8(
194
nsDependentCString(header), structured, fieldnamelen,
195
nsIMimeConverter::MIME_ENCODED_WORD_SIZE, encodedString);
196
}
197
198
return NS_SUCCEEDED(res) ? PL_strdup(encodedString.get()) : nullptr;
199
}
200
201
// Return True if a charset is stateful (e.g. JIS).
202
bool nsMsgI18Nstateful_charset(const char* charset) {
203
// TODO: use charset manager's service
204
return (PL_strcasecmp(charset, "ISO-2022-JP") == 0);
205
}
206
207
bool nsMsgI18Nmultibyte_charset(const char* charset) {
208
nsresult res;
209
nsCOMPtr<nsICharsetConverterManager> ccm =
210
do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
211
bool result = false;
212
213
if (NS_SUCCEEDED(res)) {
214
nsAutoString charsetData;
215
res = ccm->GetCharsetData(charset, u".isMultibyte", charsetData);
216
if (NS_SUCCEEDED(res)) {
217
result = charsetData.LowerCaseEqualsLiteral("true");
218
}
219
}
220
221
return result;
222
}
223
224
bool nsMsgI18Ncheck_data_in_charset_range(const char* charset,
225
const char16_t* inString) {
226
if (!charset || !*charset || !inString || !*inString) return true;
227
228
bool res = true;
229
230
auto encoding =
231
mozilla::Encoding::ForLabelNoReplacement(nsDependentCString(charset));
232
if (!encoding) return false;
233
auto encoder = encoding->NewEncoder();
234
235
uint8_t buffer[512];
236
auto src = mozilla::MakeStringSpan(inString);
237
auto dst = mozilla::MakeSpan(buffer);
238
while (true) {
239
uint32_t result;
240
size_t read;
241
size_t written;
242
mozilla::Tie(result, read, written) =
243
encoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
244
if (result == mozilla::kInputEmpty) {
245
// All converted successfully.
246
break;
247
} else if (result != mozilla::kOutputFull) {
248
// Didn't use all the input but the output isn't full, hence
249
// there was an unencodable character.
250
res = false;
251
break;
252
}
253
src = src.From(read);
254
// dst = dst.From(written); // Just overwrite output since we don't need it.
255
}
256
257
return res;
258
}
259
260
// Simple parser to parse META charset.
261
// It only supports the case when the description is within one line.
262
const char* nsMsgI18NParseMetaCharset(nsIFile* file) {
263
static char charset[nsIMimeConverter::MAX_CHARSET_NAME_LENGTH + 1];
264
265
*charset = '\0';
266
267
bool isDirectory = false;
268
file->IsDirectory(&isDirectory);
269
if (isDirectory) {
270
NS_ERROR("file is a directory");
271
return charset;
272
}
273
274
nsresult rv;
275
nsCOMPtr<nsIFileInputStream> fileStream =
276
do_CreateInstance(NS_LOCALFILEINPUTSTREAM_CONTRACTID, &rv);
277
NS_ENSURE_SUCCESS(rv, charset);
278
279
rv = fileStream->Init(file, PR_RDONLY, 0664, false);
280
nsCOMPtr<nsILineInputStream> lineStream = do_QueryInterface(fileStream, &rv);
281
282
nsCString curLine;
283
bool more = true;
284
while (NS_SUCCEEDED(rv) && more) {
285
rv = lineStream->ReadLine(curLine, &more);
286
if (curLine.IsEmpty()) continue;
287
288
ToUpperCase(curLine);
289
290
if (curLine.Find("/HEAD") != -1) break;
291
292
if (curLine.Find("META") != -1 && curLine.Find("HTTP-EQUIV") != -1 &&
293
curLine.Find("CONTENT-TYPE") != -1 && curLine.Find("CHARSET") != -1) {
294
char* cp = (char*)PL_strchr(PL_strstr(curLine.get(), "CHARSET"), '=');
295
char* token = nullptr;
296
if (cp) {
297
char* newStr = cp + 1;
298
token = NS_strtok(" \"\'", &newStr);
299
}
300
if (token) {
301
PL_strncpy(charset, token, sizeof(charset));
302
charset[sizeof(charset) - 1] = '\0';
303
304
// this function cannot parse a file if it is really
305
// encoded by one of the following charsets
306
// so we can say that the charset label must be incorrect for
307
// the .html if we actually see those charsets parsed
308
// and we should ignore them
309
if (!PL_strncasecmp("UTF-16", charset, sizeof("UTF-16") - 1) ||
310
!PL_strncasecmp("UTF-32", charset, sizeof("UTF-32") - 1))
311
charset[0] = '\0';
312
313
break;
314
}
315
}
316
}
317
318
return charset;
319
}
320
321
// Copies inString into outString, cutting it at a UTF-8 character boundary so
// that the result is shorter than aMaxLength bytes.
//
//   - Empty input yields an empty outString.
//   - Input already shorter than aMaxLength is copied unchanged.
//   - Otherwise the string is walked one character at a time and cut at the
//     start of the character that reaches or crosses the aMaxLength offset.
//   - If no character could be stepped over, or a decoding error is reported,
//     outString is emptied.
//
// Always returns NS_OK.
nsresult nsMsgI18NShrinkUTF8Str(const nsCString& inString, uint32_t aMaxLength,
                                nsACString& outString) {
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }
  if (inString.Length() < aMaxLength) {
    outString.Assign(inString);
    return NS_OK;
  }
  NS_ASSERTION(MsgIsUTF8(inString), "Invalid UTF-8 string is inputted");

  const char* const begin = inString.get();
  const char* const strEnd = begin + inString.Length();
  const char* const limit = begin + aMaxLength;

  const char* cursor = begin;
  // Start of the most recently visited character; null until the first step.
  const char* charStart = nullptr;
  bool decodeError = false;
  while (cursor < limit) {
    charStart = cursor;
    const bool gotChar =
        UTF8CharEnumerator::NextChar(&cursor, strEnd, &decodeError);
    if (!gotChar || decodeError) break;
  }

  if (charStart && !decodeError) {
    uint32_t keepLength = charStart - begin;
    outString.Assign(Substring(inString, 0, keepLength));
  } else {
    outString.Truncate();
  }
  return NS_OK;
}
350
351
void nsMsgI18NConvertRawBytesToUTF16(const nsCString& inString,
352
const nsACString& charset,
353
nsAString& outString) {
354
if (MsgIsUTF8(inString)) {
355
CopyUTF8toUTF16(inString, outString);
356
return;
357
}
358
359
nsresult rv = nsMsgI18NConvertToUnicode(charset, inString, outString);
360
if (NS_SUCCEEDED(rv)) return;
361
362
const char* cur = inString.BeginReading();
363
const char* end = inString.EndReading();
364
outString.Truncate();
365
while (cur < end) {
366
char c = *cur++;
367
if (c & char(0x80))
368
outString.Append(UCS2_REPLACEMENT_CHAR);
369
else
370
outString.Append(c);
371
}
372
}
373
374
void nsMsgI18NConvertRawBytesToUTF8(const nsCString& inString,
375
const nsACString& charset,
376
nsACString& outString) {
377
if (MsgIsUTF8(inString)) {
378
outString.Assign(inString);
379
return;
380
}
381
382
nsAutoString utf16Text;
383
nsresult rv = nsMsgI18NConvertToUnicode(charset, inString, utf16Text);
384
if (NS_SUCCEEDED(rv)) {
385
CopyUTF16toUTF8(utf16Text, outString);
386
return;
387
}
388
389
// EF BF BD (UTF-8 encoding of U+FFFD)
390
NS_NAMED_LITERAL_CSTRING(utf8ReplacementChar, "\357\277\275");
391
const char* cur = inString.BeginReading();
392
const char* end = inString.EndReading();
393
outString.Truncate();
394
while (cur < end) {
395
char c = *cur++;
396
if (c & char(0x80))
397
outString.Append(utf8ReplacementChar);
398
else
399
outString.Append(c);
400
}
401
}