Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
* License, v. 2.0. If a copy of the MPL was not distributed with this
4
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
// as does this
7
#include "nsICharsetConverterManager.h"
8
#include "mozilla/dom/FallbackEncoding.h"
9
#include "nsIServiceManager.h"
10
11
#include "nsISupports.h"
12
#include "nsIPrefBranch.h"
13
#include "nsIPrefService.h"
14
#include "nsIMimeConverter.h"
15
#include "nsMsgUtils.h"
16
#include "nsMsgI18N.h"
17
#include "nsMsgMimeCID.h"
18
#include "nsILineInputStream.h"
19
#include "nsMimeTypes.h"
20
#include "nsString.h"
21
#include "prmem.h"
22
#include "plstr.h"
23
#include "nsUTF8Utils.h"
24
#include "nsNetUtil.h"
25
#include "nsCRTGlue.h"
26
#include "nsComponentManagerUtils.h"
27
#include "nsUnicharUtils.h"
28
#include "nsIFileStreams.h"
29
#include "../../intl/nsUTF7ToUnicode.h"
30
#include "../../intl/nsMUTF7ToUnicode.h"
31
#include "../../intl/nsUnicodeToMUTF7.h"
32
33
//
34
// International functions necessary for composition
35
//
36
37
// Encodes the UTF-16 string |inString| into the charset labelled |aCharset|,
// writing the encoded bytes to |outString|.
//
// Returns NS_OK with an empty |outString| for empty input, and
// NS_ERROR_UCONV_NOCONV when the label is not recognised or names UTF-16LE /
// UTF-16BE. When the encoder reports NS_OK_HAD_REPLACEMENTS, the result is
// NS_ERROR_UENC_NOMAPPING if |aReportUencNoMapping| is set and NS_OK
// otherwise. Any other encoder result is returned unchanged.
nsresult nsMsgI18NConvertFromUnicode(const nsACString& aCharset,
                                     const nsAString& inString,
                                     nsACString& outString,
                                     bool aReportUencNoMapping) {
  // Nothing to encode.
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }

  auto targetEncoding = mozilla::Encoding::ForLabelNoReplacement(aCharset);
  // Unknown labels are rejected, and so are the UTF-16 variants: we
  // shouldn't ever ship anything in these encodings.
  const bool unusable = !targetEncoding ||
                        targetEncoding == UTF_16LE_ENCODING ||
                        targetEncoding == UTF_16BE_ENCODING;
  if (unusable) {
    return NS_ERROR_UCONV_NOCONV;
  }

  nsresult status;
  const mozilla::Encoding* usedEncoding;
  mozilla::Tie(status, usedEncoding) =
      targetEncoding->Encode(inString, outString);
  // The encoding that was actually used is of no interest to callers.
  mozilla::Unused << usedEncoding;

  if (status != NS_OK_HAD_REPLACEMENTS) {
    return status;
  }
  return aReportUencNoMapping ? NS_ERROR_UENC_NOMAPPING : NS_OK;
}
65
66
// Decodes the bytes in |inString|, interpreted in charset |aCharset|, into
// the UTF-16 string |outString|.
//
// Handling, in order:
//  - empty input yields an empty output and NS_OK;
//  - an empty charset widens the input with CopyASCIItoUTF16;
//  - "UTF-8" (any case) is decoded directly, with BOM removal;
//  - otherwise the label is resolved through the charset converter manager's
//    alias lookup; a resolved "UTF-7" goes to CopyUTF7toUTF16, anything else
//    to mozilla::Encoding without BOM handling.
// Returns NS_ERROR_UCONV_NOCONV when the resolved label is not recognised,
// or the failure code of the service/alias lookup.
nsresult nsMsgI18NConvertToUnicode(const nsACString& aCharset,
                                   const nsACString& inString,
                                   nsAString& outString) {
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }

  if (aCharset.IsEmpty()) {
    // Despite its name, it also works for Latin-1.
    CopyASCIItoUTF16(inString, outString);
    return NS_OK;
  }

  const bool isUtf8 =
      aCharset.Equals("UTF-8", nsCaseInsensitiveCStringComparator());
  if (isUtf8) {
    return UTF_8_ENCODING->DecodeWithBOMRemoval(inString, outString);
  }

  // Look up Thunderbird's special aliases from charsetalias.properties.
  nsresult rv;
  nsCOMPtr<nsICharsetConverterManager> converterManager =
      do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCString resolvedCharset;
  rv = converterManager->GetCharsetAlias(PromiseFlatCString(aCharset).get(),
                                         resolvedCharset);
  NS_ENSURE_SUCCESS(rv, rv);

  // Special treatment for decoding UTF-7 since it's not handled by
  // encoding_rs.
  if (resolvedCharset.Equals("UTF-7", nsCaseInsensitiveCStringComparator())) {
    return CopyUTF7toUTF16(inString, outString);
  }

  auto sourceEncoding =
      mozilla::Encoding::ForLabelNoReplacement(resolvedCharset);
  return sourceEncoding
             ? sourceEncoding->DecodeWithoutBOMHandling(inString, outString)
             : NS_ERROR_UCONV_NOCONV;
}
103
104
// This is used to decode UTF-7. No support for encoding in UTF-7.
105
nsresult CopyUTF7toUTF16(const nsACString& aSrc, nsAString& aDest) {
106
// UTF-7 encoding size cannot be larger than the size in UTF-16.
107
nsUTF7ToUnicode converter;
108
int32_t inLen = aSrc.Length();
109
int32_t outLen = inLen;
110
aDest.SetLength(outLen);
111
converter.ConvertNoBuff(aSrc.BeginReading(), &inLen, aDest.BeginWriting(),
112
&outLen);
113
MOZ_ASSERT(inLen == (int32_t)aSrc.Length(),
114
"UTF-7 should not produce a longer output");
115
aDest.SetLength(outLen);
116
return NS_OK;
117
}
118
119
// Encodes the UTF-16 string |aSrc| as modified UTF-7 into |aDest|.
//
// The input is pushed through the converter in pieces: each pass converts as
// much as fits into a small scratch buffer, appends that to |aDest| and
// advances by the number of code units the converter reports as consumed.
// A final FinishNoBuff call collects whatever the converter still has to
// emit. Always returns NS_OK.
nsresult CopyUTF16toMUTF7(const nsAString& aSrc, nsACString& aDest) {
  // Size of the scratch buffer used for each conversion pass. A scoped
  // constant is used rather than a #define so the name does not leak into
  // the rest of the translation unit.
  constexpr int32_t kScratchLength = 100;
  // Deliberately an automatic (not static) buffer: a function-static buffer
  // would be shared, and clobbered, by calls running on different threads.
  char scratch[kScratchLength];

  nsUnicodeToMUTF7 converter;
  const char16_t* in = aSrc.BeginReading();
  int32_t inLen = aSrc.Length();
  aDest.Truncate();

  while (inLen > 0) {
    // In: capacity of the scratch buffer / code units available.
    // Out: bytes written / code units consumed.
    int32_t written = kScratchLength;
    int32_t consumed = inLen;
    converter.ConvertNoBuffNoErr(in, &consumed, scratch, &written);
    aDest.Append(scratch, written);
    in += consumed;
    inLen -= consumed;
  }

  // Flush any output the converter is still holding back.
  int32_t trailing = kScratchLength;
  converter.FinishNoBuff(scratch, &trailing);
  if (trailing > 0) aDest.Append(scratch, trailing);
  return NS_OK;
}
140
141
// Decodes modified UTF-7 bytes in |aSrc| into the UTF-16 string |aDest|.
// Always returns NS_OK; the converter's own return value is not inspected.
nsresult CopyMUTF7toUTF16(const nsACString& aSrc, nsAString& aDest) {
  // The output is sized to the input length on the original author's premise
  // that UTF-7 text is never shorter than its UTF-16 decoding.
  const int32_t srcLength = aSrc.Length();
  int32_t consumed = srcLength;
  int32_t produced = srcLength;
  aDest.SetLength(produced);

  nsMUTF7ToUnicode decoder;
  decoder.ConvertNoBuff(aSrc.BeginReading(), &consumed, aDest.BeginWriting(),
                        &produced);
  // All input must have been consumed, i.e. the output buffer sufficed.
  MOZ_ASSERT(consumed == (int32_t)aSrc.Length(),
             "UTF-7 should not produce a longer output");

  // Shrink to the number of code units actually written.
  aDest.SetLength(produced);
  return NS_OK;
}
154
155
// Charset used by the file system.
156
const nsACString& nsMsgI18NFileSystemCharset() {
157
/* Get a charset used for the file. */
158
static nsAutoCString fileSystemCharset;
159
160
if (fileSystemCharset.IsEmpty())
161
mozilla::dom::FallbackEncoding::FromLocale()->Name(fileSystemCharset);
162
163
return fileSystemCharset;
164
}
165
166
// Charset used by the text file.
167
void nsMsgI18NTextFileCharset(nsACString& aCharset) {
168
mozilla::dom::FallbackEncoding::FromLocale()->Name(aCharset);
169
}
170
171
// MIME encoder, output string should be freed by PR_FREE
172
// XXX : fix callers later to avoid allocation and copy
173
char* nsMsgI18NEncodeMimePartIIStr(const char* header, bool structured,
174
const char* charset, int32_t fieldnamelen,
175
bool usemime) {
176
// No MIME, convert to the outgoing mail charset.
177
if (!usemime) {
178
nsAutoCString convertedStr;
179
if (NS_SUCCEEDED(nsMsgI18NConvertFromUnicode(
180
charset ? nsDependentCString(charset) : EmptyCString(),
181
NS_ConvertUTF8toUTF16(header), convertedStr)))
182
return PL_strdup(convertedStr.get());
183
else
184
return PL_strdup(header);
185
}
186
187
nsAutoCString encodedString;
188
nsresult res;
189
nsCOMPtr<nsIMimeConverter> converter =
190
do_GetService(NS_MIME_CONVERTER_CONTRACTID, &res);
191
if (NS_SUCCEEDED(res) && nullptr != converter) {
192
res = converter->EncodeMimePartIIStr_UTF8(
193
nsDependentCString(header), structured, fieldnamelen,
194
nsIMimeConverter::MIME_ENCODED_WORD_SIZE, encodedString);
195
}
196
197
return NS_SUCCEEDED(res) ? PL_strdup(encodedString.get()) : nullptr;
198
}
199
200
// Return True if a charset is stateful (e.g. JIS).
201
bool nsMsgI18Nstateful_charset(const char* charset) {
202
// TODO: use charset manager's service
203
return (PL_strcasecmp(charset, "ISO-2022-JP") == 0);
204
}
205
206
bool nsMsgI18Nmultibyte_charset(const char* charset) {
207
nsresult res;
208
nsCOMPtr<nsICharsetConverterManager> ccm =
209
do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
210
bool result = false;
211
212
if (NS_SUCCEEDED(res)) {
213
nsAutoString charsetData;
214
res = ccm->GetCharsetData(charset, u".isMultibyte", charsetData);
215
if (NS_SUCCEEDED(res)) {
216
result = charsetData.LowerCaseEqualsLiteral("true");
217
}
218
}
219
220
return result;
221
}
222
223
bool nsMsgI18Ncheck_data_in_charset_range(const char* charset,
224
const char16_t* inString) {
225
if (!charset || !*charset || !inString || !*inString) return true;
226
227
bool res = true;
228
229
auto encoding =
230
mozilla::Encoding::ForLabelNoReplacement(nsDependentCString(charset));
231
if (!encoding) return false;
232
auto encoder = encoding->NewEncoder();
233
234
uint8_t buffer[512];
235
auto src = mozilla::MakeStringSpan(inString);
236
auto dst = mozilla::MakeSpan(buffer);
237
while (true) {
238
uint32_t result;
239
size_t read;
240
size_t written;
241
mozilla::Tie(result, read, written) =
242
encoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
243
if (result == mozilla::kInputEmpty) {
244
// All converted successfully.
245
break;
246
} else if (result != mozilla::kOutputFull) {
247
// Didn't use all the input but the output isn't full, hence
248
// there was an unencodable character.
249
res = false;
250
break;
251
}
252
src = src.From(read);
253
// dst = dst.From(written); // Just overwrite output since we don't need it.
254
}
255
256
return res;
257
}
258
259
// Simple parser to parse META charset.
260
// It only supports the case when the description is within one line.
261
const char* nsMsgI18NParseMetaCharset(nsIFile* file) {
262
static char charset[nsIMimeConverter::MAX_CHARSET_NAME_LENGTH + 1];
263
264
*charset = '\0';
265
266
bool isDirectory = false;
267
file->IsDirectory(&isDirectory);
268
if (isDirectory) {
269
NS_ERROR("file is a directory");
270
return charset;
271
}
272
273
nsresult rv;
274
nsCOMPtr<nsIFileInputStream> fileStream =
275
do_CreateInstance(NS_LOCALFILEINPUTSTREAM_CONTRACTID, &rv);
276
NS_ENSURE_SUCCESS(rv, charset);
277
278
rv = fileStream->Init(file, PR_RDONLY, 0664, false);
279
nsCOMPtr<nsILineInputStream> lineStream = do_QueryInterface(fileStream, &rv);
280
281
nsCString curLine;
282
bool more = true;
283
while (NS_SUCCEEDED(rv) && more) {
284
rv = lineStream->ReadLine(curLine, &more);
285
if (curLine.IsEmpty()) continue;
286
287
ToUpperCase(curLine);
288
289
if (curLine.Find("/HEAD") != -1) break;
290
291
if (curLine.Find("META") != -1 && curLine.Find("HTTP-EQUIV") != -1 &&
292
curLine.Find("CONTENT-TYPE") != -1 && curLine.Find("CHARSET") != -1) {
293
char* cp = (char*)PL_strchr(PL_strstr(curLine.get(), "CHARSET"), '=');
294
char* token = nullptr;
295
if (cp) {
296
char* newStr = cp + 1;
297
token = NS_strtok(" \"\'", &newStr);
298
}
299
if (token) {
300
PL_strncpy(charset, token, sizeof(charset));
301
charset[sizeof(charset) - 1] = '\0';
302
303
// this function cannot parse a file if it is really
304
// encoded by one of the following charsets
305
// so we can say that the charset label must be incorrect for
306
// the .html if we actually see those charsets parsed
307
// and we should ignore them
308
if (!PL_strncasecmp("UTF-16", charset, sizeof("UTF-16") - 1) ||
309
!PL_strncasecmp("UTF-32", charset, sizeof("UTF-32") - 1))
310
charset[0] = '\0';
311
312
break;
313
}
314
}
315
}
316
317
return charset;
318
}
319
320
// Copies |inString| into |outString|, cutting it at a UTF-8 character
// boundary when it is not shorter than |aMaxLength| bytes.
//
// Input shorter than |aMaxLength| is copied unchanged. Otherwise characters
// are walked from the start until the cursor reaches |aMaxLength| bytes, and
// the output is everything before the last character visited. The output is
// empty when the input is empty, when no character was visited, or when a
// decoding error was reported. Always returns NS_OK.
nsresult nsMsgI18NShrinkUTF8Str(const nsCString& inString, uint32_t aMaxLength,
                                nsACString& outString) {
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }
  if (inString.Length() < aMaxLength) {
    outString.Assign(inString);
    return NS_OK;
  }
  NS_ASSERTION(MsgIsUTF8(inString), "Invalid UTF-8 string is inputted");

  const char* const begin = inString.get();
  const char* const inputEnd = begin + inString.Length();
  const char* const limit = begin + aMaxLength;

  const char* cursor = begin;
  const char* lastCharStart = nullptr;
  bool decodeError = false;
  while (cursor < limit) {
    // Remember where this character starts; that is where we cut.
    lastCharStart = cursor;
    const bool gotChar =
        UTF8CharEnumerator::NextChar(&cursor, inputEnd, &decodeError);
    if (!gotChar || decodeError) break;
  }

  if (lastCharStart && !decodeError) {
    const uint32_t keptLength = lastCharStart - begin;
    outString.Assign(Substring(inString, 0, keptLength));
  } else {
    outString.Truncate();
  }
  return NS_OK;
}
349
350
void nsMsgI18NConvertRawBytesToUTF16(const nsCString& inString,
351
const nsACString& charset,
352
nsAString& outString) {
353
if (MsgIsUTF8(inString)) {
354
CopyUTF8toUTF16(inString, outString);
355
return;
356
}
357
358
nsresult rv = nsMsgI18NConvertToUnicode(charset, inString, outString);
359
if (NS_SUCCEEDED(rv)) return;
360
361
const char* cur = inString.BeginReading();
362
const char* end = inString.EndReading();
363
outString.Truncate();
364
while (cur < end) {
365
char c = *cur++;
366
if (c & char(0x80))
367
outString.Append(UCS2_REPLACEMENT_CHAR);
368
else
369
outString.Append(c);
370
}
371
}
372
373
void nsMsgI18NConvertRawBytesToUTF8(const nsCString& inString,
374
const nsACString& charset,
375
nsACString& outString) {
376
if (MsgIsUTF8(inString)) {
377
outString.Assign(inString);
378
return;
379
}
380
381
nsAutoString utf16Text;
382
nsresult rv = nsMsgI18NConvertToUnicode(charset, inString, utf16Text);
383
if (NS_SUCCEEDED(rv)) {
384
CopyUTF16toUTF8(utf16Text, outString);
385
return;
386
}
387
388
// EF BF BD (UTF-8 encoding of U+FFFD)
389
NS_NAMED_LITERAL_CSTRING(utf8ReplacementChar, "\357\277\275");
390
const char* cur = inString.BeginReading();
391
const char* end = inString.EndReading();
392
outString.Truncate();
393
while (cur < end) {
394
char c = *cur++;
395
if (c & char(0x80))
396
outString.Append(utf8ReplacementChar);
397
else
398
outString.Append(c);
399
}
400
}