// test_charset_functions.js

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

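/**
 * Tests for the charset lookup functions of nsICharsetConverterManager.
 *
 * For the current set of encodings and their labels, see the
 * [Encoding Living Standard](https://encoding.spec.whatwg.org).
 */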

/**
 * Checks that getCharsetAlias() maps mixed-case charset labels to the
 * expected charset names, and that it throws for the "replacement" label and
 * for a label that does not exist.
 */
add_task(async function test_getCharsetAlias() {
  const manager = Cc["@mozilla.org/charset-converter-manager;1"].getService(
    Ci.nsICharsetConverterManager
  );

  // [label passed in, charset name expected back]. The labels use unusual
  // casing on purpose, so the lookup is exercised with non-canonical input.
  const expectedAliases = [
    ["UtF-7", "UTF-7"],
    ["utF-8", "UTF-8"],
    ["iso8859_1", "windows-1252"],
    ["CP936", "GBK"],
    ["X-User-Defined", "x-user-defined"],
  ];
  for (const [label, alias] of expectedAliases) {
    Assert.equal(
      manager.getCharsetAlias(label),
      alias,
      `Alias for ${label} should match`
    );
  }

  // 0x80500001 is NS_ERROR_UCONV_NOCONV.
  Assert.throws(
    () => manager.getCharsetAlias("replacement"),
    /Component returned failure code: 0x80500001/,
    `"replacement" should throw NS_ERROR_UCONV_NOCONV`
  );

  // 0x80040111 is NS_ERROR_NOT_AVAILABLE.
  Assert.throws(
    () => manager.getCharsetAlias("this-shouldnt-exist"),
    /Component returned failure code: 0x80040111/,
    `non-existent label should throw NS_ERROR_NOT_AVAILABLE`
  );
});

/**
 * Checks that getCharsetLangGroup() returns the expected language group for
 * each charset listed in the table below.
 */
add_task(async function test_getCharsetLangGroup() {
  // This data comes from the now-removed charsetData.properties file. The
  // commented-out charsets did not work before either. Only "big5-hkscs" now
  // returns "zh-tw" instead of failing.
  // Each entry is [charset passed in, language group expected back].
  const langGroups = new Map([
    ["BIG5", "zh-tw"],
    //    ["big5-hkscs", "zh-hk"],
    ["EUC-JP", "ja"],
    ["euc-kr", "ko"],
    ["GB2312", "zh-cn"],
    ["gb18030", "zh-cn"],
    //    ["GB18030.2000-0", "zh-cn"],
    //    ["gb18030.2000-1", "zh-cn"],
    //    ["HKSCS-1", "zh-hk"],
    ["ibm866", "x-cyrillic"],
    //    ["IBM1125", "x-cyrillic"],
    //    ["ibm1131", "x-cyrillic"],
    ["ISO-2022-JP", "ja"],
    ["iso-8859-1", "x-western"],
    ["ISO-8859-10", "x-western"],
    ["iso-8859-14", "x-western"],
    ["ISO-8859-15", "x-western"],
    ["iso-8859-2", "x-western"],
    ["ISO-8859-16", "x-western"],
    ["iso-8859-3", "x-western"],
    ["ISO-8859-4", "x-western"],
    ["iso-8859-13", "x-western"],
    ["ISO-8859-5", "x-cyrillic"],
    ["iso-8859-6", "ar"],
    ["ISO-8859-7", "el"],
    ["iso-8859-8", "he"],
    ["ISO-8859-8-I", "he"],
    //    ["jis_0208-1983", "ja"],
    ["KOI8-R", "x-cyrillic"],
    ["koi8-u", "x-cyrillic"],
    ["SHIFT_JIS", "ja"],
    ["windows-874", "th"],
    ["UTF-8", "x-unicode"],
    ["utf-16", "x-unicode"],
    ["UTF-16BE", "x-unicode"],
    ["utf-16le", "x-unicode"],
    ["UTF-7", "x-unicode"],
    //    ["replacement", "x-unicode"],
    ["WINDOWS-1250", "x-western"],
    ["windows-1251", "x-cyrillic"],
    ["WINDOWS-1252", "x-western"],
    ["windows-1253", "el"],
    ["WINDOWS-1254", "x-western"],
    ["windows-1255", "he"],
    ["WINDOWS-1256", "ar"],
    ["windows-1257", "x-western"],
    ["WINDOWS-1258", "x-western"],
    ["gbk", "zh-cn"],
    ["X-MAC-CYRILLIC", "x-cyrillic"],
    ["macintosh", "x-western"],
    ["X-USER-DEFINED", "x-unicode"],
  ]);

  const manager = Cc["@mozilla.org/charset-converter-manager;1"].getService(
    Ci.nsICharsetConverterManager
  );

  for (const [charset, langGroup] of langGroups) {
    Assert.equal(
      manager.getCharsetLangGroup(charset),
      langGroup,
      `Language group for ${charset} should match`
    );
  }
});

/**
 * Checks that isMultiByteCharset() returns a truthy value for every charset
 * in the multi-byte list and a falsy value for every charset in the
 * single-byte list.
 */
add_task(async function test_isMultiByteCharset() {
  // This data comes from the now-removed charsetData.properties file.
  const multiByteCharsets = [
    "ISO-2022-JP",
    "shift_jis",
    "EUC-JP",
    "big5",
    "BIG5-HKSCS",
    "gb2312",
    "EUC-KR",
    "utf-7",
    "UTF-8",
    "replacement",
    // These charsets were not recognized as multi-byte charsets before,
    // but are now.
    "gbk",
    "gb18030",
    "UTF-16BE",
    "UTF-16LE",
  ];

  // Some single-byte charsets to test.
  const singleByteCharsets = [
    "WINDOWS-1252",
    "windows-874",
    "ISO-8859-2",
    "koi8-r",
    "MACINTOSH",
    "ibm866",
    "X-MAC-CYRILLIC",
    "x-user-defined",
  ];

  const manager = Cc["@mozilla.org/charset-converter-manager;1"].getService(
    Ci.nsICharsetConverterManager
  );

  for (const charset of multiByteCharsets) {
    Assert.ok(
      manager.isMultiByteCharset(charset),
      `${charset} is a multi-byte charset`
    );
  }

  for (const charset of singleByteCharsets) {
    Assert.ok(
      !manager.isMultiByteCharset(charset),
      `${charset} is a single-byte charset`
    );
  }
});