/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
import { PromiseWorker } from "resource://gre/modules/workers/PromiseWorker.mjs";
// The ArchiveUtils module is designed to be imported in both worker and
// main thread contexts.
/* eslint-disable mozilla/reject-import-system-module-from-non-system */
import { ArchiveUtils } from "resource:///modules/backup/ArchiveUtils.sys.mjs";
import { ArchiveEncryptor } from "resource:///modules/backup/ArchiveEncryption.sys.mjs";
import { BackupError } from "resource:///modules/backup/BackupError.mjs";
import { ERRORS } from "chrome://browser/content/backup/backup-constants.mjs";
/**
* An ArchiveWorker is a PromiseWorker that tries to do most of the heavy
* lifting of dealing with single-file archives for backups, to avoid doing
* much on the main thread. This is mostly important for single-file archive
* _creation_, as this is supposed to occur silently in the background without
* the user noticing any degradation in performance.
*/
class ArchiveWorker {
#worker = null;
constructor() {
// Connect this worker to the PromiseWorker machinery.
this.#connectToPromiseWorker();
}
/**
* Generates a boundary string that can be used to separate sections in a
* multipart/mixed MIME message.
*
* @returns {string}
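*
* @example
* // Illustrative output only; the timestamp and random suffixes will
* // differ on every call:
* // "----=_Part_1718000000000_k3j2l1m9n8_a1b2c3d4e5"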
*/
#generateBoundary() {
return (
"----=_Part_" +
new Date().getTime() +
"_" +
Math.random().toString(36).slice(2, 12) +
"_" +
Math.random().toString(36).slice(2, 12)
);
}
/**
* Calculates how many base64 bytes will be generated from some number of
* unencoded bytes. This presumes that the base64 bytes include a newline
* terminator at the end.
*
* @param {number} bytes
* The number of bytes to be converted to base64.
* @param {boolean} encrypting
* True if encryption via ArchiveEncryptor is being applied.
* @returns {number}
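*
* @example
* // Worked example with no encryption: a 1 MiB chunk becomes
* // 4 * Math.ceil(1048576 / 3) + 1 = 1398105 base64 bytes; the +1 is
* // the trailing newline. When encrypting, TAG_LENGTH_BYTES (typically
* // 16 for an AES-GCM authentication tag) is added to the plaintext
* // size first.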
*/
#computeChunkBase64Bytes(bytes, encrypting) {
if (encrypting) {
bytes += ArchiveUtils.TAG_LENGTH_BYTES;
}
return 4 * Math.ceil(bytes / 3) + 1;
}
/**
* @typedef {object} EncryptionArgs
* @property {CryptoKey} publicKey
* The RSA-OAEP public key that will be used to derive keys for encrypting
* the backup.
* @property {CryptoKey} backupAuthKey
* The AES-GCM key that will be used to authenticate the owner of the
* backup.
* @property {Uint8Array} wrappedSecrets
* The encrypted backup secrets computed by ArchiveEncryptionState.
* @property {Uint8Array} salt
* A salt computed for the PBKDF2 stretching of the recovery code.
* @property {Uint8Array} nonce
* A nonce computed when wrapping the private key and OSKeyStore secret.
*/
/**
* Constructs a single-file archive for a backup on the filesystem. A
* single-file archive is a specially crafted HTML document that includes,
* among other things, an inlined multipart/mixed MIME message within a
* document comment.
*
* @param {object} params
* Arguments that are described in more detail below.
* @param {string} params.archivePath
* The path on the file system to write the single-file archive.
* @param {string} params.markup
* The HTML markup to insert into the archive file before the HTML
* comment block. This is the markup that will be rendered if the HTML
* file is opened in a web browser.
* @param {object} params.backupMetadata
* The metadata associated with this backup. This is a copy of the metadata
* object that is contained within the compressed backup's manifest.
* @param {string} params.compressedBackupSnapshotPath
* The path on the file system where the compressed backup file is located.
* @param {EncryptionArgs} [params.encryptionArgs=undefined]
* Optional EncryptionArgs, which will be used to encrypt this archive.
* @param {number} params.chunkSize
* The size of the chunks to break the byte stream into for encoding.
* @returns {Promise<boolean>}
* Resolves with true once the archive has been fully written.
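*
* @example
* // Rough shape of the file this method writes (illustrative; the
* // writeUTF8 calls below are the authoritative format):
* //
* //   <HTML markup, rendered if the file is opened in a browser>
* //   <INLINE_MIME_START_MARKER>
* //   Content-Type: multipart/mixed; boundary="..."
* //
* //   --boundary
* //   <JSON block: version / encConfig / meta>
* //   --boundary
* //   <newline-separated base64 chunks of the (possibly encrypted) ZIP>
* //   --boundary
* //   <INLINE_MIME_END_MARKER>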
*/
async constructArchive({
archivePath,
markup,
backupMetadata,
compressedBackupSnapshotPath,
encryptionArgs,
chunkSize,
}) {
let encryptor = null;
if (encryptionArgs) {
encryptor = await ArchiveEncryptor.initialize(
encryptionArgs.publicKey,
encryptionArgs.backupAuthKey
);
}
let boundary = this.#generateBoundary();
let jsonBlock;
if (encryptor) {
jsonBlock = await encryptor.confirm(
backupMetadata,
encryptionArgs.wrappedSecrets,
encryptionArgs.salt,
encryptionArgs.nonce
);
} else {
jsonBlock = {
version: ArchiveUtils.SCHEMA_VERSION,
encConfig: null,
meta: backupMetadata,
};
}
let serializedJsonBlock = JSON.stringify(jsonBlock);
let textEncoder = new TextEncoder();
let jsonBlockLength = textEncoder.encode(serializedJsonBlock).length;
// Once we get the ability to stream to the filesystem from IOUtils in a
// worker, we should use that instead of appending each of these chunks.
//
// This isn't supposed to be some kind of generalized MIME message
// generator, so we're happy to construct it by hand here.
await IOUtils.writeUTF8(archivePath, markup);
await IOUtils.writeUTF8(
archivePath,
`
${ArchiveUtils.INLINE_MIME_START_MARKER}
Content-Type: multipart/mixed; boundary="${boundary}"

--${boundary}
Content-Type: application/json; charset=utf-8
Content-Disposition: attachment; filename="archive.json"
Content-Length: ${jsonBlockLength}

${serializedJsonBlock}
`,
{ mode: "append" }
);
let compressedBackupSnapshotFile = IOUtils.openFileForSyncReading(
compressedBackupSnapshotPath
);
let totalBytesToRead = compressedBackupSnapshotFile.size;
// To calculate the Content-Length of the base64 block, we start by
// computing how many newlines we'll be adding...
let totalNewlines = Math.ceil(totalBytesToRead / chunkSize);
// Next, we determine how many full-sized chunks of chunkSize we'll be
// using, and multiply that by the number of base64 bytes that such a chunk
// will require.
let fullSizeChunks = totalNewlines - 1;
let fullSizeChunkBase64Bytes = this.#computeChunkBase64Bytes(
chunkSize,
!!encryptor
);
let totalBase64Bytes = fullSizeChunks * fullSizeChunkBase64Bytes;
// Finally, if there are any leftover bytes that are less than chunkSize,
// determine how many bytes those will require, and add it to our total.
let leftoverChunkBytes = totalBytesToRead % chunkSize;
if (leftoverChunkBytes) {
totalBase64Bytes += this.#computeChunkBase64Bytes(
leftoverChunkBytes,
!!encryptor
);
} else {
// We divided perfectly by chunkSize, so add another
// fullSizeChunkBase64Bytes to the total.
totalBase64Bytes += fullSizeChunkBase64Bytes;
}
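// As a worked example with no encryption: if totalBytesToRead is
// 2,500,000 and chunkSize is 1,048,576, there are 2 full chunks plus a
// leftover of 402,848 bytes, so totalBase64Bytes is
// 2 * (4 * Math.ceil(1048576 / 3) + 1) + (4 * Math.ceil(402848 / 3) + 1),
// which is 2 * 1398105 + 537133 = 3333343.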
await IOUtils.writeUTF8(
archivePath,
`--${boundary}
Content-Type: application/octet-stream
Content-Disposition: attachment; filename="archive.zip"
Content-Transfer-Encoding: base64
Content-Length: ${totalBase64Bytes}

`,
{ mode: "append" }
);
// And now we read in the bytes of the compressed file, base64 encode
// them, and append them to the document. This is also where encryption
// is applied, if an encryptor was configured.
let currentIndex = 0;
while (currentIndex < totalBytesToRead) {
let bytesToRead = Math.min(chunkSize, totalBytesToRead - currentIndex);
if (bytesToRead <= 0) {
throw new BackupError(
"Failed to calculate the right number of bytes to read.",
ERRORS.FILE_SYSTEM_ERROR
);
}
let buffer = new Uint8Array(bytesToRead);
compressedBackupSnapshotFile.readBytesInto(buffer, currentIndex);
let bytesToWrite;
if (encryptor) {
// Note that when totalBytesToRead divides evenly by chunkSize, the
// final chunk is still full-sized, so testing bytesToRead < chunkSize
// would never flag it as the last chunk.
let isLastChunk = currentIndex + bytesToRead >= totalBytesToRead;
bytesToWrite = await encryptor.encrypt(buffer, isLastChunk);
} else {
bytesToWrite = buffer;
}
// We're very intentionally newline-separating these blocks here, as
// these blocks may have been run through encryption, and the same blocks
// must be run through decryption to unpack the archive.
// Newline-separation makes it easier to identify and manage these blocks.
await IOUtils.writeUTF8(
archivePath,
ArchiveUtils.arrayToBase64(bytesToWrite) + "\n",
{
mode: "append",
}
);
currentIndex += bytesToRead;
}
await IOUtils.writeUTF8(
archivePath,
`
--${boundary}
${ArchiveUtils.INLINE_MIME_END_MARKER}
`,
{ mode: "append" }
);
compressedBackupSnapshotFile.close();
return true;
}
/**
* @typedef {object} ArchiveHeaderResult
* @property {string} contentType
* The value of the Content-Type for the inlined MIME message.
* @property {number} startByteOffset
* The byte offset within the archive file where the inlined MIME message
* begins.
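*
* @example
* // An illustrative value (the offset and boundary are made up):
* // {
* //   contentType: 'multipart/mixed; boundary="----=_Part_..."',
* //   startByteOffset: 512,
* // }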
*/
/**
* Given a path to a single-file archive HTML file, this method will sniff
* the header of the file to make sure it matches one that we support. If
* successful, it will resolve with the contentType of the inlined MIME
* message, as well as the byte offset at which the inlined MIME message
* begins.
*
* @param {string} archivePath
* The path to a single-file archive HTML file.
* @returns {Promise<ArchiveHeaderResult, Error>}
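*
* @example
* // A well-formed archive begins with bytes matching EXPECTED_HEADER
* // below; the version digit must equal ArchiveUtils.ARCHIVE_FILE_VERSION
* // (the "1" shown here is illustrative):
* // <!DOCTYPE html>
* // <!-- Version: 1 -->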
*/
parseArchiveHeader(archivePath) {
// We expect the first bytes of the file to indicate that this is an HTML5
// file and to give us a version number we can handle.
let syncReadFile = IOUtils.openFileForSyncReading(archivePath);
let totalBytes = syncReadFile.size;
// This seems like a reasonable number of bytes to read in to get at the
// header. If the header data isn't in there, then it's a corrupt file.
const MAX_BYTES_TO_READ = 256;
// Clamp to the file size so that archives smaller than MAX_BYTES_TO_READ
// don't result in a negative read length.
let headerBytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes);
let headerBuffer = new Uint8Array(headerBytesToRead);
syncReadFile.readBytesInto(headerBuffer, 0);
let textDecoder = new TextDecoder();
let decodedHeader = textDecoder.decode(headerBuffer);
const EXPECTED_HEADER =
/^<!DOCTYPE html>[\r\n]+<!-- Version: (\d+) -->[\r\n]+/;
let headerMatches = decodedHeader.match(EXPECTED_HEADER);
if (!headerMatches) {
throw new BackupError("Corrupt archive header", ERRORS.CORRUPTED_ARCHIVE);
}
let version = parseInt(headerMatches[1], 10);
// In the future, if we ever bump the ARCHIVE_FILE_VERSION, this is where we
// could place migrations / handlers for older archive versions.
if (version != ArchiveUtils.ARCHIVE_FILE_VERSION) {
throw new BackupError(
"Unsupported archive version: " + version,
ERRORS.UNSUPPORTED_BACKUP_VERSION
);
}
// Now we have to scan forward, looking for the INLINE_MIME_START_MARKER
// and the Content-Type, which appears just before the MIME message.
//
// We scan by reading bytes into a buffer rather than reading in the whole
// file, since the file could be quite large (100s of MB).
let currentIndex = headerBuffer.byteLength;
let startByteOffset = 0;
// We keep the old buffer around, and always join it with the buffer that
// contains the recently read-in bytes. That way, we can account for the
// possibility that the INLINE_MIME_START_MARKER and Content-Type were
// only half-loaded in the prior or current buffer.
let oldBuffer = headerBuffer;
let priorIndex = 0;
let contentType = null;
const EXPECTED_MARKER = new RegExp(
`${ArchiveUtils.INLINE_MIME_START_MARKER}\nContent-Type: (.+)\n\n`
);
let textEncoder = new TextEncoder();
while (currentIndex < totalBytes) {
let bytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes - currentIndex);
// This shouldn't happen, but better safe than sorry.
if (bytesToRead <= 0) {
throw new BackupError(
"Failed to calculate the proper number of bytes to read: " +
bytesToRead,
ERRORS.UNKNOWN
);
}
let buffer = new Uint8Array(bytesToRead);
syncReadFile.readBytesInto(buffer, currentIndex);
let combinedBuffer = new Uint8Array(
oldBuffer.byteLength + buffer.byteLength
);
combinedBuffer.set(oldBuffer, 0);
combinedBuffer.set(buffer, oldBuffer.byteLength);
// Now we look for the inline MIME marker, and try to extract the
// Content-Type for it.
let decodedString = textDecoder.decode(combinedBuffer);
let markerMatches = decodedString.match(EXPECTED_MARKER);
if (markerMatches) {
// If we found it, we want to find the byte index for the point
// immediately after the match. You'd think we could use
// decodedString.search for this, but unfortunately search returns
// character indexes and not byte indexes (and Unicode characters,
// which might be displayed in the markup of the page, are multiple
// bytes long). To work around this, we use a TextEncoder to encode
// everything leading up to the marker, and count the number of bytes.
// Since the buffer may have cut through a multibyte character, we
// also need to work around the workaround by discounting undecoded
// characters (which TextDecoder replaces with �). Then we count the
// number of bytes in our match. The sum of these two values, plus
// the priorIndex gives us the byte index of the point right after
// our regular expression match in a Unicode-character compatible way.
//
// This all presumes that the archive file was encoded as UTF-8. Since
// we control the generation of this file, this is a safe assumption.
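// As a concrete example, "€" is one character to String methods but
// three UTF-8 bytes (0xE2 0x82 0xAC), so character indexes and byte
// offsets diverge as soon as the decoded text contains any non-ASCII
// characters.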
let match = markerMatches[0];
let matchBytes = textEncoder.encode(match).byteLength;
let matchIndex = decodedString.indexOf(match);
let numberOfUndecodedCharacters =
ArchiveUtils.countReplacementCharacters(decodedString);
// Skip the undecoded characters at the start of the string,
// if necessary
let substringUpToMatch = decodedString.slice(
numberOfUndecodedCharacters,
matchIndex
);
let substringUpToMatchBytes =
textEncoder.encode(substringUpToMatch).byteLength;
startByteOffset = priorIndex + substringUpToMatchBytes + matchBytes;
contentType = markerMatches[1];
break;
}
priorIndex = currentIndex;
currentIndex += bytesToRead;
oldBuffer = buffer;
}
if (!contentType) {
throw new BackupError(
"Failed to find embedded data in archive",
ERRORS.CORRUPTED_ARCHIVE
);
}
return { startByteOffset, contentType };
}
/**
* Implements the standard boilerplate to make this class work as a
* PromiseWorker.
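*
* @example
* // A minimal sketch of driving this worker from the main thread,
* // assuming the standard BasePromiseWorker wrapper (the worker URL
* // here is hypothetical):
* // const worker = new BasePromiseWorker(
* //   "resource:///modules/backup/Archive.worker.mjs"
* // );
* // let ok = await worker.post("constructArchive", [params]);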
*/
#connectToPromiseWorker() {
this.#worker = new PromiseWorker.AbstractWorker();
this.#worker.dispatch = (method, args = []) => {
if (!this[method]) {
throw new BackupError(
"Method does not exist: " + method,
ERRORS.INTERNAL_ERROR
);
}
return this[method](...args);
};
this.#worker.close = () => self.close();
this.#worker.postMessage = (message, ...transfers) => {
self.postMessage(message, ...transfers);
};
self.callMainThread = this.#worker.callMainThread.bind(this.#worker);
self.addEventListener("message", msg => this.#worker.handleMessage(msg));
self.addEventListener("unhandledrejection", function (error) {
throw error.reason;
});
}
}
new ArchiveWorker();