/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
import { PromiseWorker } from "resource://gre/modules/workers/PromiseWorker.mjs";
// The ArchiveUtils module is designed to be imported in both worker and
// main thread contexts.
/* eslint-disable mozilla/reject-import-system-module-from-non-system */
import { ArchiveUtils } from "resource:///modules/backup/ArchiveUtils.sys.mjs";
import { ArchiveEncryptor } from "resource:///modules/backup/ArchiveEncryption.sys.mjs";
import { BackupError } from "resource:///modules/backup/BackupError.mjs";
import { ERRORS } from "chrome://browser/content/backup/backup-constants.mjs";
/**
* An ArchiveWorker is a PromiseWorker that tries to do most of the heavy
* lifting of dealing with single-file archives for backups, to avoid doing
* much on the main thread. This is mostly important for single-file archive
* _creation_, as this is supposed to occur silently in the background without
* the user noticing any degradation in performance.
*/
class ArchiveWorker {
#worker = null;
constructor() {
// Connect the provider to the worker.
this.#connectToPromiseWorker();
}
/**
* Generates a boundary string that can be used to separate sections in a
* multipart/mixed MIME message.
*
* @returns {string}
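* @example
* // Illustrative only; each call produces different random values:
* // "----=_Part_1718822400000_x7k2m9q4fp_a1b2c3d4e5"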
*/
#generateBoundary() {
return (
"----=_Part_" +
new Date().getTime() +
"_" +
Math.random().toString(36).slice(2, 12) +
"_" +
Math.random().toString(36).slice(2, 12)
);
}
/**
* Calculates how many base64 bytes will be generated from some number of
* unencoded bytes. This presumes that the base64 bytes include a newline
* terminator at the end.
*
* @param {number} bytes
* The number of bytes to be converted to base64.
* @param {boolean} encrypting
* True if encryption via ArchiveEncryptor is being applied.
* @returns {number}
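* @example
* // Worked example (no encryption, hypothetical 4-byte chunk):
* // 4 * Math.ceil(4 / 3) + 1 = 9, i.e. 8 base64 characters ("Zm9vYg==" for
* // the bytes of "foob") plus the trailing newline.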
*/
#computeChunkBase64Bytes(bytes, encrypting) {
if (encrypting) {
bytes += ArchiveUtils.TAG_LENGTH_BYTES;
}
return 4 * Math.ceil(bytes / 3) + 1;
}
/**
* @typedef {object} EncryptionArgs
* @property {CryptoKey} publicKey
* The RSA-OAEP public key that will be used to derive keys for encrypting
* the backup.
* @property {CryptoKey} backupAuthKey
* The AES-GCM key that will be used to authenticate the owner of the
* backup.
* @property {Uint8Array} wrappedSecrets
* The encrypted backup secrets computed by ArchiveEncryptionState.
* @property {Uint8Array} salt
* A salt computed for the PBKDF2 stretching of the recovery code.
* @property {Uint8Array} nonce
* A nonce computed when wrapping the private key and OSKeyStore secret.
*/
/**
* Constructs a single-file archive for a backup on the filesystem. A
* single-file archive is a specially crafted HTML document that includes,
* among other things, an inlined multipart/mixed MIME message within a
* document comment.
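*
* Roughly, the resulting file is laid out like this (an illustrative
* sketch; the actual start/end markers come from ArchiveUtils and the
* headers are written by the code below):
*
*   ...rendered HTML markup...
*   <INLINE_MIME_START_MARKER>
*   Content-Type: multipart/mixed; boundary="..."
*
*   --boundary
*   ...headers for the "archive.json" part, then the JSON block...
*   --boundary
*   ...headers for the "archive.zip" part, then the base64 chunks,
*      one chunk per line...
*   --boundary
*   <INLINE_MIME_END_MARKER>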
*
* @param {object} params
* Arguments that are described in more detail below.
* @param {string} params.archivePath
* The path on the file system to write the single-file archive.
* @param {string} params.markup
* The HTML markup to insert into the archive file before the HTML
* comment block. This is the markup that will be rendered if the HTML
* file is opened in a web browser.
* @param {object} params.backupMetadata
* The metadata associated with this backup. This is a copy of the metadata
* object that is contained within the compressed backup's manifest.
* @param {string} params.compressedBackupSnapshotPath
* The path on the file system where the compressed backup file is located.
* @param {EncryptionArgs} [params.encryptionArgs=undefined]
* Optional EncryptionArgs, which will be used to encrypt this archive.
* @param {number} params.chunkSize
* The size of the chunks to break the byte stream into for encoding.
* @returns {Promise<boolean>}
* Resolves with true once the archive has been fully written.
*/
async constructArchive({
archivePath,
markup,
backupMetadata,
compressedBackupSnapshotPath,
encryptionArgs,
chunkSize,
}) {
let encryptor = null;
if (encryptionArgs) {
encryptor = await ArchiveEncryptor.initialize(
encryptionArgs.publicKey,
encryptionArgs.backupAuthKey
);
}
let boundary = this.#generateBoundary();
let jsonBlock;
if (encryptor) {
jsonBlock = await encryptor.confirm(
backupMetadata,
encryptionArgs.wrappedSecrets,
encryptionArgs.salt,
encryptionArgs.nonce
);
} else {
jsonBlock = {
version: ArchiveUtils.SCHEMA_VERSION,
encConfig: null,
meta: backupMetadata,
};
}
let serializedJsonBlock = JSON.stringify(jsonBlock);
let textEncoder = new TextEncoder();
let jsonBlockLength = textEncoder.encode(serializedJsonBlock).length;
// Once we get the ability to stream to the filesystem from IOUtils in a
// worker, we should use that instead of appending each of these chunks.
//
// This isn't supposed to be some kind of generalized MIME message
// generator, so we're happy to construct it by hand here.
await IOUtils.writeUTF8(archivePath, markup);
await IOUtils.writeUTF8(
archivePath,
`
${ArchiveUtils.INLINE_MIME_START_MARKER}
Content-Type: multipart/mixed; boundary="${boundary}"

--${boundary}
Content-Type: application/json; charset=utf-8
Content-Disposition: attachment; filename="archive.json"
Content-Length: ${jsonBlockLength}

${serializedJsonBlock}
`,
{ mode: "append" }
);
let compressedBackupSnapshotFile = IOUtils.openFileForSyncReading(
compressedBackupSnapshotPath
);
let totalBytesToRead = compressedBackupSnapshotFile.size;
// To calculate the Content-Length of the base64 block, we start by
// computing how many newlines we'll be adding...
let totalNewlines = Math.ceil(totalBytesToRead / chunkSize);
// Next, we determine how many full-sized chunks of chunkSize we'll be
// using, and multiply that by the number of base64 bytes that such a chunk
// will require.
let fullSizeChunks = totalNewlines - 1;
let fullSizeChunkBase64Bytes = this.#computeChunkBase64Bytes(
chunkSize,
!!encryptor
);
let totalBase64Bytes = fullSizeChunks * fullSizeChunkBase64Bytes;
// Finally, if there are any leftover bytes that are less than chunkSize,
// determine how many bytes those will require, and add it to our total.
let leftoverChunkBytes = totalBytesToRead % chunkSize;
if (leftoverChunkBytes) {
totalBase64Bytes += this.#computeChunkBase64Bytes(
leftoverChunkBytes,
!!encryptor
);
} else {
// We divided perfectly by chunkSize, so add another
// fullSizeChunkBase64Bytes to the total.
totalBase64Bytes += fullSizeChunkBase64Bytes;
}
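// Illustrative check of the arithmetic above (hypothetical numbers): for
// totalBytesToRead = 10 and chunkSize = 4 we write chunks of 4, 4 and 2
// bytes, so totalNewlines = ceil(10 / 4) = 3, fullSizeChunks = 2, and the
// leftover 2-byte chunk contributes one final, smaller base64 block.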
await IOUtils.writeUTF8(
archivePath,
`--${boundary}
Content-Type: application/octet-stream
Content-Disposition: attachment; filename="archive.zip"
Content-Transfer-Encoding: base64
Content-Length: ${totalBase64Bytes}

`,
{ mode: "append" }
);
// And now we read in the bytes of the compressed file, encrypt them if an
// encryptor was set up, base64 encode them, and append them to the
// document.
let currentIndex = 0;
while (currentIndex < totalBytesToRead) {
let bytesToRead = Math.min(chunkSize, totalBytesToRead - currentIndex);
if (bytesToRead <= 0) {
throw new BackupError(
"Failed to calculate the right number of bytes to read.",
ERRORS.FILE_SYSTEM_ERROR
);
}
let buffer = new Uint8Array(bytesToRead);
compressedBackupSnapshotFile.readBytesInto(buffer, currentIndex);
let bytesToWrite;
if (encryptor) {
let isLastChunk = bytesToRead < chunkSize;
bytesToWrite = await encryptor.encrypt(buffer, isLastChunk);
} else {
bytesToWrite = buffer;
}
// We're very intentionally newline-separating these blocks here, as
// these blocks may have been run through encryption, and the same blocks
// must be run through decryption to unpack the archive.
// Newline-separation makes it easier to identify and manage these blocks.
await IOUtils.writeUTF8(
archivePath,
ArchiveUtils.arrayToBase64(bytesToWrite) + "\n",
{
mode: "append",
}
);
currentIndex += bytesToRead;
}
await IOUtils.writeUTF8(
archivePath,
`
--${boundary}
${ArchiveUtils.INLINE_MIME_END_MARKER}
`,
{ mode: "append" }
);
compressedBackupSnapshotFile.close();
return true;
}
/**
* @typedef {object} ArchiveHeaderResult
* @property {string} contentType
* The value of the Content-Type for the inlined MIME message.
* @property {number} startByteOffset
* The byte offset within the archive file where the inlined MIME message
* begins.
*/
/**
* Given a path to a single-file archive HTML file, this method will sniff
* the header of the file to make sure it matches one that we support. If
* successful, it will resolve with the contentType of the inline MIME
* message, as well as the byte offset for which the start of the inlined MIME
* message can be read from.
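*
* For reference, the first bytes of a supported archive look like this
* (version number illustrative):
*
*   <!DOCTYPE html>
*   <!-- Version: 2 -->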
*
* @param {string} archivePath
* The path to a single-file archive HTML file.
* @returns {Promise<ArchiveHeaderResult, Error>}
*/
parseArchiveHeader(archivePath) {
// We expect the first bytes of the file to indicate that this is an HTML5
// file and to give us a version number we can handle.
let syncReadFile = IOUtils.openFileForSyncReading(archivePath);
let totalBytes = syncReadFile.size;
// This seems like a reasonable minimum number of bytes to read in to get
// at the header. If the header data isn't in there, then it's a corrupt
// file.
const MAX_BYTES_TO_READ = 256;
let headerBytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes);
let headerBuffer = new Uint8Array(headerBytesToRead);
syncReadFile.readBytesInto(headerBuffer, 0);
let textDecoder = new TextDecoder();
let decodedHeader = textDecoder.decode(headerBuffer);
const EXPECTED_HEADER =
/^<!DOCTYPE html>[\r\n]+<!-- Version: (\d+) -->[\r\n]+/;
let headerMatches = decodedHeader.match(EXPECTED_HEADER);
if (!headerMatches) {
throw new BackupError("Corrupt archive header", ERRORS.CORRUPTED_ARCHIVE);
}
let version = parseInt(headerMatches[1], 10);
// In the future, if we ever bump the ARCHIVE_FILE_VERSION, this is where we
// could place migrations / handlers for older archive versions.
if (version != ArchiveUtils.ARCHIVE_FILE_VERSION) {
throw new BackupError(
"Unsupported archive version: " + version,
ERRORS.UNSUPPORTED_BACKUP_VERSION
);
}
// Now we have to scan forward, looking for the INLINE_MIME_START_MARKER
// and the Content-Type, which appears just before the MIME message.
//
// We scan by reading bytes into a buffer rather than reading in the whole
// file, since the file could be quite large (100s of MB).
let currentIndex = headerBuffer.byteLength;
let startByteOffset = 0;
// We keep the old buffer around, and always join it with the buffer that
// contains the recently read-in bytes. That way, we can account for the
// possibility that the INLINE_MIME_START_MARKER and Content-Type were
// only half-loaded in prior or current buffer.
let oldBuffer = headerBuffer;
let priorIndex = 0;
let contentType = null;
const EXPECTED_MARKER = new RegExp(
`${ArchiveUtils.INLINE_MIME_START_MARKER}\nContent-Type: (.+)\n\n`
);
let textEncoder = new TextEncoder();
while (currentIndex < totalBytes) {
let bytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes - currentIndex);
// This shouldn't happen, but better safe than sorry.
if (bytesToRead <= 0) {
throw new BackupError(
"Failed to calculate the proper number of bytes to read: " +
bytesToRead,
ERRORS.UNKNOWN
);
}
let buffer = new Uint8Array(bytesToRead);
syncReadFile.readBytesInto(buffer, currentIndex);
let combinedBuffer = new Uint8Array(
oldBuffer.byteLength + buffer.byteLength
);
combinedBuffer.set(oldBuffer, 0);
combinedBuffer.set(buffer, oldBuffer.byteLength);
// Now we look for the inline MIME marker, and try to extract the
// Content-Type for it.
let decodedString = textDecoder.decode(combinedBuffer);
let markerMatches = decodedString.match(EXPECTED_MARKER);
if (markerMatches) {
// If we found it, we want to find the byte index for the point
// immediately after the match. You'd think we could use
// decodedString.search for this, but unfortunately search returns
// character indexes and not byte indexes (and Unicode characters,
// which might be displayed in the markup of the page, are multiple
// bytes long). To work around this, we use a TextEncoder to encode
// everything leading up to the marker, and count the number of bytes.
// Since the buffer may have cut through a multibyte character, we
// also need to work around the workaround by discounting undecoded
// characters (which TextDecoder replaces with �). Then we count the
// number of bytes in our match. The sum of these two values, plus
// the priorIndex gives us the byte index of the point right after
// our regular expression match in a Unicode-character compatible way.
//
// This all presumes that the archive file was encoded as UTF-8. Since
// we control the generation of this file, this is a safe assumption.
let match = markerMatches[0];
let matchBytes = textEncoder.encode(match).byteLength;
let matchIndex = decodedString.indexOf(match);
let numberOfUndecodedCharacters =
ArchiveUtils.countReplacementCharacters(decodedString);
// Skip the undecoded characters at the start of the string,
// if necessary
let substringUpToMatch = decodedString.slice(
numberOfUndecodedCharacters,
matchIndex
);
let substringUpToMatchBytes =
textEncoder.encode(substringUpToMatch).byteLength;
startByteOffset = priorIndex + substringUpToMatchBytes + matchBytes;
contentType = markerMatches[1];
break;
}
priorIndex = currentIndex;
currentIndex += bytesToRead;
oldBuffer = buffer;
}
if (!contentType) {
throw new BackupError(
"Failed to find embedded data in archive",
ERRORS.CORRUPTED_ARCHIVE
);
}
return { startByteOffset, contentType };
}
/**
* Implements the standard boilerplate to make this class work as a
* PromiseWorker.
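*
* A sketch of how the main thread is expected to drive this worker,
* assuming the standard BasePromiseWorker API (the worker URL shown is
* illustrative):
*
*   let worker = new BasePromiseWorker(
*     "resource:///modules/backup/ArchiveWorker.mjs"
*   );
*   await worker.post("constructArchive", [{ archivePath, markup, ... }]);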
*/
#connectToPromiseWorker() {
this.#worker = new PromiseWorker.AbstractWorker();
this.#worker.dispatch = (method, args = []) => {
if (!this[method]) {
throw new BackupError(
"Method does not exist: " + method,
ERRORS.INTERNAL_ERROR
);
}
return this[method](...args);
};
this.#worker.close = () => self.close();
this.#worker.postMessage = (message, ...transfers) => {
self.postMessage(message, ...transfers);
};
self.callMainThread = this.#worker.callMainThread.bind(this.#worker);
self.addEventListener("message", msg => this.#worker.handleMessage(msg));
self.addEventListener("unhandledrejection", function (error) {
throw error.reason;
});
}
}
new ArchiveWorker();