Source code

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
// Holder object for the module's lazily defined dependencies.
const lazy = {};

ChromeUtils.defineESModuleGetters(lazy, {
  PlacesUtils: "resource://gre/modules/PlacesUtils.sys.mjs",
  Sqlite: "resource://gre/modules/Sqlite.sys.mjs",
});

// The logger is exposed through a lazy getter and obtained from PlacesUtils,
// tagged with this module's prefix.
ChromeUtils.defineLazyGetter(lazy, "logger", () =>
  lazy.PlacesUtils.getLogger({ prefix: "PlacesSemanticHistoryDatabase" })
);
// Every time the schema or the underlying data changes, you must bump up the
// schema version. This is also necessary, for example, if we change the
// embedding size.
// Remember to:
// 1. Bump up the version number.
// 2. Add a migration function to migrate the data to the new schema.
// 3. Update #createDatabaseEntities and #checkDatabaseEntities.
// 4. Add a test to check that the migration works correctly.
// Note that downgrades are not supported: when you bump up the version and the
// user later downgrades, the database will be deleted and recreated.
// If a migration throws, the database will also be deleted and recreated.
const CURRENT_SCHEMA_VERSION = 1;
/**
 * Manages the SQLite database used for semantic history: opening the
 * connection (with the "vec" extension and the Places database attached),
 * creating/validating/migrating the schema, and removing the database files
 * when they are corrupt or cannot be initialized.
 */
export class PlacesSemanticHistoryDatabase {
  // Function registered with Sqlite.shutdown while a connection is open.
  #asyncShutdownBlocker;
  // The currently open connection, or a falsy value when closed.
  #conn;
  // Folder holding both this database and places.sqlite.
  #databaseFolderPath;
  // Dimension of the embedding vectors stored in vec_history.
  #embeddingSize;
  // File name (not path) of the semantic database.
  databaseFileName;
  #schemaVersion = CURRENT_SCHEMA_VERSION;

  /**
   * @param {object} options
   * @param {number} options.embeddingSize - The size of the embedding.
   * @param {string} options.fileName - File name of the database, resolved
   *   relative to the profile folder.
   */
  constructor({ embeddingSize, fileName }) {
    this.#embeddingSize = embeddingSize;
    this.databaseFileName = fileName;
    this.#databaseFolderPath = PathUtils.profileDir;
  }

  /**
   * @returns {number} The schema version tracked by this instance.
   */
  get currentSchemaVersion() {
    return this.#schemaVersion;
  }

  /**
   * Test-only helper overriding the tracked schema version and, when a
   * connection is open, the version stored in the database. Note that schema
   * initialization still compares against CURRENT_SCHEMA_VERSION.
   *
   * @param {number} version - The schema version to set.
   * @returns {Promise<void>} resolves when done.
   */
  async setCurrentSchemaVersionForTests(version) {
    this.#schemaVersion = version;
    if (this.#conn) {
      await this.#conn.setSchemaVersion(version);
    }
  }

  /**
   * Connects to the semantic.sqlite database and attaches the Places DB.
   * A corrupt database, or one whose schema cannot be initialized, is removed
   * and recreated once; if that also fails the error is propagated and no
   * connection is kept.
   *
   * @returns {Promise<object>}
   *   A promise resolving to the database connection.
   */
  async getConnection() {
    if (this.#conn) {
      return this.#conn;
    }
    try {
      // Connect to the database
      this.#conn = await this.#openConnection();
    } catch (e) {
      if (
        e.result == Cr.NS_ERROR_FILE_CORRUPTED ||
        e.errors?.some(error => error.result == Ci.mozIStorageError.NOTADB)
      ) {
        lazy.logger.info("Removing corrupted database files");
        await this.removeDatabaseFiles();
        this.#conn = await this.#openConnection();
      } else {
        lazy.logger.error("Failed to open connection", e);
        // Re-throw the exception for the caller.
        throw e;
      }
    }
    // Add shutdown blocker to close connection gracefully
    this.#registerShutdownBlocker();
    try {
      lazy.logger.info("Initializing schema");
      await this.#initializeSchema();
    } catch (e) {
      lazy.logger.warn("Schema initialization failed", e);
      // If the schema cannot be initialized close the connection and create
      // a new database file.
      await this.closeConnection();
      await this.removeDatabaseFiles();
      this.#conn = await this.#openConnection();
      // closeConnection() removed the blocker, so the replacement connection
      // needs its own.
      this.#registerShutdownBlocker();
      try {
        await this.#initializeSchema();
      } catch (ex) {
        // Don't keep handing out a connection whose schema is unusable.
        await this.closeConnection();
        throw ex;
      }
    }
    return this.#conn;
  }

  /**
   * Registers a Sqlite shutdown blocker that closes the current connection.
   */
  #registerShutdownBlocker() {
    this.#asyncShutdownBlocker = async () => {
      await this.closeConnection();
    };
    lazy.Sqlite.shutdown.addBlocker(
      "PlacesSemanticHistoryDatabase: Shutdown",
      this.#asyncShutdownBlocker
    );
  }

  /**
   * Opens a new connection, configures it and attaches places.sqlite.
   * If the configuration fails the connection is closed before the error is
   * re-thrown, so that the file is not left open.
   *
   * @returns {Promise<object>} the configured connection.
   */
  async #openConnection() {
    lazy.logger.info("Trying to open connection");
    const conn = await lazy.Sqlite.openConnection({
      path: this.databaseFilePath,
      extensions: ["vec"],
    });
    try {
      // WAL is generally faster and allows for concurrent reads and writes.
      await conn.execute("PRAGMA journal_mode = WAL");
      await conn.execute("PRAGMA wal_autocheckpoint = 16");
      // We're not hooking up this to the vacuum manager yet, but let's start
      // storing vacuum information, in case we want to do that in the future.
      await conn.execute("PRAGMA auto_vacuum = INCREMENTAL");
      // Attach the Places database, as we need to join on it. The path is
      // bound rather than interpolated, so quotes in it cannot break the SQL.
      const placesDbPath = PathUtils.join(
        this.#databaseFolderPath,
        "places.sqlite"
      );
      await conn.execute("ATTACH DATABASE :path AS places", {
        path: placesDbPath,
      });
    } catch (e) {
      try {
        await conn.close();
      } catch (closeError) {
        lazy.logger.error("Failed to close connection", closeError);
      }
      throw e;
    }
    return conn;
  }

  /**
   * Closes the connection to the database, if it's open, and removes the
   * shutdown blocker.
   * @returns {Promise<void>} resolves when done.
   */
  async closeConnection() {
    if (this.#conn) {
      lazy.logger.info("Closing connection");
      lazy.Sqlite.shutdown.removeBlocker(this.#asyncShutdownBlocker);
      await this.#conn.close();
      this.#conn = null;
    }
  }

  /**
   * Initializes the semantic database, creating virtual tables if needed.
   * Any exception thrown here should be handled by the caller replacing the
   * database.
   *
   * @throws {Error} if the stored version is newer than the current one, or
   *   the current schema fails the health check.
   */
  async #initializeSchema() {
    const version = await this.#conn.getSchemaVersion();
    if (version > CURRENT_SCHEMA_VERSION) {
      throw new Error("Downgrade of the schema is not supported");
    }
    if (version == CURRENT_SCHEMA_VERSION) {
      const healthy = await this.#checkDatabaseEntities(this.#embeddingSize);
      if (!healthy) {
        throw new Error("Database schema is not healthy");
      }
      return;
    }
    await this.#conn.executeTransaction(async () => {
      if (version == 0) {
        // This is a newly created database, just create the entities.
        lazy.logger.info("Creating database schema");
        await this.#createDatabaseEntities(this.#embeddingSize);
      } else {
        lazy.logger.info("Migrating database schema");
        // Put migrations here with a brief description of what they do.
        // If you want to fully replace the database with a new one, as the
        // data cannot be easily migrated, just throw an Error from the
        // migration.
        // if (version < 2) {
        //   // This migration adds a new column...
        //   this.#migrateToVersion2(...);
        // }
        // if (version < 3) {
        //   // This migration adds an index...
        //   this.#migrateToVersion3(...);
        // }
      }
      await this.#conn.setSchemaVersion(CURRENT_SCHEMA_VERSION);
    });
  }

  /**
   * Creates the necessary virtual tables in the semantic.sqlite database.
   * @param {number} embeddingSize - The size of the embedding.
   * @returns {Promise<void>} resolves when done.
   */
  async #createDatabaseEntities(embeddingSize) {
    await this.#conn.execute(`
      CREATE VIRTUAL TABLE vec_history USING vec0(
        embedding FLOAT[${embeddingSize}],
        embedding_coarse bit[${embeddingSize}]
      );
    `);
    await this.#conn.execute(`
      CREATE TABLE vec_history_mapping (
        rowid INTEGER PRIMARY KEY,
        url_hash INTEGER NOT NULL UNIQUE
      );
    `);
  }

  /**
   * Verifies that the schema is current, there's no missing entities or
   * changed embedding size.
   * @param {number} embeddingSize - The size of the embedding.
   * @returns {Promise<boolean>} whether the schema is consistent or not.
   */
  async #checkDatabaseEntities(embeddingSize) {
    const tables = await this.#conn.execute(
      `SELECT name FROM sqlite_master WHERE type='table'`
    );
    const tableNames = tables.map(row => row.getResultByName("name"));
    if (
      !tableNames.includes("vec_history") ||
      !tableNames.includes("vec_history_mapping")
    ) {
      return false;
    }
    // If the embedding size changed the database should be recreated. This
    // should be handled by a migration, but we check to be overly safe.
    const columns = await this.#conn.execute(`PRAGMA table_info(vec_history)`);
    const embeddingColumn = columns.find(
      row => row.getResultByName("name") == "embedding"
    );
    // A missing embedding column is reported as unhealthy rather than
    // throwing a TypeError.
    return (
      embeddingColumn?.getResultByName("type") === `FLOAT[${embeddingSize}]`
    );
  }

  /**
   * Returns the path to the semantic database.
   * @returns {string} The path to the semantic database.
   */
  get databaseFilePath() {
    return PathUtils.join(this.#databaseFolderPath, this.databaseFileName);
  }

  /**
   * Removes the semantic database file and auxiliary files (-wal, -shm),
   * closing the connection first. If removal fails, a pref is set so that
   * removal can be retried on next startup, and the error is re-thrown.
   * @returns {Promise<void>} resolves when done.
   */
  async removeDatabaseFiles() {
    lazy.logger.info("Removing database files");
    await this.closeConnection();
    const files = ["", "-wal", "-shm"].map(suffix =>
      PathUtils.join(this.#databaseFolderPath, this.databaseFileName + suffix)
    );
    try {
      for (const file of files) {
        await IOUtils.remove(file, {
          retryReadonly: true,
          recursive: true,
          ignoreAbsent: true,
        });
      }
    } catch (e) {
      // Try to clear on next startup.
      Services.prefs.setBoolPref(
        "places.semanticHistory.removeOnStartup",
        true
      );
      // Re-throw the exception for the caller.
      throw e;
    }
  }
}