Source code
Revision control
Copy as Markdown
Other Tools
//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "Classifier.h"
#include "LookupCacheV4.h"
#include "nsIFile.h"
#include "nsNetCID.h"
#include "nsPrintfCString.h"
#include "nsThreadUtils.h"
#include "mozilla/Components.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/Telemetry.h"
#include "mozilla/IntegerPrintfMacros.h"
#include "mozilla/LazyIdleThread.h"
#include "mozilla/Logging.h"
#include "mozilla/SyncRunnable.h"
#include "mozilla/Base64.h"
#include "mozilla/Unused.h"
#include "mozilla/UniquePtr.h"
#include "nsUrlClassifierDBService.h"
#include "nsUrlClassifierUtils.h"
// MOZ_LOG=UrlClassifierDbService:5
// Log module used by the LOG()/LOG_ENABLED() macros below. It is only
// declared here; its definition is not in this part of the file.
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
// Logs |args| (a parenthesized printf-style argument list) to the module
// above at Debug level.
#define LOG(args) \
MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
// Tests whether Debug-level logging is enabled for the module above, so that
// callers can skip work that only feeds log output.
#define LOG_ENABLED() \
MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
// Leaf name of the main store directory; SetupPathNames() appends it to the
// cache directory.
#define STORE_DIRECTORY "safebrowsing"_ns
// Suffixes that SetupPathNames() concatenates with STORE_DIRECTORY to name the
// to-delete, backup and updating sibling directories.
#define TO_DELETE_DIR_SUFFIX "-to_delete"_ns
#define BACKUP_DIR_SUFFIX "-backup"_ns
#define UPDATING_DIR_SUFFIX "-updating"_ns
// File-name suffixes. NOTE(review): presumably these identify the V4 and V2
// metadata files respectively; they are not used in the visible part of the
// file, so confirm against their users.
#define V4_METADATA_SUFFIX ".metadata"_ns
#define V2_METADATA_SUFFIX ".sbstore"_ns
// The amount of time, in milliseconds, that our IO thread will stay alive after
// the last event it processes.
#define DEFAULT_THREAD_TIMEOUT_MS 5000
namespace mozilla {
namespace safebrowsing {
// Reports whether the calling code is running on the update thread.
// Returns false when there is no update thread.
bool Classifier::OnUpdateThread() const {
  if (!mUpdateThread) {
    return false;
  }

  bool isCurrent = false;
  mUpdateThread->IsOnCurrentThread(&isCurrent);
  return isCurrent;
}
// Splits the comma-separated list |str| into |tables|.
//
// |tables| is cleared first. Empty items are skipped, and the result is
// sorted with duplicates removed.
void Classifier::SplitTables(const nsACString& str,
                             nsTArray<nsCString>& tables) {
  tables.Clear();

  for (const auto& name : str.Split(',')) {
    if (name.IsEmpty()) {
      continue;
    }
    tables.AppendElement(name);
  }

  // Sorting puts equal names next to each other so that std::unique can
  // compact them; the leftover tail is then cut off.
  tables.Sort();
  const auto uniqueEnd = std::unique(tables.begin(), tables.end());
  tables.TruncateLength(std::distance(tables.begin(), uniqueEnd));
}
// Resolves the directory in which the files of table |aTableName| live.
//
// Only V4 tables (name ends with "-proto") that have a non-empty |aProvider|
// get a per-provider sub-directory of |aRootStoreDirectory|; it is created if
// it does not exist yet. Every other table uses |aRootStoreDirectory| itself.
//
// On success |*aPrivateStoreDirectory| receives an owning reference to the
// chosen directory. Fails with NS_ERROR_FILE_DESTINATION_NOT_DIR when the
// provider path exists but is not a directory, or with the error of the
// failing nsIFile call.
nsresult Classifier::GetPrivateStoreDirectory(
    nsIFile* aRootStoreDirectory, const nsACString& aTableName,
    const nsACString& aProvider, nsIFile** aPrivateStoreDirectory) {
  NS_ENSURE_ARG_POINTER(aPrivateStoreDirectory);

  // Non-V4 tables, and V4 tables whose provider is unknown, are stored
  // directly in the root folder.
  const bool isV4Table = StringEndsWith(aTableName, "-proto"_ns);
  if (!isV4Table || aProvider.IsEmpty()) {
    nsCOMPtr<nsIFile>(aRootStoreDirectory).forget(aPrivateStoreDirectory);
    return NS_OK;
  }

  // Work on a clone so the caller's root handle is left untouched, then
  // descend into the provider's sub-directory.
  nsCOMPtr<nsIFile> privateDir;
  nsresult rv = aRootStoreDirectory->Clone(getter_AddRefs(privateDir));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = privateDir->AppendNative(aProvider);
  NS_ENSURE_SUCCESS(rv, rv);

  bool alreadyThere = false;
  rv = privateDir->Exists(&alreadyThere);
  NS_ENSURE_SUCCESS(rv, rv);

  nsresult outcome = NS_OK;
  if (alreadyThere) {
    // Something already has that name; it must be a directory to be usable.
    bool isDirectory = false;
    rv = privateDir->IsDirectory(&isDirectory);
    NS_ENSURE_SUCCESS(rv, rv);

    if (!isDirectory) {
      return NS_ERROR_FILE_DESTINATION_NOT_DIR;
    }
  } else {
    LOG(("Creating private directory for %s", nsCString(aTableName).get()));
    rv = privateDir->Create(nsIFile::DIRECTORY_TYPE, 0755);
    NS_ENSURE_SUCCESS(rv, rv);
    // The creation path reports the (successful) result of Create().
    outcome = rv;
  }

  privateDir.forget(aPrivateStoreDirectory);
  return outcome;
}
// Sets the initial state flags and creates the thread stored in
// mUpdateThread.
Classifier::Classifier()
// Starting as "outdated" makes the first TableRequest() compute its result
// instead of returning the (still empty) cached one.
: mIsTableRequestResultOutdated(true),
mAsyncUpdateInProgress(false),
mUpdateInterrupted(true),
mIsClosed(false) {
// Make a lazy thread for any IO
// It is created with ManualShutdown; the destructor calls Shutdown() on it.
mUpdateThread =
new LazyIdleThread(DEFAULT_THREAD_TIMEOUT_MS, "Classifier Update",
LazyIdleThread::ShutdownMethod::ManualShutdown);
}
// Shuts down the update thread (if there is one), releases it, and then
// closes the classifier.
Classifier::~Classifier() {
if (mUpdateThread) {
// The thread was created with ManualShutdown in the constructor, so it is
// shut down explicitly here before the reference is dropped.
mUpdateThread->Shutdown();
mUpdateThread = nullptr;
}
// Close() marks the classifier as closed and drops the stores.
Close();
}
// Derives the handles of all working directories from mCacheDirectory:
// the root store directory plus its "-backup", "-updating" and "-to_delete"
// siblings. Also hands the new root store handle to every lookup cache.
//
// Returns the error of the first failing nsIFile call, NS_OK otherwise.
nsresult Classifier::SetupPathNames() {
  // Points |aHandle| at <cache directory>/<aLeafName>.
  const auto deriveFromCacheDir =
      [this](auto& aHandle, const nsACString& aLeafName) -> nsresult {
    nsresult res = mCacheDirectory->Clone(getter_AddRefs(aHandle));
    NS_ENSURE_SUCCESS(res, res);
    res = aHandle->AppendNative(aLeafName);
    NS_ENSURE_SUCCESS(res, res);
    return NS_OK;
  };

  // Root directory where all the databases are stored.
  nsresult rv = deriveFromCacheDir(mRootStoreDirectory, STORE_DIRECTORY);
  NS_ENSURE_SUCCESS(rv, rv);

  // LookupCaches are persistent and survive updates, so make sure they
  // read/write in the right place. Their files get moved "underneath" them
  // during backup/restore.
  for (const auto& cache : mLookupCaches) {
    cache->UpdateRootDirHandle(mRootStoreDirectory);
  }

  // Where a backup is moved to before an update.
  rv = deriveFromCacheDir(mBackupDirectory,
                          STORE_DIRECTORY + BACKUP_DIR_SUFFIX);
  NS_ENSURE_SUCCESS(rv, rv);

  // Where an update is being worked on.
  rv = deriveFromCacheDir(mUpdatingDirectory,
                          STORE_DIRECTORY + UPDATING_DIR_SUFFIX);
  NS_ENSURE_SUCCESS(rv, rv);

  // Where the backup is moved to so that it can be deleted atomically
  // (it is really a move).
  rv = deriveFromCacheDir(mToDeleteDirectory,
                          STORE_DIRECTORY + TO_DELETE_DIR_SUFFIX);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}
// Makes sure the root store directory exists, creating it when missing.
//
// Does nothing (and returns NS_OK) once ShouldAbort() reports true. Returns
// NS_ERROR_FILE_DESTINATION_NOT_DIR when the path exists but is not a
// directory, or the error of the failing nsIFile call.
nsresult Classifier::CreateStoreDirectory() {
  if (ShouldAbort()) {
    // The classifier is done, so there is nothing left to prepare.
    return NS_OK;
  }

  bool present = false;
  nsresult rv = mRootStoreDirectory->Exists(&present);
  NS_ENSURE_SUCCESS(rv, rv);

  if (present) {
    // The path is taken already; it is only usable if it is a directory.
    bool isDirectory = false;
    rv = mRootStoreDirectory->IsDirectory(&isDirectory);
    NS_ENSURE_SUCCESS(rv, rv);
    return isDirectory ? NS_OK : NS_ERROR_FILE_DESTINATION_NOT_DIR;
  }

  rv = mRootStoreDirectory->Create(nsIFile::DIRECTORY_TYPE, 0755);
  NS_ENSURE_SUCCESS(rv, rv);
  return NS_OK;
}
// Testing entries are created directly in LookupCache instead of being read
// from files in the profile, so on-disk files of the built-in test tables are
// leftovers. This removes them from the root store directory.
//
// Removal is best effort: individual failures are ignored and the function
// always returns NS_OK.
nsresult Classifier::ClearLegacyFiles() {
  if (ShouldAbort()) {
    // The classifier is done, so there is nothing left to clean up.
    return NS_OK;
  }

  const nsLiteralCString legacyTables[] = {
      "test-phish-simple"_ns,    "test-malware-simple"_ns,
      "test-unwanted-simple"_ns, "test-harmful-simple"_ns,
      "test-track-simple"_ns,    "test-trackwhite-simple"_ns,
      "test-block-simple"_ns,
  };

  // Deletes |aLeafName| inside |aDirectory|. Returns true only when the file
  // existed and was removed.
  const auto removeIfPresent = [](nsIFile* aDirectory,
                                  const nsACString& aLeafName) -> bool {
    nsCOMPtr<nsIFile> target;
    if (NS_FAILED(aDirectory->Clone(getter_AddRefs(target))) ||
        NS_FAILED(target->AppendNative(aLeafName))) {
      return false;
    }

    bool present = false;
    if (NS_FAILED(target->Exists(&present)) || !present) {
      return false;
    }

    return NS_SUCCEEDED(target->Remove(false));
  };

  for (const auto& table : legacyTables) {
    // The .vlpset file is only removed when a .sbstore file was found and
    // removed first.
    if (removeIfPresent(mRootStoreDirectory, table + ".sbstore"_ns)) {
      removeIfPresent(mRootStoreDirectory, table + ".vlpset"_ns);
    }
  }

  return NS_OK;
}
// Prepares the classifier to work below |aCacheDirectory|: sets up the
// directory handles, cleans up after an interrupted previous run, makes sure
// the store directory exists and rebuilds the list of active tables.
//
// Returns the error of the first mandatory step that fails. A failure of
// ClearLegacyFiles() only produces a warning.
nsresult Classifier::Open(nsIFile& aCacheDirectory) {
  // Keep our own handle to the local profile directory.
  nsresult rv = aCacheDirectory.Clone(getter_AddRefs(mCacheDirectory));
  NS_ENSURE_SUCCESS(rv, rv);

  // Derive the handles of the store, backup, updating and to-delete
  // directories.
  rv = SetupPathNames();
  NS_ENSURE_SUCCESS(rv, rv);

  // Get rid of to-delete directories that were never removed. Still needed
  // for backward compatibility.
  rv = CleanToDelete();
  NS_ENSURE_SUCCESS(rv, rv);

  // A leftover "safebrowsing-updating" directory means the previous update
  // never finished, most likely because of a crash. Remove it; a failure
  // (e.g. nothing to remove) is fine.
  const bool removedStaleUpdate =
      NS_SUCCEEDED(mUpdatingDirectory->Remove(true));
  if (removedStaleUpdate) {
    LOG(("We may have hit a crash in the previous update."));
  }

  // If an update was left incomplete, recover from the backup.
  rv = RecoverBackups();
  NS_ENSURE_SUCCESS(rv, rv);

  // The main store directory has to exist from here on.
  rv = CreateStoreDirectory();
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ClearLegacyFiles();
  Unused << NS_WARN_IF(NS_FAILED(rv));

  // Build the list of known urlclassifier lists.
  // XXX: Disk IO potentially on the main thread during startup
  RegenActiveTables();

  return NS_OK;
}
// Marks the classifier as closed and drops the stores. Also called from the
// destructor.
void Classifier::Close() {
// Close will be called by PreShutdown, so it is important to note that
// things put here should not affect an ongoing update thread.
// Once this flag is set, the reset function run by Reset() bails out early.
mIsClosed = true;
DropStores();
}
void Classifier::Reset() {
MOZ_ASSERT(!OnUpdateThread(), "Reset() MUST NOT be called on update thread");
LOG(("Reset() is called so we interrupt the update."));
mUpdateInterrupted = true;
// We don't pass the ref counted object 'Classifier' to resetFunc because we
// don't want to release 'Classifier in the update thread, which triggers an
// assertion when LazyIdelUpdate thread is not created and removed by the same
// thread (worker thread). Since |resetFuc| is a synchronous call, we can just
// pass the reference of Classifier because Classifier's life cycle is
// guarantee longer than |resetFunc|.
auto resetFunc = [&] {
if (this->mIsClosed) {
return; // too late to reset, bail
}
this->DropStores();
this->mRootStoreDirectory->Remove(true);
this->mBackupDirectory->Remove(true);
this->mUpdatingDirectory->Remove(true);
this->mToDeleteDirectory->Remove(true);
this->CreateStoreDirectory();
this->RegenActiveTables();
};
if (!mUpdateThread) {
LOG(("Async update has been disabled. Just Reset() on worker thread."));
resetFunc();
return;
}
nsCOMPtr<nsIRunnable> r =
NS_NewRunnableFunction("safebrowsing::Classifier::Reset", resetFunc);
SyncRunnable::DispatchToThread(mUpdateThread, r);
}
// Clears the in-memory data of every table in |aTables| that has a lookup
// cache: only the cached entries for Clear_Cache, everything otherwise.
// For Clear_All the tables' on-disk files are deleted as well and the list of
// active tables is rebuilt.
void Classifier::ResetTables(ClearType aType,
                             const nsTArray<nsCString>& aTables) {
  for (const nsCString& tableName : aTables) {
    LOG(("Resetting table: %s", tableName.get()));

    RefPtr<LookupCache> cache = GetLookupCache(tableName);
    if (!cache) {
      continue;
    }

    if (aType == Clear_Cache) {
      // Only drop the cached Completes of this table.
      cache->ClearCache();
    } else {
      cache->ClearAll();
    }
  }

  if (aType != Clear_All) {
    return;
  }

  // Clear_All also wipes the on-disk database.
  DeleteTables(mRootStoreDirectory, aTables);
  RegenActiveTables();
}
// Removes, from |aDirectory| and all of its sub-directories, every file whose
// name without its last extension matches one of |aTables|.
//
// |DeleteTables| is used by |GetLookupCache| to remove on-disk data when
// prefix file corruption is detected. So never call |GetLookupCache| from
// here, otherwise the two would recurse forever.
void Classifier::DeleteTables(nsIFile* aDirectory,
                              const nsTArray<nsCString>& aTables) {
  nsCOMPtr<nsIDirectoryEnumerator> dirEntries;
  nsresult rv = aDirectory->GetDirectoryEntries(getter_AddRefs(dirEntries));
  NS_ENSURE_SUCCESS_VOID(rv);

  nsCOMPtr<nsIFile> entry;
  for (;;) {
    rv = dirEntries->GetNextFile(getter_AddRefs(entry));
    if (NS_FAILED(rv) || !entry) {
      // Either the enumeration failed or there are no more entries.
      break;
    }

    // Entries whose type cannot be determined are skipped.
    bool isDir = false;
    if (NS_FAILED(entry->IsDirectory(&isDir))) {
      continue;
    }

    // Descend into sub-directories to handle their entries as well.
    if (isDir) {
      DeleteTables(entry, aTables);
      continue;
    }

    nsCString tableName;
    rv = entry->GetNativeLeafName(tableName);
    NS_ENSURE_SUCCESS_VOID(rv);

    // Cut off the file extension, if there is one.
    const int32_t extensionStart = tableName.RFind(".");
    if (extensionStart >= 0) {
      tableName.Truncate(extensionStart);
    }

    if (tableName.IsEmpty() || !aTables.Contains(tableName)) {
      continue;
    }

    if (NS_FAILED(entry->Remove(false))) {
      NS_WARNING(nsPrintfCString("Fail to remove file %s from the disk",
                                 tableName.get())
                     .get());
    }
  }

  // Warn if the enumeration itself ended with an error.
  NS_ENSURE_SUCCESS_VOID(rv);
}
// Produces the table request string in |aResult|.
//
// While the cached result is still valid it is simply copied into |aResult|.
// Otherwise the metadata of the v2 and v4 tables in the root store directory
// is loaded and appended to |aResult|, tables that failed to load are reset,
// and the cache is refreshed from |aResult|.
//
// This function is I/O intensive. It should only be called before applying
// an update.
void Classifier::TableRequest(nsACString& aResult) {
  MOZ_ASSERT(!NS_IsMainThread(),
             "TableRequest must be called on the classifier worker thread.");

  // This function and all disk I/O are guaranteed to occur on the same
  // thread, so no lock is needed around the cached state.
  if (!mIsTableRequestResultOutdated) {
    aResult = mTableRequestResult;
    return;
  }

  // Every table that fails to load gets reset, not only those known to be
  // corrupted. This is the safer way to make sure the Safe Browsing
  // databases can recover from any bad situation.
  nsTArray<nsCString> unloadableTables;

  // Metadata from the *.sbstore files in the root directory, i.e. the v2
  // tables.
  nsCString v2Info;
  if (NS_SUCCEEDED(
          LoadHashStore(mRootStoreDirectory, v2Info, unloadableTables))) {
    aResult.Append(v2Info);
  }

  // Metadata from the *.metadata files in the root directory, i.e. the v4
  // tables.
  nsCString v4Info;
  if (NS_SUCCEEDED(
          LoadMetadata(mRootStoreDirectory, v4Info, unloadableTables))) {
    aResult.Append(v4Info);
  }

  // Wipe the tables that could not be opened so that a full update gets
  // requested for them.
  if (!unloadableTables.IsEmpty()) {
    LOG(("Reset tables failed to open before applying an update"));
    ResetTables(Clear_All, unloadableTables);
  }

  // Refresh the in-memory cache of the TableRequest result.
  mTableRequestResult = aResult;
  mIsTableRequestResultOutdated = false;
}
nsresult Classifier::CheckURIFragments(
const nsTArray<nsCString>& aSpecFragments, const nsACString& aTable,
LookupResultArray& aResults) {
// A URL can form up to 30 different fragments
MOZ_ASSERT(aSpecFragments.Length() != 0);
MOZ_ASSERT(aSpecFragments.Length() <=
(MAX_HOST_COMPONENTS * (MAX_PATH_COMPONENTS + 2)));
if (LOG_ENABLED()) {
uint32_t urlIdx = 0;
for (uint32_t i = 1; i < aSpecFragments.Length(); i++) {
if (aSpecFragments[urlIdx].Length() < aSpecFragments[i].Length()) {
urlIdx = i;
}
}
LOG(("Checking table %s, URL is %s", aTable.BeginReading(),
aSpecFragments[urlIdx].get()));
}
RefPtr<LookupCache> cache = GetLookupCache(aTable);
if (NS_WARN_IF(!cache)) {
return NS_ERROR_FAILURE;
}
// Now check each lookup fragment against the entries in the DB.
for (uint32_t i = 0; i < aSpecFragments.Length(); i++) {
Completion lookupHash;
lookupHash.FromPlaintext(aSpecFragments[i]);
bool has, confirmed;
uint32_t matchLength;