/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
// Originally based on Chrome sources:
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "HashStore.h"
#include "nsICryptoHash.h"
#include "nsISeekableStream.h"
#include "nsNetUtil.h"
#include "nsCheckSummedOutputStream.h"
#include "prio.h"
#include "mozilla/Logging.h"
#include "zlib.h"
#include "Classifier.h"
#include "nsUrlClassifierDBService.h"
#include "mozilla/Telemetry.h"
// Main store for SafeBrowsing protocol data. We store
// known add/sub chunks, prefixes and completions in memory
// during an update, and serialize to disk.
// We do not store the add prefixes, those are retrieved by
// decompressing the PrefixSet cache whenever we need to apply
// an update.
//
// byte slicing: Many of the 4-byte values stored here are strongly
// correlated in the upper bytes, and uncorrelated in the lower
// bytes. Because zlib/DEFLATE requires match lengths of at least
// 3 to achieve good compression, and we don't get those if only
// the upper 16-bits are correlated, it is worthwhile to slice 32-bit
// values into 4 1-byte slices and compress the slices individually.
// The slices corresponding to MSBs will compress very well, and the
// slice corresponding to LSB almost nothing. Because of this, we
// only apply DEFLATE to the 3 most significant bytes, and store the
// LSB uncompressed.
//
// byte sliced (numValues) data format:
// uint32_t compressed-size
// compressed-size bytes zlib DEFLATE data
// 0...numValues byte MSB of 4-byte numValues data
// uint32_t compressed-size
// compressed-size bytes zlib DEFLATE data
// 0...numValues byte 2nd byte of 4-byte numValues data
// uint32_t compressed-size
// compressed-size bytes zlib DEFLATE data
// 0...numValues byte 3rd byte of 4-byte numValues data
// 0...numValues byte LSB of 4-byte numValues data
//
// Store data format:
// uint32_t magic
// uint32_t version
// uint32_t numAddChunks
// uint32_t numSubChunks
// uint32_t numAddPrefixes
// uint32_t numSubPrefixes
// uint32_t numAddCompletes
// uint32_t numSubCompletes
// 0...numAddChunks uint32_t addChunk
// 0...numSubChunks uint32_t subChunk
// byte sliced (numAddPrefixes) uint32_t add chunk of AddPrefixes
// byte sliced (numSubPrefixes) uint32_t add chunk of SubPrefixes
// byte sliced (numSubPrefixes) uint32_t sub chunk of SubPrefixes
// byte sliced (numSubPrefixes) uint32_t SubPrefixes
// byte sliced (numAddCompletes) uint32_t add chunk of AddCompletes
// 0...numSubCompletes 32-byte Completions + uint32_t addChunk
// + uint32_t subChunk
// 16-byte MD5 of all preceding data
// Name of the SafeBrowsing store
#define STORE_SUFFIX ".sbstore"
// Enable debug logging with: MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
#define LOG(args) \
MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
#define LOG_ENABLED() \
MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
namespace mozilla::safebrowsing {
// Magic number identifying a .sbstore file (see "Store data format" above).
const uint32_t STORE_MAGIC = 0x1231af3b;
// On-disk format version; a mismatch is treated as corruption in SanityCheck().
const uint32_t CURRENT_VERSION = 4;
// Records a new add prefix (chunk number + 4-byte prefix) for this update.
// Returns NS_ERROR_OUT_OF_MEMORY if the array cannot grow.
nsresult TableUpdateV2::NewAddPrefix(uint32_t aAddChunk, const Prefix& aHash) {
  AddPrefix* entry = mAddPrefixes.AppendElement(fallible);
  if (!entry) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  entry->addChunk = aAddChunk;
  entry->prefix = aHash;
  return NS_OK;
}
// Records a new sub prefix (add chunk, prefix, sub chunk) for this update.
// Returns NS_ERROR_OUT_OF_MEMORY if the array cannot grow.
nsresult TableUpdateV2::NewSubPrefix(uint32_t aAddChunk, const Prefix& aHash,
                                     uint32_t aSubChunk) {
  SubPrefix* entry = mSubPrefixes.AppendElement(fallible);
  if (!entry) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  entry->addChunk = aAddChunk;
  entry->prefix = aHash;
  entry->subChunk = aSubChunk;
  return NS_OK;
}
// Records a new add completion (chunk number + full 32-byte hash).
// Returns NS_ERROR_OUT_OF_MEMORY if the array cannot grow.
nsresult TableUpdateV2::NewAddComplete(uint32_t aAddChunk,
                                       const Completion& aHash) {
  AddComplete* entry = mAddCompletes.AppendElement(fallible);
  if (!entry) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  entry->addChunk = aAddChunk;
  entry->complete = aHash;
  return NS_OK;
}
// Records a new sub completion (add chunk, full hash, sub chunk).
// Returns NS_ERROR_OUT_OF_MEMORY if the array cannot grow.
nsresult TableUpdateV2::NewSubComplete(uint32_t aAddChunk,
                                       const Completion& aHash,
                                       uint32_t aSubChunk) {
  SubComplete* entry = mSubCompletes.AppendElement(fallible);
  if (!entry) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  entry->addChunk = aAddChunk;
  entry->complete = aHash;
  entry->subChunk = aSubChunk;
  return NS_OK;
}
// Records a prefix that failed completion lookup for this update.
// Returns NS_ERROR_OUT_OF_MEMORY if the array cannot grow.
nsresult TableUpdateV2::NewMissPrefix(const Prefix& aPrefix) {
  return mMissPrefixes.AppendElement(aPrefix, fallible)
             ? NS_OK
             : NS_ERROR_OUT_OF_MEMORY;
}
void TableUpdateV4::NewPrefixes(int32_t aSize, const nsACString& aPrefixes) {
NS_ENSURE_TRUE_VOID(aSize >= 4 && aSize <= COMPLETE_SIZE);
NS_ENSURE_TRUE_VOID(aPrefixes.Length() % aSize == 0);
NS_ENSURE_TRUE_VOID(!mPrefixesMap.Contains(aSize));
int numOfPrefixes = aPrefixes.Length() / aSize;
if (aSize <= 4 && LOG_ENABLED()) {
const uint32_t* p =
reinterpret_cast<const uint32_t*>(ToNewCString(aPrefixes));
// Dump the first/last 10 fixed-length prefixes for debugging.
LOG(("* The first 10 (maximum) fixed-length prefixes: "));
for (int i = 0; i < std::min(10, numOfPrefixes); i++) {
const uint8_t* c = reinterpret_cast<const uint8_t*>(&p[i]);
LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
}
LOG(("* The last 10 (maximum) fixed-length prefixes: "));
for (int i = std::max(0, numOfPrefixes - 10); i < numOfPrefixes; i++) {
const uint8_t* c = reinterpret_cast<const uint8_t*>(&p[i]);
LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
}
LOG(("---- %zu fixed-length prefixes in total.",
aPrefixes.Length() / aSize));
}
mPrefixesMap.InsertOrUpdate(aSize, MakeUnique<nsCString>(aPrefixes));
}
// Copies the batch of removal indices for this V4 update into
// mRemovalIndiceArray (which must be empty). Returns
// NS_ERROR_OUT_OF_MEMORY if the capacity reservation fails.
nsresult TableUpdateV4::NewRemovalIndices(const uint32_t* aIndices,
                                          size_t aNumOfIndices) {
  MOZ_ASSERT(mRemovalIndiceArray.IsEmpty(),
             "mRemovalIndiceArray must be empty");
  if (!mRemovalIndiceArray.SetCapacity(aNumOfIndices, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  // Capacity was reserved above, so these appends cannot fail.
  for (const uint32_t* cur = aIndices; cur != aIndices + aNumOfIndices;
       ++cur) {
    mRemovalIndiceArray.AppendElement(*cur);
  }
  return NS_OK;
}
// Stores the SHA-256 value supplied with this table's update. The
// length-aware Assign overload is used, so the content is copied
// verbatim — presumably raw digest bytes that may contain embedded
// NULs (TODO confirm against the caller).
void TableUpdateV4::SetSHA256(const std::string& aSHA256) {
  mSHA256.Assign(aSHA256.data(), aSHA256.size());
}
// Caches a full-hash response keyed by the 32-bit value of aPrefix,
// overwriting any existing entry for that prefix. Returns
// NS_ERROR_OUT_OF_MEMORY if the map entry cannot be created.
nsresult TableUpdateV4::NewFullHashResponse(
    const Prefix& aPrefix, const CachedFullHashResponse& aResponse) {
  CachedFullHashResponse* entry =
      mFullHashResponseMap.GetOrInsertNew(aPrefix.ToUint32());
  if (!entry) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  *entry = aResponse;
  return NS_OK;
}
// Drops all prefix and removal-index data accumulated for this update.
void TableUpdateV4::Clear() {
  mRemovalIndiceArray.Clear();
  mPrefixesMap.Clear();
}
// Constructs a store for aTableName. Resolves the per-provider private
// store directory; if that fails, falls back to using aRootStoreDir
// directly (the failure is logged, not propagated).
HashStore::HashStore(const nsACString& aTableName, const nsACString& aProvider,
                     nsIFile* aRootStoreDir)
    : mTableName(aTableName), mInUpdate(false), mFileSize(0) {
  nsresult rv = Classifier::GetPrivateStoreDirectory(
      aRootStoreDir, aTableName, aProvider, getter_AddRefs(mStoreDirectory));
  if (NS_FAILED(rv)) {
    LOG(("Failed to get private store directory for %s", mTableName.get()));
    // Fall back to the root directory so the store remains usable.
    mStoreDirectory = aRootStoreDir;
  }
}
// No explicit cleanup needed; nsCOMPtr/array members release themselves.
HashStore::~HashStore() = default;
// Deletes the on-disk .sbstore file for this table and resets the
// cached file size. The input stream (if any) is closed first so the
// file can be removed safely.
nsresult HashStore::Reset() {
  LOG(("HashStore resetting"));

  if (mInputStream) {
    nsresult closeRv = mInputStream->Close();
    NS_ENSURE_SUCCESS(closeRv, closeRv);
    mInputStream = nullptr;
  }

  nsCOMPtr<nsIFile> file;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(file));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = file->AppendNative(mTableName + nsLiteralCString(STORE_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = file->Remove(false);
  NS_ENSURE_SUCCESS(rv, rv);

  mFileSize = 0;
  return NS_OK;
}
// Detects file corruption by recomputing the checksum of the file's
// data and comparing it against the checksum stored in the file's
// final bytes. Returns NS_ERROR_FAILURE on mismatch or a too-short
// file; NS_OK when there is no stream (nothing to verify).
nsresult HashStore::CheckChecksum(uint32_t aFileSize) {
  if (!mInputStream) {
    return NS_OK;
  }

  nsAutoCString expected;
  nsAutoCString stored;
  uint32_t bytesRead;

  nsresult rv = CalculateChecksum(expected, aFileSize, true);
  NS_ENSURE_SUCCESS(rv, rv);

  if (expected.Length() > aFileSize) {
    NS_WARNING("SafeBrowsing file not long enough to store its hash");
    return NS_ERROR_FAILURE;
  }
  stored.SetLength(expected.Length());

  // The stored checksum occupies the last expected.Length() bytes.
  nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(mInputStream);
  rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET,
                    aFileSize - expected.Length());
  NS_ENSURE_SUCCESS(rv, rv);

  rv = mInputStream->Read(stored.BeginWriting(), expected.Length(),
                          &bytesRead);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ASSERTION(bytesRead == expected.Length(), "Could not read hash bytes");

  if (!expected.Equals(stored)) {
    NS_WARNING("SafeBrowsing file failed checksum.");
    return NS_ERROR_FAILURE;
  }
  return NS_OK;
}
// Opens the on-disk store and validates its header against aVersion
// (0 means "expect CURRENT_VERSION"). A missing file is not an error:
// the store starts empty with a freshly synthesized header. Returns
// NS_ERROR_FILE_CORRUPTED on an unreadable or mismatching header so
// callers know to delete the file.
nsresult HashStore::Open(uint32_t aVersion) {
  nsCOMPtr<nsIFile> storeFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
  NS_ENSURE_SUCCESS(rv, rv);
  // Use the shared suffix constant, consistent with Reset().
  rv = storeFile->AppendNative(mTableName + nsLiteralCString(STORE_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);
  nsCOMPtr<nsIInputStream> origStream;
  rv = NS_NewLocalFileInputStream(getter_AddRefs(origStream), storeFile,
                                  PR_RDONLY | nsIFile::OS_READAHEAD);
  if (rv == NS_ERROR_FILE_NOT_FOUND) {
    // No file yet: treat as an empty store.
    UpdateHeader();
    return NS_OK;
  }
  NS_ENSURE_SUCCESS(rv, rv);
  int64_t fileSize;
  rv = storeFile->GetFileSize(&fileSize);
  NS_ENSURE_SUCCESS(rv, rv);
  // The store format addresses the file with 32-bit offsets.
  if (fileSize < 0 || fileSize > UINT32_MAX) {
    return NS_ERROR_FAILURE;
  }
  mFileSize = static_cast<uint32_t>(fileSize);
  // Buffer the whole file for the sequential reads that follow.
  rv = NS_NewBufferedInputStream(getter_AddRefs(mInputStream),
                                 origStream.forget(), mFileSize);
  NS_ENSURE_SUCCESS(rv, rv);
  rv = ReadHeader();
  if (NS_WARN_IF(NS_FAILED(rv))) {
    LOG(("Failed to read header for %s", mTableName.get()));
    return NS_ERROR_FILE_CORRUPTED;
  }
  rv = SanityCheck(aVersion);
  NS_ENSURE_SUCCESS(rv, rv);
  return NS_OK;
}
// Reads the fixed-size store header from the start of the stream into
// mHeader. Without a backing stream the store is brand new, so a
// header is synthesized from the (empty) in-memory state instead.
nsresult HashStore::ReadHeader() {
  if (!mInputStream) {
    UpdateHeader();
    return NS_OK;
  }

  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
  nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
  NS_ENSURE_SUCCESS(rv, rv);

  void* dest = &mHeader;
  rv = NS_ReadInputStreamToBuffer(mInputStream, &dest, sizeof(Header));
  NS_ENSURE_SUCCESS(rv, rv);
  return NS_OK;
}
// Validates mHeader's magic number and version. aVersion == 0 means
// "expect the current format version". A mismatch is reported as
// NS_ERROR_FILE_CORRUPTED so the caller knows it should remove the
// file.
nsresult HashStore::SanityCheck(uint32_t aVersion) const {
  const uint32_t expectedVersion =
      aVersion == 0 ? CURRENT_VERSION : aVersion;
  if (mHeader.magic == STORE_MAGIC && mHeader.version == expectedVersion) {
    return NS_OK;
  }
  NS_WARNING("Unexpected header data in the store.");
  return NS_ERROR_FILE_CORRUPTED;
}
// Computes the MD5 checksum of the store file into aChecksum. When
// aChecksumPresent is true, the trailing CHECKSUM_SIZE bytes (the
// stored checksum itself) are excluded from the computation; otherwise
// the whole stream is hashed.
nsresult HashStore::CalculateChecksum(nsAutoCString& aChecksum,
                                      uint32_t aFileSize,
                                      bool aChecksumPresent) {
  aChecksum.Truncate();
  // Reset mInputStream to start
  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
  nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
  // Bug fix: this result was previously discarded (immediately
  // overwritten below), so a failed seek would silently hash from the
  // wrong offset.
  NS_ENSURE_SUCCESS(rv, rv);
  nsCOMPtr<nsICryptoHash> hash =
      do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);
  // Size of MD5 hash in bytes
  const uint32_t CHECKSUM_SIZE = 16;
  // MD5 is not a secure hash function, but since this is a filesystem
  // integrity check, this usage is ok.
  rv = hash->Init(nsICryptoHash::MD5);
  NS_ENSURE_SUCCESS(rv, rv);
  if (!aChecksumPresent) {
    // Hash entire file
    rv = hash->UpdateFromStream(mInputStream, UINT32_MAX);
  } else {
    // Hash everything but last checksum bytes
    if (aFileSize < CHECKSUM_SIZE) {
      NS_WARNING("SafeBrowsing file isn't long enough to store its checksum");
      return NS_ERROR_FAILURE;
    }
    rv = hash->UpdateFromStream(mInputStream, aFileSize - CHECKSUM_SIZE);
  }
  NS_ENSURE_SUCCESS(rv, rv);
  // false => raw binary digest, not base64.
  rv = hash->Finish(false, aChecksum);
  NS_ENSURE_SUCCESS(rv, rv);
  return NS_OK;
}
// Refreshes mHeader from the in-memory chunk/prefix/completion arrays,
// stamping the current magic number and format version, so the header
// reflects what will be serialized.
void HashStore::UpdateHeader() {
  mHeader.magic = STORE_MAGIC;
  mHeader.version = CURRENT_VERSION;
  mHeader.numAddChunks = mAddChunks.Length();
  mHeader.numSubChunks = mSubChunks.Length();
  mHeader.numAddPrefixes = mAddPrefixes.Length();
  mHeader.numSubPrefixes = mSubPrefixes.Length();
  mHeader.numAddCompletes = mAddCompletes.Length();
  mHeader.numSubCompletes = mSubCompletes.Length();
}
// Reads the add- and sub-chunk number arrays, which sit immediately
// after the fixed-size header in the store file (see the format
// description at the top of this file). A missing stream means an
// empty store, which is not an error.
nsresult HashStore::ReadChunkNumbers() {
  if (!mInputStream) {
    return NS_OK;
  }
  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
  nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, sizeof(Header));
  NS_ENSURE_SUCCESS(rv, rv);
  rv = mAddChunks.Read(mInputStream, mHeader.numAddChunks);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ASSERTION(mAddChunks.Length() == mHeader.numAddChunks,
               "Read the right amount of add chunks.");
  rv = mSubChunks.Read(mInputStream, mHeader.numSubChunks);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ASSERTION(mSubChunks.Length() == mHeader.numSubChunks,
               "Read the right amount of sub chunks.");
  return NS_OK;
}
// Reads the prefix and completion sections of the store into memory.
// The stream is positioned past the header and both chunk-number
// arrays before the sliced sections are decoded.
nsresult HashStore::ReadHashes() {
  if (!mInputStream) {
    // BeginUpdate has been called but Open hasn't initialized
    // mInputStream, because the existing HashStore is empty.
    return NS_OK;
  }

  const uint32_t chunkWords = mHeader.numAddChunks + mHeader.numSubChunks;
  const uint32_t offset = sizeof(Header) + chunkWords * sizeof(uint32_t);

  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
  nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadAddPrefixes();
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadSubPrefixes();
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadAddCompletes();
  NS_ENSURE_SUCCESS(rv, rv);

  // Sub completions are stored as a plain array, not byte-sliced.
  rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes);
  NS_ENSURE_SUCCESS(rv, rv);
  return NS_OK;
}
// Verifies store integrity, then loads chunk numbers and all hash data
// into memory so an update can be applied.
nsresult HashStore::PrepareForUpdate() {
  nsresult rv = CheckChecksum(mFileSize);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = ReadChunkNumbers();
  NS_ENSURE_SUCCESS(rv, rv);

  return ReadHashes();
}
// Checks whether the file is corrupted, reads the rest of the store
// into memory, and marks the store as being updated. The input stream
// is closed because the file will be rewritten.
nsresult HashStore::BeginUpdate() {
  nsresult rv = PrepareForUpdate();
  NS_ENSURE_SUCCESS(rv, rv);

  if (mInputStream) {
    rv = mInputStream->Close();
    NS_ENSURE_SUCCESS(rv, rv);
  }

  mInUpdate = true;
  return NS_OK;
}
// Merges sorted update entries into the store's sorted entry array,
// skipping entries from chunks the store already has (unless
// aAllowMerging, used for completions, where the chunk is always
// already present from the original prefix) and dropping exact
// duplicates. Chunk numbers are merged even for empty chunks so the
// chunk ranges stay continuous.
template <class T>
static nsresult Merge(ChunkSet* aStoreChunks, FallibleTArray<T>* aStorePrefixes,
                      const ChunkSet& aUpdateChunks,
                      FallibleTArray<T>& aUpdatePrefixes,
                      bool aAllowMerging = false) {
  EntrySort(aUpdatePrefixes);

  auto cursor = aStorePrefixes->begin();
  const auto last = aStorePrefixes->end();

  // Collect new entries in a separate array so iterators into the
  // store array stay valid (appending directly could reallocate).
  nsTArray<T> pending;
  for (const auto& candidate : aUpdatePrefixes) {
    if (!aAllowMerging && aStoreChunks->Has(candidate.Chunk())) {
      continue;
    }
    // XXX: binary search for insertion point might be faster in common
    // case?
    while (cursor < last && cursor->Compare(candidate) < 0) {
      ++cursor;
    }
    // Queue the entry only if no stored element matches it exactly.
    if (cursor == last || cursor->Compare(candidate) != 0) {
      if (!pending.AppendElement(candidate, fallible)) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
    }
  }

  aStoreChunks->Merge(aUpdateChunks);

  if (!aStorePrefixes->AppendElements(pending, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  EntrySort(*aStorePrefixes);
  return NS_OK;
}
nsresult HashStore::ApplyUpdate(RefPtr<TableUpdateV2> aUpdate) {
MOZ_ASSERT(mTableName.Equals(aUpdate->TableName()));
nsresult rv = mAddExpirations.Merge(aUpdate->AddExpirations());