Source code

Revision control

Copy as Markdown

Other Tools

//! # rust-cascade
//!
//! A library for creating and querying the cascading bloom filters described by
//! Larisch, Choffnes, Levin, Maggs, Mislove, and Wilson in
//! "CRLite: A Scalable System for Pushing All TLS Revocations to All Browsers"
extern crate byteorder;
extern crate murmurhash3;
extern crate rand;
extern crate sha2;
use byteorder::{ByteOrder, LittleEndian, ReadBytesExt};
use murmurhash3::murmurhash3_x86_32;
#[cfg(feature = "builder")]
use rand::rngs::OsRng;
#[cfg(feature = "builder")]
use rand::RngCore;
use sha2::{Digest, Sha256};
use std::convert::{TryFrom, TryInto};
use std::fmt;
use std::io::{ErrorKind, Read};
use std::mem::size_of;
#[derive(Debug)]
pub enum CascadeError {
LongSalt,
TooManyLayers,
Collision,
UnknownHashFunction,
CapacityViolation(&'static str),
Parse(&'static str),
}
impl fmt::Display for CascadeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
CascadeError::LongSalt => {
write!(f, "Cannot serialize a filter with a salt of length >= 256.")
}
CascadeError::TooManyLayers => {
write!(f, "Cannot serialize a filter with >= 255 layers.")
}
CascadeError::Collision => {
write!(f, "Collision between included and excluded sets.")
}
CascadeError::UnknownHashFunction => {
write!(f, "Unknown hash function.")
}
CascadeError::CapacityViolation(function) => {
write!(f, "Unexpected call to {}", function)
}
CascadeError::Parse(reason) => {
write!(f, "Cannot parse cascade: {}", reason)
}
}
}
}
/// A Bloom filter representing a specific layer in a multi-layer cascading Bloom filter.
/// The same hash function is used for all layers, so it is not encoded here.
struct Bloom {
/// How many hash functions this filter uses
n_hash_funcs: u32,
/// The bit length of the filter
size: u32,
/// The data of the filter
data: Vec<u8>,
}
#[repr(u8)]
#[derive(Copy, Clone, PartialEq)]
/// These enumerations need to match the python filter-cascade project:
pub enum HashAlgorithm {
MurmurHash3 = 1,
Sha256l32 = 2, // low 32 bits of sha256
Sha256 = 3, // all 256 bits of sha256
}
impl fmt::Display for HashAlgorithm {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", *self as u8)
}
}
impl TryFrom<u8> for HashAlgorithm {
type Error = CascadeError;
fn try_from(value: u8) -> Result<HashAlgorithm, CascadeError> {
match value {
// Naturally, these need to match the enum declaration
1 => Ok(Self::MurmurHash3),
2 => Ok(Self::Sha256l32),
3 => Ok(Self::Sha256),
_ => Err(CascadeError::UnknownHashFunction),
}
}
}
/// A CascadeIndexGenerator provides one-time access to a table of pseudorandom functions H_ij
/// in which each function is of the form
/// H(s: &[u8], r: u32) -> usize
/// and for which 0 <= H(s,r) < r for all s, r.
/// The pseudorandom functions share a common key, represented as a octet string, and the table can
/// be constructed from this key alone. The functions are pseudorandom with respect to s, but not
/// r. For a uniformly random key/table, fixed r, and arbitrary strings m0 and m1,
/// H_ij(m0, r) is computationally indistinguishable from H_ij(m1,r)
/// for all i,j.
///
/// A call to next_layer() increments i and resets j.
/// A call to next_index(s, r) increments j, and outputs some value H_ij(s) with 0 <= H_ij(s) < r.
#[derive(Debug)]
enum CascadeIndexGenerator {
MurmurHash3 {
key: Vec<u8>,
counter: u32,
depth: u8,
},
Sha256l32 {
key: Vec<u8>,
counter: u32,
depth: u8,
},
Sha256Ctr {
key: Vec<u8>,
counter: u32,
state: [u8; 32],
state_available: u8,
},
}
impl PartialEq for CascadeIndexGenerator {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(
CascadeIndexGenerator::MurmurHash3 { key: ref a, .. },
CascadeIndexGenerator::MurmurHash3 { key: ref b, .. },
)
| (
CascadeIndexGenerator::Sha256l32 { key: ref a, .. },
CascadeIndexGenerator::Sha256l32 { key: ref b, .. },
)
| (
CascadeIndexGenerator::Sha256Ctr { key: ref a, .. },
CascadeIndexGenerator::Sha256Ctr { key: ref b, .. },
) => a == b,
_ => false,
}
}
}
impl CascadeIndexGenerator {
fn new(hash_alg: HashAlgorithm, key: Vec<u8>) -> Self {
match hash_alg {
HashAlgorithm::MurmurHash3 => Self::MurmurHash3 {
key,
counter: 0,
depth: 1,
},
HashAlgorithm::Sha256l32 => Self::Sha256l32 {
key,
counter: 0,
depth: 1,
},
HashAlgorithm::Sha256 => Self::Sha256Ctr {
key,
counter: 0,
state: [0; 32],
state_available: 0,
},
}
}
fn next_layer(&mut self) {
match self {
Self::MurmurHash3 {
ref mut counter,
ref mut depth,
..
}
| Self::Sha256l32 {
ref mut counter,
ref mut depth,
..
} => {
*counter = 0;
*depth += 1;
}
Self::Sha256Ctr { .. } => (),
}
}
fn next_index(&mut self, salt: &[u8], range: u32) -> usize {
let index = match self {
Self::MurmurHash3 {
key,
ref mut counter,
depth,
} => {
let hash_seed = (*counter << 16) + *depth as u32;
*counter += 1;
murmurhash3_x86_32(key, hash_seed)
}
Self::Sha256l32 {
key,
ref mut counter,
depth,
} => {
let mut hasher = Sha256::new();
hasher.update(salt);
hasher.update(counter.to_le_bytes());
hasher.update(depth.to_le_bytes());
hasher.update(&key);
*counter += 1;
u32::from_le_bytes(
hasher.finalize()[0..4]
.try_into()
.expect("sha256 should have given enough bytes"),
)
}
Self::Sha256Ctr {
key,
ref mut counter,
ref mut state,
ref mut state_available,
} => {
// |bytes_needed| is the minimum number of bytes needed to represent a value in [0, range).
let bytes_needed = ((range.next_power_of_two().trailing_zeros() + 7) / 8) as usize;
let mut index_arr = [0u8; 4];
for byte in index_arr.iter_mut().take(bytes_needed) {
if *state_available == 0 {
let mut hasher = Sha256::new();
hasher.update(counter.to_le_bytes());
hasher.update(salt);
hasher.update(&key);
hasher.finalize_into(state.into());
*state_available = state.len() as u8;
*counter += 1;
}
*byte = state[state.len() - *state_available as usize];
*state_available -= 1;
}
LittleEndian::read_u32(&index_arr)
}
};
(index % range) as usize
}
}
impl Bloom {
/// `new_crlite_bloom` creates an empty bloom filter for a layer of a cascade with the
/// parameters specified in [LCL+17, Section III.C].
///
/// # Arguments
/// * `include_capacity` - the number of elements that will be encoded at the new layer.
/// * `exclude_capacity` - the number of elements in the complement of the encoded set.
/// * `top_layer` - whether this is the top layer of the filter.
#[cfg(feature = "builder")]
pub fn new_crlite_bloom(
include_capacity: usize,
exclude_capacity: usize,
top_layer: bool,
) -> Self {
assert!(include_capacity != 0 && exclude_capacity != 0);
let r = include_capacity as f64;
let s = exclude_capacity as f64;
// The desired false positive rate for the top layer is
// p = r/(sqrt(2)*s).
// With this setting, the number of false positives (which will need to be
// encoded at the second layer) is expected to be a factor of sqrt(2)
// smaller than the number of elements encoded at the top layer.
//
// At layer i > 1 we try to ensure that the number of elements to be
// encoded at layer i+1 is half the number of elements encoded at
// layer i. So we take p = 1/2.
let log2_fp_rate = match top_layer {
true => (r / s).log2() - 0.5f64,
false => -1f64,
};
// the number of hash functions (k) and the size of the bloom filter (m) are given in
// [LCL+17] as k = log2(1/p) and m = r log2(1/p) / ln(2).
//
// If this formula gives a value of m < 256, we take m=256 instead. This results in very
// slightly sub-optimal size, but gives us the added benefit of doing less hashing.
let n_hash_funcs = (-log2_fp_rate).round() as u32;
let size = match (r * (-log2_fp_rate) / (f64::ln(2f64))).round() as u32 {
size if size >= 256 => size,
_ => 256,
};
Bloom {
n_hash_funcs,
size,
data: vec![0u8; ((size + 7) / 8) as usize],
}
}
/// `read` attempts to decode the Bloom filter represented by the bytes in the given reader.
///
/// # Arguments
/// * `reader` - The encoded representation of this Bloom filter. May be empty. May include
/// additional data describing further Bloom filters.
/// The format of an encoded Bloom filter is:
/// [1 byte] - the hash algorithm to use in the filter
/// [4 little endian bytes] - the length in bits of the filter
/// [4 little endian bytes] - the number of hash functions to use in the filter
/// [1 byte] - which layer in the cascade this filter is
/// [variable length bytes] - the filter itself (must be of minimal length)
pub fn read<R: Read>(
reader: &mut R,
) -> Result<Option<(Bloom, usize, HashAlgorithm)>, CascadeError> {
let hash_algorithm_val = match reader.read_u8() {
Ok(val) => val,
// If reader is at EOF, there is no bloom filter.
Err(e) if e.kind() == ErrorKind::UnexpectedEof => return Ok(None),
Err(_) => return Err(CascadeError::Parse("read error")),
};
let hash_algorithm = HashAlgorithm::try_from(hash_algorithm_val)?;
let size = reader
.read_u32::<byteorder::LittleEndian>()
.or(Err(CascadeError::Parse("truncated at layer size")))?;
let n_hash_funcs = reader
.read_u32::<byteorder::LittleEndian>()
.or(Err(CascadeError::Parse("truncated at layer hash count")))?;
let layer = reader
.read_u8()
.or(Err(CascadeError::Parse("truncated at layer number")))?;
let byte_count = ((size + 7) / 8) as usize;
let mut data = vec![0; byte_count];
reader
.read_exact(&mut data)
.or(Err(CascadeError::Parse("truncated at layer data")))?;
let bloom = Bloom {
n_hash_funcs,
size,
data,
};
Ok(Some((bloom, layer as usize, hash_algorithm)))
}
fn has(&self, generator: &mut CascadeIndexGenerator, salt: &[u8]) -> bool {
for _ in 0..self.n_hash_funcs {
let bit_index = generator.next_index(salt, self.size);
assert!(bit_index < self.size as usize);
let byte_index = bit_index / 8;
let mask = 1 << (bit_index % 8);
if self.data[byte_index] & mask == 0 {
return false;
}
}
true
}
#[cfg(feature = "builder")]
fn insert(&mut self, generator: &mut CascadeIndexGenerator, salt: &[u8]) {
for _ in 0..self.n_hash_funcs {
let bit_index = generator.next_index(salt, self.size);
let byte_index = bit_index / 8;
let mask = 1 << (bit_index % 8);
self.data[byte_index] |= mask;
}
}
pub fn approximate_size_of(&self) -> usize {
size_of::<Bloom>() + self.data.len()
}
}
impl fmt::Display for Bloom {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "n_hash_funcs={} size={}", self.n_hash_funcs, self.size)
}
}
/// A multi-layer cascading Bloom filter.
pub struct Cascade {
/// The Bloom filter for this layer in the cascade
filters: Vec<Bloom>,
/// The salt in use, if any
salt: Vec<u8>,
/// The hash algorithm / index generating function to use
hash_algorithm: HashAlgorithm,
/// Whether the logic should be inverted
inverted: bool,
}
impl Cascade {
/// from_bytes attempts to decode and return a multi-layer cascading Bloom filter.
///
/// # Arguments
/// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2
/// little endian bytes indicating the version. The current version is 2. The Python
/// filter-cascade project defines the formats, see
///
/// May be of length 0, in which case `None` is returned.
pub fn from_bytes(bytes: Vec<u8>) -> Result<Option<Self>, CascadeError> {
if bytes.is_empty() {
return Ok(None);
}
let mut reader = bytes.as_slice();
let version = reader
.read_u16::<byteorder::LittleEndian>()
.or(Err(CascadeError::Parse("truncated at version")))?;
let mut filters = vec![];
let mut salt = vec![];
let mut top_hash_alg = None;
let mut inverted = false;
if version > 2 {
return Err(CascadeError::Parse("unknown version"));
}
if version == 2 {
let inverted_val = reader
.read_u8()
.or(Err(CascadeError::Parse("truncated at inverted")))?;
if inverted_val > 1 {
return Err(CascadeError::Parse("invalid value for inverted"));
}
inverted = 0 != inverted_val;
let salt_len: usize = reader
.read_u8()
.or(Err(CascadeError::Parse("truncated at salt length")))?
.into();
if salt_len >= 256 {
return Err(CascadeError::Parse("salt too long"));
}
if salt_len > 0 {
let mut salt_bytes = vec![0; salt_len];
reader
.read_exact(&mut salt_bytes)
.or(Err(CascadeError::Parse("truncated at salt")))?;
salt = salt_bytes;
}
}
while let Some((filter, layer_number, layer_hash_alg)) = Bloom::read(&mut reader)? {
filters.push(filter);
if layer_number != filters.len() {
return Err(CascadeError::Parse("irregular layer numbering"));
}
if *top_hash_alg.get_or_insert(layer_hash_alg) != layer_hash_alg {
return Err(CascadeError::Parse("Inconsistent hash algorithms"));
}
}
if filters.is_empty() {
return Err(CascadeError::Parse("missing filters"));
}
let hash_algorithm = top_hash_alg.ok_or(CascadeError::Parse("missing hash algorithm"))?;
Ok(Some(Cascade {
filters,
salt,
hash_algorithm,
inverted,
}))
}
/// to_bytes encodes a cascade in the version 2 format.
pub fn to_bytes(&self) -> Result<Vec<u8>, CascadeError> {
if self.salt.len() >= 256 {
return Err(CascadeError::LongSalt);
}
if self.filters.len() >= 255 {
return Err(CascadeError::TooManyLayers);
}
let mut out = vec![];
let version: u16 = 2;
let inverted: u8 = self.inverted.into();
let salt_len: u8 = self.salt.len() as u8;
let hash_alg: u8 = self.hash_algorithm as u8;
out.extend_from_slice(&version.to_le_bytes());
out.push(inverted);
out.push(salt_len);
out.extend_from_slice(&self.salt);
for (layer, bloom) in self.filters.iter().enumerate() {
out.push(hash_alg);
out.extend_from_slice(&bloom.size.to_le_bytes());
out.extend_from_slice(&bloom.n_hash_funcs.to_le_bytes());
out.push((1 + layer) as u8); // 1-indexed
out.extend_from_slice(&bloom.data);
}
Ok(out)
}
/// has determines if the given sequence of bytes is in the cascade.
///
/// # Arguments
/// `entry` - The bytes to query
pub fn has(&self, entry: Vec<u8>) -> bool {
// Query filters 0..self.filters.len() until we get a non-membership result.
// If this occurs at an even index filter, the element *is not* included.
// ... at an odd-index filter, the element *is* included.
let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, entry);
let mut rv = false;
for filter in &self.filters {
if filter.has(&mut generator, &self.salt) {
rv = !rv;
generator.next_layer();
} else {
break;
}
}
if self.inverted {
rv = !rv;
}
rv
}
pub fn invert(&mut self) {
self.inverted = !self.inverted;
}
/// Determine the approximate amount of memory in bytes used by this
/// Cascade. Because this implementation does not integrate with the
/// allocator, it can't get an accurate measurement of how much memory it
/// uses. However, it can make a reasonable guess, assuming the sizes of
/// the bloom filters are large enough to dominate the overall allocated
/// size.
pub fn approximate_size_of(&self) -> usize {
size_of::<Cascade>()
+ self
.filters
.iter()
.map(|x| x.approximate_size_of())
.sum::<usize>()
+ self.salt.len()
}
}
impl fmt::Display for Cascade {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(
f,
"salt={:?} inverted={} hash_algorithm={}",
self.salt, self.inverted, self.hash_algorithm,
)?;
for filter in &self.filters {
writeln!(f, "\t[{}]", filter)?;
}
Ok(())
}
}
/// A CascadeBuilder creates a Cascade with layers given by `Bloom::new_crlite_bloom`.
///
/// A builder is initialized using [`CascadeBuilder::default`] or [`CascadeBuilder::new`]. Prefer `default`. The `new` constructor
/// allows the user to specify sensitive internal details such as the hash function and the domain
/// separation parameter.
///
/// Both constructors take `include_capacity` and an `exclude_capacity` parameters. The
/// `include_capacity` is the number of elements that will be encoded in the Cascade. The
/// `exclude_capacity` is size of the complement of the encoded set.
///
/// The encoded set is specified through calls to [`CascadeBuilder::include`]. Its complement is specified through
/// calls to [`CascadeBuilder::exclude`]. The cascade is built with a call to [`CascadeBuilder::finalize`].
///
/// The builder will track of the number of calls to `include` and `exclude`.
/// The caller is responsible for making *exactly* `include_capacity` calls to `include`
/// followed by *exactly* `exclude_capacity` calls to `exclude`.
/// Calling `exclude` before all `include` calls have been made will result in a panic!().
/// Calling `finalize` before all `exclude` calls have been made will result in a panic!().
///
#[cfg(feature = "builder")]
pub struct CascadeBuilder {
filters: Vec<Bloom>,
salt: Vec<u8>,
hash_algorithm: HashAlgorithm,
to_include: Vec<CascadeIndexGenerator>,
to_exclude: Vec<CascadeIndexGenerator>,
status: BuildStatus,
}
#[cfg(feature = "builder")]
impl CascadeBuilder {
pub fn default(include_capacity: usize, exclude_capacity: usize) -> Self {
let mut salt = vec![0u8; 16];
OsRng.fill_bytes(&mut salt);
CascadeBuilder::new(
HashAlgorithm::Sha256,
salt,
include_capacity,
exclude_capacity,
)
}
pub fn new(
hash_algorithm: HashAlgorithm,
salt: Vec<u8>,
include_capacity: usize,
exclude_capacity: usize,
) -> Self {
CascadeBuilder {
filters: vec![Bloom::new_crlite_bloom(
include_capacity,
exclude_capacity,
true,
)],
salt,
to_include: vec![],
to_exclude: vec![],
hash_algorithm,
status: BuildStatus(include_capacity, exclude_capacity),
}
}
pub fn include(&mut self, item: Vec<u8>) -> Result<(), CascadeError> {
match self.status {
BuildStatus(ref mut cap, _) if *cap > 0 => *cap -= 1,
_ => return Err(CascadeError::CapacityViolation("include")),
}
let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
self.filters[0].insert(&mut generator, &self.salt);
self.to_include.push(generator);
Ok(())
}
pub fn exclude(&mut self, item: Vec<u8>) -> Result<(), CascadeError> {
match self.status {
BuildStatus(0, ref mut cap) if *cap > 0 => *cap -= 1,
_ => return Err(CascadeError::CapacityViolation("exclude")),
}
let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
if self.filters[0].has(&mut generator, &self.salt) {
self.to_exclude.push(generator);
}
Ok(())
}
/// `exclude_threaded` is like `exclude` but it stores false positives in a caller-owned
/// `ExcludeSet`. This allows the caller to exclude items in parallel.
pub fn exclude_threaded(&self, exclude_set: &mut ExcludeSet, item: Vec<u8>) {
exclude_set.size += 1;
let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
if self.filters[0].has(&mut generator, &self.salt) {
exclude_set.set.push(generator);
}
}
/// `collect_exclude_set` merges an `ExcludeSet` into the internal storage of the CascadeBuilder.
pub fn collect_exclude_set(
&mut self,
exclude_set: &mut ExcludeSet,
) -> Result<(), CascadeError> {
match self.status {
BuildStatus(0, ref mut cap) if *cap >= exclude_set.size => *cap -= exclude_set.size,
_ => return Err(CascadeError::CapacityViolation("exclude")),
}
self.to_exclude.append(&mut exclude_set.set);
Ok(())
}
fn push_layer(&mut self) -> Result<(), CascadeError> {
// At even layers we encode elements of to_include. At odd layers we encode elements of
// to_exclude. In both cases, we track false positives by filtering the complement of the
// encoded set through the newly produced bloom filter.
let at_even_layer = self.filters.len() % 2 == 0;
let (to_encode, to_filter) = match at_even_layer {
true => (&mut self.to_include, &mut self.to_exclude),
false => (&mut self.to_exclude, &mut self.to_include),
};
// split ownership of `salt` away from `to_encode` and `to_filter`
// We need an immutable reference to salt during `to_encode.iter_mut()`
let mut bloom = Bloom::new_crlite_bloom(to_encode.len(), to_filter.len(), false);
let salt = self.salt.as_slice();
to_encode.iter_mut().for_each(|x| {
x.next_layer();
bloom.insert(x, salt)
});
let mut delta = to_filter.len();
to_filter.retain_mut(|x| {
x.next_layer();
bloom.has(x, salt)
});
delta -= to_filter.len();
if delta == 0 {
// Check for collisions between the |to_encode| and |to_filter| sets.
// The implementation of PartialEq for CascadeIndexGenerator will successfully
// identify cases where the user called |include(item)| and |exclude(item)| for the
// same item. It will not identify collisions in the underlying hash function.
for x in to_encode.iter_mut() {
if to_filter.contains(x) {
return Err(CascadeError::Collision);
}
}
}
self.filters.push(bloom);
Ok(())
}
pub fn finalize(mut self) -> Result<Box<Cascade>, CascadeError> {
match self.status {
BuildStatus(0, 0) => (),
_ => return Err(CascadeError::CapacityViolation("finalize")),
}
loop {
if self.to_exclude.is_empty() {
break;
}
self.push_layer()?;
if self.to_include.is_empty() {
break;
}
self.push_layer()?;
}
Ok(Box::new(Cascade {
filters: self.filters,
salt: self.salt,
hash_algorithm: self.hash_algorithm,
inverted: false,
}))
}
}
/// BuildStatus is used to ensure that the `include`, `exclude`, and `finalize` calls to
/// CascadeBuilder are made in the right order. The (a,b) state indicates that the
/// CascadeBuilder is waiting for `a` calls to `include` and `b` calls to `exclude`.
#[cfg(feature = "builder")]
struct BuildStatus(usize, usize);
/// CascadeBuilder::exclude takes `&mut self` so that it can count exclusions and push items to
/// self.to_exclude. The bulk of the work it does, however, can be done with an immutable reference
/// to the top level bloom filter. An `ExcludeSet` is used by `CascadeBuilder::exclude_threaded` to
/// track the changes to a `CascadeBuilder` that would be made with a call to
/// `CascadeBuilder::exclude`.
#[cfg(feature = "builder")]
#[derive(Default)]
pub struct ExcludeSet {
size: usize,
set: Vec<CascadeIndexGenerator>,
}
#[cfg(test)]
mod tests {
use Bloom;
use Cascade;
#[cfg(feature = "builder")]
use CascadeBuilder;
#[cfg(feature = "builder")]
use CascadeError;
use CascadeIndexGenerator;
#[cfg(feature = "builder")]
use ExcludeSet;
use HashAlgorithm;
#[test]
fn bloom_v1_test_from_bytes() {
let src: Vec<u8> = vec![
0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00,
];
let mut reader = src.as_slice();
match Bloom::read(&mut reader) {
Ok(Some((bloom, 1, HashAlgorithm::MurmurHash3))) => {
assert!(bloom.has(
&mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"this".to_vec()),
&vec![]
));
assert!(bloom.has(
&mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"that".to_vec()),
&vec![]
));
assert!(!bloom.has(
&mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"other".to_vec()),
&vec![]
));
}
Ok(_) => panic!("Parsing failed"),
Err(_) => panic!("Parsing failed"),
};
assert!(reader.is_empty());
let short: Vec<u8> = vec![
0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41,
];
assert!(Bloom::read(&mut short.as_slice()).is_err());
let empty: Vec<u8> = Vec::new();
let mut reader = empty.as_slice();
match Bloom::read(&mut reader) {
Ok(should_be_none) => assert!(should_be_none.is_none()),
Err(_) => panic!("Parsing failed"),
};
}
#[test]
fn bloom_v3_unsupported() {
let src: Vec<u8> = vec![0x03, 0x01, 0x00];
assert!(Bloom::read(&mut src.as_slice()).is_err());
}
#[test]
fn cascade_v1_murmur_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v1_murmur_mlbf").to_vec();
let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed")
.expect("Cascade should be Some");
// Key format is SHA256(issuer SPKI) + serial number
let key_for_revoked_cert_1 = vec![
0x2e, 0xb2, 0xd5, 0xa8, 0x60, 0xfe, 0x50, 0xe9, 0xc2, 0x42, 0x36, 0x85, 0x52, 0x98,
0x01, 0x50, 0xe4, 0x5d, 0xb5, 0x32, 0x1a, 0x5b, 0x00, 0x5e, 0x26, 0xd6, 0x76, 0x25,
0x3a, 0x40, 0x9b, 0xf5, 0x06, 0x2d, 0xf5, 0x68, 0xa0, 0x51, 0x31, 0x08, 0x20, 0xd7,
0xec, 0x43, 0x27, 0xe1, 0xba, 0xfd,
];
assert!(cascade.has(key_for_revoked_cert_1));
let key_for_revoked_cert_2 = vec![
0xf1, 0x1c, 0x3d, 0xd0, 0x48, 0xf7, 0x4e, 0xdb, 0x7c, 0x45, 0x19, 0x2b, 0x83, 0xe5,
0x98, 0x0d, 0x2f, 0x67, 0xec, 0x84, 0xb4, 0xdd, 0xb9, 0x39, 0x6e, 0x33, 0xff, 0x51,
0x73, 0xed, 0x69, 0x8f, 0x00, 0xd2, 0xe8, 0xf6, 0xaa, 0x80, 0x48, 0x1c, 0xd4,
];
assert!(cascade.has(key_for_revoked_cert_2));
let key_for_valid_cert = vec![
0x99, 0xfc, 0x9d, 0x40, 0xf1, 0xad, 0xb1, 0x63, 0x65, 0x61, 0xa6, 0x1d, 0x68, 0x3d,
0x9e, 0xa6, 0xb4, 0x60, 0xc5, 0x7d, 0x0c, 0x75, 0xea, 0x00, 0xc3, 0x41, 0xb9, 0xdf,
0xb9, 0x0b, 0x5f, 0x39, 0x0b, 0x77, 0x75, 0xf7, 0xaf, 0x9a, 0xe5, 0x42, 0x65, 0xc9,
0xcd, 0x32, 0x57, 0x10, 0x77, 0x8e,
];
assert!(!cascade.has(key_for_valid_cert));
assert_eq!(cascade.approximate_size_of(), 15408);
let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf").to_vec();
assert!(Cascade::from_bytes(v).is_err());
}
#[test]
fn cascade_v2_sha256l32_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_sha256l32_mlbf").to_vec();
let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed")
.expect("Cascade should be Some");
assert!(cascade.salt.len() == 0);
assert!(cascade.inverted == false);
assert!(cascade.has(b"this".to_vec()) == true);
assert!(cascade.has(b"that".to_vec()) == true);
assert!(cascade.has(b"other".to_vec()) == false);
assert_eq!(cascade.approximate_size_of(), 1001);
}
#[test]
fn cascade_v2_sha256l32_with_salt_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_sha256l32_salt_mlbf").to_vec();
let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed")
.expect("Cascade should be Some");
assert!(cascade.salt == b"nacl".to_vec());
assert!(cascade.inverted == false);
assert!(cascade.has(b"this".to_vec()) == true);
assert!(cascade.has(b"that".to_vec()) == true);
assert!(cascade.has(b"other".to_vec()) == false);
assert_eq!(cascade.approximate_size_of(), 1001);
}
#[test]
fn cascade_v2_murmur_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_murmur_mlbf").to_vec();
let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed")
.expect("Cascade should be Some");
assert!(cascade.salt.len() == 0);
assert!(cascade.inverted == false);
assert!(cascade.has(b"this".to_vec()) == true);
assert!(cascade.has(b"that".to_vec()) == true);
assert!(cascade.has(b"other".to_vec()) == false);
assert_eq!(cascade.approximate_size_of(), 992);
}
#[test]
fn cascade_v2_murmur_inverted_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf").to_vec();
let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed")
.expect("Cascade should be Some");
assert!(cascade.salt.len() == 0);
assert!(cascade.inverted == true);
assert!(cascade.has(b"this".to_vec()) == true);
assert!(cascade.has(b"that".to_vec()) == true);
assert!(cascade.has(b"other".to_vec()) == false);
assert_eq!(cascade.approximate_size_of(), 1058);
}
#[test]
fn cascade_v2_sha256l32_inverted_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_sha256l32_inverted_mlbf").to_vec();
let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed")
.expect("Cascade should be Some");
assert!(cascade.salt.len() == 0);
assert!(cascade.inverted == true);
assert!(cascade.has(b"this".to_vec()) == true);
assert!(cascade.has(b"that".to_vec()) == true);
assert!(cascade.has(b"other".to_vec()) == false);
assert_eq!(cascade.approximate_size_of(), 1061);
}
#[test]
fn cascade_v2_sha256ctr_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_sha256ctr_salt_mlbf").to_vec();
let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed")
.expect("Cascade should be Some");
assert!(cascade.salt == b"nacl".to_vec());
assert!(cascade.inverted == false);
assert!(cascade.has(b"this".to_vec()) == true);
assert!(cascade.has(b"that".to_vec()) == true);
assert!(cascade.has(b"other".to_vec()) == false);
assert_eq!(cascade.approximate_size_of(), 1070);
}
#[test]
fn cascade_empty() {
let cascade = Cascade::from_bytes(Vec::new()).expect("parsing Cascade should succeed");
assert!(cascade.is_none());
}
#[test]
fn cascade_test_from_bytes() {
let unknown_version: Vec<u8> = vec![0xff, 0xff, 0x00, 0x00];
match Cascade::from_bytes(unknown_version) {
Ok(_) => panic!("Cascade::from_bytes allows unknown version."),
Err(_) => (),
}
let first_layer_is_zero: Vec<u8> = vec![
0x01, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
];
match Cascade::from_bytes(first_layer_is_zero) {
Ok(_) => panic!("Cascade::from_bytes allows zero indexed layers."),
Err(_) => (),
}
let second_layer_is_three: Vec<u8> = vec![
0x01, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01,
0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00,
];
match Cascade::from_bytes(second_layer_is_three) {
Ok(_) => panic!("Cascade::from_bytes allows non-sequential layers."),
Err(_) => (),
}
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_collision() {
let mut builder = CascadeBuilder::default(1, 1);
builder.include(b"collision!".to_vec()).ok();
builder.exclude(b"collision!".to_vec()).ok();
assert!(matches!(builder.finalize(), Err(CascadeError::Collision)));
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_exclude_too_few() {
let mut builder = CascadeBuilder::default(1, 1);
builder.include(b"1".to_vec()).ok();
assert!(matches!(
builder.finalize(),
Err(CascadeError::CapacityViolation(_))
));
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_include_too_few() {
let mut builder = CascadeBuilder::default(1, 1);
assert!(matches!(
builder.exclude(b"1".to_vec()),
Err(CascadeError::CapacityViolation(_))
));
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_include_too_many() {
let mut builder = CascadeBuilder::default(1, 1);
builder.include(b"1".to_vec()).ok();
assert!(matches!(
builder.include(b"2".to_vec()),
Err(CascadeError::CapacityViolation(_))
));
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_exclude_too_many() {
let mut builder = CascadeBuilder::default(1, 1);
builder.include(b"1".to_vec()).ok();
builder.exclude(b"2".to_vec()).ok();
assert!(matches!(
builder.exclude(b"3".to_vec()),
Err(CascadeError::CapacityViolation(_))
));
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_exclude_threaded_no_collect() {
let mut builder = CascadeBuilder::default(1, 3);
let mut exclude_set = ExcludeSet::default();
builder.include(b"1".to_vec()).ok();
builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
assert!(matches!(
builder.finalize(),
Err(CascadeError::CapacityViolation(_))
));
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_exclude_threaded_too_many() {
let mut builder = CascadeBuilder::default(1, 3);
let mut exclude_set = ExcludeSet::default();
builder.include(b"1".to_vec()).ok();
builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
builder.exclude_threaded(&mut exclude_set, b"5".to_vec());
assert!(matches!(
builder.collect_exclude_set(&mut exclude_set),
Err(CascadeError::CapacityViolation(_))
));
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_exclude_threaded() {
let mut builder = CascadeBuilder::default(1, 3);
let mut exclude_set = ExcludeSet::default();
builder.include(b"1".to_vec()).ok();
builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
builder.collect_exclude_set(&mut exclude_set).ok();
builder.finalize().ok();
}
#[cfg(feature = "builder")]
fn cascade_builder_test_generate(hash_alg: HashAlgorithm, inverted: bool) {
let total = 10_000_usize;
let included = 100_usize;
let salt = vec![0u8; 16];
let mut builder =
CascadeBuilder::new(hash_alg, salt, included, (total - included) as usize);
for i in 0..included {
builder.include(i.to_le_bytes().to_vec()).ok();
}
for i in included..total {
builder.exclude(i.to_le_bytes().to_vec()).ok();
}
let mut cascade = builder.finalize().unwrap();
if inverted {
cascade.invert()
}
// Ensure we can serialize / deserialize
let cascade_bytes = cascade.to_bytes().expect("failed to serialize cascade");
let cascade = Cascade::from_bytes(cascade_bytes)
.expect("failed to deserialize cascade")
.expect("cascade should not be None here");
// Ensure each query gives the correct result
for i in 0..included {
assert!(cascade.has(i.to_le_bytes().to_vec()) == true ^ inverted)
}
for i in included..total {
assert!(cascade.has(i.to_le_bytes().to_vec()) == false ^ inverted)
}
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_generate_murmurhash3_inverted() {
cascade_builder_test_generate(HashAlgorithm::MurmurHash3, true);
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_generate_murmurhash3() {
cascade_builder_test_generate(HashAlgorithm::MurmurHash3, false);
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_generate_sha256l32() {
cascade_builder_test_generate(HashAlgorithm::Sha256l32, false);
}
#[test]
#[cfg(feature = "builder")]
fn cascade_builder_test_generate_sha256() {
cascade_builder_test_generate(HashAlgorithm::Sha256, false);
}
}