Revision control
Copy as Markdown
Other Tools
import filtercascade
import hashlib
from pathlib import Path
import sys
import logging
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
def predictable_serial_gen(start, end):
counter = start
while counter < end:
counter += 1
m = hashlib.sha256()
m.update(counter.to_bytes(4, byteorder="big"))
yield m.hexdigest()
def store(fc, path):
if path.exists():
path.unlink()
with open(path, "wb") as f:
fc.tofile(f)
small_set = list(set(predictable_serial_gen(0, 500)))
large_set = set(predictable_serial_gen(500, 10_000))
# filter parameters
growth_factor = 1.0
min_filter_length = 177 # 177 * 1.44 ~ 256, so smallest filter will have 256 bits
print("--- v2_sha256l32_with_salt ---")
v2_sha256l32_with_salt = filtercascade.FilterCascade(
[],
defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
salt=b"nacl",
growth_factor=growth_factor,
min_filter_length=min_filter_length,
)
v2_sha256l32_with_salt.initialize(
include=[b"this", b"that"] + small_set, exclude=large_set | set([b"other"])
)
store(v2_sha256l32_with_salt, Path("test_v2_sha256l32_salt_mlbf"))
print("--- v2_sha256l32 ---")
v2_sha256l32 = filtercascade.FilterCascade(
[],
defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
growth_factor=growth_factor,
min_filter_length=min_filter_length,
)
v2_sha256l32.initialize(
include=[b"this", b"that"] + small_set, exclude=large_set | set([b"other"])
)
store(v2_sha256l32, Path("test_v2_sha256l32_mlbf"))
print("--- v2_murmur ---")
v2_murmur = filtercascade.FilterCascade(
[],
defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3,
growth_factor=growth_factor,
min_filter_length=min_filter_length,
)
v2_murmur.initialize(
include=[b"this", b"that"] + small_set, exclude=large_set | set([b"other"])
)
store(v2_murmur, Path("test_v2_murmur_mlbf"))
print("--- v2_murmur_inverted ---")
v2_murmur_inverted = filtercascade.FilterCascade(
[],
defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3,
growth_factor=growth_factor,
min_filter_length=min_filter_length,
)
v2_murmur_inverted.initialize(
include=large_set | set([b"this", b"that"]), exclude=[b"other"] + small_set
)
store(v2_murmur_inverted, Path("test_v2_murmur_inverted_mlbf"))
print("--- v2_sha256l32_inverted ---")
v2_sha256l32_inverted = filtercascade.FilterCascade(
[],
defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
growth_factor=growth_factor,
min_filter_length=min_filter_length,
)
v2_sha256l32_inverted.initialize(
include=large_set | set([b"this", b"that"]), exclude=[b"other"] + small_set
)
store(v2_sha256l32_inverted, Path("test_v2_sha256l32_inverted_mlbf"))
print("--- v2_sha256ctr_with_salt ---")
v2_sha256ctr_with_salt = filtercascade.FilterCascade(
[],
defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256CTR,
salt=b"nacl",
growth_factor=growth_factor,
min_filter_length=min_filter_length,
)
v2_sha256ctr_with_salt.initialize(
include=[b"this", b"that"] + small_set, exclude=large_set | set([b"other"])
)
store(v2_sha256ctr_with_salt, Path("test_v2_sha256ctr_salt_mlbf"))