reference.rs - mozsearch

//! A slow, but clear reference implementation of SeaHash.

//!

//! # Specification

//!

//! The input buffer is padded with null bytes until the length is divisible by 8.

//!

//! We start out with state

//!

//! ```notest

//! a = 0x16f11fe89b0d677c

//! b = 0xb480a793d8e6c86c

//! c = 0x6fe2e5aaf078ebc9

//! d = 0x14f994a4c5259381

//! ```

//!

//! If a seed is given, each of the initial state component are modularly multiplied by the seed.

//!

//! From the stream, we read one 64-bit block (in little-endian) at a time.  This number, `n`,

//! determines the new state by:

//!

//! ```notest

//! a' = b

//! b' = c

//! c' = d

//! d' = g(a ⊕ n)

//! ```

//!

//! `g(x)` is defined as `g(x) = j(h(j(x)))` with `h(x) = (x ≫ 32) ≫ (x ≫ 60)` and `j(x) ≡ px (mod

//! 2^64)` with `p = 0x7ed0e9fa0d94a33`.

//!

//! Let the final state be `(x, y, z, w)`. Then the final result is given by `H = g(x ⊕ y ⊕ z ⊕ w ⊕

//! l)` where `l` is the number of bytes in the original buffer.

use helper;

/// Read an integer in little-endian.

fn read_int(int: &[u8]) -> u64 {

    debug_assert!(

        int.len() <= 8,

        "The buffer length of the integer must be less than or equal to \

                  the one of an u64."

);

    // Start at 0.

    let mut x = 0;

    for &i in int.iter().rev() {

        // Shift up a byte.

        x <<= 8;

        // Set the lower byte.

        x |= i as u64;

/// A hash state.

struct State {

    /// The `a` substate.

    a: u64,

    /// The `b` substate.

    b: u64,

    /// The `c` substate.

    c: u64,

    /// The `d` substate.

    d: u64,

impl State {

    /// Write a 64-bit integer to the state.

    fn write_u64(&mut self, x: u64) {

        let mut a = self.a;

        // Mix `x` into `a`.

        a = helper::diffuse(a ^ x);

        //  Rotate around.

        //  _______________________

        // |                       v

        // a <---- b <---- c <---- d

        self.a = self.b;

        self.b = self.c;

        self.c = self.d;

        self.d = a;

    /// Calculate the final hash.

    fn finish(self, total: usize) -> u64 {

        // Even though XORing is commutative, it doesn't matter, because the state vector's initial

        // components are mutually distinct, and thus swapping even and odd chunks will affect the

        // result, because it is sensitive to the initial condition. To add discreteness, we

        // diffuse.

        helper::diffuse(

            self.a ^ self.b ^ self.c ^ self.d

            // We XOR in the number of written bytes to make it zero-sensitive when excessive bytes

            // are written (0u32.0u8 ≠ 0u16.0u8).

            ^ total as u64,

    /// Create a new state with some initial values (seed).

    fn with_seeds(k1: u64, k2: u64, k3: u64, k4: u64) -> State {

        State {

            // These values are randomly generated.

            a: k1,

            b: k2,

            c: k3,

            d: k4,

/// A reference implementation of SeaHash.

///

/// This is bloody slow when compared to the optimized version. This is because SeaHash was

/// specifically designed to take all sorts of hardware and software hacks into account to achieve

/// maximal performance, but this makes code significantly less readable. As such, this version has

/// only one goal: to make the algorithm readable and understandable.

pub fn hash(buf: &[u8]) -> u64 {

    hash_seeded(

        buf,

        0x16f11fe89b0d677c,

        0xb480a793d8e6c86c,

        0x6fe2e5aaf078ebc9,

        0x14f994a4c5259381,

/// The seeded version of the reference implementation.

pub fn hash_seeded(buf: &[u8], k1: u64, k2: u64, k3: u64, k4: u64) -> u64 {

    // Initialize the state.

    let mut state = State::with_seeds(k1, k2, k3, k4);

    // Partition the rounded down buffer into chunks of 8 bytes, and iterate over them. The last

    // block might not be 8 bytes long.

    for int in buf.chunks(8) {

        // Read the chunk into an integer and write into the state.

        state.write_u64(read_int(int));

    // Finish the hash state and return the final value.

    state.finish(buf.len())

#[cfg(test)]

mod tests {

    use super::*;

    #[test]

    fn shakespear() {

        assert_eq!(hash(b"to be or not to be"), 1988685042348123509);