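// slide_hash.rs (captured from a mozsearch page view; the "Enable keyboard shortcuts"
// line that followed the title was page chrome, not part of the source).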

pub fn slide_hash(state: &mut crate::deflate::State) {

    let wsize = state.w_size as u16;

    slide_hash_chain(state.head.as_mut_slice(), wsize);

    slide_hash_chain(state.prev.as_mut_slice(), wsize);

fn slide_hash_chain(table: &mut [u16], wsize: u16) {

    #[cfg(target_arch = "x86_64")]

    if crate::cpu_features::is_enabled_avx2() {

        return avx2::slide_hash_chain(table, wsize);

    #[cfg(target_arch = "aarch64")]

    if crate::cpu_features::is_enabled_neon() {

        return neon::slide_hash_chain(table, wsize);

    #[cfg(target_arch = "wasm32")]

    if crate::cpu_features::is_enabled_simd128() {

        return wasm::slide_hash_chain(table, wsize);

    rust::slide_hash_chain(table, wsize);

/// Portable scalar implementation; also serves as the reference for the SIMD variants.
mod rust {
    /// Subtracts `wsize` from every element of `table` in place.
    ///
    /// The subtraction saturates: elements smaller than `wsize` become `0`.
    /// Works for any slice length, including an empty slice.
    pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
        for m in table.iter_mut() {
            *m = m.saturating_sub(wsize);
        }
    }
}

/// NEON implementation of the hash-table slide.
///
/// # Safety
///
/// These functions should only be executed on `aarch64` systems with the `neon` feature enabled.
#[cfg(target_arch = "aarch64")]
mod neon {
    use core::arch::aarch64::{
        uint16x8_t, uint16x8x4_t, vdupq_n_u16, vld1q_u16_x4, vqsubq_u16, vst1q_u16_x4,
    };

    /// Subtracts `wsize` from every element of `table` in place, saturating at zero.
    ///
    /// The table is processed in chunks of 32 elements.
    ///
    /// # Panics
    ///
    /// Panics if `crate::cpu_features::is_enabled_neon()` returns `false`.
    pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
        assert!(crate::cpu_features::is_enabled_neon());

        // SAFETY: the assertion above checked that the `neon` feature is reported as enabled.
        unsafe { slide_hash_chain_internal(table, wsize) }
    }

    /// # Safety
    ///
    /// Behavior is undefined if the `neon` target feature is not enabled
    #[target_feature(enable = "neon")]
    unsafe fn slide_hash_chain_internal(table: &mut [u16], wsize: u16) {
        // `chunks_exact_mut` skips any trailing elements that do not fill a whole chunk,
        // so the length is expected to be a multiple of 32 (checked in debug builds only).
        debug_assert_eq!(table.len() % 32, 0);

        // Broadcast `wsize` into all 8 lanes.
        let v = unsafe { vdupq_n_u16(wsize) };

        for chunk in table.chunks_exact_mut(32) {
            // SAFETY: `chunk` holds exactly 32 `u16` values, which is the amount that
            // `vld1q_u16_x4` reads and `vst1q_u16_x4` writes (4 vectors of 8 lanes each).
            unsafe {
                let p0 = vld1q_u16_x4(chunk.as_ptr());
                let p0 = vqsubq_u16_x4_x1(p0, v);
                vst1q_u16_x4(chunk.as_mut_ptr(), p0);
            }
        }
    }

    /// Saturating-subtracts `b` from each of the four vectors in `a`.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if the `neon` target feature is not enabled
    #[target_feature(enable = "neon")]
    unsafe fn vqsubq_u16_x4_x1(a: uint16x8x4_t, b: uint16x8_t) -> uint16x8x4_t {
        unsafe {
            uint16x8x4_t(
                vqsubq_u16(a.0, b),
                vqsubq_u16(a.1, b),
                vqsubq_u16(a.2, b),
                vqsubq_u16(a.3, b),
            )
        }
    }
}

#[cfg(target_arch = "x86_64")]

mod avx2 {

    use core::arch::x86_64::{

        __m256i, _mm256_loadu_si256, _mm256_set1_epi16, _mm256_storeu_si256, _mm256_subs_epu16,

};

    pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {

        assert!(crate::cpu_features::is_enabled_avx2());

        unsafe { slide_hash_chain_internal(table, wsize) }

    /// # Safety

///

    /// Behavior is undefined if the `avx` target feature is not enabled

    #[target_feature(enable = "avx2")]

    unsafe fn slide_hash_chain_internal(table: &mut [u16], wsize: u16) {

        debug_assert_eq!(table.len() % 16, 0);

        let ymm_wsize = unsafe { _mm256_set1_epi16(wsize as i16) };

        for chunk in table.chunks_exact_mut(16) {

            let chunk = chunk.as_mut_ptr() as *mut __m256i;

            unsafe {

                let value = _mm256_loadu_si256(chunk);

                let result = _mm256_subs_epu16(value, ymm_wsize);

                _mm256_storeu_si256(chunk, result);

/// WebAssembly `simd128` implementation of the hash-table slide.
#[cfg(target_arch = "wasm32")]
mod wasm {
    use core::arch::wasm32::{u16x8_splat, u16x8_sub_sat, v128, v128_load, v128_store};

    /// Subtracts `wsize` from every element of `table` in place, saturating at zero.
    ///
    /// The table is processed in chunks of 8 elements.
    ///
    /// # Panics
    ///
    /// Panics if `table.len()` is not a multiple of 8.
    pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
        assert_eq!(table.len() % 8, 0);

        slide_hash_chain_internal(table, wsize)
    }

    #[target_feature(enable = "simd128")]
    fn slide_hash_chain_internal(table: &mut [u16], wsize: u16) {
        // Broadcast `wsize` into all 8 lanes.
        let wsize_v128 = u16x8_splat(wsize);

        for chunk in table.chunks_exact_mut(8) {
            let chunk_ptr = chunk.as_mut_ptr() as *mut v128;

            // Load the 128-bit value.
            // SAFETY: `chunk` holds exactly 8 `u16` values (16 bytes), so the whole
            // 128-bit read is in bounds; `v128_load` has no alignment requirement.
            let value = unsafe { v128_load(chunk_ptr) };

            // Perform saturating subtraction
            let result = u16x8_sub_sat(value, wsize_v128);

            // Store the result back.
            // SAFETY: same 16-byte region as the load above, borrowed mutably;
            // `v128_store` has no alignment requirement.
            unsafe { v128_store(chunk_ptr, result) };
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Window size used by the fixed-vector tests below.
    const WSIZE: u16 = 32768;

    /// Sample table contents before sliding.
    const INPUT: [u16; 64] = [
        0, 0, 28790, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43884, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 64412, 0, 0, 0, 0, 0, 21043, 0, 0, 0, 0, 0, 23707, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 64026, 0, 0, 20182,
    ];

    /// `INPUT` after subtracting `WSIZE` from every entry with saturation.
    const OUTPUT: [u16; 64] = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 31644, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 31258, 0, 0, 0,
    ];

    #[test]
    fn test_slide_hash_rust() {
        let mut input = INPUT;

        rust::slide_hash_chain(&mut input, WSIZE);

        assert_eq!(input, OUTPUT);
    }

    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_slide_hash_avx2() {
        // Only exercised when the feature is reported as enabled; otherwise the test is a no-op.
        if crate::cpu_features::is_enabled_avx2() {
            let mut input = INPUT;

            avx2::slide_hash_chain(&mut input, WSIZE);

            assert_eq!(input, OUTPUT);
        }
    }

    #[test]
    #[cfg(target_arch = "aarch64")]
    fn test_slide_hash_neon() {
        // Only exercised when the feature is reported as enabled; otherwise the test is a no-op.
        if crate::cpu_features::is_enabled_neon() {
            let mut input = INPUT;

            neon::slide_hash_chain(&mut input, WSIZE);

            assert_eq!(input, OUTPUT);
        }
    }

    #[test]
    #[cfg(target_arch = "wasm32")]
    fn test_slide_hash_simd128() {
        // Only exercised when the feature is reported as enabled; otherwise the test is a no-op.
        if crate::cpu_features::is_enabled_simd128() {
            let mut input = INPUT;

            wasm::slide_hash_chain(&mut input, WSIZE);

            assert_eq!(input, OUTPUT);
        }
    }

    quickcheck::quickcheck! {
        /// The dispatching `slide_hash_chain` must agree with the scalar implementation.
        fn slide_is_rust_slide(v: Vec<u16>, wsize: u16) -> bool {
            // pad to a multiple of 32
            let mut v = v;
            let padded_len = v.len().next_multiple_of(32);
            v.resize(padded_len, u16::MAX);

            let mut a = v.clone();
            let mut b = v;

            rust::slide_hash_chain(&mut a, wsize);
            slide_hash_chain(&mut b, wsize);

            a == b
        }
    }
}