aarch64.rs - mozsearch

Enable keyboard shortcuts

//! SHA-256 `aarch64` backend.

// Implementation adapted from mbedtls.

// TODO: stdarch intrinsics: RustCrypto/hashes#257

use core::arch::{aarch64::*, asm};

use crate::consts::K32;

cpufeatures::new!(sha2_hwcap, "sha2");

pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {

    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725

    // after stabilization

    if sha2_hwcap::get() {

        unsafe { sha256_compress(state, blocks) }

    } else {

        super::soft::compress(state, blocks);

#[target_feature(enable = "sha2")]

unsafe fn sha256_compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {

    // SAFETY: Requires the sha2 feature.

    // Load state into vectors.

    let mut abcd = vld1q_u32(state[0..4].as_ptr());

    let mut efgh = vld1q_u32(state[4..8].as_ptr());

    // Iterate through the message blocks.

    for block in blocks {

        // Keep original state values.

        let abcd_orig = abcd;

        let efgh_orig = efgh;

        // Load the message block into vectors, assuming little endianness.

        let mut s0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[0..16].as_ptr())));

        let mut s1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[16..32].as_ptr())));

        let mut s2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[32..48].as_ptr())));

        let mut s3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[48..64].as_ptr())));

        // Rounds 0 to 3

        let mut tmp = vaddq_u32(s0, vld1q_u32(&K32[0]));

        let mut abcd_prev = abcd;

        abcd = vsha256hq_u32(abcd_prev, efgh, tmp);

        efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

        // Rounds 4 to 7

        tmp = vaddq_u32(s1, vld1q_u32(&K32[4]));

        abcd_prev = abcd;

        abcd = vsha256hq_u32(abcd_prev, efgh, tmp);

        efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

        // Rounds 8 to 11

        tmp = vaddq_u32(s2, vld1q_u32(&K32[8]));

        abcd_prev = abcd;

        abcd = vsha256hq_u32(abcd_prev, efgh, tmp);

        efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

        // Rounds 12 to 15

        tmp = vaddq_u32(s3, vld1q_u32(&K32[12]));

        abcd_prev = abcd;

        abcd = vsha256hq_u32(abcd_prev, efgh, tmp);

        efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

        for t in (16..64).step_by(16) {

            // Rounds t to t + 3

            s0 = vsha256su1q_u32(vsha256su0q_u32(s0, s1), s2, s3);

            tmp = vaddq_u32(s0, vld1q_u32(&K32[t]));

            abcd_prev = abcd;

            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);

            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

            // Rounds t + 4 to t + 7

            s1 = vsha256su1q_u32(vsha256su0q_u32(s1, s2), s3, s0);

            tmp = vaddq_u32(s1, vld1q_u32(&K32[t + 4]));

            abcd_prev = abcd;

            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);

            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

            // Rounds t + 8 to t + 11

            s2 = vsha256su1q_u32(vsha256su0q_u32(s2, s3), s0, s1);

            tmp = vaddq_u32(s2, vld1q_u32(&K32[t + 8]));

            abcd_prev = abcd;

            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);

            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

            // Rounds t + 12 to t + 15

            s3 = vsha256su1q_u32(vsha256su0q_u32(s3, s0), s1, s2);

            tmp = vaddq_u32(s3, vld1q_u32(&K32[t + 12]));

            abcd_prev = abcd;

            abcd = vsha256hq_u32(abcd_prev, efgh, tmp);

            efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);

        // Add the block-specific state to the original state.

        abcd = vaddq_u32(abcd, abcd_orig);

        efgh = vaddq_u32(efgh, efgh_orig);

    // Store vectors into state.

    vst1q_u32(state[0..4].as_mut_ptr(), abcd);

    vst1q_u32(state[4..8].as_mut_ptr(), efgh);

// TODO remove these polyfills once SHA2 intrinsics land

#[inline(always)]

unsafe fn vsha256hq_u32(

    mut hash_efgh: uint32x4_t,

    hash_abcd: uint32x4_t,

    wk: uint32x4_t,

) -> uint32x4_t {

    asm!(

        "SHA256H {:q}, {:q}, {:v}.4S",

        inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,

        options(pure, nomem, nostack, preserves_flags)

);

    hash_efgh

#[inline(always)]

unsafe fn vsha256h2q_u32(

    mut hash_efgh: uint32x4_t,

    hash_abcd: uint32x4_t,

    wk: uint32x4_t,

) -> uint32x4_t {

    asm!(

        "SHA256H2 {:q}, {:q}, {:v}.4S",

        inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,

        options(pure, nomem, nostack, preserves_flags)

);

    hash_efgh

#[inline(always)]

unsafe fn vsha256su0q_u32(mut w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {

    asm!(

        "SHA256SU0 {:v}.4S, {:v}.4S",

        inout(vreg) w0_3, in(vreg) w4_7,

        options(pure, nomem, nostack, preserves_flags)

);

    w0_3

#[inline(always)]

unsafe fn vsha256su1q_u32(

    mut tw0_3: uint32x4_t,

    w8_11: uint32x4_t,

    w12_15: uint32x4_t,

) -> uint32x4_t {

    asm!(

        "SHA256SU1 {:v}.4S, {:v}.4S, {:v}.4S",

        inout(vreg) tw0_3, in(vreg) w8_11, in(vreg) w12_15,

        options(pure, nomem, nostack, preserves_flags)

);

    tw0_3