/*
* AES using POWER8/POWER9 crypto extensions
*
* Contributed by Jeffrey Walton
*
* Further changes
* (C) 2018,2019 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
#include <botan/aes.h>
#include <botan/cpuid.h>
#include <altivec.h>
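// <altivec.h> defines "vector" and "bool" as macros; undefine them so they do
// not collide with the C++ keyword and standard type used below.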
#undef vector
#undef bool
namespace Botan {
typedef __vector unsigned long long Altivec64x2;
typedef __vector unsigned int Altivec32x4;
typedef __vector unsigned char Altivec8x16;
namespace {
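// The POWER crypto builtins operate on big-endian lane order. On a
// little-endian CPU each 16-byte value is byte-reversed as it moves between
// memory and vector registers; on big-endian this is a no-op.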
inline Altivec8x16 reverse_vec(Altivec8x16 src)
{
if(CPUID::is_little_endian())
{
const Altivec8x16 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
const Altivec8x16 zero = {0};
return vec_perm(src, zero, mask);
}
else
{
return src;
}
}
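// Load four 32-bit words of the expanded key schedule as one round key.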
inline Altivec64x2 load_key(const uint32_t key[])
{
return (Altivec64x2)reverse_vec((Altivec8x16)vec_vsx_ld(0, key));
}
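// Load a 16-byte input block, and store one or four processed blocks back to
// memory, applying the same endian fixup in each direction.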
inline Altivec64x2 load_block(const uint8_t src[])
{
return (Altivec64x2)reverse_vec(vec_vsx_ld(0, src));
}
inline void store_block(Altivec64x2 src, uint8_t dest[])
{
vec_vsx_st(reverse_vec((Altivec8x16)src), 0, dest);
}
inline void store_blocks(Altivec64x2 B0, Altivec64x2 B1,
Altivec64x2 B2, Altivec64x2 B3,
uint8_t out[])
{
store_block(B0, out);
store_block(B1, out+16);
store_block(B2, out+16*2);
store_block(B3, out+16*3);
}
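// Each macro applies the initial key XOR, one AES round, or the final round to
// four blocks at once with the same round key, so four independent blocks can
// proceed in parallel through the main loops below.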
#define AES_XOR_4(B0, B1, B2, B3, K) do { \
B0 = vec_xor(B0, K); \
B1 = vec_xor(B1, K); \
B2 = vec_xor(B2, K); \
B3 = vec_xor(B3, K); \
} while(0)
#define AES_ENCRYPT_4(B0, B1, B2, B3, K) do { \
B0 = __builtin_crypto_vcipher(B0, K); \
B1 = __builtin_crypto_vcipher(B1, K); \
B2 = __builtin_crypto_vcipher(B2, K); \
B3 = __builtin_crypto_vcipher(B3, K); \
} while(0)
#define AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K) do { \
B0 = __builtin_crypto_vcipherlast(B0, K); \
B1 = __builtin_crypto_vcipherlast(B1, K); \
B2 = __builtin_crypto_vcipherlast(B2, K); \
B3 = __builtin_crypto_vcipherlast(B3, K); \
} while(0)
#define AES_DECRYPT_4(B0, B1, B2, B3, K) do { \
B0 = __builtin_crypto_vncipher(B0, K); \
B1 = __builtin_crypto_vncipher(B1, K); \
B2 = __builtin_crypto_vncipher(B2, K); \
B3 = __builtin_crypto_vncipher(B3, K); \
} while(0)
#define AES_DECRYPT_4_LAST(B0, B1, B2, B3, K) do { \
B0 = __builtin_crypto_vncipherlast(B0, K); \
B1 = __builtin_crypto_vncipherlast(B1, K); \
B2 = __builtin_crypto_vncipherlast(B2, K); \
B3 = __builtin_crypto_vncipherlast(B3, K); \
} while(0)
}
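// AES-128 encryption: 11 round keys K0..K10 (m_EK[0..43]) and 10 rounds.
// BOTAN_FUNC_ISA("crypto") marks each routine as requiring the POWER crypto ISA.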
BOTAN_FUNC_ISA("crypto")
void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[0]);
const Altivec64x2 K1 = load_key(&m_EK[4]);
const Altivec64x2 K2 = load_key(&m_EK[8]);
const Altivec64x2 K3 = load_key(&m_EK[12]);
const Altivec64x2 K4 = load_key(&m_EK[16]);
const Altivec64x2 K5 = load_key(&m_EK[20]);
const Altivec64x2 K6 = load_key(&m_EK[24]);
const Altivec64x2 K7 = load_key(&m_EK[28]);
const Altivec64x2 K8 = load_key(&m_EK[32]);
const Altivec64x2 K9 = load_key(&m_EK[36]);
const Altivec64x2 K10 = load_key(&m_EK[40]);
while(blocks >= 4)
{
Altivec64x2 B0 = load_block(in);
Altivec64x2 B1 = load_block(in+16);
Altivec64x2 B2 = load_block(in+16*2);
Altivec64x2 B3 = load_block(in+16*3);
AES_XOR_4(B0, B1, B2, B3, K0);
AES_ENCRYPT_4(B0, B1, B2, B3, K1);
AES_ENCRYPT_4(B0, B1, B2, B3, K2);
AES_ENCRYPT_4(B0, B1, B2, B3, K3);
AES_ENCRYPT_4(B0, B1, B2, B3, K4);
AES_ENCRYPT_4(B0, B1, B2, B3, K5);
AES_ENCRYPT_4(B0, B1, B2, B3, K6);
AES_ENCRYPT_4(B0, B1, B2, B3, K7);
AES_ENCRYPT_4(B0, B1, B2, B3, K8);
AES_ENCRYPT_4(B0, B1, B2, B3, K9);
AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K10);
store_blocks(B0, B1, B2, B3, out);
out += 4*16;
in += 4*16;
blocks -= 4;
}
for(size_t i = 0; i != blocks; ++i)
{
Altivec64x2 B = load_block(in);
B = vec_xor(B, K0);
B = __builtin_crypto_vcipher(B, K1);
B = __builtin_crypto_vcipher(B, K2);
B = __builtin_crypto_vcipher(B, K3);
B = __builtin_crypto_vcipher(B, K4);
B = __builtin_crypto_vcipher(B, K5);
B = __builtin_crypto_vcipher(B, K6);
B = __builtin_crypto_vcipher(B, K7);
B = __builtin_crypto_vcipher(B, K8);
B = __builtin_crypto_vcipher(B, K9);
B = __builtin_crypto_vcipherlast(B, K10);
store_block(B, out);
out += 16;
in += 16;
}
}
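// AES-128 decryption: the same expanded key schedule is consumed in reverse
// order (K0 = m_EK[40..43], K10 = m_EK[0..3]) using vncipher/vncipherlast;
// no separate decryption schedule is needed.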
BOTAN_FUNC_ISA("crypto")
void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[40]);
const Altivec64x2 K1 = load_key(&m_EK[36]);
const Altivec64x2 K2 = load_key(&m_EK[32]);
const Altivec64x2 K3 = load_key(&m_EK[28]);
const Altivec64x2 K4 = load_key(&m_EK[24]);
const Altivec64x2 K5 = load_key(&m_EK[20]);
const Altivec64x2 K6 = load_key(&m_EK[16]);
const Altivec64x2 K7 = load_key(&m_EK[12]);
const Altivec64x2 K8 = load_key(&m_EK[8]);
const Altivec64x2 K9 = load_key(&m_EK[4]);
const Altivec64x2 K10 = load_key(&m_EK[0]);
while(blocks >= 4)
{
Altivec64x2 B0 = load_block(in);
Altivec64x2 B1 = load_block(in+16);
Altivec64x2 B2 = load_block(in+16*2);
Altivec64x2 B3 = load_block(in+16*3);
AES_XOR_4(B0, B1, B2, B3, K0);
AES_DECRYPT_4(B0, B1, B2, B3, K1);
AES_DECRYPT_4(B0, B1, B2, B3, K2);
AES_DECRYPT_4(B0, B1, B2, B3, K3);
AES_DECRYPT_4(B0, B1, B2, B3, K4);
AES_DECRYPT_4(B0, B1, B2, B3, K5);
AES_DECRYPT_4(B0, B1, B2, B3, K6);
AES_DECRYPT_4(B0, B1, B2, B3, K7);
AES_DECRYPT_4(B0, B1, B2, B3, K8);
AES_DECRYPT_4(B0, B1, B2, B3, K9);
AES_DECRYPT_4_LAST(B0, B1, B2, B3, K10);
store_blocks(B0, B1, B2, B3, out);
out += 4*16;
in += 4*16;
blocks -= 4;
}
for(size_t i = 0; i != blocks; ++i)
{
Altivec64x2 B = load_block(in);
B = vec_xor(B, K0);
B = __builtin_crypto_vncipher(B, K1);
B = __builtin_crypto_vncipher(B, K2);
B = __builtin_crypto_vncipher(B, K3);
B = __builtin_crypto_vncipher(B, K4);
B = __builtin_crypto_vncipher(B, K5);
B = __builtin_crypto_vncipher(B, K6);
B = __builtin_crypto_vncipher(B, K7);
B = __builtin_crypto_vncipher(B, K8);
B = __builtin_crypto_vncipher(B, K9);
B = __builtin_crypto_vncipherlast(B, K10);
store_block(B, out);
out += 16;
in += 16;
}
}
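// AES-192 encryption: 13 round keys K0..K12 (m_EK[0..51]) and 12 rounds.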
BOTAN_FUNC_ISA("crypto")
void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[0]);
const Altivec64x2 K1 = load_key(&m_EK[4]);
const Altivec64x2 K2 = load_key(&m_EK[8]);
const Altivec64x2 K3 = load_key(&m_EK[12]);
const Altivec64x2 K4 = load_key(&m_EK[16]);
const Altivec64x2 K5 = load_key(&m_EK[20]);
const Altivec64x2 K6 = load_key(&m_EK[24]);
const Altivec64x2 K7 = load_key(&m_EK[28]);
const Altivec64x2 K8 = load_key(&m_EK[32]);
const Altivec64x2 K9 = load_key(&m_EK[36]);
const Altivec64x2 K10 = load_key(&m_EK[40]);
const Altivec64x2 K11 = load_key(&m_EK[44]);
const Altivec64x2 K12 = load_key(&m_EK[48]);
while(blocks >= 4)
{
Altivec64x2 B0 = load_block(in);
Altivec64x2 B1 = load_block(in+16);
Altivec64x2 B2 = load_block(in+16*2);
Altivec64x2 B3 = load_block(in+16*3);
AES_XOR_4(B0, B1, B2, B3, K0);
AES_ENCRYPT_4(B0, B1, B2, B3, K1);
AES_ENCRYPT_4(B0, B1, B2, B3, K2);
AES_ENCRYPT_4(B0, B1, B2, B3, K3);
AES_ENCRYPT_4(B0, B1, B2, B3, K4);
AES_ENCRYPT_4(B0, B1, B2, B3, K5);
AES_ENCRYPT_4(B0, B1, B2, B3, K6);
AES_ENCRYPT_4(B0, B1, B2, B3, K7);
AES_ENCRYPT_4(B0, B1, B2, B3, K8);
AES_ENCRYPT_4(B0, B1, B2, B3, K9);
AES_ENCRYPT_4(B0, B1, B2, B3, K10);
AES_ENCRYPT_4(B0, B1, B2, B3, K11);
AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K12);
store_blocks(B0, B1, B2, B3, out);
out += 4*16;
in += 4*16;
blocks -= 4;
}
for(size_t i = 0; i != blocks; ++i)
{
Altivec64x2 B = load_block(in);
B = vec_xor(B, K0);
B = __builtin_crypto_vcipher(B, K1);
B = __builtin_crypto_vcipher(B, K2);
B = __builtin_crypto_vcipher(B, K3);
B = __builtin_crypto_vcipher(B, K4);
B = __builtin_crypto_vcipher(B, K5);
B = __builtin_crypto_vcipher(B, K6);
B = __builtin_crypto_vcipher(B, K7);
B = __builtin_crypto_vcipher(B, K8);
B = __builtin_crypto_vcipher(B, K9);
B = __builtin_crypto_vcipher(B, K10);
B = __builtin_crypto_vcipher(B, K11);
B = __builtin_crypto_vcipherlast(B, K12);
store_block(B, out);
out += 16;
in += 16;
}
}
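// AES-192 decryption: key schedule consumed in reverse order, K0 = m_EK[48..51].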
BOTAN_FUNC_ISA("crypto")
void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[48]);
const Altivec64x2 K1 = load_key(&m_EK[44]);
const Altivec64x2 K2 = load_key(&m_EK[40]);
const Altivec64x2 K3 = load_key(&m_EK[36]);
const Altivec64x2 K4 = load_key(&m_EK[32]);
const Altivec64x2 K5 = load_key(&m_EK[28]);
const Altivec64x2 K6 = load_key(&m_EK[24]);
const Altivec64x2 K7 = load_key(&m_EK[20]);
const Altivec64x2 K8 = load_key(&m_EK[16]);
const Altivec64x2 K9 = load_key(&m_EK[12]);
const Altivec64x2 K10 = load_key(&m_EK[8]);
const Altivec64x2 K11 = load_key(&m_EK[4]);
const Altivec64x2 K12 = load_key(&m_EK[0]);
while(blocks >= 4)
{
Altivec64x2 B0 = load_block(in);
Altivec64x2 B1 = load_block(in+16);
Altivec64x2 B2 = load_block(in+16*2);
Altivec64x2 B3 = load_block(in+16*3);
AES_XOR_4(B0, B1, B2, B3, K0);
AES_DECRYPT_4(B0, B1, B2, B3, K1);
AES_DECRYPT_4(B0, B1, B2, B3, K2);
AES_DECRYPT_4(B0, B1, B2, B3, K3);
AES_DECRYPT_4(B0, B1, B2, B3, K4);
AES_DECRYPT_4(B0, B1, B2, B3, K5);
AES_DECRYPT_4(B0, B1, B2, B3, K6);
AES_DECRYPT_4(B0, B1, B2, B3, K7);
AES_DECRYPT_4(B0, B1, B2, B3, K8);
AES_DECRYPT_4(B0, B1, B2, B3, K9);
AES_DECRYPT_4(B0, B1, B2, B3, K10);
AES_DECRYPT_4(B0, B1, B2, B3, K11);
AES_DECRYPT_4_LAST(B0, B1, B2, B3, K12);
store_blocks(B0, B1, B2, B3, out);
out += 4*16;
in += 4*16;
blocks -= 4;
}
for(size_t i = 0; i != blocks; ++i)
{
Altivec64x2 B = load_block(in);
B = vec_xor(B, K0);
B = __builtin_crypto_vncipher(B, K1);
B = __builtin_crypto_vncipher(B, K2);
B = __builtin_crypto_vncipher(B, K3);
B = __builtin_crypto_vncipher(B, K4);
B = __builtin_crypto_vncipher(B, K5);
B = __builtin_crypto_vncipher(B, K6);
B = __builtin_crypto_vncipher(B, K7);
B = __builtin_crypto_vncipher(B, K8);
B = __builtin_crypto_vncipher(B, K9);
B = __builtin_crypto_vncipher(B, K10);
B = __builtin_crypto_vncipher(B, K11);
B = __builtin_crypto_vncipherlast(B, K12);
store_block(B, out);
out += 16;
in += 16;
}
}
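// AES-256 encryption: 15 round keys K0..K14 (m_EK[0..59]) and 14 rounds.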
BOTAN_FUNC_ISA("crypto")
void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[0]);
const Altivec64x2 K1 = load_key(&m_EK[4]);
const Altivec64x2 K2 = load_key(&m_EK[8]);
const Altivec64x2 K3 = load_key(&m_EK[12]);
const Altivec64x2 K4 = load_key(&m_EK[16]);
const Altivec64x2 K5 = load_key(&m_EK[20]);
const Altivec64x2 K6 = load_key(&m_EK[24]);
const Altivec64x2 K7 = load_key(&m_EK[28]);
const Altivec64x2 K8 = load_key(&m_EK[32]);
const Altivec64x2 K9 = load_key(&m_EK[36]);
const Altivec64x2 K10 = load_key(&m_EK[40]);
const Altivec64x2 K11 = load_key(&m_EK[44]);
const Altivec64x2 K12 = load_key(&m_EK[48]);
const Altivec64x2 K13 = load_key(&m_EK[52]);
const Altivec64x2 K14 = load_key(&m_EK[56]);
while(blocks >= 4)
{
Altivec64x2 B0 = load_block(in);
Altivec64x2 B1 = load_block(in+16);
Altivec64x2 B2 = load_block(in+16*2);
Altivec64x2 B3 = load_block(in+16*3);
AES_XOR_4(B0, B1, B2, B3, K0);
AES_ENCRYPT_4(B0, B1, B2, B3, K1);
AES_ENCRYPT_4(B0, B1, B2, B3, K2);
AES_ENCRYPT_4(B0, B1, B2, B3, K3);
AES_ENCRYPT_4(B0, B1, B2, B3, K4);
AES_ENCRYPT_4(B0, B1, B2, B3, K5);
AES_ENCRYPT_4(B0, B1, B2, B3, K6);
AES_ENCRYPT_4(B0, B1, B2, B3, K7);
AES_ENCRYPT_4(B0, B1, B2, B3, K8);
AES_ENCRYPT_4(B0, B1, B2, B3, K9);
AES_ENCRYPT_4(B0, B1, B2, B3, K10);
AES_ENCRYPT_4(B0, B1, B2, B3, K11);
AES_ENCRYPT_4(B0, B1, B2, B3, K12);
AES_ENCRYPT_4(B0, B1, B2, B3, K13);
AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K14);
store_blocks(B0, B1, B2, B3, out);
out += 4*16;
in += 4*16;
blocks -= 4;
}
for(size_t i = 0; i != blocks; ++i)
{
Altivec64x2 B = load_block(in);
B = vec_xor(B, K0);
B = __builtin_crypto_vcipher(B, K1);
B = __builtin_crypto_vcipher(B, K2);
B = __builtin_crypto_vcipher(B, K3);
B = __builtin_crypto_vcipher(B, K4);
B = __builtin_crypto_vcipher(B, K5);
B = __builtin_crypto_vcipher(B, K6);
B = __builtin_crypto_vcipher(B, K7);
B = __builtin_crypto_vcipher(B, K8);
B = __builtin_crypto_vcipher(B, K9);
B = __builtin_crypto_vcipher(B, K10);
B = __builtin_crypto_vcipher(B, K11);
B = __builtin_crypto_vcipher(B, K12);
B = __builtin_crypto_vcipher(B, K13);
B = __builtin_crypto_vcipherlast(B, K14);
store_block(B, out);
out += 16;
in += 16;
}
}
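// AES-256 decryption: key schedule consumed in reverse order, K0 = m_EK[56..59].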
BOTAN_FUNC_ISA("crypto")
void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
{
const Altivec64x2 K0 = load_key(&m_EK[56]);
const Altivec64x2 K1 = load_key(&m_EK[52]);
const Altivec64x2 K2 = load_key(&m_EK[48]);
const Altivec64x2 K3 = load_key(&m_EK[44]);
const Altivec64x2 K4 = load_key(&m_EK[40]);
const Altivec64x2 K5 = load_key(&m_EK[36]);
const Altivec64x2 K6 = load_key(&m_EK[32]);
const Altivec64x2 K7 = load_key(&m_EK[28]);
const Altivec64x2 K8 = load_key(&m_EK[24]);
const Altivec64x2 K9 = load_key(&m_EK[20]);
const Altivec64x2 K10 = load_key(&m_EK[16]);
const Altivec64x2 K11 = load_key(&m_EK[12]);
const Altivec64x2 K12 = load_key(&m_EK[8]);
const Altivec64x2 K13 = load_key(&m_EK[4]);
const Altivec64x2 K14 = load_key(&m_EK[0]);
while(blocks >= 4)
{
Altivec64x2 B0 = load_block(in);
Altivec64x2 B1 = load_block(in+16);
Altivec64x2 B2 = load_block(in+16*2);
Altivec64x2 B3 = load_block(in+16*3);
AES_XOR_4(B0, B1, B2, B3, K0);
AES_DECRYPT_4(B0, B1, B2, B3, K1);
AES_DECRYPT_4(B0, B1, B2, B3, K2);
AES_DECRYPT_4(B0, B1, B2, B3, K3);
AES_DECRYPT_4(B0, B1, B2, B3, K4);
AES_DECRYPT_4(B0, B1, B2, B3, K5);
AES_DECRYPT_4(B0, B1, B2, B3, K6);
AES_DECRYPT_4(B0, B1, B2, B3, K7);
AES_DECRYPT_4(B0, B1, B2, B3, K8);
AES_DECRYPT_4(B0, B1, B2, B3, K9);
AES_DECRYPT_4(B0, B1, B2, B3, K10);
AES_DECRYPT_4(B0, B1, B2, B3, K11);
AES_DECRYPT_4(B0, B1, B2, B3, K12);
AES_DECRYPT_4(B0, B1, B2, B3, K13);
AES_DECRYPT_4_LAST(B0, B1, B2, B3, K14);
store_blocks(B0, B1, B2, B3, out);
out += 4*16;
in += 4*16;
blocks -= 4;
}
for(size_t i = 0; i != blocks; ++i)
{
Altivec64x2 B = load_block(in);
B = vec_xor(B, K0);
B = __builtin_crypto_vncipher(B, K1);
B = __builtin_crypto_vncipher(B, K2);
B = __builtin_crypto_vncipher(B, K3);
B = __builtin_crypto_vncipher(B, K4);
B = __builtin_crypto_vncipher(B, K5);
B = __builtin_crypto_vncipher(B, K6);
B = __builtin_crypto_vncipher(B, K7);
B = __builtin_crypto_vncipher(B, K8);
B = __builtin_crypto_vncipher(B, K9);
B = __builtin_crypto_vncipher(B, K10);
B = __builtin_crypto_vncipher(B, K11);
B = __builtin_crypto_vncipher(B, K12);
B = __builtin_crypto_vncipher(B, K13);
B = __builtin_crypto_vncipherlast(B, K14);
store_block(B, out);
out += 16;
in += 16;
}
}
#undef AES_XOR_4
#undef AES_ENCRYPT_4
#undef AES_ENCRYPT_4_LAST
#undef AES_DECRYPT_4
#undef AES_DECRYPT_4_LAST
}