rijndael.c - mozsearch

mozilla-central/security/nss/lib/freebl/rijndael.c (file symbol)

Enable keyboard shortcuts

Source code

File a bug in NSS :: Libraries

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifdef FREEBL_NO_DEPEND

#include "stubs.h"

#endif

#include "blapit.h"

#include "prenv.h"

#include "prerr.h"

#include "prinit.h"

#include "secerr.h"

#include "prtypes.h"

#include "blapi.h"

#include "rijndael.h"

#include "cts.h"

#include "ctr.h"

#include "gcm.h"

#include "mpi.h"

#if !defined(IS_LITTLE_ENDIAN) && !defined(NSS_X86_OR_X64)

// not test yet on big endian platform of arm

#undef USE_HW_AES

#endif

#ifdef __powerpc64__

#include "ppc-crypto.h"

#endif

#ifdef USE_HW_AES

#ifdef NSS_X86_OR_X64

#include "intel-aes.h"

#else

#include "aes-armv8.h"

#endif

#endif /* USE_HW_AES */

#ifdef INTEL_GCM

#include "intel-gcm.h"

#endif /* INTEL_GCM */

#if defined(USE_PPC_CRYPTO) && defined(PPC_GCM)

#include "ppc-gcm.h"

#endif

/* Forward declarations */

void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,

                                   unsigned int Nk);

void rijndael_native_encryptBlock(AESContext *cx,

                                  unsigned char *output,

                                  const unsigned char *input);

void rijndael_native_decryptBlock(AESContext *cx,

                                  unsigned char *output,

                                  const unsigned char *input);

void native_xorBlock(unsigned char *out,

                     const unsigned char *a,

                     const unsigned char *b);

/* Stub definitions for the above rijndael_native_* functions, which

 * shouldn't be used unless NSS_X86_OR_X64 is defined */

#ifndef NSS_X86_OR_X64

void

rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,

                              unsigned int Nk)

    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);

    PORT_Assert(0);

void

rijndael_native_encryptBlock(AESContext *cx,

                             unsigned char *output,

                             const unsigned char *input)

    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);

    PORT_Assert(0);

void

rijndael_native_decryptBlock(AESContext *cx,

                             unsigned char *output,

                             const unsigned char *input)

    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);

    PORT_Assert(0);

void

native_xorBlock(unsigned char *out, const unsigned char *a,

                const unsigned char *b)

    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);

    PORT_Assert(0);

#endif /* NSS_X86_OR_X64 */

/*

 * There are currently three ways to build this code, varying in performance

 * and code size.

 * RIJNDAEL_INCLUDE_TABLES         Include all tables from rijndael32.tab

 * RIJNDAEL_GENERATE_VALUES        Do not store tables, generate the table

 *                                 values "on-the-fly", using gfm

 * RIJNDAEL_GENERATE_VALUES_MACRO  Same as above, but use macros

 * The default is RIJNDAEL_INCLUDE_TABLES.

*/

/*

 * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4],

 *                                                 T**-1[0..4], IMXC[0..4]

 * When building anything else, includes S, S**-1, Rcon

*/

#include "rijndael32.tab"

#if defined(RIJNDAEL_INCLUDE_TABLES)

/*

 * RIJNDAEL_INCLUDE_TABLES

*/

#define T0(i) _T0[i]

#define T1(i) _T1[i]

#define T2(i) _T2[i]

#define T3(i) _T3[i]

#define TInv0(i) _TInv0[i]

#define TInv1(i) _TInv1[i]

#define TInv2(i) _TInv2[i]

#define TInv3(i) _TInv3[i]

#define IMXC0(b) _IMXC0[b]

#define IMXC1(b) _IMXC1[b]

#define IMXC2(b) _IMXC2[b]

#define IMXC3(b) _IMXC3[b]

/* The S-box can be recovered from the T-tables */

#ifdef IS_LITTLE_ENDIAN

#define SBOX(b) ((PRUint8)_T3[b])

#else

#define SBOX(b) ((PRUint8)_T1[b])

#endif

#define SINV(b) (_SInv[b])

#else /* not RIJNDAEL_INCLUDE_TABLES */

/*

 * Code for generating T-table values.

*/

#ifdef IS_LITTLE_ENDIAN

#define WORD4(b0, b1, b2, b3) \

    ((((PRUint32)b3) << 24) | \

     (((PRUint32)b2) << 16) | \

     (((PRUint32)b1) << 8) |  \

     ((PRUint32)b0))

#else

#define WORD4(b0, b1, b2, b3) \

    ((((PRUint32)b0) << 24) | \

     (((PRUint32)b1) << 16) | \

     (((PRUint32)b2) << 8) |  \

     ((PRUint32)b3))

#endif

/*

 * Define the S and S**-1 tables (both have been stored)

*/

#define SBOX(b) (_S[b])

#define SINV(b) (_SInv[b])

/*

 * The function xtime, used for Galois field multiplication

*/

#define XTIME(a) \

    ((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1))

/* Choose GFM method (macros or function) */

#if defined(RIJNDAEL_GENERATE_VALUES_MACRO)

/*

 * Galois field GF(2**8) multipliers, in macro form

*/

#define GFM01(a) \

    (a) /* a * 01 = a, the identity */

#define GFM02(a) \

    (XTIME(a) & 0xff) /* a * 02 = xtime(a) */

#define GFM04(a) \

    (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */

#define GFM08(a) \

    (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */

#define GFM03(a) \

    (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */

#define GFM09(a) \

    (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */

#define GFM0B(a) \

    (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */

#define GFM0D(a) \

    (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */

#define GFM0E(a) \

    (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */

#else /* RIJNDAEL_GENERATE_VALUES */

/* GF_MULTIPLY

 * multiply two bytes represented in GF(2**8), mod (x**4 + 1)

*/

PRUint8

gfm(PRUint8 a, PRUint8 b)

    PRUint8 res = 0;

    while (b > 0) {

        res = (b & 0x01) ? res ^ a : res;

        a = XTIME(a);

        b >>= 1;

    return res;

#define GFM01(a) \

    (a) /* a * 01 = a, the identity */

#define GFM02(a) \

    (XTIME(a) & 0xff) /* a * 02 = xtime(a) */

#define GFM03(a) \

    (gfm(a, 0x03)) /* a * 03 */

#define GFM09(a) \

    (gfm(a, 0x09)) /* a * 09 */

#define GFM0B(a) \

    (gfm(a, 0x0B)) /* a * 0B */

#define GFM0D(a) \

    (gfm(a, 0x0D)) /* a * 0D */

#define GFM0E(a) \

    (gfm(a, 0x0E)) /* a * 0E */

#endif /* choosing GFM function */

/*

 * The T-tables

*/

#define G_T0(i) \

    (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))

#define G_T1(i) \

    (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))

#define G_T2(i) \

    (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))

#define G_T3(i) \

    (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))

/*

 * The inverse T-tables

*/

#define G_TInv0(i) \

    (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))

#define G_TInv1(i) \

    (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))

#define G_TInv2(i) \

    (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))

#define G_TInv3(i) \

    (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))

/*

 * The inverse mix column tables

*/

#define G_IMXC0(i) \

    (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))

#define G_IMXC1(i) \

    (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))

#define G_IMXC2(i) \

    (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))

#define G_IMXC3(i) \

    (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))

/* Now choose the T-table indexing method */

#if defined(RIJNDAEL_GENERATE_VALUES)

/* generate values for the tables with a function*/

static PRUint32

gen_TInvXi(PRUint8 tx, PRUint8 i)

    PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;

    si01 = SINV(i);

    si02 = XTIME(si01);

    si04 = XTIME(si02);

    si08 = XTIME(si04);

    si03 = si02 ^ si01;

    si09 = si08 ^ si01;

    si0B = si08 ^ si03;

    si0D = si09 ^ si04;

    si0E = si08 ^ si04 ^ si02;

    switch (tx) {

        case 0:

            return WORD4(si0E, si09, si0D, si0B);

        case 1:

            return WORD4(si0B, si0E, si09, si0D);

        case 2:

            return WORD4(si0D, si0B, si0E, si09);

        case 3:

            return WORD4(si09, si0D, si0B, si0E);

    return -1;

#define T0(i) G_T0(i)

#define T1(i) G_T1(i)

#define T2(i) G_T2(i)

#define T3(i) G_T3(i)

#define TInv0(i) gen_TInvXi(0, i)

#define TInv1(i) gen_TInvXi(1, i)

#define TInv2(i) gen_TInvXi(2, i)

#define TInv3(i) gen_TInvXi(3, i)

#define IMXC0(b) G_IMXC0(b)

#define IMXC1(b) G_IMXC1(b)

#define IMXC2(b) G_IMXC2(b)

#define IMXC3(b) G_IMXC3(b)

#else /* RIJNDAEL_GENERATE_VALUES_MACRO */

/* generate values for the tables with macros */

#define T0(i) G_T0(i)

#define T1(i) G_T1(i)

#define T2(i) G_T2(i)

#define T3(i) G_T3(i)

#define TInv0(i) G_TInv0(i)

#define TInv1(i) G_TInv1(i)

#define TInv2(i) G_TInv2(i)

#define TInv3(i) G_TInv3(i)

#define IMXC0(b) G_IMXC0(b)

#define IMXC1(b) G_IMXC1(b)

#define IMXC2(b) G_IMXC2(b)

#define IMXC3(b) G_IMXC3(b)

#endif /* choose T-table indexing method */

#endif /* not RIJNDAEL_INCLUDE_TABLES */

/**************************************************************************

 * Stuff related to the Rijndael key schedule

 *************************************************************************/

#define SUBBYTE(w)                                \

    ((((PRUint32)SBOX((w >> 24) & 0xff)) << 24) | \

     (((PRUint32)SBOX((w >> 16) & 0xff)) << 16) | \

     (((PRUint32)SBOX((w >> 8) & 0xff)) << 8) |   \

     (((PRUint32)SBOX((w)&0xff))))

#ifdef IS_LITTLE_ENDIAN

#define ROTBYTE(b) \

    ((b >> 8) | (b << 24))

#else

#define ROTBYTE(b) \

    ((b << 8) | (b >> 24))

#endif

/* rijndael_key_expansion7

 * Generate the expanded key from the key input by the user.

 * XXX

 * Nk == 7 (224 key bits) is a weird case.  Since Nk > 6, an added SubByte

 * transformation is done periodically.  The period is every 4 bytes, and

 * since 7%4 != 0 this happens at different times for each key word (unlike

 * Nk == 8 where it happens twice in every key word, in the same positions).

 * For now, I'm implementing this case "dumbly", w/o any unrolling.

*/

static void

rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)

    unsigned int i;

    PRUint32 *W;

    PRUint32 *pW;

    PRUint32 tmp;

    W = cx->k.expandedKey;

    /* 1.  the first Nk words contain the cipher key */

    memcpy(W, key, Nk * 4);

    i = Nk;

    /* 2.  loop until full expanded key is obtained */

    pW = W + i - 1;

    for (; i < cx->Nb * (cx->Nr + 1); ++i) {

        tmp = *pW++;

        if (i % Nk == 0)

            tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];

        else if (i % Nk == 4)

            tmp = SUBBYTE(tmp);

        *pW = W[i - Nk] ^ tmp;

/* rijndael_key_expansion

 * Generate the expanded key from the key input by the user.

*/

static void

rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)

    unsigned int i;

    PRUint32 *W;

    PRUint32 *pW;

    PRUint32 tmp;

    unsigned int round_key_words = cx->Nb * (cx->Nr + 1);

    if (Nk == 7) {

        rijndael_key_expansion7(cx, key, Nk);

        return;

    W = cx->k.expandedKey;

    /* The first Nk words contain the input cipher key */

    memcpy(W, key, Nk * 4);

    i = Nk;

    pW = W + i - 1;

    /* Loop over all sets of Nk words, except the last */

    while (i < round_key_words - Nk) {

        tmp = *pW++;

        tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];

        *pW = W[i++ - Nk] ^ tmp;

        tmp = *pW++;

        *pW = W[i++ - Nk] ^ tmp;

        tmp = *pW++;

        *pW = W[i++ - Nk] ^ tmp;

        tmp = *pW++;

        *pW = W[i++ - Nk] ^ tmp;

        if (Nk == 4)

            continue;

        switch (Nk) {

            case 8:

                tmp = *pW++;

                tmp = SUBBYTE(tmp);

                *pW = W[i++ - Nk] ^ tmp;

            case 7:

                tmp = *pW++;

                *pW = W[i++ - Nk] ^ tmp;

            case 6:

                tmp = *pW++;

                *pW = W[i++ - Nk] ^ tmp;

            case 5:

                tmp = *pW++;

                *pW = W[i++ - Nk] ^ tmp;

    /* Generate the last word */

    tmp = *pW++;

    tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];

    *pW = W[i++ - Nk] ^ tmp;

    /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0.  However,

     * since the above loop generated all but the last Nk key words, there

     * is no more need for the SubByte transformation.

*/

    if (Nk < 8) {

        for (; i < round_key_words; ++i) {

            tmp = *pW++;

            *pW = W[i - Nk] ^ tmp;

    } else {

        /* except in the case when Nk == 8.  Then one more SubByte may have

         * to be performed, at i % Nk == 4.

*/

        for (; i < round_key_words; ++i) {

            tmp = *pW++;

            if (i % Nk == 4)

                tmp = SUBBYTE(tmp);

            *pW = W[i - Nk] ^ tmp;

/* rijndael_invkey_expansion

 * Generate the expanded key for the inverse cipher from the key input by

 * the user.

*/

static void

rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)

    unsigned int r;

    PRUint32 *roundkeyw;

    PRUint8 *b;

    int Nb = cx->Nb;

    /* begins like usual key expansion ... */

    rijndael_key_expansion(cx, key, Nk);

    /* ... but has the additional step of InvMixColumn,

     * excepting the first and last round keys.

*/

    roundkeyw = cx->k.expandedKey + cx->Nb;

    for (r = 1; r < cx->Nr; ++r) {

        /* each key word, roundkeyw, represents a column in the key

         * matrix.  Each column is multiplied by the InvMixColumn matrix.

         *   [ 0E 0B 0D 09 ]   [ b0 ]

         *   [ 09 0E 0B 0D ] * [ b1 ]

         *   [ 0D 09 0E 0B ]   [ b2 ]

         *   [ 0B 0D 09 0E ]   [ b3 ]

*/

        b = (PRUint8 *)roundkeyw;

        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);

        b = (PRUint8 *)roundkeyw;

        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);

        b = (PRUint8 *)roundkeyw;

        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);

        b = (PRUint8 *)roundkeyw;

        *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);

        if (Nb <= 4)

            continue;

        switch (Nb) {

            case 8:

                b = (PRUint8 *)roundkeyw;

                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^

                               IMXC2(b[2]) ^ IMXC3(b[3]);

            case 7:

                b = (PRUint8 *)roundkeyw;

                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^

                               IMXC2(b[2]) ^ IMXC3(b[3]);

            case 6:

                b = (PRUint8 *)roundkeyw;

                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^

                               IMXC2(b[2]) ^ IMXC3(b[3]);

            case 5:

                b = (PRUint8 *)roundkeyw;

                *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^

                               IMXC2(b[2]) ^ IMXC3(b[3]);

/**************************************************************************

 * Stuff related to Rijndael encryption/decryption.

 *************************************************************************/

#ifdef IS_LITTLE_ENDIAN

#define BYTE0WORD(w) ((w)&0x000000ff)

#define BYTE1WORD(w) ((w)&0x0000ff00)

#define BYTE2WORD(w) ((w)&0x00ff0000)

#define BYTE3WORD(w) ((w)&0xff000000)

#else

#define BYTE0WORD(w) ((w)&0xff000000)

#define BYTE1WORD(w) ((w)&0x00ff0000)

#define BYTE2WORD(w) ((w)&0x0000ff00)

#define BYTE3WORD(w) ((w)&0x000000ff)

#endif

typedef union {

    PRUint32 w[4];

    PRUint8 b[16];

} rijndael_state;

#define COLUMN_0(state) state.w[0]

#define COLUMN_1(state) state.w[1]

#define COLUMN_2(state) state.w[2]

#define COLUMN_3(state) state.w[3]

#define STATE_BYTE(i) state.b[i]

// out = a ^ b

inline static void

xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b)

    for (unsigned int j = 0; j < AES_BLOCK_SIZE; ++j) {

        (out)[j] = (a)[j] ^ (b)[j];

static void NO_SANITIZE_ALIGNMENT

rijndael_encryptBlock128(AESContext *cx,

                         unsigned char *output,

                         const unsigned char *input)

    unsigned int r;

    PRUint32 *roundkeyw;

    rijndael_state state;

    PRUint32 C0, C1, C2, C3;

#if defined(NSS_X86_OR_X64)

#define pIn input

#define pOut output

#else

    unsigned char *pIn, *pOut;

    PRUint32 inBuf[4], outBuf[4];

    if ((ptrdiff_t)input & 0x3) {

        memcpy(inBuf, input, sizeof inBuf);

        pIn = (unsigned char *)inBuf;

    } else {

        pIn = (unsigned char *)input;

    if ((ptrdiff_t)output & 0x3) {

        pOut = (unsigned char *)outBuf;

    } else {

        pOut = (unsigned char *)output;

#endif

    roundkeyw = cx->k.expandedKey;

    /* Step 1: Add Round Key 0 to initial state */

    COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++;

    COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++;

    COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++;

    COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;

    /* Step 2: Loop over rounds [1..NR-1] */

    for (r = 1; r < cx->Nr; ++r) {

        /* Do ShiftRow, ByteSub, and MixColumn all at once */

        C0 = T0(STATE_BYTE(0)) ^

             T1(STATE_BYTE(5)) ^

             T2(STATE_BYTE(10)) ^

             T3(STATE_BYTE(15));

        C1 = T0(STATE_BYTE(4)) ^

             T1(STATE_BYTE(9)) ^

             T2(STATE_BYTE(14)) ^

             T3(STATE_BYTE(3));

        C2 = T0(STATE_BYTE(8)) ^

             T1(STATE_BYTE(13)) ^

             T2(STATE_BYTE(2)) ^

             T3(STATE_BYTE(7));

        C3 = T0(STATE_BYTE(12)) ^

             T1(STATE_BYTE(1)) ^

             T2(STATE_BYTE(6)) ^

             T3(STATE_BYTE(11));

        /* Round key addition */

        COLUMN_0(state) = C0 ^ *roundkeyw++;

        COLUMN_1(state) = C1 ^ *roundkeyw++;

        COLUMN_2(state) = C2 ^ *roundkeyw++;

        COLUMN_3(state) = C3 ^ *roundkeyw++;

    /* Step 3: Do the last round */

    /* Final round does not employ MixColumn */

    C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) |

          (BYTE1WORD(T3(STATE_BYTE(5)))) |

          (BYTE2WORD(T0(STATE_BYTE(10)))) |

          (BYTE3WORD(T1(STATE_BYTE(15))))) ^

         *roundkeyw++;

    C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |

          (BYTE1WORD(T3(STATE_BYTE(9)))) |

          (BYTE2WORD(T0(STATE_BYTE(14)))) |

          (BYTE3WORD(T1(STATE_BYTE(3))))) ^

         *roundkeyw++;

    C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |

          (BYTE1WORD(T3(STATE_BYTE(13)))) |

          (BYTE2WORD(T0(STATE_BYTE(2)))) |

          (BYTE3WORD(T1(STATE_BYTE(7))))) ^

         *roundkeyw++;

    C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |

          (BYTE1WORD(T3(STATE_BYTE(1)))) |

          (BYTE2WORD(T0(STATE_BYTE(6)))) |

          (BYTE3WORD(T1(STATE_BYTE(11))))) ^

         *roundkeyw++;

    *((PRUint32 *)pOut) = C0;

    *((PRUint32 *)(pOut + 4)) = C1;

    *((PRUint32 *)(pOut + 8)) = C2;

    *((PRUint32 *)(pOut + 12)) = C3;

#if defined(NSS_X86_OR_X64)

#undef pIn

#undef pOut

#else

    if ((ptrdiff_t)output & 0x3) {

        memcpy(output, outBuf, sizeof outBuf);

#endif

static void NO_SANITIZE_ALIGNMENT

rijndael_decryptBlock128(AESContext *cx,

                         unsigned char *output,

                         const unsigned char *input)

    int r;

    PRUint32 *roundkeyw;

    rijndael_state state;

    PRUint32 C0, C1, C2, C3;

#if defined(NSS_X86_OR_X64)

#define pIn input

#define pOut output

#else

    unsigned char *pIn, *pOut;

    PRUint32 inBuf[4], outBuf[4];

    if ((ptrdiff_t)input & 0x3) {

        memcpy(inBuf, input, sizeof inBuf);

        pIn = (unsigned char *)inBuf;

    } else {

        pIn = (unsigned char *)input;

    if ((ptrdiff_t)output & 0x3) {

        pOut = (unsigned char *)outBuf;

    } else {

        pOut = (unsigned char *)output;

#endif

    roundkeyw = cx->k.expandedKey + cx->Nb * cx->Nr + 3;

    /* reverse the final key addition */

    COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;

    COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;

    COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;

    COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--;

    /* Loop over rounds in reverse [NR..1] */

    for (r = cx->Nr; r > 1; --r) {

        /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */

        C0 = TInv0(STATE_BYTE(0)) ^

             TInv1(STATE_BYTE(13)) ^

             TInv2(STATE_BYTE(10)) ^

             TInv3(STATE_BYTE(7));

        C1 = TInv0(STATE_BYTE(4)) ^

             TInv1(STATE_BYTE(1)) ^

             TInv2(STATE_BYTE(14)) ^

             TInv3(STATE_BYTE(11));

        C2 = TInv0(STATE_BYTE(8)) ^

             TInv1(STATE_BYTE(5)) ^

             TInv2(STATE_BYTE(2)) ^

             TInv3(STATE_BYTE(15));

        C3 = TInv0(STATE_BYTE(12)) ^

             TInv1(STATE_BYTE(9)) ^

             TInv2(STATE_BYTE(6)) ^

             TInv3(STATE_BYTE(3));

        /* Invert the key addition step */

        COLUMN_3(state) = C3 ^ *roundkeyw--;

        COLUMN_2(state) = C2 ^ *roundkeyw--;

        COLUMN_1(state) = C1 ^ *roundkeyw--;

        COLUMN_0(state) = C0 ^ *roundkeyw--;

    /* inverse sub */

    pOut[0] = SINV(STATE_BYTE(0));

    pOut[1] = SINV(STATE_BYTE(13));

    pOut[2] = SINV(STATE_BYTE(10));

    pOut[3] = SINV(STATE_BYTE(7));

    pOut[4] = SINV(STATE_BYTE(4));

    pOut[5] = SINV(STATE_BYTE(1));

    pOut[6] = SINV(STATE_BYTE(14));

    pOut[7] = SINV(STATE_BYTE(11));

    pOut[8] = SINV(STATE_BYTE(8));

    pOut[9] = SINV(STATE_BYTE(5));

    pOut[10] = SINV(STATE_BYTE(2));

    pOut[11] = SINV(STATE_BYTE(15));

    pOut[12] = SINV(STATE_BYTE(12));

    pOut[13] = SINV(STATE_BYTE(9));

    pOut[14] = SINV(STATE_BYTE(6));

    pOut[15] = SINV(STATE_BYTE(3));

    /* final key addition */

    *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;

    *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;

    *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;

    *((PRUint32 *)pOut) ^= *roundkeyw--;

#if defined(NSS_X86_OR_X64)

#undef pIn

#undef pOut

#else

    if ((ptrdiff_t)output & 0x3) {

        memcpy(output, outBuf, sizeof outBuf);

#endif

/**************************************************************************

 *  Rijndael modes of operation (ECB and CBC)

 *************************************************************************/

static SECStatus

rijndael_encryptECB(AESContext *cx, unsigned char *output,

                    unsigned int *outputLen, unsigned int maxOutputLen,

                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)

    PORT_Assert(blocksize == AES_BLOCK_SIZE);

    PRBool aesni = aesni_support();

    while (inputLen > 0) {

        if (aesni) {

            rijndael_native_encryptBlock(cx, output, input);

        } else {

            rijndael_encryptBlock128(cx, output, input);

        output += AES_BLOCK_SIZE;

        input += AES_BLOCK_SIZE;

        inputLen -= AES_BLOCK_SIZE;

    return SECSuccess;

static SECStatus

rijndael_encryptCBC(AESContext *cx, unsigned char *output,

                    unsigned int *outputLen, unsigned int maxOutputLen,

                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)

    PORT_Assert(blocksize == AES_BLOCK_SIZE);

    unsigned char *lastblock = cx->iv;

    unsigned char inblock[AES_BLOCK_SIZE * 8];

    PRBool aesni = aesni_support();

    if (!inputLen)

        return SECSuccess;

    while (inputLen > 0) {

        if (aesni) {

            /* XOR with the last block (IV if first block) */

            native_xorBlock(inblock, input, lastblock);

            /* encrypt */

            rijndael_native_encryptBlock(cx, output, inblock);

        } else {

            xorBlock(inblock, input, lastblock);

            rijndael_encryptBlock128(cx, output, inblock);

        /* move to the next block */

        lastblock = output;

        output += AES_BLOCK_SIZE;

        input += AES_BLOCK_SIZE;

        inputLen -= AES_BLOCK_SIZE;

    memcpy(cx->iv, lastblock, AES_BLOCK_SIZE);

    return SECSuccess;

static SECStatus

rijndael_decryptECB(AESContext *cx, unsigned char *output,

                    unsigned int *outputLen, unsigned int maxOutputLen,

                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)

    PORT_Assert(blocksize == AES_BLOCK_SIZE);

    PRBool aesni = aesni_support();

    while (inputLen > 0) {

        if (aesni) {

            rijndael_native_decryptBlock(cx, output, input);

        } else {

            rijndael_decryptBlock128(cx, output, input);

        output += AES_BLOCK_SIZE;

        input += AES_BLOCK_SIZE;

        inputLen -= AES_BLOCK_SIZE;

    return SECSuccess;

static SECStatus

rijndael_decryptCBC(AESContext *cx, unsigned char *output,

                    unsigned int *outputLen, unsigned int maxOutputLen,

                    const unsigned char *input, unsigned int inputLen, unsigned int blocksize)

    PORT_Assert(blocksize == AES_BLOCK_SIZE);

    const unsigned char *in;

    unsigned char *out;

    unsigned char newIV[AES_BLOCK_SIZE];

    PRBool aesni = aesni_support();

    if (!inputLen)

        return SECSuccess;

    PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);

    in = input + (inputLen - AES_BLOCK_SIZE);

    memcpy(newIV, in, AES_BLOCK_SIZE);

    out = output + (inputLen - AES_BLOCK_SIZE);

    while (inputLen > AES_BLOCK_SIZE) {

        if (aesni) {

            // Use hardware acceleration for normal AES parameters.

            rijndael_native_decryptBlock(cx, out, in);

            native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]);

        } else {

            rijndael_decryptBlock128(cx, out, in);

            xorBlock(out, out, &in[-AES_BLOCK_SIZE]);

        out -= AES_BLOCK_SIZE;

        in -= AES_BLOCK_SIZE;

        inputLen -= AES_BLOCK_SIZE;

    if (in == input) {

        if (aesni) {

            rijndael_native_decryptBlock(cx, out, in);

            native_xorBlock(out, out, cx->iv);

        } else {

            rijndael_decryptBlock128(cx, out, in);

            xorBlock(out, out, cx->iv);

    memcpy(cx->iv, newIV, AES_BLOCK_SIZE);

    return SECSuccess;

#define FREEBL_CIPHER_WRAP(ctxtype, mmm)                                                    \

    static SECStatus freeblCipher_##mmm(void *vctx, unsigned char *output,                  \

                                        unsigned int *outputLen, unsigned int maxOutputLen, \

                                        const unsigned char *input, unsigned int inputLen,  \

                                        unsigned int blocksize)                             \

    {                                                                                       \

        ctxtype *ctx = vctx;                                                                \

        return mmm(ctx, output, outputLen, maxOutputLen, input, inputLen, blocksize);       \

FREEBL_CIPHER_WRAP(CTRContext, CTR_Update);

FREEBL_CIPHER_WRAP(CTSContext, CTS_DecryptUpdate);

FREEBL_CIPHER_WRAP(CTSContext, CTS_EncryptUpdate);

FREEBL_CIPHER_WRAP(GCMContext, GCM_DecryptUpdate);

FREEBL_CIPHER_WRAP(GCMContext, GCM_EncryptUpdate);

FREEBL_CIPHER_WRAP(AESContext, rijndael_decryptCBC);

FREEBL_CIPHER_WRAP(AESContext, rijndael_decryptECB);

FREEBL_CIPHER_WRAP(AESContext, rijndael_encryptCBC);

FREEBL_CIPHER_WRAP(AESContext, rijndael_encryptECB);

#if defined(INTEL_GCM) && defined(USE_HW_AES)

FREEBL_CIPHER_WRAP(intel_AES_GCMContext, intel_AES_GCM_DecryptUpdate);

FREEBL_CIPHER_WRAP(intel_AES_GCMContext, intel_AES_GCM_EncryptUpdate);

#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)

FREEBL_CIPHER_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DecryptUpdate);

FREEBL_CIPHER_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_EncryptUpdate);

#endif

#if defined(USE_HW_AES)

#if defined(NSS_X86_OR_X64)

FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_128);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_128);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_128);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_128);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_192);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_192);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_192);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_192);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_ecb_256);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_ecb_256);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_encrypt_cbc_256);

FREEBL_CIPHER_WRAP(AESContext, intel_aes_decrypt_cbc_256);

#define freeblCipher_native_aes_ecb_worker(encrypt, keysize)            \

    ((encrypt)                                                          \

         ? ((keysize) == 16   ? freeblCipher_intel_aes_encrypt_ecb_128  \

            : (keysize) == 24 ? freeblCipher_intel_aes_encrypt_ecb_192  \

                              : freeblCipher_intel_aes_encrypt_ecb_256) \

         : ((keysize) == 16   ? freeblCipher_intel_aes_decrypt_ecb_128  \

            : (keysize) == 24 ? freeblCipher_intel_aes_decrypt_ecb_192  \

                              : freeblCipher_intel_aes_decrypt_ecb_256))

#define freeblCipher_native_aes_cbc_worker(encrypt, keysize)            \

    ((encrypt)                                                          \

         ? ((keysize) == 16   ? freeblCipher_intel_aes_encrypt_cbc_128  \

            : (keysize) == 24 ? freeblCipher_intel_aes_encrypt_cbc_192  \

                              : freeblCipher_intel_aes_encrypt_cbc_256) \

         : ((keysize) == 16   ? freeblCipher_intel_aes_decrypt_cbc_128  \

            : (keysize) == 24 ? freeblCipher_intel_aes_decrypt_cbc_192  \

                              : freeblCipher_intel_aes_decrypt_cbc_256))

#else

FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_128);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_128);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_128);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_128);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_192);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_192);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_192);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_192);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_ecb_256);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_ecb_256);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_encrypt_cbc_256);

FREEBL_CIPHER_WRAP(AESContext, arm_aes_decrypt_cbc_256);

#define freeblCipher_native_aes_ecb_worker(encrypt, keysize)          \

    ((encrypt)                                                        \

         ? ((keysize) == 16   ? freeblCipher_arm_aes_encrypt_ecb_128  \

            : (keysize) == 24 ? freeblCipher_arm_aes_encrypt_ecb_192  \

                              : freeblCipher_arm_aes_encrypt_ecb_256) \

         : ((keysize) == 16   ? freeblCipher_arm_aes_decrypt_ecb_128  \

            : (keysize) == 24 ? freeblCipher_arm_aes_decrypt_ecb_192  \

                              : freeblCipher_arm_aes_decrypt_ecb_256))

#define freeblCipher_native_aes_cbc_worker(encrypt, keysize)          \

    ((encrypt)                                                        \

         ? ((keysize) == 16   ? freeblCipher_arm_aes_encrypt_cbc_128  \

            : (keysize) == 24 ? freeblCipher_arm_aes_encrypt_cbc_192  \

                              : freeblCipher_arm_aes_encrypt_cbc_256) \

         : ((keysize) == 16   ? freeblCipher_arm_aes_decrypt_cbc_128  \

            : (keysize) == 24 ? freeblCipher_arm_aes_decrypt_cbc_192  \

                              : freeblCipher_arm_aes_decrypt_cbc_256))

#endif

#endif

#if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64)

FREEBL_CIPHER_WRAP(CTRContext, CTR_Update_HW_AES);

#endif

#define FREEBL_AEAD_WRAP(ctxtype, mmm)                                                                                \

    static SECStatus freeblAead_##mmm(void *vctx, unsigned char *output,                                              \

                                      unsigned int *outputLen, unsigned int maxOutputLen,                             \

                                      const unsigned char *input, unsigned int inputLen,                              \

                                      void *params, unsigned int paramsLen,                                           \

                                      const unsigned char *aad, unsigned int aadLen,                                  \

                                      unsigned int blocksize)                                                         \

    {                                                                                                                 \

        ctxtype *ctx = vctx;                                                                                          \

        return mmm(ctx, output, outputLen, maxOutputLen, input, inputLen, params, paramsLen, aad, aadLen, blocksize); \

FREEBL_AEAD_WRAP(GCMContext, GCM_EncryptAEAD);

FREEBL_AEAD_WRAP(GCMContext, GCM_DecryptAEAD);

#if defined(INTEL_GCM) && defined(USE_HW_AES)

FREEBL_AEAD_WRAP(intel_AES_GCMContext, intel_AES_GCM_EncryptAEAD);

FREEBL_AEAD_WRAP(intel_AES_GCMContext, intel_AES_GCM_DecryptAEAD);

#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)

FREEBL_AEAD_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_EncryptAEAD);

FREEBL_AEAD_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DecryptAEAD);

#endif

#define FREEBL_DESTROY_WRAP(ctxtype, mmm)                      \

    static void freeblDestroy_##mmm(void *vctx, PRBool freeit) \

    {                                                          \

        ctxtype *ctx = vctx;                                   \

        mmm(ctx, freeit);                                      \

FREEBL_DESTROY_WRAP(CTRContext, CTR_DestroyContext);

FREEBL_DESTROY_WRAP(CTSContext, CTS_DestroyContext);

FREEBL_DESTROY_WRAP(GCMContext, GCM_DestroyContext);

#if defined(INTEL_GCM) && defined(USE_HW_AES)

FREEBL_DESTROY_WRAP(intel_AES_GCMContext, intel_AES_GCM_DestroyContext);

#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)

FREEBL_DESTROY_WRAP(ppc_AES_GCMContext, ppc_AES_GCM_DestroyContext);

#endif

/************************************************************************

 * BLAPI Interface functions

 * The following functions implement the encryption routines defined in

 * BLAPI for the AES cipher, Rijndael.

 ***********************************************************************/

AESContext *

AES_AllocateContext(void)

    return PORT_ZNewAligned(AESContext, 16, mem);

/*

** Initialize a new AES context suitable for AES encryption/decryption in

** the ECB or CBC mode.

**  "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC

*/

static SECStatus

aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,

                const unsigned char *iv, int mode, unsigned int encrypt)

    unsigned int Nk;

    PRBool use_hw_aes;

    /* According to AES, block lengths are 128 and key lengths are 128, 192, or

     * 256 bits. We support other key sizes as well [128, 256] as long as the

     * length in bytes is divisible by 4.

*/

    if (key == NULL ||

        keysize < AES_BLOCK_SIZE ||

        keysize > 32 ||

        keysize % 4 != 0) {

        PORT_SetError(SEC_ERROR_INVALID_ARGS);

        return SECFailure;

    if (mode != NSS_AES && mode != NSS_AES_CBC) {

        PORT_SetError(SEC_ERROR_INVALID_ARGS);

        return SECFailure;

    if (mode == NSS_AES_CBC && iv == NULL) {

        PORT_SetError(SEC_ERROR_INVALID_ARGS);

        return SECFailure;

    if (!cx) {

        PORT_SetError(SEC_ERROR_INVALID_ARGS);

        return SECFailure;

#if defined(NSS_X86_OR_X64) || defined(USE_HW_AES)

    use_hw_aes = (aesni_support() || arm_aes_support()) && (keysize % 8) == 0;

#else

    use_hw_aes = PR_FALSE;

#endif

    /* Nb = (block size in bits) / 32 */

    cx->Nb = AES_BLOCK_SIZE / 4;

    /* Nk = (key size in bits) / 32 */

    Nk = keysize / 4;

    /* Obtain number of rounds from "table" */

    cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);

    /* copy in the iv, if neccessary */

    if (mode == NSS_AES_CBC) {

        memcpy(cx->iv, iv, AES_BLOCK_SIZE);

#ifdef USE_HW_AES

        if (use_hw_aes) {

            cx->worker = freeblCipher_native_aes_cbc_worker(encrypt, keysize);

        } else

#endif

            cx->worker = encrypt ? freeblCipher_rijndael_encryptCBC : freeblCipher_rijndael_decryptCBC;

    } else {

#ifdef USE_HW_AES

        if (use_hw_aes) {

            cx->worker = freeblCipher_native_aes_ecb_worker(encrypt, keysize);

        } else

#endif

            cx->worker = encrypt ? freeblCipher_rijndael_encryptECB : freeblCipher_rijndael_decryptECB;

    PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);

    if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {

        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);

        return SECFailure;

#ifdef USE_HW_AES

    if (use_hw_aes) {

        native_aes_init(encrypt, keysize);

    } else

#endif

        /* Generate expanded key */

        if (encrypt) {

            if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES ||

                               cx->mode == NSS_AES_CTR)) {

                PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);

                /* Prepare hardware key for normal AES parameters. */

                rijndael_native_key_expansion(cx, key, Nk);

            } else {

                rijndael_key_expansion(cx, key, Nk);

        } else {

            rijndael_invkey_expansion(cx, key, Nk);

        BLAPI_CLEAR_STACK(256)

    cx->worker_cx = cx;

    cx->destroy = NULL;

    cx->isBlock = PR_TRUE;

    return SECSuccess;

SECStatus

AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,

                const unsigned char *iv, int mode, unsigned int encrypt,

                unsigned int blocksize)

    int basemode = mode;

    PRBool baseencrypt = encrypt;

    SECStatus rv;

    if (blocksize != AES_BLOCK_SIZE) {

        PORT_SetError(SEC_ERROR_INVALID_ARGS);

        return SECFailure;

    switch (mode) {

        case NSS_AES_CTS:

            basemode = NSS_AES_CBC;

            break;

        case NSS_AES_GCM:

        case NSS_AES_CTR:

            basemode = NSS_AES;

            baseencrypt = PR_TRUE;

            break;

    /* Make sure enough is initialized so we can safely call Destroy. */

    cx->worker_cx = NULL;

    cx->destroy = NULL;

    cx->mode = mode;

    rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt);

    if (rv != SECSuccess) {

        AES_DestroyContext(cx, PR_FALSE);

        return rv;

    /* finally, set up any mode specific contexts */

    cx->worker_aead = 0;

    switch (mode) {

        case NSS_AES_CTS:

            cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv);

            cx->worker = encrypt ? freeblCipher_CTS_EncryptUpdate : freeblCipher_CTS_DecryptUpdate;

            cx->destroy = freeblDestroy_CTS_DestroyContext;

            cx->isBlock = PR_FALSE;

            break;

        case NSS_AES_GCM:

#if defined(INTEL_GCM) && defined(USE_HW_AES)

            if (aesni_support() && (keysize % 8) == 0 && avx_support() &&

                clmul_support()) {

                cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv);

                cx->worker = encrypt ? freeblCipher_intel_AES_GCM_EncryptUpdate

                                     : freeblCipher_intel_AES_GCM_DecryptUpdate;

                cx->worker_aead = encrypt ? freeblAead_intel_AES_GCM_EncryptAEAD

                                          : freeblAead_intel_AES_GCM_DecryptAEAD;

                cx->destroy = freeblDestroy_intel_AES_GCM_DestroyContext;

                cx->isBlock = PR_FALSE;

            } else

#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)

            if (ppc_crypto_support() && (keysize % 8) == 0) {

                cx->worker_cx = ppc_AES_GCM_CreateContext(cx, cx->worker, iv);

                cx->worker = encrypt ? freeblCipher_ppc_AES_GCM_EncryptUpdate

                                     : freeblCipher_ppc_AES_GCM_DecryptUpdate;

                cx->worker_aead = encrypt ? freeblAead_ppc_AES_GCM_EncryptAEAD

                                          : freeblAead_ppc_AES_GCM_DecryptAEAD;

                cx->destroy = freeblDestroy_ppc_AES_GCM_DestroyContext;

                cx->isBlock = PR_FALSE;

            } else

#endif

                cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv);

                cx->worker = encrypt ? freeblCipher_GCM_EncryptUpdate

                                     : freeblCipher_GCM_DecryptUpdate;

                cx->worker_aead = encrypt ? freeblAead_GCM_EncryptAEAD

                                          : freeblAead_GCM_DecryptAEAD;

                cx->destroy = freeblDestroy_GCM_DestroyContext;

                cx->isBlock = PR_FALSE;

            break;

        case NSS_AES_CTR:

            cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv);

#if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64)

            if (aesni_support() && (keysize % 8) == 0) {

                cx->worker = freeblCipher_CTR_Update_HW_AES;

            } else

#endif

                cx->worker = freeblCipher_CTR_Update;

            cx->destroy = freeblDestroy_CTR_DestroyContext;

            cx->isBlock = PR_FALSE;

            break;

        default:

            /* everything has already been set up by aes_InitContext, just

             * return */

            return SECSuccess;

    /* check to see if we succeeded in getting the worker context */

    if (cx->worker_cx == NULL) {

        /* no, just destroy the existing context */

        cx->destroy = NULL; /* paranoia, though you can see a dozen lines */

                            /* below that this isn't necessary */

        AES_DestroyContext(cx, PR_FALSE);

        return SECFailure;

    return SECSuccess;

/* AES_CreateContext

 * create a new context for Rijndael operations

*/

AESContext *

AES_CreateContext(const unsigned char *key, const unsigned char *iv,

                  int mode, int encrypt,

                  unsigned int keysize, unsigned int blocksize)

    AESContext *cx = AES_AllocateContext();

    if (cx) {

        SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt,

                                       blocksize);

        if (rv != SECSuccess) {

            AES_DestroyContext(cx, PR_TRUE);

            cx = NULL;

    return cx;

/*

 * AES_DestroyContext

 * Zero an AES cipher context.  If freeit is true, also free the pointer

 * to the context.

*/

void

AES_DestroyContext(AESContext *cx, PRBool freeit)

    void *mem = cx->mem;

    if (cx->worker_cx && cx->destroy) {

        (*cx->destroy)(cx->worker_cx, PR_TRUE);

        cx->worker_cx = NULL;

        cx->destroy = NULL;

    PORT_Memset(cx, 0, sizeof(AESContext));

    if (freeit) {

        PORT_Free(mem);

    } else {

        /* if we are not freeing the context, restore mem, We may get called

         * again to actually free the context */

        cx->mem = mem;

/*

 * AES_Encrypt

 * Encrypt an arbitrary-length buffer.  The output buffer must already be

 * allocated to at least inputLen.

*/

SECStatus

AES_Encrypt(AESContext *cx, unsigned char *output,

            unsigned int *outputLen, unsigned int maxOutputLen,

            const unsigned char *input, unsigned int inputLen)

    /* Check args */

    SECStatus rv;

    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {

        PORT_SetError(SEC_ERROR_INVALID_ARGS);

        return SECFailure;

    if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {

        PORT_SetError(SEC_ERROR_INPUT_LEN);

        return SECFailure;

    if (maxOutputLen < inputLen) {

        PORT_SetError(SEC_ERROR_OUTPUT_LEN);

        return SECFailure;

    *outputLen = inputLen;

#if UINT_MAX > MP_32BIT_MAX

/*

     * we can guarentee that GSM won't overlfow if we limit the input to

     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.

     * We do it here to cover both hardware and software GCM operations.

*/

        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);

    if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) {

        PORT_SetError(SEC_ERROR_OUTPUT_LEN);

        return SECFailure;

#else

    /* if we can't pass in a 32_bit number, then no such check needed */

        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);

#endif

    rv = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,

                       input, inputLen, AES_BLOCK_SIZE);

    BLAPI_CLEAR_STACK(256)

    return rv;

/*

 * AES_Decrypt

 * Decrypt and arbitrary-length buffer.  The output buffer must already be

 * allocated to at least inputLen.

*/

SECStatus

AES_Decrypt(AESContext *cx, unsigned char *output,

            unsigned int *outputLen, unsigned int maxOutputLen,

            const unsigned char *input, unsigned int inputLen)

    SECStatus rv;

    /* Check args */

    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {

        PORT_SetError(SEC_ERROR_INVALID_ARGS);

        return SECFailure;

    if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {

        PORT_SetError(SEC_ERROR_INPUT_LEN);

        return SECFailure;

    if ((cx->mode != NSS_AES_GCM) && (maxOutputLen < inputLen)) {

        PORT_SetError(SEC_ERROR_OUTPUT_LEN);

        return SECFailure;

    *outputLen = inputLen;

    rv = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,

                       input, inputLen, AES_BLOCK_SIZE);

    BLAPI_CLEAR_STACK(256)

    return rv;

/*

 * AES_Encrypt_AEAD

 * Encrypt using GCM or CCM. include the nonce, extra data, and the tag

*/

SECStatus

AES_AEAD(AESContext *cx, unsigned char *output,

         unsigned int *outputLen, unsigned int maxOutputLen,

         const unsigned char *input, unsigned int inputLen,

         void *params, unsigned int paramsLen,

         const unsigned char *aad, unsigned int aadLen)

    SECStatus rv;

    /* Check args */

    if (cx == NULL || output == NULL || (input == NULL && inputLen != 0) || (aad == NULL && aadLen != 0) || params == NULL) {

        PORT_SetError(SEC_ERROR_INVALID_ARGS);

        return SECFailure;

    if (cx->worker_aead == NULL) {

        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);

        return SECFailure;

    if (maxOutputLen < inputLen) {

        PORT_SetError(SEC_ERROR_OUTPUT_LEN);

        return SECFailure;

    *outputLen = inputLen;

#if UINT_MAX > MP_32BIT_MAX

/*

     * we can guarentee that GSM won't overlfow if we limit the input to

     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.

     * We do it here to cover both hardware and software GCM operations.

*/

        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);

    if (inputLen > MP_32BIT_MAX) {

        PORT_SetError(SEC_ERROR_OUTPUT_LEN);

        return SECFailure;

#else

    /* if we can't pass in a 32_bit number, then no such check needed */

        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);

#endif

    rv = (*cx->worker_aead)(cx->worker_cx, output, outputLen, maxOutputLen,

                            input, inputLen, params, paramsLen, aad, aadLen,

                            AES_BLOCK_SIZE);

    BLAPI_CLEAR_STACK(256)

    return rv;