/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
   Licensed under the Apache 2.0 License. */

/* This file was generated by KaRaMeL <https://github.com/FStarLang/karamel>
 * and then hand-edited to use MSVC intrinsics.
 *
 * KaRaMeL invocation:
 *   C:\users\barrybo\mitls2c\karamel\_build\src\Karamel.native -minimal
 *   -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir
 *   ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include
 *   "krmllib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims
 *
 * F* version: 15104ff8
 * KaRaMeL version: 318b7fa8
 */
#ifndef FSTAR_UINT128_MSVC
#define FSTAR_UINT128_MSVC
#include "FStar_UInt128.h"
#include "FStar_UInt_8_16_32_64.h"
#include "krml/internal/types.h"
#ifndef _MSC_VER
#error This file only works with the MSVC compiler
#endif
/* JP: need to rip out HAS_OPTIMIZED since the header guards in types.h are now
* done properly and only include this file when we know for sure we are on
* 64-bit MSVC. */
#if defined(_M_X64) && !defined(KRML_VERIFIED_UINT128)
#define HAS_OPTIMIZED 1
#else
#define HAS_OPTIMIZED 0
#endif
// Define .low and .high in terms of the __m128i fields, to reduce
// the amount of churn in this file.
#if HAS_OPTIMIZED
#include <immintrin.h>
#include <intrin.h>
#define low m128i_u64[0]
#define high m128i_u64[1]
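/* With these macros, an access such as `n.low` below compiles to
 * `n.m128i_u64[0]`, the least-significant 64 bits of the MSVC __m128i union
 * (and `n.high` to `n.m128i_u64[1]`). */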
#endif
inline static FStar_UInt128_uint128
load128_le(uint8_t *b)
{
#if HAS_OPTIMIZED
return _mm_loadu_si128((__m128i *)b);
#else
FStar_UInt128_uint128 lit;
lit.low = load64_le(b);
lit.high = load64_le(b + 8);
return lit;
#endif
}
inline static void
store128_le(uint8_t *b, FStar_UInt128_uint128 n)
{
store64_le(b, n.low);
store64_le(b + 8, n.high);
}
inline static FStar_UInt128_uint128
load128_be(uint8_t *b)
{
uint64_t l = load64_be(b + 8);
uint64_t h = load64_be(b);
#if HAS_OPTIMIZED
return _mm_set_epi64x(h, l);
#else
FStar_UInt128_uint128 lit;
lit.low = l;
lit.high = h;
return lit;
#endif
}
inline static void
store128_be(uint8_t *b, FStar_UInt128_uint128 n)
{
store64_be(b, n.high);
store64_be(b + 8, n.low);
}
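/* Branch-free unsigned comparison: the expression below evaluates to 1 when
 * a < b and to 0 otherwise. If the top bits of a and b differ, (a ^ b) has its
 * top bit set and the result equals b's top bit; otherwise the top bit of
 * (a - b) records whether the subtraction wrapped. Shifting that combined
 * sign bit right by 63 yields the 0/1 carry. */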
inline static uint64_t
FStar_UInt128_constant_time_carry(uint64_t a,
uint64_t b)
{
return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U;
}
inline static uint64_t
FStar_UInt128_carry(uint64_t a, uint64_t b)
{
return FStar_UInt128_constant_time_carry(a, b);
}
inline static FStar_UInt128_uint128
FStar_UInt128_add(FStar_UInt128_uint128 a,
FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
uint64_t l, h;
unsigned char carry =
_addcarry_u64(0, a.low, b.low, &l); // low/CF = a.low+b.low+0
_addcarry_u64(carry, a.high, b.high, &h); // high = a.high+b.high+CF
return _mm_set_epi64x(h, l);
#else
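/* FStar_UInt128_carry(a.low + b.low, b.low) is 1 exactly when the low-word
 * addition wrapped around (a.low + b.low < b.low), so it propagates the carry
 * into the high word without branching. */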
FStar_UInt128_uint128 lit;
lit.low = a.low + b.low;
lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low);
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_add_underspec(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
return FStar_UInt128_add(a, b);
#else
FStar_UInt128_uint128 lit;
lit.low = a.low + b.low;
lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low);
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_add_mod(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
return FStar_UInt128_add(a, b);
#else
FStar_UInt128_uint128 lit;
lit.low = a.low + b.low;
lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low);
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_sub(FStar_UInt128_uint128 a,
FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
uint64_t l, h;
unsigned char borrow = _subborrow_u64(0, a.low, b.low, &l);
_subborrow_u64(borrow, a.high, b.high, &h);
return _mm_set_epi64x(h, l);
#else
FStar_UInt128_uint128 lit;
lit.low = a.low - b.low;
lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low);
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_sub_underspec(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
return FStar_UInt128_sub(a, b);
#else
FStar_UInt128_uint128 lit;
lit.low = a.low - b.low;
lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low);
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_sub_mod_impl(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
FStar_UInt128_uint128 lit;
lit.low = a.low - b.low;
lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low);
return lit;
}
inline static FStar_UInt128_uint128
FStar_UInt128_sub_mod(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
return FStar_UInt128_sub(a, b);
#else
return FStar_UInt128_sub_mod_impl(a, b);
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_logand(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
return _mm_and_si128(a, b);
#else
FStar_UInt128_uint128 lit;
lit.low = a.low & b.low;
lit.high = a.high & b.high;
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_logxor(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
return _mm_xor_si128(a, b);
#else
FStar_UInt128_uint128 lit;
lit.low = a.low ^ b.low;
lit.high = a.high ^ b.high;
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_logor(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
return _mm_or_si128(a, b);
#else
FStar_UInt128_uint128 lit;
lit.low = a.low | b.low;
lit.high = a.high | b.high;
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_lognot(
FStar_UInt128_uint128 a)
{
#if HAS_OPTIMIZED
// Bitwise NOT: XOR with all-ones. (_mm_andnot_si128(a, a) computes (~a) & a,
// which is always zero, so it cannot be used here.)
return _mm_xor_si128(a, _mm_set1_epi32(-1));
#else
FStar_UInt128_uint128 lit;
lit.low = ~a.low;
lit.high = ~a.high;
return lit;
#endif
}
static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U;
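/* The two shift helpers below assume 0 < s < 64: for s == 0 they would shift
 * a uint64_t by 64, which is undefined behaviour in C, so callers special-case
 * s == 0 before reaching them. */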
inline static uint64_t
FStar_UInt128_add_u64_shift_left(uint64_t hi,
uint64_t lo,
uint32_t s)
{
return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s));
}
inline static uint64_t
FStar_UInt128_add_u64_shift_left_respec(uint64_t hi,
uint64_t lo,
uint32_t s)
{
return FStar_UInt128_add_u64_shift_left(hi, lo, s);
}
inline static FStar_UInt128_uint128
FStar_UInt128_shift_left_small(
FStar_UInt128_uint128 a, uint32_t s)
{
if (s == (uint32_t)0U)
return a;
else {
FStar_UInt128_uint128 lit;
lit.low = a.low << s;
lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s);
return lit;
}
}
inline static FStar_UInt128_uint128
FStar_UInt128_shift_left_large(
FStar_UInt128_uint128 a, uint32_t s)
{
FStar_UInt128_uint128 lit;
lit.low = (uint64_t)0U;
lit.high = a.low << (s - FStar_UInt128_u32_64);
return lit;
}
inline static FStar_UInt128_uint128
FStar_UInt128_shift_left(
FStar_UInt128_uint128 a, uint32_t s)
{
#if HAS_OPTIMIZED
if (s == 0) {
return a;
} else if (s < FStar_UInt128_u32_64) {
uint64_t l = a.low << s;
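// __shiftleft128(lo, hi, s) returns the upper 64 bits of the 128-bit value
// hi:lo shifted left by s; here it captures the bits of a.low that cross into
// the high word (valid because 0 < s < 64 on this path).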
uint64_t h = __shiftleft128(a.low, a.high, (unsigned char)s);
return _mm_set_epi64x(h, l);
} else {
return _mm_set_epi64x(a.low << (s - FStar_UInt128_u32_64), 0);
}
#else
if (s < FStar_UInt128_u32_64)
return FStar_UInt128_shift_left_small(a, s);
else
return FStar_UInt128_shift_left_large(a, s);
#endif
}
inline static uint64_t
FStar_UInt128_add_u64_shift_right(uint64_t hi,
uint64_t lo,
uint32_t s)
{
return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s));
}
inline static uint64_t
FStar_UInt128_add_u64_shift_right_respec(uint64_t hi,
uint64_t lo,
uint32_t s)
{
return FStar_UInt128_add_u64_shift_right(hi, lo, s);
}
inline static FStar_UInt128_uint128
FStar_UInt128_shift_right_small(
FStar_UInt128_uint128 a, uint32_t s)
{
if (s == (uint32_t)0U)
return a;
else {
FStar_UInt128_uint128 lit;
lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s);
lit.high = a.high >> s;
return lit;
}
}
inline static FStar_UInt128_uint128
FStar_UInt128_shift_right_large(
FStar_UInt128_uint128 a, uint32_t s)
{
FStar_UInt128_uint128 lit;
lit.low = a.high >> (s - FStar_UInt128_u32_64);
lit.high = (uint64_t)0U;
return lit;
}
inline static FStar_UInt128_uint128
FStar_UInt128_shift_right(
FStar_UInt128_uint128 a, uint32_t s)
{
#if HAS_OPTIMIZED
if (s == 0) {
return a;
} else if (s < FStar_UInt128_u32_64) {
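// __shiftright128(lo, hi, s) returns the lower 64 bits of the 128-bit value
// hi:lo shifted right by s, i.e. a.low shifted right with bits of a.high
// shifted in from above (0 < s < 64 on this path).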
uint64_t l = __shiftright128(a.low, a.high, (unsigned char)s);
uint64_t h = a.high >> s;
return _mm_set_epi64x(h, l);
} else {
return _mm_set_epi64x(0, a.high >> (s - FStar_UInt128_u32_64));
}
#else
if (s < FStar_UInt128_u32_64)
return FStar_UInt128_shift_right_small(a, s);
else
return FStar_UInt128_shift_right_large(a, s);
#endif
}
inline static bool
FStar_UInt128_eq(FStar_UInt128_uint128 a,
FStar_UInt128_uint128 b)
{
return a.low == b.low && a.high == b.high;
}
inline static bool
FStar_UInt128_gt(FStar_UInt128_uint128 a,
FStar_UInt128_uint128 b)
{
return a.high > b.high || (a.high == b.high && a.low > b.low);
}
inline static bool
FStar_UInt128_lt(FStar_UInt128_uint128 a,
FStar_UInt128_uint128 b)
{
return a.high < b.high || (a.high == b.high && a.low < b.low);
}
inline static bool
FStar_UInt128_gte(FStar_UInt128_uint128 a,
FStar_UInt128_uint128 b)
{
return a.high > b.high || (a.high == b.high && a.low >= b.low);
}
inline static bool
FStar_UInt128_lte(FStar_UInt128_uint128 a,
FStar_UInt128_uint128 b)
{
return a.high < b.high || (a.high == b.high && a.low <= b.low);
}
inline static FStar_UInt128_uint128
FStar_UInt128_eq_mask(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED
// PCMPW to produce 4 32-bit values, all either 0x0 or 0xffffffff
__m128i r32 = _mm_cmpeq_epi32(a, b);
// Shuffle 3,2,1,0 into 2,3,0,1 (swapping dwords inside each half)
__m128i s32 = _mm_shuffle_epi32(r32, _MM_SHUFFLE(2, 3, 0, 1));
// Bitwise and to compute (3&2),(2&3),(1&0),(0&1)
__m128i ret64 = _mm_and_si128(r32, s32);
// Swap the two 64-bit values to form s64
__m128i s64 =
_mm_shuffle_epi32(ret64, _MM_SHUFFLE(1, 0, 3, 2)); // 3,2,1,0 -> 1,0,3,2
// And them together
return _mm_and_si128(ret64, s64);
#else
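/* Both halves receive the same full-width mask: all ones when both 64-bit
 * halves are equal, all zeros otherwise, so the result is a proper 128-bit
 * constant-time equality mask. */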
FStar_UInt128_uint128 lit;
lit.low =
FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high);
lit.high =
FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high);
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_gte_mask(
FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
{
#if HAS_OPTIMIZED && 0
// ge - compare 3,2,1,0 for >= and generating 0 or 0xffffffff for each
// eq - compare 3,2,1,0 for == and generating 0 or 0xffffffff for each
// slot 0 = ge0 | (eq0 & ge1) | (eq0 & eq1 & ge2) | (eq0 & eq1 & eq2 & ge3)
// then splat slot 0 to 3,2,1,0
__m128i gt = _mm_cmpgt_epi32(a, b);
__m128i eq = _mm_cmpeq_epi32(a, b);
__m128i ge = _mm_or_si128(gt, eq);
__m128i ge0 = ge;
__m128i eq0 = eq;
__m128i ge1 = _mm_srli_si128(ge, 4); // shift ge from 3,2,1,0 to 0x0,3,2,1
__m128i t1 = _mm_and_si128(eq0, ge1);
__m128i ret = _mm_or_si128(ge, t1); // ge0 | (eq0 & ge1) is now in 0
__m128i eq1 = _mm_srli_si128(eq, 4); // shift eq from 3,2,1,0 to 0x0,3,2,1
__m128i ge2 =
_mm_srli_si128(ge1, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,3,2
__m128i t2 =
_mm_and_si128(eq0, _mm_and_si128(eq1, ge2)); // t2 = (eq0 & eq1 & ge2)
ret = _mm_or_si128(ret, t2);
__m128i eq2 = _mm_srli_si128(eq1, 4); // shift eq from 3,2,1,0 to 0x0,00,00,3
__m128i ge3 = _mm_srli_si128(
ge2, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,0x0,3
__m128i t3 = _mm_and_si128(
eq0, _mm_and_si128(
eq1, _mm_and_si128(eq2, ge3))); // t3 = (eq0 & eq1 & eq2 & ge3)
ret = _mm_or_si128(ret, t3);
return _mm_shuffle_epi32(
ret, _MM_SHUFFLE(0, 0, 0,
0)); // the result is in 0. Shuffle into all dwords.
#else
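/* a >= b holds when a.high > b.high, or when the high words are equal and
 * a.low >= b.low; the resulting all-ones/all-zeros mask is replicated into
 * both halves. */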
FStar_UInt128_uint128 lit;
lit.low = (FStar_UInt64_gte_mask(a.high, b.high) &
~FStar_UInt64_eq_mask(a.high, b.high)) |
(FStar_UInt64_eq_mask(a.high, b.high) &
FStar_UInt64_gte_mask(a.low, b.low));
lit.high = (FStar_UInt64_gte_mask(a.high, b.high) &
~FStar_UInt64_eq_mask(a.high, b.high)) |
(FStar_UInt64_eq_mask(a.high, b.high) &
FStar_UInt64_gte_mask(a.low, b.low));
return lit;
#endif
}
inline static FStar_UInt128_uint128
FStar_UInt128_uint64_to_uint128(
uint64_t a)
{
#if HAS_OPTIMIZED
return _mm_set_epi64x(0, a);
#else
FStar_UInt128_uint128 lit;
lit.low = a;
lit.high = (uint64_t)0U;
return lit;
#endif
}
inline static uint64_t
FStar_UInt128_uint128_to_uint64(
FStar_UInt128_uint128 a)
{
return a.low;
}
inline static uint64_t
FStar_UInt128_u64_mod_32(uint64_t a)
{
return a & (uint64_t)0xffffffffU;
}
static const uint32_t FStar_UInt128_u32_32 = (uint32_t)32U;
inline static uint64_t
FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo)
{
return lo + (hi << FStar_UInt128_u32_32);
}
inline static FStar_UInt128_uint128
FStar_UInt128_mul32(uint64_t x,
uint32_t y)
{
#if HAS_OPTIMIZED
uint64_t l, h;
l = _umul128(x, (uint64_t)y, &h);
return _mm_set_epi64x(h, l);
#else
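/* Portable 64x32 multiply: split x into 32-bit halves, multiply each half by
 * y, then recombine, carrying the upper 32 bits of the low partial product
 * into the high word. */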
FStar_UInt128_uint128 lit;
lit.low = FStar_UInt128_u32_combine(
(x >> FStar_UInt128_u32_32) * (uint64_t)y +
((FStar_UInt128_u64_mod_32(x) * (uint64_t)y) >> FStar_UInt128_u32_32),
FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y));
lit.high =
((x >> FStar_UInt128_u32_32) * (uint64_t)y +
((FStar_UInt128_u64_mod_32(x) * (uint64_t)y) >> FStar_UInt128_u32_32)) >>
FStar_UInt128_u32_32;
return lit;
#endif
}
/* Note: static headers bring scope collision issues when they define types!
* Because now client (karamel-generated) code will include this header and
* there might be type collisions if the client code uses quadruples of uint64s.
* So, we cannot use the karamel-generated name. */
typedef struct K_quad_s {
uint64_t fst;
uint64_t snd;
uint64_t thd;
uint64_t f3;
} K_quad;
inline static K_quad
FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y)
{
K_quad tmp;
tmp.fst = FStar_UInt128_u64_mod_32(x);
tmp.snd = FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) *
FStar_UInt128_u64_mod_32(y));
tmp.thd = x >> FStar_UInt128_u32_32;
tmp.f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) +
((FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)) >>
FStar_UInt128_u32_32);
return tmp;
}
static uint64_t
FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo)
{
return lo + (hi << FStar_UInt128_u32_32);
}
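/* Schoolbook 64x64 -> 128 multiplication on 32-bit limbs. In the result of
 * FStar_UInt128_mul_wide_impl_t_: fst = x mod 2^32, snd = low 32 bits of
 * (x mod 2^32) * (y mod 2^32), thd = x >> 32, and f3 = (x >> 32) * (y mod 2^32)
 * plus the high 32 bits of (x mod 2^32) * (y mod 2^32). The remaining cross
 * products involving (y >> 32) are added in mul_wide_impl below. */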
inline static FStar_UInt128_uint128
FStar_UInt128_mul_wide_impl(uint64_t x,
uint64_t y)
{
K_quad scrut = FStar_UInt128_mul_wide_impl_t_(x, y);
uint64_t u1 = scrut.fst;
uint64_t w3 = scrut.snd;
uint64_t x_ = scrut.thd;
uint64_t t_ = scrut.f3;
FStar_UInt128_uint128 lit;
lit.low = FStar_UInt128_u32_combine_(
u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3);
lit.high =
x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) +
((u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_)) >>
FStar_UInt128_u32_32);
return lit;
}
inline static FStar_UInt128_uint128
FStar_UInt128_mul_wide(uint64_t x,
uint64_t y)
{
#if HAS_OPTIMIZED
uint64_t l, h;
l = _umul128(x, y, &h);
return _mm_set_epi64x(h, l);
#else
return FStar_UInt128_mul_wide_impl(x, y);
#endif
}
#undef low
#undef high
#endif