// mul128.h - mozsearch
// Enable keyboard shortcuts

/*

* 64x64->128 bit multiply operation

* (C) 2013,2015 Jack Lloyd

* Botan is released under the Simplified BSD License (see license.txt)

*/

#ifndef BOTAN_UTIL_MUL128_H_

#define BOTAN_UTIL_MUL128_H_

#include <botan/types.h>

BOTAN_FUTURE_INTERNAL_HEADER(mul128.h)

namespace Botan {

/*
* When the compiler advertises a 128-bit integer (__SIZEOF_INT128__) and the
* build configuration reports a native 64-bit CPU, expose that integer as
* uint128_t and define BOTAN_TARGET_HAS_NATIVE_UINT128 so that later code
* can test for it.
*/
#if defined(__SIZEOF_INT128__) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
   #define BOTAN_TARGET_HAS_NATIVE_UINT128

   // Prefer TI mode over __int128 as GCC rejects the latter in pedantic mode
   #if defined(__GNUG__)
     typedef unsigned int uint128_t __attribute__((mode(TI)));
   #else
     typedef unsigned __int128 uint128_t;
   #endif
#endif

}

/*
* BOTAN_FAST_64X64_MUL(a, b, lo, hi)
*
* Statement-like macro: multiplies the 64-bit values a and b and stores the
* low 64 bits of the product through the pointer lo and the high 64 bits
* through the pointer hi. It is only defined when one of the strategies
* below is available; users must check defined(BOTAN_FAST_64X64_MUL).
*
* Every continuation line must end in a backslash immediately followed by
* the newline, with no blank lines in between, or the macro body is cut off.
*/
#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)

// Widen one operand to uint128_t so the product is computed in 128 bits,
// then split the result into its two 64-bit halves.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi)                        \
   do {                                                        \
      const uint128_t r = static_cast<uint128_t>(a) * (b);     \
      *(hi) = (r >> 64) & 0xFFFFFFFFFFFFFFFF;                  \
      *(lo) = (r      ) & 0xFFFFFFFFFFFFFFFF;                  \
   } while(0)

#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)

#include <intrin.h>
#pragma intrinsic(_umul128)

// _umul128 returns the low half and writes the high half through hi.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \
   do { *(lo) = _umul128((a), (b), (hi)); } while(0)

#elif defined(BOTAN_USE_GCC_INLINE_ASM)

#if defined(BOTAN_TARGET_ARCH_IS_X86_64)

// mulq: a is bound to the "a" register; the outputs are taken from the
// "d" register (high half) and the "a" register (low half).
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                                 \
   asm("mulq %3" : "=d" (*(hi)), "=a" (*(lo)) : "a" (a), "rm" (b) : "cc");   \
   } while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_ALPHA)

// The asm result is stored as the high half; the low half is the
// ordinary (wrapping) 64-bit product.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                 \
   asm("umulh %1,%2,%0" : "=r" (*(hi)) : "r" (a), "r" (b));  \
   *(lo) = (a) * (b);                                        \
} while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_IA64)

// The asm result is stored as the high half; the low half is the
// ordinary (wrapping) 64-bit product.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                   \
   asm("xmpy.hu %0=%1,%2" : "=f" (*(hi)) : "f" (a), "f" (b));  \
   *(lo) = (a) * (b);                                          \
} while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_PPC64)

// The asm result is stored as the high half; the low half is the
// ordinary (wrapping) 64-bit product.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                         \
   asm("mulhdu %0,%1,%2" : "=r" (*(hi)) : "r" (a), "r" (b) : "cc");  \
   *(lo) = (a) * (b);                                                \
} while(0)

#endif

#endif

namespace Botan {

/**
* Perform a 64x64->128 bit multiplication
*
* @param a first 64-bit factor
* @param b second 64-bit factor
* @param lo receives the low 64 bits of a*b; dereferenced unconditionally,
*        so it must point to valid storage
* @param hi receives the high 64 bits of a*b; dereferenced unconditionally,
*        so it must point to valid storage
*/
inline void mul64x64_128(uint64_t a, uint64_t b, uint64_t* lo, uint64_t* hi)
   {
#if defined(BOTAN_FAST_64X64_MUL)
   BOTAN_FAST_64X64_MUL(a, b, lo, hi);
#else

   /*
   * Do a 64x64->128 multiply using four 32x32->64 multiplies plus
   * some adds and shifts. Last resort for CPUs like UltraSPARC (with
   * 64-bit registers/ALU, but no 64x64->128 multiply) or 32-bit CPUs.
   */
   const size_t HWORD_BITS = 32;
   const uint32_t HWORD_MASK = 0xFFFFFFFF;

   // Split each operand into 32-bit halves (the narrowing is intentional)
   const uint32_t a_hi = static_cast<uint32_t>(a >> HWORD_BITS);
   const uint32_t a_lo = static_cast<uint32_t>(a  & HWORD_MASK);
   const uint32_t b_hi = static_cast<uint32_t>(b >> HWORD_BITS);
   const uint32_t b_lo = static_cast<uint32_t>(b  & HWORD_MASK);

   // Partial products: a*b == x0*2^64 + (x1 + x2)*2^32 + x3
   uint64_t x0 = static_cast<uint64_t>(a_hi) * b_hi;
   uint64_t x1 = static_cast<uint64_t>(a_lo) * b_hi;
   uint64_t x2 = static_cast<uint64_t>(a_hi) * b_lo;
   uint64_t x3 = static_cast<uint64_t>(a_lo) * b_lo;

   // this cannot overflow as (2^32-1)^2 + 2^32-1 < 2^64-1
   x2 += x3 >> HWORD_BITS;

   // this one can overflow
   x2 += x1;

   // propagate the carry if any: a wrapped sum is smaller than its addend,
   // and the lost bit has weight 2^64 * 2^32, i.e. 2^32 within x0
   x0 += static_cast<uint64_t>(static_cast<bool>(x2 < x1)) << HWORD_BITS;

   *hi = x0 + (x2 >> HWORD_BITS);
   *lo  = ((x2 & HWORD_MASK) << HWORD_BITS) + (x3 & HWORD_MASK);
#endif
   }

}

#endif