Source code
Revision control
Copy as Markdown
Other Tools
//! Defines rounding schemes for floating-point numbers.
use super::float::ExtendedFloat;
use super::num::*;
use super::shift::*;
use core::mem;
// MASKS
/// Build a `u64` in which only bit `n` is set (the value `2^n`).
///
/// `n` must be smaller than the bit width of `u64`; this precondition is
/// only verified in debug builds.
#[inline]
pub(crate) fn nth_bit(n: u64) -> u64 {
    let width = (mem::size_of::<u64>() * 8) as u64;
    debug_assert!(n < width, "nth_bit() overflow in shl.");
    1_u64 << n
}
/// Build a mask whose lowest `n` bits are set and all other bits are clear.
///
/// `n` may be anywhere from 0 up to and including the bit width of `u64`
/// (checked in debug builds only). The full-width case is handled separately
/// because shifting a `u64` by its own width would overflow.
#[inline]
pub(crate) fn lower_n_mask(n: u64) -> u64 {
    let width = (mem::size_of::<u64>() * 8) as u64;
    debug_assert!(n <= width, "lower_n_mask() overflow in shl.");
    if n != width {
        // 2^n - 1 has exactly the low `n` bits set.
        (1_u64 << n) - 1
    } else {
        u64::MAX
    }
}
/// Return the halfway value of an `n`-bit field: the value with only the
/// highest of the lower `n` bits set (`2^(n-1)`).
///
/// For `n == 0` there is no such bit, so the result is 0. `n` must not
/// exceed the bit width of `u64` (checked in debug builds only).
#[inline]
pub(crate) fn lower_n_halfway(n: u64) -> u64 {
    let width = (mem::size_of::<u64>() * 8) as u64;
    debug_assert!(n <= width, "lower_n_halfway() overflow in shl.");
    match n {
        0 => 0,
        _ => nth_bit(n - 1),
    }
}
/// Calculate a bitwise mask with `n` 1-bits ending just below the `bit`
/// position.
///
/// The set bits are the indices `bit - n` through `bit - 1` (zero-based from
/// the least-significant bit); every other bit is clear. For example,
/// `internal_n_mask(4, 2)` is `0b1100`.
///
/// Preconditions (checked in debug builds only): `bit` and `n` must not
/// exceed the bit width of `u64`, and `bit >= n`.
#[inline]
pub(crate) fn internal_n_mask(bit: u64, n: u64) -> u64 {
    let bits: u64 = mem::size_of::<u64>() as u64 * 8;
    debug_assert!(bit <= bits, "internal_n_mask() overflow in shl.");
    debug_assert!(n <= bits, "internal_n_mask() overflow in shl.");
    debug_assert!(bit >= n, "internal_n_mask() overflow in sub.");
    // Bits [0, bit) XOR bits [0, bit - n) leaves exactly bits [bit - n, bit).
    lower_n_mask(bit) ^ lower_n_mask(bit - n)
}
// NEAREST ROUNDING
/// Shift the mantissa right by `shift` bits and classify the bits that were
/// shifted out.
///
/// Returns `(is_above, is_halfway)`:
/// - `is_above` is true when the truncated bits are strictly greater than
///   the halfway point of the truncated range;
/// - `is_halfway` is true when they are exactly equal to it.
///
/// The caller uses these flags to decide whether to round up.
#[inline]
pub(crate) fn round_nearest(fp: &mut ExtendedFloat, shift: i32) -> (bool, bool) {
    // For example, with 4 truncated bits the mask is 0b1111 and the
    // halfway point is 0b1000.
    let n = shift as u64;
    let discarded = fp.mant & lower_n_mask(n);
    let midpoint = lower_n_halfway(n);

    // Classify before shifting: the shift discards the low bits we inspect.
    let classification = (discarded > midpoint, discarded == midpoint);

    // Bit shift so the leading bit is in the hidden bit.
    overflowing_shr(fp, shift);

    classification
}
/// Apply round-half-to-even to an already-shifted float.
///
/// `is_above` and `is_halfway` describe the bits that were truncated by the
/// preceding shift. The mantissa is incremented when the truncated bits were
/// above halfway, or when they were exactly halfway and the mantissa is
/// currently odd (so the increment makes it even).
#[inline]
pub(crate) fn tie_even(fp: &mut ExtendedFloat, is_above: bool, is_halfway: bool) {
    // The lowest bit of the shifted mantissa tells us whether it is odd.
    let lowest_bit_set = (fp.mant & 1) != 0;

    // An exact tie only rounds up when that makes the result even.
    let round_up = is_above || (is_halfway && lowest_bit_set);
    if round_up {
        fp.mant += 1;
    }
}
/// Shift right by `shift` bits and round to nearest, ties to even.
///
/// This is the two-step composition of `round_nearest` (shift and classify
/// the truncated bits) followed by `tie_even` (increment the mantissa when
/// the classification requires rounding up).
#[inline]
pub(crate) fn round_nearest_tie_even(fp: &mut ExtendedFloat, shift: i32) {
    let status = round_nearest(fp, shift);
    tie_even(fp, status.0, status.1);
}
// DIRECTED ROUNDING
/// Shift the mantissa right by `shift` bits for a directed rounding mode.
///
/// Returns `true` when any of the bits removed by the shift were non-zero,
/// i.e. when the shift lost information.
#[inline]
fn round_toward(fp: &mut ExtendedFloat, shift: i32) -> bool {
    // Inspect the low bits before the shift discards them.
    let lost_bits = fp.mant & lower_n_mask(shift as u64);
    let any_lost = lost_bits != 0;

    // Bit shift so the leading bit is in the hidden bit.
    overflowing_shr(fp, shift);

    any_lost
}
/// Rounding adjustment for the downward mode.
///
/// Intentionally a no-op: neither argument is read, and the float is left
/// exactly as the preceding shift produced it.
#[inline]
fn downard(_fp: &mut ExtendedFloat, _is_truncated: bool) {}
/// Shift right by `shift` bits and round downward (plain truncation).
///
/// The truncated bits are simply dropped: `round_toward` performs the shift
/// and `downard` applies no further adjustment to the mantissa.
#[inline]
pub(crate) fn round_downward(fp: &mut ExtendedFloat, shift: i32) {
    // Whether anything was truncated is computed for symmetry with the other
    // rounding schemes, but the downward adjustment ignores it.
    let lost_precision = round_toward(fp, shift);
    downard(fp, lost_precision);
}
// ROUND TO FLOAT
/// Shift the `ExtendedFloat` mantissa into the mantissa width of the native
/// float `F`, rounding with the supplied `algorithm`.
///
/// `algorithm` receives the float and the number of bits to shift right,
/// and is called at most once:
/// - in the normal case it is invoked with `F::DEFAULT_SHIFT`;
/// - if the exponent after that shift would fall below
///   `F::DENORMAL_EXPONENT`, it is invoked with the distance from the current
///   exponent to `F::DENORMAL_EXPONENT` instead;
/// - if that distance exceeds `u64::FULL`, the value underflows and both the
///   mantissa and exponent are set to 0 without calling `algorithm`.
///
/// Afterwards, if rounding carried into the bit selected by `F::CARRY_MASK`,
/// the float is shifted right by one more bit.
#[inline]
pub(crate) fn round_to_float<F, Algorithm>(fp: &mut ExtendedFloat, algorithm: Algorithm)
where
    F: Float,
    Algorithm: FnOnce(&mut ExtendedFloat, i32),
{
    // Compute the shift amounts up front so a single shift suffices instead
    // of a loop; this comparison doubles as underflow detection.
    let normal_shift = F::DEFAULT_SHIFT;
    let needs_denormal = fp.exp + normal_shift < F::DENORMAL_EXPONENT;

    if !needs_denormal {
        algorithm(fp, normal_shift);
    } else {
        // The result would have a denormal exponent, so shift by however
        // many bits are needed to land exactly on it, as long as that does
        // not push every bit out of the mantissa.
        let denormal_shift = F::DENORMAL_EXPONENT - fp.exp;
        if denormal_shift > u64::FULL {
            // Certain underflow, assign literal 0s.
            fp.mant = 0;
            fp.exp = 0;
        } else {
            // We can avoid underflow and get a valid representation.
            algorithm(fp, denormal_shift);
        }
    }

    let carry = F::CARRY_MASK;
    if fp.mant & carry == carry {
        // Roundup carried over to 1 past the hidden bit.
        shr(fp, 1);
    }
}
// AVOID OVERFLOW/UNDERFLOW
/// Avoid overflow for large values by shifting the mantissa left when the
/// exponent is at or above `F::MAX_EXPONENT`.
///
/// Nothing happens unless the exponent exceeds `F::MAX_EXPONENT` by at most
/// `F::MANTISSA_SIZE`. In that range, the top `diff + 1` bits of the native
/// mantissa (ending at the hidden bit) are inspected; if they are all zero,
/// the float is shifted left by `diff + 1` bits via `shl`.
#[inline]
pub(crate) fn avoid_overflow<F>(fp: &mut ExtendedFloat)
where
    F: Float,
{
    // Work out the excess exponent once so a single shift suffices rather
    // than a loop.
    if fp.exp < F::MAX_EXPONENT {
        return;
    }
    let excess = fp.exp - F::MAX_EXPONENT;
    if excess > F::MANTISSA_SIZE {
        return;
    }

    // The overflow mask ends at the hidden bit (`F::MANTISSA_SIZE + 1`) and
    // has `excess + 1` bits set.
    let hidden_bit = (F::MANTISSA_SIZE + 1) as u64;
    let width = (excess + 1) as u64;
    let overflow_mask = internal_n_mask(hidden_bit, width);

    if (fp.mant & overflow_mask) == 0 {
        // None of the inspected high bits are set, so the left shift cannot
        // push a set bit past the hidden-bit position.
        shl(fp, excess + 1);
    }
}
// ROUND TO NATIVE
/// Round an extended-precision float to the representation of the native
/// float type `F`, using `algorithm` as the rounding scheme.
///
/// The steps run in a fixed order:
/// 1. normalize the float, since the later right shifts do not work on a
///    non-normalized number;
/// 2. `round_to_float` moves the mantissa into native width and rounds it;
/// 3. `avoid_overflow` adjusts values whose exponent reached the maximum.
#[inline]
pub(crate) fn round_to_native<F, Algorithm>(fp: &mut ExtendedFloat, algorithm: Algorithm)
where
    F: Float,
    Algorithm: FnOnce(&mut ExtendedFloat, i32),
{
    // Shift all the way left to ensure a consistent starting representation.
    fp.normalize();

    // Bring the mantissa into native form, handling underflow.
    round_to_float::<F, Algorithm>(fp, algorithm);

    // Finally, guard against overflow at the top of the exponent range.
    avoid_overflow::<F>(fp);
}