// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Tests some ops specific to floating-point types (Div, Round etc.)
#include <stdio.h>
#include <cmath> // std::ceil, std::floor
#include "hwy/base.h"
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "tests/float_test.cc"
#include "hwy/foreach_target.h" // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
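// The expected values below are IEEE binary16 bit patterns: 1 sign bit,
// 5 exponent bits, 10 mantissa bits. 0x8000 is the sign bit, 0x7C00 is
// +infinity (all-ones exponent, zero mantissa), 0x0400 is the smallest
// normal (2^-14 = 6.103515625E-5) and 0x03FF is the largest subnormal
// (1023 * 2^-24 ~= 6.0975552E-5).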
HWY_NOINLINE void TestAllF16FromF32() {
const FixedTag<float, 1> d1;
// +/- 0
HWY_ASSERT_EQ(0, BitCastScalar<uint16_t>(hwy::F16FromF32(0.0f)));
HWY_ASSERT_EQ(0x8000, BitCastScalar<uint16_t>(hwy::F16FromF32(-0.0f)));
// smallest f32 subnormal
HWY_ASSERT_EQ(0,
BitCastScalar<uint16_t>(hwy::F16FromF32(5.87747175411E-39f)));
HWY_ASSERT_EQ(0x8000,
BitCastScalar<uint16_t>(hwy::F16FromF32(-5.87747175411E-39f)));
// largest f16 subnormal
HWY_ASSERT_EQ(0x3FF, BitCastScalar<uint16_t>(hwy::F16FromF32(6.0975552E-5f)));
HWY_ASSERT_EQ(0x83FF,
BitCastScalar<uint16_t>(hwy::F16FromF32(-6.0975552E-5f)));
// smallest normalized f16
HWY_ASSERT_EQ(0x400,
BitCastScalar<uint16_t>(hwy::F16FromF32(6.103515625E-5f)));
HWY_ASSERT_EQ(0x8400,
BitCastScalar<uint16_t>(hwy::F16FromF32(-6.103515625E-5f)));
// rounding to nearest even
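// Worked example: f16 has 10 mantissa bits, so the spacing between 1.0 and
// 2.0 is 2^-10. 1.00048828125 = 1 + 2^-11 lies exactly halfway between
// mantissa 0 and mantissa 1 and ties to the even mantissa (0).
// 1.00097644329 is above that halfway point but below 1 + 2^-10, so it
// rounds up to mantissa 1. 1.00146484375 = 1 + 3*2^-11 is the halfway point
// between mantissa 1 and 2 and ties up to the even mantissa (2).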
HWY_ASSERT_EQ((15 << 10) + 0, // round down to even: 0[10..0] => 0
BitCastScalar<uint16_t>(hwy::F16FromF32(1.00048828125f)));
HWY_ASSERT_EQ((15 << 10) + 1, // round up: 0[1..1] => 1
BitCastScalar<uint16_t>(hwy::F16FromF32(1.00097644329f)));
HWY_ASSERT_EQ((15 << 10) + 2, // round up to even: 1[10..0] => 10
BitCastScalar<uint16_t>(hwy::F16FromF32(1.00146484375f)));
// greater than f16 max => inf
HWY_ASSERT_EQ(0x7C00, BitCastScalar<uint16_t>(hwy::F16FromF32(7E4f)));
HWY_ASSERT_EQ(0xFC00, BitCastScalar<uint16_t>(hwy::F16FromF32(-7E4f)));
// infinity
HWY_ASSERT_EQ(0x7C00,
BitCastScalar<uint16_t>(hwy::F16FromF32(GetLane(Inf(d1)))));
HWY_ASSERT_EQ(0xFC00,
BitCastScalar<uint16_t>(hwy::F16FromF32(-GetLane(Inf(d1)))));
// NaN
HWY_ASSERT_EQ(0x7FFF,
BitCastScalar<uint16_t>(hwy::F16FromF32(GetLane(NaN(d1)))));
HWY_ASSERT_EQ(0xFFFF,
BitCastScalar<uint16_t>(hwy::F16FromF32(-GetLane(NaN(d1)))));
}
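// The inverse direction: widening f16 bit patterns back to f32. Subnormal
// f16 values decode as mantissa * 2^-24, so 0x03FF is 1023 * 2^-24, matching
// the 6.0975552E-5f constant used above.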
HWY_NOINLINE void TestAllF32FromF16() {
const FixedTag<float, 1> d1;
// +/- 0
HWY_ASSERT_EQ(0.0f, hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0})));
HWY_ASSERT_EQ(-0.0f,
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0x8000})));
// largest f16 subnormal
HWY_ASSERT_EQ(6.0975552E-5f,
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0x3FF})));
HWY_ASSERT_EQ(-6.0975552E-5f,
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0x83FF})));
// smallest normalized f16
HWY_ASSERT_EQ(6.103515625E-5f,
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0x400})));
HWY_ASSERT_EQ(-6.103515625E-5f,
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0x8400})));
// infinity
HWY_ASSERT_EQ(GetLane(Inf(d1)),
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0x7C00})));
HWY_ASSERT_EQ(-GetLane(Inf(d1)),
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0xFC00})));
// NaN
HWY_ASSERT_EQ(GetLane(NaN(d1)),
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0x7FFF})));
HWY_ASSERT_EQ(-GetLane(NaN(d1)),
hwy::F32FromF16(BitCastScalar<float16_t>(uint16_t{0xFFFF})));
}
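// Iota(d, -2) fills lanes with -2, -1, 0, 1, ...; dividing by 1 must leave
// the vector unchanged, and dividing by 2 must match the per-lane scalar
// expectation computed below.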
struct TestDiv {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v = Iota(d, -2);
const auto v1 = Set(d, ConvertScalarTo<T>(1));
// Unchanged after division by 1.
HWY_ASSERT_VEC_EQ(d, v, Div(v, v1));
const size_t N = Lanes(d);
auto expected = AllocateAligned<T>(N);
HWY_ASSERT(expected);
for (size_t i = 0; i < N; ++i) {
expected[i] = ConvertScalarTo<T>((static_cast<double>(i) - 2.0) / 2.0);
}
HWY_ASSERT_VEC_EQ(d, expected.get(), Div(v, Set(d, ConvertScalarTo<T>(2))));
}
};
HWY_NOINLINE void TestAllDiv() { ForFloatTypes(ForPartialVectors<TestDiv>()); }
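// ApproximateReciprocal may differ per target (e.g. it can use a
// low-precision hardware estimate), so rather than exact equality this test
// tracks the worst absolute error across lanes and only requires a relative
// error below 0.4%, and only when the expected magnitude is large enough for
// the ratio to be meaningful.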
struct TestApproximateReciprocal {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto v = Iota(d, -2);
const auto nonzero =
IfThenElse(Eq(v, Zero(d)), Set(d, ConvertScalarTo<T>(1)), v);
const size_t N = Lanes(d);
auto input = AllocateAligned<T>(N);
auto actual = AllocateAligned<T>(N);
HWY_ASSERT(input && actual);
Store(nonzero, d, input.get());
Store(ApproximateReciprocal(nonzero), d, actual.get());
double max_l1 = 0.0;
double worst_expected = 0.0;
double worst_actual = 0.0;
for (size_t i = 0; i < N; ++i) {
const double expected = 1.0 / input[i];
const double l1 = ScalarAbs(expected - actual[i]);
if (l1 > max_l1) {
max_l1 = l1;
worst_expected = expected;
worst_actual = actual[i];
}
}
const double abs_worst_expected = ScalarAbs(worst_expected);
if (abs_worst_expected > 1E-5) {
const double max_rel = max_l1 / abs_worst_expected;
fprintf(stderr, "max l1 %f rel %f (%f vs %f)\n", max_l1, max_rel,
worst_expected, worst_actual);
HWY_ASSERT(max_rel < 0.004);
}
}
};
HWY_NOINLINE void TestAllApproximateReciprocal() {
ForFloatTypes(ForPartialVectors<TestApproximateReciprocal>());
}
struct TestSquareRoot {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const auto vi = Iota(d, 0);
HWY_ASSERT_VEC_EQ(d, vi, Sqrt(Mul(vi, vi)));
}
};
HWY_NOINLINE void TestAllSquareRoot() {
ForFloatTypes(ForPartialVectors<TestSquareRoot>());
}
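// 1/sqrt(123) ~= 0.0901670; every lane of ApproximateReciprocalSqrt must land
// within 4E-4 of the 0.090166f reference used below.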
struct TestReciprocalSquareRoot {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const Vec<D> v = Set(d, ConvertScalarTo<T>(123.0f));
const size_t N = Lanes(d);
auto lanes = AllocateAligned<T>(N);
HWY_ASSERT(lanes);
Store(ApproximateReciprocalSqrt(v), d, lanes.get());
for (size_t i = 0; i < N; ++i) {
T err = ConvertScalarTo<T>(ConvertScalarTo<float>(lanes[i]) - 0.090166f);
if (err < ConvertScalarTo<T>(0)) err = -err;
if (static_cast<double>(err) >= 4E-4) {
HWY_ABORT("Lane %d (%d): actual %f err %f\n", static_cast<int>(i),
static_cast<int>(N), static_cast<double>(lanes[i]),
static_cast<double>(err));
}
}
}
};
HWY_NOINLINE void TestAllReciprocalSquareRoot() {
ForFloatTypes(ForPartialVectors<TestReciprocalSquareRoot>());
}
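// Shared input generator for the rounding tests below: a fixed list of edge
// cases (signed zeros, ties at *.5, values around MantissaEnd<T>() where the
// floating-point spacing reaches 1, +/- infinity and NaN), padded with zeros
// up to a multiple of the vector length so the tests can load whole vectors.
// The padded size is returned via `padded`.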
template <typename T, class D>
AlignedFreeUniquePtr<T[]> RoundTestCases(T /*unused*/, D d, size_t& padded) {
const T eps = Epsilon<T>();
const T huge = ConvertScalarTo<T>(sizeof(T) >= 4 ? 1E34 : 3E4);
const T test_cases[] = {
// +/- 1
ConvertScalarTo<T>(1), ConvertScalarTo<T>(-1),
// +/- 0
ConvertScalarTo<T>(0.0), ConvertScalarTo<T>(-0.0),
// near 0
ConvertScalarTo<T>(0.4), ConvertScalarTo<T>(-0.4),
// +/- integer
ConvertScalarTo<T>(4), ConvertScalarTo<T>(-32),
// positive near limit
ConvertScalarTo<T>(MantissaEnd<T>() - ConvertScalarTo<T>(1.5)),
ConvertScalarTo<T>(MantissaEnd<T>() + ConvertScalarTo<T>(1.5)),
// negative near limit
ConvertScalarTo<T>(-MantissaEnd<T>() - ConvertScalarTo<T>(1.5)),
ConvertScalarTo<T>(-MantissaEnd<T>() + ConvertScalarTo<T>(1.5)),
// positive tiebreak
ConvertScalarTo<T>(1.5), ConvertScalarTo<T>(2.5),
// negative tiebreak
ConvertScalarTo<T>(-1.5), ConvertScalarTo<T>(-2.5),
// positive +/- delta
ConvertScalarTo<T>(2.0001), ConvertScalarTo<T>(3.9999),
// negative +/- delta
ConvertScalarTo<T>(-999.9999), ConvertScalarTo<T>(-998.0001),
// positive +/- epsilon
ConvertScalarTo<T>(ConvertScalarTo<T>(1) + eps),
ConvertScalarTo<T>(ConvertScalarTo<T>(1) - eps),
// negative +/- epsilon
ConvertScalarTo<T>(ConvertScalarTo<T>(-1) + eps),
ConvertScalarTo<T>(ConvertScalarTo<T>(-1) - eps),
// +/- huge (but still fits in float)
huge, -huge,
// +/- infinity
GetLane(Inf(d)), GetLane(Neg(Inf(d))),
// qNaN
GetLane(NaN(d))};
const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
const size_t N = Lanes(d);
padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors
auto in = AllocateAligned<T>(padded);
auto expected = AllocateAligned<T>(padded);
HWY_ASSERT(in && expected);
CopyBytes(test_cases, in.get(), kNumTestCases * sizeof(T));
ZeroBytes(in.get() + kNumTestCases, (padded - kNumTestCases) * sizeof(T));
return in;
}
struct TestRound {
template <typename T, class D>
HWY_NOINLINE void operator()(T t, D d) {
size_t padded;
auto in = RoundTestCases(t, d, padded);
auto expected = AllocateAligned<T>(padded);
HWY_ASSERT(expected);
for (size_t i = 0; i < padded; ++i) {
// Avoid [std::]round, which does not round to nearest *even*.
// NOTE: the std:: version from the C++11 <cmath> header is not defined in
// RVV GCC.
// Cast to f32/64 because nearbyint does not support _Float16.
#if HWY_HAVE_FLOAT64
const double f = ConvertScalarTo<double>(in[i]);
#else
const float f = ConvertScalarTo<float>(in[i]);
#endif
expected[i] = ConvertScalarTo<T>(nearbyint(f));
}
for (size_t i = 0; i < padded; i += Lanes(d)) {
HWY_ASSERT_VEC_EQ(d, &expected[i], Round(Load(d, &in[i])));
}
}
};
HWY_NOINLINE void TestAllRound() {
ForFloatTypes(ForPartialVectors<TestRound>());
}
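// NearestInt converts to the same-width signed integer type. The scalar
// lrintf reference is undefined for NaN and for values outside the integer
// range, so the test replaces NaN lanes with zero before calling NearestInt
// and clamps the expected value to LimitsMin/LimitsMax for infinities and
// out-of-range inputs.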
struct TestNearestInt {
template <typename TF, class DF>
HWY_NOINLINE void operator()(TF tf, const DF df) {
using TI = MakeSigned<TF>;
const RebindToSigned<DF> di;
size_t padded;
auto in = RoundTestCases(tf, df, padded);
auto expected = AllocateAligned<TI>(padded);
HWY_ASSERT(expected);
constexpr double kMax = static_cast<double>(LimitsMax<TI>());
for (size_t i = 0; i < padded; ++i) {
if (ScalarIsNaN(in[i])) {
// We replace NaN with 0 below (no_nan)
expected[i] = 0;
} else if (ScalarIsInf(in[i]) ||
static_cast<double>(ScalarAbs(in[i])) >= kMax) {
// Avoid undefined result for lrintf
expected[i] = std::signbit(in[i]) ? LimitsMin<TI>() : LimitsMax<TI>();
} else {
expected[i] = static_cast<TI>(lrintf(ConvertScalarTo<float>(in[i])));
}
}
for (size_t i = 0; i < padded; i += Lanes(df)) {
const auto v = Load(df, &in[i]);
const auto no_nan = IfThenElse(Eq(v, v), v, Zero(df));
HWY_ASSERT_VEC_EQ(di, &expected[i], NearestInt(no_nan));
}
}
};
HWY_NOINLINE void TestAllNearestInt() {
ForPartialVectors<TestNearestInt>()(float());
}
struct TestTrunc {
template <typename T, class D>
HWY_NOINLINE void operator()(T t, D d) {
size_t padded;
auto in = RoundTestCases(t, d, padded);
auto expected = AllocateAligned<T>(padded);
HWY_ASSERT(expected);
for (size_t i = 0; i < padded; ++i) {
// NOTE: the std:: version from the C++11 <cmath> header is not defined in
// RVV GCC.
// Cast to double because trunc does not support _Float16.
expected[i] = ConvertScalarTo<T>(trunc(ConvertScalarTo<double>(in[i])));
}
for (size_t i = 0; i < padded; i += Lanes(d)) {
HWY_ASSERT_VEC_EQ(d, &expected[i], Trunc(Load(d, &in[i])));
}
}
};
HWY_NOINLINE void TestAllTrunc() {
ForFloatTypes(ForPartialVectors<TestTrunc>());
}
struct TestCeil {
template <typename T, class D>
HWY_NOINLINE void operator()(T t, D d) {
size_t padded;
auto in = RoundTestCases(t, d, padded);
auto expected = AllocateAligned<T>(padded);
HWY_ASSERT(expected);
for (size_t i = 0; i < padded; ++i) {
// Cast to double because ceil does not support _Float16.
expected[i] =
ConvertScalarTo<T>(std::ceil(ConvertScalarTo<double>(in[i])));
}
for (size_t i = 0; i < padded; i += Lanes(d)) {
HWY_ASSERT_VEC_EQ(d, &expected[i], Ceil(Load(d, &in[i])));
}
}
};
HWY_NOINLINE void TestAllCeil() {
ForFloatTypes(ForPartialVectors<TestCeil>());
}
struct TestFloor {
template <typename T, class D>
HWY_NOINLINE void operator()(T t, D d) {
size_t padded;
auto in = RoundTestCases(t, d, padded);
auto expected = AllocateAligned<T>(padded);
HWY_ASSERT(expected);
for (size_t i = 0; i < padded; ++i) {
// Cast to double because floor does not support _Float16.
expected[i] =
ConvertScalarTo<T>(std::floor(ConvertScalarTo<double>(in[i])));
}
for (size_t i = 0; i < padded; i += Lanes(d)) {
HWY_ASSERT_VEC_EQ(d, &expected[i], Floor(Load(d, &in[i])));
}
}
};
HWY_NOINLINE void TestAllFloor() {
ForFloatTypes(ForPartialVectors<TestFloor>());
}
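// AbsDiff(a, b) is |a - b|; the test builds distinct per-lane inputs and
// checks that the result is identical regardless of argument order.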
struct TestAbsDiff {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const size_t N = Lanes(d);
auto in_lanes_a = AllocateAligned<T>(N);
auto in_lanes_b = AllocateAligned<T>(N);
auto out_lanes = AllocateAligned<T>(N);
HWY_ASSERT(in_lanes_a && in_lanes_b && out_lanes);
for (size_t i = 0; i < N; ++i) {
in_lanes_a[i] = ConvertScalarTo<T>((i ^ 1u) << i);
in_lanes_b[i] = ConvertScalarTo<T>(i << i);
out_lanes[i] = ConvertScalarTo<T>(
ScalarAbs(ConvertScalarTo<T>(in_lanes_a[i] - in_lanes_b[i])));
}
const auto a = Load(d, in_lanes_a.get());
const auto b = Load(d, in_lanes_b.get());
const auto expected = Load(d, out_lanes.get());
HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(a, b));
HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(b, a));
}
};
HWY_NOINLINE void TestAllAbsDiff() {
ForFloatTypes(ForPartialVectors<TestAbsDiff>());
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace hwy
HWY_AFTER_NAMESPACE();
#if HWY_ONCE
namespace hwy {
HWY_BEFORE_TEST(HwyFloatTest);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllF16FromF32);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllF32FromF16);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllDiv);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllApproximateReciprocal);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllSquareRoot);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllReciprocalSquareRoot);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllRound);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllNearestInt);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllTrunc);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllCeil);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllFloor);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllAbsDiff);
} // namespace hwy
#endif