Source code

Revision control

Copy as Markdown

Other Tools

// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "tests/compare_test.cc"
#include "hwy/foreach_target.h" // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
// Eq/Ne, run for all lane types.
struct TestEquality {
  // Builds two identical ramps (starting at 2) and one ramp starting at 3.
  // The assertions expect the identical ramps to compare equal in every lane
  // and the shifted ramp to compare unequal in every lane.
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> from2 = Iota(d, 2);
    const Vec<D> from2_again = Iota(d, 2);
    const Vec<D> from3 = Iota(d, 3);

    const Mask<D> none = MaskFalse(d);
    const Mask<D> all = MaskTrue(d);

    // Eq: false for differing operands (either order), true for same values,
    // whether the operands are the same variable or merely equal.
    HWY_ASSERT_MASK_EQ(d, none, Eq(from2, from3));
    HWY_ASSERT_MASK_EQ(d, none, Eq(from3, from2));
    HWY_ASSERT_MASK_EQ(d, all, Eq(from2, from2));
    HWY_ASSERT_MASK_EQ(d, all, Eq(from2, from2_again));

    // Ne: the exact opposite of the Eq expectations above.
    HWY_ASSERT_MASK_EQ(d, all, Ne(from2, from3));
    HWY_ASSERT_MASK_EQ(d, all, Ne(from3, from2));
    HWY_ASSERT_MASK_EQ(d, none, Ne(from2, from2));
    HWY_ASSERT_MASK_EQ(d, none, Ne(from2, from2_again));
  }
};
// Dispatches TestEquality through ForAllTypes/ForPartialVectors.
HWY_NOINLINE void TestAllEquality() {
  ForPartialVectors<TestEquality> test{};
  ForAllTypes(test);
}
// a > b should be true, verify that for Gt/Lt and with swapped args.
template <class D>
void EnsureGreater(D d, TFromD<D> a, TFromD<D> b, const char* file, int line) {
const auto mask_false = MaskFalse(d);
const auto mask_true = MaskTrue(d);
const auto va = Set(d, a);
const auto vb = Set(d, b);
AssertMaskEqual(d, mask_true, Gt(va, vb), file, line);
AssertMaskEqual(d, mask_false, Lt(va, vb), file, line);
// Swapped order
AssertMaskEqual(d, mask_false, Gt(vb, va), file, line);
AssertMaskEqual(d, mask_true, Lt(vb, va), file, line);
// Also ensure irreflexive
AssertMaskEqual(d, mask_false, Gt(va, va), file, line);
AssertMaskEqual(d, mask_false, Gt(vb, vb), file, line);
AssertMaskEqual(d, mask_false, Lt(va, va), file, line);
AssertMaskEqual(d, mask_false, Lt(vb, vb), file, line);
}
#define HWY_ENSURE_GREATER(d, a, b) EnsureGreater(d, a, b, __FILE__, __LINE__)
// a >= b should be true, verify that for Ge/Le and with swapped args.
template <class D>
void EnsureGreaterOrEqual(D d, TFromD<D> a, TFromD<D> b, const char* file,
int line) {
const auto mask_true = MaskTrue(d);
const auto va = Set(d, a);
const auto vb = Set(d, b);
const auto mask_eq = Eq(va, vb);
AssertMaskEqual(d, mask_true, Ge(va, vb), file, line);
AssertMaskEqual(d, mask_eq, Le(va, vb), file, line);
// Swapped order
AssertMaskEqual(d, mask_eq, Ge(vb, va), file, line);
AssertMaskEqual(d, mask_true, Le(vb, va), file, line);
// va >= va, vb >= vb, va <= va, and vb <= vb should all be true if
// both a and b are non-NaN values
AssertMaskEqual(d, mask_true, Ge(va, va), file, line);
AssertMaskEqual(d, mask_true, Ge(vb, vb), file, line);
AssertMaskEqual(d, mask_true, Le(va, va), file, line);
AssertMaskEqual(d, mask_true, Le(vb, vb), file, line);
}
#define HWY_ENSURE_GREATER_OR_EQUAL(d, a, b) \
EnsureGreaterOrEqual(d, a, b, __FILE__, __LINE__)
// Gt/Lt on unsigned lane types.
struct TestStrictUnsigned {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const T limit_max = LimitsMax<T>();
    const Vec<D> zero = Zero(d);
    // Ramp starting at 2, masked so every lane lies in 0..255.
    const Vec<D> ramp = And(Iota(d, 2), Set(d, 255));
    const Mask<D> none = MaskFalse(d);

    // Broadcast scalar pairs (a, b) with a > b, including the type's maximum.
    HWY_ENSURE_GREATER(d, 2, 1);
    HWY_ENSURE_GREATER(d, 1, 0);
    HWY_ENSURE_GREATER(d, 128, 127);
    HWY_ENSURE_GREATER(d, limit_max, limit_max / 2);
    HWY_ENSURE_GREATER(d, limit_max, 1);
    HWY_ENSURE_GREATER(d, limit_max, 0);

    // Per-lane distinct inputs to verify lanes are handled independently:
    // nothing is below zero, and no vector is strictly ordered against itself.
    HWY_ASSERT_MASK_EQ(d, none, Lt(ramp, zero));
    HWY_ASSERT_MASK_EQ(d, none, Gt(zero, ramp));
    HWY_ASSERT_MASK_EQ(d, none, Lt(zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Gt(zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt(ramp, ramp));
    HWY_ASSERT_MASK_EQ(d, none, Gt(ramp, ramp));
  }
};
// Dispatches TestStrictUnsigned through ForUnsignedTypes/ForPartialVectors.
HWY_NOINLINE void TestAllStrictUnsigned() {
  ForPartialVectors<TestStrictUnsigned> test{};
  ForUnsignedTypes(test);
}
// Ge/Le on unsigned lane types.
struct TestWeakUnsigned {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const T limit_max = LimitsMax<T>();
    const Vec<D> zero = Zero(d);
    const Vec<D> one = Set(d, 1u);
    // Ramp starting at 2, masked so every lane lies in 0..255.
    const Vec<D> ramp = And(Iota(d, 2), Set(d, 255u));
    const Mask<D> all = MaskTrue(d);

    // Broadcast scalar pairs (a, b) with a >= b, including equal pairs and
    // the type's maximum.
    HWY_ENSURE_GREATER_OR_EQUAL(d, 2, 2);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 2, 1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 1, 1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 1, 0);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 0, 0);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 128, 127);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 128, 128);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 127, 127);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, limit_max);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, limit_max / 2);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, 1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, 0);

    // Per-lane distinct inputs to verify lanes are handled independently.
    // An unsigned lane is <= 0 only where it equals 0.
    const auto ramp_is_zero = Eq(ramp, zero);
    HWY_ASSERT_MASK_EQ(d, ramp_is_zero, Le(ramp, zero));
    HWY_ASSERT_MASK_EQ(d, ramp_is_zero, Ge(zero, ramp));
    HWY_ASSERT_MASK_EQ(d, all, Le(zero, zero));
    HWY_ASSERT_MASK_EQ(d, all, Ge(zero, zero));
    HWY_ASSERT_MASK_EQ(d, all, Le(ramp, ramp));
    HWY_ASSERT_MASK_EQ(d, all, Ge(ramp, ramp));

    // ramp + 1 differs from ramp in every lane, so the weak comparison must
    // agree with the corresponding strict one.
    const auto ramp_up = Add(ramp, one);
    HWY_ASSERT_MASK_EQ(d, Lt(ramp, ramp_up), Le(ramp, ramp_up));
    HWY_ASSERT_MASK_EQ(d, Gt(ramp, ramp_up), Ge(ramp, ramp_up));
    HWY_ASSERT_MASK_EQ(d, Lt(ramp_up, ramp), Le(ramp_up, ramp));
    HWY_ASSERT_MASK_EQ(d, Gt(ramp_up, ramp), Ge(ramp_up, ramp));

    // Same reasoning for ramp - 1.
    const auto ramp_down = Sub(ramp, one);
    HWY_ASSERT_MASK_EQ(d, Lt(ramp, ramp_down), Le(ramp, ramp_down));
    HWY_ASSERT_MASK_EQ(d, Gt(ramp, ramp_down), Ge(ramp, ramp_down));
    HWY_ASSERT_MASK_EQ(d, Lt(ramp_down, ramp), Le(ramp_down, ramp));
    HWY_ASSERT_MASK_EQ(d, Gt(ramp_down, ramp), Ge(ramp_down, ramp));
  }
};
// Dispatches TestWeakUnsigned (the Ge/Le checks) through
// ForUnsignedTypes/ForPartialVectors.
// Note: this must instantiate TestWeakUnsigned, not TestStrictUnsigned;
// otherwise the unsigned Ge/Le test body is never executed and the Gt/Lt test
// merely runs a second time.
HWY_NOINLINE void TestAllWeakUnsigned() {
  ForUnsignedTypes(ForPartialVectors<TestWeakUnsigned>());
}
// Gt/Lt on signed integer lane types.
struct TestStrictInt {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const T limit_min = LimitsMin<T>();
    const T limit_max = LimitsMax<T>();
    const Vec<D> zero = Zero(d);
    // Ramp starting at 2, masked so every lane lies in 0..127.
    const Vec<D> non_negative = And(Iota(d, 2), Set(d, 127));
    // -(non_negative) - 1, i.e. every lane lies in -1..-128.
    const Vec<D> negative = Sub(Neg(non_negative), Set(d, 1));
    const Mask<D> none = MaskFalse(d);
    const Mask<D> all = MaskTrue(d);

    // Broadcast scalar pairs (a, b) with a > b, crossing zero and including
    // the type's extremes.
    HWY_ENSURE_GREATER(d, 2, 1);
    HWY_ENSURE_GREATER(d, 1, 0);
    HWY_ENSURE_GREATER(d, 0, -1);
    HWY_ENSURE_GREATER(d, -1, -2);
    HWY_ENSURE_GREATER(d, limit_max, limit_max / 2);
    HWY_ENSURE_GREATER(d, limit_max, 1);
    HWY_ENSURE_GREATER(d, limit_max, 0);
    HWY_ENSURE_GREATER(d, limit_max, -1);
    HWY_ENSURE_GREATER(d, limit_max, limit_min);
    HWY_ENSURE_GREATER(d, 0, limit_min);
    HWY_ENSURE_GREATER(d, limit_min / 2, limit_min);

    // Per-lane distinct inputs to verify lanes are handled independently:
    // every non-negative lane exceeds every negative lane.
    HWY_ASSERT_MASK_EQ(d, all, Gt(non_negative, negative));
    HWY_ASSERT_MASK_EQ(d, all, Lt(negative, non_negative));
    HWY_ASSERT_MASK_EQ(d, none, Lt(non_negative, negative));
    HWY_ASSERT_MASK_EQ(d, none, Gt(negative, non_negative));

    // No vector is strictly ordered against itself.
    HWY_ASSERT_MASK_EQ(d, none, Lt(zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt(non_negative, non_negative));
    HWY_ASSERT_MASK_EQ(d, none, Lt(negative, negative));
    HWY_ASSERT_MASK_EQ(d, none, Gt(zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Gt(non_negative, non_negative));
    HWY_ASSERT_MASK_EQ(d, none, Gt(negative, negative));
  }
};
// Regression test for the S-SSE3 bug (#795): 64-bit operands whose upper
// 32 bits are identical while the MSB of the lower 32 bits differs.
struct TestStrictInt64 {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const auto none = MaskFalse(d);
    const auto all = MaskTrue(d);

    // Upper half 0x3 in both; the left operand is the larger one.
    const auto hi3_lo_msb = Set(d, 0x380000000LL);
    const auto hi3_lo_one = Set(d, 0x300000001LL);
    HWY_ASSERT_MASK_EQ(d, none, Lt(hi3_lo_msb, hi3_lo_one));

    // Upper half 0xF in both; the left operand is the smaller one.
    const auto hiF_lo_zero = Set(d, 0xF00000000LL);
    HWY_ASSERT_MASK_EQ(d, all, Lt(hiF_lo_zero, Set(d, 0xF80000000LL)));
    HWY_ASSERT_MASK_EQ(d, all, Lt(hiF_lo_zero, Set(d, 0xF80000001LL)));
  }
};
// Dispatches TestStrictInt through ForSignedTypes/ForPartialVectors, then
// runs the TestStrictInt64 regression test for int64_t only.
HWY_NOINLINE void TestAllStrictInt() {
  ForPartialVectors<TestStrictInt> signed_test{};
  ForSignedTypes(signed_test);

  ForPartialVectors<TestStrictInt64> int64_test{};
  int64_test(int64_t{});
}
// Ge/Le on signed integer lane types.
struct TestWeakInt {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const T limit_min = LimitsMin<T>();
    const T limit_max = LimitsMax<T>();
    const Vec<D> zero = Zero(d);
    const Vec<D> one = Set(d, 1);
    // Ramp starting at 2, masked so every lane lies in 0..127.
    const Vec<D> non_negative = And(Iota(d, 2), Set(d, 127));
    // -(non_negative) - 1, i.e. every lane lies in -1..-128.
    const Vec<D> negative = Sub(Neg(non_negative), Set(d, 1));
    const Mask<D> none = MaskFalse(d);
    const Mask<D> all = MaskTrue(d);

    // Broadcast scalar pairs (a, b) with a >= b: equal pairs, pairs crossing
    // zero, and the type's extremes.
    HWY_ENSURE_GREATER_OR_EQUAL(d, 2, 2);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 2, 1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 1, 1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 1, 0);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 0, 0);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 0, -1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, -1, -1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, -1, -2);
    HWY_ENSURE_GREATER_OR_EQUAL(d, -2, -2);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, limit_max);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, limit_max / 2);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, 1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, 0);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, -1);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_max, limit_min);
    HWY_ENSURE_GREATER_OR_EQUAL(d, 0, limit_min);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_min / 2, limit_min);
    HWY_ENSURE_GREATER_OR_EQUAL(d, limit_min, limit_min);

    // Per-lane distinct inputs to verify lanes are handled independently:
    // every non-negative lane is at least as large as every negative lane,
    // and never the other way around.
    HWY_ASSERT_MASK_EQ(d, all, Ge(non_negative, negative));
    HWY_ASSERT_MASK_EQ(d, all, Le(negative, non_negative));
    HWY_ASSERT_MASK_EQ(d, none, Le(non_negative, negative));
    HWY_ASSERT_MASK_EQ(d, none, Ge(negative, non_negative));

    // Weak comparisons are reflexive.
    HWY_ASSERT_MASK_EQ(d, all, Le(zero, zero));
    HWY_ASSERT_MASK_EQ(d, all, Le(non_negative, non_negative));
    HWY_ASSERT_MASK_EQ(d, all, Le(negative, negative));
    HWY_ASSERT_MASK_EQ(d, all, Ge(zero, zero));
    HWY_ASSERT_MASK_EQ(d, all, Ge(non_negative, non_negative));
    HWY_ASSERT_MASK_EQ(d, all, Ge(negative, negative));

    // x + 1 differs from x in every lane, so the weak comparison must agree
    // with the corresponding strict one.
    const auto stepped_up = Add(non_negative, one);
    HWY_ASSERT_MASK_EQ(d, Lt(non_negative, stepped_up),
                       Le(non_negative, stepped_up));
    HWY_ASSERT_MASK_EQ(d, Gt(non_negative, stepped_up),
                       Ge(non_negative, stepped_up));
    HWY_ASSERT_MASK_EQ(d, Lt(stepped_up, non_negative),
                       Le(stepped_up, non_negative));
    HWY_ASSERT_MASK_EQ(d, Gt(stepped_up, non_negative),
                       Ge(stepped_up, non_negative));

    // Same reasoning for x - 1.
    const auto stepped_down = Sub(non_negative, one);
    HWY_ASSERT_MASK_EQ(d, Lt(non_negative, stepped_down),
                       Le(non_negative, stepped_down));
    HWY_ASSERT_MASK_EQ(d, Gt(non_negative, stepped_down),
                       Ge(non_negative, stepped_down));
    HWY_ASSERT_MASK_EQ(d, Lt(stepped_down, non_negative),
                       Le(stepped_down, non_negative));
    HWY_ASSERT_MASK_EQ(d, Gt(stepped_down, non_negative),
                       Ge(stepped_down, non_negative));
  }
};
// Dispatches TestWeakInt through ForSignedTypes/ForPartialVectors.
HWY_NOINLINE void TestAllWeakInt() {
  ForPartialVectors<TestWeakInt> test{};
  ForSignedTypes(test);
}
// Gt/Lt on floating-point lane types.
struct TestStrictFloat {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // Large magnitude: 1E36 for lane types of at least 4 bytes, else 1E4.
    const T big = ConvertScalarTo<T>(sizeof(T) >= 4 ? 1E36 : 1E4);
    const T neg_big = -big;
    const Vec<D> zero = Zero(d);
    const Vec<D> positive = Iota(d, 2);      // ramp starting at 2
    const Vec<D> negative = Neg(positive);   // lane-wise negation of the ramp
    const Mask<D> none = MaskFalse(d);
    const Mask<D> all = MaskTrue(d);

    // Broadcast scalar pairs (a, b) with a > b, crossing zero and including
    // the large-magnitude values.
    HWY_ENSURE_GREATER(d, 2, 1);
    HWY_ENSURE_GREATER(d, 1, 0);
    HWY_ENSURE_GREATER(d, 0, -1);
    HWY_ENSURE_GREATER(d, -1, -2);
    HWY_ENSURE_GREATER(d, big, 1);
    HWY_ENSURE_GREATER(d, big, 0);
    HWY_ENSURE_GREATER(d, big, -1);
    HWY_ENSURE_GREATER(d, big, neg_big);
    HWY_ENSURE_GREATER(d, 0, neg_big);

    // Per-lane distinct inputs to verify lanes are handled independently:
    // every positive lane exceeds its negation.
    HWY_ASSERT_MASK_EQ(d, all, Gt(positive, negative));
    HWY_ASSERT_MASK_EQ(d, all, Lt(negative, positive));
    HWY_ASSERT_MASK_EQ(d, none, Lt(positive, negative));
    HWY_ASSERT_MASK_EQ(d, none, Gt(negative, positive));

    // No vector is strictly ordered against itself.
    HWY_ASSERT_MASK_EQ(d, none, Lt(zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt(positive, positive));
    HWY_ASSERT_MASK_EQ(d, none, Lt(negative, negative));
    HWY_ASSERT_MASK_EQ(d, none, Gt(zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Gt(positive, positive));
    HWY_ASSERT_MASK_EQ(d, none, Gt(negative, negative));
  }
};
// Dispatches TestStrictFloat through ForFloatTypes/ForPartialVectors.
HWY_NOINLINE void TestAllStrictFloat() {
  ForPartialVectors<TestStrictFloat> test{};
  ForFloatTypes(test);
}
// Ge/Le on floating-point lane types.
struct TestWeakFloat {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // Ramp starting at 2.
    const Vec<D> upper = Iota(d, 2);
    // Ramp starting at -Lanes(d); each lane is therefore below the
    // corresponding lane of `upper`.
    const Vec<D> lower = Iota(d, -ConvertScalarTo<T>(Lanes(d)));
    const Mask<D> none = MaskFalse(d);
    const Mask<D> all = MaskTrue(d);

    // Reflexive cases.
    HWY_ASSERT_MASK_EQ(d, all, Ge(upper, upper));
    HWY_ASSERT_MASK_EQ(d, all, Le(lower, lower));

    // Ordered operands, in both argument orders.
    HWY_ASSERT_MASK_EQ(d, all, Ge(upper, lower));
    HWY_ASSERT_MASK_EQ(d, all, Le(lower, upper));
    HWY_ASSERT_MASK_EQ(d, none, Le(upper, lower));
    HWY_ASSERT_MASK_EQ(d, none, Ge(lower, upper));
  }
};
// Dispatches TestWeakFloat through ForFloatTypes/ForPartialVectors.
HWY_NOINLINE void TestAllWeakFloat() {
  ForPartialVectors<TestWeakFloat> test{};
  ForFloatTypes(test);
}
// Returns the result of LoadDup128 on a 16-byte-aligned two-element array
// holding `lo` at index 0 and `hi` at index 1.
template <class D>
static HWY_NOINLINE Vec<D> Make128(D d, uint64_t hi, uint64_t lo) {
  alignas(16) uint64_t halves[2] = {lo, hi};
  return LoadDup128(d, halves);
}
// Lt128. The expectations below treat each (hi, lo) pair produced by Make128
// as a single number in which `hi` is the more significant half.
struct TestLt128 {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> zero = Zero(d);
    const Vec<D> lo1 = Make128(d, 0, 1);    // hi = 0, lo = 1
    const Vec<D> hi1 = Make128(d, 1, 0);    // hi = 1, lo = 0
    const Vec<D> both1 = Add(lo1, hi1);     // hi = 1, lo = 1
    const auto none = MaskFalse(d);
    const auto all = MaskTrue(d);

    // Irreflexive.
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, lo1, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, hi1, hi1));

    // Ascending operands.
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, zero, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, lo1, hi1));
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, lo1, both1));

    // The same pairs with the operands exchanged.
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, lo1, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, hi1, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, both1, lo1));

    // Distinct values per lane, to verify that 128-bit blocks are compared
    // independently of each other.
    const Vec<D> ramp = Iota(d, 1);
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, ramp, Add(ramp, lo1)));
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, ramp, Add(ramp, hi1)));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, Add(ramp, lo1), ramp));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, Add(ramp, hi1), ramp));

    // Both halves at the lane type's maximum: nothing exceeds it.
    const Vec<D> vmax = Make128(d, LimitsMax<T>(), LimitsMax<T>());
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, vmax, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, vmax, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, vmax, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, vmax, hi1));
    HWY_ASSERT_MASK_EQ(d, none, Lt128(d, vmax, both1));
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, zero, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, lo1, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, hi1, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Lt128(d, both1, vmax));
  }
};
// Runs TestLt128 via ForGEVectors<128, ...> with uint64_t lanes.
HWY_NOINLINE void TestAllLt128() {
  ForGEVectors<128, TestLt128> test{};
  test(uint64_t{});
}
// Lt128Upper. The expectations below correspond to comparing only the `hi`
// halves of the (hi, lo) pairs produced by Make128; differences confined to
// `lo` never yield "less than".
struct TestLt128Upper {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> zero = Zero(d);
    const Vec<D> lo1 = Make128(d, 0, 1);    // hi = 0, lo = 1
    const Vec<D> hi1 = Make128(d, 1, 0);    // hi = 1, lo = 0
    const Vec<D> both1 = Add(lo1, hi1);     // hi = 1, lo = 1
    const auto none = MaskFalse(d);
    const auto all = MaskTrue(d);

    // Irreflexive.
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, lo1, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, hi1, hi1));

    // zero and lo1 share hi = 0, so they are not ordered; the others have a
    // larger hi on the right.
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, zero, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Lt128Upper(d, lo1, hi1));
    HWY_ASSERT_MASK_EQ(d, all, Lt128Upper(d, lo1, both1));

    // The same pairs with the operands exchanged.
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, lo1, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, hi1, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, both1, lo1));

    // Distinct values per lane, to verify that 128-bit blocks are compared
    // independently of each other.
    const Vec<D> ramp = Iota(d, 1);
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, ramp, Add(ramp, lo1)));
    HWY_ASSERT_MASK_EQ(d, all, Lt128Upper(d, ramp, Add(ramp, hi1)));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, Add(ramp, lo1), ramp));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, Add(ramp, hi1), ramp));

    // Both halves at the lane type's maximum: nothing exceeds it.
    const Vec<D> vmax = Make128(d, LimitsMax<T>(), LimitsMax<T>());
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, vmax, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, vmax, zero));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, vmax, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, vmax, hi1));
    HWY_ASSERT_MASK_EQ(d, none, Lt128Upper(d, vmax, both1));
    HWY_ASSERT_MASK_EQ(d, all, Lt128Upper(d, zero, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Lt128Upper(d, lo1, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Lt128Upper(d, hi1, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Lt128Upper(d, both1, vmax));
  }
};
// Runs TestLt128Upper via ForGEVectors<128, ...> with uint64_t lanes.
HWY_NOINLINE void TestAllLt128Upper() {
  ForGEVectors<128, TestLt128Upper> test{};
  test(uint64_t{});
}
// Eq128 and Ne128. The expectations below require both halves of the
// (hi, lo) pairs produced by Make128 to match for equality; Ne128 is always
// expected to be the complement of Eq128.
struct TestEq128 {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> zero = Zero(d);
    const Vec<D> lo1 = Make128(d, 0, 1);    // hi = 0, lo = 1
    const Vec<D> hi1 = Make128(d, 1, 0);    // hi = 1, lo = 0
    const Vec<D> both1 = Add(lo1, hi1);     // hi = 1, lo = 1
    const auto none = MaskFalse(d);
    const auto all = MaskTrue(d);

    // A value compared with itself.
    HWY_ASSERT_MASK_EQ(d, all, Eq128(d, zero, zero));
    HWY_ASSERT_MASK_EQ(d, all, Eq128(d, lo1, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Eq128(d, hi1, hi1));
    HWY_ASSERT_MASK_EQ(d, none, Ne128(d, zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Ne128(d, lo1, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Ne128(d, hi1, hi1));

    // Operands differing in at least one half.
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, zero, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, lo1, hi1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, lo1, both1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, zero, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, lo1, hi1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, lo1, both1));

    // The same pairs with the operands exchanged.
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, lo1, zero));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, hi1, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, both1, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, lo1, zero));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, hi1, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, both1, lo1));

    // Distinct values per lane, to verify that 128-bit blocks are compared
    // independently of each other.
    const Vec<D> ramp = Iota(d, 1);
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, ramp, Add(ramp, lo1)));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, ramp, Add(ramp, hi1)));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, Add(ramp, lo1), ramp));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, Add(ramp, hi1), ramp));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, ramp, Add(ramp, lo1)));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, ramp, Add(ramp, hi1)));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, Add(ramp, lo1), ramp));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, Add(ramp, hi1), ramp));

    // Both halves at the lane type's maximum: equal only to itself.
    const Vec<D> vmax = Make128(d, LimitsMax<T>(), LimitsMax<T>());
    HWY_ASSERT_MASK_EQ(d, all, Eq128(d, vmax, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Ne128(d, vmax, vmax));

    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, vmax, zero));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, vmax, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, vmax, hi1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, vmax, both1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, zero, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, lo1, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, hi1, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Eq128(d, both1, vmax));

    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, vmax, zero));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, vmax, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, vmax, hi1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, vmax, both1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, zero, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, lo1, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, hi1, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Ne128(d, both1, vmax));
  }
};
// Runs TestEq128 via ForGEVectors<128, ...> with uint64_t lanes.
HWY_NOINLINE void TestAllEq128() {
  ForGEVectors<128, TestEq128> test{};
  test(uint64_t{});
}
// Eq128Upper and Ne128Upper. The expectations below correspond to comparing
// only the `hi` halves of the (hi, lo) pairs produced by Make128: operands
// that differ solely in `lo` are expected to compare equal. Ne128Upper is
// always expected to be the complement of Eq128Upper.
struct TestEq128Upper {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> zero = Zero(d);
    const Vec<D> lo1 = Make128(d, 0, 1);    // hi = 0, lo = 1
    const Vec<D> hi1 = Make128(d, 1, 0);    // hi = 1, lo = 0
    const Vec<D> both1 = Add(lo1, hi1);     // hi = 1, lo = 1
    const auto none = MaskFalse(d);
    const auto all = MaskTrue(d);

    // A value compared with itself.
    HWY_ASSERT_MASK_EQ(d, all, Eq128Upper(d, zero, zero));
    HWY_ASSERT_MASK_EQ(d, all, Eq128Upper(d, lo1, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Eq128Upper(d, hi1, hi1));
    HWY_ASSERT_MASK_EQ(d, none, Ne128Upper(d, zero, zero));
    HWY_ASSERT_MASK_EQ(d, none, Ne128Upper(d, lo1, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Ne128Upper(d, hi1, hi1));

    // Same hi (0), different lo: equal as far as the upper half goes.
    HWY_ASSERT_MASK_EQ(d, all, Eq128Upper(d, zero, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Ne128Upper(d, zero, lo1));

    // Different hi.
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, lo1, hi1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, lo1, both1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, lo1, hi1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, lo1, both1));

    // The same pairs with the operands exchanged.
    HWY_ASSERT_MASK_EQ(d, all, Eq128Upper(d, lo1, zero));
    HWY_ASSERT_MASK_EQ(d, none, Ne128Upper(d, lo1, zero));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, hi1, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, both1, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, hi1, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, both1, lo1));

    // Distinct values per lane, to verify that 128-bit blocks are compared
    // independently of each other. Adding lo1 leaves hi unchanged; adding hi1
    // changes it.
    const Vec<D> ramp = Iota(d, 1);
    HWY_ASSERT_MASK_EQ(d, all, Eq128Upper(d, ramp, Add(ramp, lo1)));
    HWY_ASSERT_MASK_EQ(d, none, Ne128Upper(d, ramp, Add(ramp, lo1)));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, ramp, Add(ramp, hi1)));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, ramp, Add(ramp, hi1)));
    HWY_ASSERT_MASK_EQ(d, all, Eq128Upper(d, Add(ramp, lo1), ramp));
    HWY_ASSERT_MASK_EQ(d, none, Ne128Upper(d, Add(ramp, lo1), ramp));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, Add(ramp, hi1), ramp));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, Add(ramp, hi1), ramp));

    // Both halves at the lane type's maximum: equal only to itself.
    const Vec<D> vmax = Make128(d, LimitsMax<T>(), LimitsMax<T>());
    HWY_ASSERT_MASK_EQ(d, all, Eq128Upper(d, vmax, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Ne128Upper(d, vmax, vmax));

    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, vmax, zero));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, vmax, lo1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, vmax, hi1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, vmax, both1));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, zero, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, lo1, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, hi1, vmax));
    HWY_ASSERT_MASK_EQ(d, none, Eq128Upper(d, both1, vmax));

    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, vmax, zero));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, vmax, lo1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, vmax, hi1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, vmax, both1));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, zero, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, lo1, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, hi1, vmax));
    HWY_ASSERT_MASK_EQ(d, all, Ne128Upper(d, both1, vmax));
  }
};
// Runs TestEq128Upper via ForGEVectors<128, ...> with uint64_t lanes.
HWY_NOINLINE void TestAllEq128Upper() {
  ForGEVectors<128, TestEq128Upper> test{};
  test(uint64_t{});
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace hwy
HWY_AFTER_NAMESPACE();
// Test registration: exposes every TestAll* function defined above as a case
// of the HwyCompareTest suite.
// NOTE(review): HWY_ONCE presumably evaluates to nonzero in only one of the
// per-target passes driven by hwy/foreach_target.h, so that this block is
// compiled a single time — confirm against the Highway headers.
#if HWY_ONCE
namespace hwy {
HWY_BEFORE_TEST(HwyCompareTest);
// Strict comparisons (Gt/Lt) and equality.
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEquality);
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictUnsigned);
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictInt);
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllStrictFloat);
// Weak comparisons (Ge/Le).
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllWeakUnsigned);
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllWeakInt);
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllWeakFloat);
// 128-bit block comparisons.
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllLt128);
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllLt128Upper);
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEq128);
HWY_EXPORT_AND_TEST_P(HwyCompareTest, TestAllEq128Upper);
} // namespace hwy
#endif