Source code

Revision control

Copy as Markdown

Other Tools

// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stddef.h>
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "tests/table_test.cc"
#include "hwy/foreach_target.h" // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
// Checks TableLookupLanes against a scalar reference. The table is
// Iota(d, 1), so the lane selected by index k is expected to equal k + 1.
// Every index set is converted to the opaque index type twice - once via
// IndicesFromVec and once via SetTableIndices - and both lookups are verified.
struct TestTableLookupLanes {
  // T: lane type (the value itself is unused). d: descriptor/tag of the
  // vector type under test.
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const RebindToSigned<D> di;
    using TI = TFromD<decltype(di)>;
#if HWY_TARGET != HWY_SCALAR
    const size_t N = Lanes(d);
    auto idx = AllocateAligned<TI>(N);
    auto expected = AllocateAligned<T>(N);
    HWY_ASSERT(idx && expected);
    // Index lanes that are never overwritten below remain zero.
    ZeroBytes(idx.get(), N * sizeof(TI));
    const auto v = Iota(d, 1);

    if (N <= 8) {
      // Few enough lanes to enumerate every combination of the first (up to)
      // four indices. With N == 8, index lanes 4..7 stay at zero.
      for (size_t i0 = 0; i0 < N; ++i0) {
        idx[0] = static_cast<TI>(i0);

        for (size_t i1 = 0; i1 < N; ++i1) {
          // Guards prevent writing past the end of idx for short vectors.
          if (N >= 2) idx[1] = static_cast<TI>(i1);

          for (size_t i2 = 0; i2 < N; ++i2) {
            if (N >= 4) idx[2] = static_cast<TI>(i2);

            for (size_t i3 = 0; i3 < N; ++i3) {
              if (N >= 4) idx[3] = static_cast<TI>(i3);

              for (size_t i = 0; i < N; ++i) {
                expected[i] = ConvertScalarTo<T>(idx[i] + 1);  // == v[idx[i]]
              }

              // Indices supplied as a vector.
              const auto opaque1 = IndicesFromVec(d, Load(di, idx.get()));
              const auto actual1 = TableLookupLanes(v, opaque1);
              HWY_ASSERT_VEC_EQ(d, expected.get(), actual1);

              // Indices supplied as a pointer to an array.
              const auto opaque2 = SetTableIndices(d, idx.get());
              const auto actual2 = TableLookupLanes(v, opaque2);
              HWY_ASSERT_VEC_EQ(d, expected.get(), actual2);
            }
          }
        }
      }
    } else {
      // Too many permutations to test exhaustively; choose one with repeated
      // and cross-block indices and ensure indices do not exceed #lanes.
      // For larger vectors, upper lanes will be zero.
      HWY_ALIGN TI idx_source[16] = {1,  3,  2,  2,  8, 1, 7, 6,
                                     15, 14, 14, 15, 4, 9, 8, 5};
      for (size_t i = 0; i < N; ++i) {
        idx[i] = (i < 16) ? idx_source[i] : 0;
        // Avoid undefined results / asan errors by capping indices to the
        // last lane. The comparison is done in size_t (idx[i] is non-negative
        // here): narrowing N to TI would wrap for 8-bit lanes once N >= 128,
        // which made the check true for every lane and replaced the whole
        // pattern with static_cast<TI>(N - 1).
        if (static_cast<size_t>(idx[i]) >= N) {
          idx[i] = static_cast<TI>(N - 1);
        }
        expected[i] = ConvertScalarTo<T>(idx[i] + 1);  // == v[idx[i]]
      }

      // Indices supplied as a vector.
      const auto opaque1 = IndicesFromVec(d, Load(di, idx.get()));
      const auto actual1 = TableLookupLanes(v, opaque1);
      HWY_ASSERT_VEC_EQ(d, expected.get(), actual1);

      // Indices supplied as a pointer to an array.
      const auto opaque2 = SetTableIndices(d, idx.get());
      const auto actual2 = TableLookupLanes(v, opaque2);
      HWY_ASSERT_VEC_EQ(d, expected.get(), actual2);
    }
#else
    // Scalar target: only index 0 is exercised, and the lookup is expected to
    // return the input unchanged for both ways of creating the indices.
    const TI index = 0;
    const auto v = Set(d, 1);
    const auto opaque1 = SetTableIndices(d, &index);
    HWY_ASSERT_VEC_EQ(d, v, TableLookupLanes(v, opaque1));
    const auto opaque2 = IndicesFromVec(d, Zero(di));
    HWY_ASSERT_VEC_EQ(d, v, TableLookupLanes(v, opaque2));
#endif
  }
};
// Test entry point: passes TestTableLookupLanes, wrapped in ForPartialVectors,
// to ForAllTypes.
HWY_NOINLINE void TestAllTableLookupLanes() {
ForAllTypes(ForPartialVectors<TestTableLookupLanes>());
}
struct TestTwoTablesLookupLanes {
template <class T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) {
const RebindToUnsigned<D> du;
using TU = TFromD<decltype(du)>;
const size_t N = Lanes(d);
const size_t twiceN = N * 2;
auto idx = AllocateAligned<TU>(twiceN);
auto expected = AllocateAligned<T>(twiceN);
HWY_ASSERT(idx && expected);
ZeroBytes(idx.get(), twiceN * sizeof(TU));
const auto a = Iota(d, 1);
const auto b = Add(a, Set(d, ConvertScalarTo<T>(N)));
if (twiceN <= 8) { // Test all permutations
for (size_t i0 = 0; i0 < twiceN; ++i0) {
idx[0] = static_cast<TU>(i0);
for (size_t i1 = 0; i1 < twiceN; ++i1) {
if (twiceN >= 2) idx[1] = static_cast<TU>(i1);
for (size_t i2 = 0; i2 < twiceN; ++i2) {
if (twiceN >= 4) idx[2] = static_cast<TU>(i2);
for (size_t i3 = 0; i3 < twiceN; ++i3) {
if (twiceN >= 4) idx[3] = static_cast<TU>(i3);
for (size_t i = 0; i < twiceN; ++i) {
expected[i] = ConvertScalarTo<T>(idx[i] + 1); // == v[idx[i]]
}
const auto opaque1_a = IndicesFromVec(d, Load(du, idx.get()));
const auto opaque1_b = IndicesFromVec(d, Load(du, idx.get() + N));
const auto actual1_a = TwoTablesLookupLanes(d, a, b, opaque1_a);
const auto actual1_b = TwoTablesLookupLanes(d, a, b, opaque1_b);
HWY_ASSERT_VEC_EQ(d, expected.get(), actual1_a);
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual1_b);
const auto opaque2_a = SetTableIndices(d, idx.get());
const auto opaque2_b = SetTableIndices(d, idx.get() + N);
const auto actual2_a = TwoTablesLookupLanes(d, a, b, opaque2_a);
const auto actual2_b = TwoTablesLookupLanes(d, a, b, opaque2_b);
HWY_ASSERT_VEC_EQ(d, expected.get(), actual2_a);
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual2_b);
}
}
}
}
} else {
constexpr size_t kLanesPerBlock = 16 / sizeof(T);
constexpr size_t kMaxBlockIdx = static_cast<size_t>(LimitsMax<TU>()) >> 1;
static_assert(kMaxBlockIdx > 0, "kMaxBlockIdx > 0 must be true");
const size_t num_of_blocks_per_vect = HWY_MAX(N / kLanesPerBlock, 1);
const size_t num_of_blocks_to_check =
HWY_MIN(num_of_blocks_per_vect * 2, kMaxBlockIdx);
for (size_t i = 0; i < num_of_blocks_to_check; i++) {
// Too many permutations to test exhaustively; choose one with repeated
// and cross-block indices and ensure indices do not exceed #lanes.
// For larger vectors, upper lanes will be zero.
HWY_ALIGN TU idx_source[16] = {1, 3, 2, 2, 8, 1, 7, 6,
15, 14, 14, 15, 4, 9, 8, 5};
for (size_t j = 0; j < twiceN; ++j) {
idx[j] = static_cast<TU>((i * kLanesPerBlock + idx_source[j & 15] +
(j & static_cast<size_t>(-16))) &
(twiceN - 1));
expected[j] = ConvertScalarTo<T>(idx[j] + 1); // == v[idx[j]]
}
const auto opaque1_a = IndicesFromVec(d, Load(du, idx.get()));
const auto opaque1_b = IndicesFromVec(d, Load(du, idx.get() + N));
const auto actual1_a = TwoTablesLookupLanes(d, a, b, opaque1_a);
const auto actual1_b = TwoTablesLookupLanes(d, a, b, opaque1_b);
HWY_ASSERT_VEC_EQ(d, expected.get(), actual1_a);
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual1_b);
const auto opaque2_a = SetTableIndices(d, idx.get());
const auto opaque2_b = SetTableIndices(d, idx.get() + N);
const auto actual2_a = TwoTablesLookupLanes(d, a, b, opaque2_a);
const auto actual2_b = TwoTablesLookupLanes(d, a, b, opaque2_b);
HWY_ASSERT_VEC_EQ(d, expected.get(), actual2_a);
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual2_b);
}
}
}
};
// Test entry point: passes TestTwoTablesLookupLanes, wrapped in
// ForPartialVectors, to ForAllTypes.
HWY_NOINLINE void TestAllTwoTablesLookupLanes() {
ForAllTypes(ForPartialVectors<TestTwoTablesLookupLanes>());
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace hwy
HWY_AFTER_NAMESPACE();
// Registers both TestAll* entry points under the HwyTableTest suite.
// NOTE(review): HWY_ONCE presumably restricts this section to a single
// compilation pass, given that this file names itself in HWY_TARGET_INCLUDE
// for foreach_target.h - confirm against the Highway headers.
#if HWY_ONCE
namespace hwy {
HWY_BEFORE_TEST(HwyTableTest);
HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTableLookupLanes);
HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTwoTablesLookupLanes);
} // namespace hwy
#endif