Source code
Revision control
Copy as Markdown
Other Tools
// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stddef.h>
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "tests/shuffle4_test.cc"
#include "hwy/foreach_target.h" // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
// Test functor that checks Per4LaneBlockShuffle and, on non-scalar targets,
// detail::TblLookupPer4LaneBlkShuf against a scalar reference computed from the
// stored input lanes. operator()(T, D) runs the checks for one lane type T and
// one vector descriptor D.
class TestPer4LaneBlockShuffle {
 private:
  // Single-lane overload: there is no second lane, so the lane-0 validity
  // vector `a` is returned unchanged and `b` is ignored.
  template <class D, HWY_IF_LANES_LE_D(D, 1)>
  static HWY_INLINE VFromD<D> InterleaveMaskVectors(D /*d*/, VFromD<D> a,
                                                    VFromD<D> /*b*/) {
    return a;
  }
#if HWY_TARGET != HWY_SCALAR
  // Multi-lane overload: combines the two validity vectors with
  // InterleaveLower, so that lane 0 comes from `a` and lane 1 from `b`.
  template <class D, HWY_IF_LANES_GT_D(D, 1)>
  static HWY_INLINE VFromD<D> InterleaveMaskVectors(D d, VFromD<D> a,
                                                    VFromD<D> b) {
    return InterleaveLower(d, a, b);
  }
#endif

  // Returns the mask of result lanes that take part in the comparison.
  //
  // For N >= 4 this is FirstN(d, N). For N < 4, lane 0 is valid only if
  // idx0 < N and (when N > 1) lane 1 is valid only if idx1 < N, i.e. a lane is
  // excluded when its source index refers to a lane the vector does not have.
  template <class D>
  static HWY_INLINE Mask<D> Per4LaneBlockShufValidMask(D d, const size_t N,
                                                       const size_t idx1,
                                                       const size_t idx0) {
    if (N < 4) {
      const RebindToSigned<decltype(d)> di;
      using TI = TFromD<decltype(di)>;
      // -1 (all bits set) if the index is in range, otherwise 0.
      const auto lane_0_valid =
          Set(di, static_cast<TI>(-static_cast<int>(idx0 < N)));
      if (N > 1) {
        const auto lane_1_valid =
            Set(di, static_cast<TI>(-static_cast<int>(idx1 < N)));
        return RebindMask(d, MaskFromVec(InterleaveMaskVectors(
                                 di, lane_0_valid, lane_1_valid)));
      }
      return RebindMask(d, MaskFromVec(lane_0_valid));
    }
    return FirstN(d, N);
  }

  // Fills `expected` with the reference shuffle of `src_lanes` and asserts
  // that the valid lanes of `actual` match it.
  //
  // Within each block of four lanes starting at i, output lane k is
  // src_lanes[i + idxk]. Both arrays are accessed in whole blocks of four, so
  // they must hold at least N rounded up to a multiple of 4 elements.
  //
  // TODO(b/287462770): inline to work around incorrect SVE codegen
  template <class D>
  static HWY_INLINE void DoCheckPer4LaneBlkShufResult(
      D d, const size_t N, VFromD<D> actual,
      const TFromD<D>* HWY_RESTRICT src_lanes, TFromD<D>* HWY_RESTRICT expected,
      size_t idx3, size_t idx2, size_t idx1, size_t idx0) {
    for (size_t i = 0; i < N; i += 4) {
      expected[i] = src_lanes[i + idx0];
      expected[i + 1] = src_lanes[i + idx1];
      expected[i + 2] = src_lanes[i + idx2];
      expected[i + 3] = src_lanes[i + idx3];
    }
    if (N < 4) {
      // Lanes whose index is out of range are zeroed in `expected`; the same
      // lanes of `actual` are zeroed below via the valid-lanes mask.
      // ConvertScalarTo matches how operator() creates zero, and also works
      // for lane types that are not brace-initializable from an int.
      const TFromD<D> k0 = ConvertScalarTo<TFromD<D>>(0);
      if (idx0 >= N) expected[0] = k0;
      if (idx1 >= N) expected[1] = k0;
    }
    const auto valid_lanes_mask = Per4LaneBlockShufValidMask(d, N, idx1, idx0);
    HWY_ASSERT_VEC_EQ(d, expected, IfThenElseZero(valid_lanes_mask, actual));
  }

#if HWY_TARGET != HWY_SCALAR
  // Checks detail::TblLookupPer4LaneBlkShuf for all 256 packed index values.
  // idx3210 holds four 2-bit indices: bits [1:0] = idx0 up to bits [7:6] =
  // idx3.
  template <class D>
  static HWY_NOINLINE void TestTblLookupPer4LaneBlkShuf(
      D d, const size_t N, const TFromD<D>* HWY_RESTRICT src_lanes,
      TFromD<D>* HWY_RESTRICT expected) {
    const auto v = Load(d, src_lanes);
    for (size_t idx3210 = 0; idx3210 <= 0xFF; idx3210++) {
      const size_t idx3 = (idx3210 >> 6) & 3;
      const size_t idx2 = (idx3210 >> 4) & 3;
      const size_t idx1 = (idx3210 >> 2) & 3;
      const size_t idx0 = idx3210 & 3;
      const auto actual = detail::TblLookupPer4LaneBlkShuf(v, idx3210);
      DoCheckPer4LaneBlkShufResult(d, N, actual, src_lanes, expected, idx3,
                                   idx2, idx1, idx0);
    }
  }
#endif

  // Checks Per4LaneBlockShuffle<kIdx3, kIdx2, kIdx1, kIdx0>(v) against the
  // reference computed from `src_lanes`, which must hold the lanes of `v`.
  template <size_t kIdx3, size_t kIdx2, size_t kIdx1, size_t kIdx0, class D>
  static HWY_INLINE void DoTestPer4LaneBlkShuffle(
      D d, const size_t N, const VFromD<D> v,
      const TFromD<D>* HWY_RESTRICT src_lanes,
      TFromD<D>* HWY_RESTRICT expected) {
    const auto actual = Per4LaneBlockShuffle<kIdx3, kIdx2, kIdx1, kIdx0>(v);
    DoCheckPer4LaneBlkShufResult(d, N, actual, src_lanes, expected, kIdx3,
                                 kIdx2, kIdx1, kIdx0);
  }

  // Stores `v` to `src_lanes` and runs every shuffle check on it: the
  // exhaustive table-lookup test (non-scalar targets only) followed by a fixed
  // selection of compile-time index combinations.
  template <class D>
  static HWY_NOINLINE void DoTestPer4LaneBlkShuffles(
      D d, const size_t N, const VecArg<VFromD<D>> v,
      TFromD<D>* HWY_RESTRICT src_lanes, TFromD<D>* HWY_RESTRICT expected) {
    Store(v, d, src_lanes);
#if HWY_TARGET != HWY_SCALAR
    TestTblLookupPer4LaneBlkShuf(d, N, src_lanes, expected);
#endif
    DoTestPer4LaneBlkShuffle<0, 1, 2, 3>(d, N, v, src_lanes, expected);
#if !HWY_COMPILER_MSVC  // speed up MSVC builds
    DoTestPer4LaneBlkShuffle<0, 1, 3, 2>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<0, 2, 3, 1>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<0, 3, 0, 2>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<1, 0, 1, 0>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<1, 0, 3, 1>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<1, 0, 3, 2>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<1, 2, 0, 3>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<1, 2, 1, 3>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<1, 1, 0, 0>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<2, 0, 1, 3>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<2, 0, 2, 0>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<2, 1, 2, 0>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<2, 2, 0, 0>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<2, 3, 0, 1>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<2, 3, 3, 0>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<3, 0, 2, 1>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<3, 1, 0, 3>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<3, 1, 3, 1>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<3, 2, 1, 0>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<3, 2, 3, 2>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<3, 3, 0, 1>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<3, 3, 1, 1>(d, N, v, src_lanes, expected);
    DoTestPer4LaneBlkShuffle<3, 3, 2, 2>(d, N, v, src_lanes, expected);
#endif
  }

  // Test input for non-floating-point lane types: an unsigned Iota whose start
  // value is 0x0706050403020101 truncated to the lane width, reinterpreted as
  // a vector of D.
  template <class D>
  static HWY_INLINE Vec<D> GenerateTestVect(hwy::NonFloatTag /*tag*/, D d) {
    const RebindToUnsigned<decltype(d)> du;
    using TU = TFromD<decltype(du)>;
    constexpr TU kIotaStart =
        static_cast<TU>(0x0706050403020101u & LimitsMax<TU>());
    return BitCast(d, Iota(du, kIotaStart));
  }

  // Test input for floating-point lane types: every lane starts as 1. For
  // lane types wider than 16 bits, the integer test pattern with its upper 16
  // bits cleared is ORed into the bit representation so that lanes differ.
  // 16-bit lane types get the all-ones-valued vector unchanged.
  template <class D>
  static HWY_INLINE Vec<D> GenerateTestVect(hwy::FloatTag /*tag*/, D d) {
    const RebindToUnsigned<decltype(d)> du;
    using T = TFromD<decltype(d)>;
    using TU = TFromD<decltype(du)>;
    constexpr size_t kNumOfBitsInT = sizeof(T) * 8;
    // All bits except the upper 16; zero for 16-bit lane types.
    constexpr TU kIntBitsMask =
        (kNumOfBitsInT > 16) ? static_cast<TU>(static_cast<TU>(~TU{0}) >> 16)
                             : TU{0};
    const auto flt_iota = Set(d, 1);
    if (kIntBitsMask == 0) return flt_iota;
    const auto int_iota =
        And(GenerateTestVect(hwy::NonFloatTag(), du), Set(du, kIntBitsMask));
    return Or(flt_iota, BitCast(d, int_iota));
  }

 public:
  // Runs all shuffle checks for lane type T and descriptor `d`, first on the
  // generated test vector and then on the same vector with the most
  // significant bit of every lane flipped.
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const size_t N = Lanes(d);
    // Round up to a multiple of 4 because the reference code reads and writes
    // whole blocks of four lanes even when N < 4.
    const size_t alloc_len = static_cast<size_t>((N + 3) & (~size_t{3}));
    HWY_ASSERT(alloc_len >= 4);
    auto expected = AllocateAligned<T>(alloc_len);
    auto src_lanes = AllocateAligned<T>(alloc_len);
    HWY_ASSERT(expected && src_lanes);

    // Zero the final block of both arrays: Store() writes only N lanes, so
    // any padding lanes would otherwise be read uninitialized.
    const T k0 = ConvertScalarTo<T>(0);
    expected[alloc_len - 4] = k0;
    expected[alloc_len - 3] = k0;
    expected[alloc_len - 2] = k0;
    expected[alloc_len - 1] = k0;
    src_lanes[alloc_len - 4] = k0;
    src_lanes[alloc_len - 3] = k0;
    src_lanes[alloc_len - 2] = k0;
    src_lanes[alloc_len - 1] = k0;

    const auto v = GenerateTestVect(hwy::IsFloatTag<T>(), d);
    DoTestPer4LaneBlkShuffles(d, N, v, src_lanes.get(), expected.get());

    // Second pass with the top bit of each lane toggled.
    const RebindToUnsigned<decltype(d)> du;
    using TU = TFromD<decltype(du)>;
    const auto msb_mask =
        BitCast(d, Set(du, static_cast<TU>(TU{1} << (sizeof(TU) * 8 - 1))));
    DoTestPer4LaneBlkShuffles(d, N, Xor(v, msb_mask), src_lanes.get(),
                              expected.get());
  }
};
// Runs TestPer4LaneBlockShuffle for all lane types, wrapped in
// ForPartialFixedOrFullScalableVectors to select the vector descriptors.
HWY_NOINLINE void TestAllPer4LaneBlockShuffle() {
  using TestForVectors =
      ForPartialFixedOrFullScalableVectors<TestPer4LaneBlockShuffle>;
  ForAllTypes(TestForVectors());
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace hwy
HWY_AFTER_NAMESPACE();
// Test registration for this file. It sits outside the per-target namespace
// and is guarded by HWY_ONCE.
// NOTE(review): presumably HWY_ONCE ensures this is emitted a single time even
// though foreach_target.h re-includes this file (see HWY_TARGET_INCLUDE above)
// - confirm against the Highway documentation.
#if HWY_ONCE
namespace hwy {
// NOTE(review): presumably sets up the HwyShuffle4Test suite - confirm against
// hwy/tests/hwy_gtest.h.
HWY_BEFORE_TEST(HwyShuffle4Test);
// Associates TestAllPer4LaneBlockShuffle with the HwyShuffle4Test suite.
HWY_EXPORT_AND_TEST_P(HwyShuffle4Test, TestAllPer4LaneBlockShuffle);
} // namespace hwy
#endif