Source code

Revision control

Copy as Markdown

Other Tools

/*
* Copyright (c) 2025, Alliance for Open Media. All rights reserved.
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AOM_AOM_DSP_REDUCE_SUM_HWY_H_
#define AOM_AOM_DSP_REDUCE_SUM_HWY_H_
#include <type_traits>
#include "third_party/highway/hwy/highway.h"
HWY_BEFORE_NAMESPACE();
namespace {
namespace HWY_NAMESPACE {
namespace hn = hwy::HWY_NAMESPACE;
// Helper that sums the blocks of a NumBlocks-block vector into one block.
// It is only declared here so that the single-block specialization can be
// written ahead of the general (NumBlocks > 1) definition, which recurses
// into BlockReduceTraits<NumBlocks / 2>.
template <size_t NumBlocks>
struct BlockReduceTraits;
// Recursion terminator: with a single block there is nothing left to add, so
// the input vector is handed back unchanged.
template <>
struct BlockReduceTraits<1> {
  // d: tag describing v; it is not read and only fixes the type D.
  // v: the vector to return.
  template <typename D>
  HWY_ATTR HWY_INLINE static hn::VFromD<D> ReduceSum(D d, hn::VFromD<D> v) {
    static_cast<void>(d);  // Silences the unused-parameter warning.
    return v;
  }
};
// General case (NumBlocks > 1): fold the vector in half by adding its upper
// half onto its lower half, then let BlockReduceTraits<NumBlocks / 2> finish
// the job on the half-width result. NumBlocks must be a power of two so that
// repeated halving ends at the BlockReduceTraits<1> specialization.
template <size_t NumBlocks>
struct BlockReduceTraits {
  static_assert(NumBlocks > 1,
                "Primary template BlockReduceTraits assumes NumBlocks > 1");
  static_assert((NumBlocks & (NumBlocks - 1)) == 0,
                "BlockReduceTraits requires NumBlocks to be a power of 2.");

  // d: tag describing v; only its type is used, to derive the half-width tag.
  // v: the vector whose halves are added together.
  // Returns whatever the NumBlocks / 2 step produces for the summed halves.
  template <typename D>
  HWY_ATTR HWY_INLINE static hn::VFromD<hn::BlockDFromD<D>> ReduceSum(
      D d, hn::VFromD<D> v) {
    static_cast<void>(d);

    using HalfTag = hn::Half<D>;
    using NextStep = BlockReduceTraits<NumBlocks / 2>;

    constexpr HalfTag dh;
    const auto lower = hn::LowerHalf(dh, v);
    const auto upper = hn::UpperHalf(dh, v);
    return NextStep::ReduceSum(dh, hn::Add(lower, upper));
  }
};
// Adds the blocks of a vector together, producing a single block.
// Lanes that sit at the same position within each block are summed. For
// example, a 4-block vector holding 16 lanes of uint32_t:
//   [a3 b3 c3 d3 a2 b2 c2 d2 a1 b1 c1 d1 a0 b0 c0 d0]
// is reduced to a vector with 4 lanes:
//   [a3+a2+a1+a0 b3+b2+b1+b0 c3+c2+c1+c0 d3+d2+d1+d0]
// The number of blocks to fold is taken from int_tag.MaxBlocks().
template <typename D>
HWY_ATTR HWY_INLINE hn::Vec<hn::BlockDFromD<D>> BlockReduceSum(
    D int_tag, hn::VFromD<D> v) {
  constexpr size_t kNumBlocks = int_tag.MaxBlocks();
  using Reducer = BlockReduceTraits<kNumBlocks>;
  return Reducer::ReduceSum(int_tag, v);
}
} // namespace HWY_NAMESPACE
} // namespace
HWY_AFTER_NAMESPACE();
#endif // AOM_AOM_DSP_REDUCE_SUM_HWY_H_