Source code

Revision control

Copy as Markdown

Other Tools

// Copyright 2022 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Interface to vectorized quicksort with dynamic dispatch. For static dispatch
// without any DLLEXPORT, avoid including this header and instead define
// VQSORT_ONLY_STATIC, then call VQSortStatic* in vqsort-inl.h.
//
// Paper with measurements: https://arxiv.org/abs/2205.05982
//
// To ensure the overhead of using wide vectors (e.g. AVX2 or AVX-512) is
// worthwhile, we recommend using this code for sorting arrays whose size is at
// least 100 KiB. See the README for details.
#ifndef HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_
#define HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_
// IWYU pragma: begin_exports
#include "hwy/base.h"
#include "hwy/contrib/sort/order.h" // SortAscending
// IWYU pragma: end_exports
namespace hwy {
// Vectorized Quicksort: sorts keys[0, n). Does not preserve the ordering of
// equivalent keys (defined as: neither greater nor less than another).
// Dispatches to the best available instruction set. Does not allocate memory.
// Uses about 1.2 KiB stack plus an internal 3-word TLS cache for random state.
HWY_CONTRIB_DLLEXPORT void VQSort(uint16_t* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(uint16_t* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(uint32_t* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(uint32_t* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(uint64_t* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(uint64_t* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(int16_t* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(int16_t* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(int32_t* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(int32_t* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(int64_t* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(int64_t* HWY_RESTRICT keys, size_t n,
SortDescending);
// These two must only be called if hwy::HaveFloat16() is true.
HWY_CONTRIB_DLLEXPORT void VQSort(float16_t* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(float16_t* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(float* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(float* HWY_RESTRICT keys, size_t n,
SortDescending);
// These two must only be called if hwy::HaveFloat64() is true.
HWY_CONTRIB_DLLEXPORT void VQSort(double* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(double* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(uint128_t* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(uint128_t* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(K64V64* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(K64V64* HWY_RESTRICT keys, size_t n,
SortDescending);
HWY_CONTRIB_DLLEXPORT void VQSort(K32V32* HWY_RESTRICT keys, size_t n,
SortAscending);
HWY_CONTRIB_DLLEXPORT void VQSort(K32V32* HWY_RESTRICT keys, size_t n,
SortDescending);
// User-level caching is no longer required, so this class is no longer
// beneficial. We recommend using the simpler VQSort() interface instead, and
// retain this class only for compatibility. It now just calls VQSort.
class HWY_CONTRIB_DLLEXPORT Sorter {
public:
Sorter();
~Sorter() { Delete(); }
// Move-only
Sorter(const Sorter&) = delete;
Sorter& operator=(const Sorter&) = delete;
Sorter(Sorter&& /*other*/) {}
Sorter& operator=(Sorter&& /*other*/) { return *this; }
void operator()(uint16_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(uint16_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(uint32_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(uint32_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(uint64_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(uint64_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(int16_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(int16_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(int32_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(int32_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(int64_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(int64_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
// These two must only be called if hwy::HaveFloat16() is true.
void operator()(float16_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(float16_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(float* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(float* HWY_RESTRICT keys, size_t n, SortDescending) const;
// These two must only be called if hwy::HaveFloat64() is true.
void operator()(double* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(double* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(uint128_t* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(uint128_t* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(K64V64* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(K64V64* HWY_RESTRICT keys, size_t n, SortDescending) const;
void operator()(K32V32* HWY_RESTRICT keys, size_t n, SortAscending) const;
void operator()(K32V32* HWY_RESTRICT keys, size_t n, SortDescending) const;
// Unused
static void Fill24Bytes(const void*, size_t, void*);
static bool HaveFloat64(); // Can also use hwy::HaveFloat64 directly.
private:
void Delete();
template <typename T>
T* Get() const {
return unused_;
}
#if HWY_COMPILER_CLANG
HWY_DIAGNOSTICS(push)
HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wunused-private-field")
#endif
void* unused_ = nullptr;
#if HWY_COMPILER_CLANG
HWY_DIAGNOSTICS(pop)
#endif
};
// Used by vqsort-inl unless VQSORT_ONLY_STATIC.
HWY_CONTRIB_DLLEXPORT bool Fill16BytesSecure(void* bytes);
// Unused, only provided for binary compatibility.
HWY_CONTRIB_DLLEXPORT uint64_t* GetGeneratorState();
} // namespace hwy
#endif // HIGHWAY_HWY_CONTRIB_SORT_VQSORT_H_