// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <jxl/memory_manager.h>
#include "lib/jxl/base/status.h"
#include "lib/jxl/memory_manager_internal.h"
// Suppress any -Wdeprecated-declarations warning that GCC or Clang might
// emit for std::stable_sort in C++17-or-later mode.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif defined(__GNUC__)
#pragma GCC push_options
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#include <algorithm>
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC pop_options
#endif
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <utility>
#include <vector>
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/rect.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/dct_util.h"
#include "lib/jxl/enc_ans.h"
#include "lib/jxl/enc_bit_writer.h"
#include "lib/jxl/lehmer_code.h"
namespace jxl {
struct AuxOut;
enum class LayerType : uint8_t;
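// Returns a pair of bitfields indexed by kStrategyOrder id: the first marks
// every coefficient order that occurs in `rect`, the second the subset for
// which a custom (non-default) order may be transmitted.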
std::pair<uint32_t, uint32_t> ComputeUsedOrders(
const SpeedTier speed, const AcStrategyImage& ac_strategy,
const Rect& rect) {
// No coefficient reordering in Falcon or faster.
// Only uses DCT8 = 0, so bitfield = 1.
if (speed >= SpeedTier::kFalcon) return {1, 1};
uint32_t ret = 0;
uint32_t ret_customize = 0;
size_t xsize_blocks = rect.xsize();
size_t ysize_blocks = rect.ysize();
// TODO(veluca): precompute when doing DCT.
for (size_t by = 0; by < ysize_blocks; ++by) {
AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
for (size_t bx = 0; bx < xsize_blocks; ++bx) {
int ord = kStrategyOrder[acs_row[bx].RawStrategy()];
// Do not customize coefficient orders for blocks bigger than 32x32.
ret |= 1u << ord;
if (ord > 6) {
continue;
}
ret_customize |= 1u << ord;
}
}
// Use default orders for small images.
if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0};
return {ret, ret_customize};
}
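// Derives custom coefficient orders for the transforms in
// `current_used_orders`: per-band zero counts are gathered (optionally on a
// random sample of blocks), then each natural order is stably re-sorted so
// that bands that are nonzero more often come first. Orders that come out
// identical to the natural order are dropped from the local set before it is
// merged into `all_used_orders`.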
Status ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
const AcStrategyImage& ac_strategy,
const FrameDimensions& frame_dim,
uint32_t& all_used_orders, uint32_t prev_used_acs,
uint32_t current_used_acs,
uint32_t current_used_orders,
coeff_order_t* JXL_RESTRICT order) {
JxlMemoryManager* memory_manager = ac_strategy.memory_manager();
std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);
// If compressing at high speed and only using 8x8 DCTs, only consider a
// subset of blocks.
  double block_fraction = 1.0;
// TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
// encoding significantly less dense.
if (speed >= SpeedTier::kSquirrel && current_used_orders == 1) {
block_fraction = 0.5f;
}
// No need to compute number of zero coefficients if all orders are the
// default.
if (current_used_orders != 0) {
uint64_t threshold =
(std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
// Xorshift128+ adapted from xorshift128+-inl.h
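    // `use_sample` accepts a block when the top 32 bits of one 64-bit draw
    // are at most `threshold`, i.e. with probability ~block_fraction.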
auto use_sample = [&]() {
auto s1 = s[0];
const auto s0 = s[1];
const auto bits = s1 + s0; // b, c
s[0] = s0;
s1 ^= s1 << 23;
s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
s[1] = s1;
return (bits >> 32) <= threshold;
};
// Count number of zero coefficients, separately for each DCT band.
// TODO(veluca): precompute when doing DCT.
for (size_t group_index = 0; group_index < frame_dim.num_groups;
group_index++) {
const size_t gx = group_index % frame_dim.xsize_groups;
const size_t gy = group_index / frame_dim.xsize_groups;
const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
kGroupDimInBlocks, kGroupDimInBlocks,
frame_dim.xsize_blocks, frame_dim.ysize_blocks);
ConstACPtr rows[3];
ACType type = acs.Type();
for (size_t c = 0; c < 3; c++) {
rows[c] = acs.PlaneRow(c, group_index, 0);
}
size_t ac_offset = 0;
// TODO(veluca): SIMDfy.
for (size_t by = 0; by < rect.ysize(); ++by) {
AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
for (size_t bx = 0; bx < rect.xsize(); ++bx) {
AcStrategy acs = acs_row[bx];
if (!acs.IsFirstBlock()) continue;
          size_t size = kDCTBlockSize << acs.log2_covered_blocks();
          // Unsampled blocks must still advance the coefficient offset;
          // otherwise every later block in the group would be read from the
          // wrong position.
          if (!use_sample()) {
            ac_offset += size;
            continue;
          }
for (size_t c = 0; c < 3; ++c) {
const size_t order_offset =
CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
if (type == ACType::k16) {
for (size_t k = 0; k < size; k++) {
bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
num_zeros[order_offset + k] += is_zero ? 1 : 0;
}
} else {
for (size_t k = 0; k < size; k++) {
bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
num_zeros[order_offset + k] += is_zero ? 1 : 0;
}
}
// Ensure LLFs are first in the order.
size_t cx = acs.covered_blocks_x();
size_t cy = acs.covered_blocks_y();
CoefficientLayout(&cy, &cx);
for (size_t iy = 0; iy < cy; iy++) {
for (size_t ix = 0; ix < cx; ix++) {
num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
}
}
}
ac_offset += size;
}
}
}
}
struct PosAndCount {
uint32_t pos;
uint32_t count;
};
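  // Scratch buffer sized for the largest transform; it is reused for every
  // order id and channel below.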
size_t mem_bytes = AcStrategy::kMaxCoeffArea * sizeof(PosAndCount);
JXL_ASSIGN_OR_RETURN(auto mem,
AlignedMemory::Create(memory_manager, mem_bytes));
std::vector<coeff_order_t> natural_order_buffer;
uint16_t computed = 0;
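  // Several raw strategies share one kStrategyOrder id; `computed` ensures
  // each id is processed only once.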
for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
uint8_t ord = kStrategyOrder[o];
if (computed & (1 << ord)) continue;
computed |= 1 << ord;
AcStrategy acs = AcStrategy::FromRawStrategy(o);
size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();
// Do nothing for transforms that don't appear.
if ((1 << ord) & ~current_used_acs) continue;
// Do nothing if we already committed to this custom order previously.
if ((1 << ord) & prev_used_acs) continue;
if ((1 << ord) & all_used_orders) continue;
if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz);
acs.ComputeNaturalCoeffOrder(natural_order_buffer.data());
// Ensure natural coefficient order is not permuted if the order is
// not transmitted.
if ((1 << ord) & ~current_used_orders) {
for (size_t c = 0; c < 3; c++) {
size_t offset = CoeffOrderOffset(ord, c);
JXL_ENSURE(CoeffOrderOffset(ord, c + 1) - offset == sz);
memcpy(&order[offset], natural_order_buffer.data(),
sz * sizeof(*order));
}
continue;
}
bool is_nondefault = false;
for (uint8_t c = 0; c < 3; c++) {
// Apply zig-zag order.
PosAndCount* pos_and_val = mem.address<PosAndCount>();
size_t offset = CoeffOrderOffset(ord, c);
JXL_ENSURE(CoeffOrderOffset(ord, c + 1) - offset == sz);
float inv_sqrt_sz = 1.0f / std::sqrt(sz);
for (size_t i = 0; i < sz; ++i) {
size_t pos = natural_order_buffer[i];
pos_and_val[i].pos = pos;
// We don't care for the exact number -> quantize number of zeros,
// to get less permuted order.
pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
}
// Stable-sort -> elements with same number of zeros will preserve their
// order.
auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
return a.count < b.count;
};
std::stable_sort(pos_and_val, pos_and_val + sz, comparator);
// Grab indices.
for (size_t i = 0; i < sz; ++i) {
order[offset + i] = pos_and_val[i].pos;
is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos;
}
}
if (!is_nondefault) {
current_used_orders &= ~(1 << ord);
}
}
all_used_orders |= current_used_orders;
return true;
}
namespace {
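// Tokenizes `order` as a Lehmer code. Trailing zeros are dropped, and the
// first token records how many entries after `skip` are actually coded.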
Status TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
size_t size, std::vector<Token>* tokens) {
std::vector<LehmerT> lehmer(size);
std::vector<uint32_t> temp(size + 1);
JXL_RETURN_IF_ERROR(
ComputeLehmerCode(order, temp.data(), size, lehmer.data()));
size_t end = size;
while (end > skip && lehmer[end - 1] == 0) {
--end;
}
tokens->emplace_back(CoeffOrderContext(size), end - skip);
uint32_t last = 0;
for (size_t i = skip; i < end; ++i) {
tokens->emplace_back(CoeffOrderContext(last), lehmer[i]);
last = lehmer[i];
}
return true;
}
} // namespace
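// Writes a single permutation: its histograms first, then the token stream.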
Status EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
size_t size, BitWriter* writer, LayerType layer,
AuxOut* aux_out) {
JxlMemoryManager* memory_manager = writer->memory_manager();
std::vector<std::vector<Token>> tokens(1);
JXL_RETURN_IF_ERROR(TokenizePermutation(order, skip, size, tokens.data()));
std::vector<uint8_t> context_map;
EntropyEncodingData codes;
JXL_ASSIGN_OR_RETURN(
size_t cost, BuildAndEncodeHistograms(
memory_manager, HistogramParams(), kPermutationContexts,
tokens, &codes, &context_map, writer, layer, aux_out));
(void)cost;
JXL_RETURN_IF_ERROR(
WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out));
return true;
}
namespace {
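// Re-expresses `order` relative to the natural coefficient order via the
// LUT and tokenizes it, skipping the first `llf` entries (the low-frequency
// coefficients, which always stay in front).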
Status EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
std::vector<Token>* tokens, coeff_order_t* order_zigzag,
std::vector<coeff_order_t>& natural_order_lut) {
const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
const size_t size = kDCTBlockSize * llf;
for (size_t i = 0; i < size; ++i) {
order_zigzag[i] = natural_order_lut[order[i]];
}
JXL_RETURN_IF_ERROR(TokenizePermutation(order_zigzag, llf, size, tokens));
return true;
}
} // namespace
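// Encodes one permutation per used order id and channel; all permutations
// share a single histogram set and token stream.
//
// A minimal usage sketch. `speed`, `ac_coeffs`, `ac_strategy`, `frame_dim`,
// `rect`, `order`, `writer`, `layer`, and `aux_out` stand for encoder state
// that is set up elsewhere and only assumed here:
//
//   auto [used_acs, used_orders] =
//       ComputeUsedOrders(speed, ac_strategy, rect);
//   uint32_t all_used_orders = 0;
//   JXL_RETURN_IF_ERROR(ComputeCoeffOrder(
//       speed, ac_coeffs, ac_strategy, frame_dim, all_used_orders,
//       /*prev_used_acs=*/0, used_acs, used_orders, order));
//   JXL_RETURN_IF_ERROR(EncodeCoeffOrders(
//       static_cast<uint16_t>(all_used_orders), order, writer, layer,
//       aux_out));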
Status EncodeCoeffOrders(uint16_t used_orders,
const coeff_order_t* JXL_RESTRICT order,
BitWriter* writer, LayerType layer,
AuxOut* JXL_RESTRICT aux_out) {
JxlMemoryManager* memory_manager = writer->memory_manager();
size_t mem_bytes = AcStrategy::kMaxCoeffArea * sizeof(coeff_order_t);
JXL_ASSIGN_OR_RETURN(auto mem,
AlignedMemory::Create(memory_manager, mem_bytes));
uint16_t computed = 0;
std::vector<std::vector<Token>> tokens(1);
std::vector<coeff_order_t> natural_order_lut;
for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
uint8_t ord = kStrategyOrder[o];
if (computed & (1 << ord)) continue;
computed |= 1 << ord;
if ((used_orders & (1 << ord)) == 0) continue;
AcStrategy acs = AcStrategy::FromRawStrategy(o);
const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
const size_t size = kDCTBlockSize * llf;
if (natural_order_lut.size() < size) natural_order_lut.resize(size);
acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data());
for (size_t c = 0; c < 3; c++) {
JXL_RETURN_IF_ERROR(
EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, tokens.data(),
mem.address<coeff_order_t>(), natural_order_lut));
}
}
// Do not write anything if no order is used.
if (used_orders != 0) {
std::vector<uint8_t> context_map;
EntropyEncodingData codes;
JXL_ASSIGN_OR_RETURN(
size_t cost,
BuildAndEncodeHistograms(memory_manager, HistogramParams(),
kPermutationContexts, tokens, &codes,
&context_map, writer, layer, aux_out));
(void)cost;
JXL_RETURN_IF_ERROR(
WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out));
}
return true;
}
} // namespace jxl