transform.rs - mozsearch

comm-central/third_party/rust/jxl_transforms/src/transform.rs

Enable keyboard shortcuts

Revision control

Copy as Markdown

Other Tools

HG Web

// Copyright (c) the JPEG XL Project Authors. All rights reserved.

//

// Use of this source code is governed by a BSD-style

// license that can be found in the LICENSE file.

#![allow(clippy::excessive_precision)]

#![allow(clippy::approx_constant)]

use crate::{transform_map::HfTransformType, *};

use jxl_simd::{simd_function, SimdDescriptor};

fn idct2_top_block(s: usize, block_in: &[f32], block_out: &mut [f32]) {

    let num_2x2 = s / 2;

    for y in 0..num_2x2 {

        for x in 0..num_2x2 {

            let c00 = block_in[y * 8 + x];

            let c01 = block_in[y * 8 + num_2x2 + x];

            let c10 = block_in[(y + num_2x2) * 8 + x];

            let c11 = block_in[(y + num_2x2) * 8 + num_2x2 + x];

            let r00 = c00 + c01 + c10 + c11;

            let r01 = c00 + c01 - c10 - c11;

            let r10 = c00 - c01 + c10 - c11;

            let r11 = c00 - c01 - c10 + c11;

            block_out[y * 2 * 8 + x * 2] = r00;

            block_out[y * 2 * 8 + x * 2 + 1] = r01;

            block_out[(y * 2 + 1) * 8 + x * 2] = r10;

            block_out[(y * 2 + 1) * 8 + x * 2 + 1] = r11;

const AFV4X4BASIS: [f32; 256] = [

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.25,

    0.876902929799142,

    0.2206518106944235,

    -0.10140050393753763,

    -0.1014005039375375,

    0.2206518106944236,

    -0.10140050393753777,

    -0.10140050393753772,

    -0.10140050393753763,

    -0.10140050393753758,

    -0.10140050393753769,

    -0.1014005039375375,

    -0.10140050393753768,

    -0.10140050393753768,

    -0.10140050393753759,

    -0.10140050393753763,

    -0.10140050393753741,

    0.0,

    0.0,

    0.40670075830260755,

    0.44444816619734445,

    0.0,

    0.0,

    0.19574399372042936,

    0.2929100136981264,

    -0.40670075830260716,

    -0.19574399372042872,

    0.0,

    0.11379074460448091,

    -0.44444816619734384,

    -0.29291001369812636,

    -0.1137907446044814,

    0.0,

    0.0,

    0.0,

    -0.21255748058288748,

    0.3085497062849767,

    0.0,

    0.4706702258572536,

    -0.1621205195722993,

    0.0,

    -0.21255748058287047,

    -0.16212051957228327,

    -0.47067022585725277,

    -0.1464291867126764,

    0.3085497062849487,

    0.0,

    -0.14642918671266536,

    0.4251149611657548,

    0.0,

    -0.7071067811865474,

    0.0,

    0.0,

    0.7071067811865476,

    0.0,

    0.0,

    0.0,

    0.0,

    0.0,

    0.0,

    0.0,

    0.0,

    0.0,

    0.0,

    0.0,

    -0.4105377591765233,

    0.6235485373547691,

    -0.06435071657946274,

    -0.06435071657946266,

    0.6235485373547694,

    -0.06435071657946284,

    -0.0643507165794628,

    -0.06435071657946274,

    -0.06435071657946272,

    -0.06435071657946279,

    -0.06435071657946266,

    -0.06435071657946277,

    -0.06435071657946277,

    -0.06435071657946273,

    -0.06435071657946274,

    -0.0643507165794626,

    0.0,

    0.0,

    -0.4517556589999482,

    0.15854503551840063,

    0.0,

    -0.04038515160822202,

    0.0074182263792423875,

    0.39351034269210167,

    -0.45175565899994635,

    0.007418226379244351,

    0.1107416575309343,

    0.08298163094882051,

    0.15854503551839705,

    0.3935103426921022,

    0.0829816309488214,

    -0.45175565899994796,

    0.0,

    0.0,

    -0.304684750724869,

    0.5112616136591823,

    0.0,

    0.0,

    -0.290480129728998,

    -0.06578701549142804,

    0.304684750724884,

    0.2904801297290076,

    0.0,

    -0.23889773523344604,

    -0.5112616136592012,

    0.06578701549142545,

    0.23889773523345467,

    0.0,

    0.0,

    0.0,

    0.3017929516615495,

    0.25792362796341184,

    0.0,

    0.16272340142866204,

    0.09520022653475037,

    0.0,

    0.3017929516615503,

    0.09520022653475055,

    -0.16272340142866173,

    -0.35312385449816297,

    0.25792362796341295,

    0.0,

    -0.3531238544981624,

    -0.6035859033230976,

    0.0,

    0.0,

    0.40824829046386274,

    0.0,

    0.0,

    0.0,

    0.0,

    -0.4082482904638628,

    -0.4082482904638635,

    0.0,

    0.0,

    -0.40824829046386296,

    0.0,

    0.4082482904638634,

    0.408248290463863,

    0.0,

    0.0,

    0.0,

    0.1747866975480809,

    0.0812611176717539,

    0.0,

    0.0,

    -0.3675398009862027,

    -0.307882213957909,

    -0.17478669754808135,

    0.3675398009862011,

    0.0,

    0.4826689115059883,

    -0.08126111767175039,

    0.30788221395790305,

    -0.48266891150598584,

    0.0,

    0.0,

    0.0,

    -0.21105601049335784,

    0.18567180916109802,

    0.0,

    0.0,

    0.49215859013738733,

    -0.38525013709251915,

    0.21105601049335806,

    -0.49215859013738905,

    0.0,

    0.17419412659916217,

    -0.18567180916109904,

    0.3852501370925211,

    -0.1741941265991621,

    0.0,

    0.0,

    0.0,

    -0.14266084808807264,

    -0.3416446842253372,

    0.0,

    0.7367497537172237,

    0.24627107722075148,

    -0.08574019035519306,

    -0.14266084808807344,

    0.24627107722075137,

    0.14883399227113567,

    -0.04768680350229251,

    -0.3416446842253373,

    -0.08574019035519267,

    -0.047686803502292804,

    -0.14266084808807242,

    0.0,

    0.0,

    -0.13813540350758585,

    0.3302282550303788,

    0.0,

    0.08755115000587084,

    -0.07946706605909573,

    -0.4613374887461511,

    -0.13813540350758294,

    -0.07946706605910261,

    0.49724647109535086,

    0.12538059448563663,

    0.3302282550303805,

    -0.4613374887461554,

    0.12538059448564315,

    -0.13813540350758452,

    0.0,

    0.0,

    -0.17437602599651067,

    0.0702790691196284,

    0.0,

    -0.2921026642334881,

    0.3623817333531167,

    0.0,

    -0.1743760259965108,

    0.36238173335311646,

    0.29210266423348785,

    -0.4326608024727445,

    0.07027906911962818,

    0.0,

    -0.4326608024727457,

    0.34875205199302267,

    0.0,

    0.0,

    0.11354987314994337,

    -0.07417504595810355,

    0.0,

    0.19402893032594343,

    -0.435190496523228,

    0.21918684838857466,

    0.11354987314994257,

    -0.4351904965232251,

    0.5550443808910661,

    -0.25468277124066463,

    -0.07417504595810233,

    0.2191868483885728,

    -0.25468277124066413,

    0.1135498731499429,

];

#[allow(clippy::excessive_precision)]

#[allow(clippy::approx_constant)]

fn avfidct4x4(coeffs: &[f32], pixels: &mut [f32]) {

    for i in 0..16 {

        let mut pixel: f32 = 0.0;

        for j in 0..16 {

            pixel += coeffs[j] * AFV4X4BASIS[j * 16 + i];

        pixels[i] = pixel;

#[inline(always)]

fn afv_transform_to_pixels<D: SimdDescriptor>(

    d: D,

    afv_kind: usize,

    coefficients: &[f32],

    pixels: &mut [f32],

) {

    let afv_x = afv_kind & 1;

    let afv_y = afv_kind / 2;

    let block00 = coefficients[0];

    let block01 = coefficients[1];

    let block10 = coefficients[8];

    let dcs: [f32; 3] = [

        (block00 + block10 + block01) * 4.0,

        block00 + block10 - block01,

        block00 - block10,

];

    // IAFV: (even, even) positions.

    let mut coeff: Vec<f32> = vec![0.0; 4 * 4];

    for iy in 0..4 {

        for ix in 0..4 {

            coeff[iy * 4 + ix] = if ix == 0 && iy == 0 {

                dcs[0]

            } else {

                coefficients[iy * 2 * 8 + ix * 2]

};

    let mut block: Vec<f32> = vec![0.0; 4 * 8];

    avfidct4x4(&coeff, &mut block);

    for iy in 0..4 {

        let block_y = if afv_y == 1 { 3 - iy } else { iy };

        for ix in 0..4 {

            let block_x = if afv_x == 1 { 3 - ix } else { ix };

            pixels[(iy + afv_y * 4) * 8 + afv_x * 4 + ix] = block[block_y * 4 + block_x];

    // IDCT4x4 in (odd, even) positions.

    for iy in 0..4 {

        for ix in 0..4 {

            block[iy * 4 + ix] = if ix == 0 && iy == 0 {

                dcs[1]

            } else {

                coefficients[iy * 2 * 8 + ix * 2 + 1]

};

    idct2d_4_4(d, &mut block[0..16]);

    for iy in 0..4 {

        for ix in 0..4 {

            pixels[(iy + afv_y * 4) * 8 + (1 - afv_x) * 4 + ix] = block[iy * 4 + ix];

    // IDCT4x8.

    for iy in 0..4 {

        for ix in 0..8 {

            block[iy * 8 + ix] = if ix == 0 && iy == 0 {

                dcs[2]

            } else {

                coefficients[(1 + iy * 2) * 8 + ix]

};

    idct2d_4_8(d, &mut block);

    for iy in 0..4 {

        for ix in 0..8 {

            pixels[(iy + (1 - afv_y) * 4) * 8 + ix] = block[iy * 8 + ix];

#[inline(always)]

pub fn transform_to_pixels_impl<D: SimdDescriptor>(

    d: D,

    transform_type: HfTransformType,

    lf: &mut [f32],

    transform_buffer: &mut [f32],

) {

    match transform_type {

        HfTransformType::DCT => d.call(

            #[inline(always)]

            |_| {

                transform_buffer[0] = lf[0];

                idct2d_8_8(d, &mut transform_buffer[0..64]);

},

),

        HfTransformType::DCT16X16 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_2_2(d, &mut lf[..4], transform_buffer);

                idct2d_16_16(d, &mut transform_buffer[0..256]);

},

),

        HfTransformType::DCT32X32 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_4_4(d, &mut lf[..16], transform_buffer);

                idct2d_32_32(d, &mut transform_buffer[0..1024]);

},

),

        HfTransformType::DCT16X8 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_2_1(d, &mut lf[..2], transform_buffer);

                idct2d_16_8(d, &mut transform_buffer[0..128]);

},

),

        HfTransformType::DCT8X16 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_1_2(d, &mut lf[..2], transform_buffer);

                idct2d_8_16(d, &mut transform_buffer[0..128]);

},

),

        HfTransformType::DCT32X8 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_4_1(d, &mut lf[..4], transform_buffer);

                idct2d_32_8(d, &mut transform_buffer[0..256]);

},

),

        HfTransformType::DCT8X32 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_1_4(d, &mut lf[..4], transform_buffer);

                idct2d_8_32(d, &mut transform_buffer[0..256]);

},

),

        HfTransformType::DCT32X16 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_4_2(d, &mut lf[..8], transform_buffer);

                idct2d_32_16(d, &mut transform_buffer[0..512]);

},

),

        HfTransformType::DCT16X32 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_2_4(d, &mut lf[..8], transform_buffer);

                idct2d_16_32(d, &mut transform_buffer[0..512]);

},

),

        HfTransformType::DCT64X64 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_8_8(d, &mut lf[..64], transform_buffer);

                idct2d_64_64(d, &mut transform_buffer[0..4096]);

},

),

        HfTransformType::DCT64X32 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_8_4(d, &mut lf[..32], transform_buffer);

                idct2d_64_32(d, &mut transform_buffer[0..2048]);

},

),

        HfTransformType::DCT32X64 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_4_8(d, &mut lf[..32], transform_buffer);

                idct2d_32_64(d, &mut transform_buffer[0..2048]);

},

),

        HfTransformType::DCT128X128 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_16_16(d, &mut lf[..256], transform_buffer);

                idct2d_128_128(d, &mut transform_buffer[0..16384]);

},

),

        HfTransformType::DCT128X64 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_16_8(d, &mut lf[..128], transform_buffer);

                idct2d_128_64(d, &mut transform_buffer[0..8192]);

},

),

        HfTransformType::DCT64X128 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_8_16(d, &mut lf[..128], transform_buffer);

                idct2d_64_128(d, &mut transform_buffer[0..8192]);

},

),

        HfTransformType::DCT256X256 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_32_32(d, &mut lf[..1024], transform_buffer);

                idct2d_256_256(d, &mut transform_buffer[0..65536]);

},

),

        HfTransformType::DCT256X128 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_32_16(d, &mut lf[..512], transform_buffer);

                idct2d_256_128(d, &mut transform_buffer[0..32768]);

},

),

        HfTransformType::DCT128X256 => d.call(

            #[inline(always)]

            |_| {

                reinterpreting_dct2d_16_32(d, &mut lf[..512], transform_buffer);

                idct2d_128_256(d, &mut transform_buffer[0..32768]);

},

),

        HfTransformType::AFV0 => {

            transform_buffer[0] = lf[0];

            let block: Vec<f32> = transform_buffer[0..64].to_vec();

            afv_transform_to_pixels::<D>(d, 0, &block, &mut transform_buffer[0..64]);

        HfTransformType::AFV1 => {

            transform_buffer[0] = lf[0];

            let block: Vec<f32> = transform_buffer[0..64].to_vec();

            afv_transform_to_pixels::<D>(d, 1, &block, &mut transform_buffer[0..64]);

        HfTransformType::AFV2 => {

            transform_buffer[0] = lf[0];

            let block: Vec<f32> = transform_buffer[0..64].to_vec();

            afv_transform_to_pixels::<D>(d, 2, &block, &mut transform_buffer[0..64]);

        HfTransformType::AFV3 => {

            transform_buffer[0] = lf[0];

            let block: Vec<f32> = transform_buffer[0..64].to_vec();

            afv_transform_to_pixels::<D>(d, 3, &block, &mut transform_buffer[0..64]);

        HfTransformType::IDENTITY => {

            transform_buffer[0] = lf[0];

            let coefficients: [f32; 64] = transform_buffer[0..64].try_into().unwrap();

            let block00 = coefficients[0];

            let block01 = coefficients[1];

            let block10 = coefficients[8];

            let block11 = coefficients[9];

            let dcs: [f32; 4] = [

                block00 + block01 + block10 + block11,

                block00 + block01 - block10 - block11,

                block00 - block01 + block10 - block11,

                block00 - block01 - block10 + block11,

];

            for y in 0..2 {

                for x in 0..2 {

                    let block_dc = dcs[y * 2 + x];

                    let mut residual_sum = 0.0;

                    for iy in 0..4 {

                        for ix in 0..4 {

                            if ix == 0 && iy == 0 {

                                continue;

                            residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];

                    transform_buffer[(4 * y + 1) * 8 + 4 * x + 1] =

                        block_dc - residual_sum * (1.0 / 16.0);

                    for iy in 0..4 {

                        for ix in 0..4 {

                            if ix == 1 && iy == 1 {

                                continue;

                            transform_buffer[(y * 4 + iy) * 8 + x * 4 + ix] = coefficients

                                [(y + iy * 2) * 8 + x + ix * 2]

                                + transform_buffer[(4 * y + 1) * 8 + 4 * x + 1];

                    transform_buffer[y * 4 * 8 + x * 4] = coefficients[(y + 2) * 8 + x + 2]

                        + transform_buffer[(4 * y + 1) * 8 + 4 * x + 1];

        HfTransformType::DCT2X2 => {

            transform_buffer[0] = lf[0];

            let mut tmp: [f32; 64] = transform_buffer[0..64].try_into().unwrap();

            idct2_top_block(2, &tmp, &mut transform_buffer[0..64]);

            idct2_top_block(4, &transform_buffer[0..64], &mut tmp);

            idct2_top_block(8, &tmp, &mut transform_buffer[0..64]);

        HfTransformType::DCT4X4 => {

            transform_buffer[0] = lf[0];

            let coefficients: [f32; 64] = transform_buffer[0..64].try_into().unwrap();

            let block00 = coefficients[0];

            let block01 = coefficients[1];

            let block10 = coefficients[8];

            let block11 = coefficients[9];

            let dcs: [f32; 4] = [

                block00 + block01 + block10 + block11,

                block00 + block01 - block10 - block11,

                block00 - block01 + block10 - block11,

                block00 - block01 - block10 + block11,

];

            for y in 0..2 {

                for x in 0..2 {

                    let mut block = [0.0; 4 * 4];

                    block[0] = dcs[y * 2 + x];

                    for iy in 0..4 {

                        for ix in 0..4 {

                            if ix == 0 && iy == 0 {

                                continue;

                            block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];

                    idct2d_4_4(d, &mut block);

                    for iy in 0..4 {

                        for ix in 0..4 {

                            transform_buffer[(y * 4 + iy) * 8 + x * 4 + ix] = block[iy * 4 + ix];

        HfTransformType::DCT8X4 => {

            transform_buffer[0] = lf[0];

            let coefficients: [f32; 64] = transform_buffer[0..64].try_into().unwrap();

            let block0 = coefficients[0];

            let block1 = coefficients[8];

            let dcs: [f32; 2] = [block0 + block1, block0 - block1];

            for x in 0..2 {

                let mut block = [0.0; 8 * 4];

                for iy in 0..4 {

                    for ix in 0..8 {

                        block[iy * 8 + ix] = if ix == 0 && iy == 0 {

                            dcs[x]

                        } else {

                            coefficients[(x + iy * 2) * 8 + ix]

                idct2d_8_4(d, &mut block);

                for iy in 0..8 {

                    for ix in 0..4 {

                        transform_buffer[iy * 8 + x * 4 + ix] = block[iy * 4 + ix];

        HfTransformType::DCT4X8 => {

            transform_buffer[0] = lf[0];

            let coefficients: [f32; 64] = transform_buffer[0..64].try_into().unwrap();

            let block0 = coefficients[0];

            let block1 = coefficients[8];

            let dcs: [f32; 2] = [block0 + block1, block0 - block1];

            for y in 0..2 {

                let mut block = [0.0; 4 * 8];

                for iy in 0..4 {

                    for ix in 0..8 {

                        block[iy * 8 + ix] = if ix == 0 && iy == 0 {

                            dcs[y]

                        } else {

                            coefficients[(y + iy * 2) * 8 + ix]

                idct2d_4_8(d, &mut block);

                for iy in 0..4 {

                    for ix in 0..8 {

                        transform_buffer[(y * 4 + iy) * 8 + ix] = block[iy * 8 + ix];

};

simd_function!(

    transform_to_pixels,

    d: D,

    /// This includes a call to what libjxl calls lowest_frequencies_from_lf.

    pub fn transform_to_pixels_trampoline(

        transform_type: HfTransformType,

        lf: &mut [f32],

        transform_buffer: &mut [f32],

) {

        transform_to_pixels_impl(d, transform_type, lf, transform_buffer);

);