transform.rs - mozsearch

mozilla-central/gfx/qcms/src/transform.rs (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Core :: Graphics

Revision control

Copy as Markdown

Other Tools

//  qcms

//  Copyright (C) 2009 Mozilla Foundation

//  Copyright (C) 1998-2007 Marti Maria

//

// Permission is hereby granted, free of charge, to any person obtaining

// a copy of this software and associated documentation files (the "Software"),

// to deal in the Software without restriction, including without limitation

// the rights to use, copy, modify, merge, publish, distribute, sublicense,

// and/or sell copies of the Software, and to permit persons to whom the Software

// is furnished to do so, subject to the following conditions:

//

// The above copyright notice and this permission notice shall be included in

// all copies or substantial portions of the Software.

//

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO

// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE

// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION

// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#![allow(clippy::missing_safety_doc)]

#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), feature = "neon"))]

use crate::transform_neon::{

    qcms_transform_data_bgra_out_lut_neon, qcms_transform_data_rgb_out_lut_neon,

    qcms_transform_data_rgba_out_lut_neon,

};

use crate::{

    chain::chain_transform,

    double_to_s15Fixed16Number,

    iccread::SUPPORTS_ICCV4,

    matrix::*,

    transform_util::{

        build_colorant_matrix, build_input_gamma_table, build_output_lut, compute_precache,

        lut_interp_linear,

},

};

use crate::{

    iccread::{qcms_CIE_xyY, qcms_CIE_xyYTRIPLE, Profile, GRAY_SIGNATURE, RGB_SIGNATURE},

    transform_util::clamp_float,

    Intent,

};

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]

use crate::{

    transform_avx::{

        qcms_transform_data_bgra_out_lut_avx, qcms_transform_data_rgb_out_lut_avx,

        qcms_transform_data_rgba_out_lut_avx,

},

    transform_sse2::{

        qcms_transform_data_bgra_out_lut_sse2, qcms_transform_data_rgb_out_lut_sse2,

        qcms_transform_data_rgba_out_lut_sse2,

},

};

use std::sync::atomic::Ordering;

use std::sync::Arc;

#[cfg(all(target_arch = "arm", feature = "neon"))]

use std::arch::is_arm_feature_detected;

#[cfg(all(target_arch = "aarch64", feature = "neon"))]

use std::arch::is_aarch64_feature_detected;

pub const PRECACHE_OUTPUT_SIZE: usize = 8192;

pub const PRECACHE_OUTPUT_MAX: usize = PRECACHE_OUTPUT_SIZE - 1;

pub const FLOATSCALE: f32 = PRECACHE_OUTPUT_SIZE as f32;

pub const CLAMPMAXVAL: f32 = ((PRECACHE_OUTPUT_SIZE - 1) as f32) / PRECACHE_OUTPUT_SIZE as f32;

#[repr(C)]

#[derive(Debug)]

pub struct PrecacheOuput {

    /* We previously used a count of 65536 here but that seems like more

     * precision than we actually need.  By reducing the size we can

     * improve startup performance and reduce memory usage. ColorSync on

     * 10.5 uses 4097 which is perhaps because they use a fixed point

     * representation where 1. is represented by 0x1000. */

    pub lut_r: [u8; PRECACHE_OUTPUT_SIZE],

    pub lut_g: [u8; PRECACHE_OUTPUT_SIZE],

    pub lut_b: [u8; PRECACHE_OUTPUT_SIZE],

impl Default for PrecacheOuput {

    fn default() -> PrecacheOuput {

        PrecacheOuput {

            lut_r: [0; PRECACHE_OUTPUT_SIZE],

            lut_g: [0; PRECACHE_OUTPUT_SIZE],

            lut_b: [0; PRECACHE_OUTPUT_SIZE],

/* used as a lookup table for the output transformation.

 * we refcount them so we only need to have one around per output

 * profile, instead of duplicating them per transform */

#[repr(C)]

#[repr(align(16))]

#[derive(Clone, Default)]

pub struct qcms_transform {

    pub matrix: [[f32; 4]; 3],

    pub input_gamma_table_r: Option<Box<[f32; 256]>>,

    pub input_gamma_table_g: Option<Box<[f32; 256]>>,

    pub input_gamma_table_b: Option<Box<[f32; 256]>>,

    pub input_clut_table_length: u16,

    pub clut: Option<Vec<f32>>,

    pub grid_size: u16,

    pub output_clut_table_length: u16,

    pub input_gamma_table_gray: Option<Box<[f32; 256]>>,

    pub out_gamma_r: f32,

    pub out_gamma_g: f32,

    pub out_gamma_b: f32,

    pub out_gamma_gray: f32,

    pub output_gamma_lut_r: Option<Vec<u16>>,

    pub output_gamma_lut_g: Option<Vec<u16>>,

    pub output_gamma_lut_b: Option<Vec<u16>>,

    pub output_gamma_lut_gray: Option<Vec<u16>>,

    pub output_gamma_lut_r_length: usize,

    pub output_gamma_lut_g_length: usize,

    pub output_gamma_lut_b_length: usize,

    pub output_gamma_lut_gray_length: usize,

    pub precache_output: Option<Arc<PrecacheOuput>>,

    pub transform_fn: transform_fn_t,

pub type transform_fn_t =

    Option<unsafe fn(_: &qcms_transform, _: *const u8, _: *mut u8, _: usize) -> ()>;

/// The format of pixel data

#[repr(u32)]

#[derive(PartialEq, Eq, Clone, Copy)]

#[allow(clippy::upper_case_acronyms)]

pub enum DataType {

    RGB8 = 0,

    RGBA8 = 1,

    BGRA8 = 2,

    Gray8 = 3,

    GrayA8 = 4,

    CMYK = 5,

impl DataType {

    pub fn bytes_per_pixel(&self) -> usize {

        match self {

            RGB8 => 3,

            RGBA8 => 4,

            BGRA8 => 4,

            Gray8 => 1,

            GrayA8 => 2,

            CMYK => 4,

use DataType::*;

#[repr(C)]

#[derive(Copy, Clone)]

#[allow(clippy::upper_case_acronyms)]

pub struct CIE_XYZ {

    pub X: f64,

    pub Y: f64,

    pub Z: f64,

pub trait Format {

    const kRIndex: usize;

    const kGIndex: usize;

    const kBIndex: usize;

    const kAIndex: usize;

#[allow(clippy::upper_case_acronyms)]

pub struct BGRA;

impl Format for BGRA {

    const kBIndex: usize = 0;

    const kGIndex: usize = 1;

    const kRIndex: usize = 2;

    const kAIndex: usize = 3;

#[allow(clippy::upper_case_acronyms)]

pub struct RGBA;

impl Format for RGBA {

    const kRIndex: usize = 0;

    const kGIndex: usize = 1;

    const kBIndex: usize = 2;

    const kAIndex: usize = 3;

#[allow(clippy::upper_case_acronyms)]

pub struct RGB;

impl Format for RGB {

    const kRIndex: usize = 0;

    const kGIndex: usize = 1;

    const kBIndex: usize = 2;

    const kAIndex: usize = 0xFF;

pub trait GrayFormat {

    const has_alpha: bool;

pub struct Gray;

impl GrayFormat for Gray {

    const has_alpha: bool = false;

pub struct GrayAlpha;

impl GrayFormat for GrayAlpha {

    const has_alpha: bool = true;

#[inline]

fn clamp_u8(v: f32) -> u8 {

    if v > 255. {

    } else if v < 0. {

    } else {

        (v + 0.5).floor() as u8

// Build a White point, primary chromas transfer matrix from RGB to CIE XYZ

// This is just an approximation, I am not handling all the non-linear

// aspects of the RGB to XYZ process, and assumming that the gamma correction

// has transitive property in the tranformation chain.

//

// the alghoritm:

//

//            - First I build the absolute conversion matrix using

//              primaries in XYZ. This matrix is next inverted

//            - Then I eval the source white point across this matrix

//              obtaining the coeficients of the transformation

//            - Then, I apply these coeficients to the original matrix

fn build_RGB_to_XYZ_transfer_matrix(

    white: qcms_CIE_xyY,

    primrs: qcms_CIE_xyYTRIPLE,

) -> Option<Matrix> {

    let xn = white.x;

    let yn = white.y;

    if yn == 0.0 {

        return None;

    let xr = primrs.red.x;

    let yr = primrs.red.y;

    let xg = primrs.green.x;

    let yg = primrs.green.y;

    let xb = primrs.blue.x;

    let yb = primrs.blue.y;

    let primaries = Matrix {

        m: [

            [xr as f32, xg as f32, xb as f32],

            [yr as f32, yg as f32, yb as f32],

                (1. - xr - yr) as f32,

                (1. - xg - yg) as f32,

                (1. - xb - yb) as f32,

],

],

};

    let white_point = Vector {

        v: [(xn / yn) as f32, 1., ((1. - xn - yn) / yn) as f32],

};

    let primaries_invert = primaries.invert()?;

    let coefs = primaries_invert.eval(white_point);

    Some(Matrix {

        m: [

                (coefs.v[0] as f64 * xr) as f32,

                (coefs.v[1] as f64 * xg) as f32,

                (coefs.v[2] as f64 * xb) as f32,

],

                (coefs.v[0] as f64 * yr) as f32,

                (coefs.v[1] as f64 * yg) as f32,

                (coefs.v[2] as f64 * yb) as f32,

],

                (coefs.v[0] as f64 * (1. - xr - yr)) as f32,

                (coefs.v[1] as f64 * (1. - xg - yg)) as f32,

                (coefs.v[2] as f64 * (1. - xb - yb)) as f32,

],

],

})

/* CIE Illuminant D50 */

const D50_XYZ: CIE_XYZ = CIE_XYZ {

    X: 0.9642,

    Y: 1.0000,

    Z: 0.8249,

};

/* from lcms: xyY2XYZ()

 * corresponds to argyll: icmYxy2XYZ() */

fn xyY2XYZ(source: qcms_CIE_xyY) -> CIE_XYZ {

    CIE_XYZ {

        X: source.x / source.y * source.Y,

        Y: source.Y,

        Z: (1. - source.x - source.y) / source.y * source.Y,

/* from lcms: ComputeChromaticAdaption */

// Compute chromatic adaption matrix using chad as cone matrix

fn compute_chromatic_adaption(

    source_white_point: CIE_XYZ,

    dest_white_point: CIE_XYZ,

    chad: Matrix,

) -> Option<Matrix> {

    let cone_source_XYZ = Vector {

        v: [

            source_white_point.X as f32,

            source_white_point.Y as f32,

            source_white_point.Z as f32,

],

};

    let cone_source_rgb = chad.eval(cone_source_XYZ);

    let cone_dest_XYZ = Vector {

        v: [

            dest_white_point.X as f32,

            dest_white_point.Y as f32,

            dest_white_point.Z as f32,

],

};

    let cone_dest_rgb = chad.eval(cone_dest_XYZ);

    let cone = Matrix {

        m: [

            [cone_dest_rgb.v[0] / cone_source_rgb.v[0], 0., 0.],

            [0., cone_dest_rgb.v[1] / cone_source_rgb.v[1], 0.],

            [0., 0., cone_dest_rgb.v[2] / cone_source_rgb.v[2]],

],

};

    let chad_inv = chad.invert()?;

    // Normalize

    Some(Matrix::multiply(chad_inv, Matrix::multiply(cone, chad)))

/* from lcms: cmsAdaptionMatrix */

// Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll

// Bradford is assumed

fn adaption_matrix(source_illumination: CIE_XYZ, target_illumination: CIE_XYZ) -> Option<Matrix> {

    let lam_rigg = {

        Matrix {

            m: [

                [0.8951, 0.2664, -0.1614],

                [-0.7502, 1.7135, 0.0367],

                [0.0389, -0.0685, 1.0296],

],

};

    compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg)

/* from lcms: cmsAdaptMatrixToD50 */

fn adapt_matrix_to_D50(r: Option<Matrix>, source_white_pt: qcms_CIE_xyY) -> Option<Matrix> {

    if source_white_pt.y == 0.0 {

        return None;

    let Dn: CIE_XYZ = xyY2XYZ(source_white_pt);

    let Bradford = adaption_matrix(Dn, D50_XYZ)?;

    Some(Matrix::multiply(Bradford, r?))

pub(crate) fn set_rgb_colorants(

    profile: &mut Profile,

    white_point: qcms_CIE_xyY,

    primaries: qcms_CIE_xyYTRIPLE,

) -> bool {

    let colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);

    let colorants = match adapt_matrix_to_D50(colorants, white_point) {

        Some(colorants) => colorants,

        None => return false,

};

    /* note: there's a transpose type of operation going on here */

    profile.redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0] as f64);

    profile.redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0] as f64);

    profile.redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0] as f64);

    profile.greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1] as f64);

    profile.greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1] as f64);

    profile.greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1] as f64);

    profile.blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2] as f64);

    profile.blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2] as f64);

    profile.blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2] as f64);

    true

pub(crate) fn get_rgb_colorants(

    white_point: qcms_CIE_xyY,

    primaries: qcms_CIE_xyYTRIPLE,

) -> Option<Matrix> {

    let colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);

    adapt_matrix_to_D50(colorants, white_point)

/* Alpha is not corrected.

   A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If

   RGB Is?" Tech Memo 17 (December 14, 1998).

    See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf

*/

unsafe extern "C" fn qcms_transform_data_gray_template_lut<I: GrayFormat, F: Format>(

    transform: &qcms_transform,

    mut src: *const u8,

    mut dest: *mut u8,

    length: usize,

) {

    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;

    let input_gamma_table_gray = transform.input_gamma_table_gray.as_ref().unwrap();

    let mut i: u32 = 0;

    while (i as usize) < length {

        let device: u8 = *src;

        src = src.offset(1);

        let mut alpha: u8 = 0xffu8;

        if I::has_alpha {

            alpha = *src;

            src = src.offset(1);

        let linear: f32 = input_gamma_table_gray[device as usize];

        let out_device_r: f32 = lut_interp_linear(

            linear as f64,

            &transform.output_gamma_lut_r.as_ref().unwrap(),

);

        let out_device_g: f32 = lut_interp_linear(

            linear as f64,

            &transform.output_gamma_lut_g.as_ref().unwrap(),

);

        let out_device_b: f32 = lut_interp_linear(

            linear as f64,

            &transform.output_gamma_lut_b.as_ref().unwrap(),

);

        *dest.add(F::kRIndex) = clamp_u8(out_device_r * 255f32);

        *dest.add(F::kGIndex) = clamp_u8(out_device_g * 255f32);

        *dest.add(F::kBIndex) = clamp_u8(out_device_b * 255f32);

        if F::kAIndex != 0xff {

            *dest.add(F::kAIndex) = alpha

        dest = dest.offset(components as isize);

        i += 1

unsafe fn qcms_transform_data_gray_out_lut(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_lut::<Gray, RGB>(transform, src, dest, length);

unsafe fn qcms_transform_data_gray_rgba_out_lut(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_lut::<Gray, RGBA>(transform, src, dest, length);

unsafe fn qcms_transform_data_gray_bgra_out_lut(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_lut::<Gray, BGRA>(transform, src, dest, length);

unsafe fn qcms_transform_data_graya_rgba_out_lut(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_lut::<GrayAlpha, RGBA>(transform, src, dest, length);

unsafe fn qcms_transform_data_graya_bgra_out_lut(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_lut::<GrayAlpha, BGRA>(transform, src, dest, length);

unsafe fn qcms_transform_data_gray_template_precache<I: GrayFormat, F: Format>(

    transform: &qcms_transform,

    mut src: *const u8,

    mut dest: *mut u8,

    length: usize,

) {

    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;

    let precache_output = transform.precache_output.as_deref().unwrap();

    let output_r = &precache_output.lut_r;

    let output_g = &precache_output.lut_g;

    let output_b = &precache_output.lut_b;

    let input_gamma_table_gray = transform

        .input_gamma_table_gray

        .as_ref()

        .unwrap()

        .as_ptr();

    let mut i: u32 = 0;

    while (i as usize) < length {

        let device: u8 = *src;

        src = src.offset(1);

        let mut alpha: u8 = 0xffu8;

        if I::has_alpha {

            alpha  = *src;

            src = src.offset(1);

        let linear: f32 = *input_gamma_table_gray.offset(device as isize);

        /* we could round here... */

        let gray: u16 = (linear * PRECACHE_OUTPUT_MAX as f32) as u16;

        *dest.add(F::kRIndex) = output_r[gray as usize];

        *dest.add(F::kGIndex) = output_g[gray as usize];

        *dest.add(F::kBIndex) = output_b[gray as usize];

        if F::kAIndex != 0xff {

            *dest.add(F::kAIndex) = alpha

        dest = dest.offset(components as isize);

        i += 1

unsafe fn qcms_transform_data_gray_out_precache(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_precache::<Gray, RGB>(transform, src, dest, length);

unsafe fn qcms_transform_data_gray_rgba_out_precache(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_precache::<Gray, RGBA>(transform, src, dest, length);

unsafe fn qcms_transform_data_gray_bgra_out_precache(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_precache::<Gray, BGRA>(transform, src, dest, length);

unsafe fn qcms_transform_data_graya_rgba_out_precache(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_precache::<GrayAlpha, RGBA>(transform, src, dest, length);

unsafe fn qcms_transform_data_graya_bgra_out_precache(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_gray_template_precache::<GrayAlpha, BGRA>(transform, src, dest, length);

unsafe fn qcms_transform_data_template_lut_precache<F: Format>(

    transform: &qcms_transform,

    mut src: *const u8,

    mut dest: *mut u8,

    length: usize,

) {

    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;

    let output_table_r = &transform.precache_output.as_deref().unwrap().lut_r;

    let output_table_g = &transform.precache_output.as_deref().unwrap().lut_g;

    let output_table_b = &transform.precache_output.as_deref().unwrap().lut_b;

    let input_gamma_table_r = transform.input_gamma_table_r.as_ref().unwrap().as_ptr();

    let input_gamma_table_g = transform.input_gamma_table_g.as_ref().unwrap().as_ptr();

    let input_gamma_table_b = transform.input_gamma_table_b.as_ref().unwrap().as_ptr();

    let mat = &transform.matrix;

    let mut i: u32 = 0;

    while (i as usize) < length {

        let device_r: u8 = *src.add(F::kRIndex);

        let device_g: u8 = *src.add(F::kGIndex);

        let device_b: u8 = *src.add(F::kBIndex);

        let mut alpha: u8 = 0;

        if F::kAIndex != 0xff {

            alpha = *src.add(F::kAIndex)

        src = src.offset(components as isize);

        let linear_r: f32 = *input_gamma_table_r.offset(device_r as isize);

        let linear_g: f32 = *input_gamma_table_g.offset(device_g as isize);

        let linear_b: f32 = *input_gamma_table_b.offset(device_b as isize);

        let mut out_linear_r = mat[0][0] * linear_r + mat[1][0] * linear_g + mat[2][0] * linear_b;

        let mut out_linear_g = mat[0][1] * linear_r + mat[1][1] * linear_g + mat[2][1] * linear_b;

        let mut out_linear_b = mat[0][2] * linear_r + mat[1][2] * linear_g + mat[2][2] * linear_b;

        out_linear_r = clamp_float(out_linear_r);

        out_linear_g = clamp_float(out_linear_g);

        out_linear_b = clamp_float(out_linear_b);

        /* we could round here... */

        let r: u16 = (out_linear_r * PRECACHE_OUTPUT_MAX as f32) as u16;

        let g: u16 = (out_linear_g * PRECACHE_OUTPUT_MAX as f32) as u16;

        let b: u16 = (out_linear_b * PRECACHE_OUTPUT_MAX as f32) as u16;

        *dest.add(F::kRIndex) = output_table_r[r as usize];

        *dest.add(F::kGIndex) = output_table_g[g as usize];

        *dest.add(F::kBIndex) = output_table_b[b as usize];

        if F::kAIndex != 0xff {

            *dest.add(F::kAIndex) = alpha

        dest = dest.offset(components as isize);

        i += 1

#[no_mangle]

pub unsafe fn qcms_transform_data_rgb_out_lut_precache(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_template_lut_precache::<RGB>(transform, src, dest, length);

#[no_mangle]

pub unsafe fn qcms_transform_data_rgba_out_lut_precache(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_template_lut_precache::<RGBA>(transform, src, dest, length);

#[no_mangle]

pub unsafe fn qcms_transform_data_bgra_out_lut_precache(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_template_lut_precache::<BGRA>(transform, src, dest, length);

// Not used

/*

static void qcms_transform_data_clut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) {

    unsigned int i;

    int xy_len = 1;

    int x_len = transform->grid_size;

    int len = x_len * x_len;

    const float* r_table = transform->r_clut;

    const float* g_table = transform->g_clut;

    const float* b_table = transform->b_clut;

    for (i = 0; i < length; i++) {

        unsigned char in_r = *src++;

        unsigned char in_g = *src++;

        unsigned char in_b = *src++;

        float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;

        int x = floorf(linear_r * (transform->grid_size-1));

        int y = floorf(linear_g * (transform->grid_size-1));

        int z = floorf(linear_b * (transform->grid_size-1));

        int x_n = ceilf(linear_r * (transform->grid_size-1));

        int y_n = ceilf(linear_g * (transform->grid_size-1));

        int z_n = ceilf(linear_b * (transform->grid_size-1));

        float x_d = linear_r * (transform->grid_size-1) - x;

        float y_d = linear_g * (transform->grid_size-1) - y;

        float z_d = linear_b * (transform->grid_size-1) - z;

        float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);

        float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);

        float r_y1 = lerp(r_x1, r_x2, y_d);

        float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);

        float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);

        float r_y2 = lerp(r_x3, r_x4, y_d);

        float clut_r = lerp(r_y1, r_y2, z_d);

        float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);

        float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);

        float g_y1 = lerp(g_x1, g_x2, y_d);

        float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);

        float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);

        float g_y2 = lerp(g_x3, g_x4, y_d);

        float clut_g = lerp(g_y1, g_y2, z_d);

        float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);

        float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);

        float b_y1 = lerp(b_x1, b_x2, y_d);

        float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);

        float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);

        float b_y2 = lerp(b_x3, b_x4, y_d);

        float clut_b = lerp(b_y1, b_y2, z_d);

        *dest++ = clamp_u8(clut_r*255.0f);

        *dest++ = clamp_u8(clut_g*255.0f);

        *dest++ = clamp_u8(clut_b*255.0f);

*/

fn int_div_ceil(value: i32, div: i32) -> i32 {

    (value + div - 1) / div

// Using lcms' tetra interpolation algorithm.

unsafe extern "C" fn qcms_transform_data_tetra_clut_template<F: Format>(

    transform: &qcms_transform,

    mut src: *const u8,

    mut dest: *mut u8,

    length: usize,

) {

    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;

    let xy_len: i32 = 1;

    let x_len: i32 = transform.grid_size as i32;

    let len: i32 = x_len * x_len;

    let table = transform.clut.as_ref().unwrap().as_ptr();

    let r_table: *const f32 = table;

    let g_table: *const f32 = table.offset(1);

    let b_table: *const f32 = table.offset(2);

    let mut i: u32 = 0;

    while (i as usize) < length {

        let c0_r: f32;

        let c1_r: f32;

        let c2_r: f32;

        let c3_r: f32;

        let c0_g: f32;

        let c1_g: f32;

        let c2_g: f32;

        let c3_g: f32;

        let c0_b: f32;

        let c1_b: f32;

        let c2_b: f32;

        let c3_b: f32;

        let in_r: u8 = *src.add(F::kRIndex);

        let in_g: u8 = *src.add(F::kGIndex);

        let in_b: u8 = *src.add(F::kBIndex);

        let mut in_a: u8 = 0;

        if F::kAIndex != 0xff {

            in_a = *src.add(F::kAIndex)

        src = src.offset(components as isize);

        let linear_r: f32 = in_r as i32 as f32 / 255.0;

        let linear_g: f32 = in_g as i32 as f32 / 255.0;

        let linear_b: f32 = in_b as i32 as f32 / 255.0;

        let x: i32 = in_r as i32 * (transform.grid_size as i32 - 1) / 255;

        let y: i32 = in_g as i32 * (transform.grid_size as i32 - 1) / 255;

        let z: i32 = in_b as i32 * (transform.grid_size as i32 - 1) / 255;

        let x_n: i32 = int_div_ceil(in_r as i32 * (transform.grid_size as i32 - 1), 255);

        let y_n: i32 = int_div_ceil(in_g as i32 * (transform.grid_size as i32 - 1), 255);

        let z_n: i32 = int_div_ceil(in_b as i32 * (transform.grid_size as i32 - 1), 255);

        let rx: f32 = linear_r * (transform.grid_size as i32 - 1) as f32 - x as f32;

        let ry: f32 = linear_g * (transform.grid_size as i32 - 1) as f32 - y as f32;

        let rz: f32 = linear_b * (transform.grid_size as i32 - 1) as f32 - z as f32;

        let CLU = |table: *const f32, x, y, z| {

            *table.offset(((x * len + y * x_len + z * xy_len) * 3) as isize)

};

        c0_r = CLU(r_table, x, y, z);

        c0_g = CLU(g_table, x, y, z);

        c0_b = CLU(b_table, x, y, z);

        if rx >= ry {

            if ry >= rz {

                //rx >= ry && ry >= rz

                c1_r = CLU(r_table, x_n, y, z) - c0_r;

                c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);

                c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);

                c1_g = CLU(g_table, x_n, y, z) - c0_g;

                c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);

                c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);

                c1_b = CLU(b_table, x_n, y, z) - c0_b;

                c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);

                c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);

            } else if rx >= rz {

                //rx >= rz && rz >= ry

                c1_r = CLU(r_table, x_n, y, z) - c0_r;

                c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);

                c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);

                c1_g = CLU(g_table, x_n, y, z) - c0_g;

                c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);

                c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);

                c1_b = CLU(b_table, x_n, y, z) - c0_b;

                c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);

                c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);

            } else {

                //rz > rx && rx >= ry

                c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);

                c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);

                c3_r = CLU(r_table, x, y, z_n) - c0_r;

                c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);

                c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);

                c3_g = CLU(g_table, x, y, z_n) - c0_g;

                c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);

                c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);

                c3_b = CLU(b_table, x, y, z_n) - c0_b;

        } else if rx >= rz {

            //ry > rx && rx >= rz

            c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);

            c2_r = CLU(r_table, x, y_n, z) - c0_r;

            c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);

            c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);

            c2_g = CLU(g_table, x, y_n, z) - c0_g;

            c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);

            c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);

            c2_b = CLU(b_table, x, y_n, z) - c0_b;

            c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);

        } else if ry >= rz {

            //ry >= rz && rz > rx

            c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);

            c2_r = CLU(r_table, x, y_n, z) - c0_r;

            c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);

            c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);

            c2_g = CLU(g_table, x, y_n, z) - c0_g;

            c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);

            c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);

            c2_b = CLU(b_table, x, y_n, z) - c0_b;

            c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);

        } else {

            //rz > ry && ry > rx

            c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);

            c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);

            c3_r = CLU(r_table, x, y, z_n) - c0_r;

            c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);

            c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);

            c3_g = CLU(g_table, x, y, z_n) - c0_g;

            c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);

            c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);

            c3_b = CLU(b_table, x, y, z_n) - c0_b;

        let clut_r = c0_r + c1_r * rx + c2_r * ry + c3_r * rz;

        let clut_g = c0_g + c1_g * rx + c2_g * ry + c3_g * rz;

        let clut_b = c0_b + c1_b * rx + c2_b * ry + c3_b * rz;

        *dest.add(F::kRIndex) = clamp_u8(clut_r * 255.0);

        *dest.add(F::kGIndex) = clamp_u8(clut_g * 255.0);

        *dest.add(F::kBIndex) = clamp_u8(clut_b * 255.0);

        if F::kAIndex != 0xff {

            *dest.add(F::kAIndex) = in_a

        dest = dest.offset(components as isize);

        i += 1

unsafe fn tetra(

    transform: &qcms_transform,

    table: *const f32,

    in_r: u8,

    in_g: u8,

    in_b: u8,

) -> (f32, f32, f32) {

    let r_table: *const f32 = table;

    let g_table: *const f32 = table.offset(1);

    let b_table: *const f32 = table.offset(2);

    let linear_r: f32 = in_r as i32 as f32 / 255.0;

    let linear_g: f32 = in_g as i32 as f32 / 255.0;

    let linear_b: f32 = in_b as i32 as f32 / 255.0;

    let xy_len: i32 = 1;

    let x_len: i32 = transform.grid_size as i32;

    let len: i32 = x_len * x_len;

    let x: i32 = in_r as i32 * (transform.grid_size as i32 - 1) / 255;

    let y: i32 = in_g as i32 * (transform.grid_size as i32 - 1) / 255;

    let z: i32 = in_b as i32 * (transform.grid_size as i32 - 1) / 255;

    let x_n: i32 = int_div_ceil(in_r as i32 * (transform.grid_size as i32 - 1), 255);

    let y_n: i32 = int_div_ceil(in_g as i32 * (transform.grid_size as i32 - 1), 255);

    let z_n: i32 = int_div_ceil(in_b as i32 * (transform.grid_size as i32 - 1), 255);

    let rx: f32 = linear_r * (transform.grid_size as i32 - 1) as f32 - x as f32;

    let ry: f32 = linear_g * (transform.grid_size as i32 - 1) as f32 - y as f32;

    let rz: f32 = linear_b * (transform.grid_size as i32 - 1) as f32 - z as f32;

    let CLU = |table: *const f32, x, y, z| {

        *table.offset(((x * len + y * x_len + z * xy_len) * 3) as isize)

};

    let c0_r: f32;

    let c1_r: f32;

    let c2_r: f32;

    let c3_r: f32;

    let c0_g: f32;

    let c1_g: f32;

    let c2_g: f32;

    let c3_g: f32;

    let c0_b: f32;

    let c1_b: f32;

    let c2_b: f32;

    let c3_b: f32;

    c0_r = CLU(r_table, x, y, z);

    c0_g = CLU(g_table, x, y, z);

    c0_b = CLU(b_table, x, y, z);

    if rx >= ry {

        if ry >= rz {

            //rx >= ry && ry >= rz

            c1_r = CLU(r_table, x_n, y, z) - c0_r;

            c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);

            c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);

            c1_g = CLU(g_table, x_n, y, z) - c0_g;

            c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);

            c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);

            c1_b = CLU(b_table, x_n, y, z) - c0_b;

            c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);

            c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);

        } else if rx >= rz {

            //rx >= rz && rz >= ry

            c1_r = CLU(r_table, x_n, y, z) - c0_r;

            c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);

            c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);

            c1_g = CLU(g_table, x_n, y, z) - c0_g;

            c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);

            c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);

            c1_b = CLU(b_table, x_n, y, z) - c0_b;

            c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);

            c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);

        } else {

            //rz > rx && rx >= ry

            c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);

            c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);

            c3_r = CLU(r_table, x, y, z_n) - c0_r;

            c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);

            c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);

            c3_g = CLU(g_table, x, y, z_n) - c0_g;

            c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);

            c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);

            c3_b = CLU(b_table, x, y, z_n) - c0_b;

    } else if rx >= rz {

        //ry > rx && rx >= rz

        c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);

        c2_r = CLU(r_table, x, y_n, z) - c0_r;

        c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);

        c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);

        c2_g = CLU(g_table, x, y_n, z) - c0_g;

        c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);

        c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);

        c2_b = CLU(b_table, x, y_n, z) - c0_b;

        c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);

    } else if ry >= rz {

        //ry >= rz && rz > rx

        c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);

        c2_r = CLU(r_table, x, y_n, z) - c0_r;

        c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);

        c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);

        c2_g = CLU(g_table, x, y_n, z) - c0_g;

        c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);

        c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);

        c2_b = CLU(b_table, x, y_n, z) - c0_b;

        c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);

    } else {

        //rz > ry && ry > rx

        c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);

        c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);

        c3_r = CLU(r_table, x, y, z_n) - c0_r;

        c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);

        c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);

        c3_g = CLU(g_table, x, y, z_n) - c0_g;

        c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);

        c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);

        c3_b = CLU(b_table, x, y, z_n) - c0_b;

    let clut_r = c0_r + c1_r * rx + c2_r * ry + c3_r * rz;

    let clut_g = c0_g + c1_g * rx + c2_g * ry + c3_g * rz;

    let clut_b = c0_b + c1_b * rx + c2_b * ry + c3_b * rz;

    (clut_r, clut_g, clut_b)

#[inline]

fn lerp(a: f32, b: f32, t: f32) -> f32 {

    a * (1. - t) + b * t

// lerp between two tetrahedral interpolations

// See lcms:Eval4InputsFloat

#[allow(clippy::many_single_char_names)]

unsafe fn qcms_transform_data_tetra_clut_cmyk(

    transform: &qcms_transform,

    mut src: *const u8,

    mut dest: *mut u8,

    length: usize,

) {

    let table = transform.clut.as_ref().unwrap().as_ptr();

    assert!(

        transform.clut.as_ref().unwrap().len()

            >= ((transform.grid_size as i32).pow(4) * 3) as usize

);

    for _ in 0..length {

        let c: u8 = *src.add(0);

        let m: u8 = *src.add(1);

        let y: u8 = *src.add(2);

        let k: u8 = *src.add(3);

        src = src.offset(4);

        let linear_k: f32 = k as i32 as f32 / 255.0;

        let grid_size = transform.grid_size as i32;

        let w: i32 = k as i32 * (transform.grid_size as i32 - 1) / 255;

        let w_n: i32 = int_div_ceil(k as i32 * (transform.grid_size as i32 - 1), 255);

        let t: f32 = linear_k * (transform.grid_size as i32 - 1) as f32 - w as f32;

        let table1 = table.offset((w * grid_size * grid_size * grid_size * 3) as isize);

        let table2 = table.offset((w_n * grid_size * grid_size * grid_size * 3) as isize);

        let (r1, g1, b1) = tetra(transform, table1, c, m, y);

        let (r2, g2, b2) = tetra(transform, table2, c, m, y);

        let r = lerp(r1, r2, t);

        let g = lerp(g1, g2, t);

        let b = lerp(b1, b2, t);

        *dest.add(0) = clamp_u8(r * 255.0);

        *dest.add(1) = clamp_u8(g * 255.0);

        *dest.add(2) = clamp_u8(b * 255.0);

        dest = dest.offset(3);

unsafe fn qcms_transform_data_tetra_clut_rgb(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_tetra_clut_template::<RGB>(transform, src, dest, length);

unsafe fn qcms_transform_data_tetra_clut_rgba(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_tetra_clut_template::<RGBA>(transform, src, dest, length);

unsafe fn qcms_transform_data_tetra_clut_bgra(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_tetra_clut_template::<BGRA>(transform, src, dest, length);

unsafe fn qcms_transform_data_template_lut<F: Format>(

    transform: &qcms_transform,

    mut src: *const u8,

    mut dest: *mut u8,

    length: usize,

) {

    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;

    let mat = &transform.matrix;

    let mut i: u32 = 0;

    let input_gamma_table_r = transform.input_gamma_table_r.as_ref().unwrap().as_ptr();

    let input_gamma_table_g = transform.input_gamma_table_g.as_ref().unwrap().as_ptr();

    let input_gamma_table_b = transform.input_gamma_table_b.as_ref().unwrap().as_ptr();

    while (i as usize) < length {

        let device_r: u8 = *src.add(F::kRIndex);

        let device_g: u8 = *src.add(F::kGIndex);

        let device_b: u8 = *src.add(F::kBIndex);

        let mut alpha: u8 = 0;

        if F::kAIndex != 0xff {

            alpha = *src.add(F::kAIndex)

        src = src.offset(components as isize);

        let linear_r: f32 = *input_gamma_table_r.offset(device_r as isize);

        let linear_g: f32 = *input_gamma_table_g.offset(device_g as isize);

        let linear_b: f32 = *input_gamma_table_b.offset(device_b as isize);

        let mut out_linear_r = mat[0][0] * linear_r + mat[1][0] * linear_g + mat[2][0] * linear_b;

        let mut out_linear_g = mat[0][1] * linear_r + mat[1][1] * linear_g + mat[2][1] * linear_b;

        let mut out_linear_b = mat[0][2] * linear_r + mat[1][2] * linear_g + mat[2][2] * linear_b;

        out_linear_r = clamp_float(out_linear_r);

        out_linear_g = clamp_float(out_linear_g);

        out_linear_b = clamp_float(out_linear_b);

        let out_device_r: f32 = lut_interp_linear(

            out_linear_r as f64,

            &transform.output_gamma_lut_r.as_ref().unwrap(),

);

        let out_device_g: f32 = lut_interp_linear(

            out_linear_g as f64,

            transform.output_gamma_lut_g.as_ref().unwrap(),

);

        let out_device_b: f32 = lut_interp_linear(

            out_linear_b as f64,

            transform.output_gamma_lut_b.as_ref().unwrap(),

);

        *dest.add(F::kRIndex) = clamp_u8(out_device_r * 255f32);

        *dest.add(F::kGIndex) = clamp_u8(out_device_g * 255f32);

        *dest.add(F::kBIndex) = clamp_u8(out_device_b * 255f32);

        if F::kAIndex != 0xff {

            *dest.add(F::kAIndex) = alpha

        dest = dest.offset(components as isize);

        i += 1

#[no_mangle]

pub unsafe fn qcms_transform_data_rgb_out_lut(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_template_lut::<RGB>(transform, src, dest, length);

#[no_mangle]

pub unsafe fn qcms_transform_data_rgba_out_lut(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_template_lut::<RGBA>(transform, src, dest, length);

#[no_mangle]

pub unsafe fn qcms_transform_data_bgra_out_lut(

    transform: &qcms_transform,

    src: *const u8,

    dest: *mut u8,

    length: usize,

) {

    qcms_transform_data_template_lut::<BGRA>(transform, src, dest, length);

fn precache_create() -> Arc<PrecacheOuput> {

    Arc::new(PrecacheOuput::default())

#[no_mangle]

pub unsafe extern "C" fn qcms_transform_release(t: *mut qcms_transform) {

    drop(Box::from_raw(t));

const bradford_matrix: Matrix = Matrix {

    m: [

        [0.8951, 0.2664, -0.1614],

        [-0.7502, 1.7135, 0.0367],

        [0.0389, -0.0685, 1.0296],

],

};

const bradford_matrix_inv: Matrix = Matrix {

    m: [

        [0.9869929, -0.1470543, 0.1599627],

        [0.4323053, 0.5183603, 0.0492912],

        [-0.0085287, 0.0400428, 0.9684867],

],

};

// See ICCv4 E.3

fn compute_whitepoint_adaption(X: f32, Y: f32, Z: f32) -> Matrix {

    let p: f32 = (0.96422 * bradford_matrix.m[0][0]

        + 1.000 * bradford_matrix.m[1][0]

        + 0.82521 * bradford_matrix.m[2][0])

        / (X * bradford_matrix.m[0][0] + Y * bradford_matrix.m[1][0] + Z * bradford_matrix.m[2][0]);

    let y: f32 = (0.96422 * bradford_matrix.m[0][1]

        + 1.000 * bradford_matrix.m[1][1]

        + 0.82521 * bradford_matrix.m[2][1])

        / (X * bradford_matrix.m[0][1] + Y * bradford_matrix.m[1][1] + Z * bradford_matrix.m[2][1]);

    let b: f32 = (0.96422 * bradford_matrix.m[0][2]

        + 1.000 * bradford_matrix.m[1][2]

        + 0.82521 * bradford_matrix.m[2][2])

        / (X * bradford_matrix.m[0][2] + Y * bradford_matrix.m[1][2] + Z * bradford_matrix.m[2][2]);

    let white_adaption = Matrix {

        m: [[p, 0., 0.], [0., y, 0.], [0., 0., b]],

};

    Matrix::multiply(

        bradford_matrix_inv,

        Matrix::multiply(white_adaption, bradford_matrix),

#[no_mangle]

pub extern "C" fn qcms_profile_precache_output_transform(profile: &mut Profile) {

    /* we only support precaching on rgb profiles */

    if profile.color_space != RGB_SIGNATURE {

        return;

    if SUPPORTS_ICCV4.load(Ordering::Relaxed) {

        /* don't precache since we will use the B2A LUT */

        if profile.B2A0.is_some() {

            return;

        /* don't precache since we will use the mBA LUT */

        if profile.mBA.is_some() {

            return;

    /* don't precache if we do not have the TRC curves */

    if profile.redTRC.is_none() || profile.greenTRC.is_none() || profile.blueTRC.is_none() {

        return;

    if profile.precache_output.is_none() {

        let mut precache = precache_create();

        compute_precache(

            profile.redTRC.as_deref().unwrap(),

            &mut Arc::get_mut(&mut precache).unwrap().lut_r,

);

        compute_precache(

            profile.greenTRC.as_deref().unwrap(),

            &mut Arc::get_mut(&mut precache).unwrap().lut_g,

);

        compute_precache(

            profile.blueTRC.as_deref().unwrap(),

            &mut Arc::get_mut(&mut precache).unwrap().lut_b,

);

        profile.precache_output = Some(precache);

/* Replace the current transformation with a LUT transformation using a given number of sample points */

fn transform_precacheLUT_float(

    mut transform: Box<qcms_transform>,

    input: &Profile,

    output: &Profile,

    samples: i32,

    in_type: DataType,

) -> Option<Box<qcms_transform>> {

    /* The range between which 2 consecutive sample points can be used to interpolate */

    let lutSize: u32 = (3 * samples * samples * samples) as u32;

    let mut src = Vec::with_capacity(lutSize as usize);

    let dest = vec![0.; lutSize as usize];

    /* Prepare a list of points we want to sample */

    for x in 0..samples {

        for y in 0..samples {

            for z in 0..samples {

                src.push(x as f32 / (samples - 1) as f32);

                src.push(y as f32 / (samples - 1) as f32);

                src.push(z as f32 / (samples - 1) as f32);

    let lut = chain_transform(input, output, src, dest, lutSize as usize);

    if let Some(lut) = lut {

        transform.clut = Some(lut);

        transform.grid_size = samples as u16;

        if in_type == RGBA8 {

            transform.transform_fn = Some(qcms_transform_data_tetra_clut_rgba)

        } else if in_type == BGRA8 {

            transform.transform_fn = Some(qcms_transform_data_tetra_clut_bgra)

        } else if in_type == RGB8 {

            transform.transform_fn = Some(qcms_transform_data_tetra_clut_rgb)

        debug_assert!(transform.transform_fn.is_some());

    } else {

        return None;

    Some(transform)

fn transform_precacheLUT_cmyk_float(

    mut transform: Box<qcms_transform>,

    input: &Profile,

    output: &Profile,

    samples: i32,

    in_type: DataType,

) -> Option<Box<qcms_transform>> {

    /* The range between which 2 consecutive sample points can be used to interpolate */

    let lutSize: u32 = (4 * samples * samples * samples * samples) as u32;

    let mut src = Vec::with_capacity(lutSize as usize);

    let dest = vec![0.; lutSize as usize];

    /* Prepare a list of points we want to sample */

    for k in 0..samples {

        for c in 0..samples {

            for m in 0..samples {

                for y in 0..samples {

                    src.push(c as f32 / (samples - 1) as f32);

                    src.push(m as f32 / (samples - 1) as f32);

                    src.push(y as f32 / (samples - 1) as f32);

                    src.push(k as f32 / (samples - 1) as f32);

    let lut = chain_transform(input, output, src, dest, lutSize as usize);

    if let Some(lut) = lut {

        transform.clut = Some(lut);

        transform.grid_size = samples as u16;

        assert!(in_type == DataType::CMYK);

        transform.transform_fn = Some(qcms_transform_data_tetra_clut_cmyk)

    } else {

        return None;

    Some(transform)

pub fn transform_create(

    input: &Profile,

    in_type: DataType,

    output: &Profile,

    out_type: DataType,

    _intent: Intent,

) -> Option<Box<qcms_transform>> {

    // Ensure the requested input and output types make sense.

    let matching_format = match (in_type, out_type) {

        (RGB8, RGB8) => true,

        (RGBA8, RGBA8) => true,

        (BGRA8, BGRA8) => true,

        (Gray8, out_type) => matches!(out_type, RGB8 | RGBA8 | BGRA8),

        (GrayA8, out_type) => matches!(out_type, RGBA8 | BGRA8),

        (CMYK, RGB8) => true,

        _ => false,

};

    if !matching_format {

        debug_assert!(false, "input/output type");

        return None;

    let mut transform: Box<qcms_transform> = Box::new(Default::default());

    let mut precache: bool = false;

    if output.precache_output.is_some() {

        precache = true

    // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance)

    if SUPPORTS_ICCV4.load(Ordering::Relaxed)

        && (in_type == RGB8 || in_type == RGBA8 || in_type == BGRA8 || in_type == CMYK)

        && (input.A2B0.is_some()

            || output.B2A0.is_some()

            || input.mAB.is_some()

            || output.mAB.is_some())

        if in_type == CMYK {

            return transform_precacheLUT_cmyk_float(transform, input, output, 17, in_type);

        // Precache the transformation to a CLUT 33x33x33 in size.

        // 33 is used by many profiles and works well in pratice.

        // This evenly divides 256 into blocks of 8x8x8.

        // TODO For transforming small data sets of about 200x200 or less

        // precaching should be avoided.

        let result = transform_precacheLUT_float(transform, input, output, 33, in_type);

        debug_assert!(result.is_some(), "precacheLUT failed");

        return result;

    if precache {

        transform.precache_output = Some(Arc::clone(output.precache_output.as_ref().unwrap()));

    } else {

        if output.redTRC.is_none() || output.greenTRC.is_none() || output.blueTRC.is_none() {

            return None;

        transform.output_gamma_lut_r = build_output_lut(output.redTRC.as_deref().unwrap());

        transform.output_gamma_lut_g = build_output_lut(output.greenTRC.as_deref().unwrap());

        transform.output_gamma_lut_b = build_output_lut(output.blueTRC.as_deref().unwrap());

        if transform.output_gamma_lut_r.is_none()

            || transform.output_gamma_lut_g.is_none()

            || transform.output_gamma_lut_b.is_none()

            return None;

    if input.color_space == RGB_SIGNATURE {

        if precache {

            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]

            if is_x86_feature_detected!("avx") {

                if in_type == RGB8 {

                    transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_avx)

                } else if in_type == RGBA8 {

                    transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_avx)

                } else if in_type == BGRA8 {

                    transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_avx)

            } else if cfg!(not(miri)) && is_x86_feature_detected!("sse2") {

                if in_type == RGB8 {

                    transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_sse2)

                } else if in_type == RGBA8 {

                    transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_sse2)

                } else if in_type == BGRA8 {

                    transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_sse2)

            #[cfg(all(target_arch = "arm", feature = "neon"))]

            let neon_supported = is_arm_feature_detected!("neon");

            #[cfg(all(target_arch = "aarch64", feature = "neon"))]

            let neon_supported = is_aarch64_feature_detected!("neon");

            #[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), feature = "neon"))]

            if neon_supported {

                if in_type == RGB8 {

                    transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_neon)

                } else if in_type == RGBA8 {

                    transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_neon)

                } else if in_type == BGRA8 {

                    transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_neon)

            if transform.transform_fn.is_none() {

                if in_type == RGB8 {

                    transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_precache)

                } else if in_type == RGBA8 {

                    transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_precache)

                } else if in_type == BGRA8 {

                    transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_precache)

        } else if in_type == RGB8 {

            transform.transform_fn = Some(qcms_transform_data_rgb_out_lut)

        } else if in_type == RGBA8 {

            transform.transform_fn = Some(qcms_transform_data_rgba_out_lut)

        } else if in_type == BGRA8 {

            transform.transform_fn = Some(qcms_transform_data_bgra_out_lut)

        //XXX: avoid duplicating tables if we can

        transform.input_gamma_table_r = Some(Box::new(build_input_gamma_table(input.redTRC.as_deref()?)));

        transform.input_gamma_table_g = Some(Box::new(build_input_gamma_table(input.greenTRC.as_deref()?)));

        transform.input_gamma_table_b = Some(Box::new(build_input_gamma_table(input.blueTRC.as_deref()?)));

        /* build combined colorant matrix */

        let in_matrix = build_colorant_matrix(input);

        let mut out_matrix = build_colorant_matrix(output);

        out_matrix = out_matrix.invert()?;

        let result_0 = Matrix::multiply(out_matrix, in_matrix);

        /* check for NaN values in the matrix and bail if we find any */

        let mut i: u32 = 0;

        while i < 3 {

            let mut j: u32 = 0;

            while j < 3 {

                #[allow(clippy::eq_op, clippy::float_cmp)]

                if result_0.m[i as usize][j as usize].is_nan() {

                    return None;

                j += 1

            i += 1

        /* store the results in column major mode

         * this makes doing the multiplication with sse easier */

        transform.matrix[0][0] = result_0.m[0][0];

        transform.matrix[1][0] = result_0.m[0][1];

        transform.matrix[2][0] = result_0.m[0][2];

        transform.matrix[0][1] = result_0.m[1][0];

        transform.matrix[1][1] = result_0.m[1][1];

        transform.matrix[2][1] = result_0.m[1][2];

        transform.matrix[0][2] = result_0.m[2][0];

        transform.matrix[1][2] = result_0.m[2][1];

        transform.matrix[2][2] = result_0.m[2][2]

    } else if input.color_space == GRAY_SIGNATURE {

        transform.input_gamma_table_gray = Some(Box::new(build_input_gamma_table(input.grayTRC.as_deref()?)));

        if precache {

            if out_type == RGB8 {

                transform.transform_fn = Some(qcms_transform_data_gray_out_precache)

            } else if out_type == RGBA8 {

                if in_type == Gray8 {

                    transform.transform_fn = Some(qcms_transform_data_gray_rgba_out_precache)

                } else {

                    transform.transform_fn = Some(qcms_transform_data_graya_rgba_out_precache)

            } else if out_type == BGRA8 {

                if in_type == Gray8 {

                    transform.transform_fn = Some(qcms_transform_data_gray_bgra_out_precache)

                } else {

                    transform.transform_fn = Some(qcms_transform_data_graya_bgra_out_precache)

        } else if out_type == RGB8 {

            transform.transform_fn = Some(qcms_transform_data_gray_out_lut)

        } else if out_type == RGBA8 {

            if in_type == Gray8 {

                transform.transform_fn = Some(qcms_transform_data_gray_rgba_out_lut)

            } else {

                transform.transform_fn = Some(qcms_transform_data_graya_rgba_out_lut)

        } else if out_type == BGRA8 {

            if in_type == Gray8 {

                transform.transform_fn = Some(qcms_transform_data_gray_bgra_out_lut)

            } else {

                transform.transform_fn = Some(qcms_transform_data_graya_bgra_out_lut)

    } else {

        debug_assert!(false, "unexpected colorspace");

        return None;

    debug_assert!(transform.transform_fn.is_some());

    Some(transform)

/// A transform from an input profile to an output one.

pub struct Transform {

    src_ty: DataType,

    dst_ty: DataType,

    xfm: Box<qcms_transform>,

impl Transform {

    /// Create a new transform from `input` to `output` for pixels of `DataType` `ty` with `intent`

    pub fn new(input: &Profile, output: &Profile, ty: DataType, intent: Intent) -> Option<Self> {

        transform_create(input, ty, output, ty, intent).map(|xfm| Transform {

            src_ty: ty,

            dst_ty: ty,

            xfm,

})

    /// Create a new transform from `input` to `output` for pixels of `DataType` `ty` with `intent`

    pub fn new_to(

        input: &Profile,

        output: &Profile,

        src_ty: DataType,

        dst_ty: DataType,

        intent: Intent,

    ) -> Option<Self> {

        transform_create(input, src_ty, output, dst_ty, intent).map(|xfm| Transform {

            src_ty,

            dst_ty,

            xfm,

})

    /// Apply the color space transform to `data`

    pub fn apply(&self, data: &mut [u8]) {

        if data.len() % self.src_ty.bytes_per_pixel() != 0 {

            panic!(

                "incomplete pixels: should be a multiple of {} got {}",

                self.src_ty.bytes_per_pixel(),

                data.len()

        unsafe {

            self.xfm.transform_fn.expect("non-null function pointer")(

                &*self.xfm,

                data.as_ptr(),

                data.as_mut_ptr(),

                data.len() / self.src_ty.bytes_per_pixel(),

);

    /// Apply the color space transform to `data`

    pub fn convert(&self, src: &[u8], dst: &mut [u8]) {

        if src.len() % self.src_ty.bytes_per_pixel() != 0 {

            panic!(

                "incomplete pixels: should be a multiple of {} got {}",

                self.src_ty.bytes_per_pixel(),

                src.len()

        if dst.len() % self.dst_ty.bytes_per_pixel() != 0 {

            panic!(

                "incomplete pixels: should be a multiple of {} got {}",

                self.dst_ty.bytes_per_pixel(),

                dst.len()

        assert_eq!(

            src.len() / self.src_ty.bytes_per_pixel(),

            dst.len() / self.dst_ty.bytes_per_pixel()

);

        unsafe {

            self.xfm.transform_fn.expect("non-null function pointer")(

                &*self.xfm,

                src.as_ptr(),

                dst.as_mut_ptr(),

                src.len() / self.src_ty.bytes_per_pixel(),

);

#[no_mangle]

pub extern "C" fn qcms_enable_iccv4() {

    SUPPORTS_ICCV4.store(true, Ordering::Relaxed);