// cs_clip_rectangle_FAST_PATH.c

#define SWGL 1

#define __VERSION__ 150

#define WR_MAX_VERTEX_TEXTURE_WIDTH 1024U

#define WR_FEATURE_FAST_PATH

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifdef WR_FEATURE_TEXTURE_EXTERNAL

// Please check https://www.khronos.org/registry/OpenGL/extensions/OES/OES_EGL_image_external_essl3.txt

// for this extension.

#endif

#ifdef WR_FEATURE_TEXTURE_EXTERNAL_ESSL1

// Some GLES 3 devices do not support GL_OES_EGL_image_external_essl3, so we

// must use GL_OES_EGL_image_external instead and make the shader ESSL1

// compatible.

#endif

#ifdef WR_FEATURE_TEXTURE_EXTERNAL_BT709

#endif

#ifdef WR_FEATURE_ADVANCED_BLEND

#endif

#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING

#ifdef GL_ES

#else

#endif

#endif

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#if defined(GL_ES)

    #if GL_ES == 1

        // Sampler default precision is lowp on mobile GPUs.

        // This causes RGBA32F texture data to be clamped to 16 bit floats on some GPUs (e.g. Mali-T880).

        // Define highp precision macro to allow lossless FLOAT texture sampling.

        #define HIGHP_SAMPLER_FLOAT highp

        // Default int precision in GLES 3 is highp (32 bits) in vertex shaders

        // and mediump (16 bits) in fragment shaders. If an int is being used as

        // a texel address in a fragment shader, and therefore requires > 16

        // bits, it must be qualified with this.

        #define HIGHP_FS_ADDRESS highp

        // texelFetchOffset is buggy on some Android GPUs (see issue #1694).

        // Fallback to texelFetch on mobile GPUs.

        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetch(sampler, position + offset, lod)

    #else

        #define HIGHP_SAMPLER_FLOAT

        #define HIGHP_FS_ADDRESS

        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetchOffset(sampler, position, lod, offset)

    #endif

#else

    #define HIGHP_SAMPLER_FLOAT

    #define HIGHP_FS_ADDRESS

    #if defined(PLATFORM_MACOS) && !defined(SWGL)

        // texelFetchOffset introduces a variety of shader compilation bugs on macOS Intel so avoid it.

        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetch(sampler, position + offset, lod)

    #else

        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetchOffset(sampler, position, lod, offset)

    #endif

#endif

#ifdef SWGL

    #define SWGL_DRAW_SPAN

    #define SWGL_CLIP_MASK

    #define SWGL_ANTIALIAS

    #define SWGL_BLEND

    #define SWGL_CLIP_DIST

#endif

#ifdef WR_VERTEX_SHADER

    #ifdef SWGL

        // Annotate a vertex attribute as being flat per each drawn primitive instance.

        // SWGL can use this information to avoid redundantly loading the attribute in all SIMD lanes.

        #define PER_INSTANCE flat

    #else

        #define PER_INSTANCE

    #endif

    #if __VERSION__ != 100

        #define varying out

        #define attribute in

    #endif

#endif

#ifdef WR_FRAGMENT_SHADER

    precision highp float;

    #if __VERSION__ != 100

        #define varying in

    #endif

#endif

// Flat interpolation is not supported on ESSL 1

#if __VERSION__ == 100

    #define flat

#endif

#if defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)

#define TEX_SAMPLE(sampler, tex_coord) texture2D(sampler, tex_coord.xy)

#elif defined(WR_FEATURE_TEXTURE_EXTERNAL_BT709)

// Force conversion from yuv to rgb using BT709 colorspace

#define TEX_SAMPLE(sampler, tex_coord) vec4(yuv_2_rgb(texture(sampler, tex_coord.xy).xyz, itu_709), 1.0)

#else

#define TEX_SAMPLE(sampler, tex_coord) texture(sampler, tex_coord.xy)

#endif

#if defined(WR_FEATURE_TEXTURE_EXTERNAL) && defined(PLATFORM_ANDROID)

// On some Mali GPUs we have encountered crashes in glDrawElements when using

// textureSize(samplerExternalOES) in a vertex shader without potentially

// sampling from the texture. This tricks the driver into thinking the texture

// may be sampled from, avoiding the crash. See bug 1692848.

uniform bool u_mali_workaround_dummy;

#define TEX_SIZE(sampler) (u_mali_workaround_dummy ? ivec2(texture(sampler, vec2(0.0, 0.0)).rr) : textureSize(sampler, 0))

#else

#define TEX_SIZE(sampler) textureSize(sampler, 0)

#endif

// Keep these in sync with the corresponding constants in gpu_types.rs

// Specifies that the UV coordinates supplied to certain shaders are normalized.

#define UV_TYPE_NORMALIZED 0

// Specifies that the UV coordinates supplied to certain shaders are not normalized.

#define UV_TYPE_UNNORMALIZED 1

//======================================================================================

// Vertex shader attributes and uniforms

//======================================================================================

#ifdef WR_VERTEX_SHADER

    // Uniform inputs

    uniform mat4 uTransform;       // Orthographic projection

    // Attribute inputs

    attribute vec2 aPosition;

    // get_fetch_uv is a macro to work around a macOS Intel driver parsing bug.

    // TODO: convert back to a function once the driver issues are resolved, if ever.

    // https://github.com/servo/webrender/pull/623

    // https://github.com/servo/servo/issues/13953

    // Do the division with unsigned ints because that's more efficient with D3D

    #define get_fetch_uv(i, vpi)  ivec2(int(vpi * (uint(i) % (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi))), int(uint(i) / (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi)))

#endif

//======================================================================================

// Fragment shader attributes and uniforms

//======================================================================================

#ifdef WR_FRAGMENT_SHADER

    // Uniform inputs

    // Fragment shader outputs

    #ifdef WR_FEATURE_ADVANCED_BLEND

        layout(blend_support_all_equations) out;

    #endif

    #if __VERSION__ == 100

        #define oFragColor gl_FragColor

    #elif defined(WR_FEATURE_DUAL_SOURCE_BLENDING)

        layout(location = 0, index = 0) out vec4 oFragColor;

        layout(location = 0, index = 1) out vec4 oFragBlend;

    #else

        out vec4 oFragColor;

    #endif

    // Write an output color in normal shaders.

    // Stores `color` in the primary fragment output, oFragColor.
    void write_output(vec4 color) {
        oFragColor = color;
    }

    #define EPSILON                     0.0001

    // "Show Overdraw" color. Premultiplied.

    #define WR_DEBUG_OVERDRAW_COLOR     vec4(0.110, 0.077, 0.027, 0.125)

    // Signed distance from `p` to the line passing through `p0` whose normal
    // direction is `perp_dir` (normalized here, so it need not be unit length).
    // Computed as the projection of (p0 - p) onto the unit normal.
    float distance_to_line(vec2 p0, vec2 perp_dir, vec2 p) {
        vec2 dir_to_p0 = p0 - p;
        return dot(normalize(perp_dir), dir_to_p0);
    }

// fwidth is not defined in ESSL 1, but that's okay because we don't need

// it for any ESSL 1 shader variants.

#if __VERSION__ != 100

    /// Find the appropriate half range to apply the AA approximation over.
    /// This range represents a coefficient to go from one CSS pixel to half a device pixel.
    ///
    /// Returns the raw per-axis filter width, i.e. fwidth(position).
    vec2 compute_aa_range_xy(vec2 position) {
        return fwidth(position);
    }

    /// Uniform-scale variant of compute_aa_range_xy(): returns the *reciprocal*
    /// of the AA range so callers can multiply instead of divide.
    float compute_aa_range(vec2 position) {
        // The constant factor is chosen to compensate for the fact that length(fw) is equal
        // to sqrt(2) times the device pixel ratio in the typical case.
        //
        // This coefficient is chosen to ensure that any sample 0.5 pixels or more inside of
        // the shape has no anti-aliasing applied to it (since pixels are sampled at their center,
        // such a pixel (axis aligned) is fully inside the border). We need this so that antialiased
        // curves properly connect with non-antialiased vertical or horizontal lines, among other things.
        //
        // Lines over a half-pixel away from the pixel center *can* intersect with the pixel square;
        // indeed, unless they are horizontal or vertical, they are guaranteed to. However, choosing
        // a nonzero area for such pixels causes noticeable artifacts at the junction between an anti-
        // aliased corner and a straight edge.
        //
        // We may want to adjust this constant in specific scenarios (for example keep the principled
        // value for straight edges where we want pixel-perfect equivalence with non antialiased lines
        // when axis aligned, while selecting a larger and smoother aa range on curves).
        //
        // As a further optimization, we compute the reciprocal of this range, such that we
        // can then use the cheaper inversesqrt() instead of length(). This also elides a
        // division that would otherwise be necessary inside distance_aa.
        #ifdef SWGL
            // SWGL uses an approximation for fwidth() such that it returns equal x and y.
            // Thus, sqrt(2)/length(w) = sqrt(2)/sqrt(x*x + x*x) = recip(x).
            return recip(fwidth(position).x);
        #else
            // sqrt(2)/length(w) = inversesqrt(0.5 * dot(w, w))
            vec2 w = fwidth(position);
            return inversesqrt(0.5 * dot(w, w));
        #endif
    }

#endif

    /// Return the blending coefficient for distance antialiasing.
    ///
    /// 0.0 means inside the shape, 1.0 means outside.
    ///
    /// This makes the simplifying assumption that the area of a 1x1 pixel square
    /// under a line is reasonably similar to just the signed Euclidean distance
    /// from the center of the square to that line. This diverges slightly from
    /// better approximations of the exact area, but the difference between the
    /// methods is not perceptibly noticeable, while this approximation is much
    /// faster to compute.
    ///
    /// See the comments in `compute_aa_range()` for more information on the
    /// cutoff values of -0.5 and 0.5.
    ///
    /// `aa_range` is the per-axis filter width (see compute_aa_range_xy) and
    /// `signed_distance` the per-axis local-space distance.
    float distance_aa_xy(vec2 aa_range, vec2 signed_distance) {
        // The aa_range is the raw per-axis filter width, so we need to divide
        // the local signed distance by the filter width to get an approximation
        // of screen distance.
        #ifdef SWGL
            // The SWGL fwidth() approximation returns uniform X and Y ranges.
            vec2 dist = signed_distance * recip(aa_range.x);
        #else
            vec2 dist = signed_distance / aa_range;
        #endif
        // Choose whichever axis is further outside the rectangle for AA.
        return clamp(0.5 - max(dist.x, dist.y), 0.0, 1.0);
    }

    /// Uniform-scale counterpart of distance_aa_xy(). `aa_range` is expected to
    /// be the reciprocal range produced by compute_aa_range(). The result is
    /// clamp(0.5 - signed_distance * aa_range, 0.0, 1.0).
    float distance_aa(float aa_range, float signed_distance) {
        // The aa_range is already stored as a reciprocal with uniform scale,
        // so just multiply it, then use that for AA.
        float dist = signed_distance * aa_range;
        return clamp(0.5 - dist, 0.0, 1.0);
    }

    /// Component-wise selection.

///

    /// The idea of using this is to ensure both potential branches are executed before

    /// selecting the result, to avoid observable timing differences based on the condition.

///

    /// Example usage: color = if_then_else(lessThanEqual(color, vec3(0.5)), vec3(0.0), vec3(1.0));

///

    /// The above example sets each component to 0.0 or 1.0 independently depending on whether

    /// their values are below or above 0.5.

///

    /// This is written as a macro in order to work with vectors of any dimension.

///

    /// Note: Some older android devices don't support mix with bvec. If we ever run into them

    /// the only option we have is to polyfill it with a branch per component.

    #define if_then_else(cond, then_branch, else_branch) mix(else_branch, then_branch, cond)

#endif

//======================================================================================

// Shared shader uniforms

//======================================================================================

#ifdef WR_FEATURE_TEXTURE_2D

uniform sampler2D sColor0;

uniform sampler2D sColor1;

uniform sampler2D sColor2;

#elif defined WR_FEATURE_TEXTURE_RECT

uniform sampler2DRect sColor0;

uniform sampler2DRect sColor1;

uniform sampler2DRect sColor2;

#elif defined(WR_FEATURE_TEXTURE_EXTERNAL) || defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)

uniform samplerExternalOES sColor0;

uniform samplerExternalOES sColor1;

uniform samplerExternalOES sColor2;

#elif defined(WR_FEATURE_TEXTURE_EXTERNAL_BT709)

uniform __samplerExternal2DY2YEXT sColor0;

uniform __samplerExternal2DY2YEXT sColor1;

uniform __samplerExternal2DY2YEXT sColor2;

#endif

#ifdef WR_FEATURE_DITHERING

uniform sampler2D sDither;

#endif

//======================================================================================

// Interpolator definitions

//======================================================================================

//======================================================================================

// VS only types and UBOs

//======================================================================================

//======================================================================================

// VS only functions

//======================================================================================

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Rectangle described by one corner plus a size.
// NOTE(review): not referenced anywhere in this chunk; field meanings below
// are taken from the names only.
struct RectWithSize {

    vec2 p0;    // Corner point (presumably the minimum corner — confirm).

    vec2 size;  // Extent along x and y.

};

// Rectangle described by two corner points. rect_clamp() uses p0 as the
// lower bound and p1 as the upper bound; rect_size() returns p1 - p0.
struct RectWithEndpoint {

    vec2 p0;  // Lower (minimum) corner.

    vec2 p1;  // Upper (maximum) corner.

};

// Returns 1.0 when `p` lies inside the rectangle [p0, p1) on both axes and
// 0.0 otherwise. Each axis contributes step(p0, p) - step(p1, p), which is 1
// only when p0 <= p < p1; the two axes are combined by multiplication.
float point_inside_rect(vec2 p, vec2 p0, vec2 p1) {
    vec2 s = step(p0, p) - step(p1, p);
    return s.x * s.y;
}

// Per-axis signed distance from `pos` to the rectangle [p0, p1]:
// negative inside, positive outside, evaluated independently for x and y.
vec2 signed_distance_rect_xy(vec2 pos, vec2 p0, vec2 p1) {
    // Instead of using a true signed distance to rect here, we just use the
    // simpler approximation of the maximum distance on either axis from the
    // outside of the rectangle. This avoids expensive use of length() and only
    // causes mostly imperceptible differences at corner pixels.
    return max(p0 - pos, pos - p1);
}

// Scalar approximate signed distance from `pos` to the rectangle [p0, p1]:
// the larger of the two per-axis distances from signed_distance_rect_xy().
float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
    // Collapse the per-axis distances to edges to a single approximate value.
    vec2 d = signed_distance_rect_xy(pos, p0, p1);
    return max(d.x, d.y);
}

// Clamps `pt` component-wise into the range [rect.p0, rect.p1].
vec2 rect_clamp(RectWithEndpoint rect, vec2 pt) {
    return clamp(pt, rect.p0, rect.p1);
}

// Returns the extent of `rect` on each axis (p1 - p0).
vec2 rect_size(RectWithEndpoint rect) {
    return rect.p1 - rect.p0;
}

// This is similar to rect_clamp but repeats the image for coordinates outside
// the rect; used in the SVG feTile filter.
//
// `p` is wrapped into [p0, p1) by taking the fractional part of its position
// relative to the rect size.
vec2 rect_repeat(vec2 p, vec2 p0, vec2 p1) {
    vec2 r = p - p0;
    vec2 s = p1 - p0;
    // Guard against division by zero for degenerate (zero-sized) rects.
    vec2 is = 1.0 / max(s, vec2(0.000001));
    return p0 + s * fract(is * r);
}

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifdef WR_VERTEX_SHADER

#define VECS_PER_RENDER_TASK        2U

uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;

// One entry of the sRenderTasks texture, as decoded by
// fetch_render_task_data(): two consecutive texels per task.
struct RenderTaskData {

    RectWithEndpoint task_rect;  // First texel: p0 = .xy, p1 = .zw.

    vec4 user_data;              // Second texel, interpreted by the caller.

};

// See RenderTaskData in render_task.rs
//
// Reads the VECS_PER_RENDER_TASK texels of render task `index` from
// sRenderTasks and unpacks them into a RenderTaskData.
RenderTaskData fetch_render_task_data(int index) {
    ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);

    vec4 texel0 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
    vec4 texel1 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(1, 0));

    // texel0 holds the task rect endpoints; texel1 is opaque user data.
    RectWithEndpoint task_rect = RectWithEndpoint(
        texel0.xy,
        texel0.zw
    );

    RenderTaskData data = RenderTaskData(
        task_rect,
        texel1
    );

    return data;
}

// Reads only the rectangle of render task `index` from sRenderTasks.
// The rect lives in the task's first texel (p0 = .xy, p1 = .zw); the second
// texel (user data) is not needed here, so it is not fetched.
RectWithEndpoint fetch_render_task_rect(int index) {
    ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);

    vec4 texel0 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));

    RectWithEndpoint task_rect = RectWithEndpoint(
        texel0.xy,
        texel0.zw
    );

    return task_rect;
}

#define PIC_TYPE_IMAGE          1

#define PIC_TYPE_TEXT_SHADOW    2

/*

 The dynamic picture that this brush exists on. Right now, it

 contains minimal information. In the future, it will describe

 the transform mode of primitives on this picture, among other things.

*/

// Picture-task view of a RenderTaskData, as built by fetch_picture_task().
struct PictureTask {

    RectWithEndpoint task_rect;  // Copied from RenderTaskData.task_rect.

    float device_pixel_scale;    // RenderTaskData.user_data.x

    vec2 content_origin;         // RenderTaskData.user_data.yz

};

// Fetches render task `address` and reinterprets it as a PictureTask:
// user_data.x becomes the device pixel scale and user_data.yz the content
// origin.
PictureTask fetch_picture_task(int address) {
    RenderTaskData task_data = fetch_render_task_data(address);

    PictureTask task = PictureTask(
        task_data.task_rect,
        task_data.user_data.x,
        task_data.user_data.yz
    );

    return task;
}

#define CLIP_TASK_EMPTY 0x7FFFFFFF

// Clip-task view of a RenderTaskData, as built by fetch_clip_area().
struct ClipArea {

    RectWithEndpoint task_rect;  // Copied from RenderTaskData.task_rect.

    float device_pixel_scale;    // RenderTaskData.user_data.x

    vec2 screen_origin;          // RenderTaskData.user_data.yz

};

// Returns the ClipArea for render task `index`. An index at or above
// CLIP_TASK_EMPTY denotes "no clip task" and yields an all-zero ClipArea
// without touching the render task texture.
ClipArea fetch_clip_area(int index) {
    RenderTaskData task_data;
    if (index >= CLIP_TASK_EMPTY) {
      // We deliberately create a dummy RenderTaskData here then convert to a
      // ClipArea after this if-else statement, rather than initialize the
      // ClipArea in separate branches, to avoid a miscompile in some Adreno
      // drivers. See bug 1884791. Unfortunately the specific details of the bug
      // are unknown, so please take extra care not to regress this when
      // refactoring.
      task_data = RenderTaskData(RectWithEndpoint(vec2(0.0), vec2(0.0)),
                                 vec4(0.0));
    } else {
      task_data = fetch_render_task_data(index);
    }

    return ClipArea(task_data.task_rect, task_data.user_data.x,
                    task_data.user_data.yz);
}

#endif //WR_VERTEX_SHADER

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuCache;

#define VECS_PER_IMAGE_RESOURCE     2

// TODO(gw): This is here temporarily while we have

//           both GPU store and cache. When the GPU

//           store code is removed, we can change the

//           PrimitiveInstance instance structure to

//           use 2x unsigned shorts as vertex attributes

//           instead of an int, and encode the UV directly

//           in the vertices.

// Converts a linear GPU cache `address` into 2D texel coordinates for a
// texture that is WR_MAX_VERTEX_TEXTURE_WIDTH texels wide:
// x = address % width, y = address / width (computed on unsigned values).
ivec2 get_gpu_cache_uv(HIGHP_FS_ADDRESS int address) {
    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH);
}

// Fetches 2 horizontally consecutive texels from sGpuCache, starting at the
// already-resolved texel coordinate `address`.
vec4[2] fetch_from_gpu_cache_2_direct(ivec2 address) {
    return vec4[2](
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0))
    );
}

// Fetches 2 horizontally consecutive texels from sGpuCache, starting at the
// linear cache `address` (resolved via get_gpu_cache_uv).
vec4[2] fetch_from_gpu_cache_2(HIGHP_FS_ADDRESS int address) {
    ivec2 uv = get_gpu_cache_uv(address);
    return vec4[2](
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0))
    );
}

// Fetches the single sGpuCache texel at texel coordinate `address` (LOD 0).
vec4 fetch_from_gpu_cache_1_direct(ivec2 address) {
    return texelFetch(sGpuCache, address, 0);
}

// Fetches the single sGpuCache texel at linear cache `address` (LOD 0).
vec4 fetch_from_gpu_cache_1(HIGHP_FS_ADDRESS int address) {
    ivec2 uv = get_gpu_cache_uv(address);
    return texelFetch(sGpuCache, uv, 0);
}

#ifdef WR_VERTEX_SHADER

// Fetches 8 horizontally consecutive texels from sGpuCache, starting at the
// linear cache `address`.
vec4[8] fetch_from_gpu_cache_8(int address) {
    ivec2 uv = get_gpu_cache_uv(address);
    return vec4[8](
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(4, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(5, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(6, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(7, 0))
    );
}

// Fetches 3 horizontally consecutive texels from sGpuCache, starting at the
// linear cache `address`.
vec4[3] fetch_from_gpu_cache_3(int address) {
    ivec2 uv = get_gpu_cache_uv(address);
    return vec4[3](
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0))
    );
}

// Fetches 3 horizontally consecutive texels from sGpuCache, starting at the
// already-resolved texel coordinate `address`.
vec4[3] fetch_from_gpu_cache_3_direct(ivec2 address) {
    return vec4[3](
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0))
    );
}

// Fetches 4 horizontally consecutive texels from sGpuCache, starting at the
// already-resolved texel coordinate `address`.
vec4[4] fetch_from_gpu_cache_4_direct(ivec2 address) {
    return vec4[4](
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)),
        TEXEL_FETCH(sGpuCache, address, 0, ivec2(3, 0))
    );
}

// Fetches 4 horizontally consecutive texels from sGpuCache, starting at the
// linear cache `address`.
vec4[4] fetch_from_gpu_cache_4(int address) {
    ivec2 uv = get_gpu_cache_uv(address);
    return vec4[4](
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0))
    );
}

//TODO: image resource is too specific for this module

// Image resource record read from the GPU cache by fetch_image_source():
// two consecutive cache blocks.
struct ImageSource {

    RectWithEndpoint uv_rect;  // First block: p0 = .xy, p1 = .zw.

    vec4 user_data;            // Second block, interpreted by the caller.

};

// Reads an ImageSource from the GPU cache at linear `address`.
ImageSource fetch_image_source(int address) {
    //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
    vec4 data[2] = fetch_from_gpu_cache_2(address);
    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
    return ImageSource(uv_rect, data[1]);
}

// Reads an ImageSource from the GPU cache at the already-resolved texel
// coordinate `address`.
ImageSource fetch_image_source_direct(ivec2 address) {
    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
    return ImageSource(uv_rect, data[1]);
}

// Fetch optional extra data for a texture cache resource. This can contain

// a polygon defining a UV rect within the texture cache resource.

// Note: the polygon coordinates are in homogeneous space.

// Four vec4 blocks stored directly after an image resource in the GPU cache
// (see fetch_image_source_extra). The suffixes presumably stand for
// top-left / top-right / bottom-left / bottom-right — confirm against the
// CPU-side writer.
struct ImageSourceExtra {

    vec4 st_tl;  // Block 0.

    vec4 st_tr;  // Block 1.

    vec4 st_bl;  // Block 2.

    vec4 st_br;  // Block 3.

};

// Reads the optional extra data of the image resource at `address`. The four
// blocks are located VECS_PER_IMAGE_RESOURCE entries past the resource itself.
ImageSourceExtra fetch_image_source_extra(int address) {
    vec4 data[4] = fetch_from_gpu_cache_4(address + VECS_PER_IMAGE_RESOURCE);
    return ImageSourceExtra(
        data[0],
        data[1],
        data[2],
        data[3]
    );
}

#endif //WR_VERTEX_SHADER

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

flat varying highp vec4 vTransformBounds;

#ifdef WR_VERTEX_SHADER

#define VECS_PER_TRANSFORM   8U

uniform HIGHP_SAMPLER_FLOAT sampler2D sTransformPalette;

// Vertex-stage half of the rectangle AA helpers: forwards `local_bounds`
// (p0 in .xy, p1 in .zw) to the fragment stage via the flat varying
// vTransformBounds.
void rectangle_aa_vertex(vec4 local_bounds) {
    vTransformBounds = local_bounds;
}

// Transform palette entry as decoded by fetch_transform().
struct Transform {

    mat4 m;                // Columns read from palette texels 0..3.

    mat4 inv_m;            // Columns read from palette texels 4..7.

    bool is_axis_aligned;  // True when (id >> 23) == 0 in the packed id.

};

// Decodes the packed transform `id` and loads the matching entry from
// sTransformPalette.
//   - bits 23 and up: zero means the transform is axis aligned
//   - low 23 bits: palette index
// Each entry occupies VECS_PER_TRANSFORM (8) texels: 4 columns of `m`
// followed by 4 columns of `inv_m`.
Transform fetch_transform(int id) {
    Transform transform;

    transform.is_axis_aligned = (id >> 23) == 0;
    int index = id & 0x007fffff;

    // Create a UV base coord for each 8 texels.
    // This is required because trying to use an offset
    // of more than 8 texels doesn't work on some versions
    // of macOS.
    ivec2 uv = get_fetch_uv(index, VECS_PER_TRANSFORM);

    transform.m[0] = TEXEL_FETCH(sTransformPalette, uv, 0, ivec2(0, 0));
    transform.m[1] = TEXEL_FETCH(sTransformPalette, uv, 0, ivec2(1, 0));
    transform.m[2] = TEXEL_FETCH(sTransformPalette, uv, 0, ivec2(2, 0));
    transform.m[3] = TEXEL_FETCH(sTransformPalette, uv, 0, ivec2(3, 0));

    transform.inv_m[0] = TEXEL_FETCH(sTransformPalette, uv, 0, ivec2(4, 0));
    transform.inv_m[1] = TEXEL_FETCH(sTransformPalette, uv, 0, ivec2(5, 0));
    transform.inv_m[2] = TEXEL_FETCH(sTransformPalette, uv, 0, ivec2(6, 0));
    transform.inv_m[3] = TEXEL_FETCH(sTransformPalette, uv, 0, ivec2(7, 0));

    return transform;
}

// Return the intersection of the plane (set up by "normal" and "pt")
// with the ray (set up by "ray_origin" and "ray_dir"),
// writing the resulting scalar into "t".
//
// Returns true only when the ray is not (nearly) parallel to the plane and
// the intersection lies at t >= 0. When the ray is parallel, "t" is set to
// 0.0: an `out` parameter that is never written is undefined after the call,
// so it must be assigned on every path.
bool ray_plane(vec3 normal, vec3 pt, vec3 ray_origin, vec3 ray_dir, out float t)
{
    t = 0.0;
    float denom = dot(normal, ray_dir);
    if (abs(denom) > 1e-6) {
        vec3 d = pt - ray_origin;
        t = dot(d, normal) / denom;
        return t >= 0.0;
    }

    return false;
}

// Apply the inverse transform "inv_transform"
// to the reference point "ref" in CSS space,
// producing a local point on a Transform plane,
// set by a base point "a" and a normal "n".
vec4 untransform(vec2 ref, vec3 n, vec3 a, mat4 inv_transform) {
    // Cast a ray along +Z starting far behind the reference point.
    vec3 p = vec3(ref, -10000.0);
    vec3 d = vec3(0, 0, 1.0);

    float t = 0.0;
    // get an intersection of the Transform plane with Z axis vector,
    // originated from the "ref" point
    // (the boolean result of ray_plane is deliberately ignored here)
    ray_plane(n, a, p, d, t);
    float z = p.z + d.z * t; // Z of the visible point on the Transform

    vec4 r = inv_transform * vec4(ref, z, 1.0);
    return r;
}

// Given a CSS space position, transform it back into the Transform space.
// The plane of the transform is derived from `transform` itself: its base
// point is the transformed origin and its normal the transformed +Z axis.
vec4 get_node_pos(vec2 pos, Transform transform) {
    // get a point on the scroll node plane
    vec4 ah = transform.m * vec4(0.0, 0.0, 0.0, 1.0);
    vec3 a = ah.xyz / ah.w;

    // get the normal to the scroll node plane
    vec3 n = transpose(mat3(transform.inv_m)) * vec3(0.0, 0.0, 1.0);
    return untransform(pos, n, a, transform.inv_m);
}

#endif //WR_VERTEX_SHADER

#ifdef WR_FRAGMENT_SHADER

// Assume transform bounds are set to a large scale to signal they are invalid.
// Returns true when vTransformBounds.w is below that sentinel (1.0e15).
bool has_valid_transform_bounds() {
    return vTransformBounds.w < 1.0e15;
}

// Anti-aliased coverage of `local_pos` against the rectangle stored in
// vTransformBounds (p0 in .xy, p1 in .zw). Returns a value in [0, 1].
float rectangle_aa_fragment(vec2 local_pos) {
    // Ideally we want to track distances in screen space after transformation
    // as signed distance calculations lose context about the direction vector
    // to exit the geometry, merely remembering the minimum distance to the
    // exit. However, we can't always sanely track distances in screen space
    // due to perspective transforms, clipping, and other concerns, so we do
    // this in local space. However, this causes problems tracking distances
    // in local space when attempting to scale by a uniform AA range later in
    // the presence of a transform which actually has non-uniform scaling.
    //
    // To work around this, we independently track the distances on the local
    // space X and Y axes and then scale them by the independent AA ranges (as
    // computed from fwidth derivatives) for the X and Y axes. This can break
    // down at certain angles (45 degrees or close to it), but still gives a
    // better approximation of screen-space distances in the presence of non-
    // uniform scaling for other rotations.
    //
    // Get signed distance from local rect bounds.
    vec2 d = signed_distance_rect_xy(
        local_pos,
        vTransformBounds.xy,
        vTransformBounds.zw
    );

    // Find the appropriate distance to apply the AA smoothstep over.
    vec2 aa_range = compute_aa_range_xy(local_pos);

    // Only apply AA to fragments outside the signed distance field.
    return distance_aa_xy(aa_range, d);
}

// Non-antialiased variant of rectangle_aa_fragment(): a hard inside/outside
// test of `local_pos` against vTransformBounds (1.0 inside, 0.0 outside).
float rectangle_aa_rough_fragment(vec2 local_pos) {
    return point_inside_rect(
        local_pos,
        vTransformBounds.xy,
        vTransformBounds.zw
    );
}

#endif //WR_FRAGMENT_SHADER

#ifdef WR_VERTEX_SHADER

PER_INSTANCE in vec4 aClipDeviceArea;

PER_INSTANCE in vec4 aClipOrigins;

PER_INSTANCE in float aDevicePixelScale;

PER_INSTANCE in ivec2 aTransformIds;

// Per-instance clip mask data shared by the clip shaders, unpacked from the
// vertex attributes by fetch_clip_item_common().
struct ClipMaskInstanceCommon {

    RectWithEndpoint sub_rect;  // aClipDeviceArea: p0 = .xy, p1 = .zw

    vec2 task_origin;           // aClipOrigins.xy

    vec2 screen_origin;         // aClipOrigins.zw

    float device_pixel_scale;   // aDevicePixelScale

    int clip_transform_id;      // aTransformIds.x

    int prim_transform_id;      // aTransformIds.y

};

// Packs the per-instance clip attributes (aClipDeviceArea, aClipOrigins,
// aDevicePixelScale, aTransformIds) into a ClipMaskInstanceCommon.
ClipMaskInstanceCommon fetch_clip_item_common() {
    ClipMaskInstanceCommon cmi;

    cmi.sub_rect = RectWithEndpoint(aClipDeviceArea.xy, aClipDeviceArea.zw);
    cmi.task_origin = aClipOrigins.xy;
    cmi.screen_origin = aClipOrigins.zw;
    cmi.device_pixel_scale = aDevicePixelScale;
    cmi.clip_transform_id = aTransformIds.x;
    cmi.prim_transform_id = aTransformIds.y;

    return cmi;
}

// Result of write_clip_tile_vertex().
struct ClipVertexInfo {

    vec4 local_pos;                       // Vertex position in the clip's local space (homogeneous).

    RectWithEndpoint clipped_local_rect;  // The local clip rect passed in by the caller.

};

// The transformed vertex function that always covers the whole clip area,
// which is the intersection of all clip instances of a given primitive.
//
// Writes gl_Position (the vertex placed inside the task at `task_origin`) and
// vTransformBounds (via rectangle_aa_vertex), and returns the vertex position
// in the clip's local space together with `local_clip_rect`.
ClipVertexInfo write_clip_tile_vertex(RectWithEndpoint local_clip_rect,
                                      Transform prim_transform,
                                      Transform clip_transform,
                                      RectWithEndpoint sub_rect,
                                      vec2 task_origin,
                                      vec2 screen_origin,
                                      float device_pixel_scale) {
    // aPosition selects a corner of sub_rect.
    vec2 device_pos = screen_origin + mix(sub_rect.p0, sub_rect.p1, aPosition.xy);
    vec2 world_pos = device_pos / device_pixel_scale;

    vec4 pos = prim_transform.m * vec4(world_pos, 0.0, 1.0);
    pos.xyz /= pos.w;

    vec4 p = get_node_pos(pos.xy, clip_transform);
    vec4 local_pos = p * pos.w;

    //TODO: Interpolate in clip space, where "local_pos.w" contains
    // the W of the homogeneous transform *from* clip space into the world.
    //    float interpolate_w = 1.0 / local_pos.w;
    // This is problematic today, because the W<=0 hemisphere is going to be
    // clipped, while we currently want this shader to fill out the whole rect.
    // We can therefore simplify this when the clip construction is rewritten
    // to only affect the areas touched by a clip.
    vec4 vertex_pos = vec4(
        task_origin + mix(sub_rect.p0, sub_rect.p1, aPosition.xy),
        0.0,
        1.0
    );

    gl_Position = uTransform * vertex_pos;

    rectangle_aa_vertex(vec4(local_clip_rect.p0, local_clip_rect.p1));

    ClipVertexInfo vi = ClipVertexInfo(local_pos, local_clip_rect);
    return vi;
}

#endif //WR_VERTEX_SHADER

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Preprocess the radii for computing the distance approximation. This should
// be used in the vertex shader if possible to avoid doing expensive division
// in the fragment shader. When dealing with a point (zero radii), approximate
// it as an ellipse with very small radii so that we don't need to branch.
//
// Returns 1 / max(radii^2, 1.0e-6) per component.
vec2 inverse_radii_squared(vec2 radii) {
    return 1.0 / max(radii * radii, 1.0e-6);
}

#ifdef WR_FRAGMENT_SHADER

// One iteration of Newton's method on the 2D equation of an ellipse:

//

//     E(x, y) = x^2/a^2 + y^2/b^2 - 1

//

// The Jacobian of this equation is:

//

//     J(E(x, y)) = [ 2*x/a^2 2*y/b^2 ]

//

// We approximate the distance with:

//

//     E(x, y) / ||J(E(x, y))||

//

// See G. Taubin, "Distance Approximations for Rasterizing Implicit

// Curves", section 3.

//

// A scale relative to the unit scale of the ellipse may be passed in to cause

// the math to degenerate to length(p) when scale is 0, or otherwise give the

// normal distance approximation if scale is 1.

// p:            point relative to the ellipse center
// inv_radii_sq: (1/a^2, 1/b^2), see inverse_radii_squared()
// scale:        1.0 for the normal ellipse approximation, 0.0 to degenerate
//               to length(p)
float distance_to_ellipse_approx(vec2 p, vec2 inv_radii_sq, float scale) {
    vec2 p_r = p * inv_radii_sq;
    // g = x^2/a^2 + y^2/b^2 - scale
    float g = dot(p, p_r) - scale;
    // Gradient of g (times (1 + scale) / 2 relative to the true Jacobian at
    // scale 0, which is what makes the scale == 0 case reduce to length(p)).
    vec2 dG = (1.0 + scale) * p_r;
    return g * inversesqrt(dot(dG, dG));
}

// Slower but more accurate version that uses the exact distance when dealing
// with a 0-radius point distance and otherwise uses the faster approximation
// when dealing with non-zero radii.
float distance_to_ellipse(vec2 p, vec2 radii) {
    // scale is 1.0 only when both radii are strictly positive, else 0.0.
    return distance_to_ellipse_approx(p, inverse_radii_squared(radii),
                                      float(all(greaterThan(radii, vec2(0.0)))));
}

// Signed distance approximation from pos to a rounded rectangle whose four
// corners may each have a different elliptical radius.
//
// Parameters:
//   pos                - position being tested.
//   plane_*            - half-space for each corner: normal in .xy, offset in
//                        .z. A position is treated as belonging to a corner
//                        when dot(pos, plane.xy) > plane.z.
//   center_radius_*    - ellipse for each corner: center in .xy and inverse
//                        squared radii in .zw (the .zw part is handed to
//                        distance_to_ellipse_approx as inv_radii_sq).
//   rect_bounds        - outer rectangle, .xy and .zw being forwarded to
//                        signed_distance_rect as its two corner arguments.
//
// Returns the larger of the selected corner's ellipse distance and the
// distance to the outer rectangle.
float distance_to_rounded_rect(
    vec2 pos,
    vec3 plane_tl,
    vec4 center_radius_tl,
    vec3 plane_tr,
    vec4 center_radius_tr,
    vec3 plane_br,
    vec4 center_radius_br,
    vec3 plane_bl,
    vec4 center_radius_bl,
    vec4 rect_bounds
) {
    // Clip against each ellipse. If the fragment is in a corner, one of the
    // branches below will select it as the corner to calculate the distance
    // to. We use half-space planes to detect which corner's ellipse the
    // fragment is inside, where the plane is defined by a normal and offset.
    // If outside any ellipse, default to a small offset so a negative distance
    // is returned for it.
    vec4 corner = vec4(vec2(1.0e-6), vec2(1.0));

    // Calculate the ellipse parameters for each corner. The .xy components
    // are rewritten in place as the offset between pos and the corner center,
    // with per-corner sign flips.
    center_radius_tl.xy = center_radius_tl.xy - pos;
    center_radius_tr.xy = (center_radius_tr.xy - pos) * vec2(-1.0, 1.0);
    center_radius_br.xy = pos - center_radius_br.xy;
    center_radius_bl.xy = (center_radius_bl.xy - pos) * vec2(1.0, -1.0);

    // Evaluate each half-space plane in turn to select a corner. The tests
    // are independent, so a later match overrides an earlier one.
    if (dot(pos, plane_tl.xy) > plane_tl.z) {
      corner = center_radius_tl;
    }
    if (dot(pos, plane_tr.xy) > plane_tr.z) {
      corner = center_radius_tr;
    }
    if (dot(pos, plane_br.xy) > plane_br.z) {
      corner = center_radius_br;
    }
    if (dot(pos, plane_bl.xy) > plane_bl.z) {
      corner = center_radius_bl;
    }

    // Calculate the distance of the selected corner and the rectangle bounds,
    // whichever is greater.
    return max(distance_to_ellipse_approx(corner.xy, corner.zw, 1.0),
               signed_distance_rect(pos, rect_bounds.xy, rect_bounds.zw));
}

#endif

// Interpolated local-space position. The fragment shader divides .xy by .w to
// recover the position, and treats fragments with .w <= 0 as fully clipped.
varying highp vec4 vLocalPos;

#ifdef WR_FEATURE_FAST_PATH

// Fast path: one scalar radius per corner, packed by the vertex shader as
// (bottom_right, top_right, bottom_left, top_left).
flat varying highp vec4 v_clip_radii;

// Fast path: half of the clip rectangle's size.
flat varying highp vec2 v_clip_size;

#else

// General path: per-corner ellipse, with the center in .xy and the inverse
// squared radii in .zw.
flat varying highp vec4 vClipCenter_Radius_TL;

flat varying highp vec4 vClipCenter_Radius_TR;

flat varying highp vec4 vClipCenter_Radius_BL;

flat varying highp vec4 vClipCenter_Radius_BR;

// General path: per-corner half-space, with the normal in .xy and the plane
// offset in .z.
flat varying highp vec3 vClipPlane_TL;

flat varying highp vec3 vClipPlane_TR;

flat varying highp vec3 vClipPlane_BL;

flat varying highp vec3 vClipPlane_BR;

#endif

// Clip mode. Packed in to a vector to work around bug 1630356.
// Only the .x component is written and read in this shader.
flat varying mediump vec2 vClipMode;

#ifdef WR_VERTEX_SHADER

// Per-instance vertex inputs for the rectangle clip mask.
// NOTE(review): PER_INSTANCE is defined outside this section; presumably it
// marks the attribute as instanced - confirm against its definition.

// Local-space position the clip rectangle's origin is moved to.
PER_INSTANCE in vec2 aClipLocalPos;

// Clip rectangle as two endpoints: .xy is the first, .zw the second.
PER_INSTANCE in vec4 aClipLocalRect;

// Clip mode, forwarded unchanged to vClipMode.x.
PER_INSTANCE in float aClipMode;

// Per-corner rectangle (endpoints in .xy / .zw) and radii. The radii vector is
// stored as ClipCorner.outer_inner_radius; this shader reads only .x / .xy.
PER_INSTANCE in vec4 aClipRect_TL;

PER_INSTANCE in vec4 aClipRadii_TL;

PER_INSTANCE in vec4 aClipRect_TR;

PER_INSTANCE in vec4 aClipRadii_TR;

PER_INSTANCE in vec4 aClipRect_BL;

PER_INSTANCE in vec4 aClipRadii_BL;

PER_INSTANCE in vec4 aClipRect_BR;

PER_INSTANCE in vec4 aClipRadii_BR;

// Per-instance data for a rectangle clip mask: the fields common to all clip
// mask instances plus the local position specific to this shader.
struct ClipMaskInstanceRect {

    // Shared instance fields, filled in by fetch_clip_item_common().
    ClipMaskInstanceCommon base;

    // Copied from the aClipLocalPos attribute.
    vec2 local_pos;

};

// Builds the per-instance clip mask record: the common part comes from
// fetch_clip_item_common() and the local position from aClipLocalPos.
ClipMaskInstanceRect fetch_clip_item() {
    ClipMaskInstanceRect cmi;

    cmi.base = fetch_clip_item_common();
    cmi.local_pos = aClipLocalPos;

    return cmi;
}

// The overall clip rectangle together with its clip mode.
struct ClipRect {

    // Rectangle expressed as two endpoints (p0, p1).
    RectWithEndpoint rect;

    // Clip mode value taken from aClipMode.
    float mode;

};

// One corner of the rounded clip rectangle.
struct ClipCorner {

    // Rectangle associated with this corner, as two endpoints.
    RectWithEndpoint rect;

    // Radii for this corner. This shader only reads .x (fast path) or .xy
    // (general path).
    // NOTE(review): .zw presumably holds the inner radius, per the field
    // name - confirm against the producer of aClipRadii_*.
    vec4 outer_inner_radius;

};

// Complete description of a rounded-rectangle clip: the main rectangle plus
// one ClipCorner per corner.
struct ClipData {

    ClipRect rect;

    ClipCorner top_left;

    ClipCorner top_right;

    ClipCorner bottom_left;

    ClipCorner bottom_right;

};

// Assembles the ClipData for this instance from the per-instance attributes.
// Every vec4 rectangle attribute is split into its two endpoints (.xy, .zw).
ClipData fetch_clip() {
    ClipData clip;

    clip.rect = ClipRect(RectWithEndpoint(aClipLocalRect.xy, aClipLocalRect.zw), aClipMode);
    clip.top_left = ClipCorner(RectWithEndpoint(aClipRect_TL.xy, aClipRect_TL.zw), aClipRadii_TL);
    clip.top_right = ClipCorner(RectWithEndpoint(aClipRect_TR.xy, aClipRect_TR.zw), aClipRadii_TR);
    clip.bottom_left = ClipCorner(RectWithEndpoint(aClipRect_BL.xy, aClipRect_BL.zw), aClipRadii_BL);
    clip.bottom_right = ClipCorner(RectWithEndpoint(aClipRect_BR.xy, aClipRect_BR.zw), aClipRadii_BR);

    return clip;
}

// Vertex shader entry point. Fetches the instance and clip data, writes the
// clip tile vertex, and emits the varyings that describe the rounded
// rectangle to the fragment shader (fast path or general path).
void main(void) {
    ClipMaskInstanceRect cmi = fetch_clip_item();
    Transform clip_transform = fetch_transform(cmi.base.clip_transform_id);
    Transform prim_transform = fetch_transform(cmi.base.prim_transform_id);
    ClipData clip = fetch_clip();

    // Translate the clip rectangle so that its first endpoint lands on the
    // instance's local position; both endpoints move by the same amount, so
    // the size is unchanged.
    RectWithEndpoint local_rect = clip.rect.rect;
    vec2 diff = cmi.local_pos - local_rect.p0;
    local_rect.p0 = cmi.local_pos;
    local_rect.p1 += diff;

    ClipVertexInfo vi = write_clip_tile_vertex(
        local_rect,
        prim_transform,
        clip_transform,
        cmi.base.sub_rect,
        cmi.base.task_origin,
        cmi.base.screen_origin,
        cmi.base.device_pixel_scale
    );

    vClipMode.x = clip.rect.mode;
    vLocalPos = vi.local_pos;

#ifdef WR_FEATURE_FAST_PATH
    // If the radii are all uniform, we can use a much simpler 2d
    // signed distance function to get a rounded rect clip.
    vec2 half_size = 0.5 * rect_size(local_rect);
    // Re-center the position on the middle of the rectangle. The offset is
    // multiplied by w because the fragment shader divides vLocalPos.xy by
    // vLocalPos.w.
    vLocalPos.xy -= (half_size + cmi.local_pos) * vi.local_pos.w;
    v_clip_size = half_size;
    // Component order matters: sd_round_box selects .xy vs .zw from the sign
    // of pos.x and then the first vs second of that pair from the sign of
    // pos.y.
    v_clip_radii = vec4(
      clip.bottom_right.outer_inner_radius.x,
      clip.top_right.outer_inner_radius.x,
      clip.bottom_left.outer_inner_radius.x,
      clip.top_left.outer_inner_radius.x
    );
#else
    RectWithEndpoint clip_rect = local_rect;

    vec2 r_tl = clip.top_left.outer_inner_radius.xy;
    vec2 r_tr = clip.top_right.outer_inner_radius.xy;
    vec2 r_br = clip.bottom_right.outer_inner_radius.xy;
    vec2 r_bl = clip.bottom_left.outer_inner_radius.xy;

    // Each corner's ellipse center is the rectangle corner pulled inwards by
    // that corner's radii; .zw carries the precomputed inverse squared radii.
    vClipCenter_Radius_TL = vec4(clip_rect.p0 + r_tl,
                                 inverse_radii_squared(r_tl));

    vClipCenter_Radius_TR = vec4(clip_rect.p1.x - r_tr.x,
                                 clip_rect.p0.y + r_tr.y,
                                 inverse_radii_squared(r_tr));

    vClipCenter_Radius_BR = vec4(clip_rect.p1 - r_br,
                                 inverse_radii_squared(r_br));

    vClipCenter_Radius_BL = vec4(clip_rect.p0.x + r_bl.x,
                                 clip_rect.p1.y - r_bl.y,
                                 inverse_radii_squared(r_bl));

    // We need to know the half-spaces of the corners separate from the center
    // and radius. We compute a point that falls on the diagonal (which is just
    // an inner vertex pushed out along one axis, but not on both) to get the
    // plane offset of the half-space. We also compute the direction vector of
    // the half-space, which is a perpendicular vertex (-y,x) of the vector of
    // the diagonal. We leave the scales of the vectors unchanged.
    vec2 n_tl = -r_tl.yx;
    vec2 n_tr = vec2(r_tr.y, -r_tr.x);
    vec2 n_br = r_br.yx;
    vec2 n_bl = vec2(-r_bl.y, r_bl.x);

    vClipPlane_TL = vec3(n_tl,
                         dot(n_tl, vec2(clip_rect.p0.x, clip_rect.p0.y + r_tl.y)));
    vClipPlane_TR = vec3(n_tr,
                         dot(n_tr, vec2(clip_rect.p1.x - r_tr.x, clip_rect.p0.y)));
    vClipPlane_BR = vec3(n_br,
                         dot(n_br, vec2(clip_rect.p1.x, clip_rect.p1.y - r_br.y)));
    vClipPlane_BL = vec3(n_bl,
                         dot(n_bl, vec2(clip_rect.p0.x + r_bl.x, clip_rect.p1.y)));
#endif
}

#endif

#ifdef WR_FRAGMENT_SHADER

#ifdef WR_FEATURE_FAST_PATH

// Signed distance from pos to a box with a separate circular radius per corner.
//
// See https://www.shadertoy.com/view/4llXD7
// Notes:
//  * pos is centered in the origin (so 0,0 is the center of the box).
//  * The border radii must not be larger than half_box_size.
//  * radii is indexed by quadrant: .x for (x > 0, y > 0), .y for
//    (x > 0, y <= 0), .z for (x <= 0, y > 0) and .w for (x <= 0, y <= 0).
float sd_round_box(in vec2 pos, in vec2 half_box_size, in vec4 radii) {
    // Narrow the four radii down to the one for pos's quadrant, which ends up
    // in radii.x.
    radii.xy = (pos.x > 0.0) ? radii.xy : radii.zw;
    radii.x  = (pos.y > 0.0) ? radii.x  : radii.y;
    // Fold pos into the first quadrant and measure from the box shrunk by
    // the radius.
    vec2 q = abs(pos) - half_box_size + radii.x;
    return min(max(q.x, q.y), 0.0) + length(max(q, 0.0)) - radii.x;
}

#endif

// Fragment shader entry point. Evaluates the signed distance to the rounded
// rectangle, converts it to an anti-aliased coverage value and writes that
// value to the red channel of oFragColor.
void main(void) {
    // Undo the w scaling applied to the interpolated position.
    vec2 local_pos = vLocalPos.xy / vLocalPos.w;
    float aa_range = compute_aa_range(local_pos);

#ifdef WR_FEATURE_FAST_PATH
    float dist = sd_round_box(local_pos, v_clip_size, v_clip_radii);
#else
    float dist = distance_to_rounded_rect(
        local_pos,
        vClipPlane_TL,
        vClipCenter_Radius_TL,
        vClipPlane_TR,
        vClipCenter_Radius_TR,
        vClipPlane_BR,
        vClipCenter_Radius_BR,
        vClipPlane_BL,
        vClipCenter_Radius_BL,
        vTransformBounds
    );
#endif

    // Compute AA for the given dist and range.
    float alpha = distance_aa(aa_range, dist);

    // Select alpha or inverse alpha depending on clip in/out: a mode of 0.0
    // keeps alpha, a mode of 1.0 yields 1.0 - alpha.
    float final_alpha = mix(alpha, 1.0 - alpha, vClipMode.x);

    // Fragments with a non-positive w are output as fully clear.
    float final_final_alpha = vLocalPos.w > 0.0 ? final_alpha : 0.0;
    oFragColor = vec4(final_final_alpha, 0.0, 0.0, 1.0);
}

#ifdef SWGL_DRAW_SPAN

// Currently the cs_clip_rectangle shader is slow because it always evaluates

// the corner ellipse segments and the rectangle AA for every fragment the

// shader is run on. To alleviate this for now with SWGL, this essentially

// implements a rounded-rectangle span rasterizer inside the span shader. The

// motivation is that we can separate out the parts of the span which are fully

// opaque and fully transparent, outputting runs of fixed color in those areas,

// while only evaluating the ellipse segments and AA in the smaller outlying

// parts of the span that actually need it.

// The shader conceptually represents a rounded rectangle as an inner octagon

// (8 half-spaces) bounding the opaque region and an outer octagon bounding the

// curve and AA parts. Everything outside is transparent. The line of the span

// is intersected with half-spaces, looking for interior spans that minimally

// intersect the half-spaces (start max, end min). In the ideal case we hit a

// start corner ellipse segment and an end corner ellipse segment, rendering

// the two curves on the ends with an opaque run in between, outputting clear

// for any transparent runs before and after the start and end curves.

// This is slightly complicated by the fact that the results here must agree

// with the main results of the fragment shader, in case SWGL has to fall back

// to the main fragment shader for any reason. So, we make an effort to handle

// both ways of operating - the uniform radius fast-path and the varying radius

// slow-path.

void swgl_drawSpanR8() {
    // Span rasterizer for the rounded-rect clip mask. The span is split into
    // up to five runs: clear, start AA, opaque, end AA, clear. Only the AA
    // runs evaluate the distance function per step; the others are committed
    // as solid runs. Returns without committing anything when the span has
    // perspective.

    // Perspective is not supported.
    if (swgl_interpStep(vLocalPos).w != 0.0) {
        return;
    }

    // If the span is completely outside the Z-range and clipped out, just
    // output clear so we don't need to consider invalid W in the rest of the
    // shader.
    float w = swgl_forceScalar(vLocalPos.w);
    if (w <= 0.0) {
        swgl_commitSolidR8(0.0);
        return;
    }

    // To start, we evaluate the rounded-rectangle in local space relative to
    // the local-space position. This will be interpolated across the span to
    // track whether we intersect any half-spaces.
    w = 1.0 / w;
    vec2 local_pos = vLocalPos.xy * w;
    vec2 local_pos0 = swgl_forceScalar(local_pos);
    vec2 local_step = swgl_interpStep(vLocalPos).xy * w;
    float step_scale = max(dot(local_step, local_step), 1.0e-6);

    // Get the local-space AA range. This range represents 1/fwidth(local_pos),
    // essentially the scale of how much local-space maps to an AA pixel. We
    // need to know the inverse, how much local-space we traverse per AA pixel
    // step. We then scale this to represent the amount of span steps
    // traversed per AA pixel step.
    float aa_range = compute_aa_range(local_pos);
    float aa_margin = inversesqrt(aa_range * aa_range * step_scale);

    // We need to know the bounds of the aligned rectangle portion of the rrect
    // in local-space. If we're using the fast-path, this is specified as the
    // half-width of the rrect in v_clip_size, which we map to the outer
    // bounding-box. For the general case, we have already stored the outer
    // bounding box in vTransformBounds.
    #ifdef WR_FEATURE_FAST_PATH
        vec4 clip_rect = vec4(-v_clip_size, v_clip_size);
    #else
        vec4 clip_rect = vTransformBounds;
    #endif

    // We need to compute the local-space distance to the bounding box and then
    // figure out how many processing steps that maps to. If we are stepping in
    // a negative direction on an axis, we need to swap the sides of the box
    // which we consider as the start or end. If there is no local-space step
    // on an axis (i.e. constant Y), we need to take care to force the steps to
    // either the start or end of the span depending on if we are inside or
    // outside of the bounding box.
    vec4 clip_dist =
        mix(clip_rect, clip_rect.zwxy, lessThan(local_step, vec2(0.0)).xyxy)
            - local_pos0.xyxy;
    clip_dist =
        mix(1.0e6 * step(0.0, clip_dist),
            clip_dist * recip(local_step).xyxy,
            notEqual(local_step, vec2(0.0)).xyxy);

    // Initially, the opaque region is bounded by the further start intersect
    // with the bounding box and the nearest end intersect with the bounding
    // box.
    float opaque_start = max(clip_dist.x, clip_dist.y);
    float opaque_end = min(clip_dist.z, clip_dist.w);
    float aa_start = opaque_start;
    float aa_end = opaque_end;

    // Here we actually intersect with the half-space of the corner. We get the
    // plane distance of the local-space position from the diagonal bounding
    // ellipse segment from the opaque region. The half-space is defined by the
    // direction vector of the plane and an offset point that falls on the
    // dividing line (which is a vertex on the corner box, which is actually on
    // the outer radius of the bounding box, but not a corner vertex). This
    // distance is positive if on the curve side and negative if on the inner
    // opaque region. If we are on the curve side, we need to verify we are
    // traveling in direction towards the opaque region so that we will
    // eventually intersect the diagonal so we can calculate when the start
    // corner segment will end, otherwise we are going away from the rrect.
    // If we are inside the opaque interior, we need to verify we are traveling
    // in direction towards the curve, so that we can calculate when the end
    // corner segment will start. Further, if we intersect, we calculate the
    // offset of the outer octagon where AA starts from the inner octagon of
    // where the opaque region starts using the apex vector (which is transpose
    // of the half-space's direction).
    //
    // We need to intersect the corner ellipse segments. Significantly, we need
    // to know where the apex of the ellipse segment is and how far to push the
    // outer diagonal of the octagon from the inner diagonal. The position of
    // the inner diagonal simply runs diagonal across the corner box and has a
    // constant offset from vertex on the inner bounding box. The apex also has
    // a constant offset along the opposite diagonal relative to the diagonal
    // intersect which is 1/sqrt(2) - 0.5 assuming unit length for the diagonal.
    // We then need to project the vector to the apex onto the local-space step
    // scale, but we do this with reference to the normal vector of the diagonal
    // using dot(normal, apex) / dot(normal, local_step), where the apex vector
    // is (0.7071 - 0.5) * abs(normal).yx * sign(normal).
    //
    // A plane .x of 1.0e6 marks "no corner intersected"; the AA loops below
    // test for it with `< 1.0e5`.
    vec3 start_plane = vec3(1.0e6);
    vec3 end_plane = vec3(1.0e6);

    // plane is assumed to be a vec3 with normal in (X, Y) and offset in Z.
    #define CLIP_CORNER(plane, info) do {                                     \
        float dist = dot(local_pos0, plane.xy) - plane.z;                     \
        float scale = -dot(local_step, plane.xy);                             \
        if (scale >= 0.0) {                                                   \
            if (dist > opaque_start * scale) {                                \
                SET_CORNER(start_corner, info);                               \
                start_plane = plane;                                          \
                float inv_scale = recip(max(scale, 1.0e-6));                  \
                opaque_start = dist * inv_scale;                              \
                float apex = (0.7071 - 0.5) * 2.0 * abs(plane.x * plane.y);   \
                aa_start = opaque_start - apex * inv_scale;                   \
            }                                                                 \
        } else if (dist > opaque_end * scale) {                               \
            SET_CORNER(end_corner, info);                                     \
            end_plane = plane;                                                \
            float inv_scale = recip(min(scale, -1.0e-6));                     \
            opaque_end = dist * inv_scale;                                    \
            float apex = (0.7071 - 0.5) * 2.0 * abs(plane.x * plane.y);       \
            aa_end = opaque_end - apex * inv_scale;                           \
        }                                                                     \
    } while (false)

    #ifdef WR_FEATURE_FAST_PATH
        // For the fast-path, we only have the half-width of the outer bounding
        // box. We need to map this to points that fall on the diagonal of the
        // half-space for each corner. To do this we just need to push out the
        // vertex in the right direction on a single axis, leaving the other
        // unchanged.
        // However, since the corner radii are symmetric, and since the local
        // origin of each ellipse is assumed to be at (0, 0), the plane offset
        // of the half-space is similar for each case.
        // So for a given corner radii of z, given a corner offset (x, y - z)
        // and a vector of (z, z), the dot product becomes:
        //   x * z + (y-z)*z == x*z + y*z - z*z
        // The direction vector of the corner half-space has constant length,
        // but just needs an appropriate direction set.
        #define OFFSET_FOR(radii) \
          ((v_clip_size.x + v_clip_size.y - (radii)) * (radii))

        vec3 plane_br = vec3(v_clip_radii.xx, OFFSET_FOR(v_clip_radii.x));
        vec3 plane_tr = vec3(v_clip_radii.y, -v_clip_radii.y, OFFSET_FOR(v_clip_radii.y));
        vec3 plane_bl = vec3(-v_clip_radii.z, v_clip_radii.z, OFFSET_FOR(v_clip_radii.z));
        vec3 plane_tl = vec3(-v_clip_radii.ww, OFFSET_FOR(v_clip_radii.w));

        // The fast path does not track which corner was hit, so SET_CORNER
        // expands to nothing.
        #define SET_CORNER(corner, info)

        // Clip against the corner half-spaces.
        CLIP_CORNER(plane_tl, );
        CLIP_CORNER(plane_tr, );
        CLIP_CORNER(plane_br, );
        CLIP_CORNER(plane_bl, );

        // Later we need to calculate distance AA for both corners and the
        // outer bounding rect. For the fast-path, this is all done inside
        // sd_round_box.
        #define AA_RECT(local_pos) \
            sd_round_box(local_pos, v_clip_size, v_clip_radii)
    #else
        // For the general case, we need to remember which of the actual start
        // and end corners we intersect, so that we can evaluate the curve AA
        // against only those corners rather than having to try against all 4
        // corners for both sides of the span. Initialize these values so that
        // if no corner is intersected, they will just zero the AA.
        vec4 start_corner = vec4(vec2(1.0e6), vec2(1.0));
        vec4 end_corner = vec4(vec2(1.0e6), vec2(1.0));

        #define SET_CORNER(corner, info) corner = info

        // Clip against the corner half-spaces. We have already computed the
        // corner half-spaces in the vertex shader.
        CLIP_CORNER(vClipPlane_TL, vClipCenter_Radius_TL);
        CLIP_CORNER(vClipPlane_TR, vClipCenter_Radius_TR);
        CLIP_CORNER(vClipPlane_BR, vClipCenter_Radius_BR);
        CLIP_CORNER(vClipPlane_BL, vClipCenter_Radius_BL);

        // Later we need to calculate distance AA for both corners and the
        // outer bounding rect. For the general case, we need to explicitly
        // evaluate either the ellipse segment distance or the rect distance.
        #define AA_RECT(local_pos) \
            signed_distance_rect(local_pos, vTransformBounds.xy, vTransformBounds.zw)
        #define AA_CORNER(local_pos, corner) \
            distance_to_ellipse_approx(local_pos - corner.xy, corner.zw, 1.0)
    #endif

    // Pad the AA region by a margin, as the intersections take place assuming
    // pixel centers, but AA actually starts half a pixel away from the center.
    // If the AA region narrows to nothing, be careful not to inflate so much
    // that we start processing AA for fragments that don't need it.
    aa_margin = max(aa_margin - max(aa_start - aa_end, 0.0), 0.0);
    aa_start -= aa_margin;
    aa_end += aa_margin;

    // Compute the thresholds at which we need to transition between various
    // segments of the span, from fully transparent outside to the start of
    // the outer octagon where AA starts, from there to where the inner opaque
    // octagon starts, from there to where the opaque inner octagon ends and
    // AA starts again, to finally where the outer octagon/AA ends and we're
    // back to fully transparent. These thresholds are just flipped offsets
    // from the start of the span so we can compare against the remaining
    // span length which automatically deducts as we commit fragments.
    ivec4 steps = ivec4(clamp(
        swgl_SpanLength -
            swgl_StepSize *
                vec4(floor(aa_start), ceil(opaque_start), floor(opaque_end), ceil(aa_end)),
        0.0, swgl_SpanLength));
    int aa_start_len = steps.x;
    int opaque_start_len = steps.y;
    int opaque_end_len = steps.z;
    int aa_end_len = steps.w;

    // Output fully clear while we're outside the AA region.
    if (swgl_SpanLength > aa_start_len) {
        int num_aa = swgl_SpanLength - aa_start_len;
        swgl_commitPartialSolidR8(num_aa, vClipMode.x);
        local_pos += float(num_aa / swgl_StepSize) * local_step;
    }

    #ifdef AA_CORNER
    if (start_plane.x < 1.0e5) {
        // We're now in the outer octagon which requires AA. Evaluate the corner
        // distance of the start corner here and output AA for it. Before we hit
        // the actual opaque inner octagon, we have a transitional step where the
        // diagonal might intersect mid-way through the step. We have to consider
        // either the corner or rect distance depending on which side we're on.
        while (swgl_SpanLength > opaque_start_len) {
            float alpha = distance_aa(aa_range,
                dot(local_pos, start_plane.xy) > start_plane.z
                    ? AA_CORNER(local_pos, start_corner)
                    : AA_RECT(local_pos));
            swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
            local_pos += local_step;
        }
    }
    #endif

    // If there's no start corner, just do rect AA until opaque.
    while (swgl_SpanLength > opaque_start_len) {
        float alpha = distance_aa(aa_range, AA_RECT(local_pos));
        swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
        local_pos += local_step;
    }

    // Now we're finally in the opaque inner octagon part of the span. Just
    // output a solid run.
    if (swgl_SpanLength > opaque_end_len) {
        int num_opaque = swgl_SpanLength - opaque_end_len;
        swgl_commitPartialSolidR8(num_opaque, 1.0 - vClipMode.x);
        local_pos += float(num_opaque / swgl_StepSize) * local_step;
    }

    #ifdef AA_CORNER
    if (end_plane.x < 1.0e5) {
        // Finally we're in the AA region on the other side, inside the outer
        // octagon again. Just evaluate the distance to the end corner and
        // compute AA for it. We're leaving the opaque inner octagon, but like
        // before, we have to be careful we're not dealing with a step partially
        // intersected by the end corner's diagonal. Check which side we are on
        // and use either the corner or rect distance as appropriate.
        while (swgl_SpanLength > aa_end_len) {
            float alpha = distance_aa(aa_range,
                dot(local_pos, end_plane.xy) > end_plane.z
                    ? AA_CORNER(local_pos, end_corner)
                    : AA_RECT(local_pos));
            swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
            local_pos += local_step;
        }
    }
    #endif

    // If there's no end corner, just do rect AA until clear.
    while (swgl_SpanLength > aa_end_len) {
        float alpha = distance_aa(aa_range, AA_RECT(local_pos));
        swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
        local_pos += local_step;
    }

    // We're now outside the outer AA octagon on the other side. Just output
    // fully clear.
    if (swgl_SpanLength > 0) {
        swgl_commitPartialSolidR8(swgl_SpanLength, vClipMode.x);
    }
}

#endif

#endif

Copy as Markdown

Other Tools