Source code
Revision control
Copy as Markdown
Other Tools
/* HLSL source text of the paint-renderer fragment shader, stored as one
 * C string built from adjacent string literals (one literal per shader
 * line).  The C comments placed between the literals below are dropped
 * by the C compiler and are NOT part of the shader text.
 *
 * Per the shader's own header comment, the shared fragment helpers and
 * the draw-renderer fragment helpers must be prepended to this source:
 * hb_gpu_fetch, _hb_gpu_slug and HB_GPU_INV_UNITS are used here but are
 * not defined here. */
static const char *hb_gpu_paint_fragment_hlsl =
"/*\n"
" * Copyright (C) 2026 Behdad Esfahbod\n"
" *\n"
" * This is part of HarfBuzz, a text shaping library.\n"
" *\n"
" * Permission is hereby granted, without written agreement and without\n"
" * license or royalty fees, to use, copy, modify, and distribute this\n"
" * software and its documentation for any purpose, provided that the\n"
" * above copyright notice and the following two paragraphs appear in\n"
" * all copies of this software.\n"
" *\n"
" * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR\n"
" * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES\n"
" * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN\n"
" * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\n"
" * DAMAGE.\n"
" *\n"
" * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,\n"
" * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n"
" * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS\n"
" * ON AN \"AS IS\" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO\n"
" * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.\n"
" */\n"
"\n"
"\n"
"/* Paint-renderer fragment shader (HLSL).\n"
" *\n"
" * Assumes the shared fragment helpers (hb-gpu-fragment.hlsl) and\n"
" * the draw-renderer fragment helpers (hb-gpu-draw-fragment.hlsl)\n"
" * are prepended to this source.\n"
" */\n"
"\n"
"\n"
/* _hb_gpu_stop_color: decode color stop `i` of a stop table.
 * A stop occupies two texels starting at stops_base + i * 2:
 *   texel 0: .r = stop offset scaled by 32767; bit 0 of .g = flag
 *            selecting the foreground color;
 *   texel 1: RGBA scaled by 32767.
 * Writes the offset to `offset`.  Returns texel 1 / 32767 or, when the
 * flag is set, foreground.rgb with foreground.a multiplied by texel 1's
 * scaled alpha. */
"float4 _hb_gpu_stop_color (int stops_base, int i, float4 foreground,\n"
" out float offset)\n"
"{\n"
" int4 a = hb_gpu_fetch (stops_base + i * 2);\n"
" offset = (float) a.r / 32767.0;\n"
" int4 b = hb_gpu_fetch (stops_base + i * 2 + 1);\n"
" if ((a.g & 1) != 0)\n"
" return float4 (foreground.rgb, foreground.a * ((float) b.a / 32767.0));\n"
" return (float4) b / 32767.0;\n"
"}\n"
"\n"
/* _hb_gpu_extend_t: map gradient parameter t into [0, 1].
 *   extend == 1: repeat (fractional part of t);
 *   extend == 2: reflect (triangle wave with period 2);
 *   otherwise:   clamp to [0, 1]. */
"float _hb_gpu_extend_t (float t, int extend)\n"
"{\n"
" if (extend == 1) return t - floor (t);\n"
" if (extend == 2) {\n"
" float u = t - 2.0 * floor (t * 0.5);\n"
" return u > 1.0 ? 2.0 - u : u;\n"
" }\n"
" return clamp (t, 0.0, 1.0);\n"
"}\n"
"\n"
/* _hb_gpu_eval_stops: evaluate the color line at t.
 * Walks the stops in order.  Returns the first stop's color when t is
 * at or before its offset, the last visited stop's color when t is past
 * every stop, and otherwise interpolates between the two bracketing
 * stops.  Interpolation is done on premultiplied colors and the result
 * is un-premultiplied again; a span of at most 1e-6 yields f = 0 and an
 * interpolated alpha of at most 1e-6 yields fully transparent, which
 * avoids dividing by (near) zero.
 *
 * The transparent fallback is spelled with four components: HLSL
 * numeric constructors need one value per component, so the GLSL-style
 * single-argument form does not compile.  This also matches every other
 * transparent return in this shader.
 *
 * Stop 0 is fetched unconditionally, so stop_count is presumably >= 1
 * -- confirm against the encoder. */
"float4 _hb_gpu_eval_stops (int stops_base, int stop_count, float t, float4 foreground)\n"
"{\n"
" float off_prev;\n"
" float4 col_prev = _hb_gpu_stop_color (stops_base, 0, foreground, off_prev);\n"
" if (t <= off_prev) return col_prev;\n"
" for (int i = 1; i < stop_count; i++)\n"
" {\n"
" float off;\n"
" float4 col = _hb_gpu_stop_color (stops_base, i, foreground, off);\n"
" if (t <= off)\n"
" {\n"
" float span = off - off_prev;\n"
" float f = span > 1e-6 ? (t - off_prev) / span : 0.0;\n"
" float4 p0 = float4 (col_prev.rgb * col_prev.a, col_prev.a);\n"
" float4 p1 = float4 (col.rgb * col.a, col.a);\n"
" float4 pm = lerp (p0, p1, f);\n"
" return pm.a > 1e-6 ? float4 (pm.rgb / pm.a, pm.a) : float4 (0.0, 0.0, 0.0, 0.0);\n"
" }\n"
" col_prev = col;\n"
" off_prev = off;\n"
" }\n"
" return col_prev;\n"
"}\n"
"\n"
/* _hb_gpu_apply_minv: multiply v by the 2x2 matrix stored in m as rows
 * (m.x, m.y) and (m.z, m.w), each entry scaled by 1024. */
"/* Apply the stored 2x2 M^-1 (row-major i16 Q10) to a vector. */\n"
"float2 _hb_gpu_apply_minv (int4 m, float2 v)\n"
"{\n"
" float4 mf = (float4) m * (1.0 / 1024.0);\n"
" return float2 (mf.x * v.x + mf.y * v.y,\n"
" mf.z * v.x + mf.w * v.y);\n"
"}\n"
"\n"
/* _hb_gpu_sample_linear: sample a linear gradient at renderCoord.
 * Texel grad_base holds the origin (.r, .g) and the direction vector d
 * (.b, .a); texel grad_base + 1 holds the 2x2 matrix applied to
 * (renderCoord - origin); the stop table starts at grad_base + 2.
 * t = dot (p, d) / dot (d, d), then passed through _hb_gpu_extend_t.
 * Returns fully transparent when dot (d, d) < 1e-6. */
"float4 _hb_gpu_sample_linear (float2 renderCoord, int grad_base,\n"
" int stop_count, int extend, float4 foreground)\n"
"{\n"
" int4 t0 = hb_gpu_fetch (grad_base);\n"
" int4 m = hb_gpu_fetch (grad_base + 1);\n"
" float2 p0_r = float2 ((float) t0.r, (float) t0.g);\n"
" float2 d = float2 ((float) t0.b, (float) t0.a);\n"
" float denom = dot (d, d);\n"
" if (denom < 1e-6) return float4 (0.0, 0.0, 0.0, 0.0);\n"
" float2 p = _hb_gpu_apply_minv (m, renderCoord - p0_r);\n"
" float t = dot (p, d) / denom;\n"
" t = _hb_gpu_extend_t (t, extend);\n"
" return _hb_gpu_eval_stops (grad_base + 2, stop_count, t, foreground);\n"
"}\n"
"\n"
/* _hb_gpu_sample_radial: sample a two-circle radial gradient.
 * Texel grad_base: first center (.r, .g) and cd (.b, .a), presumably
 * the offset from the first center to the second -- confirm against
 * the encoder.  Texel grad_base + 1: radii r0 (.r) and r1 (.g).  Texel
 * grad_base + 2: 2x2 matrix applied to (renderCoord - first center).
 * The stop table starts at grad_base + 3.
 *
 * Solves A t^2 + B t + C = 0, the squared form of
 * |p - t cd| = r0 + t dr.  Returns fully transparent when the
 * discriminant is negative or, in the |A| <= 1e-6 (linear) case, when
 * |B| < 1e-6.
 *
 * NOTE(review): the root (-B + sqrt (disc)) / (2A) is chosen whenever
 * its radius r0 + t dr is non-negative; otherwise the other root is
 * used without a radius check.  For A < 0 the first root is the smaller
 * of the two -- confirm this selection is intended. */
"float4 _hb_gpu_sample_radial (float2 renderCoord, int grad_base,\n"
" int stop_count, int extend, float4 foreground)\n"
"{\n"
" int4 t0 = hb_gpu_fetch (grad_base);\n"
" int4 t1 = hb_gpu_fetch (grad_base + 1);\n"
" int4 m = hb_gpu_fetch (grad_base + 2);\n"
" float2 c0_r = float2 ((float) t0.r, (float) t0.g);\n"
" float2 cd = float2 ((float) t0.b, (float) t0.a);\n"
" float r0 = (float) t1.r;\n"
" float r1 = (float) t1.g;\n"
"\n"
" float dr = r1 - r0;\n"
" float2 p = _hb_gpu_apply_minv (m, renderCoord - c0_r);\n"
"\n"
" float A = dot (cd, cd) - dr * dr;\n"
" float B = -2.0 * (dot (p, cd) + r0 * dr);\n"
" float C = dot (p, p) - r0 * r0;\n"
"\n"
" float t;\n"
" if (abs (A) > 1e-6)\n"
" {\n"
" float disc = B * B - 4.0 * A * C;\n"
" if (disc < 0.0) return float4 (0.0, 0.0, 0.0, 0.0);\n"
" float sq = sqrt (disc);\n"
" float t1r = (-B + sq) / (2.0 * A);\n"
" float t2r = (-B - sq) / (2.0 * A);\n"
" t = (r0 + t1r * dr >= 0.0) ? t1r : t2r;\n"
" }\n"
" else\n"
" {\n"
" if (abs (B) < 1e-6) return float4 (0.0, 0.0, 0.0, 0.0);\n"
" t = -C / B;\n"
" }\n"
" t = _hb_gpu_extend_t (t, extend);\n"
" return _hb_gpu_eval_stops (grad_base + 3, stop_count, t, foreground);\n"
"}\n"
"\n"
/* _hb_gpu_sample_sweep: sample a sweep gradient.
 * Texel grad_base: center (.r, .g) and the two angles a0 (.b) and a1
 * (.a), each scaled by 16384; texel grad_base + 1: 2x2 matrix applied
 * to (renderCoord - center); the stop table starts at grad_base + 2.
 * The pixel angle is atan2 / pi wrapped into [0, 2), so a0 and a1 are
 * compared in units of pi radians.  Returns fully transparent when
 * |a1 - a0| < 1e-6. */
"float4 _hb_gpu_sample_sweep (float2 renderCoord, int grad_base,\n"
" int stop_count, int extend, float4 foreground)\n"
"{\n"
" int4 t0 = hb_gpu_fetch (grad_base);\n"
" int4 m = hb_gpu_fetch (grad_base + 1);\n"
" float2 c_r = float2 ((float) t0.r, (float) t0.g);\n"
" float a0 = (float) t0.b / 16384.0;\n"
" float a1 = (float) t0.a / 16384.0;\n"
" float span = a1 - a0;\n"
" if (abs (span) < 1e-6) return float4 (0.0, 0.0, 0.0, 0.0);\n"
"\n"
" float2 p = _hb_gpu_apply_minv (m, renderCoord - c_r);\n"
" float ang = atan2 (p.y, p.x) / 3.14159265358979;\n"
" if (ang < 0.0) ang += 2.0;\n"
" float t = (ang - a0) / span;\n"
" t = _hb_gpu_extend_t (t, extend);\n"
" return _hb_gpu_eval_stops (grad_base + 2, stop_count, t, foreground);\n"
"}\n"
"\n"
/* _hb_gpu_composite: combine src with dst according to `mode`.
 * The result starts as SRC_OVER and is overwritten when `mode` is one
 * of the implemented modes.  Modes 14, 18 and 19 are first remapped to
 * 23 (MULTIPLY) and modes 17 and 20 to 13 (SCREEN); any mode without a
 * branch keeps the SRC_OVER result.  The formulas operate on
 * premultiplied colors, as the in-shader comments note. */
"float4 _hb_gpu_composite (float4 src, float4 dst, int mode)\n"
"{\n"
" float4 r = src + dst * (1.0 - src.a); /* SRC_OVER default */\n"
"\n"
" /* Mode numbers match hb_paint_composite_mode_t. Approximate\n"
" * unsupported modes with the nearest Porter-Duff mode we do\n"
" * implement; DIFFERENCE / EXCLUSION / HSL_* still fall through to\n"
" * SRC_OVER below. */\n"
" if (mode == 14 || mode == 18 || mode == 19) mode = 23; /* OVERLAY / COLOR_BURN / HARD_LIGHT -> MULTIPLY */\n"
" else if (mode == 17 || mode == 20) mode = 13; /* COLOR_DODGE / SOFT_LIGHT -> SCREEN */\n"
"\n"
" if (mode == 0) r = float4 (0.0, 0.0, 0.0, 0.0); /* CLEAR */\n"
" else if (mode == 1) r = src; /* SRC */\n"
" else if (mode == 2) r = dst; /* DST */\n"
" else if (mode == 4) r = dst + src * (1.0 - dst.a); /* DST_OVER */\n"
" else if (mode == 5) r = src * dst.a; /* SRC_IN */\n"
" else if (mode == 6) r = dst * src.a; /* DST_IN */\n"
" else if (mode == 7) r = src * (1.0 - dst.a); /* SRC_OUT */\n"
" else if (mode == 8) r = dst * (1.0 - src.a); /* DST_OUT */\n"
" else if (mode == 9) r = src * dst.a + dst * (1.0 - src.a); /* SRC_ATOP */\n"
" else if (mode == 10) r = dst * src.a + src * (1.0 - dst.a); /* DST_ATOP */\n"
" else if (mode == 11) r = src * (1.0 - dst.a) + dst * (1.0 - src.a); /* XOR */\n"
" else if (mode == 12) r = min (src + dst, float4 (1.0, 1.0, 1.0, 1.0)); /* PLUS */\n"
" else if (mode == 13) { /* SCREEN (premul) */\n"
" r.rgb = src.rgb + dst.rgb - src.rgb * dst.rgb;\n"
" r.a = src.a + dst.a - src.a * dst.a;\n"
" }\n"
" else if (mode == 15) { /* DARKEN */\n"
" r.rgb = min (src.rgb * dst.a, dst.rgb * src.a)\n"
" + src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a);\n"
" r.a = src.a + dst.a - src.a * dst.a;\n"
" }\n"
" else if (mode == 16) { /* LIGHTEN */\n"
" r.rgb = max (src.rgb * dst.a, dst.rgb * src.a)\n"
" + src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a);\n"
" r.a = src.a + dst.a - src.a * dst.a;\n"
" }\n"
" else if (mode == 23) { /* MULTIPLY (premul) */\n"
" r.rgb = src.rgb * (1.0 - dst.a) + dst.rgb * (1.0 - src.a)\n"
" + src.rgb * dst.rgb;\n"
" r.a = src.a + dst.a - src.a * dst.a;\n"
" }\n"
" /* SRC_OVER (3) and DIFFERENCE / EXCLUSION / HSL_* (21, 22, 24-27)\n"
" * fall through to the SRC_OVER default. */\n"
" return r;\n"
"}\n"
"\n"
/* _hb_gpu_slug_clipped: _hb_gpu_slug with an early-out on extents.
 * The texel at glyphLoc_, scaled by HB_GPU_INV_UNITS, is read as
 * extents (.xy = minimum corner, .zw = maximum corner).  Returns 0.0
 * when renderCoord lies outside those extents grown by
 * 2.0 / pixelsPerEm on each side; otherwise defers to _hb_gpu_slug. */
"/* Wrap _hb_gpu_slug with a sub-glyph extents bail-out. Many\n"
" * paint layers cover a small region of the outer glyph quad; for\n"
" * fragments outside the layer's bbox (with an AA + MSAA-spread\n"
" * margin) the slug coverage is exactly 0, so we can skip the\n"
" * band/curve walk entirely. */\n"
"float _hb_gpu_slug_clipped (float2 renderCoord, float2 pixelsPerEm, uint glyphLoc_)\n"
"{\n"
" int4 header0 = hb_gpu_fetch ((int) glyphLoc_);\n"
" float4 ext = (float4) header0 * HB_GPU_INV_UNITS;\n"
" float2 margin = 2.0 / pixelsPerEm;\n"
" if (any (renderCoord < ext.xy - margin) ||\n"
" any (renderCoord > ext.zw + margin))\n"
" return 0.0;\n"
" return _hb_gpu_slug (renderCoord, pixelsPerEm, glyphLoc_);\n"
"}\n"
"\n"
/* _hb_gpu_layer_coverage: product of the coverages of the layer's clip
 * outlines.  The first clip (base + clip1_payload) is always sampled;
 * the second only when flag bit 0x100 is set, and the third only when
 * both 0x100 and 0x200 are set. */
"/* Combine slug coverages from all clip outlines on the layer.\n"
" * Factored so the shader has one set of inlined slug walks\n"
" * instead of two (one per LAYER op type). flags bits: 0x100 =\n"
" * HAS_CLIP2; 0x200 = HAS_CLIP3 (HAS_CLIP3 implies HAS_CLIP2). */\n"
"float _hb_gpu_layer_coverage (float2 renderCoord, float2 pixelsPerEm,\n"
" int base, int flags,\n"
" int clip1_payload, int clip2_payload, int clip3_payload)\n"
"{\n"
" float cov = _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n"
" (uint) (base + clip1_payload));\n"
" if ((flags & 0x100) != 0)\n"
" {\n"
" cov *= _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n"
" (uint) (base + clip2_payload));\n"
" if ((flags & 0x200) != 0)\n"
" cov *= _hb_gpu_slug_clipped (renderCoord, pixelsPerEm,\n"
" (uint) (base + clip3_payload));\n"
" }\n"
" return cov;\n"
"}\n"
"\n"
/* hb_gpu_paint: shader entry point; interprets the op stream of a paint
 * glyph and returns the accumulated premultiplied color.
 *
 * Header: texel base (.r) = number of ops; texel base + 2 (.r) = offset
 * of the first op relative to base.
 * Every op starts with a texel (op_type, aux, payload_hi, payload_lo):
 *   0  solid layer, 3 texels: clip2/clip3 payloads, then RGBA / 32767;
 *      aux bit 0 selects the foreground color, aux also carries the
 *      clip flags;
 *   1  gradient layer, 3 texels: clip2/clip3 payloads, then
 *      (grad_hi, grad_lo, extend, stop_count); aux & 0xff selects
 *      linear (0), radial (1) or sweep (2), any other value leaves the
 *      layer color transparent;
 *   2  save acc on the group stack and clear it, 1 texel;
 *   3  pop the stack and composite acc onto the popped value with
 *      mode aux, 1 texel;
 *   anything else stops the loop.
 * Layers are blended SRC_OVER onto acc; `coverage` receives the maximum
 * coverage over all layer ops.
 *
 * The group stack holds HB_GPU_PAINT_GROUP_DEPTH entries.
 * NOTE(review): when the stack is full, op 2 still clears acc without
 * saving it, and the matching op 3 then pops an outer group -- confirm
 * the encoder never nests deeper than the stack. */
"#define HB_GPU_PAINT_GROUP_DEPTH 4\n"
"\n"
"float4 hb_gpu_paint (float2 renderCoord, uint glyphLoc_, float4 foreground,\n"
" out float coverage)\n"
"{\n"
" /* fwidth once, at uniform control flow. */\n"
" float2 pixelsPerEm = 1.0 / fwidth (renderCoord);\n"
"\n"
" int base = (int) glyphLoc_;\n"
" int4 h0 = hb_gpu_fetch (base); /* (num_ops, _, _, _) */\n"
" int4 h2 = hb_gpu_fetch (base + 2); /* (ops_offset, _, _, _) */\n"
" int num_ops = h0.r;\n"
" int cursor = base + h2.r;\n"
"\n"
" float4 acc = float4 (0.0, 0.0, 0.0, 0.0);\n"
" float4 group_stack[HB_GPU_PAINT_GROUP_DEPTH];\n"
" coverage = 0.0;\n"
" int sp = 0;\n"
"\n"
" for (int i = 0; i < num_ops; i++)\n"
" {\n"
" int4 op = hb_gpu_fetch (cursor);\n"
" int op_type = op.r;\n"
" int aux = op.g;\n"
" int payload = (op.b << 16) | (op.a & 0xffff);\n"
"\n"
" if (op_type == 0) /* LAYER_SOLID */\n"
" {\n"
" /* texel 1: (clip2_hi, clip2_lo, clip3_hi, clip3_lo) -- valid\n"
" * per HAS_CLIP2 / HAS_CLIP3 flag bits.\n"
" * texel 2: RGBA as signed Q15. */\n"
" int4 op2 = hb_gpu_fetch (cursor + 1);\n"
" int clip2_payload = (op2.r << 16) | (op2.g & 0xffff);\n"
" int clip3_payload = (op2.b << 16) | (op2.a & 0xffff);\n"
" int4 ct = hb_gpu_fetch (cursor + 2);\n"
" float4 col = ((aux & 1) != 0)\n"
" ? float4 (foreground.rgb, foreground.a * ((float) ct.a / 32767.0))\n"
" : (float4) ct / 32767.0;\n"
"\n"
" float cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm,\n"
" base, aux,\n"
" payload, clip2_payload, clip3_payload);\n"
" coverage = max (coverage, cov);\n"
" float4 src = float4 (col.rgb * col.a, col.a) * cov;\n"
" acc = src + acc * (1.0 - src.a);\n"
"\n"
" cursor += 3;\n"
" }\n"
" else if (op_type == 1) /* LAYER_GRADIENT */\n"
" {\n"
" int4 op2 = hb_gpu_fetch (cursor + 1);\n"
" int clip2_payload = (op2.r << 16) | (op2.g & 0xffff);\n"
" int clip3_payload = (op2.b << 16) | (op2.a & 0xffff);\n"
" int4 op3 = hb_gpu_fetch (cursor + 2);\n"
" int grad_payload = (op3.r << 16) | (op3.g & 0xffff);\n"
" int extend = op3.b;\n"
" int stop_count = op3.a;\n"
" int subtype = aux & 0xff;\n"
"\n"
" float4 col = float4 (0.0, 0.0, 0.0, 0.0);\n"
" if (subtype == 0)\n"
" col = _hb_gpu_sample_linear (renderCoord,\n"
" base + grad_payload,\n"
" stop_count, extend, foreground);\n"
" else if (subtype == 1)\n"
" col = _hb_gpu_sample_radial (renderCoord,\n"
" base + grad_payload,\n"
" stop_count, extend, foreground);\n"
" else if (subtype == 2)\n"
" col = _hb_gpu_sample_sweep (renderCoord,\n"
" base + grad_payload,\n"
" stop_count, extend, foreground);\n"
"\n"
" float cov = _hb_gpu_layer_coverage (renderCoord, pixelsPerEm,\n"
" base, aux,\n"
" payload, clip2_payload, clip3_payload);\n"
" coverage = max (coverage, cov);\n"
" float4 src = float4 (col.rgb * col.a, col.a) * cov;\n"
" acc = src + acc * (1.0 - src.a);\n"
"\n"
" cursor += 3;\n"
" }\n"
/* op_type 2: start a group -- save acc (if there is room) and clear it. */
" else if (op_type == 2)\n"
" {\n"
" if (sp < HB_GPU_PAINT_GROUP_DEPTH) {\n"
" group_stack[sp] = acc;\n"
" sp++;\n"
" }\n"
" acc = float4 (0.0, 0.0, 0.0, 0.0);\n"
" cursor += 1;\n"
" }\n"
/* op_type 3: end a group -- composite acc (src) onto the saved value
 * (dst) using mode aux; ignored when the stack is empty. */
" else if (op_type == 3)\n"
" {\n"
" if (sp > 0) {\n"
" sp--;\n"
" float4 src = acc;\n"
" float4 dst = group_stack[sp];\n"
" acc = _hb_gpu_composite (src, dst, aux);\n"
" }\n"
" cursor += 1;\n"
" }\n"
" else\n"
" {\n"
" break;\n"
" }\n"
" }\n"
"\n"
" return acc;\n"
"}\n"
;