Source code

Revision control

Other Tools

1
/* This Source Code Form is subject to the terms of the Mozilla Public
2
* License, v. 2.0. If a copy of the MPL was not distributed with this
3
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5
//! The high-level module responsible for interfacing with the GPU.
6
//!
7
//! Much of WebRender's design is driven by separating work into different
8
//! threads. To avoid the complexities of multi-threaded GPU access, we restrict
9
//! all communication with the GPU to one thread, the render thread. But since
10
//! issuing GPU commands is often a bottleneck, we move everything else (i.e.
11
//! the computation of what commands to issue) to another thread, the
12
//! RenderBackend thread. The RenderBackend, in turn, may delegate work to other
13
//! threads (like the SceneBuilder threads or Rayon workers), but the
14
//! Render-vs-RenderBackend distinction is the most important.
15
//!
16
//! The consumer is responsible for initializing the render thread before
17
//! calling into WebRender, which means that this module also serves as the
18
//! initial entry point into WebRender, and is responsible for spawning the
19
//! various other threads discussed above. That said, WebRender initialization
20
//! returns both the `Renderer` instance as well as a channel for communicating
21
//! directly with the `RenderBackend`. Aside from a few high-level operations
22
//! like 'render now', most of the interesting commands from the consumer go over
23
//! that channel and operate on the `RenderBackend`.
24
//!
25
//! ## Space conversion guidelines
26
//! At this stage, we should be operating with `DevicePixel` and `FramebufferPixel` only.
27
//! "Framebuffer" space represents the final destination of our rendeing,
28
//! and it happens to be Y-flipped on OpenGL. The conversion is done as follows:
29
//! - for rasterized primitives, the orthographic projection transforms
30
//! the content rectangle to -1 to 1
31
//! - the viewport transformation is setup to map the whole range to
32
//! the framebuffer rectangle provided by the document view, stored in `DrawTarget`
33
//! - all the direct framebuffer operations, like blitting, reading pixels, and setting
34
//! up the scissor, are accepting already transformed coordinates, which we can get by
35
//! calling `DrawTarget::to_framebuffer_rect`
36
37
use api::{ApiMsg, BlobImageHandler, ColorF, ColorU, MixBlendMode};
38
use api::{DocumentId, Epoch, ExternalImageHandler, ExternalImageId};
39
use api::{ExternalImageSource, ExternalImageType, FontRenderMode, FrameMsg, ImageFormat};
40
use api::{PipelineId, ImageRendering, Checkpoint, NotificationRequest, OutputImageHandler};
41
use api::{DebugCommand, MemoryReport, VoidPtrToSizeFn, PremultipliedColorF};
42
use api::{RenderApiSender, RenderNotifier, TextureTarget};
43
#[cfg(feature = "replay")]
44
use api::ExternalImage;
45
use api::channel;
46
use api::units::*;
47
pub use api::DebugFlags;
48
use api::channel::MsgSender;
49
use crate::batch::{AlphaBatchContainer, BatchKind, BatchFeatures, BatchTextures, BrushBatchKind, ClipBatchList};
50
#[cfg(any(feature = "capture", feature = "replay"))]
51
use crate::capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
52
use crate::composite::{CompositeState, CompositeTileSurface, CompositeTile, ResolvedExternalSurface};
53
use crate::composite::{CompositorKind, Compositor, NativeTileId, CompositeSurfaceFormat};
54
use crate::composite::{CompositorConfig, NativeSurfaceOperationDetails, NativeSurfaceId, NativeSurfaceOperation};
55
use crate::debug_colors;
56
use crate::debug_render::{DebugItem, DebugRenderer};
57
use crate::device::{DepthFunction, Device, GpuFrameId, Program, UploadMethod, Texture, PBO};
58
use crate::device::{DrawTarget, ExternalTexture, FBOId, ReadTarget, TextureSlot};
59
use crate::device::{ShaderError, TextureFilter, TextureFlags,
60
VertexUsageHint, VAO, VBO, CustomVAO};
61
use crate::device::ProgramCache;
62
use crate::device::query::GpuTimer;
63
use euclid::{rect, Transform3D, Scale, default};
64
use crate::frame_builder::{Frame, ChasePrimitive, FrameBuilderConfig};
65
use gleam::gl;
66
use crate::glyph_cache::GlyphCache;
67
use crate::glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
68
use crate::gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
69
use crate::gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd};
70
use crate::gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, ScalingInstance, SvgFilterInstance, TransformData};
71
use crate::gpu_types::{CompositeInstance, ResolveInstanceData, ZBufferId};
72
use crate::internal_types::{TextureSource, ResourceCacheError};
73
use crate::internal_types::{CacheTextureId, DebugOutput, FastHashMap, FastHashSet, LayerIndex, RenderedDocument, ResultMsg};
74
use crate::internal_types::{TextureCacheAllocationKind, TextureCacheUpdate, TextureUpdateList, TextureUpdateSource};
75
use crate::internal_types::{RenderTargetInfo, SavedTargetIndex, Swizzle};
76
use malloc_size_of::MallocSizeOfOps;
77
use crate::picture::{RecordedDirtyRegion, tile_cache_sizes, ResolvedSurfaceTexture};
78
use crate::prim_store::DeferredResolve;
79
use crate::profiler::{BackendProfileCounters, FrameProfileCounters, TimeProfileCounter,
80
GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
81
use crate::profiler::{Profiler, ChangeIndicator, ProfileStyle, add_event_marker};
82
use crate::device::query::{GpuProfiler, GpuDebugMethod};
83
use rayon::{ThreadPool, ThreadPoolBuilder};
84
use crate::record::ApiRecordingReceiver;
85
use crate::render_backend::{FrameId, RenderBackend};
86
use crate::render_task_graph::RenderTaskGraph;
87
use crate::render_task::{RenderTask, RenderTaskData, RenderTaskKind};
88
use crate::resource_cache::ResourceCache;
89
use crate::scene_builder_thread::{SceneBuilderThread, SceneBuilderThreadChannels, LowPrioritySceneBuilderThread};
90
use crate::screen_capture::AsyncScreenshotGrabber;
91
use crate::shade::{Shaders, WrShaders};
92
use smallvec::SmallVec;
93
use crate::texture_cache::TextureCache;
94
use crate::render_target::{AlphaRenderTarget, ColorRenderTarget, PictureCacheTarget};
95
use crate::render_target::{RenderTarget, TextureCacheRenderTarget, RenderTargetList};
96
use crate::render_target::{RenderTargetKind, BlitJob, BlitJobSource};
97
use crate::render_task_graph::RenderPassKind;
98
use crate::util::drain_filter;
99
use crate::c_str;
100
101
use std;
102
use std::cmp;
103
use std::collections::VecDeque;
104
use std::collections::hash_map::Entry;
105
use std::f32;
106
use std::marker::PhantomData;
107
use std::mem;
108
use std::os::raw::c_void;
109
use std::path::PathBuf;
110
use std::rc::Rc;
111
use std::sync::Arc;
112
use std::sync::atomic::{AtomicBool, Ordering};
113
use std::sync::mpsc::{channel, Receiver};
114
use std::thread;
115
use std::cell::RefCell;
116
use tracy_rs::register_thread_with_profiler;
117
use time::precise_time_ns;
118
119
// The debugger feature pulls in a JSON serializer plus the in-process debug
// server; neither is needed in normal builds.
cfg_if! {
    if #[cfg(feature = "debugger")] {
        use serde_json;
        use crate::debug_server;
    }
}
125
126
// Tunables. NOTE(review): both constants are consumed elsewhere in this file;
// values preserved as-is.
const DEFAULT_BATCH_LOOKBACK_COUNT: usize = 10;
const VERTEX_TEXTURE_EXTRA_ROWS: i32 = 10;
128
129
/// Set to `true` the first time any WebRender instance is created in this
/// process; never reset afterwards.
static HAS_BEEN_INITIALIZED: AtomicBool = AtomicBool::new(false);

/// Returns true if a WR instance has ever been initialized in this process.
pub fn wr_has_been_initialized() -> bool {
    HAS_BEEN_INITIALIZED.load(Ordering::SeqCst)
}
136
137
pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;

/// When enabled, forces the GPU cache scattered texture to be resized every
/// frame, which lets GPU debuggers verify that resizing is performed
/// correctly.
const GPU_CACHE_RESIZE_TEST: bool = false;

/// Number of GPU blocks per UV rectangle provided for an image.
pub const BLOCKS_PER_UV_RECT: usize = 2;
145
146
const GPU_TAG_BRUSH_OPACITY: GpuProfileTag = GpuProfileTag {
147
label: "B_Opacity",
148
color: debug_colors::DARKMAGENTA,
149
};
150
const GPU_TAG_BRUSH_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
151
label: "B_LinearGradient",
152
color: debug_colors::POWDERBLUE,
153
};
154
const GPU_TAG_BRUSH_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag {
155
label: "B_RadialGradient",
156
color: debug_colors::LIGHTPINK,
157
};
158
const GPU_TAG_BRUSH_CONIC_GRADIENT: GpuProfileTag = GpuProfileTag {
159
label: "B_ConicGradient",
160
color: debug_colors::GREEN,
161
};
162
const GPU_TAG_BRUSH_YUV_IMAGE: GpuProfileTag = GpuProfileTag {
163
label: "B_YuvImage",
164
color: debug_colors::DARKGREEN,
165
};
166
const GPU_TAG_BRUSH_MIXBLEND: GpuProfileTag = GpuProfileTag {
167
label: "B_MixBlend",
168
color: debug_colors::MAGENTA,
169
};
170
const GPU_TAG_BRUSH_BLEND: GpuProfileTag = GpuProfileTag {
171
label: "B_Blend",
172
color: debug_colors::ORANGE,
173
};
174
const GPU_TAG_BRUSH_IMAGE: GpuProfileTag = GpuProfileTag {
175
label: "B_Image",
176
color: debug_colors::SPRINGGREEN,
177
};
178
const GPU_TAG_BRUSH_SOLID: GpuProfileTag = GpuProfileTag {
179
label: "B_Solid",
180
color: debug_colors::RED,
181
};
182
const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag {
183
label: "C_Clip",
184
color: debug_colors::PURPLE,
185
};
186
const GPU_TAG_CACHE_BORDER: GpuProfileTag = GpuProfileTag {
187
label: "C_Border",
188
color: debug_colors::CORNSILK,
189
};
190
const GPU_TAG_CACHE_LINE_DECORATION: GpuProfileTag = GpuProfileTag {
191
label: "C_LineDecoration",
192
color: debug_colors::YELLOWGREEN,
193
};
194
const GPU_TAG_CACHE_GRADIENT: GpuProfileTag = GpuProfileTag {
195
label: "C_Gradient",
196
color: debug_colors::BROWN,
197
};
198
const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag {
199
label: "target init",
200
color: debug_colors::SLATEGREY,
201
};
202
const GPU_TAG_SETUP_DATA: GpuProfileTag = GpuProfileTag {
203
label: "data init",
204
color: debug_colors::LIGHTGREY,
205
};
206
const GPU_TAG_PRIM_SPLIT_COMPOSITE: GpuProfileTag = GpuProfileTag {
207
label: "SplitComposite",
208
color: debug_colors::DARKBLUE,
209
};
210
const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag {
211
label: "TextRun",
212
color: debug_colors::BLUE,
213
};
214
const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag {
215
label: "Blur",
216
color: debug_colors::VIOLET,
217
};
218
const GPU_TAG_BLIT: GpuProfileTag = GpuProfileTag {
219
label: "Blit",
220
color: debug_colors::LIME,
221
};
222
const GPU_TAG_SCALE: GpuProfileTag = GpuProfileTag {
223
label: "Scale",
224
color: debug_colors::GHOSTWHITE,
225
};
226
const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag {
227
label: "Alpha Targets",
228
color: debug_colors::BLACK,
229
};
230
const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag {
231
label: "Opaque Pass",
232
color: debug_colors::BLACK,
233
};
234
const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag {
235
label: "Transparent Pass",
236
color: debug_colors::BLACK,
237
};
238
const GPU_TAG_SVG_FILTER: GpuProfileTag = GpuProfileTag {
239
label: "SvgFilter",
240
color: debug_colors::LEMONCHIFFON,
241
};
242
const GPU_TAG_COMPOSITE: GpuProfileTag = GpuProfileTag {
243
label: "Composite",
244
color: debug_colors::TOMATO,
245
};
246
247
/// Clear color for the texture cache when the debug display is enabled.
/// A shade of blue (not pure blue) so items that are themselves completely
/// blue remain identifiable against the background.
const TEXTURE_CACHE_DBG_CLEAR_COLOR: [f32; 4] = [0.0, 0.0, 0.8, 1.0];
251
252
impl BatchKind {
253
#[cfg(feature = "debugger")]
254
fn debug_name(&self) -> &'static str {
255
match *self {
256
BatchKind::SplitComposite => "SplitComposite",
257
BatchKind::Brush(kind) => {
258
match kind {
259
BrushBatchKind::Solid => "Brush (Solid)",
260
BrushBatchKind::Image(..) => "Brush (Image)",
261
BrushBatchKind::Blend => "Brush (Blend)",
262
BrushBatchKind::MixBlend { .. } => "Brush (Composite)",
263
BrushBatchKind::YuvImage(..) => "Brush (YuvImage)",
264
BrushBatchKind::ConicGradient => "Brush (ConicGradient)",
265
BrushBatchKind::RadialGradient => "Brush (RadialGradient)",
266
BrushBatchKind::LinearGradient => "Brush (LinearGradient)",
267
BrushBatchKind::Opacity => "Brush (Opacity)",
268
}
269
}
270
BatchKind::TextRun(_) => "TextRun",
271
}
272
}
273
274
fn sampler_tag(&self) -> GpuProfileTag {
275
match *self {
276
BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
277
BatchKind::Brush(kind) => {
278
match kind {
279
BrushBatchKind::Solid => GPU_TAG_BRUSH_SOLID,
280
BrushBatchKind::Image(..) => GPU_TAG_BRUSH_IMAGE,
281
BrushBatchKind::Blend => GPU_TAG_BRUSH_BLEND,
282
BrushBatchKind::MixBlend { .. } => GPU_TAG_BRUSH_MIXBLEND,
283
BrushBatchKind::YuvImage(..) => GPU_TAG_BRUSH_YUV_IMAGE,
284
BrushBatchKind::ConicGradient => GPU_TAG_BRUSH_CONIC_GRADIENT,
285
BrushBatchKind::RadialGradient => GPU_TAG_BRUSH_RADIAL_GRADIENT,
286
BrushBatchKind::LinearGradient => GPU_TAG_BRUSH_LINEAR_GRADIENT,
287
BrushBatchKind::Opacity => GPU_TAG_BRUSH_OPACITY,
288
}
289
}
290
BatchKind::TextRun(_) => GPU_TAG_PRIM_TEXT_RUN,
291
}
292
}
293
}
294
295
fn flag_changed(before: DebugFlags, after: DebugFlags, select: DebugFlags) -> Option<bool> {
296
if before & select != after & select {
297
Some(after.contains(select))
298
} else {
299
None
300
}
301
}
302
303
/// Color modes used by the text/image shaders.
///
/// `#[repr(C)]` with explicit discriminants: the numeric values appear to be
/// consumed outside of Rust (e.g. by shader code), so keep them stable —
/// do not reorder or renumber variants.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub enum ShaderColorMode {
    FromRenderPassMode = 0,
    Alpha = 1,
    SubpixelConstantTextColor = 2,
    SubpixelWithBgColorPass0 = 3,
    SubpixelWithBgColorPass1 = 4,
    SubpixelWithBgColorPass2 = 5,
    SubpixelDualSource = 6,
    Bitmap = 7,
    ColorBitmap = 8,
    Image = 9,
}
317
318
impl From<GlyphFormat> for ShaderColorMode {
319
fn from(format: GlyphFormat) -> ShaderColorMode {
320
match format {
321
GlyphFormat::Alpha | GlyphFormat::TransformedAlpha => ShaderColorMode::Alpha,
322
GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => {
323
panic!("Subpixel glyph formats must be handled separately.");
324
}
325
GlyphFormat::Bitmap => ShaderColorMode::Bitmap,
326
GlyphFormat::ColorBitmap => ShaderColorMode::ColorBitmap,
327
}
328
}
329
}
330
331
/// Enumeration of the texture samplers used across the various WebRender
/// shaders.
///
/// Each variant corresponds to a uniform declared in shader source. Only the
/// variants a given shader needs are bound, so not every variant is bound for
/// every batch.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub(crate) enum TextureSampler {
    Color0,
    Color1,
    Color2,
    PrevPassAlpha,
    PrevPassColor,
    GpuCache,
    TransformPalette,
    RenderTasks,
    Dither,
    PrimitiveHeadersF,
    PrimitiveHeadersI,
}
350
351
impl TextureSampler {
352
pub(crate) fn color(n: usize) -> TextureSampler {
353
match n {
354
0 => TextureSampler::Color0,
355
1 => TextureSampler::Color1,
356
2 => TextureSampler::Color2,
357
_ => {
358
panic!("There are only 3 color samplers.");
359
}
360
}
361
}
362
}
363
364
impl Into<TextureSlot> for TextureSampler {
365
fn into(self) -> TextureSlot {
366
match self {
367
TextureSampler::Color0 => TextureSlot(0),
368
TextureSampler::Color1 => TextureSlot(1),
369
TextureSampler::Color2 => TextureSlot(2),
370
TextureSampler::PrevPassAlpha => TextureSlot(3),
371
TextureSampler::PrevPassColor => TextureSlot(4),
372
TextureSampler::GpuCache => TextureSlot(5),
373
TextureSampler::TransformPalette => TextureSlot(6),
374
TextureSampler::RenderTasks => TextureSlot(7),
375
TextureSampler::Dither => TextureSlot(8),
376
TextureSampler::PrimitiveHeadersF => TextureSlot(9),
377
TextureSampler::PrimitiveHeadersI => TextureSlot(10),
378
}
379
}
380
}
381
382
/// A vertex consisting of a bare 2D position.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct PackedVertex {
    pub pos: [f32; 2],
}
387
388
pub(crate) mod desc {
389
use crate::device::{VertexAttribute, VertexAttributeKind, VertexDescriptor};
390
391
pub const PRIM_INSTANCES: VertexDescriptor = VertexDescriptor {
392
vertex_attributes: &[
393
VertexAttribute {
394
name: "aPosition",
395
count: 2,
396
kind: VertexAttributeKind::F32,
397
},
398
],
399
instance_attributes: &[
400
VertexAttribute {
401
name: "aData",
402
count: 4,
403
kind: VertexAttributeKind::I32,
404
},
405
],
406
};
407
408
pub const BLUR: VertexDescriptor = VertexDescriptor {
409
vertex_attributes: &[
410
VertexAttribute {
411
name: "aPosition",
412
count: 2,
413
kind: VertexAttributeKind::F32,
414
},
415
],
416
instance_attributes: &[
417
VertexAttribute {
418
name: "aBlurRenderTaskAddress",
419
count: 1,
420
kind: VertexAttributeKind::U16,
421
},
422
VertexAttribute {
423
name: "aBlurSourceTaskAddress",
424
count: 1,
425
kind: VertexAttributeKind::U16,
426
},
427
VertexAttribute {
428
name: "aBlurDirection",
429
count: 1,
430
kind: VertexAttributeKind::I32,
431
},
432
],
433
};
434
435
pub const LINE: VertexDescriptor = VertexDescriptor {
436
vertex_attributes: &[
437
VertexAttribute {
438
name: "aPosition",
439
count: 2,
440
kind: VertexAttributeKind::F32,
441
},
442
],
443
instance_attributes: &[
444
VertexAttribute {
445
name: "aTaskRect",
446
count: 4,
447
kind: VertexAttributeKind::F32,
448
},
449
VertexAttribute {
450
name: "aLocalSize",
451
count: 2,
452
kind: VertexAttributeKind::F32,
453
},
454
VertexAttribute {
455
name: "aWavyLineThickness",
456
count: 1,
457
kind: VertexAttributeKind::F32,
458
},
459
VertexAttribute {
460
name: "aStyle",
461
count: 1,
462
kind: VertexAttributeKind::I32,
463
},
464
VertexAttribute {
465
name: "aAxisSelect",
466
count: 1,
467
kind: VertexAttributeKind::F32,
468
},
469
],
470
};
471
472
pub const GRADIENT: VertexDescriptor = VertexDescriptor {
473
vertex_attributes: &[
474
VertexAttribute {
475
name: "aPosition",
476
count: 2,
477
kind: VertexAttributeKind::F32,
478
},
479
],
480
instance_attributes: &[
481
VertexAttribute {
482
name: "aTaskRect",
483
count: 4,
484
kind: VertexAttributeKind::F32,
485
},
486
VertexAttribute {
487
name: "aStops",
488
count: 4,
489
kind: VertexAttributeKind::F32,
490
},
491
// TODO(gw): We should probably pack these as u32 colors instead
492
// of passing as full float vec4 here. It won't make much
493
// difference in real world, since these are only invoked
494
// rarely, when creating the cache.
495
VertexAttribute {
496
name: "aColor0",
497
count: 4,
498
kind: VertexAttributeKind::F32,
499
},
500
VertexAttribute {
501
name: "aColor1",
502
count: 4,
503
kind: VertexAttributeKind::F32,
504
},
505
VertexAttribute {
506
name: "aColor2",
507
count: 4,
508
kind: VertexAttributeKind::F32,
509
},
510
VertexAttribute {
511
name: "aColor3",
512
count: 4,
513
kind: VertexAttributeKind::F32,
514
},
515
VertexAttribute {
516
name: "aAxisSelect",
517
count: 1,
518
kind: VertexAttributeKind::F32,
519
},
520
VertexAttribute {
521
name: "aStartStop",
522
count: 2,
523
kind: VertexAttributeKind::F32,
524
},
525
],
526
};
527
528
pub const BORDER: VertexDescriptor = VertexDescriptor {
529
vertex_attributes: &[
530
VertexAttribute {
531
name: "aPosition",
532
count: 2,
533
kind: VertexAttributeKind::F32,
534
},
535
],
536
instance_attributes: &[
537
VertexAttribute {
538
name: "aTaskOrigin",
539
count: 2,
540
kind: VertexAttributeKind::F32,
541
},
542
VertexAttribute {
543
name: "aRect",
544
count: 4,
545
kind: VertexAttributeKind::F32,
546
},
547
VertexAttribute {
548
name: "aColor0",
549
count: 4,
550
kind: VertexAttributeKind::F32,
551
},
552
VertexAttribute {
553
name: "aColor1",
554
count: 4,
555
kind: VertexAttributeKind::F32,
556
},
557
VertexAttribute {
558
name: "aFlags",
559
count: 1,
560
kind: VertexAttributeKind::I32,
561
},
562
VertexAttribute {
563
name: "aWidths",
564
count: 2,
565
kind: VertexAttributeKind::F32,
566
},
567
VertexAttribute {
568
name: "aRadii",
569
count: 2,
570
kind: VertexAttributeKind::F32,
571
},
572
VertexAttribute {
573
name: "aClipParams1",
574
count: 4,
575
kind: VertexAttributeKind::F32,
576
},
577
VertexAttribute {
578
name: "aClipParams2",
579
count: 4,
580
kind: VertexAttributeKind::F32,
581
},
582
],
583
};
584
585
pub const SCALE: VertexDescriptor = VertexDescriptor {
586
vertex_attributes: &[
587
VertexAttribute {
588
name: "aPosition",
589
count: 2,
590
kind: VertexAttributeKind::F32,
591
},
592
],
593
instance_attributes: &[
594
VertexAttribute {
595
name: "aScaleTargetRect",
596
count: 4,
597
kind: VertexAttributeKind::F32,
598
},
599
VertexAttribute {
600
name: "aScaleSourceRect",
601
count: 4,
602
kind: VertexAttributeKind::I32,
603
},
604
VertexAttribute {
605
name: "aScaleSourceLayer",
606
count: 1,
607
kind: VertexAttributeKind::I32,
608
},
609
],
610
};
611
612
pub const CLIP: VertexDescriptor = VertexDescriptor {
613
vertex_attributes: &[
614
VertexAttribute {
615
name: "aPosition",
616
count: 2,
617
kind: VertexAttributeKind::F32,
618
},
619
],
620
instance_attributes: &[
621
VertexAttribute {
622
name: "aTransformIds",
623
count: 2,
624
kind: VertexAttributeKind::I32,
625
},
626
VertexAttribute {
627
name: "aClipDataResourceAddress",
628
count: 4,
629
kind: VertexAttributeKind::U16,
630
},
631
VertexAttribute {
632
name: "aClipLocalPos",
633
count: 2,
634
kind: VertexAttributeKind::F32,
635
},
636
VertexAttribute {
637
name: "aClipTileRect",
638
count: 4,
639
kind: VertexAttributeKind::F32,
640
},
641
VertexAttribute {
642
name: "aClipDeviceArea",
643
count: 4,
644
kind: VertexAttributeKind::F32,
645
},
646
VertexAttribute {
647
name: "aClipOrigins",
648
count: 4,
649
kind: VertexAttributeKind::F32,
650
},
651
VertexAttribute {
652
name: "aDevicePixelScale",
653
count: 1,
654
kind: VertexAttributeKind::F32,
655
},
656
],
657
};
658
659
pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
660
vertex_attributes: &[
661
VertexAttribute {
662
name: "aPosition",
663
count: 2,
664
kind: VertexAttributeKind::U16Norm,
665
},
666
VertexAttribute {
667
name: "aValue",
668
count: 4,
669
kind: VertexAttributeKind::F32,
670
},
671
],
672
instance_attributes: &[],
673
};
674
675
pub const RESOLVE: VertexDescriptor = VertexDescriptor {
676
vertex_attributes: &[
677
VertexAttribute {
678
name: "aPosition",
679
count: 2,
680
kind: VertexAttributeKind::F32,
681
},
682
],
683
instance_attributes: &[
684
VertexAttribute {
685
name: "aRect",
686
count: 4,
687
kind: VertexAttributeKind::F32,
688
},
689
],
690
};
691
692
pub const SVG_FILTER: VertexDescriptor = VertexDescriptor {
693
vertex_attributes: &[
694
VertexAttribute {
695
name: "aPosition",
696
count: 2,
697
kind: VertexAttributeKind::F32,
698
},
699
],
700
instance_attributes: &[
701
VertexAttribute {
702
name: "aFilterRenderTaskAddress",
703
count: 1,
704
kind: VertexAttributeKind::U16,
705
},
706
VertexAttribute {
707
name: "aFilterInput1TaskAddress",
708
count: 1,
709
kind: VertexAttributeKind::U16,
710
},
711
VertexAttribute {
712
name: "aFilterInput2TaskAddress",
713
count: 1,
714
kind: VertexAttributeKind::U16,
715
},
716
VertexAttribute {
717
name: "aFilterKind",
718
count: 1,
719
kind: VertexAttributeKind::U16,
720
},
721
VertexAttribute {
722
name: "aFilterInputCount",
723
count: 1,
724
kind: VertexAttributeKind::U16,
725
},
726
VertexAttribute {
727
name: "aFilterGenericInt",
728
count: 1,
729
kind: VertexAttributeKind::U16,
730
},
731
VertexAttribute {
732
name: "aFilterExtraDataAddress",
733
count: 2,
734
kind: VertexAttributeKind::U16,
735
},
736
],
737
};
738
739
pub const VECTOR_STENCIL: VertexDescriptor = VertexDescriptor {
740
vertex_attributes: &[
741
VertexAttribute {
742
name: "aPosition",
743
count: 2,
744
kind: VertexAttributeKind::F32,
745
},
746
],
747
instance_attributes: &[
748
VertexAttribute {
749
name: "aFromPosition",
750
count: 2,
751
kind: VertexAttributeKind::F32,
752
},
753
VertexAttribute {
754
name: "aCtrlPosition",
755
count: 2,
756
kind: VertexAttributeKind::F32,
757
},
758
VertexAttribute {
759
name: "aToPosition",
760
count: 2,
761
kind: VertexAttributeKind::F32,
762
},
763
VertexAttribute {
764
name: "aFromNormal",
765
count: 2,
766
kind: VertexAttributeKind::F32,
767
},
768
VertexAttribute {
769
name: "aCtrlNormal",
770
count: 2,
771
kind: VertexAttributeKind::F32,
772
},
773
VertexAttribute {
774
name: "aToNormal",
775
count: 2,
776
kind: VertexAttributeKind::F32,
777
},
778
VertexAttribute {
779
name: "aPathID",
780
count: 1,
781
kind: VertexAttributeKind::U16,
782
},
783
VertexAttribute {
784
name: "aPad",
785
count: 1,
786
kind: VertexAttributeKind::U16,
787
},
788
],
789
};
790
791
pub const VECTOR_COVER: VertexDescriptor = VertexDescriptor {
792
vertex_attributes: &[
793
VertexAttribute {
794
name: "aPosition",
795
count: 2,
796
kind: VertexAttributeKind::F32,
797
},
798
],
799
instance_attributes: &[
800
VertexAttribute {
801
name: "aTargetRect",
802
count: 4,
803
kind: VertexAttributeKind::I32,
804
},
805
VertexAttribute {
806
name: "aStencilOrigin",
807
count: 2,
808
kind: VertexAttributeKind::I32,
809
},
810
VertexAttribute {
811
name: "aSubpixel",
812
count: 1,
813
kind: VertexAttributeKind::U16,
814
},
815
VertexAttribute {
816
name: "aPad",
817
count: 1,
818
kind: VertexAttributeKind::U16,
819
},
820
],
821
};
822
823
pub const COMPOSITE: VertexDescriptor = VertexDescriptor {
824
vertex_attributes: &[
825
VertexAttribute {
826
name: "aPosition",
827
count: 2,
828
kind: VertexAttributeKind::F32,
829
},
830
],
831
instance_attributes: &[
832
VertexAttribute {
833
name: "aDeviceRect",
834
count: 4,
835
kind: VertexAttributeKind::F32,
836
},
837
VertexAttribute {
838
name: "aDeviceClipRect",
839
count: 4,
840
kind: VertexAttributeKind::F32,
841
},
842
VertexAttribute {
843
name: "aColor",
844
count: 4,
845
kind: VertexAttributeKind::F32,
846
},
847
VertexAttribute {
848
name: "aParams",
849
count: 4,
850
kind: VertexAttributeKind::F32,
851
},
852
VertexAttribute {
853
name: "aUvRect0",
854
count: 4,
855
kind: VertexAttributeKind::F32,
856
},
857
VertexAttribute {
858
name: "aUvRect1",
859
count: 4,
860
kind: VertexAttributeKind::F32,
861
},
862
VertexAttribute {
863
name: "aUvRect2",
864
count: 4,
865
kind: VertexAttributeKind::F32,
866
},
867
VertexAttribute {
868
name: "aTextureLayers",
869
count: 3,
870
kind: VertexAttributeKind::F32,
871
},
872
],
873
};
874
}
875
876
/// Identifies which vertex format (see `desc`) a draw call uses.
#[derive(Debug, Copy, Clone)]
pub(crate) enum VertexArrayKind {
    Primitive,
    Blur,
    Clip,
    VectorStencil,
    VectorCover,
    Border,
    Scale,
    LineDecoration,
    Gradient,
    Resolve,
    SvgFilter,
    Composite,
}
891
892
/// The graphics backend in use. Currently only OpenGL is supported here.
#[derive(Clone, Debug, PartialEq)]
pub enum GraphicsApi {
    OpenGL,
}
896
897
#[derive(Clone, Debug)]
898
pub struct GraphicsApiInfo {
899
pub kind: GraphicsApi,
900
pub renderer: String,
901
pub version: String,
902
}
903
904
/// Texture target kind for an image buffer. The explicit discriminants are
/// kept stable (see also `TextureTarget`, which mirrors these variants).
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum ImageBufferKind {
    Texture2D = 0,
    TextureRect = 1,
    TextureExternal = 2,
    Texture2DArray = 3,
}
913
914
//TODO: those types are the same, so let's merge them
915
impl From<TextureTarget> for ImageBufferKind {
916
fn from(target: TextureTarget) -> Self {
917
match target {
918
TextureTarget::Default => ImageBufferKind::Texture2D,
919
TextureTarget::Rect => ImageBufferKind::TextureRect,
920
TextureTarget::Array => ImageBufferKind::Texture2DArray,
921
TextureTarget::External => ImageBufferKind::TextureExternal,
922
}
923
}
924
}
925
926
/// Distinguishes a native GL context from a software (OSMesa) one.
#[derive(Debug, Copy, Clone)]
pub enum RendererKind {
    Native,
    OSMesa,
}
931
932
#[derive(Debug)]
933
pub struct GpuProfile {
934
pub frame_id: GpuFrameId,
935
pub paint_time_ns: u64,
936
}
937
938
impl GpuProfile {
939
fn new<T>(frame_id: GpuFrameId, timers: &[GpuTimer<T>]) -> GpuProfile {
940
let mut paint_time_ns = 0;
941
for timer in timers {
942
paint_time_ns += timer.time_ns;
943
}
944
GpuProfile {
945
frame_id,
946
paint_time_ns,
947
}
948
}
949
}
950
951
#[derive(Debug)]
952
pub struct CpuProfile {
953
pub frame_id: GpuFrameId,
954
pub backend_time_ns: u64,
955
pub composite_time_ns: u64,
956
pub draw_calls: usize,
957
}
958
959
impl CpuProfile {
960
fn new(
961
frame_id: GpuFrameId,
962
backend_time_ns: u64,
963
composite_time_ns: u64,
964
draw_calls: usize,
965
) -> CpuProfile {
966
CpuProfile {
967
frame_id,
968
backend_time_ns,
969
composite_time_ns,
970
draw_calls,
971
}
972
}
973
}
974
975
/// The selected partial present mode for a given frame.
976
#[derive(Debug, Copy, Clone)]
977
enum PartialPresentMode {
978
/// The device supports fewer dirty rects than the number of dirty rects
979
/// that WR produced. In this case, the WR dirty rects are union'ed into
980
/// a single dirty rect, that is provided to the caller.
981
Single {
982
dirty_rect: DeviceRect,
983
},
984
}
985
986
/// A Texture that has been initialized by the `device` module and is ready to
987
/// be used.
988
struct ActiveTexture {
989
texture: Texture,
990
saved_index: Option<SavedTargetIndex>,
991
}
992
993
/// Helper struct for resolving device Textures for use during rendering passes.
994
///
995
/// Manages the mapping between the at-a-distance texture handles used by the
996
/// `RenderBackend` (which does not directly interface with the GPU) and actual
997
/// device texture handles.
998
struct TextureResolver {
999
/// A map to resolve texture cache IDs to native textures.
1000
texture_cache_map: FastHashMap<CacheTextureId, Texture>,
1001
1002
/// Map of external image IDs to native textures.
1003
external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,
1004
1005
/// A special 1x1 dummy texture used for shaders that expect to work with
1006
/// the output of the previous pass but are actually running in the first
1007
/// pass.
1008
dummy_cache_texture: Texture,
1009
1010
/// The outputs of the previous pass, if applicable.
1011
prev_pass_color: Option<ActiveTexture>,
1012
prev_pass_alpha: Option<ActiveTexture>,
1013
1014
/// Saved render targets from previous passes. This is used when a pass
1015
/// needs access to the result of a pass other than the immediately-preceding
1016
/// one. In this case, the `RenderTask` will get a non-`None` `saved_index`,
1017
/// which will cause the resulting render target to be persisted in this list
1018
/// (at that index) until the end of the frame.
1019
saved_targets: Vec<Texture>,
1020
1021
/// Pool of idle render target textures ready for re-use.
1022
///
1023
/// Naively, it would seem like we only ever need two pairs of (color,
1024
/// alpha) render targets: one for the output of the previous pass (serving
1025
/// as input to the current pass), and one for the output of the current
1026
/// pass. However, there are cases where the output of one pass is used as
1027
/// the input to multiple future passes. For example, drop-shadows draw the
1028
/// picture in pass X, then reference it in pass X+1 to create the blurred
1029
/// shadow, and pass the results of both X and X+1 to pass X+2 draw the
1030
/// actual content.
1031
///
1032
/// See the comments in `allocate_target_texture` for more insight on why
1033
/// reuse is a win.
1034
render_target_pool: Vec<Texture>,
1035
}
1036
1037
impl TextureResolver {
    /// Creates a resolver, allocating the 1x1 opaque-white dummy texture that
    /// is bound whenever a pass input is requested but none is available.
    fn new(device: &mut Device) -> TextureResolver {
        let dummy_cache_texture = device
            .create_texture(
                TextureTarget::Array,
                ImageFormat::RGBA8,
                1,
                1,
                TextureFilter::Linear,
                None,
                1,
            );
        // Fill the dummy with opaque white so sampling it is well defined.
        device.upload_texture_immediate(
            &dummy_cache_texture,
            &[0xff, 0xff, 0xff, 0xff],
        );

        TextureResolver {
            texture_cache_map: FastHashMap::default(),
            external_images: FastHashMap::default(),
            dummy_cache_texture,
            prev_pass_alpha: None,
            prev_pass_color: None,
            saved_targets: Vec::default(),
            render_target_pool: Vec::new(),
        }
    }

    /// Releases every GPU texture owned by this resolver. `Texture` requires
    /// manual cleanup, so this must run before the resolver is dropped.
    fn deinit(self, device: &mut Device) {
        device.delete_texture(self.dummy_cache_texture);

        for (_id, texture) in self.texture_cache_map {
            device.delete_texture(texture);
        }

        for texture in self.render_target_pool {
            device.delete_texture(texture);
        }
    }

    /// Asserts that the per-frame state was fully drained by the previous
    /// `end_frame` call before a new frame starts.
    fn begin_frame(&mut self) {
        assert!(self.prev_pass_color.is_none());
        assert!(self.prev_pass_alpha.is_none());
        assert!(self.saved_targets.is_empty());
    }

    /// Returns all pass/saved targets to the pool and garbage-collects it.
    fn end_frame(&mut self, device: &mut Device, frame_id: GpuFrameId) {
        // return the cached targets to the pool
        self.end_pass(device, None, None);
        // return the saved targets as well
        while let Some(target) = self.saved_targets.pop() {
            self.return_to_pool(device, target);
        }

        // GC the render target pool.
        //
        // We use a simple scheme whereby we drop any texture that hasn't been used
        // in the last 30 frames. This should generally prevent any sustained build-
        // up of unused textures, unless we don't generate frames for a long period.
        // This can happen when the window is minimized, and we probably want to
        // flush all the WebRender caches in that case [1].
        //
        // NOTE(review): the footnote link for [1] appears to have been lost from
        // this comment (presumably a bug-tracker URL) — TODO restore it.
        self.retain_targets(device, |texture| texture.used_recently(frame_id, 30));
    }

    /// Transfers ownership of a render target back to the pool.
    fn return_to_pool(&mut self, device: &mut Device, target: Texture) {
        // Invalidate first so the driver can discard the old contents.
        device.invalidate_render_target(&target);
        self.render_target_pool.push(target);
    }

    /// Drops all targets from the render target pool that do not satisfy the predicate.
    pub fn retain_targets<F: Fn(&Texture) -> bool>(&mut self, device: &mut Device, f: F) {
        // We can't just use retain() because `Texture` requires manual cleanup.
        let mut tmp = SmallVec::<[Texture; 8]>::new();
        for target in self.render_target_pool.drain(..) {
            if f(&target) {
                tmp.push(target);
            } else {
                device.delete_texture(target);
            }
        }
        self.render_target_pool.extend(tmp);
    }

    /// Finishes a pass: recycles (or saves, when a `saved_index` is present)
    /// the previous pass's targets, then records the given targets as the
    /// inputs available to the next pass.
    fn end_pass(
        &mut self,
        device: &mut Device,
        a8_texture: Option<ActiveTexture>,
        rgba8_texture: Option<ActiveTexture>,
    ) {
        // If we have cache textures from previous pass, return them to the pool.
        // Also assign the pool index of those cache textures to last pass's index because this is
        // the result of last pass.
        // Note: the order here is important, needs to match the logic in `RenderPass::build()`.
        if let Some(at) = self.prev_pass_color.take() {
            if let Some(index) = at.saved_index {
                // Saved targets must be pushed in index order (see the
                // matching lookup in `bind`/`resolve`).
                assert_eq!(self.saved_targets.len(), index.0);
                self.saved_targets.push(at.texture);
            } else {
                self.return_to_pool(device, at.texture);
            }
        }
        if let Some(at) = self.prev_pass_alpha.take() {
            if let Some(index) = at.saved_index {
                assert_eq!(self.saved_targets.len(), index.0);
                self.saved_targets.push(at.texture);
            } else {
                self.return_to_pool(device, at.texture);
            }
        }

        // We have another pass to process, make these textures available
        // as inputs to the next pass.
        self.prev_pass_color = rgba8_texture;
        self.prev_pass_alpha = a8_texture;
    }

    // Bind a source texture to the device, returning the swizzle the shader
    // should apply when sampling it.
    fn bind(&self, texture_id: &TextureSource, sampler: TextureSampler, device: &mut Device) -> Swizzle {
        match *texture_id {
            TextureSource::Invalid => {
                // Nothing is bound; callers must not sample this source.
                Swizzle::default()
            }
            TextureSource::Dummy => {
                let swizzle = Swizzle::default();
                device.bind_texture(sampler, &self.dummy_cache_texture, swizzle);
                swizzle
            }
            TextureSource::PrevPassAlpha => {
                // Fall back to the dummy texture when there is no previous pass.
                let texture = match self.prev_pass_alpha {
                    Some(ref at) => &at.texture,
                    None => &self.dummy_cache_texture,
                };
                let swizzle = Swizzle::default();
                device.bind_texture(sampler, texture, swizzle);
                swizzle
            }
            TextureSource::PrevPassColor => {
                let texture = match self.prev_pass_color {
                    Some(ref at) => &at.texture,
                    None => &self.dummy_cache_texture,
                };
                let swizzle = Swizzle::default();
                device.bind_texture(sampler, texture, swizzle);
                swizzle
            }
            TextureSource::External(external_image) => {
                let texture = self.external_images
                    .get(&(external_image.id, external_image.channel_index))
                    .expect("BUG: External image should be resolved by now");
                device.bind_external_texture(sampler, texture);
                Swizzle::default()
            }
            TextureSource::TextureCache(index, swizzle) => {
                let texture = &self.texture_cache_map[&index];
                device.bind_texture(sampler, texture, swizzle);
                swizzle
            }
            TextureSource::RenderTaskCache(saved_index, swizzle) => {
                if saved_index.0 < self.saved_targets.len() {
                    let texture = &self.saved_targets[saved_index.0];
                    device.bind_texture(sampler, texture, swizzle)
                } else {
                    // Check if this saved index is referring to the prev pass,
                    // which has not been pushed into `saved_targets` yet.
                    if Some(saved_index) == self.prev_pass_color.as_ref().and_then(|at| at.saved_index) {
                        let texture = match self.prev_pass_color {
                            Some(ref at) => &at.texture,
                            None => &self.dummy_cache_texture,
                        };
                        device.bind_texture(sampler, texture, swizzle);
                    } else if Some(saved_index) == self.prev_pass_alpha.as_ref().and_then(|at| at.saved_index) {
                        let texture = match self.prev_pass_alpha {
                            Some(ref at) => &at.texture,
                            None => &self.dummy_cache_texture,
                        };
                        device.bind_texture(sampler, texture, swizzle);
                    }
                    // NOTE(review): if neither branch matches, nothing is
                    // bound here — presumably an impossible case; confirm.
                }
                swizzle
            }
        }
    }

    // Get the real (OpenGL) texture ID for a given source texture.
    // For a texture cache texture, the IDs are stored in a vector
    // map for fast access.
    fn resolve(&self, texture_id: &TextureSource) -> Option<(&Texture, Swizzle)> {
        match *texture_id {
            TextureSource::Invalid => None,
            TextureSource::Dummy => {
                Some((&self.dummy_cache_texture, Swizzle::default()))
            }
            TextureSource::PrevPassAlpha => Some((
                match self.prev_pass_alpha {
                    Some(ref at) => &at.texture,
                    None => &self.dummy_cache_texture,
                },
                Swizzle::default(),
            )),
            TextureSource::PrevPassColor => Some((
                match self.prev_pass_color {
                    Some(ref at) => &at.texture,
                    None => &self.dummy_cache_texture,
                },
                Swizzle::default(),
            )),
            TextureSource::External(..) => {
                panic!("BUG: External textures cannot be resolved, they can only be bound.");
            }
            TextureSource::TextureCache(index, swizzle) => {
                Some((&self.texture_cache_map[&index], swizzle))
            }
            TextureSource::RenderTaskCache(saved_index, swizzle) => {
                Some((&self.saved_targets[saved_index.0], swizzle))
            }
        }
    }

    // Retrieve the deferred / resolved UV rect if an external texture, otherwise
    // return the default supplied UV rect.
    fn get_uv_rect(
        &self,
        source: &TextureSource,
        default_value: TexelRect,
    ) -> TexelRect {
        match source {
            TextureSource::External(ref external_image) => {
                let texture = self.external_images
                    .get(&(external_image.id, external_image.channel_index))
                    .expect("BUG: External image should be resolved by now");
                texture.get_uv_rect()
            }
            _ => {
                default_value
            }
        }
    }

    /// Sums the GPU memory consumed by the cache textures and the render
    /// target pool owned by this resolver.
    fn report_memory(&self) -> MemoryReport {
        let mut report = MemoryReport::default();

        // We're reporting GPU memory rather than heap-allocations, so we don't
        // use size_of_op.
        for t in self.texture_cache_map.values() {
            report.texture_cache_textures += t.size_in_bytes();
        }
        for t in self.render_target_pool.iter() {
            report.render_target_textures += t.size_in_bytes();
        }

        report
    }
}
1292
1293
/// The GPU blend state used when drawing a primitive batch.
///
/// NOTE(review): the precise blend equations for each variant are set up by
/// the device/batching code outside this view — variant names are indicative
/// (e.g. premultiplied vs. non-premultiplied alpha, subpixel text modes);
/// confirm against the code that matches on this enum.
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum BlendMode {
    None,
    Alpha,
    PremultipliedAlpha,
    PremultipliedDestOut,
    SubpixelDualSource,
    /// Carries the constant text color used for subpixel blending.
    SubpixelConstantTextColor(ColorF),
    SubpixelWithBgColor,
    /// Carries the CSS mix-blend-mode to apply.
    Advanced(MixBlendMode),
}
1306
1307
/// Tracks the state of each row in the GPU cache texture.
struct CacheRow {
    /// Mirrored block data on CPU for this row. We store a copy of
    /// the data on the CPU side to improve upload batching.
    cpu_blocks: Box<[GpuBlockData; MAX_VERTEX_TEXTURE_WIDTH]>,
    /// The first offset in this row that is dirty.
    ///
    /// An inverted range (`min_dirty > max_dirty`, the initial state) encodes
    /// "no dirty blocks" — see `CacheRow::is_dirty`.
    min_dirty: u16,
    /// The last offset in this row that is dirty (exclusive).
    max_dirty: u16,
}
1317
1318
impl CacheRow {
1319
fn new() -> Self {
1320
CacheRow {
1321
cpu_blocks: Box::new([GpuBlockData::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]),
1322
min_dirty: MAX_VERTEX_TEXTURE_WIDTH as _,
1323
max_dirty: 0,
1324
}
1325
}
1326
1327
fn is_dirty(&self) -> bool {
1328
return self.min_dirty < self.max_dirty;
1329
}
1330
1331
fn clear_dirty(&mut self) {
1332
self.min_dirty = MAX_VERTEX_TEXTURE_WIDTH as _;
1333
self.max_dirty = 0;
1334
}
1335
1336
fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
1337
self.min_dirty = self.min_dirty.min(block_offset as _);
1338
self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
1339
}
1340
1341
fn dirty_blocks(&self) -> &[GpuBlockData] {
1342
return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize];
1343
}
1344
}
1345
1346
/// The bus over which CPU and GPU versions of the GPU cache
/// get synchronized.
enum GpuCacheBus {
    /// PBO-based updates, currently operate on a row granularity.
    /// Therefore, are subject to fragmentation issues.
    PixelBuffer {
        /// PBO used for transfers.
        buffer: PBO,
        /// Per-row data, mirroring the texture contents on the CPU side.
        rows: Vec<CacheRow>,
    },
    /// Shader-based scattering updates. Currently rendered by a set
    /// of points into the GPU texture, each carrying a `GpuBlockData`.
    Scatter {
        /// Special program to run the scattered update.
        program: Program,
        /// VAO containing the source vertex buffers.
        vao: CustomVAO,
        /// VBO for positional data, supplied as normalized `u16`.
        buf_position: VBO<[u16; 2]>,
        /// VBO for gpu block data.
        buf_value: VBO<GpuBlockData>,
        /// Currently stored block count.
        count: usize,
    },
}
1372
1373
/// The device-specific representation of the cache texture in gpu_cache.rs
struct GpuCacheTexture {
    /// The GPU-side texture. `None` until the first `ensure_texture` call.
    texture: Option<Texture>,
    /// The update mechanism (PBO rows or scatter shader), chosen at creation.
    bus: GpuCacheBus,
}
1378
1379
impl GpuCacheTexture {

    /// Ensures that we have an appropriately-sized texture, growing (and
    /// copying over) the existing one if it is too short.
    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
        // If we already have a texture that works, we're done.
        if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
            if GPU_CACHE_RESIZE_TEST {
                // Special debug mode - resize the texture even though it's fine.
            } else {
                return;
            }
        }

        // Take the old texture, if any.
        let blit_source = self.texture.take();

        // Create the new texture.
        assert!(height >= 2, "Height is too small for ANGLE");
        let new_size = DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, height);
        // If glCopyImageSubData is supported, this texture doesn't need
        // to be a render target. This prevents GL errors due to framebuffer
        // incompleteness on devices that don't support RGBAF32 render targets.
        // TODO(gw): We still need a proper solution for the subset of devices
        //           that don't support glCopyImageSubData *OR* rendering to a
        //           RGBAF32 render target. These devices will currently fail
        //           to resize the GPU cache texture.
        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
        let rt_info = if supports_copy_image_sub_data {
            None
        } else {
            Some(RenderTargetInfo { has_depth: false })
        };
        let mut texture = device.create_texture(
            TextureTarget::Default,
            ImageFormat::RGBAF32,
            new_size.width,
            new_size.height,
            TextureFilter::Nearest,
            rt_info,
            1,
        );

        // Blit the contents of the previous texture, if applicable.
        if let Some(blit_source) = blit_source {
            device.blit_renderable_texture(&mut texture, &blit_source);
            device.delete_texture(blit_source);
        }

        self.texture = Some(texture);
    }

    /// Creates the cache texture wrapper, choosing the scatter-shader bus when
    /// `use_scatter` is set, otherwise the PBO bus. The texture itself is
    /// created lazily by `ensure_texture`.
    fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
        let bus = if use_scatter {
            let program = device.create_program_linked(
                "gpu_cache_update",
                String::new(),
                &desc::GPU_CACHE_UPDATE,
            )?;
            let buf_position = device.create_vbo();
            let buf_value = device.create_vbo();
            //Note: the vertex attributes have to be supplied in the same order
            // as for program creation, but each assigned to a different stream.
            let vao = device.create_custom_vao(&[
                buf_position.stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                buf_value .stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[1..2]),
            ]);
            GpuCacheBus::Scatter {
                program,
                vao,
                buf_position,
                buf_value,
                count: 0,
            }
        } else {
            let buffer = device.create_pbo();
            GpuCacheBus::PixelBuffer {
                buffer,
                rows: Vec::new(),
            }
        };

        Ok(GpuCacheTexture {
            texture: None,
            bus,
        })
    }

    /// Releases the texture and all bus-specific GPU resources.
    fn deinit(mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
        match self.bus {
            GpuCacheBus::PixelBuffer { buffer, ..} => {
                device.delete_pbo(buffer);
            }
            GpuCacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
                device.delete_program(program);
                device.delete_custom_vao(vao);
                device.delete_vbo(buf_position);
                device.delete_vbo(buf_value);
            }
        }
    }

    /// Current texture height in rows, or 0 if no texture exists yet.
    fn get_height(&self) -> i32 {
        self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
    }

    /// Grows the texture to `max_height` and, for the scatter bus, resets the
    /// pending count and resizes the VBOs to hold `total_block_count` entries.
    fn prepare_for_updates(
        &mut self,
        device: &mut Device,
        total_block_count: usize,
        max_height: i32,
    ) {
        self.ensure_texture(device, max_height);
        match self.bus {
            GpuCacheBus::PixelBuffer { .. } => {},
            GpuCacheBus::Scatter {
                ref mut buf_position,
                ref mut buf_value,
                ref mut count,
                ..
            } => {
                *count = 0;
                if total_block_count > buf_value.allocated_count() {
                    device.allocate_vbo(buf_position, total_block_count, VertexUsageHint::Stream);
                    device.allocate_vbo(buf_value, total_block_count, VertexUsageHint::Stream);
                }
            }
        }
    }

    /// Stages an update list: copied into the CPU row mirror (PixelBuffer) or
    /// appended to the scatter VBOs. The GPU texture is only touched by `flush`.
    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            let row = address.v as usize;

                            // Ensure that the CPU-side shadow copy of the GPU cache data has enough
                            // rows to apply this patch.
                            while rows.len() <= row {
                                // Add a new row.
                                rows.push(CacheRow::new());
                            }

                            // Copy the blocks from the patch array in the shadow CPU copy.
                            let block_offset = address.u as usize;
                            let data = &mut rows[row].cpu_blocks;
                            for i in 0 .. block_count {
                                data[block_offset + i] = updates.blocks[block_index + i];
                            }

                            // This row is dirty (needs to be updated in GPU texture).
                            rows[row].add_dirty(block_offset, block_count);
                        }
                    }
                }
            }
            GpuCacheBus::Scatter {
                ref buf_position,
                ref buf_value,
                ref mut count,
                ..
            } => {
                //TODO: re-use this heap allocation
                // Unused positions will be left as 0xFFFF, which translates to
                // (1.0, 1.0) in the vertex output position and gets culled out
                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
                let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();

                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            // Convert the absolute texel position into normalized
                            // u16 coordinates (texel centers, hence the +1 / 2x).
                            let y = ((2*address.v as usize + 1) << 15) / size.height;
                            for i in 0 .. block_count {
                                let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
                                position_data[block_index + i] = [x as _, y as _];
                            }
                        }
                    }
                }

                device.fill_vbo(buf_value, &updates.blocks, *count);
                device.fill_vbo(buf_position, &position_data, *count);
                *count += position_data.len();
            }
        }
    }

    /// Pushes all staged updates to the GPU texture. Returns the number of
    /// dirty rows uploaded (always 0 for the scatter path).
    fn flush(&mut self, device: &mut Device) -> usize {
        let texture = self.texture.as_ref().unwrap();
        match self.bus {
            GpuCacheBus::PixelBuffer { ref buffer, ref mut rows } => {
                let rows_dirty = rows
                    .iter()
                    .filter(|row| row.is_dirty())
                    .count();
                if rows_dirty == 0 {
                    return 0
                }

                let (upload_size, _) = device.required_upload_size_and_stride(
                    DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, 1),
                    texture.get_format(),
                );

                let mut uploader = device.upload_texture(
                    texture,
                    buffer,
                    rows_dirty * upload_size,
                );

                for (row_index, row) in rows.iter_mut().enumerate() {
                    if !row.is_dirty() {
                        continue;
                    }

                    // Upload only the dirty span of the row.
                    let blocks = row.dirty_blocks();
                    let rect = DeviceIntRect::new(
                        DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
                        DeviceIntSize::new(blocks.len() as i32, 1),
                    );

                    uploader.upload(rect, 0, None, None, blocks.as_ptr(), blocks.len());

                    row.clear_dirty();
                }

                rows_dirty
            }
            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                // Render the staged points directly into the cache texture.
                device.disable_depth();
                device.set_blend(false);
                device.bind_program(program);
                device.bind_custom_vao(vao);
                device.bind_draw_target(
                    DrawTarget::from_texture(
                        texture,
                        0,
                        false,
                    ),
                );
                device.draw_nonindexed_points(0, count as _);
                0
            }
        }
    }
}
1639
1640
/// A texture holding per-instance vertex data of type `T`, re-uploaded via a
/// PBO on every `update` call (see the comments in `update`).
struct VertexDataTexture<T> {
    /// The GPU texture; created lazily on first `update`.
    texture: Option<Texture>,
    /// Image format used when (re)creating the texture.
    format: ImageFormat,
    /// Pixel buffer object used for uploads.
    pbo: PBO,
    /// `T` only determines the upload layout; no `T` values are stored here.
    _marker: PhantomData<T>,
}
1646
1647
impl<T> VertexDataTexture<T> {
    /// Creates an empty vertex data texture; the GPU texture itself is
    /// allocated lazily by `update`.
    fn new(
        device: &mut Device,
        format: ImageFormat,
    ) -> Self {
        VertexDataTexture {
            texture: None,
            format,
            pbo: device.create_pbo(),
            _marker: PhantomData,
        }
    }

    /// Returns a borrow of the GPU texture. Panics if it hasn't been initialized.
    fn texture(&self) -> &Texture {
        self.texture.as_ref().unwrap()
    }

    /// Returns an estimate of the GPU memory consumed by this VertexDataTexture.
    fn size_in_bytes(&self) -> usize {
        self.texture.as_ref().map_or(0, |t| t.size_in_bytes())
    }

    /// (Re)sizes the texture as needed and uploads `data` to it. `data` may be
    /// padded (via reserved capacity) to a whole number of rows.
    fn update(&mut self, device: &mut Device, data: &mut Vec<T>) {
        // Each item must be a whole number of 16-byte texels.
        debug_assert!(mem::size_of::<T>() % 16 == 0);
        let texels_per_item = mem::size_of::<T>() / 16;
        let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / texels_per_item;
        debug_assert_ne!(items_per_row, 0);

        // Ensure we always end up with a texture when leaving this method.
        let mut len = data.len();
        if len == 0 {
            if self.texture.is_some() {
                return;
            }
            data.reserve(items_per_row);
            len = items_per_row;
        } else {
            // Extend the data array to have enough capacity to upload at least
            // a multiple of the row size. This ensures memory safety when the
            // array is passed to OpenGL to upload to the GPU.
            let extra = len % items_per_row;
            if extra != 0 {
                let padding = items_per_row - extra;
                data.reserve(padding);
                len += padding;
            }
        }

        let needed_height = (len / items_per_row) as i32;
        let existing_height = self.texture.as_ref().map_or(0, |t| t.get_dimensions().height);

        // Create a new texture if needed.
        //
        // These textures are generally very small, which is why we don't bother
        // with incremental updates and just re-upload every frame. For most pages
        // they're one row each, and on stress tests like css-francine they end up
        // in the 6-14 range. So we size the texture tightly to what we need (usually
        // 1), and shrink it if the waste would be more than `VERTEX_TEXTURE_EXTRA_ROWS`
        // rows. This helps with memory overhead, especially because there are several
        // instances of these textures per Renderer.
        if needed_height > existing_height || needed_height + VERTEX_TEXTURE_EXTRA_ROWS < existing_height {
            // Drop the existing texture, if any.
            if let Some(t) = self.texture.take() {
                device.delete_texture(t);
            }

            let texture = device.create_texture(
                TextureTarget::Default,
                self.format,
                MAX_VERTEX_TEXTURE_WIDTH as i32,
                // Ensure height is at least two to work around
                // a driver limitation. NOTE(review): the remainder of this
                // comment (likely a bug link) was lost — confirm the original
                // reference; cf. the ANGLE min-height assert in
                // `GpuCacheTexture::ensure_texture`.
                needed_height.max(2),
                TextureFilter::Nearest,
                None,
                1,
            );
            self.texture = Some(texture);
        }

        // Note: the actual width can be larger than the logical one, with a few texels
        // of each row unused at the tail. This is needed because there is still hardware
        // (like Intel iGPUs) that prefers power-of-two sizes of textures ([1]).
        //
        // NOTE(review): the footnote link for [1] appears to have been lost from
        // this comment — TODO restore it.
        let logical_width = if needed_height == 1 {
            data.len() * texels_per_item
        } else {
            MAX_VERTEX_TEXTURE_WIDTH - (MAX_VERTEX_TEXTURE_WIDTH % texels_per_item)
        };

        let rect = DeviceIntRect::new(
            DeviceIntPoint::zero(),
            DeviceIntSize::new(logical_width as i32, needed_height),
        );

        debug_assert!(len <= data.capacity(), "CPU copy will read out of bounds");
        let (upload_size, _) = device.required_upload_size_and_stride(
            rect.size,
            self.texture().get_format(),
        );
        if upload_size > 0 {
            device
                .upload_texture(self.texture(), &self.pbo, upload_size)
                .upload(rect, 0, None, None, data.as_ptr(), len);
        }
    }

    /// Releases the PBO and the texture (if one was created).
    fn deinit(mut self, device: &mut Device) {
        device.delete_pbo(self.pbo);
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
    }
}
1763
1764
/// A framebuffer used for frame output.
struct FrameOutput {
    /// The last frame that accessed this output.
    /// NOTE(review): presumably used to retire stale outputs — the consuming
    /// code is outside this view; confirm.
    last_access: GpuFrameId,
    /// The FBO to render into.
    fbo_id: FBOId,
}
1768
1769
/// Key describing a render target allocation (`PartialEq` so candidates can
/// be compared). NOTE(review): the matching logic lives outside this view.
#[derive(PartialEq)]
struct TargetSelector {
    /// Required target dimensions.
    size: DeviceIntSize,
    /// Required number of texture array layers.
    num_layers: usize,
    /// Required image format.
    format: ImageFormat,
}
1775
1776
/// A `DebugRenderer` that is created on first use, and that remembers a
/// failed creation so it is never retried (see `get_mut`).
struct LazyInitializedDebugRenderer {
    /// The renderer, once it has been successfully created.
    debug_renderer: Option<DebugRenderer>,
    /// Set when creation failed; `get_mut` then always returns `None`.
    failed: bool,
}
1780
1781
impl LazyInitializedDebugRenderer {
1782
pub fn new() -> Self {
1783
Self {
1784
debug_renderer: None,
1785
failed: false,
1786
}
1787
}
1788
1789
pub fn get_mut<'a>(&'a mut self, device: &mut Device) -> Option<&'a mut DebugRenderer> {
1790
if self.failed {
1791
return None;
1792
}
1793
if self.debug_renderer.is_none() {
1794
match DebugRenderer::new(device) {
1795
Ok(renderer) => { self.debug_renderer = Some(renderer); }
1796
Err(_) => {
1797
// The shader compilation code already logs errors.
1798
self.failed = true;
1799
}
1800
}
1801
}
1802
1803
self.debug_renderer.as_mut()
1804
}
1805
1806
/// Returns mut ref to `DebugRenderer` if one already exists, otherwise returns `None`.
1807
pub fn try_get_mut<'a>(&'a mut self) -> Option<&'a mut DebugRenderer> {
1808
self.debug_renderer.as_mut()
1809
}
1810
1811
pub fn deinit(self, device: &mut Device) {
1812
if let Some(debug_renderer) = self.debug_renderer {
1813
debug_renderer.deinit(device);
1814
}
1815
}
1816
}
1817
1818
// NB: If you add more VAOs here, be sure to deinitialize them in
// `Renderer::deinit()` below.
/// The vertex array objects owned by the renderer, one per draw-call family.
pub struct RendererVAOs {
    prim_vao: VAO,
    blur_vao: VAO,
    clip_vao: VAO,
    border_vao: VAO,
    line_vao: VAO,
    scale_vao: VAO,
    gradient_vao: VAO,
    resolve_vao: VAO,
    svg_filter_vao: VAO,
    composite_vao: VAO,
}
1832
1833
/// Information about the state of the debugging / profiler overlay in native compositing mode.
struct DebugOverlayState {
    /// True if any of the current debug flags will result in drawing a debug overlay.
    is_enabled: bool,

    /// The current size of the debug overlay surface. None implies that the
    /// debug surface isn't currently allocated.
    current_size: Option<DeviceIntSize>,
}
1842
1843
impl DebugOverlayState {
1844
fn new() -> Self {
1845
DebugOverlayState {
1846
is_enabled: false,
1847
current_size: None,
1848
}
1849
}
1850
}
1851
1852
/// The renderer is responsible for submitting to the GPU the work prepared by the
1853
/// RenderBackend.
1854
///
1855
/// We have a separate `Renderer` instance for each instance of WebRender (generally
1856
/// one per OS window), and all instances share the same thread.
1857
pub struct Renderer {
1858
result_rx: Receiver<ResultMsg>,
1859
debug_server: Box<dyn DebugServer>,
1860
pub device: Device,
1861
pending_texture_updates: Vec<TextureUpdateList>,
1862
pending_native_surface_updates: Vec<NativeSurfaceOperation>,
1863
pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
1864
pending_gpu_cache_clear: bool,
1865
pending_shader_updates: Vec<PathBuf>,
1866
active_documents: Vec<(DocumentId, RenderedDocument)>,
1867
1868
shaders: Rc<RefCell<Shaders>>,
1869
1870
max_recorded_profiles: usize,
1871