/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! Overview of the GPU cache.
//!
//! The main goal of the GPU cache is to allow on-demand
//! allocation and construction of GPU resources for the
//! vertex shaders to consume.
//!
//! Every item that wants to be stored in the GPU cache
//! should create a GpuCacheHandle that is used to refer
//! to a cached GPU resource. Creating a handle is a
//! cheap operation that does *not* allocate room in the
//! cache.
//!
//! On any frame when that data is required, the caller
//! must request that handle, via ```request```. If the
//! data is not in the cache, a ```GpuDataRequest``` is
//! returned, which the caller uses to build the data.
//!
//! After ```end_frame``` has occurred, callers can
//! use the ```get_address``` API to get the allocated
//! address in the GPU cache of a given resource slot
//! for this frame.
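//!
//! A minimal usage sketch (with a hypothetical `gpu_cache` value, assuming a
//! frame has begun):
//!
//! ```ignore
//! let mut handle = GpuCacheHandle::new();
//! if let Some(mut request) = gpu_cache.request(&mut handle) {
//!     // Cache miss (or invalidated): write the blocks.
//!     request.push([0.0, 1.0, 0.0, 1.0]); // one RGBAF32 block
//! }
//! // ... after end_frame(), resolve the final texture address:
//! let address = gpu_cache.get_address(&handle);
//! ```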

use api::{DebugFlags, DocumentId, PremultipliedColorF};
#[cfg(test)]
use api::IdNamespace;
use api::units::TexelRect;
use euclid::{HomogeneousVector, Rect};
use crate::internal_types::{FastHashMap, FastHashSet};
use crate::profiler::GpuCacheProfileCounters;
use crate::render_backend::{FrameStamp, FrameId};
use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
use std::{mem, u16, u32};
use std::num::NonZeroU32;
use std::ops::Add;
use std::time::{Duration, Instant};


/// At the time of this writing, Firefox uses about 15 GPU cache rows on
/// startup, and then gradually works its way up to the mid-30s with normal
/// browsing.
pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20;
const NEW_ROWS_PER_RESIZE: i32 = 10;

/// The number of frames an entry can go unused before being evicted.
const FRAMES_BEFORE_EVICTION: usize = 10;

/// The ratio of utilized blocks to total blocks for which we start the clock
/// on reclaiming memory.
const RECLAIM_THRESHOLD: f32 = 0.2;

/// The amount of time utilization must be below the above threshold before we
/// blow away the cache and rebuild it.
const RECLAIM_DELAY_S: u64 = 5;

#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct Epoch(u32);

impl Epoch {
    fn next(&mut self) {
        *self = Epoch(self.0.wrapping_add(1));
    }
}

#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct CacheLocation {
    block_index: BlockIndex,
    epoch: Epoch,
}

/// A single texel in RGBAF32 texture - 16 bytes.
#[derive(Copy, Clone, Debug, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuBlockData {
    data: [f32; 4],
}

impl GpuBlockData {
    pub const EMPTY: Self = GpuBlockData { data: [0.0; 4] };
}

/// Conversion helpers for GpuBlockData
impl From<PremultipliedColorF> for GpuBlockData {
    fn from(c: PremultipliedColorF) -> Self {
        GpuBlockData {
            data: [c.r, c.g, c.b, c.a],
        }
    }
}

impl From<[f32; 4]> for GpuBlockData {
    fn from(data: [f32; 4]) -> Self {
        GpuBlockData { data }
    }
}

impl<P> From<Rect<f32, P>> for GpuBlockData {
    fn from(r: Rect<f32, P>) -> Self {
        GpuBlockData {
            data: [
                r.origin.x,
                r.origin.y,
                r.size.width,
                r.size.height,
            ],
        }
    }
}

impl<P> From<HomogeneousVector<f32, P>> for GpuBlockData {
    fn from(v: HomogeneousVector<f32, P>) -> Self {
        GpuBlockData {
            data: [
                v.x,
                v.y,
                v.z,
                v.w,
            ],
        }
    }
}

impl From<TexelRect> for GpuBlockData {
    fn from(tr: TexelRect) -> Self {
        GpuBlockData {
            data: [tr.uv0.x, tr.uv0.y, tr.uv1.x, tr.uv1.y],
        }
    }
}


// Any data type that can be stored in the GPU cache should
// implement this trait.
pub trait ToGpuBlocks {
    // Request an arbitrary number of GPU data blocks.
    fn write_gpu_blocks(&self, _: GpuDataRequest);
}
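
// A hypothetical implementation sketch (`MyColorPrimitive` is not a type in
// this module), assuming one color block per primitive:
//
//     struct MyColorPrimitive {
//         color: PremultipliedColorF,
//     }
//
//     impl ToGpuBlocks for MyColorPrimitive {
//         fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
//             // PremultipliedColorF converts into a single GpuBlockData.
//             request.push(self.color);
//         }
//     }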

// A handle to a GPU resource.
#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuCacheHandle {
    location: Option<CacheLocation>,
}

impl GpuCacheHandle {
    pub fn new() -> Self {
        GpuCacheHandle { location: None }
    }
}

// A unique address in the GPU cache. These are uploaded
// as part of the primitive instances, to allow the vertex
// shader to fetch the specific data.
#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuCacheAddress {
    pub u: u16,
    pub v: u16,
}

impl GpuCacheAddress {
    fn new(u: usize, v: usize) -> Self {
        GpuCacheAddress {
            u: u as u16,
            v: v as u16,
        }
    }

    pub const INVALID: GpuCacheAddress = GpuCacheAddress {
        u: u16::MAX,
        v: u16::MAX,
    };
}

impl Add<usize> for GpuCacheAddress {
    type Output = GpuCacheAddress;

    fn add(self, other: usize) -> GpuCacheAddress {
        GpuCacheAddress {
            u: self.u + other as u16,
            v: self.v,
        }
    }
}
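
// For example (illustrative values): GpuCacheAddress { u: 4, v: 2 } + 3
// yields GpuCacheAddress { u: 7, v: 2 }; offsets move along a row and never
// change the row index.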

// An entry in a free-list of blocks in the GPU cache.
#[derive(Debug, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct Block {
    // The location in the cache of this block.
    address: GpuCacheAddress,
    // The current epoch (generation) of this block.
    epoch: Epoch,
    // Index of the next free block in the list it
    // belongs to (either a free-list or the
    // occupied list).
    next: Option<BlockIndex>,
    // The last frame this block was referenced.
    last_access_time: FrameId,
}

impl Block {
    fn new(
        address: GpuCacheAddress,
        next: Option<BlockIndex>,
        frame_id: FrameId,
        epoch: Epoch,
    ) -> Self {
        Block {
            address,
            next,
            last_access_time: frame_id,
            epoch,
        }
    }

    fn advance_epoch(&mut self, max_epoch: &mut Epoch) {
        self.epoch.next();
        if max_epoch.0 < self.epoch.0 {
            max_epoch.0 = self.epoch.0;
        }
    }

    /// An invalid dummy block, used to pad out index zero.
    pub const INVALID: Block = Block {
        address: GpuCacheAddress { u: 0, v: 0 },
        epoch: Epoch(0),
        next: None,
        last_access_time: FrameId::INVALID,
    };
}

/// Represents the index of a Block in the block array. We only create such
/// structs for blocks that represent the start of a chunk.
///
/// Because we use Option<BlockIndex> in a lot of places, we use a NonZeroU32
/// here and avoid ever using the index zero.
#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct BlockIndex(NonZeroU32);

impl BlockIndex {
    fn new(idx: usize) -> Self {
        debug_assert!(idx <= u32::MAX as usize);
        BlockIndex(NonZeroU32::new(idx as u32).expect("Index zero forbidden"))
    }

    fn get(&self) -> usize {
        self.0.get() as usize
    }
}

// A row in the cache texture.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct Row {
    // The fixed size of blocks that this row supports.
    // Each row becomes a slab allocator for a fixed block size.
    // This means no dealing with fragmentation within a cache
    // row as items are allocated and freed.
    block_count_per_item: usize,
}

impl Row {
    fn new(block_count_per_item: usize) -> Self {
        Row {
            block_count_per_item,
        }
    }
}

// A list of update operations that can be applied on the cache
// this frame. The list of updates is created by the render backend
// during frame construction. It's passed to the render thread
// where GL commands can be applied.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub enum GpuCacheUpdate {
    Copy {
        block_index: usize,
        block_count: usize,
        address: GpuCacheAddress,
    },
}

/// Command to inform the debug display in the renderer when chunks are allocated
/// or freed.
#[derive(MallocSizeOf)]
pub enum GpuCacheDebugCmd {
    /// Describes an allocated chunk.
    Alloc(GpuCacheDebugChunk),
    /// Describes a freed chunk.
    Free(GpuCacheAddress),
}

#[derive(Clone, MallocSizeOf)]
pub struct GpuCacheDebugChunk {
    pub address: GpuCacheAddress,
    pub size: usize,
}

#[must_use]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub struct GpuCacheUpdateList {
    /// The frame the current update list was generated from.
    pub frame_id: FrameId,
    /// Whether the texture should be cleared before updates
    /// are applied.
    pub clear: bool,
    /// The current height of the texture. The render thread
    /// should resize the texture if required.
    pub height: i32,
    /// List of updates to apply.
    pub updates: Vec<GpuCacheUpdate>,
    /// A flat list of GPU blocks that are pending upload
    /// to GPU memory.
    pub blocks: Vec<GpuBlockData>,
    /// GPU block metadata for the whole cache state, used by the debug display.
    #[cfg_attr(feature = "serde", serde(skip))]
    pub debug_commands: Vec<GpuCacheDebugCmd>,
}

// Holds the free lists of fixed size blocks. Mostly
// just serves to work around the borrow checker.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct FreeBlockLists {
    free_list_1: Option<BlockIndex>,
    free_list_2: Option<BlockIndex>,
    free_list_4: Option<BlockIndex>,
    free_list_8: Option<BlockIndex>,
    free_list_16: Option<BlockIndex>,
    free_list_32: Option<BlockIndex>,
    free_list_64: Option<BlockIndex>,
    free_list_128: Option<BlockIndex>,
    free_list_256: Option<BlockIndex>,
    free_list_341: Option<BlockIndex>,
    free_list_512: Option<BlockIndex>,
    free_list_1024: Option<BlockIndex>,
}

impl FreeBlockLists {
    fn new() -> Self {
        FreeBlockLists {
            free_list_1: None,
            free_list_2: None,
            free_list_4: None,
            free_list_8: None,
            free_list_16: None,
            free_list_32: None,
            free_list_64: None,
            free_list_128: None,
            free_list_256: None,
            free_list_341: None,
            free_list_512: None,
            free_list_1024: None,
        }
    }

    fn get_actual_block_count_and_free_list(
        &mut self,
        block_count: usize,
    ) -> (usize, &mut Option<BlockIndex>) {
        // Find the appropriate free list to use based on the block size.
        //
        // Note that we cheat a bit with the 341 bucket, since it's not quite
        // a divisor of 1024, because purecss-francine allocates many 260-block
        // chunks, and there's no reason we shouldn't pack these three to a row.
        // This means the allocation statistics will under-report by one block
        // for each row using 341-block buckets, which is fine.
        debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing");
        match block_count {
            0 => panic!("Can't allocate zero sized blocks!"),
            1 => (1, &mut self.free_list_1),
            2 => (2, &mut self.free_list_2),
            3..=4 => (4, &mut self.free_list_4),
            5..=8 => (8, &mut self.free_list_8),
            9..=16 => (16, &mut self.free_list_16),
            17..=32 => (32, &mut self.free_list_32),
            33..=64 => (64, &mut self.free_list_64),
            65..=128 => (128, &mut self.free_list_128),
            129..=256 => (256, &mut self.free_list_256),
            257..=341 => (341, &mut self.free_list_341),
            342..=512 => (512, &mut self.free_list_512),
            513..=1024 => (1024, &mut self.free_list_1024),
            _ => panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!"),
        }
    }
}
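
// A worked example of the bucketing above: a 260-block chunk rounds up to the
// 341 bucket, so three such chunks pack into one 1024-block row (3 * 341 =
// 1023), and the allocation statistics under-report that row by one block.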

// CPU-side representation of the GPU resource cache texture.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
struct Texture {
    // Current texture height
    height: i32,
    // All blocks that have been created for this texture
    blocks: Vec<Block>,
    // Metadata about each allocated row.
    rows: Vec<Row>,
    // The base Epoch for this texture.
    base_epoch: Epoch,
    // The maximum epoch reached. We track this along with the above so
    // that we can rebuild the Texture and avoid collisions with handles
    // allocated for the old texture.
    max_epoch: Epoch,
    // Free lists of available blocks for each supported
    // block size in the texture. These are intrusive
    // linked lists.
    free_lists: FreeBlockLists,
    // Linked list of currently occupied blocks. This
    // makes it faster to iterate blocks looking for
    // candidates to be evicted from the cache.
    occupied_list_heads: FastHashMap<DocumentId, BlockIndex>,
    // Pending blocks that have been written this frame
    // and will need to be sent to the GPU.
    pending_blocks: Vec<GpuBlockData>,
    // Pending update commands.
    updates: Vec<GpuCacheUpdate>,
    // Profile stats
    allocated_block_count: usize,
    // The stamp at which we first reached our threshold for reclaiming `GpuCache`
    // memory, or `None` if the threshold hasn't been reached.
    #[cfg_attr(feature = "serde", serde(skip))]
    reached_reclaim_threshold: Option<Instant>,
    // List of debug commands to be sent to the renderer when the GPU cache
    // debug display is enabled.
    #[cfg_attr(feature = "serde", serde(skip))]
    debug_commands: Vec<GpuCacheDebugCmd>,
    // The current debug flags for the system.
    debug_flags: DebugFlags,
}

impl Texture {
    fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self {
        // Pre-fill the block array with one invalid block so that we never use
        // 0 for a BlockIndex. This lets us use NonZeroU32 for BlockIndex, which
        // saves memory.
        let blocks = vec![Block::INVALID];

        Texture {
            height: GPU_CACHE_INITIAL_HEIGHT,
            blocks,
            rows: Vec::new(),
            base_epoch,
            max_epoch: base_epoch,
            free_lists: FreeBlockLists::new(),
            pending_blocks: Vec::new(),
            updates: Vec::new(),
            occupied_list_heads: FastHashMap::default(),
            allocated_block_count: 0,
            reached_reclaim_threshold: None,
            debug_commands: Vec::new(),
            debug_flags,
        }
    }

    // Push new data into the cache. The ```pending_block_index``` parameter
    // is the index into the texture's ```pending_blocks``` array where the
    // data was pushed. Returns the allocated location for this data.
    fn push_data(
        &mut self,
        pending_block_index: Option<usize>,
        block_count: usize,
        frame_stamp: FrameStamp
    ) -> CacheLocation {
        debug_assert!(frame_stamp.is_valid());
        // Find the appropriate free list to use based on the block size.
        let (alloc_size, free_list) = self.free_lists
            .get_actual_block_count_and_free_list(block_count);

        // See if we need a new row (if free-list has nothing available)
        if free_list.is_none() {
            if self.rows.len() as i32 == self.height {
                self.height += NEW_ROWS_PER_RESIZE;
            }

            // Create a new row.
            let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / alloc_size;
            let row_index = self.rows.len();
            self.rows.push(Row::new(alloc_size));

            // Create a ```Block``` for each possible allocation address
            // in this row, and link it in to the free-list for this
            // block size.
            let mut prev_block_index = None;
            for i in 0 .. items_per_row {
                let address = GpuCacheAddress::new(i * alloc_size, row_index);
                let block_index = BlockIndex::new(self.blocks.len());
                let block = Block::new(address, prev_block_index, frame_stamp.frame_id(), self.base_epoch);
                self.blocks.push(block);
                prev_block_index = Some(block_index);
            }

            *free_list = prev_block_index;
        }

        // Given the code above, it's now guaranteed that there is a block
        // available in the appropriate free-list. Pull a block from the
        // head of the list.
        let free_block_index = free_list.take().unwrap();
        let block = &mut self.blocks[free_block_index.get()];
        *free_list = block.next;

        // Add the block to the occupied linked list.
        block.next = self.occupied_list_heads.get(&frame_stamp.document_id()).cloned();
        block.last_access_time = frame_stamp.frame_id();
        self.occupied_list_heads.insert(frame_stamp.document_id(), free_block_index);
        self.allocated_block_count += alloc_size;

        if let Some(pending_block_index) = pending_block_index {
            // Add this update to the pending list of blocks that need
            // to be updated on the GPU.
            self.updates.push(GpuCacheUpdate::Copy {
                block_index: pending_block_index,
                block_count,
                address: block.address,
            });
        }

        // If we're using the debug display, communicate the allocation to the
        // renderer thread. Note that we do this regardless of whether or not
        // pending_block_index is None (if it is, the renderer thread will fill
        // in the data via a deferred resolve, but the block is still considered
        // allocated).
        if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
            self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk {
                address: block.address,
                size: block_count,
            }));
        }

        CacheLocation {
            block_index: free_block_index,
            epoch: block.epoch,
        }
    }

    // Run through the list of occupied cache blocks and evict
    // any old blocks that haven't been referenced for a while.
    fn evict_old_blocks(&mut self, frame_stamp: FrameStamp) {
        debug_assert!(frame_stamp.is_valid());
        // Prune any old items from the list to make room.
        // Traverse the occupied linked list and see
        // which items have not been used for a long time.
        let mut current_block = self.occupied_list_heads.get(&frame_stamp.document_id()).map(|x| *x);
        let mut prev_block: Option<BlockIndex> = None;

        while let Some(index) = current_block {
            let (next_block, should_unlink) = {
                let block = &mut self.blocks[index.get()];

                let next_block = block.next;
                let mut should_unlink = false;

                // If this resource has not been used in the last
                // few frames, free it from the texture and mark
                // as empty.
                if block.last_access_time + FRAMES_BEFORE_EVICTION < frame_stamp.frame_id() {
                    should_unlink = true;

                    // Get the row metadata from the address.
                    let row = &mut self.rows[block.address.v as usize];

                    // Use the row metadata to determine which free-list
                    // this block belongs to.
                    let (_, free_list) = self.free_lists
                        .get_actual_block_count_and_free_list(row.block_count_per_item);

                    block.advance_epoch(&mut self.max_epoch);
                    block.next = *free_list;
                    *free_list = Some(index);

                    self.allocated_block_count -= row.block_count_per_item;

                    if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
                        let cmd = GpuCacheDebugCmd::Free(block.address);
                        self.debug_commands.push(cmd);
                    }
                }

                (next_block, should_unlink)
            };

            // If the block was released, we will need to remove it
            // from the occupied linked list.
            if should_unlink {
                match prev_block {
                    Some(prev_block) => {
                        self.blocks[prev_block.get()].next = next_block;
                    }
                    None => {
                        match next_block {
                            Some(next_block) => {
                                self.occupied_list_heads.insert(frame_stamp.document_id(), next_block);
                            }
                            None => {
                                self.occupied_list_heads.remove(&frame_stamp.document_id());
                            }
                        }
                    }
                }
            } else {
                prev_block = current_block;
            }

            current_block = next_block;
        }
    }

    /// Returns the ratio of utilized blocks.
    fn utilization(&self) -> f32 {
        let total_blocks = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH;
        debug_assert!(total_blocks > 0);
        let ratio = self.allocated_block_count as f32 / total_blocks as f32;
        debug_assert!(0.0 <= ratio && ratio <= 1.0, "Bad ratio: {}", ratio);
        ratio
    }
}
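
// Illustrative numbers for utilization(): with 10 rows the texture holds
// 10 * 1024 = 10240 blocks; if 2048 of them are allocated, utilization()
// returns 0.2, which is exactly RECLAIM_THRESHOLD.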


/// A wrapper object for GPU data requests that works as a container
/// that can only grow.
#[must_use]
pub struct GpuDataRequest<'a> {
    handle: &'a mut GpuCacheHandle,
    frame_stamp: FrameStamp,
    start_index: usize,
    max_block_count: usize,
    texture: &'a mut Texture,
}

impl<'a> GpuDataRequest<'a> {
    pub fn push<B>(&mut self, block: B)
    where
        B: Into<GpuBlockData>,
    {
        self.texture.pending_blocks.push(block.into());
    }

    pub fn current_used_block_num(&self) -> usize {
        self.texture.pending_blocks.len() - self.start_index
    }
}

impl<'a> Drop for GpuDataRequest<'a> {
    fn drop(&mut self) {
        // Push the data to the texture pending updates list.
        let block_count = self.current_used_block_num();
        debug_assert!(block_count <= self.max_block_count);

        let location = self.texture
            .push_data(Some(self.start_index), block_count, self.frame_stamp);
        self.handle.location = Some(location);
    }
}


/// The main LRU cache interface.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
pub struct GpuCache {
    /// Current FrameStamp.
    now: FrameStamp,
    /// CPU-side texture allocator.
    texture: Texture,
    /// Number of blocks requested this frame that don't
    /// need to be re-uploaded.
    saved_block_count: usize,
    /// The current debug flags for the system.
    debug_flags: DebugFlags,
    /// Whether there is a pending clear to send with the
    /// next update.
    pending_clear: bool,
    /// Indicates that prepare_for_frames has been called for this group of frames.
    /// Used for sanity checks.
    prepared_for_frames: bool,
    /// This indicates that we performed a cleanup operation which requires all
    /// documents to build a frame.
    requires_frame_build: bool,
    /// The set of documents which have had frames built in this update. Used for
    /// sanity checks.
    document_frames_to_build: FastHashSet<DocumentId>,
}

impl GpuCache {
    pub fn new() -> Self {
        let debug_flags = DebugFlags::empty();
        GpuCache {
            now: FrameStamp::INVALID,
            texture: Texture::new(Epoch(0), debug_flags),
            saved_block_count: 0,
            debug_flags,
            pending_clear: false,
            prepared_for_frames: false,
            requires_frame_build: false,
            document_frames_to_build: FastHashSet::default(),
        }
    }

    /// Creates a GpuCache and sets it up with a valid `FrameStamp`, which
    /// is useful for avoiding panics when instantiating the `GpuCache`
    /// directly from unit test code.
    #[cfg(test)]
    pub fn new_for_testing() -> Self {
        let mut cache = Self::new();
        let mut now = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
        now.advance();
        cache.prepared_for_frames = true;
        cache.begin_frame(now);
        cache
    }

    /// Drops everything in the GPU cache. Must not be called once gpu cache entries
    /// for the next frame have already been requested.
    pub fn clear(&mut self) {
        assert!(self.texture.updates.is_empty(), "Clearing with pending updates");
        let mut next_base_epoch = self.texture.max_epoch;
        next_base_epoch.next();
        self.texture = Texture::new(next_base_epoch, self.debug_flags);
        self.saved_block_count = 0;
        self.pending_clear = true;
        self.requires_frame_build = true;
    }

    pub fn requires_frame_build(&self) -> bool {
        self.requires_frame_build
    }

    pub fn prepare_for_frames(&mut self) {
        self.prepared_for_frames = true;
        if self.should_reclaim_memory() {
            self.clear();
            debug_assert!(self.document_frames_to_build.is_empty());
            for &document_id in self.texture.occupied_list_heads.keys() {
                self.document_frames_to_build.insert(document_id);
            }
        }
    }

    pub fn bookkeep_after_frames(&mut self) {
        assert!(self.document_frames_to_build.is_empty());
        assert!(self.prepared_for_frames);
        self.requires_frame_build = false;
        self.prepared_for_frames = false;
    }

    /// Begin a new frame.
    pub fn begin_frame(&mut self, stamp: FrameStamp) {
        debug_assert!(self.texture.pending_blocks.is_empty());
        assert!(self.prepared_for_frames);
        self.now = stamp;
        self.texture.evict_old_blocks(self.now);
        self.saved_block_count = 0;
    }

    // Invalidate a (possibly) existing block in the cache.
    // This means the next call to request() for this location
    // will rebuild the data and upload it to the GPU.
    pub fn invalidate(&mut self, handle: &GpuCacheHandle) {
        if let Some(ref location) = handle.location {
            // don't invalidate blocks that are already re-assigned
            if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
                if block.epoch == location.epoch {
                    block.advance_epoch(&mut self.texture.max_epoch);
                }
            }
        }
    }

    /// Request a resource be added to the cache. If the resource
    /// is already in the cache, `None` will be returned.
    pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> {
        let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH;
        // Check if the allocation for this handle is still valid.
        if let Some(ref location) = handle.location {
            if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
                if block.epoch == location.epoch {
                    max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
                    if block.last_access_time != self.now.frame_id() {
                        // Mark last access time to avoid evicting this block.
                        block.last_access_time = self.now.frame_id();
                        self.saved_block_count += max_block_count;
                    }
                    return None;
                }
            }
        }

        debug_assert!(self.now.is_valid());
        Some(GpuDataRequest {
            handle,
            frame_stamp: self.now,
            start_index: self.texture.pending_blocks.len(),
            texture: &mut self.texture,
            max_block_count,
        })
    }

    // Push an array of data blocks to be uploaded to the GPU
    // unconditionally for this frame. The cache handle will
    // assert if the caller tries to retrieve the address
    // of this handle on a subsequent frame. This is typically
    // used for uploading data that changes every frame, and
    // therefore makes no sense to try and cache.
    pub fn push_per_frame_blocks(&mut self, blocks: &[GpuBlockData]) -> GpuCacheHandle {
        let start_index = self.texture.pending_blocks.len();
        self.texture.pending_blocks.extend_from_slice(blocks);
        let location = self.texture
            .push_data(Some(start_index), blocks.len(), self.now);
        GpuCacheHandle {
            location: Some(location),
        }
    }

    // Reserve space in the cache for per-frame blocks that
    // will be resolved by the render thread via the
    // external image callback.
    pub fn push_deferred_per_frame_blocks(&mut self, block_count: usize) -> GpuCacheHandle {
        let location = self.texture.push_data(None, block_count, self.now);
        GpuCacheHandle {
            location: Some(location),
        }
    }
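
    // A hypothetical call-site sketch for per-frame data (the block values
    // are illustrative):
    //
    //     let handle = gpu_cache.push_per_frame_blocks(&[
    //         GpuBlockData::EMPTY,
    //     ]);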

    /// End the frame. Return the list of updates to apply to the
    /// device-specific cache texture.
    pub fn end_frame(
        &mut self,
        profile_counters: &mut GpuCacheProfileCounters,
    ) -> FrameStamp {
        profile_counters
            .allocated_rows
            .set(self.texture.rows.len());
        profile_counters
            .allocated_blocks
            .set(self.texture.allocated_block_count);
        profile_counters
            .saved_blocks
            .set(self.saved_block_count);

        let reached_threshold =
            self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
            self.texture.utilization() < RECLAIM_THRESHOLD;
        if reached_threshold {
            self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now);
        } else {
            self.texture.reached_reclaim_threshold = None;
        }

        self.document_frames_to_build.remove(&self.now.document_id());
        self.now
    }

    /// Returns true if utilization has been low enough for long enough that we
    /// should blow the cache away and rebuild it.
    pub fn should_reclaim_memory(&self) -> bool {
        self.texture.reached_reclaim_threshold
            .map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S))
    }

    /// Extract the pending updates from the cache.
    pub fn extract_updates(&mut self) -> GpuCacheUpdateList {
        let clear = self.pending_clear;
        self.pending_clear = false;
        GpuCacheUpdateList {
            frame_id: self.now.frame_id(),
            clear,
            height: self.texture.height,
            debug_commands: mem::replace(&mut self.texture.debug_commands, Vec::new()),
            updates: mem::replace(&mut self.texture.updates, Vec::new()),
            blocks: mem::replace(&mut self.texture.pending_blocks, Vec::new()),
        }
    }

    /// Sets the current debug flags for the system.
    pub fn set_debug_flags(&mut self, flags: DebugFlags) {
        self.debug_flags = flags;
        self.texture.debug_flags = flags;
    }

    /// Get the actual GPU address in the texture for a given slot ID.
    /// It's assumed at this point that the given slot has been requested
    /// and built for this frame. Attempting to get the address for a
    /// freed or pending slot will panic!
    pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress {
        let location = id.location.expect("handle not requested or allocated!");
        let block = &self.texture.blocks[location.block_index.get()];
        debug_assert_eq!(block.epoch, location.epoch);
        debug_assert_eq!(block.last_access_time, self.now.frame_id());
        block.address
    }
}

#[test]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
    use std::mem;
    // We can end up with a lot of blocks stored in the global vec, and keeping
    // them small helps reduce memory overhead.
    assert_eq!(mem::size_of::<Block>(), 24, "Block size changed");
}