upload.rs - mozsearch

mozilla-central/gfx/wr/webrender/src/renderer/upload.rs (file symbol)

Enable keyboard shortcuts

Source code

File a bug in Core :: Graphics: WebRender

Revision control

Copy as Markdown

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! This module contains the convoluted logic that goes into uploading content into

//! the texture cache's textures.

//!

//! We need to support various combinations of code paths depending on the quirks of

//! each hardware/driver configuration:

//! - direct upload,

//! - staged upload via a pixel buffer object,

//! - staged upload via a direct upload to a staging texture where PBO's aren't supported,

//! - copy from the staging to destination textures, either via blits or batched draw calls.

//!

//! Conceptually a lot of this logic should probably be in the device module, but some code

//! here relies on submitting draw calls via the renderer.

use std::mem;

use std::collections::VecDeque;

use std::sync::Arc;

use std::time::Duration;

use euclid::{Transform3D, point2};

use time::precise_time_ns;

use malloc_size_of::MallocSizeOfOps;

use api::units::*;

use api::{ExternalImageSource, ImageBufferKind, ImageFormat};

use crate::renderer::{

    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR

};

use crate::internal_types::{

    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,

    CacheTextureId, RenderTargetInfo,

};

use crate::device::{

    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,

    TextureFilter,

};

use crate::gpu_types::CopyInstance;

use crate::batch::BatchTextures;

use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};

use crate::profiler;

use crate::render_api::MemoryReport;

pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);

const BATCH_UPLOAD_FORMAT_COUNT: usize = 4;

/// Upload a number of items to texture cache textures.

///

/// This is the main entry point of the texture cache upload code.

/// See also the module documentation for more information.

pub fn upload_to_texture_cache(

    renderer: &mut Renderer,

    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,

) {

    let mut stats = UploadStats {

        num_draw_calls: 0,

        upload_time: 0,

        cpu_buffer_alloc_time: 0,

        texture_alloc_time: 0,

        cpu_copy_time: 0,

        gpu_copy_commands_time: 0,

        bytes_uploaded: 0,

        items_uploaded: 0,

};

    let upload_total_start = precise_time_ns();

    let mut batch_upload_textures = Vec::new();

    // A list of copies that must be performed from the temporary textures to the texture cache.

    let mut batch_upload_copies = Vec::new();

    // For each texture format, this stores a list of staging buffers

    // and a texture allocator for packing the buffers.

    let mut batch_upload_buffers = FastHashMap::default();

    // For best performance we use a single TextureUploader for all uploads.

    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.

    let mut uploader = renderer.device.upload_texture(

        &mut renderer.texture_upload_pbo_pool,

);

    let num_updates = update_list.len();

    for (texture_id, updates) in update_list {

        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;

        for update in updates {

            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;

            let mut arc_data = None;

            let dummy_data;

            let data = match source {

                TextureUpdateSource::Bytes { ref data } => {

                    arc_data = Some(data.clone());

                    &data[offset as usize ..]

                TextureUpdateSource::External { id, channel_index } => {

                    let handler = renderer.external_image_handler

                        .as_mut()

                        .expect("Found external image, but no handler set!");

                    // The filter is only relevant for NativeTexture external images.

                    match handler.lock(id, channel_index).source {

                        ExternalImageSource::RawData(data) => {

                            &data[offset as usize ..]

                        ExternalImageSource::Invalid => {

                            // Create a local buffer to fill the pbo.

                            let bpp = texture.get_format().bytes_per_pixel();

                            let width = stride.unwrap_or(rect.width() * bpp);

                            let total_size = width * rect.height();

                            // WR haven't support RGBAF32 format in texture_cache, so

                            // we use u8 type here.

                            dummy_data = vec![0xFFu8; total_size as usize];

                            &dummy_data

                        ExternalImageSource::NativeTexture(eid) => {

                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);

                TextureUpdateSource::DebugClear => {

                    let draw_target = DrawTarget::from_texture(

                        texture,

                        false,

);

                    renderer.device.bind_draw_target(draw_target);

                    renderer.device.clear_target(

                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),

                        None,

                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))

);

                    continue;

};

            stats.items_uploaded += 1;

            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&

                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&

                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&

                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height &&

                rect.area() < renderer.device.batched_upload_threshold();

            if use_batch_upload

                && arc_data.is_some()

                && matches!(renderer.device.upload_method(), &UploadMethod::Immediate)

                && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 {

                skip_staging_buffer(

                    &mut renderer.device,

                    &mut renderer.staging_texture_pool,

                    rect,

                    stride,

                    arc_data.unwrap(),

                    texture_id,

                    texture,

                    &mut batch_upload_buffers,

                    &mut batch_upload_textures,

                    &mut batch_upload_copies,

                    &mut stats,

);

            } else if use_batch_upload {

                copy_into_staging_buffer(

                    &mut renderer.device,

                    &mut uploader,

                    &mut renderer.staging_texture_pool,

                    rect,

                    stride,

                    data,

                    texture_id,

                    texture,

                    &mut batch_upload_buffers,

                    &mut batch_upload_textures,

                    &mut batch_upload_copies,

                    &mut stats,

);

            } else {

                let upload_start_time = precise_time_ns();

                stats.bytes_uploaded += uploader.upload(

                    &mut renderer.device,

                    texture,

                    rect,

                    stride,

                    format_override,

                    data.as_ptr(),

                    data.len()

);

                stats.upload_time += precise_time_ns() - upload_start_time;

            if let TextureUpdateSource::External { id, channel_index } = source {

                let handler = renderer.external_image_handler

                    .as_mut()

                    .expect("Found external image, but no handler set!");

                handler.unlock(id, channel_index);

    let upload_start_time = precise_time_ns();

    // Upload batched texture updates to their temporary textures.

    for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {

        let texture = &batch_upload_textures[batch_buffer.texture_index];

        match batch_buffer.staging_buffer {

            StagingBufferKind::Pbo(pbo) => {

                stats.bytes_uploaded += uploader.upload_staged(

                    &mut renderer.device,

                    texture,

                    DeviceIntRect::from_size(texture.get_dimensions()),

                    None,

                    pbo,

);

            StagingBufferKind::CpuBuffer { bytes, .. } => {

                let bpp = texture.get_format().bytes_per_pixel();

                stats.bytes_uploaded += uploader.upload(

                    &mut renderer.device,

                    texture,

                    batch_buffer.upload_rect,

                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),

                    None,

                    bytes.as_ptr(),

                    bytes.len()

);

                renderer.staging_texture_pool.return_temporary_buffer(bytes);

            StagingBufferKind::Image { bytes, stride } => {

                stats.bytes_uploaded += uploader.upload(

                    &mut renderer.device,

                    texture,

                    batch_buffer.upload_rect,

                    stride,

                    None,

                    bytes.as_ptr(),

                    bytes.len()

);

    stats.upload_time += precise_time_ns() - upload_start_time;

    // Flush all uploads, batched or otherwise.

    let flush_start_time = precise_time_ns();

    uploader.flush(&mut renderer.device);

    stats.upload_time += precise_time_ns() - flush_start_time;

    if !batch_upload_copies.is_empty() {

        // Copy updates that were batch uploaded to their correct destination in the texture cache.

        // Sort them by destination and source to minimize framebuffer binding changes.

        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));

        let gpu_copy_start = precise_time_ns();

        if renderer.device.use_draw_calls_for_texture_copy() {

            // Some drivers have a very high CPU overhead when submitting hundreds of small blit

            // commands (low end intel drivers on Windows for example can take take 100+ ms submitting a

            // few hundred blits). In this case we do the copy with batched draw calls.

            copy_from_staging_to_cache_using_draw_calls(

                renderer,

                &mut stats,

                &batch_upload_textures,

                batch_upload_copies,

);

        } else {

            copy_from_staging_to_cache(

                renderer,

                &batch_upload_textures,

                batch_upload_copies,

);

        stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;

    for texture in batch_upload_textures.drain(..) {

        renderer.staging_texture_pool.return_texture(texture);

    // Update the profile counters. We use add instead of set because

    // this function can be called several times per frame.

    // We don't update the counters when their value is zero, so that

    // the profiler can treat them as events and we can get notified

    // when they happen.

    let upload_total = precise_time_ns() - upload_total_start;

    renderer.profile.add(

        profiler::TOTAL_UPLOAD_TIME,

        profiler::ns_to_ms(upload_total)

);

    if num_updates > 0 {

        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);

    if stats.bytes_uploaded > 0 {

        renderer.profile.add(

            profiler::TEXTURE_UPLOADS_MEM,

            profiler::bytes_to_mb(stats.bytes_uploaded)

);

    if stats.cpu_copy_time > 0 {

        renderer.profile.add(

            profiler::UPLOAD_CPU_COPY_TIME,

            profiler::ns_to_ms(stats.cpu_copy_time)

);

    if stats.upload_time > 0 {

        renderer.profile.add(

            profiler::UPLOAD_TIME,

            profiler::ns_to_ms(stats.upload_time)

);

    if stats.texture_alloc_time > 0 {

        renderer.profile.add(

            profiler::STAGING_TEXTURE_ALLOCATION_TIME,

            profiler::ns_to_ms(stats.texture_alloc_time)

);

    if stats.cpu_buffer_alloc_time > 0 {

        renderer.profile.add(

            profiler::CPU_TEXTURE_ALLOCATION_TIME,

            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)

);

    if stats.num_draw_calls > 0{

        renderer.profile.add(

            profiler::UPLOAD_NUM_COPY_BATCHES,

            stats.num_draw_calls

);

    if stats.gpu_copy_commands_time > 0 {

        renderer.profile.add(

            profiler::UPLOAD_GPU_COPY_TIME,

            profiler::ns_to_ms(stats.gpu_copy_commands_time)

);

    let add_markers = profiler::thread_is_being_profiled();

    if add_markers && stats.bytes_uploaded > 0 {

    	let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded);

    	profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total));

/// Copy an item into a batched upload staging buffer.

fn copy_into_staging_buffer<'a>(

    device: &mut Device,

    uploader: &mut TextureUploader< 'a>,

    staging_texture_pool: &mut UploadTexturePool,

    update_rect: DeviceIntRect,

    update_stride: Option<i32>,

    data: &[u8],

    dest_texture_id: CacheTextureId,

    texture: &Texture,

    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,

    batch_upload_textures: &mut Vec<Texture>,

    batch_upload_copies: &mut Vec<BatchUploadCopy>,

    stats: &mut UploadStats

) {

    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())

        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    // Allocate a region within the staging buffer for this update. If there is

    // no room in an existing buffer then allocate another texture and buffer.

    let (slice, origin) = match allocator.allocate(&update_rect.size()) {

        Some((slice, origin)) => (slice, origin),

        None => {

            let new_slice = FreeRectSlice(buffers.len() as u32);

            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());

            let texture_alloc_time_start = precise_time_ns();

            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());

            stats.texture_alloc_time = precise_time_ns() - texture_alloc_time_start;

            let texture_index = batch_upload_textures.len();

            batch_upload_textures.push(staging_texture);

            let cpu_buffer_alloc_start_time = precise_time_ns();

            let staging_buffer = match device.upload_method() {

                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {

                    bytes: staging_texture_pool.get_temporary_buffer(),

},

                UploadMethod::PixelBuffer(_) => {

                    let pbo = uploader.stage(

                        device,

                        texture.get_format(),

                        BATCH_UPLOAD_TEXTURE_SIZE,

                    ).unwrap();

                    StagingBufferKind::Pbo(pbo)

};

            stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;

            buffers.push(BatchUploadBuffer {

                staging_buffer,

                texture_index,

                upload_rect: DeviceIntRect::zero()

});

            (new_slice, DeviceIntPoint::zero())

};

    let buffer = &mut buffers[slice.0 as usize];

    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());

    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);

    batch_upload_copies.push(BatchUploadCopy {

        src_texture_index: buffer.texture_index,

        src_offset: allocated_rect.min,

        dest_texture_id,

        dest_offset: update_rect.min,

        size: update_rect.size(),

});

    unsafe {

        let memcpy_start_time = precise_time_ns();

        let bpp = texture.get_format().bytes_per_pixel() as usize;

        let width_bytes = update_rect.width() as usize * bpp;

        let src_stride = update_stride.map_or(width_bytes, |stride| {

            assert!(stride >= 0);

            stride as usize

});

        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;

        assert!(src_size <= data.len());

        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);

        let (dst_stride, dst) = match &mut buffer.staging_buffer {

            StagingBufferKind::Pbo(buffer) => (

                buffer.get_stride(),

                buffer.get_mapping(),

),

            StagingBufferKind::CpuBuffer { bytes } => (

                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,

                &mut bytes[..],

),

            StagingBufferKind::Image { .. } => unreachable!(),

};

        // copy the data line-by-line in to the buffer so that we do not overwrite

        // any other region of the buffer.

        for y in 0..allocated_rect.height() as usize {

            let src_start = y * src_stride;

            let src_end = src_start + width_bytes;

            let dst_start = (allocated_rect.min.y as usize + y as usize) * dst_stride +

                allocated_rect.min.x as usize * bpp;

            let dst_end = dst_start + width_bytes;

            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])

        stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;

/// Take this code path instead of copying into a staging CPU buffer when the image

/// we would copy is large enough that it's unlikely anything else would fit in the

/// buffer, therefore we might as well copy directly from the source image's pixels.

fn skip_staging_buffer<'a>(

    device: &mut Device,

    staging_texture_pool: &mut UploadTexturePool,

    update_rect: DeviceIntRect,

    stride: Option<i32>,

    data: Arc<Vec<u8>>,

    dest_texture_id: CacheTextureId,

    texture: &Texture,

    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,

    batch_upload_textures: &mut Vec<Texture>,

    batch_upload_copies: &mut Vec<BatchUploadCopy>,

    stats: &mut UploadStats

) {

    let (_, buffers) = batch_upload_buffers.entry(texture.get_format())

        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));

    let texture_alloc_time_start = precise_time_ns();

    let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());

    stats.texture_alloc_time = precise_time_ns() - texture_alloc_time_start;

    let texture_index = batch_upload_textures.len();

    batch_upload_textures.push(staging_texture);

    buffers.push(BatchUploadBuffer {

        staging_buffer: StagingBufferKind::Image { bytes: data, stride },

        texture_index,

        upload_rect: DeviceIntRect::from_size(update_rect.size())

});

    batch_upload_copies.push(BatchUploadCopy {

        src_texture_index: texture_index,

        src_offset: point2(0, 0),

        dest_texture_id,

        dest_offset: update_rect.min,

        size: update_rect.size(),

});

/// Copy from the staging PBOs or textures to texture cache textures using blit commands.

///

/// Using blits instead of draw calls is supposedly more efficient but some drivers have

/// a very high per-command overhead so in some configurations we end up using

/// copy_from_staging_to_cache_using_draw_calls instead.

fn copy_from_staging_to_cache(

    renderer: &mut Renderer,

    batch_upload_textures: &[Texture],

    batch_upload_copies: Vec<BatchUploadCopy>,

) {

    for copy in batch_upload_copies {

        let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;

        renderer.device.copy_texture_sub_region(

            &batch_upload_textures[copy.src_texture_index],

            copy.src_offset.x as _,

            copy.src_offset.y as _,

            dest_texture,

            copy.dest_offset.x as _,

            copy.dest_offset.y as _,

            copy.size.width as _,

            copy.size.height as _,

);

/// Generate and submit composite shader batches to copy from

/// the staging textures to the destination cache textures.

///

/// If this shows up in GPU time ptofiles we could replace it with

/// a simpler shader (composite.glsl is already quite simple).

fn copy_from_staging_to_cache_using_draw_calls(

    renderer: &mut Renderer,

    stats: &mut UploadStats,

    batch_upload_textures: &[Texture],

    batch_upload_copies: Vec<BatchUploadCopy>,

) {

    let mut copy_instances = Vec::new();

    let mut prev_src = None;

    let mut prev_dst = None;

    let mut dst_texture_size = DeviceSize::new(0.0, 0.0);

    for copy in batch_upload_copies {

        let src_changed = prev_src != Some(copy.src_texture_index);

        let dst_changed = prev_dst != Some(copy.dest_texture_id);

        if (src_changed || dst_changed) && !copy_instances.is_empty() {

            renderer.draw_instanced_batch(

                &copy_instances,

                VertexArrayKind::Copy,

                // We bind the staging texture manually because it isn't known

                // to the texture resolver.

                &BatchTextures::empty(),

                &mut RendererStats::default(),

);

            stats.num_draw_calls += 1;

            copy_instances.clear();

        if dst_changed {

            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;

            dst_texture_size = dest_texture.get_dimensions().to_f32();

            let draw_target = DrawTarget::from_texture(dest_texture, false);

            renderer.device.bind_draw_target(draw_target);

            renderer.shaders

                .borrow_mut()

                .ps_copy

                .bind(

                    &mut renderer.device,

                    &Transform3D::identity(),

                    None,

                    &mut renderer.renderer_errors,

                    &mut renderer.profile,

);

            prev_dst = Some(copy.dest_texture_id);

        if src_changed {

            renderer.device.bind_texture(

                TextureSampler::Color0,

                &batch_upload_textures[copy.src_texture_index],

                Swizzle::default(),

);

            prev_src = Some(copy.src_texture_index)

        let src_rect = DeviceRect::from_origin_and_size(

            copy.src_offset.to_f32(),

            copy.size.to_f32(),

);

        let dst_rect = DeviceRect::from_origin_and_size(

            copy.dest_offset.to_f32(),

            copy.size.to_f32(),

);

        copy_instances.push(CopyInstance {

            src_rect,

            dst_rect,

            dst_texture_size,

});

    if !copy_instances.is_empty() {

        renderer.draw_instanced_batch(

            &copy_instances,

            VertexArrayKind::Copy,

            &BatchTextures::empty(),

            &mut RendererStats::default(),

);

        stats.num_draw_calls += 1;

/// A very basic pool to avoid reallocating staging textures as well as staging

/// CPU side buffers.

pub struct UploadTexturePool {

    /// The textures in the pool associated with a last used frame index.

///

    /// The outer array corresponds to each of teh three supported texture formats.

    textures: [VecDeque<(Texture, u64)>; BATCH_UPLOAD_FORMAT_COUNT],

    // Frame at which to deallocate some textures if there are too many in the pool,

    // for each format.

    delay_texture_deallocation: [u64; BATCH_UPLOAD_FORMAT_COUNT],

    current_frame: u64,

    /// Temporary buffers that are used when using staging uploads + glTexImage2D.

///

    /// Temporary buffers aren't used asynchronously so they can be reused every frame.

    /// To keep things simple we always allocate enough memory for formats with four bytes

    /// per pixel (more than we need for alpha-only textures but it works just as well).

    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,

    min_temporary_buffers: usize,

    delay_buffer_deallocation: u64,

impl UploadTexturePool {

    pub fn new() -> Self {

        UploadTexturePool {

            textures: [VecDeque::new(), VecDeque::new(), VecDeque::new(), VecDeque::new()],

            delay_texture_deallocation: [0; BATCH_UPLOAD_FORMAT_COUNT],

            current_frame: 0,

            temporary_buffers: Vec::new(),

            min_temporary_buffers: 0,

            delay_buffer_deallocation: 0,

    fn format_index(&self, format: ImageFormat) -> usize {

        match format {

            ImageFormat::RGBA8 => 0,

            ImageFormat::BGRA8 => 1,

            ImageFormat::R8 => 2,

            ImageFormat::R16 => 3,

            _ => { panic!("unexpected format {:?}", format); }

    pub fn begin_frame(&mut self) {

        self.current_frame += 1;

        self.min_temporary_buffers = self.temporary_buffers.len();

    /// Create or reuse a staging texture.

///

    /// See also return_texture.

    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {

        // First try to reuse a texture from the pool.

        // "available" here means hasn't been used for 2 frames to avoid stalls.

        // No need to scan the vector. Newer textures are always pushed at the back

        // of the vector so we know the first element is the least recently used.

        let format_idx = self.format_index(format);

        let can_reuse = self.textures[format_idx].get(0)

            .map(|tex| self.current_frame - tex.1 > 2)

            .unwrap_or(false);

        if can_reuse {

            return self.textures[format_idx].pop_front().unwrap().0;

        // If we couldn't find an available texture, create a new one.

        device.create_texture(

            ImageBufferKind::Texture2D,

            format,

            BATCH_UPLOAD_TEXTURE_SIZE.width,

            BATCH_UPLOAD_TEXTURE_SIZE.height,

            TextureFilter::Nearest,

            // Currently we need render target support as we always use glBlitFramebuffer

            // to copy the texture data. Instead, we should use glCopyImageSubData on some

            // platforms, and avoid creating the FBOs in that case.

            Some(RenderTargetInfo { has_depth: false }),

    /// Hand the staging texture back to the pool after being done with uploads.

///

    /// The texture must have been obtained from this pool via get_texture.

    pub fn return_texture(&mut self, texture: Texture) {

        let format_idx = self.format_index(texture.get_format());

        self.textures[format_idx].push_back((texture, self.current_frame));

    /// Create or reuse a temporary CPU buffer.

///

    /// These buffers are used in the batched upload path when PBOs are not supported.

    /// Content is first written to the temporary buffer and uploaded via a single

    /// glTexSubImage2D call.

    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {

        let buffer = self.temporary_buffers.pop().unwrap_or_else(|| {

            vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]

});

        self.min_temporary_buffers = self.min_temporary_buffers.min(self.temporary_buffers.len());

        buffer

    /// Return memory that was obtained from this pool via get_temporary_buffer.

    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {

        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);

        self.temporary_buffers.push(buffer);

    /// Deallocate this pool's CPU and GPU memory.

    pub fn delete_textures(&mut self, device: &mut Device) {

        for format in &mut self.textures {

            while let Some(texture) = format.pop_back() {

                device.delete_texture(texture.0)

        self.temporary_buffers.clear();

    /// Deallocate some textures if there are too many for a long time.

    pub fn end_frame(&mut self, device: &mut Device) {

        for format_idx in 0..self.textures.len() {

            // Count the number of reusable staging textures.

            // if it stays high for a large number of frames, truncate it back to 8-ish

            // over multiple frames.

            let mut num_reusable_textures = 0;

            for texture in &self.textures[format_idx] {

                if self.current_frame - texture.1 > 2 {

                    num_reusable_textures += 1;

            if num_reusable_textures < 8 {

                // Don't deallocate textures for another 120 frames.

                self.delay_texture_deallocation[format_idx] = self.current_frame + 120;

            // Deallocate up to 4 staging textures every frame.

            let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {

                num_reusable_textures.min(4)

            } else {

};

            for _ in 0..to_remove {

                let texture = self.textures[format_idx].pop_front().unwrap().0;

                device.delete_texture(texture);

        // Similar logic for temporary CPU buffers. Our calls to get and return

        // temporary buffers should have been balanced for this frame, but the call

        // get_temporary_buffer will allocate a buffer if the vec is empty. Since we

        // carry these buffers from frame to frame, we keep track of the smallest

        // length of the temporary_buffers vec that we encountered this frame. Those

        // buffers were not touched and we deallocate some if there are a lot of them.

        let unused_buffers = self.min_temporary_buffers;

        if unused_buffers < 8 {

            self.delay_buffer_deallocation = self.current_frame + 120;

        let to_remove = if self.current_frame > self.delay_buffer_deallocation  {

            unused_buffers.min(4)

        } else {

};

        for _ in 0..to_remove {

            // Unlike textures it doesn't matter whether we pop from the front or back

            // of the vector.

            self.temporary_buffers.pop();

    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {

        for buf in &self.temporary_buffers {

            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };

        for format in &self.textures {

            for texture in format {

                report.upload_staging_textures += texture.0.size_in_bytes();

struct UploadStats {

    num_draw_calls: u32,

    upload_time: u64,

    cpu_buffer_alloc_time: u64,

    texture_alloc_time: u64,

    cpu_copy_time: u64,

    gpu_copy_commands_time: u64,

    bytes_uploaded: usize,

    items_uploaded: usize,

#[derive(Debug)]

enum StagingBufferKind<'a> {

    Pbo(UploadStagingBuffer<'a>),

    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> },

    Image { bytes: Arc<Vec<u8>>, stride: Option<i32> },

#[derive(Debug)]

struct BatchUploadBuffer<'a> {

    staging_buffer: StagingBufferKind<'a>,

    texture_index: usize,

    // A rectangle containing all items going into this staging texture, so

    // that we can avoid uploading the entire area if we are using glTexSubImage2d.

    upload_rect: DeviceIntRect,

// On some devices performing many small texture uploads is slow, so instead we batch

// updates in to a small number of uploads to temporary textures, then copy from those

// textures to the correct place in the texture cache.

// A list of temporary textures that batches of updates are uploaded to.

#[derive(Debug)]

struct BatchUploadCopy {

    // Index within batch_upload_textures

    src_texture_index: usize,

    src_offset: DeviceIntPoint,

    dest_texture_id: CacheTextureId,

    dest_offset: DeviceIntPoint,

    size: DeviceIntSize,