Source code

Revision control

Other Tools

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use super::super::shader_source::{OPTIMIZED_SHADERS, UNOPTIMIZED_SHADERS};
use api::{ColorF, ImageDescriptor, ImageFormat};
use api::{MixBlendMode, ImageBufferKind, VoidPtrToSizeFn};
use api::{CrashAnnotator, CrashAnnotation, CrashAnnotatorGuard};
use api::units::*;
use euclid::default::Transform3D;
use gleam::gl;
use crate::render_api::MemoryReport;
use crate::internal_types::{FastHashMap, RenderTargetInfo, Swizzle, SwizzleSettings};
use crate::util::round_up_to_multiple;
use crate::profiler;
use log::Level;
use smallvec::SmallVec;
use std::{
borrow::Cow,
cell::{Cell, RefCell},
cmp,
collections::hash_map::Entry,
marker::PhantomData,
mem,
num::NonZeroUsize,
os::raw::c_void,
ops::Add,
path::PathBuf,
ptr,
rc::Rc,
slice,
sync::Arc,
thread,
time::Duration,
};
use webrender_build::shader::{
ProgramSourceDigest, ShaderKind, ShaderVersion, build_shader_main_string,
build_shader_prefix_string, do_build_shader_string, shader_source_from_file,
};
use malloc_size_of::MallocSizeOfOps;
/// Sequence number identifying a frame, as tracked by the device layer.
#[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuFrameId(usize);

impl GpuFrameId {
    /// Wraps a raw frame counter value.
    pub fn new(value: usize) -> Self {
        Self(value)
    }
}

/// Adding a frame count to a frame id yields the id that many frames later.
impl Add<usize> for GpuFrameId {
    type Output = Self;

    fn add(self, frames: usize) -> Self::Output {
        Self(self.0 + frames)
    }
}
/// Index of a slot that a texture can be bound to.
pub struct TextureSlot(pub usize);
// In some places we need to temporarily bind a texture to any slot.
// Slot 0 is the one used for those temporary bindings.
const DEFAULT_TEXTURE: TextureSlot = TextureSlot(0);
/// Depth comparison function.
///
/// Each variant's discriminant is the matching `gl` constant, so the enum
/// value can be cast directly to the raw GL enum.
#[repr(u32)]
pub enum DepthFunction {
Always = gl::ALWAYS,
Less = gl::LESS,
LessEqual = gl::LEQUAL,
}
/// Filtering mode requested for a texture.
///
/// Serializable when the `capture` feature is enabled and deserializable
/// when the `replay` feature is enabled.
#[repr(u32)]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum TextureFilter {
Nearest,
Linear,
Trilinear,
}
/// Describes one texture-transfer workflow as a pair of formats: the one the
/// GPU stores and the one callers supply.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct TextureFormatPair<T> {
    /// Format the GPU natively stores texels in.
    pub internal: T,
    /// Format we expect the users to provide the texels in.
    pub external: T,
}

/// A single format converts into a pair that uses it on both sides.
impl<T: Copy> From<T> for TextureFormatPair<T> {
    fn from(format: T) -> Self {
        let (internal, external) = (format, format);
        Self { internal, external }
    }
}
/// Component data type of a vertex attribute.
///
/// When bound (see `VertexAttribute::bind_to_vao`), `F32`, `U8Norm` and
/// `U16Norm` go through `vertex_attrib_pointer` (the `*Norm` kinds with the
/// normalized flag set), while `I32` and `U16` go through
/// `vertex_attrib_i_pointer`.
#[derive(Debug)]
pub enum VertexAttributeKind {
// 4 bytes per component, bound as gl::FLOAT.
F32,
// 1 byte per component, bound as normalized gl::UNSIGNED_BYTE.
U8Norm,
// 2 bytes per component, bound as normalized gl::UNSIGNED_SHORT.
U16Norm,
// 4 bytes per component, bound as integer gl::INT.
I32,
// 2 bytes per component, bound as integer gl::UNSIGNED_SHORT.
U16,
}
/// Description of a single vertex attribute.
#[derive(Debug)]
pub struct VertexAttribute {
/// Name of the attribute.
// NOTE(review): presumably the attribute name used in the shader source — confirm.
pub name: &'static str,
/// Number of components; multiplied by the kind's byte size to get the
/// attribute's size, and passed as the component count when binding.
pub count: u32,
/// Data type of each component.
pub kind: VertexAttributeKind,
}
/// Layout of the vertex data consumed by a draw: per-vertex attributes plus
/// optional instance attributes.
#[derive(Debug)]
pub struct VertexDescriptor {
/// Attributes bound from the main VBO with a divisor of 0, starting at
/// attribute index 0.
pub vertex_attributes: &'static [VertexAttribute],
/// Attributes bound from the instance VBO with the instance divisor; their
/// attribute indices follow those of `vertex_attributes`.
pub instance_attributes: &'static [VertexAttribute],
}
/// Framebuffer binding point used by `FBOId::bind`.
enum FBOTarget {
// Maps to gl::READ_FRAMEBUFFER.
Read,
// Maps to gl::DRAW_FRAMEBUFFER.
Draw,
}
/// Method of uploading texel data from CPU to GPU.
#[derive(Debug, Clone)]
pub enum UploadMethod {
/// Just call `glTexSubImage` directly with the CPU data pointer
Immediate,
/// Accumulate the changes in PBO first before transferring to a texture.
// NOTE(review): the payload is presumably the usage hint for the PBO's
// buffer storage — confirm against the upload code.
PixelBuffer(VertexUsageHint),
}
/// Plain old data that can be used to initialize a texture.
///
/// # Safety
///
/// NOTE(review): the trait is `unsafe`, so implementors presumably must be
/// types whose values may be treated as raw texel bytes — confirm the exact
/// contract against the code that consumes `Texel` data.
pub unsafe trait Texel: Copy {}
unsafe impl Texel for u8 {}
unsafe impl Texel for f32 {}
/// Returns the size in bytes of a depth target with the given dimensions.
///
/// The width and height are widened to `usize` *before* being multiplied
/// (matching `Texture::size_in_bytes`), so the pixel count of a large target
/// cannot overflow the narrower integer type the dimensions are stored in.
fn depth_target_size_in_bytes(dimensions: &DeviceIntSize) -> usize {
    // DEPTH24 textures generally reserve 3 bytes for depth and 1 byte
    // for stencil, so we measure them as 32 bits.
    const BYTES_PER_PIXEL: usize = 4;
    let width = dimensions.width as usize;
    let height = dimensions.height as usize;
    width * height * BYTES_PER_PIXEL
}
pub fn get_gl_target(target: ImageBufferKind) -> gl::GLuint {
match target {
ImageBufferKind::Texture2D => gl::TEXTURE_2D,
ImageBufferKind::TextureRect => gl::TEXTURE_RECTANGLE,
ImageBufferKind::TextureExternal => gl::TEXTURE_EXTERNAL_OES,
}
}
/// Inverse of `get_gl_target`: maps a GL texture target back to its image
/// buffer kind.
///
/// # Panics
///
/// Panics if `target` is not one of the three supported texture targets.
pub fn from_gl_target(target: gl::GLuint) -> ImageBufferKind {
    let kind = match target {
        gl::TEXTURE_2D => Some(ImageBufferKind::Texture2D),
        gl::TEXTURE_RECTANGLE => Some(ImageBufferKind::TextureRect),
        gl::TEXTURE_EXTERNAL_OES => Some(ImageBufferKind::TextureExternal),
        _ => None,
    };
    kind.unwrap_or_else(|| panic!("Unexpected target {:?}", target))
}
/// Returns true if `extension` appears, as an exact match, in `extensions`.
fn supports_extension(extensions: &[String], extension: &str) -> bool {
    extensions
        .iter()
        .map(String::as_str)
        .any(|name| name == extension)
}
/// Picks the shader version matching the kind of GL context in use.
fn get_shader_version(gl: &dyn gl::Gl) -> ShaderVersion {
    let api = gl.get_type();
    match api {
        gl::GlType::Gles => ShaderVersion::Gles,
        gl::GlType::Gl => ShaderVersion::Gl,
    }
}
// Look up the unoptimized source of the shader `shader_name`.
//
// When an override directory is supplied the source is read from
// `<base_path>/<shader_name>.glsl`; otherwise it is borrowed from the
// built-in shader table. Panics if the built-in table has no such shader.
pub fn get_unoptimized_shader_source(shader_name: &str, base_path: Option<&PathBuf>) -> Cow<'static, str> {
    match base_path {
        Some(base) => {
            let file_name = format!("{}.glsl", shader_name);
            let shader_path = base.join(&file_name);
            Cow::Owned(shader_source_from_file(&shader_path))
        }
        None => {
            let builtin = UNOPTIMIZED_SHADERS
                .get(shader_name)
                .expect("Shader not found");
            Cow::Borrowed(builtin.source)
        }
    }
}
/// Callback interface for file-change notifications. Implementors must be
/// `Send`.
// NOTE(review): presumably invoked by a file watcher (e.g. for shader
// hot-reloading) — the caller is not visible here; confirm.
pub trait FileWatcherHandler: Send {
/// Reports that the file at `path` has changed.
fn file_changed(&self, path: PathBuf);
}
impl VertexAttributeKind {
    /// Size in bytes of a single component of this kind.
    fn size_in_bytes(&self) -> u32 {
        match *self {
            VertexAttributeKind::U8Norm => 1,
            VertexAttributeKind::U16Norm | VertexAttributeKind::U16 => 2,
            VertexAttributeKind::F32 | VertexAttributeKind::I32 => 4,
        }
    }
}
impl VertexAttribute {
    /// Total size in bytes of this attribute (component count times
    /// component size).
    fn size_in_bytes(&self) -> u32 {
        self.count * self.kind.size_in_bytes()
    }

    /// Enables attribute `attr_index`, sets its divisor, and points it at
    /// `offset` within the currently bound buffer using the given `stride`.
    ///
    /// Float and normalized kinds use `vertex_attrib_pointer`; integer kinds
    /// use `vertex_attrib_i_pointer`.
    fn bind_to_vao(
        &self,
        attr_index: gl::GLuint,
        divisor: gl::GLuint,
        stride: gl::GLint,
        offset: gl::GLuint,
        gl: &dyn gl::Gl,
    ) {
        gl.enable_vertex_attrib_array(attr_index);
        gl.vertex_attrib_divisor(attr_index, divisor);

        let components = self.count as gl::GLint;

        // `Some(normalized)` selects the float pointer call with that
        // normalization flag; `None` selects the integer pointer call.
        let (data_type, float_normalized) = match self.kind {
            VertexAttributeKind::F32 => (gl::FLOAT, Some(false)),
            VertexAttributeKind::U8Norm => (gl::UNSIGNED_BYTE, Some(true)),
            VertexAttributeKind::U16Norm => (gl::UNSIGNED_SHORT, Some(true)),
            VertexAttributeKind::I32 => (gl::INT, None),
            VertexAttributeKind::U16 => (gl::UNSIGNED_SHORT, None),
        };

        match float_normalized {
            Some(normalized) => {
                gl.vertex_attrib_pointer(
                    attr_index,
                    components,
                    data_type,
                    normalized,
                    stride,
                    offset,
                );
            }
            None => {
                gl.vertex_attrib_i_pointer(
                    attr_index,
                    components,
                    data_type,
                    stride,
                    offset,
                );
            }
        }
    }
}
impl VertexDescriptor {
    /// Combined size in bytes of all instance attributes.
    fn instance_stride(&self) -> u32 {
        self.instance_attributes
            .iter()
            .fold(0, |total, attr| total + attr.size_in_bytes())
    }

    /// Binds `vbo` and lays out `attributes` back to back in it, assigning
    /// consecutive attribute indices starting at `start_index`.
    fn bind_attributes(
        attributes: &[VertexAttribute],
        start_index: usize,
        divisor: u32,
        gl: &dyn gl::Gl,
        vbo: VBOId,
    ) {
        vbo.bind(gl);

        // The stride is the size of one full set of attributes.
        let stride = attributes
            .iter()
            .fold(0u32, |total, attr| total + attr.size_in_bytes());

        let mut offset: gl::GLuint = 0;
        for (attr_index, attr) in (start_index..).zip(attributes) {
            attr.bind_to_vao(
                attr_index as gl::GLuint,
                divisor,
                stride as gl::GLint,
                offset,
                gl,
            );
            offset += attr.size_in_bytes();
        }
    }

    /// Binds the per-vertex attributes from `main` and, if there are any,
    /// the instance attributes from `instance` with `instance_divisor`.
    fn bind(&self, gl: &dyn gl::Gl, main: VBOId, instance: VBOId, instance_divisor: u32) {
        Self::bind_attributes(self.vertex_attributes, 0, 0, gl, main);

        if self.instance_attributes.is_empty() {
            return;
        }

        // Instance attribute indices follow directly after the vertex ones.
        let first_instance_index = self.vertex_attributes.len();
        Self::bind_attributes(
            self.instance_attributes,
            first_instance_index,
            instance_divisor,
            gl,
            instance,
        );
    }
}
impl VBOId {
fn bind(&self, gl: &dyn gl::Gl) {
gl.bind_buffer(gl::ARRAY_BUFFER, self.0);
}
}
impl IBOId {
fn bind(&self, gl: &dyn gl::Gl) {
gl.bind_buffer(gl::ELEMENT_ARRAY_BUFFER, self.0);
}
}
impl FBOId {
fn bind(&self, gl: &dyn gl::Gl, target: FBOTarget) {
let target = match target {
FBOTarget::Read => gl::READ_FRAMEBUFFER,
FBOTarget::Draw => gl::DRAW_FRAMEBUFFER,
};
gl.bind_framebuffer(target, self.0);
}
}
/// A set of vertex attributes paired with the VBO that supplies their data.
///
/// Created by `VBO::stream_with`.
pub struct Stream<'a> {
// Layout of the data held in `vbo`.
attributes: &'a [VertexAttribute],
// Buffer the attributes are read from.
vbo: VBOId,
}
/// A GL buffer object holding elements of type `V`.
pub struct VBO<V> {
    /// GL buffer name; expected to be 0 by the time the value is dropped.
    id: gl::GLuint,
    /// GL target of the buffer.
    target: gl::GLenum,
    /// Value reported by `allocated_count`.
    // NOTE(review): presumably the number of `V` elements the buffer has
    // storage for — confirm against the allocation code.
    allocated_count: usize,
    /// Ties the buffer to its element type without storing a `V`.
    marker: PhantomData<V>,
}

impl<V> VBO<V> {
    /// Returns the allocated count recorded for this buffer.
    pub fn allocated_count(&self) -> usize {
        self.allocated_count
    }

    /// Pairs this buffer with the attribute layout describing its contents.
    ///
    /// In debug builds, asserts that the attributes add up to exactly
    /// `size_of::<V>()` bytes.
    pub fn stream_with<'a>(&self, attributes: &'a [VertexAttribute]) -> Stream<'a> {
        debug_assert_eq!(
            mem::size_of::<V>(),
            attributes
                .iter()
                .fold(0usize, |total, attr| total + attr.size_in_bytes() as usize)
        );
        let vbo = VBOId(self.id);
        Stream { attributes, vbo }
    }
}

/// In debug builds, checks that the id was zeroed before the value is
/// dropped, unless the thread is already panicking.
impl<T> Drop for VBO<T> {
    fn drop(&mut self) {
        debug_assert!(thread::panicking() || self.id == 0);
    }
}
/// Handle to a GL texture that is referenced by id rather than owned.
// NOTE(review): "not owned" is inferred from the absence of a `Drop`
// assertion like the one on `Texture` — confirm.
#[cfg_attr(feature = "replay", derive(Clone))]
#[derive(Debug)]
pub struct ExternalTexture {
// GL texture name.
id: gl::GLuint,
// GL texture target (see `get_gl_target`).
target: gl::GLuint,
// Swizzle associated with this texture.
swizzle: Swizzle,
// UV rectangle returned by `get_uv_rect`.
uv_rect: TexelRect,
}
impl ExternalTexture {
    /// Wraps the GL texture `id`, translating `target` into its GL texture
    /// target.
    pub fn new(
        id: u32,
        target: ImageBufferKind,
        swizzle: Swizzle,
        uv_rect: TexelRect,
    ) -> Self {
        let target = get_gl_target(target);
        Self { id, target, swizzle, uv_rect }
    }

    /// Returns the raw GL texture name.
    #[cfg(feature = "replay")]
    pub fn internal_id(&self) -> gl::GLuint {
        self.id
    }

    /// Returns the UV rectangle this texture was created with.
    pub fn get_uv_rect(&self) -> TexelRect {
        self.uv_rect
    }
}
// Bit flags describing properties of a `Texture`; the default value has no
// flags set.
bitflags! {
#[derive(Default)]
pub struct TextureFlags: u32 {
/// This texture corresponds to one of the shared texture caches.
const IS_SHARED_TEXTURE_CACHE = 1 << 0;
}
}
/// WebRender interface to an OpenGL texture.
///
/// Because freeing a texture requires various device handles that are not
/// reachable from this struct, manual destruction via `Device` is required.
/// Our `Drop` implementation asserts that this has happened.
#[derive(Debug)]
pub struct Texture {
/// GL texture name; must be reset to 0 before the value is dropped.
id: gl::GLuint,
/// GL texture target (see `get_gl_target` / `from_gl_target`).
target: gl::GLuint,
/// Texel format of the texture.
format: ImageFormat,
/// Dimensions of the texture.
size: DeviceIntSize,
/// Filtering mode the texture was created with.
filter: TextureFilter,
/// Additional properties of the texture.
flags: TextureFlags,
/// An internally mutable swizzling state that may change between batches.
active_swizzle: Cell<Swizzle>,
/// Framebuffer Object allowing this texture to be rendered to.
///
/// `None` if this texture is not used as a render target or if a depth buffer is needed.
fbo: Option<FBOId>,
/// Same as the above, but with a depth buffer attached.
///
/// FBOs are cheap to create but expensive to reconfigure (since doing so
/// invalidates framebuffer completeness caching). Moreover, rendering with
/// a depth buffer attached but the depth write+test disabled relies on the
/// driver to optimize it out of the rendering pass, which most drivers
/// probably do but, according to jgilbert, is best not to rely on.
///
/// So we lazily generate a second FBO with depth. It is `None` if this
/// texture is not used as a render target _or_ if it is, but the depth
/// buffer has never been requested.
///
/// Note that we always fill fbo, and then lazily create fbo_with_depth
/// when needed. We could make both lazy (i.e. render targets would have one
/// or the other, but not both, unless they were actually used in both
/// configurations). But that would complicate a lot of logic in this module,
/// and FBOs are cheap enough to create.
fbo_with_depth: Option<FBOId>,
/// Frame in which this texture was last used.
last_frame_used: GpuFrameId,
}
impl Texture {
    /// Dimensions of the texture.
    pub fn get_dimensions(&self) -> DeviceIntSize {
        self.size
    }

    /// Texel format of the texture.
    pub fn get_format(&self) -> ImageFormat {
        self.format
    }

    /// Filtering mode of the texture.
    pub fn get_filter(&self) -> TextureFilter {
        self.filter
    }

    /// Image buffer kind corresponding to the texture's GL target.
    pub fn get_target(&self) -> ImageBufferKind {
        from_gl_target(self.target)
    }

    /// Whether an FBO with a depth buffer exists for this texture.
    pub fn supports_depth(&self) -> bool {
        self.fbo_with_depth.is_some()
    }

    /// Frame in which this texture was last used.
    pub fn last_frame_used(&self) -> GpuFrameId {
        self.last_frame_used
    }

    /// Whether this texture was last used in exactly `frame_id`.
    pub fn used_in_frame(&self, frame_id: GpuFrameId) -> bool {
        frame_id == self.last_frame_used
    }

    /// Whether this texture has an FBO, i.e. can be rendered to.
    pub fn is_render_target(&self) -> bool {
        self.fbo.is_some()
    }

    /// Returns true if this texture was used within `threshold` frames of
    /// the current frame.
    pub fn used_recently(&self, current_frame_id: GpuFrameId, threshold: usize) -> bool {
        let expires_after = self.last_frame_used + threshold;
        current_frame_id <= expires_after
    }

    /// Returns the flags for this texture.
    pub fn flags(&self) -> &TextureFlags {
        &self.flags
    }

    /// Returns a mutable borrow of the flags for this texture.
    pub fn flags_mut(&mut self) -> &mut TextureFlags {
        &mut self.flags
    }

    /// Returns the number of bytes (generally in GPU memory) that this texture
    /// consumes.
    pub fn size_in_bytes(&self) -> usize {
        let bytes_per_pixel = self.format.bytes_per_pixel() as usize;
        let (width, height) = (self.size.width as usize, self.size.height as usize);
        bytes_per_pixel * width * height
    }

    /// Converts this texture into an `ExternalTexture` referring to the same
    /// GL texture, with the default swizzle and a UV rect covering the whole
    /// texture.
    #[cfg(feature = "replay")]
    pub fn into_external(mut self) -> ExternalTexture {
        // TODO(gw): Support custom UV rect for external textures during captures
        let uv_rect = TexelRect::new(
            0.0,
            0.0,
            self.size.width as f32,
            self.size.height as f32,
        );
        // Zero our id so `Drop` doesn't complain: the GL name has moved out.
        let id = mem::replace(&mut self.id, 0);
        ExternalTexture {
            id,
            target: self.target,
            swizzle: Swizzle::default(),
            uv_rect,
        }
    }
}
/// In debug builds, asserts that the texture id was reset to 0 before the
/// value is dropped. The check is skipped while the thread is already
/// panicking.
impl Drop for Texture {
fn drop(&mut self) {
debug_assert!(thread::panicking() || self.id == 0);
}
}
/// A GL shader program together with its uniform locations and a
/// description of the source it is built from.
pub struct Program {
    /// GL program name; must be reset to 0 before the value is dropped.
    id: gl::GLuint,
    /// Uniform locations.
    // NOTE(review): presumably the locations of the transform, mode and
    // texture-size uniforms — confirm where they are looked up.
    u_transform: gl::GLint,
    u_mode: gl::GLint,
    u_texture_size: gl::GLint,
    /// Name, features and digest of the program's source.
    source_info: ProgramSourceInfo,
    /// Value reported by `is_initialized`.
    is_initialized: bool,
}

impl Program {
    /// Returns the program's `is_initialized` flag.
    pub fn is_initialized(&self) -> bool {
        self.is_initialized
    }
}

/// In debug builds, asserts that the program id was zeroed before drop,
/// unless the thread is already panicking.
impl Drop for Program {
    fn drop(&mut self) {
        let leaked = self.id != 0 && !thread::panicking();
        debug_assert!(!leaked, "renderer::deinit not called");
    }
}
/// A vertex array object identified only by its GL name.
pub struct CustomVAO {
    /// GL vertex array name; must be reset to 0 before the value is dropped.
    id: gl::GLuint,
}

/// In debug builds, asserts that the id was zeroed before drop, unless the
/// thread is already panicking.
impl Drop for CustomVAO {
    fn drop(&mut self) {
        let leaked = self.id != 0 && !thread::panicking();
        debug_assert!(!leaked, "renderer::deinit not called");
    }
}
/// A vertex array object together with the buffers it refers to.
pub struct VAO {
    /// GL vertex array name; must be reset to 0 before the value is dropped.
    id: gl::GLuint,
    /// Index buffer.
    ibo_id: IBOId,
    /// Buffer holding the per-vertex data.
    main_vbo_id: VBOId,
    /// Buffer holding the instance data.
    instance_vbo_id: VBOId,
    /// Instance stride recorded for this VAO.
    // NOTE(review): presumably the size in bytes of one instance — confirm.
    instance_stride: usize,
    /// Divisor recorded for the instance attributes.
    instance_divisor: u32,
    /// Ownership flag for the vertex and index buffers.
    // NOTE(review): presumably controls whether they are deleted together
    // with the VAO — confirm against the deletion code.
    owns_vertices_and_indices: bool,
}

/// In debug builds, asserts that the id was zeroed before drop, unless the
/// thread is already panicking.
impl Drop for VAO {
    fn drop(&mut self) {
        let leaked = self.id != 0 && !thread::panicking();
        debug_assert!(!leaked, "renderer::deinit not called");
    }
}
/// A pixel buffer object and the size recorded as reserved for it.
#[derive(Debug)]
pub struct PBO {
    /// GL buffer name; must be reset to 0 before the value is dropped.
    id: gl::GLuint,
    /// Value reported by `get_reserved_size`.
    // NOTE(review): presumably a size in bytes — confirm.
    reserved_size: usize,
}

impl PBO {
    /// Returns the size recorded as reserved for this buffer.
    pub fn get_reserved_size(&self) -> usize {
        self.reserved_size
    }
}

/// In debug builds, asserts that the id was zeroed before drop, unless the
/// thread is already panicking.
impl Drop for PBO {
    fn drop(&mut self) {
        let leaked = self.id != 0 && !thread::panicking();
        debug_assert!(
            !leaked,
            "renderer::deinit not called or PBO not returned to pool"
        );
    }
}
/// Guard exposing the contents of a buffer as a byte slice while it is bound
/// to `gl::PIXEL_PACK_BUFFER`; dropping the guard unmaps and unbinds it.
pub struct BoundPBO<'a> {
    /// Device whose GL context holds the binding.
    device: &'a mut Device,
    /// The exposed bytes.
    // NOTE(review): presumably the mapped contents of the buffer — confirm
    // against the code that constructs the guard.
    pub data: &'a [u8]
}

impl<'a> Drop for BoundPBO<'a> {
    fn drop(&mut self) {
        // Unmap first, then clear the pack-buffer binding.
        let gl = &self.device.gl;
        gl.unmap_buffer(gl::PIXEL_PACK_BUFFER);
        gl.bind_buffer(gl::PIXEL_PACK_BUFFER, 0);
    }
}
// Typed wrappers around raw GL object names, so that the different kinds of
// object cannot be mixed up.

/// GL name of a framebuffer object.
#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
pub struct FBOId(gl::GLuint);
/// GL name of a renderbuffer object.
#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
pub struct RBOId(gl::GLuint);
/// GL name of a buffer bound to `gl::ARRAY_BUFFER`.
#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
pub struct VBOId(gl::GLuint);
/// GL name of a buffer bound to `gl::ELEMENT_ARRAY_BUFFER`.
#[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
struct IBOId(gl::GLuint);
/// Where a program's shader source comes from.
#[derive(Clone, Debug)]
enum ProgramSourceType {
// Source is assembled at run time from the unoptimized shader files.
Unoptimized,
// Source is taken as-is from the build-time optimized shaders for the
// given shader version.
Optimized(ShaderVersion),
}
/// Identifies the source of a shader program: its base name, feature list,
/// source type, and a digest of everything that goes into the source.
#[derive(Clone, Debug)]
pub struct ProgramSourceInfo {
// Shader name without features.
base_filename: &'static str,
// Features the shader is built with.
features: Vec<&'static str>,
// Full name (base name plus features joined with '_') as a C string.
full_name_cstr: Rc<std::ffi::CString>,
// Whether the optimized or unoptimized source is used.
source_type: ProgramSourceType,
// Digest computed in `ProgramSourceInfo::new`.
digest: ProgramSourceDigest,
}
impl ProgramSourceInfo {
/// Builds the source description for shader `name` with `features` and
/// computes its digest.
///
/// The digest hashes, in this order: the device's renderer name, then
/// either the optimized shader's digest, or the unoptimized prefix string
/// followed by the digest of the main shader source. The order of the
/// `hasher.write` calls below therefore determines the resulting digest.
///
/// The optimized source is used when the device has optimized shaders
/// enabled and one exists for this shader version and full name. If they
/// are enabled but the shader is missing, a warning is logged and the
/// unoptimized source is used instead.
///
/// # Panics
///
/// On the unoptimized path, panics if no built-in shader called `name`
/// exists.
fn new(
device: &Device,
name: &'static str,
features: &[&'static str],
) -> Self {
// Compute the digest. Assuming the device has a `ProgramCache`, this
// will always be needed, whereas the source is rarely needed.
use std::collections::hash_map::DefaultHasher;
use std::hash::Hasher;
// Setup.
let mut hasher = DefaultHasher::new();
let gl_version = get_shader_version(&*device.gl());
// Hash the renderer name.
hasher.write(device.capabilities.renderer_name.as_bytes());
let full_name = Self::make_full_name(name, features);
let optimized_source = if device.use_optimized_shaders {
OPTIMIZED_SHADERS.get(&(gl_version, &full_name)).or_else(|| {
warn!("Missing optimized shader source for {}", &full_name);
None
})
} else {
None
};
let source_type = match optimized_source {
Some(source_and_digest) => {
// Optimized shader sources are used as-is, without any run-time processing.
// The vertex and fragment shaders are different, so must both be hashed.
// We use the hashes that were computed at build time, and verify it in debug builds.
if cfg!(debug_assertions) {
let mut h = DefaultHasher::new();
h.write(source_and_digest.vert_source.as_bytes());
h.write(source_and_digest.frag_source.as_bytes());
let d: ProgramSourceDigest = h.into();
let digest = d.to_string();
debug_assert_eq!(digest, source_and_digest.digest);
hasher.write(digest.as_bytes());
} else {
hasher.write(source_and_digest.digest.as_bytes());
}
ProgramSourceType::Optimized(gl_version)
}
None => {
// For non-optimized sources we compute the hash by walking the static strings
// in the same order as we would when concatenating the source, to avoid
// heap-allocating in the common case.
//
// Note that we cheat a bit to make the hashing more efficient. First, the only
// difference between the vertex and fragment shader is a single deterministic
// define, so we don't need to hash both. Second, we precompute the digest of the
// expanded source file at build time, and then just hash that digest here.
let override_path = device.resource_override_path.as_ref();
let source_and_digest = UNOPTIMIZED_SHADERS.get(&name).expect("Shader not found");
// Hash the prefix string.
build_shader_prefix_string(
gl_version,
&features,
ShaderKind::Vertex,
&name,
&mut |s| hasher.write(s.as_bytes()),
);
// Hash the shader file contents. We use a precomputed digest, and
// verify it in debug builds.
// With an override path the source may differ from the built-in one, so
// the digest is always recomputed from the actual source in that case.
if override_path.is_some() || cfg!(debug_assertions) {
let mut h = DefaultHasher::new();
build_shader_main_string(
&name,
&|f| get_unoptimized_shader_source(f, override_path),
&mut |s| h.write(s.as_bytes())
);
let d: ProgramSourceDigest = h.into();
let digest = format!("{}", d);
debug_assert!(override_path.is_some() || digest == source_and_digest.digest);
hasher.write(digest.as_bytes());
} else {
hasher.write(source_and_digest.digest.as_bytes());
}
ProgramSourceType::Unoptimized
}
};
// Finish.
ProgramSourceInfo {
base_filename: name,
features: features.to_vec(),
full_name_cstr: Rc::new(std::ffi::CString::new(full_name).unwrap()),
source_type,
digest: hasher.into(),
}
}
/// Returns the complete shader source for the stage `kind`.
///
/// Optimized sources are copied from the build-time table; unoptimized
/// sources are assembled through `Device::build_shader_string`.
///
/// # Panics
///
/// Panics if the source type is optimized but the optimized shader table
/// has no entry for this shader.
fn compute_source(&self, device: &Device, kind: ShaderKind) -> String {
let full_name = self.full_name();
match self.source_type {
ProgramSourceType::Optimized(gl_version) => {
let shader = OPTIMIZED_SHADERS
.get(&(gl_version, &full_name))
.unwrap_or_else(|| panic!("Missing optimized shader source for {}", full_name));
match kind {
ShaderKind::Vertex => shader.vert_source.to_string(),
ShaderKind::Fragment => shader.frag_source.to_string(),
}
},
ProgramSourceType::Unoptimized => {
let mut src = String::new();
device.build_shader_string(
&self.features,
kind,
self.base_filename,
|s| src.push_str(s),
);
src
}
}
}
/// Returns `base_filename` alone when there are no features, otherwise
/// `base_filename` followed by the features, all joined with `_`.
fn make_full_name(base_filename: &'static str, features: &[&'static str]) -> String {
if features.is_empty() {
base_filename.to_string()
} else {
format!("{}_{}", base_filename, features.join("_"))
}
}
/// Full name of this program (see `make_full_name`).
fn full_name(&self) -> String {
Self::make_full_name(self.base_filename, &self.features)
}
}
/// A program binary together with its GL binary format and the digest of
/// the source it corresponds to.
#[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
pub struct ProgramBinary {
    /// The binary's bytes.
    bytes: Vec<u8>,
    /// GL binary format of `bytes`.
    format: gl::GLenum,
    /// Digest of the program source; used as the key in `ProgramCache`.
    source_digest: ProgramSourceDigest,
}

impl ProgramBinary {
    /// Bundles the given bytes, format and source digest.
    fn new(
        bytes: Vec<u8>,
        format: gl::GLenum,
        source_digest: ProgramSourceDigest,
    ) -> Self {
        Self { bytes, format, source_digest }
    }

    /// Returns a reference to the source digest hash.
    pub fn source_digest(&self) -> &ProgramSourceDigest {
        &self.source_digest
    }
}
/// The interfaces that an application can implement to handle ProgramCache update
pub trait ProgramCacheObserver {
/// Called from `ProgramCache::update_disk_cache` with the binaries that
/// have been added since the previous call.
fn save_shaders_to_disk(&self, entries: Vec<Arc<ProgramBinary>>);
/// Called from `ProgramCache::update_disk_cache`, once startup is reported
/// complete, with the binaries of all linked cache entries.
fn set_startup_shaders(&self, entries: Vec<Arc<ProgramBinary>>);
// NOTE(review): no caller of the two methods below is visible in this part
// of the file. Presumably the first asks the application to load the binary
// for `digest` into `program_cache`, and the second reports a binary that
// failed to load — confirm at the call sites.
fn try_load_shader_from_disk(&self, digest: &ProgramSourceDigest, program_cache: &Rc<ProgramCache>);
fn notify_program_binary_failed(&self, program_binary: &Arc<ProgramBinary>);
}
/// One entry of the `ProgramCache`.
struct ProgramCacheEntry {
/// The binary.
binary: Arc<ProgramBinary>,
/// True if the binary has been linked, i.e. used for rendering.
/// Entries added after compiling start out linked; entries loaded through
/// `load_program_binary` start out unlinked.
linked: bool,
}
/// Cache of program binaries keyed by the digest of their source.
///
/// The maps use `RefCell` so the cache can be updated through a shared
/// reference.
pub struct ProgramCache {
/// All known binaries, keyed by source digest.
entries: RefCell<FastHashMap<ProgramSourceDigest, ProgramCacheEntry>>,
/// Optional trait object that allows the client
/// application to handle ProgramCache updating
program_cache_handler: Option<Box<dyn ProgramCacheObserver>>,
/// Programs that have not yet been cached to disk (by program_cache_handler)
pending_entries: RefCell<Vec<Arc<ProgramBinary>>>,
}
impl ProgramCache {
    /// Creates an empty cache, optionally attached to an observer that is
    /// told about cache updates.
    pub fn new(program_cache_observer: Option<Box<dyn ProgramCacheObserver>>) -> Rc<Self> {
        let cache = ProgramCache {
            entries: RefCell::new(FastHashMap::default()),
            program_cache_handler: program_cache_observer,
            pending_entries: RefCell::new(Vec::default()),
        };
        Rc::new(cache)
    }

    /// Save any new program binaries to the disk cache, and if startup has
    /// just completed then write the list of shaders to load on next startup.
    ///
    /// Does nothing when no observer is attached.
    fn update_disk_cache(&self, startup_complete: bool) {
        let handler = match self.program_cache_handler {
            Some(ref handler) => handler,
            None => return,
        };

        // Each borrow is released before the observer is called.
        let has_pending = !self.pending_entries.borrow().is_empty();
        if has_pending {
            let pending = self.pending_entries.replace(Vec::default());
            handler.save_shaders_to_disk(pending);
        }

        if startup_complete {
            let startup_shaders: Vec<_> = self
                .entries
                .borrow()
                .values()
                .filter(|entry| entry.linked)
                .map(|entry| entry.binary.clone())
                .collect();
            handler.set_startup_shaders(startup_shaders);
        }
    }

    /// Add a new ProgramBinary to the cache.
    /// This function is typically used after compiling and linking a new program.
    /// The binary will be saved to disk the next time update_disk_cache() is called.
    fn add_new_program_binary(&self, program_binary: Arc<ProgramBinary>) {
        self.pending_entries
            .borrow_mut()
            .push(program_binary.clone());

        let key = program_binary.source_digest.clone();
        let mut entries = self.entries.borrow_mut();
        entries.insert(
            key,
            ProgramCacheEntry {
                binary: program_binary,
                linked: true,
            },
        );
    }

    /// Load ProgramBinary to ProgramCache.
    /// The function is typically used to load ProgramBinary from disk.
    #[cfg(feature = "serialize_program")]
    pub fn load_program_binary(&self, program_binary: Arc<ProgramBinary>) {
        let key = program_binary.source_digest.clone();
        let mut entries = self.entries.borrow_mut();
        entries.insert(
            key,
            ProgramCacheEntry {
                binary: program_binary,
                linked: false,
            },
        );
    }

    /// Returns the number of bytes allocated for shaders in the cache.
    ///
    /// `op` is called once per entry with the pointer to that entry's byte
    /// buffer, and the results are summed.
    pub fn report_memory(&self, op: VoidPtrToSizeFn) -> usize {
        let entries = self.entries.borrow();
        entries
            .values()
            .map(|entry| {
                let buffer = entry.binary.bytes.as_ptr() as *const c_void;
                // NOTE(review): `op` is caller-supplied; presumably it is a
                // malloc-size-of callback that accepts heap pointers such as
                // this one — confirm its contract.
                unsafe { op(buffer) }
            })
            .sum()
    }
}
/// Usage hint for buffer data; each variant maps to one GL `*_DRAW` usage.
#[derive(Debug, Copy, Clone)]
pub enum VertexUsageHint {
    Static,
    Dynamic,
    Stream,
}

impl VertexUsageHint {
    /// Returns the GL usage constant for this hint.
    fn to_gl(&self) -> gl::GLuint {
        match self {
            Self::Stream => gl::STREAM_DRAW,
            Self::Dynamic => gl::DYNAMIC_DRAW,
            Self::Static => gl::STATIC_DRAW,
        }
    }
}
/// Location of a shader uniform, as a raw GL integer.
#[derive(Copy, Clone, Debug)]
pub struct UniformLocation(gl::GLint);
impl UniformLocation {
/// Sentinel location with the value -1.
// NOTE(review): presumably matches the value GL returns for a uniform
// that does not exist — confirm.
pub const INVALID: Self = UniformLocation(-1);
}
/// Hardware, driver and API capabilities and workarounds recorded for a
/// `Device`.
#[derive(Debug)]
pub struct Capabilities {
/// Whether multisampled render targets are supported.
pub supports_multisampling: bool,
/// Whether the function `glCopyImageSubData` is available.
pub supports_copy_image_sub_data: bool,
/// Whether the RGBAF32 textures can be bound to framebuffers.
pub supports_color_buffer_float: bool,
/// Whether the device supports persistently mapped buffers, via glBufferStorage.
pub supports_buffer_storage: bool,
/// Whether advanced blend equations are supported.
pub supports_advanced_blend_equation: bool,
/// Whether dual-source blending is supported.
pub supports_dual_source_blending: bool,
/// Whether KHR_debug is supported for getting debug messages from
/// the driver.
pub supports_khr_debug: bool,
/// Whether we can configure texture units to do swizzling on sampling.
pub supports_texture_swizzle: bool,
/// Whether the driver supports uploading to textures from a non-zero
/// offset within a PBO.
pub supports_nonzero_pbo_offsets: bool,
/// Whether the driver supports specifying the texture usage up front.
pub supports_texture_usage: bool,
/// Whether offscreen render targets can be partially updated.
pub supports_render_target_partial_update: bool,
/// Whether we can use SSBOs.
pub supports_shader_storage_object: bool,
/// Whether to enforce that texture uploads be batched regardless of what
/// the pref says.
pub requires_batched_texture_uploads: Option<bool>,
/// Whether we are able to use glClear to clear regions of an alpha render target.
/// If false, we must use a shader to clear instead.
pub supports_alpha_target_clears: bool,
/// Whether the driver can reliably upload data to R8 format textures.
pub supports_r8_texture_upload: bool,
/// Whether clip-masking is supported natively by the GL implementation
/// rather than emulated in shaders.
pub uses_native_clip_mask: bool,
/// Whether anti-aliasing is supported natively by the GL implementation
/// rather than emulated in shaders.
pub uses_native_antialiasing: bool,
/// Whether the extension GL_OES_EGL_image_external_essl3 is supported. If true, external
/// textures can be used as normal. If false, external textures can only be rendered with
/// certain shaders, and must first be copied in to regular textures for others.
pub supports_image_external_essl3: bool,
/// Whether the VAO must be rebound after an attached VBO has been orphaned.
pub requires_vao_rebind_after_orphaning: bool,
/// The name of the renderer, as reported by GL
pub renderer_name: String,
}
/// Error produced while building a shader program. Both variants carry the
/// shader name followed by the error message.
#[derive(Clone, Debug)]
pub enum ShaderError {
Compilation(String, String), // name, error message
Link(String, String), // name, error message
}
/// A refcounted depth target, which may be shared by multiple textures across
/// the device.
struct SharedDepthTarget {
/// The Render Buffer Object representing the depth target.
rbo_id: RBOId,
/// Reference count. When this drops to zero, the RBO is deleted.
refcount: usize,
}
/// In debug builds, asserts that the reference count has reached zero by the
/// time the target is dropped. The check is skipped while the thread is
/// already panicking.
#[cfg(debug_assertions)]
impl Drop for SharedDepthTarget {
fn drop(&mut self) {
debug_assert!(thread::panicking() || self.refcount == 0);
}
}
/// Describes for which texture formats to use the glTexStorage*
/// family of functions.
#[derive(PartialEq, Debug)]
enum TexStorageUsage {
// Never use glTexStorage*.
Never,
// NOTE(review): going by the name, use glTexStorage* for every format
// except BGRA8 — confirm at the use site.
NonBGRA8,
// Use glTexStorage* for all formats.
Always,
}
/// Describes a required alignment for a stride,
/// which can either be represented in bytes or pixels.
#[derive(Copy, Clone, Debug)]
pub enum StrideAlignment {
    Bytes(NonZeroUsize),
    Pixels(NonZeroUsize),
}

impl StrideAlignment {
    /// Returns the alignment in bytes for textures of the given `format`.
    ///
    /// A byte alignment is returned unchanged; a pixel alignment is
    /// multiplied by the format's bytes per pixel.
    ///
    /// # Panics
    ///
    /// For a pixel alignment, panics if `format` reports zero bytes per
    /// pixel.
    pub fn num_bytes(&self, format: ImageFormat) -> NonZeroUsize {
        let pixels = match *self {
            Self::Bytes(bytes) => return bytes,
            Self::Pixels(pixels) => pixels,
        };
        assert!(format.bytes_per_pixel() > 0);
        let bytes = pixels.get() * format.bytes_per_pixel() as usize;
        NonZeroUsize::new(bytes).unwrap()
    }
}
// We get 24 bits of Z value - use up 22 bits of it, which leaves
// 2 bits in reserve to account for GPU issues. This seems to manifest on
// some GPUs under certain perspectives due to z interpolation
// precision problems.
// NOTE(review): this comment previously said the reserve was "4 bits";
// 24 - 22 = 2 matches the constant below (2 bits is a factor of 4).
const RESERVE_DEPTH_BITS: i32 = 2;
/// The GL device: wraps the GL context and tracks the state bound on it,
/// the capabilities and workarounds of the driver, and the resources shared
/// between textures.
pub struct Device {
/// GL interface that calls are issued through.
gl: Rc<dyn gl::Gl>,
/// If non-None, |gl| points to a profiling wrapper, and this points to the
/// underlying Gl instance.
base_gl: Option<Rc<dyn gl::Gl>>,
// device state
// Cached GL object names currently bound; `bound_textures` has one entry
// per texture slot (16 slots).
bound_textures: [gl::GLuint; 16],
bound_program: gl::GLuint,
bound_program_name: Rc<std::ffi::CString>,
bound_vao: gl::GLuint,
bound_read_fbo: (FBOId, DeviceIntPoint),
bound_draw_fbo: FBOId,
program_mode_id: UniformLocation,
default_read_fbo: FBOId,
default_draw_fbo: FBOId,
/// Track depth state for assertions. Note that the default FBO has depth,
/// so this defaults to true.
depth_available: bool,
upload_method: UploadMethod,
use_batched_texture_uploads: bool,
/// Whether to use draw calls instead of regular blitting commands.
///
/// Note: this currently only applies to the batched texture uploads
/// path.
use_draw_calls_for_texture_copy: bool,
// HW or API capabilities
capabilities: Capabilities,
color_formats: TextureFormatPair<ImageFormat>,
bgra_formats: TextureFormatPair<gl::GLuint>,
bgra_pixel_type: gl::GLuint,
swizzle_settings: SwizzleSettings,
depth_format: gl::GLuint,
/// Map from texture dimensions to shared depth buffers for render targets.
///
/// Render targets often have the same width/height, so we can save memory
/// by sharing these across targets.
depth_targets: FastHashMap<DeviceIntSize, SharedDepthTarget>,
// debug
inside_frame: bool,
crash_annotator: Option<Box<dyn CrashAnnotator>>,
annotate_draw_call_crashes: bool,
// resources
// When set, unoptimized shader sources are read from files under this
// directory instead of the built-in table (see
// `get_unoptimized_shader_source`).
resource_override_path: Option<PathBuf>,
/// Whether to use shaders that have been optimized at build time.
use_optimized_shaders: bool,
max_texture_size: i32,
cached_programs: Option<Rc<ProgramCache>>,
// Frame counter. This is used to map between CPU
// frames and GPU frames.
frame_id: GpuFrameId,
/// When to use glTexStorage*. We prefer this over glTexImage* because it
/// guarantees that mipmaps won't be generated (which they otherwise are on
/// some drivers, particularly ANGLE). However, it is not always supported
/// at all, or for BGRA8 format. If it's not supported for the required
/// format, we fall back to glTexImage*.
texture_storage_usage: TexStorageUsage,
/// Required stride alignment for pixel transfers. This may be required for
/// correctness reasons due to driver bugs, or for performance reasons to
/// ensure we remain on the fast-path for transfers.
required_pbo_stride: StrideAlignment,
/// Whether we must ensure the source strings passed to glShaderSource()
/// are null-terminated, to work around driver bugs.
requires_null_terminated_shader_source: bool,
/// Whether we must unbind any texture from GL_TEXTURE_EXTERNAL_OES before
/// binding to GL_TEXTURE_2D, to work around an android emulator bug.
requires_texture_external_unbind: bool,
// GL extensions
extensions: Vec<String>,
/// Dumps the source of the shader with the given name
dump_shader_source: Option<String>,
surface_origin_is_top_left: bool,
/// A debug boolean for tracking if the shader program has been set after
/// a blend mode change.
///
/// This is needed for compatibility with next-gen
/// GPU APIs that switch states using "pipeline object" that bundles
/// together the blending state with the shader.
///
/// Having the constraint of always binding the shader last would allow
/// us to have the "pipeline object" bound at that time. Without this
/// constraint, we'd either have to eagerly bind the "pipeline object"
/// on changing either the shader or the blend mode, or lazily bind it
/// at draw call time, neither of which is desirable.
#[cfg(debug_assertions)]
shader_is_ready: bool,
}
/// Contains the parameters necessary to bind a draw target.
#[derive(Clone, Copy, Debug)]
pub enum DrawTarget {
/// Use the device's default draw target, with the provided dimensions,
/// which are used to set the viewport.
Default {
/// Target rectangle to draw.
rect: FramebufferIntRect,
/// Total size of the target.
total_size: FramebufferIntSize,
/// Whether the surface's origin is at the top left. When false,
/// `to_framebuffer_rect` flips rectangles vertically.
surface_origin_is_top_left: bool,
},
/// Use the provided texture.
Texture {
/// Size of the texture in pixels
dimensions: DeviceIntSize,
/// Whether to draw with the texture's associated depth target
with_depth: bool,
/// FBO that corresponds to the selected layer / depth mode
fbo_id: FBOId,
/// Native GL texture ID
id: gl::GLuint,
/// Native GL texture target
target: gl::GLuint,
},
/// Use an FBO attached to an external texture.
External {
/// The FBO to draw into.
fbo: FBOId,
/// Size reported by `dimensions()` for this target.
size: FramebufferIntSize,
},
/// An OS compositor surface
NativeSurface {
/// Offset applied to scissor rects for this surface (see
/// `build_scissor_rect`).
offset: DeviceIntPoint,
/// Raw id of the surface's FBO.
// NOTE(review): presumably an FBO supplied by the compositor — confirm.
external_fbo_id: u32,
/// Size reported by `dimensions()` for this target.
dimensions: DeviceIntSize,
},
}
impl DrawTarget {
    /// Builds a default-framebuffer target of the given size whose target
    /// rectangle covers it entirely.
    pub fn new_default(size: DeviceIntSize, surface_origin_is_top_left: bool) -> Self {
        let total_size = device_size_as_framebuffer_size(size);
        let rect: FramebufferIntRect = total_size.into();
        DrawTarget::Default {
            rect,
            total_size,
            surface_origin_is_top_left,
        }
    }

    /// Returns true if this draw target corresponds to the default framebuffer.
    pub fn is_default(&self) -> bool {
        matches!(*self, DrawTarget::Default { .. })
    }

    /// Builds a target that draws into `texture`, through its depth FBO when
    /// `with_depth` is set and its plain FBO otherwise.
    ///
    /// # Panics
    ///
    /// Panics if the texture does not have the requested FBO.
    pub fn from_texture(
        texture: &Texture,
        with_depth: bool,
    ) -> Self {
        let fbo = if with_depth {
            texture.fbo_with_depth
        } else {
            texture.fbo
        };
        let fbo_id = fbo.unwrap();
        DrawTarget::Texture {
            dimensions: texture.get_dimensions(),
            fbo_id,
            with_depth,
            id: texture.id,
            target: texture.target,
        }
    }

    /// Returns the dimensions of this draw-target.
    pub fn dimensions(&self) -> DeviceIntSize {
        match *self {
            DrawTarget::Default { total_size: size, .. }
            | DrawTarget::External { size, .. } => size.cast_unit(),
            DrawTarget::Texture { dimensions, .. }
            | DrawTarget::NativeSurface { dimensions, .. } => dimensions,
        }
    }

    /// Converts a device rect to framebuffer space. Only the default target
    /// with a bottom-left origin needs any adjustment: the rect is flipped
    /// vertically within the target rect and shifted by its x origin.
    pub fn to_framebuffer_rect(&self, device_rect: DeviceIntRect) -> FramebufferIntRect {
        let mut fb_rect = device_rect_as_framebuffer_rect(&device_rect);
        if let DrawTarget::Default { ref rect, surface_origin_is_top_left: false, .. } = *self {
            // perform a Y-flip here; the size is captured first because the
            // corners are rewritten one after another.
            let width = fb_rect.width();
            let height = fb_rect.height();
            fb_rect.min.x += rect.min.x;
            fb_rect.min.y = rect.max.y - fb_rect.max.y;
            fb_rect.max.x = fb_rect.min.x + width;
            fb_rect.max.y = fb_rect.min.y + height;
        }
        fb_rect
    }

    /// Whether the target's origin is at the top left. Every target other
    /// than the default framebuffer reports true.
    pub fn surface_origin_is_top_left(&self) -> bool {
        if let DrawTarget::Default { surface_origin_is_top_left, .. } = *self {
            surface_origin_is_top_left
        } else {
            true
        }
    }

    /// Given a scissor rect, convert it to the right coordinate space
    /// depending on the draw target kind. If no scissor rect was supplied,
    /// returns a scissor rect that encloses the entire render target.
    pub fn build_scissor_rect(
        &self,
        scissor_rect: Option<DeviceIntRect>,
    ) -> FramebufferIntRect {
        let scissor_rect = match scissor_rect {
            Some(scissor_rect) => scissor_rect,
            None => {
                let whole_target = device_size_as_framebuffer_size(self.dimensions());
                return FramebufferIntRect::from_size(whole_target);
            }
        };

        match *self {
            // Clamp to the target rect; an empty intersection yields a zero rect.
            DrawTarget::Default { ref rect, .. } => {
                self.to_framebuffer_rect(scissor_rect)
                    .intersection(rect)
                    .unwrap_or_else(FramebufferIntRect::zero)
            }
            DrawTarget::NativeSurface { offset, .. } => {
                let shifted = scissor_rect.translate(offset.to_vector());
                device_rect_as_framebuffer_rect(&shifted)
            }
            DrawTarget::Texture { .. } | DrawTarget::External { .. } => {
                device_rect_as_framebuffer_rect(&scissor_rect)
            }
        }
    }
}
/// Contains the parameters necessary to bind a read target: the default
/// target, a texture's FBO, an FBO attached to an external texture, or an
/// FBO bound to a native (OS compositor) surface.
#[derive(Clone, Copy, Debug)]
pub enum ReadTarget {
/// Use the device's default draw target.
Default,
/// Use the provided texture.
Texture {
/// ID of the FBO to read from.
fbo_id: FBOId,
},
/// Use an FBO attached to an external texture.
External {
/// The FBO attached to the external texture.
fbo: FBOId,
},
/// An FBO bound to a native (OS compositor) surface
NativeSurface {
/// ID of the FBO bound to the surface.
fbo_id: FBOId,
/// Offset reported by `ReadTarget::offset`; every other variant
/// reports a zero offset.
offset: DeviceIntPoint,
},
}
impl ReadTarget {
    /// Builds a read target that reads from `texture`'s FBO.
    ///
    /// # Panics
    ///
    /// Panics if the texture's `fbo` has not been set.
    pub fn from_texture(
        texture: &Texture,
    ) -> Self {
        let fbo_id = texture.fbo.unwrap();
        ReadTarget::Texture { fbo_id }
    }

    /// Returns the offset carried by this target. Only native surfaces carry
    /// one; every other kind yields the zero point.
    fn offset(&self) -> DeviceIntPoint {
        match *self {
            ReadTarget::NativeSurface { offset, .. } => offset,
            ReadTarget::Default |
            ReadTarget::Texture { .. } |
            ReadTarget::External { .. } => DeviceIntPoint::zero(),
        }
    }
}
/// Maps each draw target onto the read target of the same kind, keeping only
/// the fields needed for reading.
impl From<DrawTarget> for ReadTarget {
    fn from(t: DrawTarget) -> Self {
        match t {
            DrawTarget::Default { .. } => ReadTarget::Default,
            DrawTarget::Texture { fbo_id, .. } => ReadTarget::Texture { fbo_id },
            DrawTarget::External { fbo, .. } => ReadTarget::External { fbo },
            DrawTarget::NativeSurface { external_fbo_id, offset, .. } => {
                // The draw target stores a raw id; wrap it for the read side.
                let fbo_id = FBOId(external_fbo_id);
                ReadTarget::NativeSurface { fbo_id, offset }
            }
        }
    }
}
impl Device {
pub fn new(
mut gl: Rc<dyn gl::Gl>,
crash_annotator: Option<Box<dyn CrashAnnotator>>,
resource_override_path: Option<PathBuf>,
use_optimized_shaders: bool,
upload_method: UploadMethod,
cached_programs: Option<Rc<ProgramCache>>,
allow_texture_storage_support: bool,
allow_texture_swizzling: bool,
dump_shader_source: Option<String>,
surface_origin_is_top_left: bool,
panic_on_gl_error: bool,
) -> Device {
let mut max_texture_size = [0];
unsafe {
gl.get_integer_v(gl::MAX_TEXTURE_SIZE, &mut max_texture_size);
}
// We cap the max texture size at 16384. Some hardware reports higher
// capabilities but becomes very unstable with very large textures.
// Bug 1702494 tracks re-evaluating this cap.
let max_texture_size = max_texture_size[0].min(16384);
let renderer_name = gl.get_string(gl::RENDERER);
info!("Renderer: {}", renderer_name);
info!("Max texture size: {}", max_texture_size);
let mut extension_count = [0];
unsafe {
gl.get_integer_v(gl::NUM_EXTENSIONS, &mut extension_count);
}
let extension_count = extension_count[0] as gl::GLuint;
let mut extensions = Vec::new();
for i in 0 .. extension_count {
extensions.push(gl.get_string_i(gl::EXTENSIONS, i));
}
// On debug builds, or when `panic_on_gl_error` is set, assert that each
// GL call is error-free. We don't do this by default on release builds
// because the synchronous call can stall the pipeline.
let supports_khr_debug = supports_extension(&extensions, "GL_KHR_debug");
if panic_on_gl_error || cfg!(debug_assertions) {
gl = gl::ErrorReactingGl::wrap(gl, move |gl, name, code| {
if supports_khr_debug {
Self::log_driver_messages(gl);
}
println!("Caught GL error {:x} at {}", code, name);
panic!("Caught GL error {:x} at {}", code, name);
});
}
if supports_extension(&extensions, "GL_ANGLE_provoking_vertex") {
gl.provoking_vertex_angle(gl::FIRST_VERTEX_CONVENTION);
}
let supports_texture_usage = supports_extension(&extensions, "GL_ANGLE_texture_usage");
// Our common-case image data in Firefox is BGRA, so we make an effort
// to use BGRA as the internal texture storage format to avoid the need
// to swizzle during upload. Currently we only do this on GLES (and thus
// for Windows, via ANGLE).
//
// On Mac, Apple docs [1] claim that BGRA is a more efficient internal
// format, but they don't support it with glTextureStorage. As a workaround,
// we pretend that it's RGBA8 for the purposes of texture transfers,
// but swizzle R with B for the texture sampling.
//
// We also need our internal format types to be sized, since glTexStorage*