export const description = `
Stress tests for GPUAdapter.requestDevice.
`;
import { Fixture } from '../../common/framework/fixture.js';
import { makeTestGroup } from '../../common/framework/test_group.js';
import { attemptGarbageCollection } from '../../common/util/collect_garbage.js';
import { keysOf } from '../../common/util/data_tables.js';
import { getGPU } from '../../common/util/navigator_gpu.js';
import { assert, iterRange } from '../../common/util/util.js';
import { getDefaultLimitsForAdapter } from '../../webgpu/capability_info.js';
export const g = makeTestGroup(Fixture);
/** Adapter preference identifier to option. */
const kAdapterTypeOptions: {
readonly [k in GPUPowerPreference | 'fallback']: GPURequestAdapterOptions;
} =
/* prettier-ignore */ {
'low-power': { powerPreference: 'low-power', forceFallbackAdapter: false },
'high-performance': { powerPreference: 'high-performance', forceFallbackAdapter: false },
'fallback': { powerPreference: undefined, forceFallbackAdapter: true },
};
/** List of all adapter hint types. */
const kAdapterTypes = keysOf(kAdapterTypeOptions);
/**
* Creates a device, a valid compute pipeline, valid resources for the pipeline, and
* ties them together into a set of compute commands ready to be submitted to the GPU
* queue. The commands are deliberately not submitted, so that the returned command
* buffers keep all of these resources alive until the device is destroyed.
*/
async function createDeviceAndComputeCommands(t: Fixture, adapter: GPUAdapter) {
// Constants are computed such that per run, this function should allocate roughly 2G
// worth of data. This should be sufficient as we run these creation functions many
// times. If the data backing the created objects is not recycled we should OOM.
const limitInfo = getDefaultLimitsForAdapter(adapter);
const kNumPipelines = 64;
const kNumBindgroups = 128;
const kNumBufferElements =
limitInfo.maxComputeWorkgroupSizeX.default * limitInfo.maxComputeWorkgroupSizeY.default;
const kBufferSize = kNumBufferElements * 4;
const kBufferData = new Uint32Array([...iterRange(kNumBufferElements, x => x)]);
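// Rough accounting (an illustration assuming the common default limit of 256 for both
// maxComputeWorkgroupSizeX and maxComputeWorkgroupSizeY; the actual values come from the
// adapter): each buffer is 256 * 256 * 4 bytes = 256 KiB, and the loops below create
// kNumPipelines * kNumBindgroups = 8192 buffers, for a total of roughly 2 GiB.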
const device: GPUDevice = await t.requestDeviceTracked(adapter);
const commands = [];
for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
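// Each iteration embeds the pipeline index as a constant in the WGSL source, so every
// pipeline gets a distinct shader module (presumably so that shader/pipeline caching
// cannot collapse them into a single allocation).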
const pipeline = device.createComputePipeline({
layout: 'auto',
compute: {
module: device.createShaderModule({
code: `
struct Buffer { data: array<u32>, };
@group(0) @binding(0) var<storage, read_write> buffer: Buffer;
@compute @workgroup_size(1) fn main(
@builtin(global_invocation_id) id: vec3<u32>) {
buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] =
buffer.data[id.x * ${limitInfo.maxComputeWorkgroupSizeX.default}u + id.y] +
${pipelineIndex}u;
}
`,
}),
entryPoint: 'main',
},
});
for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
const buffer = t.trackForCleanup(
device.createBuffer({
size: kBufferSize,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
})
);
device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
const bindgroup = device.createBindGroup({
layout: pipeline.getBindGroupLayout(0),
entries: [{ binding: 0, resource: { buffer } }],
});
const encoder = device.createCommandEncoder();
const pass = encoder.beginComputePass();
pass.setPipeline(pipeline);
pass.setBindGroup(0, bindgroup);
pass.dispatchWorkgroups(
limitInfo.maxComputeWorkgroupSizeX.default,
limitInfo.maxComputeWorkgroupSizeY.default
);
pass.end();
commands.push(encoder.finish());
}
}
return { device, objects: commands };
}
/**
* Creates a device, a valid render pipeline, valid resources for the pipeline, and
* ties them together into a set of render commands ready to be submitted to the GPU
* queue. The commands are deliberately not submitted, so that the returned command
* buffers keep all of these resources alive until the device is destroyed.
*/
async function createDeviceAndRenderCommands(t: Fixture, adapter: GPUAdapter) {
// Constants are computed such that per run, this function should allocate roughly 2G
// worth of data. This should be sufficient as we run these creation functions many
// times. If the data backing the created objects is not recycled we should OOM.
const kNumPipelines = 128;
const kNumBindgroups = 128;
const kSize = 128;
const kBufferData = new Uint32Array([...iterRange(kSize * kSize, x => x)]);
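// Rough accounting: each iteration of the inner loop creates a 128 * 128 * 4 = 64 KiB
// uniform buffer plus a 128x128 rgba8unorm render target (another ~64 KiB), and
// kNumPipelines * kNumBindgroups = 16384 iterations run, for a total of roughly 2 GiB.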
const device: GPUDevice = await t.requestDeviceTracked(adapter);
const commands = [];
for (let pipelineIndex = 0; pipelineIndex < kNumPipelines; ++pipelineIndex) {
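// The vertex shader below reads a per-vertex index from the uniform buffer and maps it
// to a distinct point position in clip space; the fragment shader encodes the pipeline
// index in the output color, so each pipeline's shader source is unique.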
const module = device.createShaderModule({
code: `
struct Buffer { data: array<vec4<u32>, ${(kSize * kSize) / 4}>, };
@group(0) @binding(0) var<uniform> buffer: Buffer;
@vertex fn vmain(
@builtin(vertex_index) vertexIndex: u32
) -> @builtin(position) vec4<f32> {
let index = buffer.data[vertexIndex / 4u][vertexIndex % 4u];
let position = vec2<f32>(f32(index % ${kSize}u), f32(index / ${kSize}u));
let r = vec2<f32>(1.0 / f32(${kSize}));
let a = 2.0 * r;
let b = r - vec2<f32>(1.0);
return vec4<f32>(fma(position, a, b), 0.0, 1.0);
}
@fragment fn fmain() -> @location(0) vec4<f32> {
return vec4<f32>(${pipelineIndex}.0 / ${kNumPipelines}.0, 0.0, 0.0, 1.0);
}
`,
});
const pipeline = device.createRenderPipeline({
layout: device.createPipelineLayout({
bindGroupLayouts: [
device.createBindGroupLayout({
entries: [
{
binding: 0,
visibility: GPUShaderStage.VERTEX,
buffer: { type: 'uniform' },
},
],
}),
],
}),
vertex: { module, entryPoint: 'vmain', buffers: [] },
primitive: { topology: 'point-list' },
fragment: {
targets: [{ format: 'rgba8unorm' }],
module,
entryPoint: 'fmain',
},
});
for (let bindgroupIndex = 0; bindgroupIndex < kNumBindgroups; ++bindgroupIndex) {
const buffer = t.trackForCleanup(
device.createBuffer({
size: kSize * kSize * 4,
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
})
);
device.queue.writeBuffer(buffer, 0, kBufferData, 0, kBufferData.length);
const bindgroup = device.createBindGroup({
layout: pipeline.getBindGroupLayout(0),
entries: [{ binding: 0, resource: { buffer } }],
});
const texture = t.trackForCleanup(
device.createTexture({
size: [kSize, kSize],
usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
format: 'rgba8unorm',
})
);
const encoder = device.createCommandEncoder();
const pass = encoder.beginRenderPass({
colorAttachments: [
{
view: texture.createView(),
loadOp: 'load',
storeOp: 'store',
},
],
});
pass.setPipeline(pipeline);
pass.setBindGroup(0, bindgroup);
pass.draw(kSize * kSize);
pass.end();
commands.push(encoder.finish());
}
}
return { device, objects: commands };
}
/**
* Creates a device and a large number of buffers which are immediately written to. The
* buffers are expected to be kept alive until they or the device are destroyed.
*/
async function createDeviceAndBuffers(t: Fixture, adapter: GPUAdapter) {
// Currently we just allocate 2G of memory using 512MB blocks. We may be able to
// increase this to hit OOM instead, but on integrated GPUs on Metal, this can cause
// kernel panics at the moment, and it can greatly increase the time needed.
const kTotalMemorySize = 2 * 1024 * 1024 * 1024;
const kMemoryBlockSize = 512 * 1024 * 1024;
const kMemoryBlockData = new Uint8Array(kMemoryBlockSize);
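// With these constants the loop below creates 4 buffers of 512 MiB each.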
const device: GPUDevice = await t.requestDeviceTracked(adapter);
const buffers = [];
for (let memory = 0; memory < kTotalMemorySize; memory += kMemoryBlockSize) {
const buffer = t.trackForCleanup(
device.createBuffer({
size: kMemoryBlockSize,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
})
);
// Write out to the buffer to make sure that it has backing memory.
device.queue.writeBuffer(buffer, 0, kMemoryBlockData, 0, kMemoryBlockData.length);
buffers.push(buffer);
}
return { device, objects: buffers };
}
g.test('coexisting')
.desc(`Tests allocation of many coexisting GPUDevice objects.`)
.params(u => u.combine('adapterType', kAdapterTypes))
.fn(async t => {
const { adapterType } = t.params;
const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
assert(adapter !== null, 'Failed to get adapter.');
// Based on the Vulkan conformance test requirement to be able to create multiple devices.
const kNumDevices = 5;
const devices = [];
for (let i = 0; i < kNumDevices; ++i) {
const device = await t.requestDeviceTracked(adapter);
devices.push(device);
}
});
g.test('continuous,with_destroy')
.desc(
`Tests allocation and destruction of many GPUDevice objects over time. Devices are
requested sequentially, with a set of device-allocated objects created on each device.
The devices are then destroyed, to verify that the devices and their allocated objects
are recycled over a very large number of iterations.`
)
.params(u => u.combine('adapterType', kAdapterTypes))
.fn(async t => {
const { adapterType } = t.params;
const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
assert(adapter !== null, 'Failed to get adapter.');
// Since devices are being destroyed, we should be able to create many devices.
const kNumDevices = 100;
const kFunctions = [
createDeviceAndBuffers,
createDeviceAndComputeCommands,
createDeviceAndRenderCommands,
];
const deviceList = [];
const objectLists = [];
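// Hold JS references to every device and its objects so that recycling can only come
// from the explicit destroy() call below, not from garbage collection.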
for (let i = 0; i < kNumDevices; ++i) {
const { device, objects } = await kFunctions[i % kFunctions.length](t, adapter);
t.expect(objects.length > 0, 'unable to allocate any objects');
deviceList.push(device);
objectLists.push(objects);
device.destroy();
}
});
g.test('continuous,no_destroy')
.desc(
`Tests allocation and implicit GC of many GPUDevice objects over time. Devices are
sequentially requested and then dropped for GC over a very large number of iterations.
Note that without destroy, we do not create device-allocated objects, because holding
them would implicitly keep the device in scope.`
)
.params(u => u.combine('adapterType', kAdapterTypes))
.fn(async t => {
const { adapterType } = t.params;
const adapter = await getGPU(t.rec).requestAdapter(kAdapterTypeOptions[adapterType]);
assert(adapter !== null, 'Failed to get adapter.');
const kNumDevices = 10_000;
for (let i = 1; i <= kNumDevices; ++i) {
await (async () => {
// No trackForCleanup because it would prevent the GPUDevice from being GCed.
// eslint-disable-next-line no-restricted-syntax
t.expect((await adapter.requestDevice()) !== null, 'unexpected null device');
})();
if (i % 10 === 0) {
// We need to occasionally wait for GC to clear out stale devices.
await attemptGarbageCollection();
}
}
});