Skip to content

Commit 19a5668

Browse files
authored
Make more samples deterministic in benchmark mode (#1488)
This makes a simple change to dynamic_rendering_local_read so that the random generator is seeded with a fixed value in benchmark mode. It also addresses race conditions in compute_nbody and its variants (hpp_compute_nbody & synchronization_2) that can cause the compute UBO to be updated by the CPU while it is still being read by the GPU. This often causes no visible issue, but it can make the frame ordering non-deterministic. In the worst case, the GPU could presumably read a partially written UBO entry.
1 parent cd6db31 commit 19a5668

7 files changed

Lines changed: 49 additions & 13 deletions

File tree

samples/api/compute_nbody/compute_nbody.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2019-2025, Sascha Willems
1+
/* Copyright (c) 2019-2026, Sascha Willems
22
*
33
* SPDX-License-Identifier: Apache-2.0
44
*
@@ -53,6 +53,7 @@ ComputeNBody::~ComputeNBody()
5353
vkDestroyDescriptorSetLayout(get_device().get_handle(), compute.descriptor_set_layout, nullptr);
5454
vkDestroyPipeline(get_device().get_handle(), compute.pipeline_calculate, nullptr);
5555
vkDestroyPipeline(get_device().get_handle(), compute.pipeline_integrate, nullptr);
56+
vkDestroyFence(get_device().get_handle(), compute.fence, nullptr);
5657
vkDestroySemaphore(get_device().get_handle(), compute.semaphore, nullptr);
5758
vkDestroyCommandPool(get_device().get_handle(), compute.command_pool, nullptr);
5859

@@ -664,6 +665,11 @@ void ComputeNBody::prepare_compute()
664665
VkSemaphoreCreateInfo semaphore_create_info = vkb::initializers::semaphore_create_info();
665666
VK_CHECK(vkCreateSemaphore(get_device().get_handle(), &semaphore_create_info, nullptr, &compute.semaphore));
666667

668+
// Fence to ensure compute dispatch has finished reading the UBO before we update it.
669+
// Created signaled so the first frame's wait doesn't block forever.
670+
VkFenceCreateInfo fence_create_info = vkb::initializers::fence_create_info(VK_FENCE_CREATE_SIGNALED_BIT);
671+
VK_CHECK(vkCreateFence(get_device().get_handle(), &fence_create_info, nullptr, &compute.fence));
672+
667673
// Signal the semaphore
668674
VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
669675
submit_info.signalSemaphoreCount = 1;
@@ -825,7 +831,7 @@ void ComputeNBody::draw()
825831
compute_submit_info.pWaitDstStageMask = &wait_stage_mask;
826832
compute_submit_info.signalSemaphoreCount = 1;
827833
compute_submit_info.pSignalSemaphores = &compute.semaphore;
828-
VK_CHECK(vkQueueSubmit(compute.queue, 1, &compute_submit_info, VK_NULL_HANDLE));
834+
VK_CHECK(vkQueueSubmit(compute.queue, 1, &compute_submit_info, compute.fence));
829835
}
830836

831837
bool ComputeNBody::prepare(const vkb::ApplicationOptions &options)
@@ -859,12 +865,17 @@ void ComputeNBody::render(float delta_time)
859865
{
860866
return;
861867
}
862-
draw();
868+
869+
// Wait for the previous compute dispatch to finish before updating the UBO
870+
VK_CHECK(vkWaitForFences(get_device().get_handle(), 1, &compute.fence, VK_TRUE, UINT64_MAX));
871+
VK_CHECK(vkResetFences(get_device().get_handle(), 1, &compute.fence));
872+
863873
update_compute_uniform_buffers(delta_time);
864874
if (camera.updated)
865875
{
866876
update_graphics_uniform_buffers();
867877
}
878+
draw();
868879
}
869880

870881
bool ComputeNBody::resize(const uint32_t width, const uint32_t height)

samples/api/compute_nbody/compute_nbody.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2019-2025, Sascha Willems
1+
/* Copyright (c) 2019-2026, Sascha Willems
22
*
33
* SPDX-License-Identifier: Apache-2.0
44
*
@@ -69,6 +69,7 @@ class ComputeNBody : public ApiVulkanSample
6969
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
7070
VkCommandPool command_pool; // Use a separate command pool (queue family may differ from the one used for graphics)
7171
VkCommandBuffer command_buffer; // Command buffer storing the dispatch commands and barriers
72+
VkFence fence; // Fence to wait for compute dispatch completion before UBO update
7273
VkSemaphore semaphore; // Execution dependency between compute & graphic submission
7374
VkDescriptorSetLayout descriptor_set_layout; // Compute shader binding layout
7475
VkDescriptorSet descriptor_set; // Compute shader bindings

samples/api/hpp_compute_nbody/hpp_compute_nbody.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved.
1+
/* Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
22
*
33
* SPDX-License-Identifier: Apache-2.0
44
*
@@ -139,12 +139,19 @@ void HPPComputeNBody::render(float delta_time)
139139
{
140140
if (prepared)
141141
{
142-
draw();
142+
vk::Device device = get_device().get_handle();
143+
144+
// Wait for the previous compute dispatch to finish before updating the UBO
145+
auto result = device.waitForFences(compute.fence, VK_TRUE, UINT64_MAX);
146+
assert(result == vk::Result::eSuccess);
147+
device.resetFences(compute.fence);
148+
143149
update_compute_uniform_buffers(delta_time);
144150
if (camera.updated)
145151
{
146152
update_graphics_uniform_buffers();
147153
}
154+
draw();
148155
}
149156
}
150157

@@ -360,7 +367,7 @@ void HPPComputeNBody::draw()
360367
.pCommandBuffers = &compute.command_buffer,
361368
.signalSemaphoreCount = 1,
362369
.pSignalSemaphores = &compute.semaphore};
363-
compute.queue.submit(compute_submit_info);
370+
compute.queue.submit(compute_submit_info, compute.fence);
364371
}
365372

366373
void HPPComputeNBody::initializeCamera()
@@ -463,6 +470,10 @@ void HPPComputeNBody::prepare_compute()
463470
// Semaphore for compute & graphics sync
464471
compute.semaphore = device.createSemaphore({});
465472

473+
// Fence to ensure compute dispatch has finished reading the UBO before we update it.
474+
// Created signaled so the first frame's wait doesn't block forever.
475+
compute.fence = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled});
476+
466477
// Signal the semaphore
467478
vkb::common::submit_and_wait(device, queue, {}, {compute.semaphore});
468479

samples/api/hpp_compute_nbody/hpp_compute_nbody.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved.
1+
/* Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
22
*
33
* SPDX-License-Identifier: Apache-2.0
44
*
@@ -55,6 +55,7 @@ class HPPComputeNBody : public HPPApiVulkanSample
5555
vk::PipelineLayout pipeline_layout; // Layout of the compute pipeline
5656
vk::Queue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
5757
uint32_t queue_family_index = ~0;
58+
vk::Fence fence; // Fence to wait for compute dispatch completion before UBO update
5859
vk::Semaphore semaphore; // Execution dependency between compute & graphic submission
5960
uint32_t shared_data_size = 1024;
6061
std::unique_ptr<vkb::core::BufferCpp> storage_buffer; // (Shader) storage buffer object containing the particles
@@ -71,6 +72,7 @@ class HPPComputeNBody : public HPPApiVulkanSample
7172
device.destroyPipelineLayout(pipeline_layout);
7273
// no need to free the descriptor_set, as it's implicitly free'd with the descriptor_pool
7374
device.destroyDescriptorSetLayout(descriptor_set_layout);
75+
device.destroyFence(fence);
7476
device.destroySemaphore(semaphore);
7577
device.freeCommandBuffers(command_pool, command_buffer);
7678
device.destroyCommandPool(command_pool);

samples/extensions/dynamic_rendering_local_read/dynamic_rendering_local_read.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,8 +443,7 @@ void DynamicRenderingLocalRead::prepare_buffers()
443443

444444
void DynamicRenderingLocalRead::update_lights_buffer()
445445
{
446-
std::random_device rnd_device;
447-
std::default_random_engine rnd_gen(rnd_device());
446+
std::default_random_engine rnd_gen(lock_simulation_speed ? 0 : std::random_device{}());
448447
std::uniform_real_distribution<float> rnd_dist(-1.0f, 1.0f);
449448
std::uniform_real_distribution<float> rnd_col(0.0f, 0.5f);
450449

samples/extensions/synchronization_2/synchronization_2.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Synchronization2::~Synchronization2()
5555
vkDestroyDescriptorSetLayout(get_device().get_handle(), compute.descriptor_set_layout, nullptr);
5656
vkDestroyPipeline(get_device().get_handle(), compute.pipeline_calculate, nullptr);
5757
vkDestroyPipeline(get_device().get_handle(), compute.pipeline_integrate, nullptr);
58+
vkDestroyFence(get_device().get_handle(), compute.fence, nullptr);
5859
vkDestroySemaphore(get_device().get_handle(), compute.semaphore, nullptr);
5960
vkDestroyCommandPool(get_device().get_handle(), compute.command_pool, nullptr);
6061

@@ -644,6 +645,11 @@ void Synchronization2::prepare_compute()
644645
VkSemaphoreCreateInfo semaphore_create_info = vkb::initializers::semaphore_create_info();
645646
VK_CHECK(vkCreateSemaphore(get_device().get_handle(), &semaphore_create_info, nullptr, &compute.semaphore));
646647

648+
// Fence to ensure compute dispatch has finished reading the UBO before we update it.
649+
// Created signaled so the first frame's wait doesn't block forever.
650+
VkFenceCreateInfo fence_create_info = vkb::initializers::fence_create_info(VK_FENCE_CREATE_SIGNALED_BIT);
651+
VK_CHECK(vkCreateFence(get_device().get_handle(), &fence_create_info, nullptr, &compute.fence));
652+
647653
// Signal the semaphore
648654
VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
649655
submit_info.signalSemaphoreCount = 1;
@@ -813,7 +819,7 @@ void Synchronization2::draw()
813819
compute_submit_info.pWaitSemaphoreInfos = &computeWaitSemaphore;
814820
compute_submit_info.signalSemaphoreInfoCount = 1;
815821
compute_submit_info.pSignalSemaphoreInfos = &computeSignalSemaphore;
816-
VK_CHECK(vkQueueSubmit2KHR(compute.queue, 1, &compute_submit_info, VK_NULL_HANDLE));
822+
VK_CHECK(vkQueueSubmit2KHR(compute.queue, 1, &compute_submit_info, compute.fence));
817823
}
818824

819825
bool Synchronization2::prepare(const vkb::ApplicationOptions &options)
@@ -847,12 +853,17 @@ void Synchronization2::render(float delta_time)
847853
{
848854
return;
849855
}
850-
draw();
856+
857+
// Wait for the previous compute dispatch to finish before updating the UBO
858+
VK_CHECK(vkWaitForFences(get_device().get_handle(), 1, &compute.fence, VK_TRUE, UINT64_MAX));
859+
VK_CHECK(vkResetFences(get_device().get_handle(), 1, &compute.fence));
860+
851861
update_compute_uniform_buffers(delta_time);
852862
if (camera.updated)
853863
{
854864
update_graphics_uniform_buffers();
855865
}
866+
draw();
856867
}
857868

858869
bool Synchronization2::resize(const uint32_t width, const uint32_t height)

samples/extensions/synchronization_2/synchronization_2.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2021-2025, Sascha Willems
1+
/* Copyright (c) 2021-2026, Sascha Willems
22
*
33
* SPDX-License-Identifier: Apache-2.0
44
*
@@ -70,6 +70,7 @@ class Synchronization2 : public ApiVulkanSample
7070
VkQueue queue; // Separate queue for compute commands (queue family may differ from the one used for graphics)
7171
VkCommandPool command_pool; // Use a separate command pool (queue family may differ from the one used for graphics)
7272
VkCommandBuffer command_buffer; // Command buffer storing the dispatch commands and barriers
73+
VkFence fence; // Fence to wait for compute dispatch completion before UBO update
7374
VkSemaphore semaphore; // Execution dependency between compute & graphic submission
7475
VkDescriptorSetLayout descriptor_set_layout; // Compute shader binding layout
7576
VkDescriptorSet descriptor_set; // Compute shader bindings

0 commit comments

Comments
 (0)