[TEST] Review the concurrency of resolve compute + barrier operations

This commit is contained in:
CamilleLaVey 2026-06-26 00:35:00 -04:00
parent 42d4c5dab7
commit f285e2ea27
2 changed files with 87 additions and 5 deletions

View file

@ -1271,7 +1271,7 @@ struct QueryCacheRuntimeImpl {
, tfb_streamer(size_t(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool)
, primitives_succeeded_streamer(size_t(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, device_memory_)
, primitives_needed_minus_succeeded_streamer(size_t(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u)
, hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} {
, hcr_setup{}, hcr_is_set{}, is_hcr_running{}, hcr_bc_resolve_cache{}, maxwell3d{} {
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
hcr_setup.pNext = nullptr;
@ -1326,6 +1326,15 @@ struct QueryCacheRuntimeImpl {
bool hcr_is_set;
bool is_hcr_running;
/// Snapshot of the last buffer-cache conditional-rendering resolve request.
/// HostConditionalRenderingCompareBCImpl stores its arguments here and returns early when it is
/// called again with the same arguments while the scheduler record serial is unchanged.
struct HCRBCResolveCache {
/// Address argument of the cached request.
DAddr address{};
/// Scheduler record serial sampled when the entry was stored.
u64 record_serial{};
/// `is_equal` argument of the cached request.
bool is_equal{};
/// `compare_to_zero` argument of the cached request.
bool compare_to_zero{};
/// True only while the entry may be reused; EndHostConditionalRendering and
/// HostConditionalRenderingCompareValueImpl reset it to false.
bool valid{};
};
HCRBCResolveCache hcr_bc_resolve_cache;
// maxwell3d
Maxwell3D* maxwell3d;
};
@ -1351,6 +1360,7 @@ void QueryCacheRuntime::EndHostConditionalRendering() {
PauseHostConditionalRendering();
impl->hcr_is_set = false;
impl->is_hcr_running = false;
impl->hcr_bc_resolve_cache.valid = false;
impl->hcr_buffer = VkBuffer{};
impl->hcr_offset = 0;
}
@ -1380,6 +1390,7 @@ void QueryCacheRuntime::ResumeHostConditionalRendering() {
void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object,
bool is_equal) {
impl->hcr_bc_resolve_cache.valid = false;
{
std::scoped_lock lk(impl->buffer_cache.mutex);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
@ -1411,6 +1422,14 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal,
bool compare_to_zero) {
const u64 current_record_serial = impl->scheduler.CurrentRecordSerial();
auto& cache = impl->hcr_bc_resolve_cache;
if (cache.valid && cache.address == address && cache.is_equal == is_equal &&
cache.compare_to_zero == compare_to_zero &&
cache.record_serial == current_record_serial) {
return;
}
VkBuffer to_resolve;
u32 to_resolve_offset;
const u32 resolve_size = compare_to_zero ? 8 : 24;
@ -1434,16 +1453,66 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
impl->hcr_is_set = true;
impl->is_hcr_running = false;
cache.address = address;
cache.record_serial = impl->scheduler.CurrentRecordSerial();
cache.is_equal = is_equal;
cache.compare_to_zero = compare_to_zero;
cache.valid = true;
if (was_running) {
ResumeHostConditionalRendering();
}
}
bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,
[[maybe_unused]] bool qc_dirty) {
bool qc_dirty) {
if (!impl->device.IsExtConditionalRendering()) {
return false;
}
if (object_1.address == 0) {
EndHostConditionalRendering();
return false;
}
const bool is_in_qc = object_1.found_query != nullptr;
bool is_in_bc = false;
if (!is_in_qc) {
std::scoped_lock lk(impl->buffer_cache.mutex);
is_in_bc = impl->buffer_cache.IsRegionGpuModified(object_1.address, 8);
}
const bool is_in_ac = is_in_qc || is_in_bc;
if (!is_in_ac) {
EndHostConditionalRendering();
return false;
}
if (!qc_dirty && !is_in_bc) {
EndHostConditionalRendering();
return false;
}
const auto driver_id = impl->device.GetDriverID();
const bool is_gpu_high = Settings::IsGPULevelHigh();
if ((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) ||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
EndHostConditionalRendering();
return true;
}
if (!is_gpu_high) {
EndHostConditionalRendering();
return true;
}
if (!is_in_bc) {
// Avoid comparing stale data: query cache can be newer than guest memory.
EndHostConditionalRendering();
return true;
}
HostConditionalRenderingCompareBCImpl(object_1.address, true, true);
return true;
}
@ -1471,14 +1540,18 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
std::array<bool, 2> is_in_qc{};
std::array<bool, 2> is_in_ac{};
std::array<bool, 2> is_null{};
{
for (size_t i = 0; i < 2; i++) {
is_in_qc[i] = objects[i]->found_query != nullptr;
}
if (!is_in_qc[0] || !is_in_qc[1]) {
std::scoped_lock lk(impl->buffer_cache.mutex);
for (size_t i = 0; i < 2; i++) {
is_in_qc[i] = objects[i]->found_query != nullptr;
is_in_bc[i] = !is_in_qc[i] && check_in_bc(objects[i]->address);
is_in_ac[i] = is_in_qc[i] || is_in_bc[i];
}
}
for (size_t i = 0; i < 2; i++) {
is_in_ac[i] = is_in_qc[i] || is_in_bc[i];
}
if (!is_in_ac[0] && !is_in_ac[1]) {
EndHostConditionalRendering();

View file

@ -6,6 +6,7 @@
#pragma once
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <functional>
@ -92,10 +93,12 @@ public:
requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
void RecordWithUploadBuffer(T&& command) {
    // Try the current chunk first; if it rejects the command, dispatch the pending work and
    // record again (the result of the second attempt is deliberately ignored).
    if (!chunk->Record(command)) {
        DispatchWork();
        (void)chunk->Record(command);
    }
    // One serial bump per call, on either path.
    record_serial.fetch_add(1, std::memory_order_relaxed);
}
template <typename T>
@ -117,6 +120,11 @@ public:
return master_semaphore->IsFree(tick);
}
/// Returns the current value of the record serial counter.
///
/// The counter is advanced with fetch_add by RecordWithUploadBuffer once per call; the value is
/// read here with a relaxed atomic load, so it imposes no ordering on surrounding memory
/// accesses.
/// NOTE(review): the previous wording said the serial is incremented for every recorded command
/// callback - confirm that all recording entry points bump it.
[[nodiscard]] u64 CurrentRecordSerial() const noexcept {
return record_serial.load(std::memory_order_relaxed);
}
/// Waits for the given GPU tick, optionally pacing frames.
void Wait(u64 tick, double target_fps = 0.0) {
if (tick > 0) {
@ -298,6 +306,7 @@ private:
u64 frame_counter{};
u64 last_submitted_tick = 0;
std::atomic<u64> record_serial{0};
};
} // namespace Vulkan