From f285e2ea273d196c80104fba3ab305a3aba3242a Mon Sep 17 00:00:00 2001
From: CamilleLaVey
Date: Fri, 26 Jun 2026 00:35:00 -0400
Subject: [PATCH] [TEST] Review the concurrency of resolve compute + barriers operations

---
Review notes (ignored by git am):
 - HostConditionalRenderingCompareBCImpl now fills the resolve cache after the
   optional ResumeHostConditionalRendering() call, so the stored record serial
   is the one the next call will actually observe.
 - HostConditionalRenderingCompareValue: the two branches that ended host
   conditional rendering and returned true for "GPU level not high" were
   merged; the resulting condition is logically identical.
 - Comments were added to the new code; hunk counts and the diffstat were
   recomputed accordingly. The index hashes are those of the original patch.
 - Indentation, blank context lines and angle-bracketed text (#include
   targets, template arguments) were reconstructed and must be confirmed
   against the tree before applying.

 .../renderer_vulkan/vk_query_cache.cpp        | 91 +++++++++++++++++--
 src/video_core/renderer_vulkan/vk_scheduler.h | 10 ++
 2 files changed, 96 insertions(+), 5 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 06d405c2f5..2f9382c6ae 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1271,7 +1271,7 @@ struct QueryCacheRuntimeImpl {
         , tfb_streamer(size_t(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool)
         , primitives_succeeded_streamer(size_t(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, device_memory_)
         , primitives_needed_minus_succeeded_streamer(size_t(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u)
-        , hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} {
+        , hcr_setup{}, hcr_is_set{}, is_hcr_running{}, hcr_bc_resolve_cache{}, maxwell3d{} {
 
         hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
         hcr_setup.pNext = nullptr;
@@ -1326,6 +1326,18 @@ struct QueryCacheRuntimeImpl {
     bool hcr_is_set;
     bool is_hcr_running;
 
+    /// Parameters of the most recent HostConditionalRenderingCompareBCImpl call together with
+    /// the scheduler record serial observed when it finished; a repeated call with identical
+    /// values returns early instead of resolving again.
+    struct HCRBCResolveCache {
+        DAddr address{};
+        u64 record_serial{};
+        bool is_equal{};
+        bool compare_to_zero{};
+        bool valid{};
+    };
+    HCRBCResolveCache hcr_bc_resolve_cache;
+
     // maxwell3d
     Maxwell3D* maxwell3d;
 };
@@ -1351,6 +1363,7 @@ void QueryCacheRuntime::EndHostConditionalRendering() {
     PauseHostConditionalRendering();
     impl->hcr_is_set = false;
     impl->is_hcr_running = false;
+    impl->hcr_bc_resolve_cache.valid = false;
     impl->hcr_buffer = VkBuffer{};
     impl->hcr_offset = 0;
 }
@@ -1380,6 +1393,7 @@ void QueryCacheRuntime::ResumeHostConditionalRendering() {
 
 void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object,
                                                                  bool is_equal) {
+    impl->hcr_bc_resolve_cache.valid = false;
     {
         std::scoped_lock lk(impl->buffer_cache.mutex);
         static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
@@ -1411,6 +1425,16 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
 
 void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal,
                                                               bool compare_to_zero) {
+    const u64 current_record_serial = impl->scheduler.CurrentRecordSerial();
+    auto& cache = impl->hcr_bc_resolve_cache;
+    // Skip the resolve when the previous call used the same arguments and the scheduler has
+    // not recorded anything since.
+    if (cache.valid && cache.address == address && cache.is_equal == is_equal &&
+        cache.compare_to_zero == compare_to_zero &&
+        cache.record_serial == current_record_serial) {
+        return;
+    }
+
     VkBuffer to_resolve;
     u32 to_resolve_offset;
     const u32 resolve_size = compare_to_zero ? 8 : 24;
@@ -1434,16 +1458,68 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
     impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
     impl->hcr_is_set = true;
     impl->is_hcr_running = false;
     if (was_running) {
         ResumeHostConditionalRendering();
     }
+
+    // Remember this resolve only after the optional resume above: if resuming records a
+    // command, the serial taken before it would already be stale and the next identical
+    // call could never hit the cache.
+    cache.address = address;
+    cache.record_serial = impl->scheduler.CurrentRecordSerial();
+    cache.is_equal = is_equal;
+    cache.compare_to_zero = compare_to_zero;
+    cache.valid = true;
 }
 
+/// Returns false (after ending host conditional rendering, except when the extension is
+/// missing) if object_1 has a null address, is tracked by neither cache, or is only in the
+/// query cache while qc_dirty is false. Every other path returns true, but only the final
+/// one starts a buffer-cache comparison; the rest just end host conditional rendering.
 bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,
-                                                             [[maybe_unused]] bool qc_dirty) {
+                                                             bool qc_dirty) {
     if (!impl->device.IsExtConditionalRendering()) {
         return false;
     }
+    if (object_1.address == 0) {
+        EndHostConditionalRendering();
+        return false;
+    }
+
+    const bool is_in_qc = object_1.found_query != nullptr;
+    bool is_in_bc = false;
+    if (!is_in_qc) {
+        std::scoped_lock lk(impl->buffer_cache.mutex);
+        is_in_bc = impl->buffer_cache.IsRegionGpuModified(object_1.address, 8);
+    }
+    const bool is_in_ac = is_in_qc || is_in_bc;
+
+    if (!is_in_ac) {
+        EndHostConditionalRendering();
+        return false;
+    }
+
+    if (!qc_dirty && !is_in_bc) {
+        EndHostConditionalRendering();
+        return false;
+    }
+
+    const auto driver_id = impl->device.GetDriverID();
+    const bool is_gpu_high = Settings::IsGPULevelHigh();
+
+    // Without the high GPU level, or on ARM/Turnip drivers, skip the host comparison.
+    if (!is_gpu_high || driver_id == VK_DRIVER_ID_ARM_PROPRIETARY ||
+        driver_id == VK_DRIVER_ID_MESA_TURNIP) {
+        EndHostConditionalRendering();
+        return true;
+    }
+
+    if (!is_in_bc) {
+        // Avoid comparing stale data: query cache can be newer than guest memory.
+        EndHostConditionalRendering();
+        return true;
+    }
+
     HostConditionalRenderingCompareBCImpl(object_1.address, true, true);
     return true;
 }
@@ -1471,14 +1547,19 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
     std::array<bool, 2> is_in_qc{};
     std::array<bool, 2> is_in_ac{};
     std::array<bool, 2> is_null{};
-    {
+    for (size_t i = 0; i < 2; i++) {
+        is_in_qc[i] = objects[i]->found_query != nullptr;
+    }
+    // The buffer cache is only consulted (and locked) for objects missing from the query cache.
+    if (!is_in_qc[0] || !is_in_qc[1]) {
         std::scoped_lock lk(impl->buffer_cache.mutex);
         for (size_t i = 0; i < 2; i++) {
-            is_in_qc[i] = objects[i]->found_query != nullptr;
             is_in_bc[i] = !is_in_qc[i] && check_in_bc(objects[i]->address);
-            is_in_ac[i] = is_in_qc[i] || is_in_bc[i];
         }
     }
+    for (size_t i = 0; i < 2; i++) {
+        is_in_ac[i] = is_in_qc[i] || is_in_bc[i];
+    }
 
     if (!is_in_ac[0] && !is_in_ac[1]) {
         EndHostConditionalRendering();
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 6f471f6247..433d76135d 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -6,6 +6,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <condition_variable>
 #include <cstddef>
 #include <functional>
@@ -92,10 +93,12 @@ public:
         requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
     void RecordWithUploadBuffer(T&& command) {
         if (chunk->Record(command)) {
+            record_serial.fetch_add(1, std::memory_order_relaxed);
             return;
         }
         DispatchWork();
         (void)chunk->Record(command);
+        record_serial.fetch_add(1, std::memory_order_relaxed);
     }
 
     template <typename T>
@@ -117,6 +120,11 @@ public:
         return master_semaphore->IsFree(tick);
     }
 
+    /// Returns a monotonic serial incremented for every recorded command callback.
+    [[nodiscard]] u64 CurrentRecordSerial() const noexcept {
+        return record_serial.load(std::memory_order_relaxed);
+    }
+
     /// Waits for the given GPU tick, optionally pacing frames.
     void Wait(u64 tick, double target_fps = 0.0) {
         if (tick > 0) {
@@ -298,6 +306,8 @@ private:
 
     u64 frame_counter{};
     u64 last_submitted_tick = 0;
+    /// Bumped once per RecordWithUploadBuffer call; read through CurrentRecordSerial().
+    std::atomic<u64> record_serial{0};
 };
 
 } // namespace Vulkan