mirror of
https://git.eden-emu.dev/eden-emu/eden.git
synced 2026-06-26 04:49:30 -04:00
[TEST] Review the concurrency of resolve compute + barriers operations
This commit is contained in:
parent
42d4c5dab7
commit
f285e2ea27
2 changed files with 87 additions and 5 deletions
|
|
@ -1271,7 +1271,7 @@ struct QueryCacheRuntimeImpl {
|
|||
, tfb_streamer(size_t(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool)
|
||||
, primitives_succeeded_streamer(size_t(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, device_memory_)
|
||||
, primitives_needed_minus_succeeded_streamer(size_t(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u)
|
||||
, hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} {
|
||||
, hcr_setup{}, hcr_is_set{}, is_hcr_running{}, hcr_bc_resolve_cache{}, maxwell3d{} {
|
||||
|
||||
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
|
||||
hcr_setup.pNext = nullptr;
|
||||
|
|
@ -1326,6 +1326,15 @@ struct QueryCacheRuntimeImpl {
|
|||
bool hcr_is_set;
|
||||
bool is_hcr_running;
|
||||
|
||||
struct HCRBCResolveCache {
|
||||
DAddr address{};
|
||||
u64 record_serial{};
|
||||
bool is_equal{};
|
||||
bool compare_to_zero{};
|
||||
bool valid{};
|
||||
};
|
||||
HCRBCResolveCache hcr_bc_resolve_cache;
|
||||
|
||||
// maxwell3d
|
||||
Maxwell3D* maxwell3d;
|
||||
};
|
||||
|
|
@ -1351,6 +1360,7 @@ void QueryCacheRuntime::EndHostConditionalRendering() {
|
|||
PauseHostConditionalRendering();
|
||||
impl->hcr_is_set = false;
|
||||
impl->is_hcr_running = false;
|
||||
impl->hcr_bc_resolve_cache.valid = false;
|
||||
impl->hcr_buffer = VkBuffer{};
|
||||
impl->hcr_offset = 0;
|
||||
}
|
||||
|
|
@ -1380,6 +1390,7 @@ void QueryCacheRuntime::ResumeHostConditionalRendering() {
|
|||
|
||||
void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object,
|
||||
bool is_equal) {
|
||||
impl->hcr_bc_resolve_cache.valid = false;
|
||||
{
|
||||
std::scoped_lock lk(impl->buffer_cache.mutex);
|
||||
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
|
||||
|
|
@ -1411,6 +1422,14 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
|
|||
|
||||
void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal,
|
||||
bool compare_to_zero) {
|
||||
const u64 current_record_serial = impl->scheduler.CurrentRecordSerial();
|
||||
auto& cache = impl->hcr_bc_resolve_cache;
|
||||
if (cache.valid && cache.address == address && cache.is_equal == is_equal &&
|
||||
cache.compare_to_zero == compare_to_zero &&
|
||||
cache.record_serial == current_record_serial) {
|
||||
return;
|
||||
}
|
||||
|
||||
VkBuffer to_resolve;
|
||||
u32 to_resolve_offset;
|
||||
const u32 resolve_size = compare_to_zero ? 8 : 24;
|
||||
|
|
@ -1434,16 +1453,66 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
|
|||
impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
|
||||
impl->hcr_is_set = true;
|
||||
impl->is_hcr_running = false;
|
||||
|
||||
cache.address = address;
|
||||
cache.record_serial = impl->scheduler.CurrentRecordSerial();
|
||||
cache.is_equal = is_equal;
|
||||
cache.compare_to_zero = compare_to_zero;
|
||||
cache.valid = true;
|
||||
|
||||
if (was_running) {
|
||||
ResumeHostConditionalRendering();
|
||||
}
|
||||
}
|
||||
|
||||
bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,
|
||||
[[maybe_unused]] bool qc_dirty) {
|
||||
bool qc_dirty) {
|
||||
if (!impl->device.IsExtConditionalRendering()) {
|
||||
return false;
|
||||
}
|
||||
if (object_1.address == 0) {
|
||||
EndHostConditionalRendering();
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool is_in_qc = object_1.found_query != nullptr;
|
||||
bool is_in_bc = false;
|
||||
if (!is_in_qc) {
|
||||
std::scoped_lock lk(impl->buffer_cache.mutex);
|
||||
is_in_bc = impl->buffer_cache.IsRegionGpuModified(object_1.address, 8);
|
||||
}
|
||||
const bool is_in_ac = is_in_qc || is_in_bc;
|
||||
|
||||
if (!is_in_ac) {
|
||||
EndHostConditionalRendering();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!qc_dirty && !is_in_bc) {
|
||||
EndHostConditionalRendering();
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto driver_id = impl->device.GetDriverID();
|
||||
const bool is_gpu_high = Settings::IsGPULevelHigh();
|
||||
|
||||
if ((!is_gpu_high && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) ||
|
||||
driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
|
||||
EndHostConditionalRendering();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!is_gpu_high) {
|
||||
EndHostConditionalRendering();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!is_in_bc) {
|
||||
// Avoid comparing stale data: query cache can be newer than guest memory.
|
||||
EndHostConditionalRendering();
|
||||
return true;
|
||||
}
|
||||
|
||||
HostConditionalRenderingCompareBCImpl(object_1.address, true, true);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1471,14 +1540,18 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
|
|||
std::array<bool, 2> is_in_qc{};
|
||||
std::array<bool, 2> is_in_ac{};
|
||||
std::array<bool, 2> is_null{};
|
||||
{
|
||||
for (size_t i = 0; i < 2; i++) {
|
||||
is_in_qc[i] = objects[i]->found_query != nullptr;
|
||||
}
|
||||
if (!is_in_qc[0] || !is_in_qc[1]) {
|
||||
std::scoped_lock lk(impl->buffer_cache.mutex);
|
||||
for (size_t i = 0; i < 2; i++) {
|
||||
is_in_qc[i] = objects[i]->found_query != nullptr;
|
||||
is_in_bc[i] = !is_in_qc[i] && check_in_bc(objects[i]->address);
|
||||
is_in_ac[i] = is_in_qc[i] || is_in_bc[i];
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < 2; i++) {
|
||||
is_in_ac[i] = is_in_qc[i] || is_in_bc[i];
|
||||
}
|
||||
|
||||
if (!is_in_ac[0] && !is_in_ac[1]) {
|
||||
EndHostConditionalRendering();
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
|
|
@ -92,10 +93,12 @@ public:
|
|||
requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
|
||||
void RecordWithUploadBuffer(T&& command) {
|
||||
if (chunk->Record(command)) {
|
||||
record_serial.fetch_add(1, std::memory_order_relaxed);
|
||||
return;
|
||||
}
|
||||
DispatchWork();
|
||||
(void)chunk->Record(command);
|
||||
record_serial.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
@ -117,6 +120,11 @@ public:
|
|||
return master_semaphore->IsFree(tick);
|
||||
}
|
||||
|
||||
/// Returns a monotonic serial incremented for every recorded command callback.
|
||||
[[nodiscard]] u64 CurrentRecordSerial() const noexcept {
|
||||
return record_serial.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/// Waits for the given GPU tick, optionally pacing frames.
|
||||
void Wait(u64 tick, double target_fps = 0.0) {
|
||||
if (tick > 0) {
|
||||
|
|
@ -298,6 +306,7 @@ private:
|
|||
u64 frame_counter{};
|
||||
|
||||
u64 last_submitted_tick = 0;
|
||||
std::atomic<u64> record_serial{0};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue