From e01518fbbf073e7eaa6f46cc38976a27ae7064de Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 16 May 2026 15:22:59 +0000 Subject: [PATCH] revert --- src/video_core/control/channel_state.cpp | 4 +- src/video_core/control/channel_state.h | 2 +- src/video_core/dma_pusher.cpp | 23 +++- src/video_core/dma_pusher.h | 55 ++++---- src/video_core/engines/puller.cpp | 139 +++++++++++-------- src/video_core/engines/puller.h | 46 +++++-- src/video_core/gpu.cpp | 166 ++++++++++++++++------- 7 files changed, 278 insertions(+), 157 deletions(-) diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index 49e2e270b1..d07c7e2a83 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -19,10 +19,10 @@ namespace Tegra::Control { ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {} -void ChannelState::Init(Core::System& system, u64 program_id_) { +void ChannelState::Init(Core::System& system, GPU& gpu, u64 program_id_) { ASSERT(memory_manager); program_id = program_id_; - dma_pusher.emplace(system, *memory_manager, *this); + dma_pusher.emplace(system, gpu, *memory_manager, *this); maxwell_3d.emplace(system, *memory_manager); fermi_2d.emplace(*memory_manager); kepler_compute.emplace(system, *memory_manager); diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index c72e1446e7..2984d2e09e 100644 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h @@ -34,7 +34,7 @@ namespace Control { struct ChannelState { explicit ChannelState(s32 bind_id); - void Init(Core::System& system, u64 program_id); + void Init(Core::System& system, GPU& gpu, u64 program_id); void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index e7e26f7131..7997f0ff8b 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -12,6 +12,11 @@ #include "video_core/guest_memory.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/texture_cache/util.h" + +#ifdef _MSC_VER +#include +#endif namespace Tegra { @@ -26,15 +31,18 @@ DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_man DmaPusher::~DmaPusher() = default; void DmaPusher::DispatchCalls() { + dma_pushbuffer_subindex = 0; + dma_state.is_last_call = true; + while (system.IsPoweredOn()) { if (!Step()) { break; } } - system.GPU().FlushCommands(); - system.GPU().OnCommandListEnd(); + gpu.FlushCommands(); + gpu.OnCommandListEnd(); } bool DmaPusher::Step() { @@ -163,9 +171,9 @@ void DmaPusher::SetState(const CommandHeader& command_header) { dma_state.method_count = command_header.method_count; } -void DmaPusher::CallMethod(u32 argument) { +void DmaPusher::CallMethod(u32 argument) const { if (dma_state.method < non_puller_methods) { - puller.CallPullerMethod(*this, Engines::Puller::MethodCall{ + puller.CallPullerMethod(Engines::Puller::MethodCall{ dma_state.method, argument, dma_state.subchannel, @@ -183,9 +191,9 @@ void DmaPusher::CallMethod(u32 argument) { } } -void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) { +void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { if (dma_state.method < non_puller_methods) { - puller.CallMultiMethod(*this, dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count); + puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count); } else { auto subchannel = subchannels[dma_state.subchannel]; subchannel->ConsumeSink(); @@ -196,6 +204,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) { void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { rasterizer = rasterizer_; + puller.BindRasterizer(rasterizer); } } // namespace Tegra diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 62af628511..f850513603 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -109,21 +109,25 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub struct CommandList final { CommandList() = default; explicit CommandList(std::size_t size) : command_lists(size) {} - explicit CommandList(boost::container::small_vector&& prefetch_command_list_) + explicit CommandList( + boost::container::small_vector&& prefetch_command_list_) : prefetch_command_list{std::move(prefetch_command_list_)} {} boost::container::small_vector command_lists; boost::container::small_vector prefetch_command_list; }; -/// @brief The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the -/// emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled -/// into a "command stream" consisting of 32-bit words that make up "commands". -/// See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for -/// details on this implementation. +/** + * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the + * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled + * into a "command stream" consisting of 32-bit words that make up "commands". + * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for + * details on this implementation. + */ class DmaPusher final { public: - explicit DmaPusher(Core::System& system_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_); + explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, + Control::ChannelState& channel_state_); ~DmaPusher(); void Push(CommandList&& entries) { @@ -132,7 +136,8 @@ public: void DispatchCalls(); - void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, Engines::EngineTypes engine_type) { + void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, + Engines::EngineTypes engine_type) { subchannels[subchannel_id] = engine; subchannel_type[subchannel_id] = engine_type; } @@ -147,11 +152,11 @@ private: void SetState(const CommandHeader& command_header); - void CallMethod(u32 argument); - void CallMultiMethod(const u32* base_start, u32 num_methods); + void CallMethod(u32 argument) const; + void CallMultiMethod(const u32* base_start, u32 num_methods) const; -public: - Common::ScratchBuffer command_headers; ///< Buffer for list of commands fetched at once + Common::ScratchBuffer + command_headers; ///< Buffer for list of commands fetched at once std::queue dma_pushbuffer; ///< Queue of command lists to be processed std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer @@ -167,24 +172,24 @@ public: bool is_last_call; }; - Core::System& system; - MemoryManager& memory_manager; - Control::ChannelState& channel_state; - DmaState dma_state{}; + bool dma_increment_once{}; + + const bool ib_enable{true}; ///< IB mode enabled + std::array subchannels{}; std::array subchannel_type; - Engines::Puller puller; + GPU& gpu; + Core::System& system; + MemoryManager& memory_manager; + mutable Engines::Puller puller; + + VideoCore::RasterizerInterface* rasterizer; + bool signal_sync; + bool synced; std::mutex sync_mutex; std::condition_variable sync_cv; - - VideoCore::RasterizerInterface* rasterizer = nullptr; - - const bool ib_enable : 1 = true; ///< IB mode enabled - bool dma_increment_once : 1 = false; - bool signal_sync : 1 = false; - bool synced : 1 = false; }; } // namespace Tegra diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index b153619c16..4030f93d49 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -22,29 +22,37 @@ namespace Tegra::Engines { -void Puller::ProcessBindMethod(DmaPusher& dma_pusher, const MethodCall& method_call) { +Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_, + Control::ChannelState& channel_state_) + : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{ + channel_state_} {} + +Puller::~Puller() = default; + +void Puller::ProcessBindMethod(const MethodCall& method_call) { // Bind the current subchannel to the desired engine id. - LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, method_call.argument); + LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, + method_call.argument); const auto engine_id = static_cast(method_call.argument); bound_engines[method_call.subchannel] = engine_id; switch (engine_id) { case EngineID::FERMI_TWOD_A: - dma_pusher.BindSubchannel(&*dma_pusher.channel_state.fermi_2d, method_call.subchannel, EngineTypes::Fermi2D); + dma_pusher.BindSubchannel(&*channel_state.fermi_2d, method_call.subchannel, EngineTypes::Fermi2D); break; case EngineID::MAXWELL_B: - dma_pusher.BindSubchannel(&*dma_pusher.channel_state.maxwell_3d, method_call.subchannel, EngineTypes::Maxwell3D); + dma_pusher.BindSubchannel(&*channel_state.maxwell_3d, method_call.subchannel, EngineTypes::Maxwell3D); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.BindSubchannel(&*dma_pusher.channel_state.kepler_compute, method_call.subchannel, EngineTypes::KeplerCompute); + dma_pusher.BindSubchannel(&*channel_state.kepler_compute, method_call.subchannel, EngineTypes::KeplerCompute); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.BindSubchannel(&*dma_pusher.channel_state.maxwell_dma, method_call.subchannel, EngineTypes::MaxwellDMA); + dma_pusher.BindSubchannel(&*channel_state.maxwell_dma, method_call.subchannel, EngineTypes::MaxwellDMA); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.BindSubchannel(&*dma_pusher.channel_state.kepler_memory, method_call.subchannel, EngineTypes::KeplerMemory); + dma_pusher.BindSubchannel(&*channel_state.kepler_memory, method_call.subchannel, EngineTypes::KeplerMemory); break; case EngineID::NV01_TIMER: - dma_pusher.BindSubchannel(&*dma_pusher.channel_state.nv01_timer, method_call.subchannel, EngineTypes::Nv01Timer); + dma_pusher.BindSubchannel(&*channel_state.nv01_timer, method_call.subchannel, EngineTypes::Nv01Timer); break; default: UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); @@ -52,15 +60,15 @@ void Puller::ProcessBindMethod(DmaPusher& dma_pusher, const MethodCall& method_c } } -void Puller::ProcessFenceActionMethod(DmaPusher& dma_pusher) { +void Puller::ProcessFenceActionMethod() { switch (regs.fence_action.op) { case Puller::FenceOperation::Acquire: // UNIMPLEMENTED_MSG("Channel Scheduling pending."); // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); - dma_pusher.rasterizer->ReleaseFences(); + rasterizer->ReleaseFences(); break; case Puller::FenceOperation::Increment: - dma_pusher.rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); + rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); break; default: UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); @@ -68,35 +76,37 @@ void Puller::ProcessFenceActionMethod(DmaPusher& dma_pusher) { } } -void Puller::ProcessSemaphoreTriggerMethod(DmaPusher& dma_pusher) { +void Puller::ProcessSemaphoreTriggerMethod() { const auto semaphoreOperationMask = 0xF; - const auto op = GpuSemaphoreOperation(regs.semaphore_trigger & semaphoreOperationMask); + const auto op = + static_cast(regs.semaphore_trigger & semaphoreOperationMask); if (op == GpuSemaphoreOperation::WriteLong) { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_sequence; - dma_pusher.rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); + rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, + VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); } else { do { - const u32 word = dma_pusher.memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); + const u32 word{memory_manager.Read(regs.semaphore_address.SemaphoreAddress())}; regs.acquire_source = true; regs.acquire_value = regs.semaphore_sequence; if (op == GpuSemaphoreOperation::AcquireEqual) { regs.acquire_active = true; regs.acquire_mode = false; if (word != regs.acquire_value) { - dma_pusher.rasterizer->ReleaseFences(); + rasterizer->ReleaseFences(); continue; } } else if (op == GpuSemaphoreOperation::AcquireGequal) { regs.acquire_active = true; regs.acquire_mode = true; if (word < regs.acquire_value) { - dma_pusher.rasterizer->ReleaseFences(); + rasterizer->ReleaseFences(); continue; } } else if (op == GpuSemaphoreOperation::AcquireMask) { if (word && regs.semaphore_sequence == 0) { - dma_pusher.rasterizer->ReleaseFences(); + rasterizer->ReleaseFences(); continue; } } else { @@ -106,20 +116,21 @@ void Puller::ProcessSemaphoreTriggerMethod(DmaPusher& dma_pusher) { } } -void Puller::ProcessSemaphoreRelease(DmaPusher& dma_pusher) { +void Puller::ProcessSemaphoreRelease() { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_release; - dma_pusher.rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); + rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, + VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); } -void Puller::ProcessSemaphoreAcquire(DmaPusher& dma_pusher) { - u32 word = dma_pusher.memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); +void Puller::ProcessSemaphoreAcquire() { + u32 word = memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); const auto value = regs.semaphore_acquire; while (word != value) { regs.acquire_active = true; regs.acquire_value = value; - dma_pusher.rasterizer->ReleaseFences(); - word = dma_pusher.memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); + rasterizer->ReleaseFences(); + word = memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); // TODO(kemathe73) figure out how to do the acquire_timeout regs.acquire_mode = false; regs.acquire_source = false; @@ -127,13 +138,13 @@ void Puller::ProcessSemaphoreAcquire(DmaPusher& dma_pusher) { } /// Calls a GPU puller method. -void Puller::CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_call) { +void Puller::CallPullerMethod(const MethodCall& method_call) { regs.reg_array[method_call.method] = method_call.argument; const auto method = static_cast(method_call.method); switch (method) { case BufferMethods::BindObject: { - ProcessBindMethod(dma_pusher, method_call); + ProcessBindMethod(method_call); break; } case BufferMethods::Nop: @@ -144,16 +155,16 @@ void Puller::CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_ca case BufferMethods::WrcacheFlush: break; case BufferMethods::RefCnt: - dma_pusher.rasterizer->SignalReference(); + rasterizer->SignalReference(); break; case BufferMethods::SyncpointOperation: - ProcessFenceActionMethod(dma_pusher); + ProcessFenceActionMethod(); break; case BufferMethods::WaitForIdle: - dma_pusher.rasterizer->WaitForIdle(); + rasterizer->WaitForIdle(); break; case BufferMethods::SemaphoreOperation: { - ProcessSemaphoreTriggerMethod(dma_pusher); + ProcessSemaphoreTriggerMethod(); break; } case BufferMethods::NonStallInterrupt: { @@ -166,7 +177,7 @@ void Puller::CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_ca } case BufferMethods::MemOpB: { // Implement this better. - dma_pusher.rasterizer->InvalidateGPUCache(); + rasterizer->InvalidateGPUCache(); break; } case BufferMethods::MemOpC: @@ -175,11 +186,11 @@ void Puller::CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_ca break; } case BufferMethods::SemaphoreAcquire: { - ProcessSemaphoreAcquire(dma_pusher); + ProcessSemaphoreAcquire(); break; } case BufferMethods::SemaphoreRelease: { - ProcessSemaphoreRelease(dma_pusher); + ProcessSemaphoreRelease(); break; } case BufferMethods::Yield: { @@ -194,26 +205,27 @@ void Puller::CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_ca } /// Calls a GPU engine method. -void Puller::CallEngineMethod(DmaPusher& dma_pusher, const MethodCall& method_call) { +void Puller::CallEngineMethod(const MethodCall& method_call) { const EngineID engine = bound_engines[method_call.subchannel]; + switch (engine) { case EngineID::FERMI_TWOD_A: - dma_pusher.channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::MAXWELL_B: - dma_pusher.channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::NV01_TIMER: - dma_pusher.channel_state.nv01_timer->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + channel_state.nv01_timer->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; default: UNIMPLEMENTED_MSG("Unimplemented engine"); @@ -222,26 +234,28 @@ void Puller::CallEngineMethod(DmaPusher& dma_pusher, const MethodCall& method_ca } /// Calls a GPU engine multivalue method. -void Puller::CallEngineMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending) { +void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, + u32 methods_pending) { const EngineID engine = bound_engines[subchannel]; + switch (engine) { case EngineID::FERMI_TWOD_A: - dma_pusher.channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); + channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::MAXWELL_B: - dma_pusher.channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); + channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); + channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); + channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); + channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::NV01_TIMER: - dma_pusher.channel_state.nv01_timer->CallMultiMethod(method, base_start, amount, methods_pending); + channel_state.nv01_timer->CallMultiMethod(method, base_start, amount, methods_pending); break; default: UNIMPLEMENTED_MSG("Unimplemented engine"); @@ -250,26 +264,31 @@ void Puller::CallEngineMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subcha } /// Calls a GPU method. -void Puller::CallMethod(DmaPusher& dma_pusher, const MethodCall& method_call) { - LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, method_call.subchannel); +void Puller::CallMethod(const MethodCall& method_call) { + LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, + method_call.subchannel); + ASSERT(method_call.subchannel < bound_engines.size()); - if (ExecuteMethodOnEngine(dma_pusher, method_call.method)) { - CallEngineMethod(dma_pusher, method_call); + if (ExecuteMethodOnEngine(method_call.method)) { + CallEngineMethod(method_call); } else { - CallPullerMethod(dma_pusher, method_call); + CallPullerMethod(method_call); } } /// Calls a GPU multivalue method. -void Puller::CallMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending) { +void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, + u32 methods_pending) { LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); + ASSERT(subchannel < bound_engines.size()); - if (ExecuteMethodOnEngine(dma_pusher, method)) { - CallEngineMultiMethod(dma_pusher, method, subchannel, base_start, amount, methods_pending); + + if (ExecuteMethodOnEngine(method)) { + CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); } else { for (u32 i = 0; i < amount; i++) { - CallPullerMethod(dma_pusher, MethodCall{ + CallPullerMethod(MethodCall{ method, base_start[i], subchannel, @@ -279,9 +298,13 @@ void Puller::CallMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, } } +void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { + rasterizer = rasterizer_; +} + /// Determines where the method should be executed. -[[nodiscard]] bool Puller::ExecuteMethodOnEngine(DmaPusher& dma_pusher, u32 method) { - const auto buffer_method = BufferMethods(method); +[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) { + const auto buffer_method = static_cast(method); return buffer_method >= BufferMethods::NonPullerMethods; } diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h index e8994f5640..fe5102e3ed 100644 --- a/src/video_core/engines/puller.h +++ b/src/video_core/engines/puller.h @@ -70,13 +70,32 @@ public: BitField<8, 24, u32> syncpoint_id; }; - void CallMethod(DmaPusher& dma_pusher, const MethodCall& method_call); - void CallMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending); - void BindRasterizer(DmaPusher& dma_pusher, VideoCore::RasterizerInterface* rasterizer); - void CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_call); - void CallEngineMethod(DmaPusher& dma_pusher, const MethodCall& method_call); - void CallEngineMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending); + explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher, + Control::ChannelState& channel_state); + ~Puller(); + + void CallMethod(const MethodCall& method_call); + + void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, + u32 methods_pending); + + void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); + + void CallPullerMethod(const MethodCall& method_call); + + void CallEngineMethod(const MethodCall& method_call); + + void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, + u32 methods_pending); + private: + Tegra::GPU& gpu; + + MemoryManager& memory_manager; + DmaPusher& dma_pusher; + Control::ChannelState& channel_state; + VideoCore::RasterizerInterface* rasterizer = nullptr; + static constexpr std::size_t NUM_REGS = 0x800; struct Regs { static constexpr size_t NUM_REGS = 0x40; @@ -120,12 +139,12 @@ private: }; } regs{}; - void ProcessBindMethod(DmaPusher& dma_pusher, const MethodCall& method_call); - void ProcessFenceActionMethod(DmaPusher& dma_pusher); - void ProcessSemaphoreAcquire(DmaPusher& dma_pusher); - void ProcessSemaphoreRelease(DmaPusher& dma_pusher); - void ProcessSemaphoreTriggerMethod(DmaPusher& dma_pusher); - [[nodiscard]] bool ExecuteMethodOnEngine(DmaPusher& dma_pusher, u32 method); + void ProcessBindMethod(const MethodCall& method_call); + void ProcessFenceActionMethod(); + void ProcessSemaphoreAcquire(); + void ProcessSemaphoreRelease(); + void ProcessSemaphoreTriggerMethod(); + [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); /// Mapping of command subchannels to their bound engine ids std::array bound_engines{}; @@ -138,7 +157,8 @@ private: }; #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Regs, field_name) == position * 4, "Field " #field_name " has invalid position") + static_assert(offsetof(Regs, field_name) == position * 4, \ + "Field " #field_name " has invalid position") ASSERT_REG_POSITION(semaphore_address, 0x4); ASSERT_REG_POSITION(semaphore_sequence, 0x6); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 446a125e22..391ca4ef5f 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -40,32 +40,30 @@ namespace Tegra { struct GPU::Impl { - explicit Impl(Core::System& system_, bool is_async_, bool use_nvdec_) - : system{system_} - , use_nvdec{use_nvdec_} - , shader_notify() - , is_async{is_async_} - , gpu_thread{system_, is_async_} - , scheduler(system_.GPU()) - {} + explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) + : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, + shader_notify{std::make_unique()}, is_async{is_async_}, + gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {} ~Impl() = default; std::shared_ptr CreateChannel(s32 channel_id) { auto channel_state = std::make_shared(channel_id); channels.emplace(channel_id, channel_state); - scheduler.DeclareChannel(channel_state); + scheduler->DeclareChannel(channel_state); return channel_state; } void BindChannel(s32 channel_id) { - if (bound_channel != channel_id) { - auto it = channels.find(channel_id); - ASSERT(it != channels.end()); - bound_channel = channel_id; - current_channel = it->second.get(); - renderer->ReadRasterizer()->BindChannel(*current_channel); + if (bound_channel == channel_id) { + return; } + auto it = channels.find(channel_id); + ASSERT(it != channels.end()); + bound_channel = channel_id; + current_channel = it->second.get(); + + rasterizer->BindChannel(*current_channel); } std::shared_ptr AllocateChannel() { @@ -73,13 +71,13 @@ struct GPU::Impl { } void InitChannel(Control::ChannelState& to_init, u64 program_id) { - to_init.Init(system, program_id); - to_init.BindRasterizer(renderer->ReadRasterizer()); - renderer->ReadRasterizer()->InitializeChannel(to_init); + to_init.Init(system, gpu, program_id); + to_init.BindRasterizer(rasterizer); + rasterizer->InitializeChannel(to_init); } void InitAddressSpace(Tegra::MemoryManager& memory_manager) { - memory_manager.BindRasterizer(renderer->ReadRasterizer()); + memory_manager.BindRasterizer(rasterizer); } void ReleaseChannel(Control::ChannelState& to_release) { @@ -89,26 +87,26 @@ struct GPU::Impl { /// Binds a renderer to the GPU. void BindRenderer(std::unique_ptr renderer_) { renderer = std::move(renderer_); - system.Host1x().memory_manager.BindInterface(renderer->ReadRasterizer()); - system.Host1x().gmmu_manager.BindRasterizer(renderer->ReadRasterizer()); + rasterizer = renderer->ReadRasterizer(); + host1x.MemoryManager().BindInterface(rasterizer); + host1x.gmmu_manager.BindRasterizer(rasterizer); } /// Flush all current written commands into the host GPU for execution. void FlushCommands() { - renderer->ReadRasterizer()->FlushCommands(); + rasterizer->FlushCommands(); } /// Synchronizes CPU writes with Host GPU memory. void InvalidateGPUCache() { - std::function callback_writes([this](PAddr address, size_t size) { - renderer->ReadRasterizer()->OnCacheInvalidation(address, size); - }); + std::function callback_writes( + [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); system.GatherGPUDirtyMemory(callback_writes); } /// Signal the ending of command list. void OnCommandListEnd() { - renderer->ReadRasterizer()->ReleaseFences(false); + rasterizer->ReleaseFences(false); Settings::UpdateGPUAccuracy(); } @@ -145,6 +143,62 @@ struct GPU::Impl { } } + /// Returns a reference to the Maxwell3D GPU engine. + [[nodiscard]] Engines::Maxwell3D& Maxwell3D() { + ASSERT(current_channel); + return *current_channel->maxwell_3d; + } + + /// Returns a const reference to the Maxwell3D GPU engine. + [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const { + ASSERT(current_channel); + return *current_channel->maxwell_3d; + } + + /// Returns a reference to the KeplerCompute GPU engine. + [[nodiscard]] Engines::KeplerCompute& KeplerCompute() { + ASSERT(current_channel); + return *current_channel->kepler_compute; + } + + /// Returns a reference to the KeplerCompute GPU engine. + [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const { + ASSERT(current_channel); + return *current_channel->kepler_compute; + } + + /// Returns a reference to the GPU DMA pusher. + [[nodiscard]] Tegra::DmaPusher& DmaPusher() { + ASSERT(current_channel); + return *current_channel->dma_pusher; + } + + /// Returns a const reference to the GPU DMA pusher. + [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const { + ASSERT(current_channel); + return *current_channel->dma_pusher; + } + + /// Returns a reference to the underlying renderer. + [[nodiscard]] VideoCore::RendererBase& Renderer() { + return *renderer; + } + + /// Returns a const reference to the underlying renderer. + [[nodiscard]] const VideoCore::RendererBase& Renderer() const { + return *renderer; + } + + /// Returns a reference to the shader notifier. + [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { + return *shader_notify; + } + + /// Returns a const reference to the shader notifier. + [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { + return *shader_notify; + } + [[nodiscard]] u64 GetTicks() const { u64 gpu_tick = system.CoreTiming().GetGPUTicks(); Settings::GpuOverclock overclock = Settings::values.fast_gpu_time.GetValue(); @@ -156,6 +210,14 @@ struct GPU::Impl { return gpu_tick; } + [[nodiscard]] bool IsAsync() const { + return is_async; + } + + [[nodiscard]] bool UseNvdec() const { + return use_nvdec; + } + void RendererFrameEndNotify() { system.GetPerfStats().EndGameFrame(); } @@ -165,7 +227,7 @@ struct GPU::Impl { /// core timing events. void Start() { Settings::UpdateGPUAccuracy(); - gpu_thread.StartThread(*renderer, renderer->Context(), scheduler); + gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); } void NotifyShutdown() { @@ -198,13 +260,14 @@ struct GPU::Impl { } VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) { - auto raster_area = renderer->ReadRasterizer()->GetFlushArea(addr, size); + auto raster_area = rasterizer->GetFlushArea(addr, size); if (raster_area.preemtive) { return raster_area; } raster_area.preemtive = true; const u64 fence = RequestSyncOperation([this, &raster_area]() { - renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address); + rasterizer->FlushRegion(raster_area.start_address, + raster_area.end_address - raster_area.start_address); }); gpu_thread.TickGPU(); WaitForSyncOperation(fence); @@ -217,7 +280,7 @@ struct GPU::Impl { } bool OnCPUWrite(DAddr addr, u64 size) { - return renderer->ReadRasterizer()->OnCPUWrite(addr, size); + return rasterizer->OnCPUWrite(addr, size); } /// Notify rasterizer that any caches of the specified region should be flushed and invalidated @@ -274,14 +337,17 @@ struct GPU::Impl { return out; } + GPU& gpu; Core::System& system; + Host1x::Host1x& host1x; std::unique_ptr renderer; + VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; s32 new_channel_id{1}; /// Shader build notifier - VideoCore::ShaderNotify shader_notify; + std::unique_ptr shader_notify; /// When true, we are about to shut down emulation session, so terminate outstanding tasks std::atomic_bool shutting_down{}; @@ -305,7 +371,7 @@ struct GPU::Impl { VideoCommon::GPUThread::ThreadManager gpu_thread; std::unique_ptr cpu_context; - Tegra::Control::Scheduler scheduler; + std::unique_ptr scheduler; ankerl::unordered_dense::map> channels; Tegra::Control::ChannelState* current_channel; s32 bound_channel{-1}; @@ -316,8 +382,7 @@ struct GPU::Impl { }; GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) - : impl{std::make_unique(system, is_async, use_nvdec)} -{} + : impl{std::make_unique(*this, system, is_async, use_nvdec)} {} GPU::~GPU() = default; @@ -358,9 +423,8 @@ void GPU::OnCommandListEnd() { } u64 GPU::RequestFlush(DAddr addr, std::size_t size) { - return impl->RequestSyncOperation([this, addr, size]() { - impl->renderer->ReadRasterizer()->FlushRegion(addr, size); - }); + return impl->RequestSyncOperation( + [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); } u64 GPU::CurrentSyncRequestFence() const { @@ -377,52 +441,52 @@ void GPU::TickWork() { /// Gets a mutable reference to the Host1x interface Host1x::Host1x& GPU::Host1x() { - return impl->system.Host1x(); + return impl->host1x; } /// Gets an immutable reference to the Host1x interface. const Host1x::Host1x& GPU::Host1x() const { - return impl->system.Host1x(); + return impl->host1x; } Engines::Maxwell3D& GPU::Maxwell3D() { - return *impl->current_channel->maxwell_3d; + return impl->Maxwell3D(); } const Engines::Maxwell3D& GPU::Maxwell3D() const { - return *impl->current_channel->maxwell_3d; + return impl->Maxwell3D(); } Engines::KeplerCompute& GPU::KeplerCompute() { - return *impl->current_channel->kepler_compute; + return impl->KeplerCompute(); } const Engines::KeplerCompute& GPU::KeplerCompute() const { - return *impl->current_channel->kepler_compute; + return impl->KeplerCompute(); } Tegra::DmaPusher& GPU::DmaPusher() { - return *impl->current_channel->dma_pusher; + return impl->DmaPusher(); } const Tegra::DmaPusher& GPU::DmaPusher() const { - return *impl->current_channel->dma_pusher; + return impl->DmaPusher(); } VideoCore::RendererBase& GPU::Renderer() { - return *impl->renderer; + return impl->Renderer(); } const VideoCore::RendererBase& GPU::Renderer() const { - return *impl->renderer; + return impl->Renderer(); } VideoCore::ShaderNotify& GPU::ShaderNotify() { - return impl->shader_notify; + return impl->ShaderNotify(); } const VideoCore::ShaderNotify& GPU::ShaderNotify() const { - return impl->shader_notify; + return impl->ShaderNotify(); } void GPU::RequestComposite(std::vector&& layers, @@ -439,11 +503,11 @@ u64 GPU::GetTicks() const { } bool GPU::IsAsync() const { - return impl->is_async; + return impl->IsAsync(); } bool GPU::UseNvdec() const { - return impl->use_nvdec; + return impl->UseNvdec(); } void GPU::RendererFrameEndNotify() {