From 5281f46b8432978ea382538b560fd3f6e884e05e Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 30 Apr 2026 23:57:07 +0000 Subject: [PATCH] less load, fix nv01 timer being kepler --- src/core/core.cpp | 8 ++--- src/core/cpu_manager.cpp | 6 ++-- src/core/cpu_manager.h | 17 ++++----- .../service/nvdrv/devices/nvhost_as_gpu.cpp | 5 ++- .../hle/service/nvdrv/devices/nvhost_as_gpu.h | 2 +- src/video_core/control/channel_state.cpp | 34 +++++++++++------- src/video_core/control/channel_state.h | 36 +++++++++++-------- src/video_core/gpu.cpp | 12 +++---- src/video_core/gpu_thread.cpp | 23 ++++++------ src/video_core/gpu_thread.h | 13 ++++--- src/video_core/video_core.cpp | 8 ++--- src/video_core/video_core.h | 8 +++-- 12 files changed, 93 insertions(+), 79 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 77333e3fe5..36c88d9848 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -271,7 +271,7 @@ struct System::Impl { SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) { host1x_core.emplace(system); - gpu_core = VideoCore::CreateGPU(emu_window, system); + VideoCore::CreateGPU(gpu_core, emu_window, system); if (!gpu_core) return SystemResultStatus::ErrorVideoCore; @@ -391,10 +391,8 @@ struct System::Impl { is_powered_on = false; exit_locked = false; exit_requested = false; - - if (gpu_core != nullptr) { + if (gpu_core) gpu_core->NotifyShutdown(); - } stop_event.request_stop(); core_timing.SyncPause(false); @@ -478,6 +476,7 @@ struct System::Impl { std::optional cheat_engine; std::optional memory_freezer; std::optional renderdoc_api; + std::optional gpu_core; std::array gpu_dirty_memory_managers; std::vector> user_channel; @@ -492,7 +491,6 @@ struct System::Impl { std::unique_ptr content_provider; /// AppLoader used to load the current executing application std::unique_ptr app_loader; - std::unique_ptr gpu_core; std::stop_source stop_event; mutable std::mutex suspend_guard; diff --git 
a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 10fdcdf8a2..3321a2d49d 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -25,9 +25,9 @@ CpuManager::~CpuManager() = default; void CpuManager::Initialize() { num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1; - gpu_barrier = std::make_unique(num_cores + 1); + gpu_barrier.emplace(num_cores + 1); for (std::size_t core = 0; core < num_cores; core++) - core_data[core].host_thread = std::jthread([this, core](std::stop_token token) { RunThread(token, core); }); + core_data[core].host_thread = std::jthread([this, core](std::stop_token token) { RunThread(token, core); }); /* keep the lambda: jthread passes the stop_token as the first argument, so a bare &CpuManager::RunThread cannot be invoked */ } void CpuManager::Shutdown() { @@ -63,7 +63,7 @@ void CpuManager::HandleInterrupt() { auto& kernel = system.Kernel(); auto core_index = kernel.CurrentPhysicalCoreIndex(); - Kernel::KInterruptManager::HandleInterrupt(kernel, static_cast(core_index)); + Kernel::KInterruptManager::HandleInterrupt(kernel, s32(core_index)); } /////////////////////////////////////////////////////////////////////////////// diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h index a249dc5f76..4ad5b5f0ad 100644 --- a/src/core/cpu_manager.h +++ b/src/core/cpu_manager.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -86,22 +89,20 @@ private: void ShutdownThread(); void RunThread(std::stop_token stop_token, std::size_t core); + static constexpr std::size_t max_cycle_runs = 5; + + std::optional gpu_barrier{}; struct CoreData { std::shared_ptr host_context; std::jthread host_thread; }; - - std::unique_ptr gpu_barrier{}; std::array core_data{}; - - bool is_async_gpu{}; - bool is_multicore{}; + System& system; std::atomic current_core{}; std::size_t idle_count{}; std::size_t num_cores{}; - static constexpr std::size_t max_cycle_runs = 5; - - System& system; + bool is_async_gpu{}; + 
bool is_multicore{}; }; } // namespace Core diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 3bfef0c29c..1238a21f85 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -138,8 +138,7 @@ NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { static_cast((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)}; vm.big_page_allocator.emplace(start_big_pages, end_big_pages); - gmmu = std::make_shared(system, max_big_page_bits, vm.va_range_split, - vm.big_page_size_bits, VM::PAGE_SIZE_BITS); + gmmu = std::make_unique(system, max_big_page_bits, vm.va_range_split, vm.big_page_size_bits, VM::PAGE_SIZE_BITS); system.GPU().InitAddressSpace(*gmmu); vm.initialised = true; @@ -416,7 +415,7 @@ NvResult nvhost_as_gpu::BindChannel(IoctlBindChannel& params) { LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); auto gpu_channel_device = module.GetDevice(params.fd); - gpu_channel_device->channel_state->memory_manager = gmmu; + gpu_channel_device->channel_state->memory_manager = gmmu.get(); return NvResult::Success; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 44892ee368..b8ae57e1b4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -219,7 +219,7 @@ private: bool initialised{}; } vm; - std::shared_ptr gmmu; + std::unique_ptr gmmu; }; } // namespace Service::Nvidia::Devices diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index 49e2e270b1..7bd8954e50 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -17,28 +17,36 @@ namespace Tegra::Control { -ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {} +ChannelState::Payload::Payload(Core::System& 
system, MemoryManager& memory_manager, ChannelState& channel_state) + : maxwell_3d(system, memory_manager) + , fermi_2d(memory_manager) + , kepler_compute(system, memory_manager) + , maxwell_dma(system, memory_manager) + , kepler_memory(system, memory_manager) + , nv01_timer(system, memory_manager) + , dma_pusher(system, memory_manager, channel_state) +{} + +ChannelState::ChannelState(s32 bind_id_) + : bind_id{bind_id_} +{} void ChannelState::Init(Core::System& system, u64 program_id_) { ASSERT(memory_manager); program_id = program_id_; - dma_pusher.emplace(system, *memory_manager, *this); - maxwell_3d.emplace(system, *memory_manager); - fermi_2d.emplace(*memory_manager); - kepler_compute.emplace(system, *memory_manager); - maxwell_dma.emplace(system, *memory_manager); - kepler_memory.emplace(system, *memory_manager); + payload.emplace(system, *memory_manager, *this); initialized = true; } void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) { - dma_pusher->BindRasterizer(rasterizer); + payload->dma_pusher.BindRasterizer(rasterizer); memory_manager->BindRasterizer(rasterizer); - maxwell_3d->BindRasterizer(rasterizer); - fermi_2d->BindRasterizer(rasterizer); - kepler_memory->BindRasterizer(rasterizer); - kepler_compute->BindRasterizer(rasterizer); - maxwell_dma->BindRasterizer(rasterizer); + payload->maxwell_3d.BindRasterizer(rasterizer); + payload->fermi_2d.BindRasterizer(rasterizer); + payload->kepler_memory.BindRasterizer(rasterizer); + payload->kepler_compute.BindRasterizer(rasterizer); + payload->maxwell_dma.BindRasterizer(rasterizer); + //payload->nv01_timer.BindRasterizer(rasterizer); } } // namespace Tegra::Control diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index c72e1446e7..80811c1458 100644 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h @@ -14,6 +14,7 @@ #include "video_core/engines/kepler_compute.h" #include 
"video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" +#include "video_core/engines/nv01_timer.h" #include "video_core/dma_pusher.h" namespace Core { @@ -38,24 +39,29 @@ struct ChannelState { void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); - /// 3D engine - std::optional maxwell_3d; - /// 2D engine - std::optional fermi_2d; - /// Compute engine - std::optional kepler_compute; - /// DMA engine - std::optional maxwell_dma; - /// Inline memory engine - std::optional kepler_memory; - /// NV01 Timer - std::optional nv01_timer; - std::optional dma_pusher; - std::shared_ptr memory_manager; + struct Payload { + explicit Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state); + + /// 3D engine + Engines::Maxwell3D maxwell_3d; + /// 2D engine + Engines::Fermi2D fermi_2d; + /// Compute engine + Engines::KeplerCompute kepler_compute; + /// DMA engine + Engines::MaxwellDMA maxwell_dma; + /// Inline memory engine + Engines::KeplerMemory kepler_memory; + /// NV01 Timer + Engines::Nv01Timer nv01_timer; + DmaPusher dma_pusher; + }; + std::optional payload; + MemoryManager* memory_manager = nullptr; s32 bind_id = -1; u64 program_id = 0; - bool initialized{}; + bool initialized = false; }; } // namespace Control diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 0531c2ebcd..ad55383d66 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -189,12 +189,12 @@ struct GPU::Impl { /// Push GPU command entries to be processed void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) { - gpu_thread.SubmitList(channel, std::move(entries)); + gpu_thread.SubmitList(channel, std::move(entries), is_async); } /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory void FlushRegion(DAddr addr, u64 size) { - gpu_thread.FlushRegion(addr, size); + gpu_thread.FlushRegion(addr, size, is_async); } VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, 
u64 size) { @@ -206,7 +206,7 @@ struct GPU::Impl { const u64 fence = RequestSyncOperation([this, &raster_area]() { renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address); }); - gpu_thread.TickGPU(); + gpu_thread.TickGPU(is_async); WaitForSyncOperation(fence); return raster_area; } @@ -222,7 +222,7 @@ struct GPU::Impl { /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(DAddr addr, u64 size) { - gpu_thread.FlushAndInvalidateRegion(addr, size); + gpu_thread.FlushAndInvalidateRegion(addr, size, is_async); } void RequestComposite(std::vector&& layers, std::vector&& fences) { @@ -258,7 +258,7 @@ struct GPU::Impl { syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); } }); - gpu_thread.TickGPU(); + gpu_thread.TickGPU(is_async); WaitForSyncOperation(wait_fence); } @@ -267,7 +267,7 @@ struct GPU::Impl { const auto wait_fence = RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); }); - gpu_thread.TickGPU(); + gpu_thread.TickGPU(is_async); WaitForSyncOperation(wait_fence); return out; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index d7c8ac391c..33f44e6fe7 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -20,7 +20,8 @@ namespace VideoCommon::GPUThread { ThreadManager::ThreadManager(Core::System& system_, bool is_async_) - : system{system_}, is_async{is_async_} {} + : system{system_} +{} ThreadManager::~ThreadManager() = default; @@ -60,41 +61,41 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Fronten }); } -void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { - PushCommand(SubmitListCommand(channel, std::move(entries))); +void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async) { + PushCommand(SubmitListCommand(channel, std::move(entries)), false, 
is_async); } -void ThreadManager::FlushRegion(DAddr addr, u64 size) { +void ThreadManager::FlushRegion(DAddr addr, u64 size, bool is_async) { if (!is_async) { // Always flush with synchronous GPU mode - PushCommand(FlushRegionCommand(addr, size)); + PushCommand(FlushRegionCommand(addr, size), false, is_async); } return; } -void ThreadManager::TickGPU() { - PushCommand(GPUTickCommand()); +void ThreadManager::TickGPU(bool is_async) { + PushCommand(GPUTickCommand(), false, is_async); } void ThreadManager::InvalidateRegion(DAddr addr, u64 size) { rasterizer->OnCacheInvalidation(addr, size); } -void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) { +void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async) { if (Settings::IsGPULevelHigh()) { if (!is_async) { - PushCommand(FlushRegionCommand(addr, size)); + PushCommand(FlushRegionCommand(addr, size), false, is_async); } else { auto& gpu = system.GPU(); const u64 fence = gpu.RequestFlush(addr, size); - TickGPU(); + TickGPU(is_async); gpu.WaitForSyncOperation(fence); } } rasterizer->OnCacheInvalidation(addr, size); } -u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { +u64 ThreadManager::PushCommand(CommandData&& command_data, bool block, bool is_async) { if (!is_async) { // In synchronous GPU mode, block the caller until the command has executed block = true; diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index ac1283a338..b48265922f 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -15,6 +15,7 @@ #include "common/bounded_threadsafe_queue.h" #include "common/polyfill_thread.h" +#include "video_core/dma_pusher.h" #include "video_core/framebuffer_config.h" namespace Tegra { @@ -111,27 +112,25 @@ public: Tegra::Control::Scheduler& scheduler); /// Push GPU command entries to be processed - void SubmitList(s32 channel, Tegra::CommandList&& entries); + void SubmitList(s32 channel, Tegra::CommandList&& entries, bool 
is_async); /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - void FlushRegion(DAddr addr, u64 size); + void FlushRegion(DAddr addr, u64 size, bool is_async); /// Notify rasterizer that any caches of the specified region should be invalidated void InvalidateRegion(DAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - void FlushAndInvalidateRegion(DAddr addr, u64 size); + void FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async); - void TickGPU(); + void TickGPU(bool is_async); private: /// Pushes a command to be executed by the GPU thread - u64 PushCommand(CommandData&& command_data, bool block = false); + u64 PushCommand(CommandData&& command_data, bool block, bool is_async); Core::System& system; - const bool is_async; VideoCore::RasterizerInterface* rasterizer = nullptr; - SynchState state; std::jthread thread; }; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 1ac7a0bc35..54fb8f6239 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -45,23 +45,23 @@ std::unique_ptr CreateRenderer(Core::System& system, Co namespace VideoCore { -std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { +/// @brief Creates an emulated GPU instance using the given system context. 
+void CreateGPU(std::optional& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system) { Settings::UpdateRescalingInfo(); const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); - auto gpu = std::make_unique(system, use_async, use_nvdec); + gpu.emplace(system, use_async, use_nvdec); auto context = emu_window.CreateSharedContext(); auto scope = context->Acquire(); try { auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); gpu->BindRenderer(std::move(renderer)); - return gpu; } catch (const std::runtime_error& exception) { scope.Cancel(); LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); - return nullptr; + gpu.reset(); } } diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f8e2444f33..1a56dd99b6 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -1,9 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include +#include namespace Core { class System; @@ -20,8 +24,6 @@ class GPU; namespace VideoCore { class RendererBase; - -/// Creates an emulated GPU instance using the given system context. -std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); +void CreateGPU(std::optional& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system); } // namespace VideoCore