From 5281f46b8432978ea382538b560fd3f6e884e05e Mon Sep 17 00:00:00 2001
From: lizzie <lizzie@eden-emu.dev>
Date: Thu, 30 Apr 2026 23:57:07 +0000
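Subject: [PATCH] less load, fix nv01 timer being kepler

Summary of changes:
* System::Impl holds Tegra::GPU in a std::optional instead of a
  std::unique_ptr; VideoCore::CreateGPU now constructs into a
  caller-provided optional and resets it on failure.
* CpuManager keeps gpu_barrier in a std::optional and reorders its
  data members.
* nvhost_as_gpu owns gmmu through std::unique_ptr;
  ChannelState::memory_manager becomes a non-owning raw pointer.
* ChannelState groups the engines and DmaPusher into one optional
  Payload built in Init(); nv01_timer is now Engines::Nv01Timer
  instead of Engines::KeplerMemory.
* VideoCommon::GPUThread::ThreadManager no longer stores is_async;
  GPU::Impl passes it on each call.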

---
 src/core/core.cpp                             |  8 ++---
 src/core/cpu_manager.cpp                      |  6 ++--
 src/core/cpu_manager.h                        | 17 ++++-----
 .../service/nvdrv/devices/nvhost_as_gpu.cpp   |  5 ++-
 .../hle/service/nvdrv/devices/nvhost_as_gpu.h |  2 +-
 src/video_core/control/channel_state.cpp      | 34 +++++++++++-------
 src/video_core/control/channel_state.h        | 36 +++++++++++--------
 src/video_core/gpu.cpp                        | 12 +++----
 src/video_core/gpu_thread.cpp                 | 23 ++++++------
 src/video_core/gpu_thread.h                   | 13 ++++---
 src/video_core/video_core.cpp                 |  8 ++---
 src/video_core/video_core.h                   |  8 +++--
 12 files changed, 93 insertions(+), 79 deletions(-)
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 77333e3fe5..36c88d9848 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -271,7 +271,7 @@ struct System::Impl {
 
     SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) {
         host1x_core.emplace(system);
-        gpu_core = VideoCore::CreateGPU(emu_window, system);
+        VideoCore::CreateGPU(gpu_core, emu_window, system);
         if (!gpu_core)
             return SystemResultStatus::ErrorVideoCore;
 
@@ -391,10 +391,8 @@ struct System::Impl {
         is_powered_on = false;
         exit_locked = false;
         exit_requested = false;
-
-        if (gpu_core != nullptr) {
+        if (gpu_core)
             gpu_core->NotifyShutdown();
-        }
 
         stop_event.request_stop();
         core_timing.SyncPause(false);
@@ -478,6 +476,7 @@ struct System::Impl {
     std::optional<Memory::CheatEngine> cheat_engine;
     std::optional<Tools::Freezer> memory_freezer;
     std::optional<Tools::RenderdocAPI> renderdoc_api;
+    std::optional<Tegra::GPU> gpu_core;
 
     std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> gpu_dirty_memory_managers;
     std::vector<std::vector<u8>> user_channel;
@@ -492,7 +491,6 @@ struct System::Impl {
     std::unique_ptr<FileSys::ContentProviderUnion> content_provider;
     /// AppLoader used to load the current executing application
     std::unique_ptr<Loader::AppLoader> app_loader;
-    std::unique_ptr<Tegra::GPU> gpu_core;
     std::stop_source stop_event;
 
     mutable std::mutex suspend_guard;
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 10fdcdf8a2..3321a2d49d 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -25,9 +25,9 @@ CpuManager::~CpuManager() = default;
 
 void CpuManager::Initialize() {
     num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1;
-    gpu_barrier = std::make_unique<Common::Barrier>(num_cores + 1);
+    gpu_barrier.emplace(num_cores + 1);
     for (std::size_t core = 0; core < num_cores; core++)
-        core_data[core].host_thread = std::jthread([this, core](std::stop_token token) { RunThread(token, core); });
+        core_data[core].host_thread = std::jthread([this, core](std::stop_token token) { RunThread(token, core); }); // lambda supplies `this`: jthread passes the stop_token first, so &CpuManager::RunThread cannot be invoked directly
 }
 
 void CpuManager::Shutdown() {
@@ -63,7 +63,7 @@ void CpuManager::HandleInterrupt() {
     auto& kernel = system.Kernel();
     auto core_index = kernel.CurrentPhysicalCoreIndex();
 
-    Kernel::KInterruptManager::HandleInterrupt(kernel, static_cast<s32>(core_index));
+    Kernel::KInterruptManager::HandleInterrupt(kernel, s32(core_index));
 }
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index a249dc5f76..4ad5b5f0ad 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -86,22 +89,20 @@ private:
     void ShutdownThread();
     void RunThread(std::stop_token stop_token, std::size_t core);
 
+    static constexpr std::size_t max_cycle_runs = 5;
+
+    std::optional<Common::Barrier> gpu_barrier{};
     struct CoreData {
         std::shared_ptr<Common::Fiber> host_context;
         std::jthread host_thread;
     };
-
-    std::unique_ptr<Common::Barrier> gpu_barrier{};
     std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
-
-    bool is_async_gpu{};
-    bool is_multicore{};
+    System& system;
     std::atomic<std::size_t> current_core{};
     std::size_t idle_count{};
     std::size_t num_cores{};
-    static constexpr std::size_t max_cycle_runs = 5;
-
-    System& system;
+    bool is_async_gpu{};
+    bool is_multicore{};
 };
 
 } // namespace Core
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 3bfef0c29c..1238a21f85 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -138,8 +138,7 @@ NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) {
         static_cast<u32>((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)};
     vm.big_page_allocator.emplace(start_big_pages, end_big_pages);
 
-    gmmu = std::make_shared<Tegra::MemoryManager>(system, max_big_page_bits, vm.va_range_split,
-                                                  vm.big_page_size_bits, VM::PAGE_SIZE_BITS);
+    gmmu = std::make_unique<Tegra::MemoryManager>(system, max_big_page_bits, vm.va_range_split, vm.big_page_size_bits, VM::PAGE_SIZE_BITS);
     system.GPU().InitAddressSpace(*gmmu);
     vm.initialised = true;
 
@@ -416,7 +415,7 @@ NvResult nvhost_as_gpu::BindChannel(IoctlBindChannel& params) {
     LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
 
     auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
-    gpu_channel_device->channel_state->memory_manager = gmmu;
+    gpu_channel_device->channel_state->memory_manager = gmmu.get(); // NOTE(review): non-owning pointer (was shared ownership) — confirm channel_state cannot outlive this device's gmmu
     return NvResult::Success;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 44892ee368..b8ae57e1b4 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -219,7 +219,7 @@ private:
 
         bool initialised{};
     } vm;
-    std::shared_ptr<Tegra::MemoryManager> gmmu;
+    std::unique_ptr<Tegra::MemoryManager> gmmu;
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index 49e2e270b1..7bd8954e50 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -17,28 +17,36 @@
 
 namespace Tegra::Control {
 
-ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
+ChannelState::Payload::Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state) // Constructs every per-channel engine and the DMA pusher together
+    : maxwell_3d(system, memory_manager)
+    , fermi_2d(memory_manager)
+    , kepler_compute(system, memory_manager)
+    , maxwell_dma(system, memory_manager)
+    , kepler_memory(system, memory_manager)
+    , nv01_timer(system, memory_manager)
+    , dma_pusher(system, memory_manager, channel_state) // NOTE(review): runs inside payload.emplace(), so channel_state.payload is not engaged yet — confirm DmaPusher's constructor does not dereference it
+{}
+
+ChannelState::ChannelState(s32 bind_id_)
+    : bind_id{bind_id_}
+{}
 
 void ChannelState::Init(Core::System& system, u64 program_id_) {
     ASSERT(memory_manager);
     program_id = program_id_;
-    dma_pusher.emplace(system, *memory_manager, *this);
-    maxwell_3d.emplace(system, *memory_manager);
-    fermi_2d.emplace(*memory_manager);
-    kepler_compute.emplace(system, *memory_manager);
-    maxwell_dma.emplace(system, *memory_manager);
-    kepler_memory.emplace(system, *memory_manager);
+    payload.emplace(system, *memory_manager, *this);
     initialized = true;
 }
 
 void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
-    dma_pusher->BindRasterizer(rasterizer);
+    payload->dma_pusher.BindRasterizer(rasterizer);
     memory_manager->BindRasterizer(rasterizer);
-    maxwell_3d->BindRasterizer(rasterizer);
-    fermi_2d->BindRasterizer(rasterizer);
-    kepler_memory->BindRasterizer(rasterizer);
-    kepler_compute->BindRasterizer(rasterizer);
-    maxwell_dma->BindRasterizer(rasterizer);
+    payload->maxwell_3d.BindRasterizer(rasterizer);
+    payload->fermi_2d.BindRasterizer(rasterizer);
+    payload->kepler_memory.BindRasterizer(rasterizer);
+    payload->kepler_compute.BindRasterizer(rasterizer);
+    payload->maxwell_dma.BindRasterizer(rasterizer);
+    // nv01_timer is not bound to a rasterizer here; TODO(review): confirm Engines::Nv01Timer needs no BindRasterizer call.
 }
 
 } // namespace Tegra::Control
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
index c72e1446e7..80811c1458 100644
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -14,6 +14,7 @@
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/nv01_timer.h"
 #include "video_core/dma_pusher.h"
 
 namespace Core {
@@ -38,24 +39,29 @@ struct ChannelState {
 
     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
-    /// 3D engine
-    std::optional<Engines::Maxwell3D> maxwell_3d;
-    /// 2D engine
-    std::optional<Engines::Fermi2D> fermi_2d;
-    /// Compute engine
-    std::optional<Engines::KeplerCompute> kepler_compute;
-    /// DMA engine
-    std::optional<Engines::MaxwellDMA> maxwell_dma;
-    /// Inline memory engine
-    std::optional<Engines::KeplerMemory> kepler_memory;
-    /// NV01 Timer
-    std::optional<Engines::KeplerMemory> nv01_timer;
-    std::optional<DmaPusher> dma_pusher;
-    std::shared_ptr<MemoryManager> memory_manager;
+    struct Payload {
+        explicit Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state);
+
+        /// 3D engine
+        Engines::Maxwell3D maxwell_3d;
+        /// 2D engine
+        Engines::Fermi2D fermi_2d;
+        /// Compute engine
+        Engines::KeplerCompute kepler_compute;
+        /// DMA engine
+        Engines::MaxwellDMA maxwell_dma;
+        /// Inline memory engine
+        Engines::KeplerMemory kepler_memory;
+        /// NV01 Timer
+        Engines::Nv01Timer nv01_timer;
+        DmaPusher dma_pusher;
+    };
+    std::optional<Payload> payload;
+    MemoryManager* memory_manager = nullptr;
 
     s32 bind_id = -1;
     u64 program_id = 0;
-    bool initialized{};
+    bool initialized = false;
 };
 
 } // namespace Control
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 0531c2ebcd..ad55383d66 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -189,12 +189,12 @@ struct GPU::Impl {
 
     /// Push GPU command entries to be processed
     void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
-        gpu_thread.SubmitList(channel, std::move(entries));
+        gpu_thread.SubmitList(channel, std::move(entries), is_async);
     }
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     void FlushRegion(DAddr addr, u64 size) {
-        gpu_thread.FlushRegion(addr, size);
+        gpu_thread.FlushRegion(addr, size, is_async);
     }
 
     VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
@@ -206,7 +206,7 @@ struct GPU::Impl {
         const u64 fence = RequestSyncOperation([this, &raster_area]() {
             renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
         });
-        gpu_thread.TickGPU();
+        gpu_thread.TickGPU(is_async);
         WaitForSyncOperation(fence);
         return raster_area;
     }
@@ -222,7 +222,7 @@ struct GPU::Impl {
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     void FlushAndInvalidateRegion(DAddr addr, u64 size) {
-        gpu_thread.FlushAndInvalidateRegion(addr, size);
+        gpu_thread.FlushAndInvalidateRegion(addr, size, is_async);
     }
 
     void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, std::vector<Service::Nvidia::NvFence>&& fences) {
@@ -258,7 +258,7 @@ struct GPU::Impl {
                 syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
             }
         });
-        gpu_thread.TickGPU();
+        gpu_thread.TickGPU(is_async);
         WaitForSyncOperation(wait_fence);
     }
 
@@ -267,7 +267,7 @@ struct GPU::Impl {
 
         const auto wait_fence =
             RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); });
-        gpu_thread.TickGPU();
+        gpu_thread.TickGPU(is_async);
         WaitForSyncOperation(wait_fence);
 
         return out;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index d7c8ac391c..33f44e6fe7 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -20,7 +20,8 @@
 namespace VideoCommon::GPUThread {
 
 ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
-    : system{system_}, is_async{is_async_} {}
+    : system{system_}
+{}
 
 ThreadManager::~ThreadManager() = default;
 
@@ -60,41 +61,41 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Fronten
     });
 }
 
-void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
-    PushCommand(SubmitListCommand(channel, std::move(entries)));
+void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async) {
+    PushCommand(SubmitListCommand(channel, std::move(entries)), false, is_async);
 }
 
-void ThreadManager::FlushRegion(DAddr addr, u64 size) {
+void ThreadManager::FlushRegion(DAddr addr, u64 size, bool is_async) {
     if (!is_async) {
         // Always flush with synchronous GPU mode
-        PushCommand(FlushRegionCommand(addr, size));
+        PushCommand(FlushRegionCommand(addr, size), false, is_async);
     }
     return;
 }
 
-void ThreadManager::TickGPU() {
-    PushCommand(GPUTickCommand());
+void ThreadManager::TickGPU(bool is_async) {
+    PushCommand(GPUTickCommand(), false, is_async);
 }
 
 void ThreadManager::InvalidateRegion(DAddr addr, u64 size) {
     rasterizer->OnCacheInvalidation(addr, size);
 }
 
-void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async) {
     if (Settings::IsGPULevelHigh()) {
         if (!is_async) {
-            PushCommand(FlushRegionCommand(addr, size));
+            PushCommand(FlushRegionCommand(addr, size), false, is_async);
         } else {
             auto& gpu = system.GPU();
             const u64 fence = gpu.RequestFlush(addr, size);
-            TickGPU();
+            TickGPU(is_async);
             gpu.WaitForSyncOperation(fence);
         }
     }
     rasterizer->OnCacheInvalidation(addr, size);
 }
 
-u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
+u64 ThreadManager::PushCommand(CommandData&& command_data, bool block, bool is_async) {
     if (!is_async) {
         // In synchronous GPU mode, block the caller until the command has executed
         block = true;
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index ac1283a338..b48265922f 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -15,6 +15,7 @@
 
 #include "common/bounded_threadsafe_queue.h"
 #include "common/polyfill_thread.h"
+#include "video_core/dma_pusher.h"
 #include "video_core/framebuffer_config.h"
 
 namespace Tegra {
@@ -111,27 +112,25 @@ public:
                      Tegra::Control::Scheduler& scheduler);
 
     /// Push GPU command entries to be processed
-    void SubmitList(s32 channel, Tegra::CommandList&& entries);
+    void SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(DAddr addr, u64 size);
+    void FlushRegion(DAddr addr, u64 size, bool is_async);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
     void InvalidateRegion(DAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(DAddr addr, u64 size);
+    void FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async);
 
-    void TickGPU();
+    void TickGPU(bool is_async);
 
 private:
     /// Pushes a command to be executed by the GPU thread
-    u64 PushCommand(CommandData&& command_data, bool block = false);
+    u64 PushCommand(CommandData&& command_data, bool block, bool is_async);
 
     Core::System& system;
-    const bool is_async;
     VideoCore::RasterizerInterface* rasterizer = nullptr;
-
     SynchState state;
     std::jthread thread;
 };
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 1ac7a0bc35..54fb8f6239 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -45,23 +45,23 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::System& system, Co
 
 namespace VideoCore {
 
-std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+/// @brief Creates an emulated GPU instance in @p gpu using the given system context; @p gpu is reset to empty if renderer setup throws std::runtime_error.
+void CreateGPU(std::optional<Tegra::GPU>& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system) {
     Settings::UpdateRescalingInfo();
 
     const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
     const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
     const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
-    auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
+    gpu.emplace(system, use_async, use_nvdec);
     auto context = emu_window.CreateSharedContext();
     auto scope = context->Acquire();
     try {
         auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
         gpu->BindRenderer(std::move(renderer));
-        return gpu;
     } catch (const std::runtime_error& exception) {
         scope.Cancel();
         LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
-        return nullptr;
+        gpu.reset();
     }
 }
 
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index f8e2444f33..1a56dd99b6 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -1,9 +1,13 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: 2014 Citra Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
 #include <memory>
+#include <optional>
 
 namespace Core {
 class System;
@@ -20,8 +24,7 @@ class GPU;
 namespace VideoCore {
 
 class RendererBase;
-
-/// Creates an emulated GPU instance using the given system context.
-std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
+/// Creates an emulated GPU instance in `gpu` using the given system context; `gpu` is left empty if initialization fails.
+void CreateGPU(std::optional<Tegra::GPU>& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system);
 
 } // namespace VideoCore