less load, fix nv01 timer being kepler

2026-06-06 01:13:45 -04:00 · 2026-04-30 23:57:07 +00:00 · 2026-04-30 23:57:07 +00:00 · 9ee484b375
commit 9ee484b375
parent 02d16582f8
12 changed files with 93 additions and 79 deletions
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@ -271,7 +271,7 @@ struct System::Impl {
    SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) {
        host1x_core.emplace(system);
-        gpu_core = VideoCore::CreateGPU(emu_window, system);
+        VideoCore::CreateGPU(gpu_core, emu_window, system);
        if (!gpu_core)
            return SystemResultStatus::ErrorVideoCore;
@ -391,10 +391,8 @@ struct System::Impl {
        is_powered_on = false;
        exit_locked = false;
        exit_requested = false;
-
+        if (gpu_core)
-        if (gpu_core != nullptr) {
            gpu_core->NotifyShutdown();
-        }
        stop_event.request_stop();
        core_timing.SyncPause(false);
@ -478,6 +476,7 @@ struct System::Impl {
    std::optional<Memory::CheatEngine> cheat_engine;
    std::optional<Tools::Freezer> memory_freezer;
    std::optional<Tools::RenderdocAPI> renderdoc_api;
+    std::optional<Tegra::GPU> gpu_core;
    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> gpu_dirty_memory_managers;
    std::vector<std::vector<u8>> user_channel;
@ -492,7 +491,6 @@ struct System::Impl {
    std::unique_ptr<FileSys::ContentProviderUnion> content_provider;
    /// AppLoader used to load the current executing application
    std::unique_ptr<Loader::AppLoader> app_loader;
-    std::unique_ptr<Tegra::GPU> gpu_core;
    std::stop_source stop_event;
    mutable std::mutex suspend_guard;
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@ -25,9 +25,9 @@ CpuManager::~CpuManager() = default;
 void CpuManager::Initialize() {
    num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1;
-    gpu_barrier = std::make_unique<Common::Barrier>(num_cores + 1);
+    gpu_barrier.emplace(num_cores + 1);
    for (std::size_t core = 0; core < num_cores; core++)
-        core_data[core].host_thread = std::jthread([this, core](std::stop_token token) { RunThread(token, core); });
+        core_data[core].host_thread = std::jthread(&CpuManager::RunThread, core);
 }
 void CpuManager::Shutdown() {
@ -63,7 +63,7 @@ void CpuManager::HandleInterrupt() {
    auto& kernel = system.Kernel();
    auto core_index = kernel.CurrentPhysicalCoreIndex();
-    Kernel::KInterruptManager::HandleInterrupt(kernel, static_cast<s32>(core_index));
+    Kernel::KInterruptManager::HandleInterrupt(kernel, s32(core_index));
 }
 ///////////////////////////////////////////////////////////////////////////////
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@ -86,22 +89,20 @@ private:
    void ShutdownThread();
    void RunThread(std::stop_token stop_token, std::size_t core);
+    static constexpr std::size_t max_cycle_runs = 5;
+    std::optional<Common::Barrier> gpu_barrier{};
    struct CoreData {
        std::shared_ptr<Common::Fiber> host_context;
        std::jthread host_thread;
    };
-    std::unique_ptr<Common::Barrier> gpu_barrier{};
    std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
-
+    System& system;
-    bool is_async_gpu{};
-    bool is_multicore{};
    std::atomic<std::size_t> current_core{};
    std::size_t idle_count{};
    std::size_t num_cores{};
-    static constexpr std::size_t max_cycle_runs = 5;
+    bool is_async_gpu{};
-
+    bool is_multicore{};
-    System& system;
 };
 } // namespace Core
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@ -138,8 +138,7 @@ NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) {
        static_cast<u32>((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)};
    vm.big_page_allocator.emplace(start_big_pages, end_big_pages);
-    gmmu = std::make_shared<Tegra::MemoryManager>(system, max_big_page_bits, vm.va_range_split,
+    gmmu = std::make_unique<Tegra::MemoryManager>(system, max_big_page_bits, vm.va_range_split, vm.big_page_size_bits, VM::PAGE_SIZE_BITS);
-                                                  vm.big_page_size_bits, VM::PAGE_SIZE_BITS);
    system.GPU().InitAddressSpace(*gmmu);
    vm.initialised = true;
@ -416,7 +415,7 @@ NvResult nvhost_as_gpu::BindChannel(IoctlBindChannel& params) {
    LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
    auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
-    gpu_channel_device->channel_state->memory_manager = gmmu;
+    gpu_channel_device->channel_state->memory_manager = gmmu.get();
    return NvResult::Success;
 }
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@ -219,7 +219,7 @@ private:
        bool initialised{};
    } vm;
-    std::shared_ptr<Tegra::MemoryManager> gmmu;
+    std::unique_ptr<Tegra::MemoryManager> gmmu;
 };
 } // namespace Service::Nvidia::Devices
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@ -17,28 +17,36 @@
 namespace Tegra::Control {
-ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
+ChannelState::Payload::Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state)
+    : maxwell_3d(system, memory_manager)
+    , fermi_2d(memory_manager)
+    , kepler_compute(system, memory_manager)
+    , maxwell_dma(system, memory_manager)
+    , kepler_memory(system, memory_manager)
+    , nv01_timer(system, memory_manager)
+    , dma_pusher(system, memory_manager, channel_state)
+{}
+ChannelState::ChannelState(s32 bind_id_)
+    : bind_id{bind_id_}
+{}
 void ChannelState::Init(Core::System& system, u64 program_id_) {
    ASSERT(memory_manager);
    program_id = program_id_;
-    dma_pusher.emplace(system, *memory_manager, *this);
+    payload.emplace(system, *memory_manager, *this);
-    maxwell_3d.emplace(system, *memory_manager);
-    fermi_2d.emplace(*memory_manager);
-    kepler_compute.emplace(system, *memory_manager);
-    maxwell_dma.emplace(system, *memory_manager);
-    kepler_memory.emplace(system, *memory_manager);
    initialized = true;
 }
 void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
-    dma_pusher->BindRasterizer(rasterizer);
+    payload->dma_pusher.BindRasterizer(rasterizer);
    memory_manager->BindRasterizer(rasterizer);
-    maxwell_3d->BindRasterizer(rasterizer);
+    payload->maxwell_3d.BindRasterizer(rasterizer);
-    fermi_2d->BindRasterizer(rasterizer);
+    payload->fermi_2d.BindRasterizer(rasterizer);
-    kepler_memory->BindRasterizer(rasterizer);
+    payload->kepler_memory.BindRasterizer(rasterizer);
-    kepler_compute->BindRasterizer(rasterizer);
+    payload->kepler_compute.BindRasterizer(rasterizer);
-    maxwell_dma->BindRasterizer(rasterizer);
+    payload->maxwell_dma.BindRasterizer(rasterizer);
+    //payload->nv01_timer.BindRasterizer(rasterizer);
 }
 } // namespace Tegra::Control
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@ -14,6 +14,7 @@
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/engines/nv01_timer.h"
 #include "video_core/dma_pusher.h"
 namespace Core {
@ -38,24 +39,29 @@ struct ChannelState {
    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
-    /// 3D engine
+    struct Payload {
-    std::optional<Engines::Maxwell3D> maxwell_3d;
+        explicit Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state);
-    /// 2D engine
+
-    std::optional<Engines::Fermi2D> fermi_2d;
+        /// 3D engine
-    /// Compute engine
+        Engines::Maxwell3D maxwell_3d;
-    std::optional<Engines::KeplerCompute> kepler_compute;
+        /// 2D engine
-    /// DMA engine
+        Engines::Fermi2D fermi_2d;
-    std::optional<Engines::MaxwellDMA> maxwell_dma;
+        /// Compute engine
-    /// Inline memory engine
+        Engines::KeplerCompute kepler_compute;
-    std::optional<Engines::KeplerMemory> kepler_memory;
+        /// DMA engine
-    /// NV01 Timer
+        Engines::MaxwellDMA maxwell_dma;
-    std::optional<Engines::KeplerMemory> nv01_timer;
+        /// Inline memory engine
-    std::optional<DmaPusher> dma_pusher;
+        Engines::KeplerMemory kepler_memory;
-    std::shared_ptr<MemoryManager> memory_manager;
+        /// NV01 Timer
+        Engines::Nv01Timer nv01_timer;
+        DmaPusher dma_pusher;
+    };
+    std::optional<Payload> payload;
+    MemoryManager* memory_manager = nullptr;
    s32 bind_id = -1;
    u64 program_id = 0;
-    bool initialized{};
+    bool initialized = false;
 };
 } // namespace Control
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@ -189,12 +189,12 @@ struct GPU::Impl {
    /// Push GPU command entries to be processed
    void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
-        gpu_thread.SubmitList(channel, std::move(entries));
+        gpu_thread.SubmitList(channel, std::move(entries), is_async);
    }
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
    void FlushRegion(DAddr addr, u64 size) {
-        gpu_thread.FlushRegion(addr, size);
+        gpu_thread.FlushRegion(addr, size, is_async);
    }
    VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
@ -206,7 +206,7 @@ struct GPU::Impl {
        const u64 fence = RequestSyncOperation([this, &raster_area]() {
            renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
        });
-        gpu_thread.TickGPU();
+        gpu_thread.TickGPU(is_async);
        WaitForSyncOperation(fence);
        return raster_area;
    }
@ -222,7 +222,7 @@ struct GPU::Impl {
    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
    void FlushAndInvalidateRegion(DAddr addr, u64 size) {
-        gpu_thread.FlushAndInvalidateRegion(addr, size);
+        gpu_thread.FlushAndInvalidateRegion(addr, size, is_async);
    }
    void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, std::vector<Service::Nvidia::NvFence>&& fences) {
@ -258,7 +258,7 @@ struct GPU::Impl {
                syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
            }
        });
-        gpu_thread.TickGPU();
+        gpu_thread.TickGPU(is_async);
        WaitForSyncOperation(wait_fence);
    }
@ -267,7 +267,7 @@ struct GPU::Impl {
        const auto wait_fence =
            RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); });
-        gpu_thread.TickGPU();
+        gpu_thread.TickGPU(is_async);
        WaitForSyncOperation(wait_fence);
        return out;
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@ -20,7 +20,8 @@
 namespace VideoCommon::GPUThread {
 ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
-    : system{system_}, is_async{is_async_} {}
+    : system{system_}
+{}
 ThreadManager::~ThreadManager() = default;
@ -60,41 +61,41 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Fronten
    });
 }
-void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
+void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async) {
-    PushCommand(SubmitListCommand(channel, std::move(entries)));
+    PushCommand(SubmitListCommand(channel, std::move(entries)), false, is_async);
 }
-void ThreadManager::FlushRegion(DAddr addr, u64 size) {
+void ThreadManager::FlushRegion(DAddr addr, u64 size, bool is_async) {
    if (!is_async) {
        // Always flush with synchronous GPU mode
-        PushCommand(FlushRegionCommand(addr, size));
+        PushCommand(FlushRegionCommand(addr, size), false, is_async);
    }
    return;
 }
-void ThreadManager::TickGPU() {
+void ThreadManager::TickGPU(bool is_async) {
-    PushCommand(GPUTickCommand());
+    PushCommand(GPUTickCommand(), false, is_async);
 }
 void ThreadManager::InvalidateRegion(DAddr addr, u64 size) {
    rasterizer->OnCacheInvalidation(addr, size);
 }
-void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async) {
    if (Settings::IsGPULevelHigh()) {
        if (!is_async) {
-            PushCommand(FlushRegionCommand(addr, size));
+            PushCommand(FlushRegionCommand(addr, size), false, is_async);
        } else {
            auto& gpu = system.GPU();
            const u64 fence = gpu.RequestFlush(addr, size);
-            TickGPU();
+            TickGPU(is_async);
            gpu.WaitForSyncOperation(fence);
        }
    }
    rasterizer->OnCacheInvalidation(addr, size);
 }
-u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
+u64 ThreadManager::PushCommand(CommandData&& command_data, bool block, bool is_async) {
    if (!is_async) {
        // In synchronous GPU mode, block the caller until the command has executed
        block = true;
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@ -15,6 +15,7 @@
 #include "common/bounded_threadsafe_queue.h"
 #include "common/polyfill_thread.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/framebuffer_config.h"
 namespace Tegra {
@ -111,27 +112,25 @@ public:
                     Tegra::Control::Scheduler& scheduler);
    /// Push GPU command entries to be processed
-    void SubmitList(s32 channel, Tegra::CommandList&& entries);
+    void SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async);
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(DAddr addr, u64 size);
+    void FlushRegion(DAddr addr, u64 size, bool is_async);
    /// Notify rasterizer that any caches of the specified region should be invalidated
    void InvalidateRegion(DAddr addr, u64 size);
    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(DAddr addr, u64 size);
+    void FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async);
-    void TickGPU();
+    void TickGPU(bool is_async);
 private:
    /// Pushes a command to be executed by the GPU thread
-    u64 PushCommand(CommandData&& command_data, bool block = false);
+    u64 PushCommand(CommandData&& command_data, bool block, bool is_async);
    Core::System& system;
-    const bool is_async;
    VideoCore::RasterizerInterface* rasterizer = nullptr;
    SynchState state;
    std::jthread thread;
 };
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@ -45,23 +45,23 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::System& system, Co
 namespace VideoCore {
-std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+/// @brief Creates an emulated GPU instance using the given system context.
+void CreateGPU(std::optional<Tegra::GPU>& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system) {
    Settings::UpdateRescalingInfo();
    const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
    const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
    const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
-    auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
+    gpu.emplace(system, use_async, use_nvdec);
    auto context = emu_window.CreateSharedContext();
    auto scope = context->Acquire();
    try {
        auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
        gpu->BindRenderer(std::move(renderer));
-        return gpu;
    } catch (const std::runtime_error& exception) {
        scope.Cancel();
        LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
-        return nullptr;
+        gpu.reset();
    }
 }
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@ -1,9 +1,13 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: 2014 Citra Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 #pragma once
 #include <memory>
+#include <optional>
 namespace Core {
 class System;
@ -20,8 +24,6 @@ class GPU;
 namespace VideoCore {
 class RendererBase;
-
+void CreateGPU(std::optional<Tegra::GPU>& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system);
-/// Creates an emulated GPU instance using the given system context.
-std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
 } // namespace VideoCore