Reduce load, fix nv01_timer being declared as KeplerMemory instead of Nv01Timer

This commit is contained in:
lizzie 2026-04-30 23:57:07 +00:00
parent 02d16582f8
commit 9ee484b375
12 changed files with 93 additions and 79 deletions

View file

@ -271,7 +271,7 @@ struct System::Impl {
SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) { SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) {
host1x_core.emplace(system); host1x_core.emplace(system);
gpu_core = VideoCore::CreateGPU(emu_window, system); VideoCore::CreateGPU(gpu_core, emu_window, system);
if (!gpu_core) if (!gpu_core)
return SystemResultStatus::ErrorVideoCore; return SystemResultStatus::ErrorVideoCore;
@ -391,10 +391,8 @@ struct System::Impl {
is_powered_on = false; is_powered_on = false;
exit_locked = false; exit_locked = false;
exit_requested = false; exit_requested = false;
if (gpu_core)
if (gpu_core != nullptr) {
gpu_core->NotifyShutdown(); gpu_core->NotifyShutdown();
}
stop_event.request_stop(); stop_event.request_stop();
core_timing.SyncPause(false); core_timing.SyncPause(false);
@ -478,6 +476,7 @@ struct System::Impl {
std::optional<Memory::CheatEngine> cheat_engine; std::optional<Memory::CheatEngine> cheat_engine;
std::optional<Tools::Freezer> memory_freezer; std::optional<Tools::Freezer> memory_freezer;
std::optional<Tools::RenderdocAPI> renderdoc_api; std::optional<Tools::RenderdocAPI> renderdoc_api;
std::optional<Tegra::GPU> gpu_core;
std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> gpu_dirty_memory_managers; std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> gpu_dirty_memory_managers;
std::vector<std::vector<u8>> user_channel; std::vector<std::vector<u8>> user_channel;
@ -492,7 +491,6 @@ struct System::Impl {
std::unique_ptr<FileSys::ContentProviderUnion> content_provider; std::unique_ptr<FileSys::ContentProviderUnion> content_provider;
/// AppLoader used to load the current executing application /// AppLoader used to load the current executing application
std::unique_ptr<Loader::AppLoader> app_loader; std::unique_ptr<Loader::AppLoader> app_loader;
std::unique_ptr<Tegra::GPU> gpu_core;
std::stop_source stop_event; std::stop_source stop_event;
mutable std::mutex suspend_guard; mutable std::mutex suspend_guard;

View file

@ -25,9 +25,9 @@ CpuManager::~CpuManager() = default;
void CpuManager::Initialize() { void CpuManager::Initialize() {
num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1; num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1;
gpu_barrier = std::make_unique<Common::Barrier>(num_cores + 1); gpu_barrier.emplace(num_cores + 1);
for (std::size_t core = 0; core < num_cores; core++) for (std::size_t core = 0; core < num_cores; core++)
core_data[core].host_thread = std::jthread([this, core](std::stop_token token) { RunThread(token, core); }); core_data[core].host_thread = std::jthread(&CpuManager::RunThread, core);
} }
void CpuManager::Shutdown() { void CpuManager::Shutdown() {
@ -63,7 +63,7 @@ void CpuManager::HandleInterrupt() {
auto& kernel = system.Kernel(); auto& kernel = system.Kernel();
auto core_index = kernel.CurrentPhysicalCoreIndex(); auto core_index = kernel.CurrentPhysicalCoreIndex();
Kernel::KInterruptManager::HandleInterrupt(kernel, static_cast<s32>(core_index)); Kernel::KInterruptManager::HandleInterrupt(kernel, s32(core_index));
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -86,22 +89,20 @@ private:
void ShutdownThread(); void ShutdownThread();
void RunThread(std::stop_token stop_token, std::size_t core); void RunThread(std::stop_token stop_token, std::size_t core);
static constexpr std::size_t max_cycle_runs = 5;
std::optional<Common::Barrier> gpu_barrier{};
struct CoreData { struct CoreData {
std::shared_ptr<Common::Fiber> host_context; std::shared_ptr<Common::Fiber> host_context;
std::jthread host_thread; std::jthread host_thread;
}; };
std::unique_ptr<Common::Barrier> gpu_barrier{};
std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{}; std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
System& system;
bool is_async_gpu{};
bool is_multicore{};
std::atomic<std::size_t> current_core{}; std::atomic<std::size_t> current_core{};
std::size_t idle_count{}; std::size_t idle_count{};
std::size_t num_cores{}; std::size_t num_cores{};
static constexpr std::size_t max_cycle_runs = 5; bool is_async_gpu{};
bool is_multicore{};
System& system;
}; };
} // namespace Core } // namespace Core

View file

@ -138,8 +138,7 @@ NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) {
static_cast<u32>((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)}; static_cast<u32>((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)};
vm.big_page_allocator.emplace(start_big_pages, end_big_pages); vm.big_page_allocator.emplace(start_big_pages, end_big_pages);
gmmu = std::make_shared<Tegra::MemoryManager>(system, max_big_page_bits, vm.va_range_split, gmmu = std::make_unique<Tegra::MemoryManager>(system, max_big_page_bits, vm.va_range_split, vm.big_page_size_bits, VM::PAGE_SIZE_BITS);
vm.big_page_size_bits, VM::PAGE_SIZE_BITS);
system.GPU().InitAddressSpace(*gmmu); system.GPU().InitAddressSpace(*gmmu);
vm.initialised = true; vm.initialised = true;
@ -416,7 +415,7 @@ NvResult nvhost_as_gpu::BindChannel(IoctlBindChannel& params) {
LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd); auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
gpu_channel_device->channel_state->memory_manager = gmmu; gpu_channel_device->channel_state->memory_manager = gmmu.get();
return NvResult::Success; return NvResult::Success;
} }

View file

@ -219,7 +219,7 @@ private:
bool initialised{}; bool initialised{};
} vm; } vm;
std::shared_ptr<Tegra::MemoryManager> gmmu; std::unique_ptr<Tegra::MemoryManager> gmmu;
}; };
} // namespace Service::Nvidia::Devices } // namespace Service::Nvidia::Devices

View file

@ -17,28 +17,36 @@
namespace Tegra::Control { namespace Tegra::Control {
ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {} ChannelState::Payload::Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state)
: maxwell_3d(system, memory_manager)
, fermi_2d(memory_manager)
, kepler_compute(system, memory_manager)
, maxwell_dma(system, memory_manager)
, kepler_memory(system, memory_manager)
, nv01_timer(system, memory_manager)
, dma_pusher(system, memory_manager, channel_state)
{}
ChannelState::ChannelState(s32 bind_id_)
: bind_id{bind_id_}
{}
void ChannelState::Init(Core::System& system, u64 program_id_) { void ChannelState::Init(Core::System& system, u64 program_id_) {
ASSERT(memory_manager); ASSERT(memory_manager);
program_id = program_id_; program_id = program_id_;
dma_pusher.emplace(system, *memory_manager, *this); payload.emplace(system, *memory_manager, *this);
maxwell_3d.emplace(system, *memory_manager);
fermi_2d.emplace(*memory_manager);
kepler_compute.emplace(system, *memory_manager);
maxwell_dma.emplace(system, *memory_manager);
kepler_memory.emplace(system, *memory_manager);
initialized = true; initialized = true;
} }
void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) { void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
dma_pusher->BindRasterizer(rasterizer); payload->dma_pusher.BindRasterizer(rasterizer);
memory_manager->BindRasterizer(rasterizer); memory_manager->BindRasterizer(rasterizer);
maxwell_3d->BindRasterizer(rasterizer); payload->maxwell_3d.BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer); payload->fermi_2d.BindRasterizer(rasterizer);
kepler_memory->BindRasterizer(rasterizer); payload->kepler_memory.BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer); payload->kepler_compute.BindRasterizer(rasterizer);
maxwell_dma->BindRasterizer(rasterizer); payload->maxwell_dma.BindRasterizer(rasterizer);
//payload->nv01_timer.BindRasterizer(rasterizer);
} }
} // namespace Tegra::Control } // namespace Tegra::Control

View file

@ -14,6 +14,7 @@
#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h" #include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/nv01_timer.h"
#include "video_core/dma_pusher.h" #include "video_core/dma_pusher.h"
namespace Core { namespace Core {
@ -38,24 +39,29 @@ struct ChannelState {
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
/// 3D engine struct Payload {
std::optional<Engines::Maxwell3D> maxwell_3d; explicit Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state);
/// 2D engine
std::optional<Engines::Fermi2D> fermi_2d; /// 3D engine
/// Compute engine Engines::Maxwell3D maxwell_3d;
std::optional<Engines::KeplerCompute> kepler_compute; /// 2D engine
/// DMA engine Engines::Fermi2D fermi_2d;
std::optional<Engines::MaxwellDMA> maxwell_dma; /// Compute engine
/// Inline memory engine Engines::KeplerCompute kepler_compute;
std::optional<Engines::KeplerMemory> kepler_memory; /// DMA engine
/// NV01 Timer Engines::MaxwellDMA maxwell_dma;
std::optional<Engines::KeplerMemory> nv01_timer; /// Inline memory engine
std::optional<DmaPusher> dma_pusher; Engines::KeplerMemory kepler_memory;
std::shared_ptr<MemoryManager> memory_manager; /// NV01 Timer
Engines::Nv01Timer nv01_timer;
DmaPusher dma_pusher;
};
std::optional<Payload> payload;
MemoryManager* memory_manager = nullptr;
s32 bind_id = -1; s32 bind_id = -1;
u64 program_id = 0; u64 program_id = 0;
bool initialized{}; bool initialized = false;
}; };
} // namespace Control } // namespace Control

View file

@ -189,12 +189,12 @@ struct GPU::Impl {
/// Push GPU command entries to be processed /// Push GPU command entries to be processed
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) { void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
gpu_thread.SubmitList(channel, std::move(entries)); gpu_thread.SubmitList(channel, std::move(entries), is_async);
} }
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(DAddr addr, u64 size) { void FlushRegion(DAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size); gpu_thread.FlushRegion(addr, size, is_async);
} }
VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) { VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
@ -206,7 +206,7 @@ struct GPU::Impl {
const u64 fence = RequestSyncOperation([this, &raster_area]() { const u64 fence = RequestSyncOperation([this, &raster_area]() {
renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address); renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
}); });
gpu_thread.TickGPU(); gpu_thread.TickGPU(is_async);
WaitForSyncOperation(fence); WaitForSyncOperation(fence);
return raster_area; return raster_area;
} }
@ -222,7 +222,7 @@ struct GPU::Impl {
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(DAddr addr, u64 size) { void FlushAndInvalidateRegion(DAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size); gpu_thread.FlushAndInvalidateRegion(addr, size, is_async);
} }
void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, std::vector<Service::Nvidia::NvFence>&& fences) { void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, std::vector<Service::Nvidia::NvFence>&& fences) {
@ -258,7 +258,7 @@ struct GPU::Impl {
syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
} }
}); });
gpu_thread.TickGPU(); gpu_thread.TickGPU(is_async);
WaitForSyncOperation(wait_fence); WaitForSyncOperation(wait_fence);
} }
@ -267,7 +267,7 @@ struct GPU::Impl {
const auto wait_fence = const auto wait_fence =
RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); }); RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); });
gpu_thread.TickGPU(); gpu_thread.TickGPU(is_async);
WaitForSyncOperation(wait_fence); WaitForSyncOperation(wait_fence);
return out; return out;

View file

@ -20,7 +20,8 @@
namespace VideoCommon::GPUThread { namespace VideoCommon::GPUThread {
ThreadManager::ThreadManager(Core::System& system_, bool is_async_) ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
: system{system_}, is_async{is_async_} {} : system{system_}
{}
ThreadManager::~ThreadManager() = default; ThreadManager::~ThreadManager() = default;
@ -60,41 +61,41 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Fronten
}); });
} }
void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async) {
PushCommand(SubmitListCommand(channel, std::move(entries))); PushCommand(SubmitListCommand(channel, std::move(entries)), false, is_async);
} }
void ThreadManager::FlushRegion(DAddr addr, u64 size) { void ThreadManager::FlushRegion(DAddr addr, u64 size, bool is_async) {
if (!is_async) { if (!is_async) {
// Always flush with synchronous GPU mode // Always flush with synchronous GPU mode
PushCommand(FlushRegionCommand(addr, size)); PushCommand(FlushRegionCommand(addr, size), false, is_async);
} }
return; return;
} }
void ThreadManager::TickGPU() { void ThreadManager::TickGPU(bool is_async) {
PushCommand(GPUTickCommand()); PushCommand(GPUTickCommand(), false, is_async);
} }
void ThreadManager::InvalidateRegion(DAddr addr, u64 size) { void ThreadManager::InvalidateRegion(DAddr addr, u64 size) {
rasterizer->OnCacheInvalidation(addr, size); rasterizer->OnCacheInvalidation(addr, size);
} }
void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) { void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async) {
if (Settings::IsGPULevelHigh()) { if (Settings::IsGPULevelHigh()) {
if (!is_async) { if (!is_async) {
PushCommand(FlushRegionCommand(addr, size)); PushCommand(FlushRegionCommand(addr, size), false, is_async);
} else { } else {
auto& gpu = system.GPU(); auto& gpu = system.GPU();
const u64 fence = gpu.RequestFlush(addr, size); const u64 fence = gpu.RequestFlush(addr, size);
TickGPU(); TickGPU(is_async);
gpu.WaitForSyncOperation(fence); gpu.WaitForSyncOperation(fence);
} }
} }
rasterizer->OnCacheInvalidation(addr, size); rasterizer->OnCacheInvalidation(addr, size);
} }
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { u64 ThreadManager::PushCommand(CommandData&& command_data, bool block, bool is_async) {
if (!is_async) { if (!is_async) {
// In synchronous GPU mode, block the caller until the command has executed // In synchronous GPU mode, block the caller until the command has executed
block = true; block = true;

View file

@ -15,6 +15,7 @@
#include "common/bounded_threadsafe_queue.h" #include "common/bounded_threadsafe_queue.h"
#include "common/polyfill_thread.h" #include "common/polyfill_thread.h"
#include "video_core/dma_pusher.h"
#include "video_core/framebuffer_config.h" #include "video_core/framebuffer_config.h"
namespace Tegra { namespace Tegra {
@ -111,27 +112,25 @@ public:
Tegra::Control::Scheduler& scheduler); Tegra::Control::Scheduler& scheduler);
/// Push GPU command entries to be processed /// Push GPU command entries to be processed
void SubmitList(s32 channel, Tegra::CommandList&& entries); void SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(DAddr addr, u64 size); void FlushRegion(DAddr addr, u64 size, bool is_async);
/// Notify rasterizer that any caches of the specified region should be invalidated /// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(DAddr addr, u64 size); void InvalidateRegion(DAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(DAddr addr, u64 size); void FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async);
void TickGPU(); void TickGPU(bool is_async);
private: private:
/// Pushes a command to be executed by the GPU thread /// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data, bool block = false); u64 PushCommand(CommandData&& command_data, bool block, bool is_async);
Core::System& system; Core::System& system;
const bool is_async;
VideoCore::RasterizerInterface* rasterizer = nullptr; VideoCore::RasterizerInterface* rasterizer = nullptr;
SynchState state; SynchState state;
std::jthread thread; std::jthread thread;
}; };

View file

@ -45,23 +45,23 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::System& system, Co
namespace VideoCore { namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { /// @brief Creates an emulated GPU instance using the given system context.
void CreateGPU(std::optional<Tegra::GPU>& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system) {
Settings::UpdateRescalingInfo(); Settings::UpdateRescalingInfo();
const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec); gpu.emplace(system, use_async, use_nvdec);
auto context = emu_window.CreateSharedContext(); auto context = emu_window.CreateSharedContext();
auto scope = context->Acquire(); auto scope = context->Acquire();
try { try {
auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
gpu->BindRenderer(std::move(renderer)); gpu->BindRenderer(std::move(renderer));
return gpu;
} catch (const std::runtime_error& exception) { } catch (const std::runtime_error& exception) {
scope.Cancel(); scope.Cancel();
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr; gpu.reset();
} }
} }

View file

@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <memory> #include <memory>
#include <optional>
namespace Core { namespace Core {
class System; class System;
@ -20,8 +24,6 @@ class GPU;
namespace VideoCore { namespace VideoCore {
class RendererBase; class RendererBase;
void CreateGPU(std::optional<Tegra::GPU>& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system);
/// Creates an emulated GPU instance using the given system context.
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
} // namespace VideoCore } // namespace VideoCore