Less load; fix nv01_timer being declared as KeplerMemory instead of Nv01Timer

This commit is contained in:
lizzie 2026-04-30 23:57:07 +00:00
parent 02d16582f8
commit 9ee484b375
12 changed files with 93 additions and 79 deletions

View file

@ -271,7 +271,7 @@ struct System::Impl {
SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) {
host1x_core.emplace(system);
gpu_core = VideoCore::CreateGPU(emu_window, system);
VideoCore::CreateGPU(gpu_core, emu_window, system);
if (!gpu_core)
return SystemResultStatus::ErrorVideoCore;
@ -391,10 +391,8 @@ struct System::Impl {
is_powered_on = false;
exit_locked = false;
exit_requested = false;
if (gpu_core != nullptr) {
if (gpu_core)
gpu_core->NotifyShutdown();
}
stop_event.request_stop();
core_timing.SyncPause(false);
@ -478,6 +476,7 @@ struct System::Impl {
std::optional<Memory::CheatEngine> cheat_engine;
std::optional<Tools::Freezer> memory_freezer;
std::optional<Tools::RenderdocAPI> renderdoc_api;
std::optional<Tegra::GPU> gpu_core;
std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> gpu_dirty_memory_managers;
std::vector<std::vector<u8>> user_channel;
@ -492,7 +491,6 @@ struct System::Impl {
std::unique_ptr<FileSys::ContentProviderUnion> content_provider;
/// AppLoader used to load the current executing application
std::unique_ptr<Loader::AppLoader> app_loader;
std::unique_ptr<Tegra::GPU> gpu_core;
std::stop_source stop_event;
mutable std::mutex suspend_guard;

View file

@ -25,9 +25,9 @@ CpuManager::~CpuManager() = default;
/// Sizes the core set, creates the GPU barrier and starts one host thread per emulated core.
///
/// The barrier is created with `num_cores + 1` participants.
/// NOTE(review): the extra participant is presumably the GPU side - confirm against the
/// barrier's users, which are not visible here.
void CpuManager::Initialize() {
    num_cores = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1;
    // gpu_barrier is declared as std::optional<Common::Barrier>, so construct it in place
    // rather than assigning a std::unique_ptr to it.
    gpu_barrier.emplace(num_cores + 1);
    for (std::size_t core = 0; core < num_cores; ++core) {
        // RunThread is a non-static member taking (std::stop_token, std::size_t). The callable
        // handed to std::jthread must therefore carry `this`; a bare member-function pointer
        // plus `core` is not invocable. The lambda lets jthread supply the stop token.
        core_data[core].host_thread = std::jthread(
            [this, core](std::stop_token token) { RunThread(token, core); });
    }
}
void CpuManager::Shutdown() {
@ -63,7 +63,7 @@ void CpuManager::HandleInterrupt() {
auto& kernel = system.Kernel();
auto core_index = kernel.CurrentPhysicalCoreIndex();
Kernel::KInterruptManager::HandleInterrupt(kernel, static_cast<s32>(core_index));
Kernel::KInterruptManager::HandleInterrupt(kernel, s32(core_index));
}
///////////////////////////////////////////////////////////////////////////////

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -86,22 +89,20 @@ private:
void ShutdownThread();
void RunThread(std::stop_token stop_token, std::size_t core);
static constexpr std::size_t max_cycle_runs = 5;
std::optional<Common::Barrier> gpu_barrier{};
// Per-core host-side state; one entry per slot of the core_data array.
struct CoreData {
// NOTE(review): presumably the fiber the core's host thread runs on - confirm at the use sites.
std::shared_ptr<Common::Fiber> host_context;
// Host thread for this core; assigned in CpuManager::Initialize.
std::jthread host_thread;
};
std::unique_ptr<Common::Barrier> gpu_barrier{};
std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
bool is_async_gpu{};
bool is_multicore{};
System& system;
std::atomic<std::size_t> current_core{};
std::size_t idle_count{};
std::size_t num_cores{};
static constexpr std::size_t max_cycle_runs = 5;
System& system;
bool is_async_gpu{};
bool is_multicore{};
};
} // namespace Core

View file

@ -138,8 +138,7 @@ NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) {
static_cast<u32>((vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits)};
vm.big_page_allocator.emplace(start_big_pages, end_big_pages);
gmmu = std::make_shared<Tegra::MemoryManager>(system, max_big_page_bits, vm.va_range_split,
vm.big_page_size_bits, VM::PAGE_SIZE_BITS);
gmmu = std::make_unique<Tegra::MemoryManager>(system, max_big_page_bits, vm.va_range_split, vm.big_page_size_bits, VM::PAGE_SIZE_BITS);
system.GPU().InitAddressSpace(*gmmu);
vm.initialised = true;
@ -416,7 +415,7 @@ NvResult nvhost_as_gpu::BindChannel(IoctlBindChannel& params) {
LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
gpu_channel_device->channel_state->memory_manager = gmmu;
gpu_channel_device->channel_state->memory_manager = gmmu.get();
return NvResult::Success;
}

View file

@ -219,7 +219,7 @@ private:
bool initialised{};
} vm;
std::shared_ptr<Tegra::MemoryManager> gmmu;
std::unique_ptr<Tegra::MemoryManager> gmmu;
};
} // namespace Service::Nvidia::Devices

View file

@ -17,28 +17,36 @@
namespace Tegra::Control {
ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
/// Builds every engine of a channel, plus its DMA pusher, against a single memory manager.
///
/// Fermi2D is the only engine constructed without the system reference, and the DMA pusher is
/// the only member that also receives the owning channel state.
ChannelState::Payload::Payload(Core::System& system, MemoryManager& memory_manager,
                               ChannelState& channel_state)
    : maxwell_3d(system, memory_manager),
      fermi_2d(memory_manager),
      kepler_compute(system, memory_manager),
      maxwell_dma(system, memory_manager),
      kepler_memory(system, memory_manager),
      nv01_timer(system, memory_manager),
      dma_pusher(system, memory_manager, channel_state) {}
ChannelState::ChannelState(s32 bind_id_)
: bind_id{bind_id_}
{}
/// Records the program id and constructs the channel's engines and DMA pusher.
/// memory_manager must already be set; this is asserted before anything is constructed.
void ChannelState::Init(Core::System& system, u64 program_id_) {
ASSERT(memory_manager);
program_id = program_id_;
// NOTE(review): this is a diff hunk. The six per-member emplace calls below look like the
// pre-change form, and the single payload.emplace further down is their replacement.
dma_pusher.emplace(system, *memory_manager, *this);
maxwell_3d.emplace(system, *memory_manager);
fermi_2d.emplace(*memory_manager);
kepler_compute.emplace(system, *memory_manager);
maxwell_dma.emplace(system, *memory_manager);
kepler_memory.emplace(system, *memory_manager);
// Constructs all engines and the DMA pusher in one step via the Payload constructor.
payload.emplace(system, *memory_manager, *this);
initialized = true;
}
/// Forwards the rasterizer to the DMA pusher, the memory manager and the engines.
/// Nothing here checks that payload or memory_manager is set before dereferencing it.
void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
// NOTE(review): this is a diff hunk. Lines that dereference members directly (dma_pusher->,
// maxwell_3d->, ...) look like the pre-change form of the payload-> lines.
dma_pusher->BindRasterizer(rasterizer);
payload->dma_pusher.BindRasterizer(rasterizer);
memory_manager->BindRasterizer(rasterizer);
maxwell_3d->BindRasterizer(rasterizer);
fermi_2d->BindRasterizer(rasterizer);
kepler_memory->BindRasterizer(rasterizer);
kepler_compute->BindRasterizer(rasterizer);
maxwell_dma->BindRasterizer(rasterizer);
payload->maxwell_3d.BindRasterizer(rasterizer);
payload->fermi_2d.BindRasterizer(rasterizer);
payload->kepler_memory.BindRasterizer(rasterizer);
payload->kepler_compute.BindRasterizer(rasterizer);
payload->maxwell_dma.BindRasterizer(rasterizer);
// NOTE(review): binding for the NV01 timer is left disabled. Whether Nv01Timer exposes
// BindRasterizer is not visible here - confirm before enabling.
//payload->nv01_timer.BindRasterizer(rasterizer);
}
} // namespace Tegra::Control

View file

@ -14,6 +14,7 @@
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/nv01_timer.h"
#include "video_core/dma_pusher.h"
namespace Core {
@ -38,24 +39,29 @@ struct ChannelState {
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
// Groups the per-channel engines and the DMA pusher so they are built together.
// Non-static members are initialized in declaration order, so keep the order of the direct
// (non-optional) members in sync with the constructor's initializer list.
// NOTE(review): this is a diff hunk. Each std::optional<...> line looks like the pre-change
// declaration of the direct member that follows it.
struct Payload {
explicit Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state);
/// 3D engine
std::optional<Engines::Maxwell3D> maxwell_3d;
Engines::Maxwell3D maxwell_3d;
/// 2D engine
std::optional<Engines::Fermi2D> fermi_2d;
Engines::Fermi2D fermi_2d;
/// Compute engine
std::optional<Engines::KeplerCompute> kepler_compute;
Engines::KeplerCompute kepler_compute;
/// DMA engine
std::optional<Engines::MaxwellDMA> maxwell_dma;
Engines::MaxwellDMA maxwell_dma;
/// Inline memory engine
std::optional<Engines::KeplerMemory> kepler_memory;
Engines::KeplerMemory kepler_memory;
/// NV01 Timer
// This declaration types the timer as KeplerMemory; the Engines::Nv01Timer line below is the
// corrected type.
std::optional<Engines::KeplerMemory> nv01_timer;
std::optional<DmaPusher> dma_pusher;
std::shared_ptr<MemoryManager> memory_manager;
Engines::Nv01Timer nv01_timer;
DmaPusher dma_pusher;
};
std::optional<Payload> payload;
MemoryManager* memory_manager = nullptr;
s32 bind_id = -1;
u64 program_id = 0;
bool initialized{};
bool initialized = false;
};
} // namespace Control

View file

@ -189,12 +189,12 @@ struct GPU::Impl {
/// Push GPU command entries to be processed
// NOTE(review): the two SubmitList lines are the before/after forms of one call in this diff.
// The three-argument form, which passes is_async per call, is the one that matches the
// ThreadManager::SubmitList overload taking a bool.
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
gpu_thread.SubmitList(channel, std::move(entries));
gpu_thread.SubmitList(channel, std::move(entries), is_async);
}
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
// NOTE(review): the two FlushRegion lines are the before/after forms of one call in this diff;
// the second one passes is_async explicitly.
void FlushRegion(DAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
gpu_thread.FlushRegion(addr, size, is_async);
}
VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
@ -206,7 +206,7 @@ struct GPU::Impl {
const u64 fence = RequestSyncOperation([this, &raster_area]() {
renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
});
gpu_thread.TickGPU();
gpu_thread.TickGPU(is_async);
WaitForSyncOperation(fence);
return raster_area;
}
@ -222,7 +222,7 @@ struct GPU::Impl {
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
// NOTE(review): the two calls are the before/after forms of one call in this diff; the second
// one passes is_async explicitly.
void FlushAndInvalidateRegion(DAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
gpu_thread.FlushAndInvalidateRegion(addr, size, is_async);
}
void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, std::vector<Service::Nvidia::NvFence>&& fences) {
@ -258,7 +258,7 @@ struct GPU::Impl {
syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
}
});
gpu_thread.TickGPU();
gpu_thread.TickGPU(is_async);
WaitForSyncOperation(wait_fence);
}
@ -267,7 +267,7 @@ struct GPU::Impl {
const auto wait_fence =
RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); });
gpu_thread.TickGPU();
gpu_thread.TickGPU(is_async);
WaitForSyncOperation(wait_fence);
return out;

View file

@ -20,7 +20,8 @@
namespace VideoCommon::GPUThread {
/// Stores the system reference.
// NOTE(review): the first initializer list also stores is_async; the second one does not.
// With the second form the is_async_ parameter is accepted but unused - consider
// [[maybe_unused]] or dropping it once callers are updated.
ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
: system{system_}, is_async{is_async_} {}
: system{system_}
{}
ThreadManager::~ThreadManager() = default;
@ -60,41 +61,41 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Fronten
});
}
/// Wraps the entries in a SubmitListCommand and queues it for the GPU thread.
// NOTE(review): diff hunk - the first signature/body pair is the pre-change form, the second
// takes is_async from the caller and requests a non-blocking push (block = false).
void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
PushCommand(SubmitListCommand(channel, std::move(entries)));
void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async) {
PushCommand(SubmitListCommand(channel, std::move(entries)), false, is_async);
}
/// Queues a flush of the region, but only when is_async is false; otherwise does nothing.
// NOTE(review): diff hunk - the first signature and the one-argument PushCommand call are the
// pre-change form of the lines that follow them.
void ThreadManager::FlushRegion(DAddr addr, u64 size) {
void ThreadManager::FlushRegion(DAddr addr, u64 size, bool is_async) {
if (!is_async) {
// Always flush with synchronous GPU mode
PushCommand(FlushRegionCommand(addr, size));
// is_async is false on this path, which is the case where PushCommand turns blocking on.
PushCommand(FlushRegionCommand(addr, size), false, is_async);
}
// Redundant: the function ends here either way.
return;
}
/// Queues a GPUTickCommand for the GPU thread.
// NOTE(review): diff hunk - the first signature/body pair is the pre-change form of the second.
void ThreadManager::TickGPU() {
PushCommand(GPUTickCommand());
void ThreadManager::TickGPU(bool is_async) {
PushCommand(GPUTickCommand(), false, is_async);
}
/// Reports a cache invalidation for the given address and size to the rasterizer.
/// The call goes to the rasterizer directly; no command is pushed to the GPU thread.
void ThreadManager::InvalidateRegion(DAddr addr, u64 size) {
    rasterizer->OnCacheInvalidation(addr, size);
}
/// Flushes the region when Settings::IsGPULevelHigh() is true, then always reports the
/// invalidation to the rasterizer.
// NOTE(review): diff hunk - the first signature, the one-argument PushCommand call and the
// argument-less TickGPU call are the pre-change forms of the lines that follow them.
void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) {
void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async) {
if (Settings::IsGPULevelHigh()) {
if (!is_async) {
// Synchronous mode: queue the flush as a command.
PushCommand(FlushRegionCommand(addr, size));
PushCommand(FlushRegionCommand(addr, size), false, is_async);
} else {
// Asynchronous mode: request a flush fence, tick the GPU, then wait on the fence.
auto& gpu = system.GPU();
const u64 fence = gpu.RequestFlush(addr, size);
TickGPU();
TickGPU(is_async);
gpu.WaitForSyncOperation(fence);
}
}
rasterizer->OnCacheInvalidation(addr, size);
}
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block, bool is_async) {
if (!is_async) {
// In synchronous GPU mode, block the caller until the command has executed
block = true;

View file

@ -15,6 +15,7 @@
#include "common/bounded_threadsafe_queue.h"
#include "common/polyfill_thread.h"
#include "video_core/dma_pusher.h"
#include "video_core/framebuffer_config.h"
namespace Tegra {
@ -111,27 +112,25 @@ public:
Tegra::Control::Scheduler& scheduler);
/// Push GPU command entries to be processed
void SubmitList(s32 channel, Tegra::CommandList&& entries);
void SubmitList(s32 channel, Tegra::CommandList&& entries, bool is_async);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(DAddr addr, u64 size);
void FlushRegion(DAddr addr, u64 size, bool is_async);
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(DAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(DAddr addr, u64 size);
void FlushAndInvalidateRegion(DAddr addr, u64 size, bool is_async);
void TickGPU();
void TickGPU(bool is_async);
private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data, bool block = false);
u64 PushCommand(CommandData&& command_data, bool block, bool is_async);
Core::System& system;
const bool is_async;
VideoCore::RasterizerInterface* rasterizer = nullptr;
SynchState state;
std::jthread thread;
};

View file

@ -45,23 +45,23 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::System& system, Co
namespace VideoCore {
// NOTE(review): diff hunk - the unique_ptr-returning signature, the make_unique line and the
// two return statements are the pre-change form; the void signature fills `gpu` instead.
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
/// @brief Creates an emulated GPU instance using the given system context.
/// @param gpu Output slot. Holds the GPU on success and is reset to empty when renderer
///            creation throws std::runtime_error, so callers detect failure via `!gpu`.
/// @param emu_window Window used to create the shared graphics context.
/// @param system System the GPU and renderer are created for.
void CreateGPU(std::optional<Tegra::GPU>& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system) {
Settings::UpdateRescalingInfo();
const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
gpu.emplace(system, use_async, use_nvdec);
auto context = emu_window.CreateSharedContext();
auto scope = context->Acquire();
try {
auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
gpu->BindRenderer(std::move(renderer));
return gpu;
} catch (const std::runtime_error& exception) {
// Only std::runtime_error is handled here; any other exception type propagates.
scope.Cancel();
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
gpu.reset();
}
}

View file

@ -1,9 +1,13 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2014 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <optional>
namespace Core {
class System;
@ -20,8 +24,6 @@ class GPU;
namespace VideoCore {
class RendererBase;
/// Creates an emulated GPU instance using the given system context.
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system);
// Out-parameter form: the GPU is written into `gpu` and nothing is returned.
// NOTE(review): in this diff the declaration above appears to be the pre-change one.
void CreateGPU(std::optional<Tegra::GPU>& gpu, Core::Frontend::EmuWindow& emu_window, Core::System& system);
} // namespace VideoCore