From 736fa0540eb3a4d574669d5f831cccead5d8c45b Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 1 May 2026 16:21:21 +0000 Subject: [PATCH] better inlining --- src/video_core/cdma_pusher.cpp | 5 +- src/video_core/gpu.cpp | 2 +- src/video_core/gpu_thread.cpp | 3 +- src/video_core/gpu_thread.h | 2 +- src/video_core/host1x/control.cpp | 14 ++- src/video_core/host1x/control.h | 13 ++- src/video_core/host1x/host1x.cpp | 8 +- src/video_core/host1x/host1x.h | 137 +++++++++++++----------------- 8 files changed, 77 insertions(+), 107 deletions(-) diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index 1f0f8b5a38..635ca769d8 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -20,8 +20,7 @@ namespace Tegra { CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id) - : host_processor(host1x_) - , host1x{host1x_} + : host1x{host1x_} , current_class{ChClassId(id)} { thread = std::jthread([this](std::stop_token stop_token) { @@ -99,7 +98,7 @@ void CDmaPusher::ExecuteCommand(u32 method, u32 arg) { switch (current_class) { case ChClassId::Control: LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), method, arg); - host_processor.ProcessMethod(Host1x::Control::Method(method), arg); + host_processor.ProcessMethod(host1x, Host1x::Control::Method(method), arg); break; default: thi_regs.reg_array[method] = arg; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index fd79e1ed2e..398f7ca02b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -45,7 +45,7 @@ struct GPU::Impl { , use_nvdec{use_nvdec_} , shader_notify() , is_async{is_async_} - , gpu_thread{system_, is_async_} + , gpu_thread{system_} {} ~Impl() = default; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index bb31dac7be..50570b596d 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -19,7 +19,7 @@ namespace VideoCommon::GPUThread { -ThreadManager::ThreadManager(Core::System& system_, bool is_async_) +ThreadManager::ThreadManager(Core::System& system_) : system{system_} {} @@ -70,7 +70,6 @@ void ThreadManager::FlushRegion(DAddr addr, u64 size, bool is_async) { // Always flush with synchronous GPU mode PushCommand(FlushRegionCommand(addr, size), false, is_async); } - return; } void ThreadManager::TickGPU(bool is_async) { diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index b48265922f..4c3fadce15 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -104,7 +104,7 @@ struct SynchState final { /// Class used to manage the GPU thread class ThreadManager final { public: - explicit ThreadManager(Core::System& system_, bool is_async_); + explicit ThreadManager(Core::System& system_); ~ThreadManager(); /// Creates and starts the GPU thread. diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp index 53b3063557..93ee57456b 100644 --- a/src/video_core/host1x/control.cpp +++ b/src/video_core/host1x/control.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: 2021 yuzu Emulator Project @@ -10,26 +10,22 @@ namespace Tegra::Host1x { -Control::Control(Host1x& host1x_) : host1x(host1x_) {} - -Control::~Control() = default; - -void Control::ProcessMethod(Method method, u32 argument) { +void Control::ProcessMethod(Host1x& host1x, Method method, u32 argument) { switch (method) { case Method::LoadSyncptPayload32: syncpoint_value = argument; break; case Method::WaitSyncpt: case Method::WaitSyncpt32: - Execute(argument); + Execute(host1x, argument); break; default: - UNIMPLEMENTED_MSG("Control method {:#X}", static_cast(method)); + UNIMPLEMENTED_MSG("Control method {:#X}", u32(method)); break; } } -void Control::Execute(u32 data) { +void Control::Execute(Host1x& host1x, u32 data) { LOG_TRACE(Service_NVDRV, "Control wait syncpt {} value {}", data, syncpoint_value); host1x.GetSyncpointManager().WaitHost(data, syncpoint_value); } diff --git a/src/video_core/host1x/control.h b/src/video_core/host1x/control.h index bd8a2d7ad4..4d87e63b32 100644 --- a/src/video_core/host1x/control.h +++ b/src/video_core/host1x/control.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2021 yuzu Emulator Project // SPDX-FileCopyrightText: 2021 Skyline Team and Contributors // SPDX-License-Identifier: GPL-3.0-or-later @@ -19,17 +22,11 @@ public: WaitSyncpt32 = 0x50, }; - explicit Control(Host1x& host1x); - ~Control(); - /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, u32 argument); - -private: + void ProcessMethod(Host1x& host1x, Method method, u32 argument); /// For Host1x, execute is waiting on a syncpoint previously written into the state - void Execute(u32 data); + void Execute(Host1x& host1x, u32 data); - Host1x& host1x; u32 syncpoint_value{}; }; diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index b4e4a38c80..590b0e0ff2 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp @@ -27,22 +27,22 @@ void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) { #ifdef YUZU_LEGACY std::call_once(nvdec_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer #endif - devices[fd] = std::make_unique(*this, fd, syncpt); + devices[fd].emplace(*this, fd, syncpt); break; case ChannelType::VIC: #ifdef YUZU_LEGACY std::call_once(vic_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer #endif - devices[fd] = std::make_unique(*this, fd, syncpt); + devices[fd].emplace(*this, fd, syncpt); break; default: - LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", static_cast(type)); + LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", u32(type)); break; } } void Host1x::StopDevice(s32 fd, ChannelType type) { - devices.erase(fd); + devices[fd].emplace(); } } // namespace Tegra::Host1x diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 838e749072..95af998ac0 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include "common/common_types.h" @@ -17,6 +17,9 @@ #include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/host1x/syncpoint_manager.h" #include "video_core/memory_manager.h" +// fd types? +#include "video_core/host1x/nvdec.h" +#include "video_core/host1x/vic.h" namespace Core { class System; @@ -31,118 +34,90 @@ class Nvdec; class FrameQueue { public: + struct FrameDevice { + std::deque>> m_presentation_order; + std::unordered_map> m_decode_order; + }; + void Open(s32 fd) { std::scoped_lock l{m_mutex}; - m_presentation_order.insert({fd, {}}); - m_decode_order.insert({fd, {}}); + m_frame_devices.insert_or_assign(fd, FrameDevice{}); } void Close(s32 fd) { std::scoped_lock l{m_mutex}; - m_presentation_order.erase(fd); - m_decode_order.erase(fd); + m_frame_devices.erase(fd); } s32 VicFindNvdecFdFromOffset(u64 search_offset) { std::scoped_lock l{m_mutex}; - for (auto& map : m_presentation_order) { - for (auto& [offset, frame] : map.second) { - if (offset == search_offset) { - return map.first; - } - } - } - for (auto& map : m_decode_order) { - for (auto& [offset, frame] : map.second) { - if (offset == search_offset) { - return map.first; - } - } + for (auto const& [fd, dev] : m_frame_devices) { + for (auto const& [offset, frame] : dev.m_presentation_order) + if (offset == search_offset) + return fd; + for (auto const& [offset, frame] : dev.m_decode_order) + if (offset == search_offset) + return fd; } return -1; } void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr&& frame) { std::scoped_lock l{m_mutex}; - auto map = m_presentation_order.find(fd); - if (map == m_presentation_order.end()) { - return; + if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) { + if (it->second.m_presentation_order.size() >= MAX_PRESENT_QUEUE) + it->second.m_presentation_order.pop_front(); + it->second.m_presentation_order.emplace_back(offset, std::move(frame)); } - - if (map->second.size() >= MAX_PRESENT_QUEUE) { - map->second.pop_front(); - } - - map->second.emplace_back(offset, std::move(frame)); } void PushDecodeOrder(s32 fd, u64 offset, std::shared_ptr&& frame) { std::scoped_lock l{m_mutex}; - auto map = m_decode_order.find(fd); - if (map == m_decode_order.end()) { - return; - } - - map->second.insert_or_assign(offset, std::move(frame)); - - if (map->second.size() > MAX_DECODE_MAP) { - auto it = map->second.begin(); - std::advance(it, map->second.size() - MAX_DECODE_MAP); - map->second.erase(map->second.begin(), it); + if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) { + it->second.m_decode_order.insert_or_assign(offset, std::move(frame)); + if (it->second.m_decode_order.size() > MAX_DECODE_MAP) { + auto it2 = it->second.m_decode_order.begin(); + std::advance(it2, it->second.m_decode_order.size() - MAX_DECODE_MAP); + it->second.m_decode_order.erase(it->second.m_decode_order.begin(), it2); + } } } std::shared_ptr GetFrame(s32 fd, u64 offset) { - if (fd == -1) { - return {}; + if (fd != -1) { + std::scoped_lock l{m_mutex}; + if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) { + if (it->second.m_presentation_order.size() > 0) + return GetPresentOrderLocked(fd); + if (it->second.m_decode_order.size() > 0) + return GetDecodeOrderLocked(fd, offset); + } } - - std::scoped_lock l{m_mutex}; - - auto present_map = m_presentation_order.find(fd); - if (present_map != m_presentation_order.end() && !present_map->second.empty()) { - return GetPresentOrderLocked(fd); - } - - auto decode_map = m_decode_order.find(fd); - if (decode_map != m_decode_order.end() && !decode_map->second.empty()) { - return GetDecodeOrderLocked(fd, offset); - } - return {}; } private: std::shared_ptr GetPresentOrderLocked(s32 fd) { - auto map = m_presentation_order.find(fd); - if (map == m_presentation_order.end() || map->second.empty()) { - return {}; + if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) { + auto frame = std::move(it->second.m_presentation_order.front().second); + it->second.m_presentation_order.pop_front(); + return frame; } - - auto frame = std::move(map->second.front().second); - map->second.pop_front(); - return frame; + return {}; } std::shared_ptr GetDecodeOrderLocked(s32 fd, u64 offset) { - auto map = m_decode_order.find(fd); - if (map == m_decode_order.end() || map->second.empty()) { - return {}; + if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) { + if (auto const it2 = it->second.m_decode_order.find(offset); it2 != it->second.m_decode_order.end()) { + // TODO: this "mapped" prevents us from fully embracing ankerl + return std::move(it->second.m_decode_order.extract(it2).mapped()); + } } - - auto it = map->second.find(offset); - if (it == map->second.end()) { - return {}; - } - // TODO: this "mapped" prevents us from fully embracing ankerl - return std::move(map->second.extract(it).mapped()); + return {}; } - using FramePtr = std::shared_ptr; - std::mutex m_mutex{}; - ankerl::unordered_dense::map>> m_presentation_order; - ankerl::unordered_dense::map> m_decode_order; + ankerl::unordered_dense::map m_frame_devices; static constexpr size_t MAX_PRESENT_QUEUE = 100; static constexpr size_t MAX_DECODE_MAP = 200; @@ -196,11 +171,11 @@ public: void StopDevice(s32 fd, ChannelType type); void PushEntries(s32 fd, ChCommandHeaderList&& entries) { - auto it = devices.find(fd); - if (it == devices.end()) { - return; + if (auto const nvdec = std::get_if(&devices[fd])) { + nvdec->PushEntries(std::move(entries)); + } else if (auto const vic = std::get_if(&devices[fd])) { + vic->PushEntries(std::move(entries)); } - it->second->PushEntries(std::move(entries)); } Core::System& system; @@ -209,7 +184,11 @@ public: Tegra::MemoryManager gmmu_manager; Common::FlatAllocator allocator; FrameQueue frame_queue; - ankerl::unordered_dense::map> devices; + std::array, 1024> devices; #ifdef YUZU_LEGACY std::once_flag nvdec_first_init; std::once_flag vic_first_init;