mirror of
https://git.eden-emu.dev/eden-emu/eden.git
synced 2026-06-06 01:13:45 -04:00
better inlining
This commit is contained in:
parent
47186b6a16
commit
9629573caf
8 changed files with 77 additions and 107 deletions
|
|
@ -20,8 +20,7 @@
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
|
CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
|
||||||
: host_processor(host1x_)
|
: host1x{host1x_}
|
||||||
, host1x{host1x_}
|
|
||||||
, current_class{ChClassId(id)}
|
, current_class{ChClassId(id)}
|
||||||
{
|
{
|
||||||
thread = std::jthread([this](std::stop_token stop_token) {
|
thread = std::jthread([this](std::stop_token stop_token) {
|
||||||
|
|
@ -99,7 +98,7 @@ void CDmaPusher::ExecuteCommand(u32 method, u32 arg) {
|
||||||
switch (current_class) {
|
switch (current_class) {
|
||||||
case ChClassId::Control:
|
case ChClassId::Control:
|
||||||
LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), method, arg);
|
LOG_TRACE(Service_NVDRV, "Class {} method {:#X} arg 0x{:X}", u32(current_class), method, arg);
|
||||||
host_processor.ProcessMethod(Host1x::Control::Method(method), arg);
|
host_processor.ProcessMethod(host1x, Host1x::Control::Method(method), arg);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
thi_regs.reg_array[method] = arg;
|
thi_regs.reg_array[method] = arg;
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ struct GPU::Impl {
|
||||||
, use_nvdec{use_nvdec_}
|
, use_nvdec{use_nvdec_}
|
||||||
, shader_notify()
|
, shader_notify()
|
||||||
, is_async{is_async_}
|
, is_async{is_async_}
|
||||||
, gpu_thread{system_, is_async_}
|
, gpu_thread{system_}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
~Impl() = default;
|
~Impl() = default;
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
namespace VideoCommon::GPUThread {
|
namespace VideoCommon::GPUThread {
|
||||||
|
|
||||||
ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
|
ThreadManager::ThreadManager(Core::System& system_)
|
||||||
: system{system_}
|
: system{system_}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
|
@ -70,7 +70,6 @@ void ThreadManager::FlushRegion(DAddr addr, u64 size, bool is_async) {
|
||||||
// Always flush with synchronous GPU mode
|
// Always flush with synchronous GPU mode
|
||||||
PushCommand(FlushRegionCommand(addr, size), false, is_async);
|
PushCommand(FlushRegionCommand(addr, size), false, is_async);
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::TickGPU(bool is_async) {
|
void ThreadManager::TickGPU(bool is_async) {
|
||||||
|
|
|
||||||
|
|
@ -104,7 +104,7 @@ struct SynchState final {
|
||||||
/// Class used to manage the GPU thread
|
/// Class used to manage the GPU thread
|
||||||
class ThreadManager final {
|
class ThreadManager final {
|
||||||
public:
|
public:
|
||||||
explicit ThreadManager(Core::System& system_, bool is_async_);
|
explicit ThreadManager(Core::System& system_);
|
||||||
~ThreadManager();
|
~ThreadManager();
|
||||||
|
|
||||||
/// Creates and starts the GPU thread.
|
/// Creates and starts the GPU thread.
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
|
||||||
|
|
@ -10,26 +10,22 @@
|
||||||
|
|
||||||
namespace Tegra::Host1x {
|
namespace Tegra::Host1x {
|
||||||
|
|
||||||
Control::Control(Host1x& host1x_) : host1x(host1x_) {}
|
void Control::ProcessMethod(Host1x& host1x, Method method, u32 argument) {
|
||||||
|
|
||||||
Control::~Control() = default;
|
|
||||||
|
|
||||||
void Control::ProcessMethod(Method method, u32 argument) {
|
|
||||||
switch (method) {
|
switch (method) {
|
||||||
case Method::LoadSyncptPayload32:
|
case Method::LoadSyncptPayload32:
|
||||||
syncpoint_value = argument;
|
syncpoint_value = argument;
|
||||||
break;
|
break;
|
||||||
case Method::WaitSyncpt:
|
case Method::WaitSyncpt:
|
||||||
case Method::WaitSyncpt32:
|
case Method::WaitSyncpt32:
|
||||||
Execute(argument);
|
Execute(host1x, argument);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED_MSG("Control method {:#X}", static_cast<u32>(method));
|
UNIMPLEMENTED_MSG("Control method {:#X}", u32(method));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Control::Execute(u32 data) {
|
void Control::Execute(Host1x& host1x, u32 data) {
|
||||||
LOG_TRACE(Service_NVDRV, "Control wait syncpt {} value {}", data, syncpoint_value);
|
LOG_TRACE(Service_NVDRV, "Control wait syncpt {} value {}", data, syncpoint_value);
|
||||||
host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
|
host1x.GetSyncpointManager().WaitHost(data, syncpoint_value);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
|
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
|
||||||
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
|
// SPDX-FileCopyrightText: 2021 Skyline Team and Contributors
|
||||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
@ -19,17 +22,11 @@ public:
|
||||||
WaitSyncpt32 = 0x50,
|
WaitSyncpt32 = 0x50,
|
||||||
};
|
};
|
||||||
|
|
||||||
explicit Control(Host1x& host1x);
|
|
||||||
~Control();
|
|
||||||
|
|
||||||
/// Writes the method into the state, Invoke Execute() if encountered
|
/// Writes the method into the state, Invoke Execute() if encountered
|
||||||
void ProcessMethod(Method method, u32 argument);
|
void ProcessMethod(Host1x& host1x, Method method, u32 argument);
|
||||||
|
|
||||||
private:
|
|
||||||
/// For Host1x, execute is waiting on a syncpoint previously written into the state
|
/// For Host1x, execute is waiting on a syncpoint previously written into the state
|
||||||
void Execute(u32 data);
|
void Execute(Host1x& host1x, u32 data);
|
||||||
|
|
||||||
Host1x& host1x;
|
|
||||||
u32 syncpoint_value{};
|
u32 syncpoint_value{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,22 +27,22 @@ void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) {
|
||||||
#ifdef YUZU_LEGACY
|
#ifdef YUZU_LEGACY
|
||||||
std::call_once(nvdec_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer
|
std::call_once(nvdec_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer
|
||||||
#endif
|
#endif
|
||||||
devices[fd] = std::make_unique<Tegra::Host1x::Nvdec>(*this, fd, syncpt);
|
devices[fd].emplace<Tegra::Host1x::Nvdec>(*this, fd, syncpt);
|
||||||
break;
|
break;
|
||||||
case ChannelType::VIC:
|
case ChannelType::VIC:
|
||||||
#ifdef YUZU_LEGACY
|
#ifdef YUZU_LEGACY
|
||||||
std::call_once(vic_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer
|
std::call_once(vic_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer
|
||||||
#endif
|
#endif
|
||||||
devices[fd] = std::make_unique<Tegra::Host1x::Vic>(*this, fd, syncpt);
|
devices[fd].emplace<Tegra::Host1x::Vic>(*this, fd, syncpt);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", static_cast<u32>(type));
|
LOG_ERROR(HW_GPU, "Unimplemented host1x device {}", u32(type));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Host1x::StopDevice(s32 fd, ChannelType type) {
|
void Host1x::StopDevice(s32 fd, ChannelType type) {
|
||||||
devices.erase(fd);
|
devices[fd].emplace<std::monostate>();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Tegra::Host1x
|
} // namespace Tegra::Host1x
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@
|
||||||
|
|
||||||
#include <ankerl/unordered_dense.h>
|
#include <ankerl/unordered_dense.h>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <queue>
|
#include <variant>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
|
@ -17,6 +17,9 @@
|
||||||
#include "video_core/host1x/gpu_device_memory_manager.h"
|
#include "video_core/host1x/gpu_device_memory_manager.h"
|
||||||
#include "video_core/host1x/syncpoint_manager.h"
|
#include "video_core/host1x/syncpoint_manager.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
// fd types?
|
||||||
|
#include "video_core/host1x/nvdec.h"
|
||||||
|
#include "video_core/host1x/vic.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class System;
|
class System;
|
||||||
|
|
@ -31,118 +34,90 @@ class Nvdec;
|
||||||
|
|
||||||
class FrameQueue {
|
class FrameQueue {
|
||||||
public:
|
public:
|
||||||
|
struct FrameDevice {
|
||||||
|
std::deque<std::pair<u64, std::shared_ptr<FFmpeg::Frame>>> m_presentation_order;
|
||||||
|
std::unordered_map<u64, std::shared_ptr<FFmpeg::Frame>> m_decode_order;
|
||||||
|
};
|
||||||
|
|
||||||
void Open(s32 fd) {
|
void Open(s32 fd) {
|
||||||
std::scoped_lock l{m_mutex};
|
std::scoped_lock l{m_mutex};
|
||||||
m_presentation_order.insert({fd, {}});
|
m_frame_devices.insert_or_assign(fd, FrameDevice{});
|
||||||
m_decode_order.insert({fd, {}});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Close(s32 fd) {
|
void Close(s32 fd) {
|
||||||
std::scoped_lock l{m_mutex};
|
std::scoped_lock l{m_mutex};
|
||||||
m_presentation_order.erase(fd);
|
m_frame_devices.erase(fd);
|
||||||
m_decode_order.erase(fd);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 VicFindNvdecFdFromOffset(u64 search_offset) {
|
s32 VicFindNvdecFdFromOffset(u64 search_offset) {
|
||||||
std::scoped_lock l{m_mutex};
|
std::scoped_lock l{m_mutex};
|
||||||
for (auto& map : m_presentation_order) {
|
for (auto const& [fd, dev] : m_frame_devices) {
|
||||||
for (auto& [offset, frame] : map.second) {
|
for (auto const& [offset, frame] : dev.m_presentation_order)
|
||||||
if (offset == search_offset) {
|
if (offset == search_offset)
|
||||||
return map.first;
|
return fd;
|
||||||
}
|
for (auto const& [offset, frame] : dev.m_decode_order)
|
||||||
}
|
if (offset == search_offset)
|
||||||
}
|
return fd;
|
||||||
for (auto& map : m_decode_order) {
|
|
||||||
for (auto& [offset, frame] : map.second) {
|
|
||||||
if (offset == search_offset) {
|
|
||||||
return map.first;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
|
void PushPresentOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
|
||||||
std::scoped_lock l{m_mutex};
|
std::scoped_lock l{m_mutex};
|
||||||
auto map = m_presentation_order.find(fd);
|
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
|
||||||
if (map == m_presentation_order.end()) {
|
if (it->second.m_presentation_order.size() >= MAX_PRESENT_QUEUE)
|
||||||
return;
|
it->second.m_presentation_order.pop_front();
|
||||||
|
it->second.m_presentation_order.emplace_back(offset, std::move(frame));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (map->second.size() >= MAX_PRESENT_QUEUE) {
|
|
||||||
map->second.pop_front();
|
|
||||||
}
|
|
||||||
|
|
||||||
map->second.emplace_back(offset, std::move(frame));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PushDecodeOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
|
void PushDecodeOrder(s32 fd, u64 offset, std::shared_ptr<FFmpeg::Frame>&& frame) {
|
||||||
std::scoped_lock l{m_mutex};
|
std::scoped_lock l{m_mutex};
|
||||||
auto map = m_decode_order.find(fd);
|
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
|
||||||
if (map == m_decode_order.end()) {
|
it->second.m_decode_order.insert_or_assign(offset, std::move(frame));
|
||||||
return;
|
if (it->second.m_decode_order.size() > MAX_DECODE_MAP) {
|
||||||
}
|
auto it2 = it->second.m_decode_order.begin();
|
||||||
|
std::advance(it2, it->second.m_decode_order.size() - MAX_DECODE_MAP);
|
||||||
map->second.insert_or_assign(offset, std::move(frame));
|
it->second.m_decode_order.erase(it->second.m_decode_order.begin(), it2);
|
||||||
|
}
|
||||||
if (map->second.size() > MAX_DECODE_MAP) {
|
|
||||||
auto it = map->second.begin();
|
|
||||||
std::advance(it, map->second.size() - MAX_DECODE_MAP);
|
|
||||||
map->second.erase(map->second.begin(), it);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<FFmpeg::Frame> GetFrame(s32 fd, u64 offset) {
|
std::shared_ptr<FFmpeg::Frame> GetFrame(s32 fd, u64 offset) {
|
||||||
if (fd == -1) {
|
if (fd != -1) {
|
||||||
return {};
|
std::scoped_lock l{m_mutex};
|
||||||
|
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
|
||||||
|
if (it->second.m_presentation_order.size() > 0)
|
||||||
|
return GetPresentOrderLocked(fd);
|
||||||
|
if (it->second.m_decode_order.size() > 0)
|
||||||
|
return GetDecodeOrderLocked(fd, offset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::scoped_lock l{m_mutex};
|
|
||||||
|
|
||||||
auto present_map = m_presentation_order.find(fd);
|
|
||||||
if (present_map != m_presentation_order.end() && !present_map->second.empty()) {
|
|
||||||
return GetPresentOrderLocked(fd);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto decode_map = m_decode_order.find(fd);
|
|
||||||
if (decode_map != m_decode_order.end() && !decode_map->second.empty()) {
|
|
||||||
return GetDecodeOrderLocked(fd, offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<FFmpeg::Frame> GetPresentOrderLocked(s32 fd) {
|
std::shared_ptr<FFmpeg::Frame> GetPresentOrderLocked(s32 fd) {
|
||||||
auto map = m_presentation_order.find(fd);
|
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
|
||||||
if (map == m_presentation_order.end() || map->second.empty()) {
|
auto frame = std::move(it->second.m_presentation_order.front().second);
|
||||||
return {};
|
it->second.m_presentation_order.pop_front();
|
||||||
|
return frame;
|
||||||
}
|
}
|
||||||
|
return {};
|
||||||
auto frame = std::move(map->second.front().second);
|
|
||||||
map->second.pop_front();
|
|
||||||
return frame;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<FFmpeg::Frame> GetDecodeOrderLocked(s32 fd, u64 offset) {
|
std::shared_ptr<FFmpeg::Frame> GetDecodeOrderLocked(s32 fd, u64 offset) {
|
||||||
auto map = m_decode_order.find(fd);
|
if (auto const it = m_frame_devices.find(fd); it != m_frame_devices.end()) {
|
||||||
if (map == m_decode_order.end() || map->second.empty()) {
|
if (auto const it2 = it->second.m_decode_order.find(offset); it2 != it->second.m_decode_order.end()) {
|
||||||
return {};
|
// TODO: this "mapped" prevents us from fully embracing ankerl
|
||||||
|
return std::move(it->second.m_decode_order.extract(it2).mapped());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return {};
|
||||||
auto it = map->second.find(offset);
|
|
||||||
if (it == map->second.end()) {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
// TODO: this "mapped" prevents us from fully embracing ankerl
|
|
||||||
return std::move(map->second.extract(it).mapped());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
using FramePtr = std::shared_ptr<FFmpeg::Frame>;
|
|
||||||
|
|
||||||
std::mutex m_mutex{};
|
std::mutex m_mutex{};
|
||||||
ankerl::unordered_dense::map<s32, std::deque<std::pair<u64, FramePtr>>> m_presentation_order;
|
ankerl::unordered_dense::map<s32, FrameDevice> m_frame_devices;
|
||||||
ankerl::unordered_dense::map<s32, std::unordered_map<u64, FramePtr>> m_decode_order;
|
|
||||||
|
|
||||||
static constexpr size_t MAX_PRESENT_QUEUE = 100;
|
static constexpr size_t MAX_PRESENT_QUEUE = 100;
|
||||||
static constexpr size_t MAX_DECODE_MAP = 200;
|
static constexpr size_t MAX_DECODE_MAP = 200;
|
||||||
|
|
@ -196,11 +171,11 @@ public:
|
||||||
void StopDevice(s32 fd, ChannelType type);
|
void StopDevice(s32 fd, ChannelType type);
|
||||||
|
|
||||||
void PushEntries(s32 fd, ChCommandHeaderList&& entries) {
|
void PushEntries(s32 fd, ChCommandHeaderList&& entries) {
|
||||||
auto it = devices.find(fd);
|
if (auto const nvdec = std::get_if<Tegra::Host1x::Nvdec>(&devices[fd])) {
|
||||||
if (it == devices.end()) {
|
nvdec->PushEntries(std::move(entries));
|
||||||
return;
|
} else if (auto const vic = std::get_if<Tegra::Host1x::Vic>(&devices[fd])) {
|
||||||
|
vic->PushEntries(std::move(entries));
|
||||||
}
|
}
|
||||||
it->second->PushEntries(std::move(entries));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
|
@ -209,7 +184,11 @@ public:
|
||||||
Tegra::MemoryManager gmmu_manager;
|
Tegra::MemoryManager gmmu_manager;
|
||||||
Common::FlatAllocator<u32, 0, 32> allocator;
|
Common::FlatAllocator<u32, 0, 32> allocator;
|
||||||
FrameQueue frame_queue;
|
FrameQueue frame_queue;
|
||||||
ankerl::unordered_dense::map<s32, std::unique_ptr<CDmaPusher>> devices;
|
std::array<std::variant<
|
||||||
|
std::monostate,
|
||||||
|
Tegra::Host1x::Nvdec,
|
||||||
|
Tegra::Host1x::Vic
|
||||||
|
>, 1024> devices;
|
||||||
#ifdef YUZU_LEGACY
|
#ifdef YUZU_LEGACY
|
||||||
std::once_flag nvdec_first_init;
|
std::once_flag nvdec_first_init;
|
||||||
std::once_flag vic_first_init;
|
std::once_flag vic_first_init;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue