[maxwell_3d] append inline index draw streams in bulk

DmaPusher already hands non-incrementing bursts to CallMultiMethod as
one span, but draw_inline_index / inline_index_2x16 / inline_index_4x8
fell into the default per-word loop: each word re-ran the macro check,
shadow-RAM branch, dirty-table stores and two switch dispatches, then
SetInlineIndexBuffer pushed every index one byte at a time (4 push_backs
per index, 16 per 4x8 word).

Add a multi-data path mirroring ProcessCBMultiData: update
register/shadow/dirty state once with the last word (only the last
write is observable mid-burst) and append the whole span with one
insert/resize + tight unpack. Shadow-RAM Replay falls through to the
per-word path since each word must be substituted from shadow RAM.
Byte output is identical on the little-endian hosts the codebase
already assumes. No functional change intended.
This commit is contained in:
simply0001 2026-06-09 21:45:57 +02:00 committed by crueter
parent 629ebf1bde
commit e70b4d25eb
3 changed files with 60 additions and 0 deletions

View file

@ -4,6 +4,8 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include "common/settings.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
@ -130,6 +132,44 @@ void Maxwell3D::DrawManager::SetInlineIndexBuffer(Maxwell3D& maxwell3d, u32 inde
draw_state.draw_mode = DrawMode::InlineIndex;
}
// Appends a whole burst of inline-index words to draw_state.inline_index_draw_indexes.
//
// Each appended index occupies sizeof(u32) bytes, copied in the host's u32 representation:
//   - draw_inline_index:       every word is one index and is copied verbatim.
//   - inline_index_2x16.even:  every word yields two indices (low 16 bits, then high 16 bits).
//   - inline_index_4x8.index0: every word yields four indices (bits 0-7, 8-15, 16-23, 24-31).
// For any other method nothing is appended. In all cases the draw mode is switched to
// DrawMode::InlineIndex before returning.
//
// maxwell3d  - not used by this overload.
// method     - register index selecting the unpacking scheme.
// base_start - first word of the burst.
// amount     - number of words in the burst.
void Maxwell3D::DrawManager::SetInlineIndexBuffer(Maxwell3D& maxwell3d, u32 method,
                                                  const u32* base_start, u32 amount) {
    auto& out = draw_state.inline_index_draw_indexes;
    // Widen once so the size arithmetic below is carried out in size_t.
    const size_t word_count = static_cast<size_t>(amount);

    // Extends the buffer by `lanes` u32 slots per input word and returns a pointer to the first
    // newly added byte.
    const auto grow_for = [&out, word_count](size_t lanes) -> u8* {
        const size_t old_size = out.size();
        out.resize(old_size + word_count * lanes * sizeof(u32));
        return out.data() + old_size;
    };

    switch (method) {
    case MAXWELL3D_REG_INDEX(draw_inline_index): {
        // One index per word: the raw bytes of the span are appended as they are.
        const u8* const first = reinterpret_cast<const u8*>(base_start);
        const u8* const last = first + word_count * sizeof(u32);
        out.insert(out.end(), first, last);
        break;
    }
    case MAXWELL3D_REG_INDEX(inline_index_2x16.even): {
        u8* const tail = grow_for(2);
        for (size_t i = 0; i < word_count; ++i) {
            const u32 packed = base_start[i];
            const u32 lanes[2]{packed & 0xFFFF, packed >> 16};
            std::memcpy(tail + i * sizeof(lanes), lanes, sizeof(lanes));
        }
        break;
    }
    case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): {
        u8* const tail = grow_for(4);
        for (size_t i = 0; i < word_count; ++i) {
            const u32 packed = base_start[i];
            const u32 lanes[4]{packed & 0xFF, (packed >> 8) & 0xFF, (packed >> 16) & 0xFF,
                               packed >> 24};
            std::memcpy(tail + i * sizeof(lanes), lanes, sizeof(lanes));
        }
        break;
    }
    }
    draw_state.draw_mode = DrawMode::InlineIndex;
}
void Maxwell3D::DrawManager::DrawBegin(Maxwell3D& maxwell3d) {
auto reset_instance_count = maxwell3d.regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First;
auto increment_instance_count = maxwell3d.regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent;

View file

@ -436,6 +436,14 @@ void Maxwell3D::CallMultiMethod(Core::System& system, u32 method, const u32* bas
upload_state.ProcessData(base_start, amount);
return;
}
case MAXWELL3D_REG_INDEX(draw_inline_index):
case MAXWELL3D_REG_INDEX(inline_index_2x16.even):
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
if (shadow_state.shadow_ram_control != Regs::ShadowRamControl::Replay) {
ProcessInlineIndexMultiData(method, base_start, amount);
break;
}
[[fallthrough]];
default:
for (u32 i = 0; i < amount; i++) {
CallMethod(system, method, base_start[i], methods_pending - i <= 1);
@ -622,6 +630,15 @@ void Maxwell3D::ProcessCBData(u32 value) {
ProcessCBMultiData(&value, 1);
}
// Bulk handler for a burst of writes that all target the same inline-index method.
//
// Only the final word of the burst is passed through ProcessShadowRam and
// ProcessDirtyRegisters; the complete span is then handed to the draw manager, which appends
// it to the inline index buffer. An empty burst is ignored entirely.
//
// method     - register index the burst targets.
// start_base - first word of the burst.
// amount     - number of words in the burst.
void Maxwell3D::ProcessInlineIndexMultiData(u32 method, const u32* start_base, u32 amount) {
    if (amount != 0) {
        const u32 last_word = start_base[amount - 1];
        const u32 latched = ProcessShadowRam(method, last_word);
        ProcessDirtyRegisters(method, latched);
        draw_manager.SetInlineIndexBuffer(*this, method, start_base, amount);
    }
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
const GPUVAddr tic_address_gpu{regs.tex_header.Address() +
tic_index * sizeof(Texture::TICEntry)};

View file

@ -3077,6 +3077,7 @@ public:
void DrawArrayIndirect(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology);
void DrawIndexedIndirect(Maxwell3D& maxwell3d, Maxwell3D::Regs::PrimitiveTopology topology, u32 index_first, u32 index_count);
void SetInlineIndexBuffer(Maxwell3D& maxwell3d, u32 index);
/// Bulk overload: appends `amount` words starting at `base_start` to the inline index buffer,
/// unpacking each word according to `method` (draw_inline_index: one index per word,
/// inline_index_2x16: two, inline_index_4x8: four), then sets the draw mode to InlineIndex.
void SetInlineIndexBuffer(Maxwell3D& maxwell3d, u32 method, const u32* base_start, u32 amount);
void DrawBegin(Maxwell3D& maxwell3d);
void DrawEnd(Maxwell3D& maxwell3d, u32 instance_count = 1, bool force_draw = false);
void DrawIndexSmall(Maxwell3D& maxwell3d, u32 argument);
@ -3193,6 +3194,8 @@ public:
void ProcessCBData(u32 value);
void ProcessCBMultiData(const u32* start_base, u32 amount);
/// Handles a burst of `amount` words written to a single inline-index method: the last word is
/// run through ProcessShadowRam/ProcessDirtyRegisters and the whole span is forwarded to the
/// draw manager. Does nothing when `amount` is zero.
void ProcessInlineIndexMultiData(u32 method, const u32* start_base, u32 amount);
private:
void InitializeRegisterDefaults();