maxwell macros

This commit is contained in:
lizzie 2026-05-01 01:19:49 +00:00
parent db45124c61
commit 4152989bbf
6 changed files with 205 additions and 205 deletions

View file

@ -18,12 +18,12 @@
namespace Tegra::Control { namespace Tegra::Control {
ChannelState::Payload::Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state) ChannelState::Payload::Payload(Core::System& system, MemoryManager& memory_manager, ChannelState& channel_state)
: maxwell_3d(system, memory_manager) : maxwell_3d(memory_manager)
, fermi_2d(memory_manager) , fermi_2d(memory_manager)
, kepler_compute(system, memory_manager) , kepler_compute(memory_manager)
, maxwell_dma(system, memory_manager) , maxwell_dma(memory_manager)
, kepler_memory(system, memory_manager) , kepler_memory(memory_manager)
, nv01_timer(system, memory_manager) , nv01_timer(memory_manager)
, dma_pusher(system, memory_manager, channel_state) , dma_pusher(system, memory_manager, channel_state)
{} {}

View file

@ -181,7 +181,7 @@ void DmaPusher::CallMethod(u32 argument) {
} else { } else {
subchannel->ConsumeSink(system); subchannel->ConsumeSink(system);
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); subchannel->CallMethod(system, dma_state.method, argument, dma_state.is_last_call);
} }
} }
} }
@ -193,7 +193,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) {
auto subchannel = subchannels[dma_state.subchannel]; auto subchannel = subchannels[dma_state.subchannel];
subchannel->ConsumeSink(system); subchannel->ConsumeSink(system);
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, dma_state.method_count); subchannel->CallMultiMethod(system, dma_state.method, base_start, num_methods, dma_state.method_count);
} }
} }

View file

@ -218,7 +218,7 @@ void Maxwell3D::ProcessMacro(Core::System& system, u32 method, const u32* base_s
// Call the macro when there are no more parameters in the command buffer // Call the macro when there are no more parameters in the command buffer
if (is_last_call) { if (is_last_call) {
ConsumeSink(system); ConsumeSink(system);
CallMacroMethod(executing_macro, macro_params); CallMacroMethod(system, executing_macro, macro_params);
macro_params.clear(); macro_params.clear();
macro_addresses.clear(); macro_addresses.clear();
macro_segments.clear(); macro_segments.clear();
@ -389,7 +389,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
} }
} }
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) { void Maxwell3D::CallMacroMethod(Core::System& system, u32 method, const std::vector<u32>& parameters) {
// Reset the current macro. // Reset the current macro.
executing_macro = 0; executing_macro = 0;
@ -398,7 +398,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
// Execute the current macro. // Execute the current macro.
macro_engine.Execute(*this, macro_positions[entry], parameters); macro_engine.Execute(system, *this, macro_positions[entry], parameters);
draw_manager.DrawDeferred(*this); draw_manager.DrawDeferred(*this);
} }

View file

@ -3211,7 +3211,7 @@ private:
* @param method Method to call * @param method Method to call
* @param parameters Arguments to the method call * @param parameters Arguments to the method call
*/ */
void CallMacroMethod(u32 method, const std::vector<u32>& parameters); void CallMacroMethod(Core::System& system, u32 method, const std::vector<u32>& parameters);
/// Handles writes to the macro uploading register. /// Handles writes to the macro uploading register.
void ProcessMacroUpload(u32 data); void ProcessMacroUpload(u32 data);

View file

@ -76,10 +76,10 @@ bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) {
} // Anonymous namespace } // Anonymous namespace
void HLE_DrawArraysIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_DrawArraysIndirect::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
Fallback(maxwell3d, parameters); Fallback(system, maxwell3d, parameters);
return; return;
} }
@ -105,7 +105,7 @@ void HLE_DrawArraysIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span<co
maxwell3d.replace_table.clear(); maxwell3d.replace_table.clear();
} }
} }
void HLE_DrawArraysIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) { void HLE_DrawArraysIndirect::Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
SCOPE_EXIT { SCOPE_EXIT {
if (extended) { if (extended) {
maxwell3d.engine_state = Maxwell3D::EngineHint::None; maxwell3d.engine_state = Maxwell3D::EngineHint::None;
@ -135,10 +135,10 @@ void HLE_DrawArraysIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span<c
} }
} }
void HLE_DrawIndexedIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_DrawIndexedIndirect::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
Fallback(maxwell3d, parameters); Fallback(system, maxwell3d, parameters);
return; return;
} }
@ -173,7 +173,7 @@ void HLE_DrawIndexedIndirect::Execute(Engines::Maxwell3D& maxwell3d, std::span<c
maxwell3d.replace_table.clear(); maxwell3d.replace_table.clear();
} }
} }
void HLE_DrawIndexedIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) { void HLE_DrawIndexedIndirect::Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
const u32 element_base = parameters[4]; const u32 element_base = parameters[4];
@ -196,7 +196,7 @@ void HLE_DrawIndexedIndirect::Fallback(Engines::Maxwell3D& maxwell3d, std::span<
maxwell3d.replace_table.clear(); maxwell3d.replace_table.clear();
} }
} }
void HLE_MultiLayerClear::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_MultiLayerClear::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
ASSERT(parameters.size() == 1); ASSERT(parameters.size() == 1);
@ -208,47 +208,44 @@ void HLE_MultiLayerClear::Execute(Engines::Maxwell3D& maxwell3d, std::span<const
maxwell3d.regs.clear_surface.raw = clear_params.raw; maxwell3d.regs.clear_surface.raw = clear_params.raw;
maxwell3d.draw_manager.Clear(maxwell3d, num_layers); maxwell3d.draw_manager.Clear(maxwell3d, num_layers);
} }
void HLE_MultiDrawIndexedIndirectCount::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_MultiDrawIndexedIndirectCount::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]); const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]);
if (!IsTopologySafe(topology)) { if (IsTopologySafe(topology)) {
Fallback(maxwell3d, parameters); const u32 start_indirect = parameters[0];
return; const u32 end_indirect = parameters[1];
if (start_indirect >= end_indirect) {
// Nothing to do.
return;
}
const u32 padding = parameters[3]; // padding is in words
// size of each indirect segment
const u32 indirect_words = 5 + padding;
const u32 stride = indirect_words * sizeof(u32);
const std::size_t draw_count = end_indirect - start_indirect;
const u32 estimate = u32(maxwell3d.EstimateIndexBufferSize());
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
auto& params = maxwell3d.draw_manager.indirect_state;
params.is_byte_count = false;
params.is_indexed = true;
params.include_count = true;
params.count_start_address = maxwell3d.GetMacroAddress(4);
params.indirect_start_address = maxwell3d.GetMacroAddress(5);
params.buffer_size = stride * draw_count;
params.max_draw_counts = draw_count;
params.stride = stride;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(0, 0x648, Maxwell3D::HLEReplacementAttributeType::DrawID);
maxwell3d.draw_manager.DrawIndexedIndirect(maxwell3d, topology, 0, estimate);
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
maxwell3d.replace_table.clear();
} else {
Fallback(system, maxwell3d, parameters);
} }
const u32 start_indirect = parameters[0];
const u32 end_indirect = parameters[1];
if (start_indirect >= end_indirect) {
// Nothing to do.
return;
}
const u32 padding = parameters[3]; // padding is in words
// size of each indirect segment
const u32 indirect_words = 5 + padding;
const u32 stride = indirect_words * sizeof(u32);
const std::size_t draw_count = end_indirect - start_indirect;
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
auto& params = maxwell3d.draw_manager.indirect_state;
params.is_byte_count = false;
params.is_indexed = true;
params.include_count = true;
params.count_start_address = maxwell3d.GetMacroAddress(4);
params.indirect_start_address = maxwell3d.GetMacroAddress(5);
params.buffer_size = stride * draw_count;
params.max_draw_counts = draw_count;
params.stride = stride;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
maxwell3d.SetHLEReplacementAttributeType(0, 0x648, Maxwell3D::HLEReplacementAttributeType::DrawID);
maxwell3d.draw_manager.DrawIndexedIndirect(maxwell3d, topology, 0, estimate);
maxwell3d.engine_state = Maxwell3D::EngineHint::None;
maxwell3d.replace_table.clear();
} }
void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) { void HLE_MultiDrawIndexedIndirectCount::Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
SCOPE_EXIT { SCOPE_EXIT {
// Clean everything. // Clean everything.
maxwell3d.regs.vertex_id_base = 0x0; maxwell3d.regs.vertex_id_base = 0x0;
@ -262,7 +259,7 @@ void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d,
// Nothing to do. // Nothing to do.
return; return;
} }
const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]);
const u32 padding = parameters[3]; const u32 padding = parameters[3];
const std::size_t max_draws = parameters[4]; const std::size_t max_draws = parameters[4];
const u32 indirect_words = 5 + padding; const u32 indirect_words = 5 + padding;
@ -277,41 +274,41 @@ void HLE_MultiDrawIndexedIndirectCount::Fallback(Engines::Maxwell3D& maxwell3d,
maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
maxwell3d.CallMethod(0x8e3, 0x648, true); maxwell3d.CallMethod(system, 0x8e3, 0x648, true);
maxwell3d.CallMethod(0x8e4, static_cast<u32>(index), true); maxwell3d.CallMethod(system, 0x8e4, u32(index), true);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.draw_manager.DrawIndex(maxwell3d, topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]); maxwell3d.draw_manager.DrawIndex(maxwell3d, topology, parameters[base + 2], parameters[base], base_vertex, base_instance, parameters[base + 1]);
} }
} }
void HLE_DrawIndirectByteCount::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_DrawIndirectByteCount::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback();
if (!force) { if (force) {
Fallback(maxwell3d, parameters); auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU);
return; auto& params = maxwell3d.draw_manager.indirect_state;
params.is_byte_count = true;
params.is_indexed = false;
params.include_count = false;
params.count_start_address = 0;
params.indirect_start_address = maxwell3d.GetMacroAddress(2);
params.buffer_size = 4;
params.max_draw_counts = 1;
params.stride = parameters[1];
maxwell3d.regs.draw.begin = parameters[0];
maxwell3d.regs.draw_auto_stride = parameters[1];
maxwell3d.regs.draw_auto_byte_count = parameters[2];
maxwell3d.draw_manager.DrawArrayIndirect(maxwell3d, topology);
} else {
Fallback(system, maxwell3d, parameters);
} }
auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[0] & 0xFFFFU);
auto& params = maxwell3d.draw_manager.indirect_state;
params.is_byte_count = true;
params.is_indexed = false;
params.include_count = false;
params.count_start_address = 0;
params.indirect_start_address = maxwell3d.GetMacroAddress(2);
params.buffer_size = 4;
params.max_draw_counts = 1;
params.stride = parameters[1];
maxwell3d.regs.draw.begin = parameters[0];
maxwell3d.regs.draw_auto_stride = parameters[1];
maxwell3d.regs.draw_auto_byte_count = parameters[2];
maxwell3d.draw_manager.DrawArrayIndirect(maxwell3d, topology);
} }
void HLE_DrawIndirectByteCount::Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) { void HLE_DrawIndirectByteCount::Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
maxwell3d.regs.draw.begin = parameters[0]; maxwell3d.regs.draw.begin = parameters[0];
maxwell3d.regs.draw_auto_stride = parameters[1]; maxwell3d.regs.draw_auto_stride = parameters[1];
maxwell3d.regs.draw_auto_byte_count = parameters[2]; maxwell3d.regs.draw_auto_byte_count = parameters[2];
maxwell3d.draw_manager.DrawArray(maxwell3d, maxwell3d.regs.draw.topology, 0, maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); maxwell3d.draw_manager.DrawArray(maxwell3d, maxwell3d.regs.draw.topology, 0, maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1);
} }
void HLE_C713C83D8F63CCF3::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_C713C83D8F63CCF3::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2;
const u32 address = maxwell3d.regs.shadow_scratch[24]; const u32 address = maxwell3d.regs.shadow_scratch[24];
@ -321,7 +318,7 @@ void HLE_C713C83D8F63CCF3::Execute(Engines::Maxwell3D& maxwell3d, std::span<cons
const_buffer.address_low = address << 8; const_buffer.address_low = address << 8;
const_buffer.offset = offset; const_buffer.offset = offset;
} }
void HLE_D7333D26E0A93EDE::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_D7333D26E0A93EDE::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const size_t index = parameters[0]; const size_t index = parameters[0];
const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; const u32 address = maxwell3d.regs.shadow_scratch[42 + index];
@ -331,7 +328,7 @@ void HLE_D7333D26E0A93EDE::Execute(Engines::Maxwell3D& maxwell3d, std::span<cons
const_buffer.address_high = (address >> 24) & 0xFF; const_buffer.address_high = (address >> 24) & 0xFF;
const_buffer.address_low = address << 8; const_buffer.address_low = address << 8;
} }
void HLE_BindShader::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_BindShader::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
auto& regs = maxwell3d.regs; auto& regs = maxwell3d.regs;
const u32 index = parameters[0]; const u32 index = parameters[0];
@ -355,7 +352,7 @@ void HLE_BindShader::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32>
bind_group.raw_config = 0x11; bind_group.raw_config = 0x11;
maxwell3d.ProcessCBBind(bind_group_id); maxwell3d.ProcessCBBind(bind_group_id);
} }
void HLE_SetRasterBoundingBox::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_SetRasterBoundingBox::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const u32 raster_mode = parameters[0]; const u32 raster_mode = parameters[0];
auto& regs = maxwell3d.regs; auto& regs = maxwell3d.regs;
@ -364,7 +361,7 @@ void HLE_SetRasterBoundingBox::Execute(Engines::Maxwell3D& maxwell3d, std::span<
regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F;
regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled);
} }
void HLE_ClearConstBuffer::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_ClearConstBuffer::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
static constexpr std::array<u32, 0x7000> zeroes{}; //must be bigger than either 7000 or 5F00 static constexpr std::array<u32, 0x7000> zeroes{}; //must be bigger than either 7000 or 5F00
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
auto& regs = maxwell3d.regs; auto& regs = maxwell3d.regs;
@ -374,7 +371,7 @@ void HLE_ClearConstBuffer::Execute(Engines::Maxwell3D& maxwell3d, std::span<cons
regs.const_buffer.offset = 0; regs.const_buffer.offset = 0;
maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4);
} }
void HLE_ClearMemory::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_ClearMemory::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
const u32 needed_memory = parameters[2] / sizeof(u32); const u32 needed_memory = parameters[2] / sizeof(u32);
if (needed_memory > zero_memory.size()) { if (needed_memory > zero_memory.size()) {
@ -385,10 +382,10 @@ void HLE_ClearMemory::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32
regs.upload.line_count = 1; regs.upload.line_count = 1;
regs.upload.dest.address_high = parameters[0]; regs.upload.dest.address_high = parameters[0];
regs.upload.dest.address_low = parameters[1]; regs.upload.dest.address_low = parameters[1];
maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); maxwell3d.CallMethod(system, size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
maxwell3d.CallMultiMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory); maxwell3d.CallMultiMethod(system, size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory);
} }
void HLE_TransformFeedbackSetup::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) { void HLE_TransformFeedbackSetup::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method) {
maxwell3d.RefreshParameters(); maxwell3d.RefreshParameters();
auto& regs = maxwell3d.regs; auto& regs = maxwell3d.regs;
regs.transform_feedback_enabled = 1; regs.transform_feedback_enabled = 1;
@ -400,8 +397,8 @@ void HLE_TransformFeedbackSetup::Execute(Engines::Maxwell3D& maxwell3d, std::spa
regs.upload.line_count = 1; regs.upload.line_count = 1;
regs.upload.dest.address_high = parameters[0]; regs.upload.dest.address_high = parameters[0];
regs.upload.dest.address_low = parameters[1]; regs.upload.dest.address_low = parameters[1];
maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); maxwell3d.CallMethod(system, size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true); maxwell3d.CallMethod(system, size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true);
maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address());
} }
@ -441,7 +438,7 @@ void HLE_TransformFeedbackSetup::Execute(Engines::Maxwell3D& maxwell3d, std::spa
} }
} }
void MacroInterpreterImpl::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method) { void MacroInterpreterImpl::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method) {
Reset(); Reset();
registers[1] = params[0]; registers[1] = params[0];
@ -451,7 +448,7 @@ void MacroInterpreterImpl::Execute(Engines::Maxwell3D& maxwell3d, std::span<cons
// Execute the code until we hit an exit condition. // Execute the code until we hit an exit condition.
bool keep_executing = true; bool keep_executing = true;
while (keep_executing) { while (keep_executing) {
keep_executing = Step(maxwell3d, false); keep_executing = Step(system, maxwell3d, false);
} }
// Assert the the macro used all the input parameters // Assert the the macro used all the input parameters
@ -474,7 +471,7 @@ void MacroInterpreterImpl::Reset() {
/// @brief Executes a single macro instruction located at the current program counter. Returns whether /// @brief Executes a single macro instruction located at the current program counter. Returns whether
/// the interpreter should keep running. /// the interpreter should keep running.
/// @param is_delay_slot Whether the current step is being executed due to a delay slot in a previous instruction. /// @param is_delay_slot Whether the current step is being executed due to a delay slot in a previous instruction.
bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot) { bool MacroInterpreterImpl::Step(Core::System& system, Engines::Maxwell3D& maxwell3d, bool is_delay_slot) {
u32 base_address = pc; u32 base_address = pc;
Macro::Opcode opcode = GetOpcode(); Macro::Opcode opcode = GetOpcode();
@ -490,11 +487,11 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
switch (opcode.operation) { switch (opcode.operation) {
case Macro::Operation::ALU: { case Macro::Operation::ALU: {
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), GetRegister(opcode.src_b)); u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), GetRegister(opcode.src_b));
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result); ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, result);
break; break;
} }
case Macro::Operation::AddImmediate: { case Macro::Operation::AddImmediate: {
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, GetRegister(opcode.src_a) + opcode.immediate); ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, GetRegister(opcode.src_a) + opcode.immediate);
break; break;
} }
case Macro::Operation::ExtractInsert: { case Macro::Operation::ExtractInsert: {
@ -504,7 +501,7 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask();
dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
dst |= src << opcode.bf_dst_bit; dst |= src << opcode.bf_dst_bit;
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, dst); ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, dst);
break; break;
} }
case Macro::Operation::ExtractShiftLeftImmediate: { case Macro::Operation::ExtractShiftLeftImmediate: {
@ -513,7 +510,7 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit;
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result); ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, result);
break; break;
} }
case Macro::Operation::ExtractShiftLeftRegister: { case Macro::Operation::ExtractShiftLeftRegister: {
@ -522,12 +519,12 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst;
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result); ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, result);
break; break;
} }
case Macro::Operation::Read: { case Macro::Operation::Read: {
u32 result = Read(maxwell3d, GetRegister(opcode.src_a) + opcode.immediate); u32 result = Read(maxwell3d, GetRegister(opcode.src_a) + opcode.immediate);
ProcessResult(maxwell3d, opcode.result_operation, opcode.dst, result); ProcessResult(system, maxwell3d, opcode.result_operation, opcode.dst, result);
break; break;
} }
case Macro::Operation::Branch: { case Macro::Operation::Branch: {
@ -543,7 +540,7 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
delayed_pc = base_address + opcode.GetBranchTarget(); delayed_pc = base_address + opcode.GetBranchTarget();
// Execute one more instruction due to the delay slot. // Execute one more instruction due to the delay slot.
return Step(maxwell3d, true); return Step(system, maxwell3d, true);
} }
break; break;
} }
@ -556,7 +553,7 @@ bool MacroInterpreterImpl::Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slo
// cause an exit if it's executed inside a delay slot. // cause an exit if it's executed inside a delay slot.
if (opcode.is_exit && !is_delay_slot) { if (opcode.is_exit && !is_delay_slot) {
// Exit has a delay slot, execute the next instruction // Exit has a delay slot, execute the next instruction
Step(maxwell3d, true); Step(system, maxwell3d, true);
return false; return false;
} }
return true; return true;
@ -603,7 +600,7 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a,
} }
/// Performs the result operation on the input result and stores it in the specified register (if necessary). /// Performs the result operation on the input result and stores it in the specified register (if necessary).
void MacroInterpreterImpl::ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result) { void MacroInterpreterImpl::ProcessResult(Core::System& system, Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result) {
switch (operation) { switch (operation) {
case Macro::ResultOperation::IgnoreAndFetch: case Macro::ResultOperation::IgnoreAndFetch:
// Fetch parameter and ignore result. // Fetch parameter and ignore result.
@ -621,12 +618,12 @@ void MacroInterpreterImpl::ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::R
case Macro::ResultOperation::FetchAndSend: case Macro::ResultOperation::FetchAndSend:
// Fetch parameter and send result. // Fetch parameter and send result.
SetRegister(reg, FetchParameter()); SetRegister(reg, FetchParameter());
Send(maxwell3d, result); Send(system, maxwell3d, result);
break; break;
case Macro::ResultOperation::MoveAndSend: case Macro::ResultOperation::MoveAndSend:
// Move and send result. // Move and send result.
SetRegister(reg, result); SetRegister(reg, result);
Send(maxwell3d, result); Send(system, maxwell3d, result);
break; break;
case Macro::ResultOperation::FetchAndSetMethod: case Macro::ResultOperation::FetchAndSetMethod:
// Fetch parameter and use result as Method Address. // Fetch parameter and use result as Method Address.
@ -637,13 +634,13 @@ void MacroInterpreterImpl::ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::R
// Move result and use as Method Address, then fetch and send parameter. // Move result and use as Method Address, then fetch and send parameter.
SetRegister(reg, result); SetRegister(reg, result);
SetMethodAddress(result); SetMethodAddress(result);
Send(maxwell3d, FetchParameter()); Send(system, maxwell3d, FetchParameter());
break; break;
case Macro::ResultOperation::MoveAndSetMethodSend: case Macro::ResultOperation::MoveAndSetMethodSend:
// Move result and use as Method Address, then send bits 12:17 of result. // Move result and use as Method Address, then send bits 12:17 of result.
SetRegister(reg, result); SetRegister(reg, result);
SetMethodAddress(result); SetMethodAddress(result);
Send(maxwell3d, (result >> 12) & 0b111111); Send(system, maxwell3d, (result >> 12) & 0b111111);
break; break;
default: default:
UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
@ -684,8 +681,8 @@ void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
} }
/// Calls a GPU Engine method with the input parameter. /// Calls a GPU Engine method with the input parameter.
void MacroInterpreterImpl::Send(Engines::Maxwell3D& maxwell3d, u32 value) { void MacroInterpreterImpl::Send(Core::System& system, Engines::Maxwell3D& maxwell3d, u32 value) {
maxwell3d.CallMethod(method_address.address, value, true); maxwell3d.CallMethod(system, method_address.address, value, true);
// Increment the method address by the method increment. // Increment the method address by the method increment.
method_address.address.Assign(method_address.address.Value() + method_address.increment.Value()); method_address.address.Assign(method_address.address.Value() + method_address.increment.Value());
} }
@ -736,34 +733,35 @@ static const auto default_cg_mode = nullptr; //Allow RWE
#endif #endif
struct MacroJITx64Impl final : public Xbyak::CodeGenerator, public DynamicCachedMacro { struct MacroJITx64Impl final : public Xbyak::CodeGenerator, public DynamicCachedMacro {
explicit MacroJITx64Impl(std::span<const u32> code_) explicit MacroJITx64Impl(Core::System& system, std::span<const u32> code_)
: Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode) : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode)
, code{code_} , code{code_}
{ {
Compile(); Compile(system);
} }
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) override; void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) override;
void Compile_ALU(Macro::Opcode opcode); void Compile_ALU(Core::System& system, Macro::Opcode opcode);
void Compile_AddImmediate(Macro::Opcode opcode); void Compile_AddImmediate(Core::System& system, Macro::Opcode opcode);
void Compile_ExtractInsert(Macro::Opcode opcode); void Compile_ExtractInsert(Core::System& system, Macro::Opcode opcode);
void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); void Compile_ExtractShiftLeftImmediate(Core::System& system, Macro::Opcode opcode);
void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); void Compile_ExtractShiftLeftRegister(Core::System& system, Macro::Opcode opcode);
void Compile_Read(Macro::Opcode opcode); void Compile_Read(Core::System& system, Macro::Opcode opcode);
void Compile_Branch(Macro::Opcode opcode); void Compile_Branch(Macro::Opcode opcode);
void Optimizer_ScanFlags(); void Optimizer_ScanFlags();
void Compile(); void Compile(Core::System& system);
bool Compile_NextInstruction(); bool Compile_NextInstruction(Core::System& system);
Xbyak::Reg32 Compile_FetchParameter(); Xbyak::Reg32 Compile_FetchParameter();
Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); void Compile_ProcessResult(Core::System& system, Macro::ResultOperation operation, u32 reg);
void Compile_Send(Xbyak::Reg32 value); void Compile_Send(Core::System& system, Xbyak::Reg32 value);
Macro::Opcode GetOpCode() const; Macro::Opcode GetOpCode() const;
struct JITState { struct JITState {
Engines::Maxwell3D* maxwell3d{}; Engines::Maxwell3D* maxwell3d = nullptr;
Core::System* system = nullptr;
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
u32 carry_flag{}; u32 carry_flag{};
}; };
@ -789,15 +787,16 @@ struct MacroJITx64Impl final : public Xbyak::CodeGenerator, public DynamicCached
std::span<const u32> code; std::span<const u32> code;
}; };
void MacroJITx64Impl::Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) { void MacroJITx64Impl::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) {
ASSERT_OR_EXECUTE(program != nullptr, { return; }); ASSERT_OR_EXECUTE(program != nullptr, { return; });
JITState state{}; JITState state{};
state.maxwell3d = &maxwell3d; state.maxwell3d = &maxwell3d;
state.system = &system;
state.registers = {}; state.registers = {};
program(&state, parameters.data(), parameters.data() + parameters.size()); program(&state, parameters.data(), parameters.data() + parameters.size());
} }
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { void MacroJITx64Impl::Compile_ALU(Core::System& system, Macro::Opcode opcode) {
const bool is_a_zero = opcode.src_a == 0; const bool is_a_zero = opcode.src_a == 0;
const bool is_b_zero = opcode.src_b == 0; const bool is_b_zero = opcode.src_b == 0;
const bool valid_operation = !is_a_zero && !is_b_zero; const bool valid_operation = !is_a_zero && !is_b_zero;
@ -914,10 +913,10 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value()); UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value());
break; break;
} }
Compile_ProcessResult(opcode.result_operation, opcode.dst); Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { void MacroJITx64Impl::Compile_AddImmediate(Core::System& system, Macro::Opcode opcode) {
if (optimizer.skip_dummy_addimmediate) { if (optimizer.skip_dummy_addimmediate) {
// Games tend to use this as an exit instruction placeholder. It's to encode an instruction // Games tend to use this as an exit instruction placeholder. It's to encode an instruction
// without doing anything. In our case we can just not emit anything. // without doing anything. In our case we can just not emit anything.
@ -952,10 +951,10 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
sub(result, opcode.immediate * -1); sub(result, opcode.immediate * -1);
} }
} }
Compile_ProcessResult(opcode.result_operation, opcode.dst); Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { void MacroJITx64Impl::Compile_ExtractInsert(Core::System& system, Macro::Opcode opcode) {
auto dst = Compile_GetRegister(opcode.src_a, RESULT); auto dst = Compile_GetRegister(opcode.src_a, RESULT);
auto src = Compile_GetRegister(opcode.src_b, eax); auto src = Compile_GetRegister(opcode.src_b, eax);
@ -966,10 +965,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
shl(src, opcode.bf_dst_bit); shl(src, opcode.bf_dst_bit);
or_(dst, src); or_(dst, src);
Compile_ProcessResult(opcode.result_operation, opcode.dst); Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Core::System& system, Macro::Opcode opcode) {
const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto dst = Compile_GetRegister(opcode.src_a, ecx);
const auto src = Compile_GetRegister(opcode.src_b, RESULT); const auto src = Compile_GetRegister(opcode.src_b, RESULT);
@ -977,10 +976,10 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
and_(src, opcode.GetBitfieldMask()); and_(src, opcode.GetBitfieldMask());
shl(src, opcode.bf_dst_bit); shl(src, opcode.bf_dst_bit);
Compile_ProcessResult(opcode.result_operation, opcode.dst); Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Core::System& system, Macro::Opcode opcode) {
const auto dst = Compile_GetRegister(opcode.src_a, ecx); const auto dst = Compile_GetRegister(opcode.src_a, ecx);
const auto src = Compile_GetRegister(opcode.src_b, RESULT); const auto src = Compile_GetRegister(opcode.src_b, RESULT);
@ -988,10 +987,10 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
and_(src, opcode.GetBitfieldMask()); and_(src, opcode.GetBitfieldMask());
shl(src, dst.cvt8()); shl(src, dst.cvt8());
Compile_ProcessResult(opcode.result_operation, opcode.dst); Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
} }
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { void MacroJITx64Impl::Compile_Read(Core::System& system, Macro::Opcode opcode) {
if (optimizer.zero_reg_skip && opcode.src_a == 0) { if (optimizer.zero_reg_skip && opcode.src_a == 0) {
if (opcode.immediate == 0) { if (opcode.immediate == 0) {
xor_(RESULT, RESULT); xor_(RESULT, RESULT);
@ -1017,23 +1016,21 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
int3(); int3();
L(pass_range_check); L(pass_range_check);
} }
mov(rax, qword[STATE]); mov(rax, qword[STATE + offsetof(JITState, maxwell3d)]);
mov(RESULT, mov(RESULT, dword[rax + offsetof(Engines::Maxwell3D, regs) + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
dword[rax + offsetof(Engines::Maxwell3D, regs) + Compile_ProcessResult(system, opcode.result_operation, opcode.dst);
offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
Compile_ProcessResult(opcode.result_operation, opcode.dst);
} }
static void MacroJIT_SendThunk(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { static void MacroJIT_SendThunk(Core::System* system, Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
maxwell3d->CallMethod(method_address.address, value, true); maxwell3d->CallMethod(*system, method_address.address, value, true);
} }
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { void MacroJITx64Impl::Compile_Send(Core::System& system, Xbyak::Reg32 value) {
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(Common::X64::ABI_PARAM1, qword[STATE]); mov(Common::X64::ABI_PARAM1, qword[STATE + offsetof(JITState, system)]);
mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS); mov(Common::X64::ABI_PARAM2, qword[STATE + offsetof(JITState, maxwell3d)]);
mov(Common::X64::ABI_PARAM3.cvt32(), value); mov(Common::X64::ABI_PARAM3.cvt32(), METHOD_ADDRESS);
mov(Common::X64::ABI_PARAM4.cvt32(), value);
Common::X64::CallFarFunction(*this, &MacroJIT_SendThunk); Common::X64::CallFarFunction(*this, &MacroJIT_SendThunk);
Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
@ -1057,9 +1054,8 @@ void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
} }
void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); ASSERT(!is_delay_slot && "Executing a branch in a delay slot is not valid");
const s32 jump_address = const s32 jump_address = s32(pc) + s32(opcode.GetBranchTarget() / sizeof(s32));
static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
Xbyak::Label end; Xbyak::Label end;
auto value = Compile_GetRegister(opcode.src_a, eax); auto value = Compile_GetRegister(opcode.src_a, eax);
@ -1128,7 +1124,7 @@ void MacroJITx64Impl::Optimizer_ScanFlags() {
} }
} }
void MacroJITx64Impl::Compile() { void MacroJITx64Impl::Compile(Core::System& system) {
labels.fill(Xbyak::Label()); labels.fill(Xbyak::Label());
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
@ -1168,7 +1164,7 @@ void MacroJITx64Impl::Compile() {
next_opcode = {}; next_opcode = {};
} }
pc = i; pc = i;
Compile_NextInstruction(); Compile_NextInstruction(system);
} }
L(end_of_code); L(end_of_code);
@ -1179,7 +1175,7 @@ void MacroJITx64Impl::Compile() {
program = getCode<ProgramType>(); program = getCode<ProgramType>();
} }
bool MacroJITx64Impl::Compile_NextInstruction() { bool MacroJITx64Impl::Compile_NextInstruction(Core::System& system) {
const auto opcode = GetOpCode(); const auto opcode = GetOpCode();
if (labels[pc].getAddress()) { if (labels[pc].getAddress()) {
return false; return false;
@ -1189,22 +1185,22 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
switch (opcode.operation) { switch (opcode.operation) {
case Macro::Operation::ALU: case Macro::Operation::ALU:
Compile_ALU(opcode); Compile_ALU(system, opcode);
break; break;
case Macro::Operation::AddImmediate: case Macro::Operation::AddImmediate:
Compile_AddImmediate(opcode); Compile_AddImmediate(system, opcode);
break; break;
case Macro::Operation::ExtractInsert: case Macro::Operation::ExtractInsert:
Compile_ExtractInsert(opcode); Compile_ExtractInsert(system, opcode);
break; break;
case Macro::Operation::ExtractShiftLeftImmediate: case Macro::Operation::ExtractShiftLeftImmediate:
Compile_ExtractShiftLeftImmediate(opcode); Compile_ExtractShiftLeftImmediate(system, opcode);
break; break;
case Macro::Operation::ExtractShiftLeftRegister: case Macro::Operation::ExtractShiftLeftRegister:
Compile_ExtractShiftLeftRegister(opcode); Compile_ExtractShiftLeftRegister(system, opcode);
break; break;
case Macro::Operation::Read: case Macro::Operation::Read:
Compile_Read(opcode); Compile_Read(system, opcode);
break; break;
case Macro::Operation::Branch: case Macro::Operation::Branch:
Compile_Branch(opcode); Compile_Branch(opcode);
@ -1276,7 +1272,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
return dst; return dst;
} }
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { void MacroJITx64Impl::Compile_ProcessResult(Core::System& system, Macro::ResultOperation operation, u32 reg) {
const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) { const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) {
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
// register. // register.
@ -1301,12 +1297,12 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
case Macro::ResultOperation::FetchAndSend: case Macro::ResultOperation::FetchAndSend:
// Fetch parameter and send result. // Fetch parameter and send result.
SetRegister(reg, Compile_FetchParameter()); SetRegister(reg, Compile_FetchParameter());
Compile_Send(RESULT); Compile_Send(system, RESULT);
break; break;
case Macro::ResultOperation::MoveAndSend: case Macro::ResultOperation::MoveAndSend:
// Move and send result. // Move and send result.
SetRegister(reg, RESULT); SetRegister(reg, RESULT);
Compile_Send(RESULT); Compile_Send(system, RESULT);
break; break;
case Macro::ResultOperation::FetchAndSetMethod: case Macro::ResultOperation::FetchAndSetMethod:
// Fetch parameter and use result as Method Address. // Fetch parameter and use result as Method Address.
@ -1317,7 +1313,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
// Move result and use as Method Address, then fetch and send parameter. // Move result and use as Method Address, then fetch and send parameter.
SetRegister(reg, RESULT); SetRegister(reg, RESULT);
SetMethodAddress(RESULT); SetMethodAddress(RESULT);
Compile_Send(Compile_FetchParameter()); Compile_Send(system, Compile_FetchParameter());
break; break;
case Macro::ResultOperation::MoveAndSetMethodSend: case Macro::ResultOperation::MoveAndSetMethodSend:
// Move result and use as Method Address, then send bits 12:17 of result. // Move result and use as Method Address, then send bits 12:17 of result.
@ -1325,7 +1321,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
SetMethodAddress(RESULT); SetMethodAddress(RESULT);
shr(RESULT, 12); shr(RESULT, 12);
and_(RESULT, 0b111111); and_(RESULT, 0b111111);
Compile_Send(RESULT); Compile_Send(system, RESULT);
break; break;
default: default:
UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation); UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
@ -1366,36 +1362,36 @@ static void Dump(u64 hash, std::span<const u32> code, bool decompiled = false) {
macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes()); macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
} }
void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters) { void MacroEngine::Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters) {
auto const execute_variant = [&maxwell3d, &parameters, method](AnyCachedMacro& acm) { auto const execute_variant = [&system, &maxwell3d, &parameters, method](AnyCachedMacro& acm) {
if (auto a = std::get_if<HLE_DrawArraysIndirect>(&acm)) if (auto a = std::get_if<HLE_DrawArraysIndirect>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_DrawIndexedIndirect>(&acm)) if (auto a = std::get_if<HLE_DrawIndexedIndirect>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_MultiDrawIndexedIndirectCount>(&acm)) if (auto a = std::get_if<HLE_MultiDrawIndexedIndirectCount>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_MultiLayerClear>(&acm)) if (auto a = std::get_if<HLE_MultiLayerClear>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_C713C83D8F63CCF3>(&acm)) if (auto a = std::get_if<HLE_C713C83D8F63CCF3>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_D7333D26E0A93EDE>(&acm)) if (auto a = std::get_if<HLE_D7333D26E0A93EDE>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_BindShader>(&acm)) if (auto a = std::get_if<HLE_BindShader>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_SetRasterBoundingBox>(&acm)) if (auto a = std::get_if<HLE_SetRasterBoundingBox>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_ClearConstBuffer>(&acm)) if (auto a = std::get_if<HLE_ClearConstBuffer>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_ClearMemory>(&acm)) if (auto a = std::get_if<HLE_ClearMemory>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_TransformFeedbackSetup>(&acm)) if (auto a = std::get_if<HLE_TransformFeedbackSetup>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<HLE_DrawIndirectByteCount>(&acm)) if (auto a = std::get_if<HLE_DrawIndirectByteCount>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<MacroInterpreterImpl>(&acm)) if (auto a = std::get_if<MacroInterpreterImpl>(&acm))
a->Execute(maxwell3d, parameters, method); a->Execute(system, maxwell3d, parameters, method);
if (auto a = std::get_if<std::unique_ptr<DynamicCachedMacro>>(&acm)) if (auto a = std::get_if<std::unique_ptr<DynamicCachedMacro>>(&acm))
a->get()->Execute(maxwell3d, parameters, method); a->get()->Execute(system, maxwell3d, parameters, method);
}; };
if (auto const it = macro_cache.find(method); it != macro_cache.end()) { if (auto const it = macro_cache.find(method); it != macro_cache.end()) {
auto& ci = it->second; auto& ci = it->second;
@ -1426,9 +1422,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<c
code.resize(macro_cached.size() - rebased_method); code.resize(macro_cached.size() - rebased_method);
std::memcpy(code.data(), macro_cached.data() + rebased_method, code.size() * sizeof(u32)); std::memcpy(code.data(), macro_cached.data() + rebased_method, code.size() * sizeof(u32));
ci.hash = Common::HashValue(code); ci.hash = Common::HashValue(code);
ci.program = Compile(maxwell3d, code); ci.program = Compile(system, maxwell3d, code);
} else { } else {
ci.program = Compile(maxwell3d, macro_code->second); ci.program = Compile(system, maxwell3d, macro_code->second);
ci.hash = Common::HashValue(macro_code->second); ci.hash = Common::HashValue(macro_code->second);
} }
if (CanBeHLEProgram(ci.hash) && !Settings::values.disable_macro_hle) { if (CanBeHLEProgram(ci.hash) && !Settings::values.disable_macro_hle) {
@ -1443,10 +1439,10 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<c
} }
} }
AnyCachedMacro MacroEngine::Compile(Engines::Maxwell3D& maxwell3d, std::span<const u32> code) { AnyCachedMacro MacroEngine::Compile(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> code) {
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64
if (!is_interpreted) if (!is_interpreted)
return std::make_unique<MacroJITx64Impl>(code); return std::make_unique<MacroJITx64Impl>(system, code);
#endif #endif
return MacroInterpreterImpl(code); return MacroInterpreterImpl(code);
} }

View file

@ -14,6 +14,10 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
namespace Core {
class System;
}
namespace Tegra { namespace Tegra {
namespace Engines { namespace Engines {
@ -106,61 +110,61 @@ struct HLEMacro {
/// also assigning the base vertex/instance. /// also assigning the base vertex/instance.
struct HLE_DrawArraysIndirect final { struct HLE_DrawArraysIndirect final {
HLE_DrawArraysIndirect(bool extended_) noexcept : extended{extended_} {} HLE_DrawArraysIndirect(bool extended_) noexcept : extended{extended_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters); void Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
bool extended; bool extended;
}; };
/// @note: these macros have two versions, a normal and extended version, with the extended version /// @note: these macros have two versions, a normal and extended version, with the extended version
/// also assigning the base vertex/instance. /// also assigning the base vertex/instance.
struct HLE_DrawIndexedIndirect final { struct HLE_DrawIndexedIndirect final {
explicit HLE_DrawIndexedIndirect(bool extended_) noexcept : extended{extended_} {} explicit HLE_DrawIndexedIndirect(bool extended_) noexcept : extended{extended_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters); void Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
bool extended; bool extended;
}; };
struct HLE_MultiLayerClear final { struct HLE_MultiLayerClear final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_MultiDrawIndexedIndirectCount final { struct HLE_MultiDrawIndexedIndirectCount final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters); void Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
}; };
struct HLE_DrawIndirectByteCount final { struct HLE_DrawIndirectByteCount final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
void Fallback(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters); void Fallback(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters);
}; };
struct HLE_C713C83D8F63CCF3 final { struct HLE_C713C83D8F63CCF3 final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_D7333D26E0A93EDE final { struct HLE_D7333D26E0A93EDE final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_BindShader final { struct HLE_BindShader final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_SetRasterBoundingBox final { struct HLE_SetRasterBoundingBox final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct HLE_ClearConstBuffer final { struct HLE_ClearConstBuffer final {
HLE_ClearConstBuffer(size_t base_size_) noexcept : base_size{base_size_} {} HLE_ClearConstBuffer(size_t base_size_) noexcept : base_size{base_size_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
size_t base_size; size_t base_size;
}; };
struct HLE_ClearMemory final { struct HLE_ClearMemory final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
std::vector<u32> zero_memory; std::vector<u32> zero_memory;
}; };
struct HLE_TransformFeedbackSetup final { struct HLE_TransformFeedbackSetup final {
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, [[maybe_unused]] u32 method);
}; };
struct MacroInterpreterImpl final { struct MacroInterpreterImpl final {
MacroInterpreterImpl() {} MacroInterpreterImpl() {}
MacroInterpreterImpl(std::span<const u32> code_) : code{code_} {} MacroInterpreterImpl(std::span<const u32> code_) : code{code_} {}
void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> params, u32 method);
void Reset(); void Reset();
bool Step(Engines::Maxwell3D& maxwell3d, bool is_delay_slot); bool Step(Core::System& system, Engines::Maxwell3D& maxwell3d, bool is_delay_slot);
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
void ProcessResult(Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result); void ProcessResult(Core::System& system, Engines::Maxwell3D& maxwell3d, Macro::ResultOperation operation, u32 reg, u32 result);
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
Macro::Opcode GetOpcode() const; Macro::Opcode GetOpcode() const;
u32 GetRegister(u32 register_id) const; u32 GetRegister(u32 register_id) const;
@ -169,7 +173,7 @@ struct MacroInterpreterImpl final {
[[nodiscard]] inline void SetMethodAddress(u32 address) noexcept { [[nodiscard]] inline void SetMethodAddress(u32 address) noexcept {
method_address.raw = address; method_address.raw = address;
} }
void Send(Engines::Maxwell3D& maxwell3d, u32 value); void Send(Core::System& system, Engines::Maxwell3D& maxwell3d, u32 value);
u32 Read(Engines::Maxwell3D& maxwell3d, u32 method) const; u32 Read(Engines::Maxwell3D& maxwell3d, u32 method) const;
u32 FetchParameter(); u32 FetchParameter();
/// General purpose macro registers. /// General purpose macro registers.
@ -192,7 +196,7 @@ struct DynamicCachedMacro {
/// Executes the macro code with the specified input parameters. /// Executes the macro code with the specified input parameters.
/// @param parameters The parameters of the macro /// @param parameters The parameters of the macro
/// @param method The method to execute /// @param method The method to execute
virtual void Execute(Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) = 0; virtual void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> parameters, u32 method) = 0;
}; };
using AnyCachedMacro = std::variant< using AnyCachedMacro = std::variant<
@ -227,8 +231,8 @@ struct MacroEngine {
uploaded_macro_code.erase(method); uploaded_macro_code.erase(method);
} }
// Compiles the macro if its not in the cache, and executes the compiled macro // Compiles the macro if its not in the cache, and executes the compiled macro
void Execute(Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters); void Execute(Core::System& system, Engines::Maxwell3D& maxwell3d, u32 method, std::span<const u32> parameters);
AnyCachedMacro Compile(Engines::Maxwell3D& maxwell3d, std::span<const u32> code); AnyCachedMacro Compile(Core::System& system, Engines::Maxwell3D& maxwell3d, std::span<const u32> code);
struct CacheInfo { struct CacheInfo {
AnyCachedMacro program; AnyCachedMacro program;
u64 hash{}; u64 hash{};