diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 1b53404fcc..4a9261d5f5 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -17,6 +20,9 @@ struct HostTranslateInfo {
     bool support_snorm_render_buffer{};  ///< True when the device supports SNORM render buffers
     bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
     u32 min_ssbo_alignment{};            ///< Minimum alignment supported by the device for SSBOs
+    u32 max_per_stage_descriptor_sampled_images{1024}; ///< Maximum sampled descriptors per stage
+    u32 max_per_stage_resources{4096};                 ///< Maximum resources per stage
+    u32 max_descriptor_set_sampled_images{1024};       ///< Maximum sampled descriptors per set
     bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
                                                 ///< passthrough shaders
     bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 7a924aa8b5..55b6f7213c 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -62,6 +62,50 @@ u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift) {
     return std::min(MAX_DYNAMIC_DESCRIPTOR_COUNT, available_count);
 }
 
+/// Returns lhs - rhs, or zero when the subtraction would wrap around.
+u32 SaturatingSub(u32 lhs, u32 rhs) {
+    return lhs > rhs ? lhs - rhs : 0;
+}
+
+/// Sums the counts of the descriptors whose count is at most one; larger ones are skipped.
+template <typename Descriptors>
+u32 StaticDescriptorCount(const Descriptors& descriptors) {
+    u32 count{};
+    for (const auto& desc : descriptors) {
+        if (desc.count <= 1) {
+            count += desc.count;
+        }
+    }
+    return count;
+}
+
+/// Returns the element count cap for dynamically indexed sampled texture arrays.
+/// The tighter of the two host sampled-image limits and the per-stage resource limit are
+/// reduced by the descriptors already recorded in `info`, then split evenly between
+/// `dynamic_arrays` arrays. The result is clamped to MAX_DYNAMIC_DESCRIPTOR_COUNT and is
+/// never less than one.
+u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info,
+                             u32 dynamic_arrays) {
+    if (dynamic_arrays == 0) {
+        return MAX_DYNAMIC_DESCRIPTOR_COUNT;
+    }
+    const u32 sampled_static_count{StaticDescriptorCount(info.texture_buffer_descriptors) +
+                                   StaticDescriptorCount(info.texture_descriptors)};
+    const u32 resource_static_count{
+        NumDescriptors(info.constant_buffer_descriptors) +
+        NumDescriptors(info.storage_buffers_descriptors) + sampled_static_count +
+        NumDescriptors(info.image_buffer_descriptors) + NumDescriptors(info.image_descriptors)};
+    const u32 sampled_limit{std::min(host_info.max_per_stage_descriptor_sampled_images,
+                                     host_info.max_descriptor_set_sampled_images)};
+    const u32 sampled_budget{SaturatingSub(sampled_limit, sampled_static_count)};
+    const u32 resource_budget{SaturatingSub(host_info.max_per_stage_resources,
+                                            resource_static_count)};
+    const u32 sampled_cap{sampled_budget / dynamic_arrays};
+    const u32 resource_cap{resource_budget / dynamic_arrays};
+    // Never return zero, even when the static descriptors already exhaust the budget.
+    return std::max(1U, std::min({MAX_DYNAMIC_DESCRIPTOR_COUNT, sampled_cap, resource_cap}));
+}
+
 IR::Opcode IndexedInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::BindlessImageSampleImplicitLod:
@@ -138,6 +182,43 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) {
     }
 }
 
+/// Returns true when the opcode is an image read, an image write or a 32-bit image atomic.
+bool IsStorageImageOpcode(IR::Opcode opcode) {
+    switch (opcode) {
+    case IR::Opcode::ImageRead:
+    case IR::Opcode::ImageAtomicIAdd32:
+    case IR::Opcode::ImageAtomicSMin32:
+    case IR::Opcode::ImageAtomicUMin32:
+    case IR::Opcode::ImageAtomicSMax32:
+    case IR::Opcode::ImageAtomicUMax32:
+    case IR::Opcode::ImageAtomicInc32:
+    case IR::Opcode::ImageAtomicDec32:
+    case IR::Opcode::ImageAtomicAnd32:
+    case IR::Opcode::ImageAtomicOr32:
+    case IR::Opcode::ImageAtomicXor32:
+    case IR::Opcode::ImageAtomicExchange32:
+    case IR::Opcode::ImageWrite:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/// Counts the instructions in `to_replace` that access a sampled, non-buffer texture array
+/// (constant buffer count above one). Every matching instruction adds one, so an array used
+/// by several instructions is counted once per use.
+u32 DynamicSampledTextureArrayCount(const TextureInstVector& to_replace) {
+    u32 count{};
+    for (const TextureInst& inst : to_replace) {
+        const auto flags{inst.inst->Flags<IR::TextureInstInfo>()};
+        if (inst.cbuf.count > 1 && !IsStorageImageOpcode(IndexedInstruction(*inst.inst)) &&
+            flags.type != TextureType::Buffer) {
+            ++count;
+        }
+    }
+    return count;
+}
+
 bool IsBindless(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::BindlessImageSampleImplicitLod:
@@ -619,6 +700,10 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
         program.info.texture_descriptors,
         program.info.image_descriptors,
     };
+    // Resolve the dynamic array cap once, before the loop below starts adding descriptors.
+    const u32 dynamic_sampled_arrays{DynamicSampledTextureArrayCount(to_replace)};
+    const u32 sampled_dynamic_cap{
+        DynamicSampledTextureCap(program.info, host_info, dynamic_sampled_arrays)};
     for (TextureInst& texture_inst : to_replace) {
         // TODO: Handle arrays
         IR::Inst* const inst{texture_inst.inst};
@@ -664,6 +749,8 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
         u32 index;
         const u32 size_shift{cbuf.count > 1 ? DynamicDescriptorSizeShift(cbuf.dynamic_offset)
                                             : DESCRIPTOR_SIZE_SHIFT};
+        // Element count written to the descriptor; sampled texture arrays may lower it below.
+        u32 count{cbuf.count};
         switch (inst->GetOpcode()) {
         case IR::Opcode::ImageRead:
         case IR::Opcode::ImageAtomicIAdd32:
@@ -692,7 +779,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
                     .is_integer = is_integer,
                     .cbuf_index = cbuf.index,
                     .cbuf_offset = cbuf.offset,
-                    .count = cbuf.count,
+                    .count = count,
                     .size_shift = size_shift,
                 });
             } else {
@@ -704,7 +791,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
                     .is_integer = is_integer,
                     .cbuf_index = cbuf.index,
                     .cbuf_offset = cbuf.offset,
-                    .count = cbuf.count,
+                    .count = count,
                     .size_shift = size_shift,
                 });
             }
@@ -720,10 +807,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
                     .secondary_cbuf_index = cbuf.secondary_index,
                     .secondary_cbuf_offset = cbuf.secondary_offset,
                     .secondary_shift_left = cbuf.secondary_shift_left,
-                    .count = cbuf.count,
+                    .count = count,
                     .size_shift = size_shift,
                 });
             } else {
+                // Keep sampled texture arrays within the host descriptor budget.
+                count = std::min(count, sampled_dynamic_cap);
                 index = descriptors.Add(TextureDescriptor{
                     .type = flags.type,
                     .is_depth = flags.is_depth != 0,
@@ -735,7 +824,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
                     .secondary_cbuf_index = cbuf.secondary_index,
                     .secondary_cbuf_offset = cbuf.secondary_offset,
                     .secondary_shift_left = cbuf.secondary_shift_left,
-                    .count = cbuf.count,
+                    .count = count,
                     .size_shift = size_shift,
                 });
             }
@@ -744,12 +833,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
         flags.descriptor_index.Assign(index);
         inst->SetFlags(flags);
 
-        if (cbuf.count > 1) {
+        if (count > 1) {
             const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
             IR::IREmitter ir{*texture_inst.block, insert_point};
             const IR::U32 shift{ir.Imm32(size_shift)};
             inst->SetArg(0, ir.UMin(ir.ShiftRightLogical(cbuf.dynamic_offset, shift),
-                                    ir.Imm32(cbuf.count - 1)));
+                                    ir.Imm32(count - 1)));
         } else {
             inst->SetArg(0, IR::Value{});
         }
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 817320c96c..88dadd0ef8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -447,6 +447,9 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
         .support_snorm_render_buffer = true,
         .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
         .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
+        .max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
+        .max_per_stage_resources = device.GetMaxPerStageResources(),
+        .max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
         .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
         .support_conditional_barrier = device.SupportsConditionalBarriers(),
     };
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index a8a89aee89..a5ffd08eac 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -335,6 +335,21 @@ public:
         return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
     }
 
+    /// Returns the maximum number of sampled images accessible to a single shader stage.
+    u32 GetMaxPerStageDescriptorSampledImages() const {
+        return properties.properties.limits.maxPerStageDescriptorSampledImages;
+    }
+
+    /// Returns the maximum number of resources accessible to a single shader stage.
+    u32 GetMaxPerStageResources() const {
+        return properties.properties.limits.maxPerStageResources;
+    }
+
+    /// Returns the maximum number of sampled images allowed in a pipeline layout.
+    u32 GetMaxDescriptorSetSampledImages() const {
+        return properties.properties.limits.maxDescriptorSetSampledImages;
+    }
+
     /// Returns float control properties of the device.
     const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
         return properties.float_controls;