From 3ece2b24b75844da0b3beeb85c21a063ccccb9a7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 24 Jun 2026 07:04:23 +0000 Subject: [PATCH] register extra limits in texture pass --- src/shader_recompiler/host_translate_info.h | 17 ++- .../global_memory_to_storage_buffer_pass.cpp | 5 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 112 ++++++++---------- src/shader_recompiler/profile.h | 1 - .../renderer_vulkan/vk_pipeline_cache.cpp | 17 ++- src/video_core/vulkan_common/vulkan_device.h | 43 +++---- 6 files changed, 97 insertions(+), 98 deletions(-) diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 4a9261d5f5..2229f94b6e 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -6,6 +6,8 @@ #pragma once +#include "common/common_types.h" + namespace Shader { // Try to keep entries here to a minimum @@ -13,16 +15,23 @@ namespace Shader { /// Misc information about the host struct HostTranslateInfo { + u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs + u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage + u32 max_per_stage_resources{}; ///< maximum resources per stage + u32 max_descriptor_set_samplers{}; + u32 max_descriptor_set_uniform_buffers{}; + u32 max_descriptor_set_uniform_buffers_dynamic{}; + u32 max_descriptor_set_storage_buffers{}; + u32 max_descriptor_set_storage_buffers_dynamic{}; + u32 max_descriptor_set_sampled_images{}; + u32 max_descriptor_set_storage_images{}; + u32 max_descriptor_set_input_attachements{}; bool support_float64{}; ///< True when the device supports 64-bit floats bool support_float16{}; ///< True when the device supports 16-bit floats bool support_int64{}; ///< True when the device supports 64-bit integers bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered bool support_snorm_render_buffer{}; ///< True when the 
device supports SNORM render buffers bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS - u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs - u32 max_per_stage_descriptor_sampled_images{1024}; ///< maximum sampled descriptors per stage - u32 max_per_stage_resources{4096}; ///< maximum resources per stage - u32 max_descriptor_set_sampled_images{1024}; ///< maximum sampled descriptors per set bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry ///< passthrough shaders bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 2d4feca02c..3929475ddc 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -545,7 +548,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn IR::Block* const block{storage_inst.block}; IR::Inst* const inst{storage_inst.inst}; const IR::U32 offset{ - StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; + StorageOffset(*block, *inst, storage_buffer, u32(host_info.min_ssbo_alignment))}; Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index c500b53ff7..28a6c5a7ee 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -32,9 +32,8 @@ struct TextureInst { using TextureInstVector = 
boost::container::small_vector; constexpr u32 DESCRIPTOR_SIZE = 8; -constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast(std::countr_zero(DESCRIPTOR_SIZE)); -constexpr u32 DYNAMIC_DESCRIPTOR_CBUF_BYTES = 16 * 1024; -constexpr u32 MAX_DYNAMIC_DESCRIPTOR_COUNT = 1024; +constexpr u32 DESCRIPTOR_SIZE_SHIFT = u32(std::countr_zero(DESCRIPTOR_SIZE)); +constexpr u32 DESCRIPTOR_MAX_COUNT = 1024; u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) { const IR::Inst* const inst{dynamic_offset.InstRecursive()}; @@ -46,57 +45,47 @@ u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) { return DESCRIPTOR_SIZE_SHIFT; } const u32 size_shift{shift.U32()}; - return size_shift >= DESCRIPTOR_SIZE_SHIFT && size_shift < 31 ? size_shift - : DESCRIPTOR_SIZE_SHIFT; + return size_shift >= DESCRIPTOR_SIZE_SHIFT && size_shift < 31 ? size_shift : DESCRIPTOR_SIZE_SHIFT; } -u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift) { - if (size_shift >= 31 || base_offset >= DYNAMIC_DESCRIPTOR_CBUF_BYTES) { +u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) { + auto const max_cbuf_bytes = 16 * max_descriptors; + if (size_shift >= 31 || base_offset >= max_cbuf_bytes) return 1; - } - const u32 stride{1U << size_shift}; - const u32 available{DYNAMIC_DESCRIPTOR_CBUF_BYTES - base_offset}; - if (available < DESCRIPTOR_SIZE) { + auto const stride = 1U << size_shift; + auto const available = max_cbuf_bytes - base_offset; + if (available < DESCRIPTOR_SIZE) return 1; - } - const u32 available_count{1U + (available - DESCRIPTOR_SIZE) / stride}; - return std::min(MAX_DYNAMIC_DESCRIPTOR_COUNT, available_count); + auto const available_count = 1U + (available - DESCRIPTOR_SIZE) / stride; + return std::min(max_descriptors, available_count); } u32 SaturatingSub(u32 lhs, u32 rhs) { return lhs > rhs ? 
lhs - rhs : 0; } -template -u32 StaticDescriptorCount(const Descriptors& descriptors) { - u32 count{}; - for (const auto& desc : descriptors) { - if (desc.count <= 1) { - count += desc.count; - } - } - return count; +template +[[nodiscard]] u32 StaticDescriptorCount(T const& descriptors) noexcept { + return std::accumulate(descriptors.cbegin(), descriptors.cend(), 0U, [](auto const& acc, auto const& e) { + return acc + (e.count <= 1 ? e.count : 0U); /* parenthesized so the running sum is kept: without the parentheses ?: tests (acc + e.count) <= 1 and discards acc */ + }); } -u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, - u32 dynamic_arrays) { - if (dynamic_arrays == 0) { - return MAX_DYNAMIC_DESCRIPTOR_COUNT; +u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) { + if (dynamic_arrays > 0) { + const u32 sampled_static_count = StaticDescriptorCount(info.texture_buffer_descriptors) + StaticDescriptorCount(info.texture_descriptors); + auto const resource_static_count = + NumDescriptors(info.constant_buffer_descriptors) + + NumDescriptors(info.storage_buffers_descriptors) + sampled_static_count + + NumDescriptors(info.image_buffer_descriptors) + NumDescriptors(info.image_descriptors); + auto const sampled_limit = std::min(host_info.max_per_stage_descriptor_sampled_images, host_info.max_descriptor_set_sampled_images); + auto const sampled_budget = SaturatingSub(sampled_limit, sampled_static_count); + auto const resource_budget = SaturatingSub(host_info.max_per_stage_resources, resource_static_count); + auto const sampled_cap = sampled_budget / dynamic_arrays; + auto const resource_cap = resource_budget / dynamic_arrays; + return std::max(1U, std::min({DESCRIPTOR_MAX_COUNT, sampled_cap, resource_cap})); } - const u32 sampled_static_count{StaticDescriptorCount(info.texture_buffer_descriptors) + - StaticDescriptorCount(info.texture_descriptors)}; - const u32 resource_static_count{ - NumDescriptors(info.constant_buffer_descriptors) + - NumDescriptors(info.storage_buffers_descriptors) + sampled_static_count 
+ - NumDescriptors(info.image_buffer_descriptors) + NumDescriptors(info.image_descriptors)}; - const u32 sampled_limit{std::min(host_info.max_per_stage_descriptor_sampled_images, - host_info.max_descriptor_set_sampled_images)}; - const u32 sampled_budget{SaturatingSub(sampled_limit, sampled_static_count)}; - const u32 resource_budget{SaturatingSub(host_info.max_per_stage_resources, - resource_static_count)}; - const u32 sampled_cap{sampled_budget / dynamic_arrays}; - const u32 resource_cap{resource_budget / dynamic_arrays}; - return std::max(1U, std::min({MAX_DYNAMIC_DESCRIPTOR_COUNT, sampled_cap, resource_cap})); + return DESCRIPTOR_MAX_COUNT; } IR::Opcode IndexedInstruction(const IR::Inst& inst) { @@ -304,21 +293,23 @@ static inline bool IsTexturePixelFormatIntegerCached(Environment& env, } -std::optional Track(const IR::Value& value, Environment& env); -static inline std::optional TrackCached(const IR::Value& v, Environment& env) { +std::optional Track(const IR::Value& value, Environment& env, const HostTranslateInfo& host_info); +static inline std::optional TrackCached(const IR::Value& v, Environment& env, const HostTranslateInfo& host_info) { if (const IR::Inst* key = v.InstRecursive()) { if (auto it = env.track_cache.find(key); it != env.track_cache.end()) return it->second; - auto found = Track(v, env); + auto found = Track(v, env, host_info); if (found) env.track_cache.emplace(key, *found); return found; } - return Track(v, env); + return Track(v, env, host_info); } -std::optional TryGetConstBuffer(const IR::Inst* inst, Environment& env); +std::optional TryGetConstBuffer(const IR::Inst* inst, Environment& env, const HostTranslateInfo& host_info); -std::optional Track(const IR::Value& value, Environment& env) { - return IR::BreadthFirstSearch(value, [&env](const IR::Inst* inst) { return TryGetConstBuffer(inst, env); }); +std::optional Track(const IR::Value& value, Environment& env, const HostTranslateInfo& host_info) { + return IR::BreadthFirstSearch(value, 
[&env, &host_info](const IR::Inst* inst) { + return TryGetConstBuffer(inst, env, host_info); + }); } std::optional TryGetConstant(IR::Value& value, Environment& env) { @@ -342,13 +333,13 @@ std::optional TryGetConstant(IR::Value& value, Environment& env) { return ReadCbufCached(env, index_number, offset_number); } -std::optional TryGetConstBuffer(const IR::Inst* inst, Environment& env) { +std::optional TryGetConstBuffer(const IR::Inst* inst, Environment& env, const HostTranslateInfo& host_info) { switch (inst->GetOpcode()) { default: return std::nullopt; case IR::Opcode::BitwiseOr32: { - std::optional lhs{TrackCached(inst->Arg(0), env)}; - std::optional rhs{TrackCached(inst->Arg(1), env)}; + std::optional lhs{TrackCached(inst->Arg(0), env, host_info)}; + std::optional rhs{TrackCached(inst->Arg(1), env, host_info)}; if (!lhs || !rhs) { return std::nullopt; } @@ -378,12 +369,11 @@ std::optional TryGetConstBuffer(const IR::Inst* inst, Environme if (!shift.IsImmediate()) { return std::nullopt; } - std::optional lhs{TrackCached(inst->Arg(0), env)}; + std::optional lhs{TrackCached(inst->Arg(0), env, host_info)}; if (lhs) { lhs->shift_left = shift.U32(); } return lhs; - break; } case IR::Opcode::BitwiseAnd32: { IR::Value op1{inst->Arg(0)}; @@ -407,7 +397,7 @@ std::optional TryGetConstBuffer(const IR::Inst* inst, Environme return std::nullopt; } while (false); } - std::optional lhs{TrackCached(op1, env)}; + std::optional lhs{TrackCached(op1, env, host_info)}; if (lhs) { lhs->shift_left = static_cast(std::countr_zero(op2.U32())); } @@ -462,15 +452,15 @@ std::optional TryGetConstBuffer(const IR::Inst* inst, Environme .secondary_offset = 0, .secondary_shift_left = 0, .dynamic_offset = dynamic_offset, - .count = DynamicDescriptorCount(base_offset, size_shift), + .count = DynamicDescriptorCount(base_offset, size_shift, DESCRIPTOR_MAX_COUNT), .has_secondary = false, }; } -TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { +TextureInst 
MakeInst(Environment& env, IR::Block* block, IR::Inst& inst, const HostTranslateInfo& host_info) { ConstBufferAddr addr; if (IsBindless(inst)) { - const std::optional track_addr{TrackCached(inst.Arg(0), env)}; + const std::optional track_addr{TrackCached(inst.Arg(0), env, host_info)}; if (!track_addr) { throw NotImplementedException("Failed to track bindless texture constant buffer"); @@ -506,15 +496,15 @@ u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) { return lhs_raw | rhs_raw; } - [[maybe_unused]]TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { +[[maybe_unused]] TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { return env.ReadTextureType(GetTextureHandle(env, cbuf)); } - [[maybe_unused]]TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) { +[[maybe_unused]] TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) { return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf)); } - [[maybe_unused]]bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) { +[[maybe_unused]] bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) { return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf)); } @@ -675,7 +665,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo if (!IsTextureInstruction(inst)) { continue; } - to_replace.push_back(MakeInst(env, block, inst)); + to_replace.push_back(MakeInst(env, block, inst, host_info)); } } // Sort instructions to visit textures by constant buffer index, then by offset diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index ff19f0710f..bd7bc6ac7b 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -92,7 +92,6 @@ struct Profile { bool has_broken_robust{}; u64 min_ssbo_alignment{}; - u32 max_user_clip_distances{}; }; diff --git 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 798499141e..cb811857e4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -439,10 +439,21 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, .has_broken_robust = device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, .min_ssbo_alignment = device.GetStorageBufferAlignment(), - .max_user_clip_distances = device.GetMaxUserClipDistances(), + .max_user_clip_distances = device.GetMaxUserClipDistances() }; host_info = Shader::HostTranslateInfo{ + .min_ssbo_alignment = device.GetStorageBufferAlignment(), + .max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(), + .max_per_stage_resources = device.GetMaxPerStageResources(), + .max_descriptor_set_samplers = device.GetMaxDescriptorSetSamplers(), + .max_descriptor_set_uniform_buffers = device.GetMaxDescriptorSetUniformBuffers(), + .max_descriptor_set_uniform_buffers_dynamic = device.GetMaxDescriptorSetUniformBuffersDynamic(), + .max_descriptor_set_storage_buffers = device.GetMaxDescriptorSetStorageBuffers(), + .max_descriptor_set_storage_buffers_dynamic = device.GetMaxDescriptorSetStorageBuffersDynamic(), + .max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(), + .max_descriptor_set_storage_images = device.GetMaxDescriptorSetStorageImages(), + .max_descriptor_set_input_attachements = device.GetMaxDescriptorSetInputAttachments(), .support_float64 = device.IsFloat64Supported(), .support_float16 = device.IsFloat16Supported(), .support_int64 = device.IsShaderInt64Supported(), @@ -451,10 +462,6 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY, .support_snorm_render_buffer = true, .support_viewport_index_layer = 
device.IsExtShaderViewportIndexLayerSupported(), - .min_ssbo_alignment = static_cast(device.GetStorageBufferAlignment()), - .max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(), - .max_per_stage_resources = device.GetMaxPerStageResources(), - .max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(), .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .support_conditional_barrier = device.SupportsConditionalBarriers(), }; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 7f2c29519f..110d0c1199 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -321,32 +321,23 @@ public: return properties.properties.limits.maxPushConstantsSize; } - /// Returns the maximum size for shared memory. - u32 GetMaxComputeSharedMemorySize() const { - return properties.properties.limits.maxComputeSharedMemorySize; - } - - /// Returns the maximum number of dynamic storage buffer descriptors per set. - u32 GetMaxDescriptorSetStorageBuffersDynamic() const { - return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic; - } - - /// Returns the maximum number of dynamic uniform buffer descriptors per set. 
- u32 GetMaxDescriptorSetUniformBuffersDynamic() const { - return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic; - } - - u32 GetMaxPerStageDescriptorSampledImages() const { - return properties.properties.limits.maxPerStageDescriptorSampledImages; - } - - u32 GetMaxPerStageResources() const { - return properties.properties.limits.maxPerStageResources; - } - - u32 GetMaxDescriptorSetSampledImages() const { - return properties.properties.limits.maxDescriptorSetSampledImages; - } +#define FN_MAX_LIMIT_LIST \ + FN_MAX_LIMIT_ELEM(ComputeSharedMemorySize) \ + FN_MAX_LIMIT_ELEM(PerStageDescriptorSampledImages) \ + FN_MAX_LIMIT_ELEM(PerStageResources) \ + FN_MAX_LIMIT_ELEM(DescriptorSetSamplers) \ + FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffers) \ + FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffersDynamic) \ + FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffers) \ + FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffersDynamic) \ + FN_MAX_LIMIT_ELEM(DescriptorSetSampledImages) \ + FN_MAX_LIMIT_ELEM(DescriptorSetStorageImages) \ + FN_MAX_LIMIT_ELEM(DescriptorSetInputAttachments) +#define FN_MAX_LIMIT_ELEM(name) \ + u32 GetMax##name() const { return properties.properties.limits.max##name; } +FN_MAX_LIMIT_LIST +#undef FN_MAX_LIMIT_ELEM +#undef FN_MAX_LIMIT_LIST /// Returns float control properties of the device. const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {