mirror of
https://git.eden-emu.dev/eden-emu/eden.git
synced 2026-06-26 04:49:30 -04:00
register extra limits in texture pass
This commit is contained in:
parent
c1849ddc8e
commit
3ece2b24b7
6 changed files with 97 additions and 98 deletions
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
// Try to keep entries here to a minimum
|
||||
|
|
@ -13,16 +15,23 @@ namespace Shader {
|
|||
|
||||
/// Misc information about the host
|
||||
struct HostTranslateInfo {
|
||||
u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage
|
||||
u32 max_per_stage_resources{}; ///< maximum resources per stage
|
||||
u32 max_descriptor_set_samplers{};
|
||||
u32 max_descriptor_set_uniform_buffers{};
|
||||
u32 max_descriptor_set_uniform_buffers_dynamic{};
|
||||
u32 max_descriptor_set_storage_buffers{};
|
||||
u32 max_descriptor_set_storage_buffers_dynamic{};
|
||||
u32 max_descriptor_set_sampled_images{};
|
||||
u32 max_descriptor_set_storage_images{};
|
||||
u32 max_descriptor_set_input_attachements{};
|
||||
bool support_float64{}; ///< True when the device supports 64-bit floats
|
||||
bool support_float16{}; ///< True when the device supports 16-bit floats
|
||||
bool support_int64{}; ///< True when the device supports 64-bit integers
|
||||
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
|
||||
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
|
||||
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
|
||||
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
u32 max_per_stage_descriptor_sampled_images{1024}; ///< maximum sampled descriptors per stage
|
||||
u32 max_per_stage_resources{4096}; ///< maximum resources per stage
|
||||
u32 max_descriptor_set_sampled_images{1024}; ///< maximum sampled descriptors per set
|
||||
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
|
||||
///< passthrough shaders
|
||||
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
|
@ -545,7 +548,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn
|
|||
IR::Block* const block{storage_inst.block};
|
||||
IR::Inst* const inst{storage_inst.inst};
|
||||
const IR::U32 offset{
|
||||
StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
|
||||
StorageOffset(*block, *inst, storage_buffer, u32(host_info.min_ssbo_alignment))};
|
||||
Replace(*block, *inst, index, offset);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,9 +32,8 @@ struct TextureInst {
|
|||
using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
|
||||
|
||||
constexpr u32 DESCRIPTOR_SIZE = 8;
|
||||
constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
|
||||
constexpr u32 DYNAMIC_DESCRIPTOR_CBUF_BYTES = 16 * 1024;
|
||||
constexpr u32 MAX_DYNAMIC_DESCRIPTOR_COUNT = 1024;
|
||||
constexpr u32 DESCRIPTOR_SIZE_SHIFT = u32(std::countr_zero(DESCRIPTOR_SIZE));
|
||||
constexpr u32 DESCRIPTOR_MAX_COUNT = 1024;
|
||||
|
||||
u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) {
|
||||
const IR::Inst* const inst{dynamic_offset.InstRecursive()};
|
||||
|
|
@ -46,57 +45,47 @@ u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) {
|
|||
return DESCRIPTOR_SIZE_SHIFT;
|
||||
}
|
||||
const u32 size_shift{shift.U32()};
|
||||
return size_shift >= DESCRIPTOR_SIZE_SHIFT && size_shift < 31 ? size_shift
|
||||
: DESCRIPTOR_SIZE_SHIFT;
|
||||
return size_shift >= DESCRIPTOR_SIZE_SHIFT && size_shift < 31 ? size_shift : DESCRIPTOR_SIZE_SHIFT;
|
||||
}
|
||||
|
||||
/// Returns how many descriptors spaced `1 << size_shift` bytes apart fit between `base_offset`
/// and the end of a window of `16 * max_descriptors` bytes, never exceeding `max_descriptors`.
/// Falls back to 1 when the shift is 31 or more, when the base offset lies outside the window,
/// or when less than one descriptor's worth of bytes remains.
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) {
    const u32 window_bytes{16U * max_descriptors};
    const bool shift_usable{size_shift < 31};
    const bool base_inside_window{base_offset < window_bytes};
    if (!shift_usable || !base_inside_window) {
        return 1;
    }
    const u32 bytes_left{window_bytes - base_offset};
    if (bytes_left < DESCRIPTOR_SIZE) {
        return 1;
    }
    // One descriptor at the base offset, plus one more for every whole stride that still fits
    const u32 element_stride{1U << size_shift};
    const u32 fitting{(bytes_left - DESCRIPTOR_SIZE) / element_stride + 1U};
    return std::min(max_descriptors, fitting);
}
|
||||
|
||||
/// Subtracts `rhs` from `lhs`, yielding zero instead of wrapping when `rhs` is not smaller.
u32 SaturatingSub(u32 lhs, u32 rhs) {
    if (lhs <= rhs) {
        return 0;
    }
    return lhs - rhs;
}
|
||||
|
||||
template <typename Descriptors>
|
||||
u32 StaticDescriptorCount(const Descriptors& descriptors) {
|
||||
u32 count{};
|
||||
for (const auto& desc : descriptors) {
|
||||
if (desc.count <= 1) {
|
||||
count += desc.count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
template <typename T>
|
||||
[[nodiscard]] u32 StaticDescriptorCount(T const& descriptors) noexcept {
|
||||
return std::accumulate(descriptors.cbegin(), descriptors.cend(), 0U, [](auto const& acc, auto const& e) {
|
||||
return acc + e.count <= 1 ? e.count : 0;
|
||||
});
|
||||
}
|
||||
|
||||
/// Returns the descriptor cap applied to each of `dynamic_arrays` sampled texture arrays.
/// With no such arrays the cap is DESCRIPTOR_MAX_COUNT. Otherwise the sampled-image and
/// per-stage resource limits from `host_info`, minus the descriptors already counted in `info`,
/// are split evenly between the arrays and the result is kept within [1, DESCRIPTOR_MAX_COUNT].
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) {
    if (dynamic_arrays == 0) {
        return DESCRIPTOR_MAX_COUNT;
    }

    // Descriptors that are already spoken for
    const u32 static_sampled{StaticDescriptorCount(info.texture_buffer_descriptors) +
                             StaticDescriptorCount(info.texture_descriptors)};
    const u32 static_resources{NumDescriptors(info.constant_buffer_descriptors) +
                               NumDescriptors(info.storage_buffers_descriptors) + static_sampled +
                               NumDescriptors(info.image_buffer_descriptors) +
                               NumDescriptors(info.image_descriptors)};

    // What is left of each host limit once the static descriptors are subtracted
    const u32 sampled_ceiling{std::min(host_info.max_per_stage_descriptor_sampled_images,
                                       host_info.max_descriptor_set_sampled_images)};
    const u32 sampled_left{SaturatingSub(sampled_ceiling, static_sampled)};
    const u32 resources_left{SaturatingSub(host_info.max_per_stage_resources, static_resources)};

    // Even split between the arrays; the tightest of the three bounds wins
    const u32 per_array_sampled{sampled_left / dynamic_arrays};
    const u32 per_array_resources{resources_left / dynamic_arrays};
    const u32 tightest{std::min({DESCRIPTOR_MAX_COUNT, per_array_sampled, per_array_resources})};
    return std::max(1U, tightest);
}
|
||||
|
||||
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
||||
|
|
@ -304,21 +293,23 @@ static inline bool IsTexturePixelFormatIntegerCached(Environment& env,
|
|||
}
|
||||
|
||||
|
||||
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env);
|
||||
static inline std::optional<ConstBufferAddr> TrackCached(const IR::Value& v, Environment& env) {
|
||||
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env, const HostTranslateInfo& host_info);
|
||||
static inline std::optional<ConstBufferAddr> TrackCached(const IR::Value& v, Environment& env, const HostTranslateInfo& host_info) {
|
||||
if (const IR::Inst* key = v.InstRecursive()) {
|
||||
if (auto it = env.track_cache.find(key); it != env.track_cache.end()) return it->second;
|
||||
auto found = Track(v, env);
|
||||
auto found = Track(v, env, host_info);
|
||||
if (found) env.track_cache.emplace(key, *found);
|
||||
return found;
|
||||
}
|
||||
return Track(v, env);
|
||||
return Track(v, env, host_info);
|
||||
}
|
||||
|
||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env);
|
||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const HostTranslateInfo& host_info);
|
||||
|
||||
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env) {
|
||||
return IR::BreadthFirstSearch(value, [&env](const IR::Inst* inst) { return TryGetConstBuffer(inst, env); });
|
||||
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env, const HostTranslateInfo& host_info) {
|
||||
return IR::BreadthFirstSearch(value, [&env, &host_info](const IR::Inst* inst) {
|
||||
return TryGetConstBuffer(inst, env, host_info);
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
|
||||
|
|
@ -342,13 +333,13 @@ std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
|
|||
return ReadCbufCached(env, index_number, offset_number);
|
||||
}
|
||||
|
||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env) {
|
||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const HostTranslateInfo& host_info) {
|
||||
switch (inst->GetOpcode()) {
|
||||
default:
|
||||
return std::nullopt;
|
||||
case IR::Opcode::BitwiseOr32: {
|
||||
std::optional lhs{TrackCached(inst->Arg(0), env)};
|
||||
std::optional rhs{TrackCached(inst->Arg(1), env)};
|
||||
std::optional lhs{TrackCached(inst->Arg(0), env, host_info)};
|
||||
std::optional rhs{TrackCached(inst->Arg(1), env, host_info)};
|
||||
if (!lhs || !rhs) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
|
@ -378,12 +369,11 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
if (!shift.IsImmediate()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
std::optional lhs{TrackCached(inst->Arg(0), env)};
|
||||
std::optional lhs{TrackCached(inst->Arg(0), env, host_info)};
|
||||
if (lhs) {
|
||||
lhs->shift_left = shift.U32();
|
||||
}
|
||||
return lhs;
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::BitwiseAnd32: {
|
||||
IR::Value op1{inst->Arg(0)};
|
||||
|
|
@ -407,7 +397,7 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
return std::nullopt;
|
||||
} while (false);
|
||||
}
|
||||
std::optional lhs{TrackCached(op1, env)};
|
||||
std::optional lhs{TrackCached(op1, env, host_info)};
|
||||
if (lhs) {
|
||||
lhs->shift_left = static_cast<u32>(std::countr_zero(op2.U32()));
|
||||
}
|
||||
|
|
@ -462,15 +452,15 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
.secondary_offset = 0,
|
||||
.secondary_shift_left = 0,
|
||||
.dynamic_offset = dynamic_offset,
|
||||
.count = DynamicDescriptorCount(base_offset, size_shift),
|
||||
.count = DynamicDescriptorCount(base_offset, size_shift, DESCRIPTOR_MAX_COUNT),
|
||||
.has_secondary = false,
|
||||
};
|
||||
}
|
||||
|
||||
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
|
||||
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst, const HostTranslateInfo& host_info) {
|
||||
ConstBufferAddr addr;
|
||||
if (IsBindless(inst)) {
|
||||
const std::optional<ConstBufferAddr> track_addr{TrackCached(inst.Arg(0), env)};
|
||||
const std::optional<ConstBufferAddr> track_addr{TrackCached(inst.Arg(0), env, host_info)};
|
||||
|
||||
if (!track_addr) {
|
||||
throw NotImplementedException("Failed to track bindless texture constant buffer");
|
||||
|
|
@ -506,15 +496,15 @@ u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) {
|
|||
return lhs_raw | rhs_raw;
|
||||
}
|
||||
|
||||
/// Resolves the texture handle for `cbuf` and asks the environment for its texture type.
[[maybe_unused]] TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
    const u32 handle{GetTextureHandle(env, cbuf)};
    return env.ReadTextureType(handle);
}
|
||||
|
||||
/// Resolves the texture handle for `cbuf` and asks the environment for its pixel format.
[[maybe_unused]] TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) {
    const u32 handle{GetTextureHandle(env, cbuf)};
    return env.ReadTexturePixelFormat(handle);
}
|
||||
|
||||
/// Resolves the texture handle for `cbuf` and asks the environment whether its pixel format
/// is an integer format.
[[maybe_unused]] bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) {
    const u32 handle{GetTextureHandle(env, cbuf)};
    return env.IsTexturePixelFormatInteger(handle);
}
|
||||
|
||||
|
|
@ -675,7 +665,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
if (!IsTextureInstruction(inst)) {
|
||||
continue;
|
||||
}
|
||||
to_replace.push_back(MakeInst(env, block, inst));
|
||||
to_replace.push_back(MakeInst(env, block, inst, host_info));
|
||||
}
|
||||
}
|
||||
// Sort instructions to visit textures by constant buffer index, then by offset
|
||||
|
|
|
|||
|
|
@ -92,7 +92,6 @@ struct Profile {
|
|||
bool has_broken_robust{};
|
||||
|
||||
u64 min_ssbo_alignment{};
|
||||
|
||||
u32 max_user_clip_distances{};
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -439,10 +439,21 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
.has_broken_robust =
|
||||
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
|
||||
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
|
||||
.max_user_clip_distances = device.GetMaxUserClipDistances(),
|
||||
.max_user_clip_distances = device.GetMaxUserClipDistances()
|
||||
};
|
||||
|
||||
host_info = Shader::HostTranslateInfo{
|
||||
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
|
||||
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
|
||||
.max_per_stage_resources = device.GetMaxPerStageResources(),
|
||||
.max_descriptor_set_samplers = device.GetMaxDescriptorSetSamplers(),
|
||||
.max_descriptor_set_uniform_buffers = device.GetMaxDescriptorSetUniformBuffers(),
|
||||
.max_descriptor_set_uniform_buffers_dynamic = device.GetMaxDescriptorSetUniformBuffersDynamic(),
|
||||
.max_descriptor_set_storage_buffers = device.GetMaxDescriptorSetStorageBuffers(),
|
||||
.max_descriptor_set_storage_buffers_dynamic = device.GetMaxDescriptorSetStorageBuffersDynamic(),
|
||||
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
|
||||
.max_descriptor_set_storage_images = device.GetMaxDescriptorSetStorageImages(),
|
||||
.max_descriptor_set_input_attachements = device.GetMaxDescriptorSetInputAttachments(),
|
||||
.support_float64 = device.IsFloat64Supported(),
|
||||
.support_float16 = device.IsFloat16Supported(),
|
||||
.support_int64 = device.IsShaderInt64Supported(),
|
||||
|
|
@ -451,10 +462,6 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
|
||||
.support_snorm_render_buffer = true,
|
||||
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
|
||||
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
|
||||
.max_per_stage_resources = device.GetMaxPerStageResources(),
|
||||
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
|
||||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -321,32 +321,23 @@ public:
|
|||
return properties.properties.limits.maxPushConstantsSize;
|
||||
}
|
||||
|
||||
/// Returns the maximum size for shared memory.
|
||||
u32 GetMaxComputeSharedMemorySize() const {
|
||||
return properties.properties.limits.maxComputeSharedMemorySize;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of dynamic storage buffer descriptors per set.
|
||||
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
|
||||
return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of dynamic uniform buffer descriptors per set.
|
||||
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
|
||||
return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
|
||||
}
|
||||
|
||||
u32 GetMaxPerStageDescriptorSampledImages() const {
|
||||
return properties.properties.limits.maxPerStageDescriptorSampledImages;
|
||||
}
|
||||
|
||||
u32 GetMaxPerStageResources() const {
|
||||
return properties.properties.limits.maxPerStageResources;
|
||||
}
|
||||
|
||||
u32 GetMaxDescriptorSetSampledImages() const {
|
||||
return properties.properties.limits.maxDescriptorSetSampledImages;
|
||||
}
|
||||
#define FN_MAX_LIMIT_LIST \
|
||||
FN_MAX_LIMIT_ELEM(ComputeSharedMemorySize) \
|
||||
FN_MAX_LIMIT_ELEM(PerStageDescriptorSampledImages) \
|
||||
FN_MAX_LIMIT_ELEM(PerStageResources) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetSamplers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffersDynamic) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffersDynamic) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetSampledImages) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageImages) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetInputAttachments)
|
||||
#define FN_MAX_LIMIT_ELEM(name) \
|
||||
u32 GetMax##name() const { return properties.properties.limits.max##name; }
|
||||
FN_MAX_LIMIT_LIST
|
||||
#undef FN_MAX_LIMIT_ELEM
|
||||
#undef FN_MAX_LIMIT_LIST
|
||||
|
||||
/// Returns float control properties of the device.
|
||||
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue