[vk] Clamp dynamic descriptors based on device limits (#4115)
Some checks are pending
tx-src / sources (push) Waiting to run
Check Strings / check-strings (push) Waiting to run

Should fix this specific crash
```
* thread #82, name = 'GPU', stop reason = Exception 0xc0000005 encountered at address 0x7ff7e5193e89: Access violation reading location 0x00000098
  * frame #0: 0x00007ff7e5193e89 eden.exe`std::unique_ptr<Vulkan::Scheduler::CommandChunk,std::default_delete<Vulkan::Scheduler::CommandChunk> >::operator->(this=<unavailable>) at memory:3453 [inlined]
    frame #1: 0x00007ff7e5193e81 eden.exe`void Vulkan::Scheduler::DispatchWork(this=0x0000000000000000) at vk_scheduler.cpp:146
    frame #2: 0x00007ff7e5193d8f eden.exe`void Vulkan::Scheduler::WaitWorker(this=<unavailable>) at vk_scheduler.cpp:133
    frame #3: 0x00007ff7e54f472e eden.exe`void Vulkan::UpdateDescriptorQueue::Acquire(this=0x0000026c77cfb4b8) at vk_update_descriptor.cpp:41
    frame #4: 0x00007ff7e5537a43 eden.exe`void Vulkan::ASTCDecoderPass::Assemble(this=0x0000026c794fba10, image=0x0000026cfb794850, map=0x00000025e28fcb30, swizzles=<unavailable>) at vk_compute_pass.cpp:603
    frame #5: 0x00007ff7e55043e1 eden.exe`void Vulkan::TextureCacheRuntime::AccelerateImageUpload(this=<unavailable>, image=0x0000026cfb794850, map=0x00000025e28fcb30, swizzles=size=10, z_start=0, z_count=0) at vk_texture_cache.cpp:2482
    frame #6: 0x00007ff7e5516592 eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::UploadImageContents<struct Vulkan::StagingBufferRef>(this=<unavailable>, image=0x0000026cfb794850, staging=0x00000025e28fcb30) at texture_cache.h:1147
    frame #7: 0x00007ff7e551523d eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::RefreshContents(this=0x0000026c794fd800, image=0x0000026cfb794850, image_id=(index = 3801072224)) at texture_cache.h:1133
    frame #8: 0x00007ff7e5517b50 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::JoinImages(this=0x0000026c794fd800, info=<unavailable>, gpu_addr=25374426112, cpu_addr=2254181376) at texture_cache.h:1644
    frame #9: 0x00007ff7e5516ff6 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::InsertImage(this=0x0000026c794fd800, info=0x00000025e28fd270, gpu_addr=25374426112, options=0x0) at texture_cache.h:1513
    frame #10: 0x00007ff7e5516a60 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::FindOrInsertImage(this=0x0000026c794fd800, info=0x00000025e28fd270, gpu_addr=25374426112, options=0x0) at texture_cache.h:1194
    frame #11: 0x00007ff7e5515a13 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::CreateImageView(this=0x0000026c794fd800, config=0x00000025e28fd370) at texture_cache.h:1173
    frame #12: 0x00007ff7e550cd64 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::VisitImageView(this=0x0000026c794fd800, index=4586, compute=<unavailable>) at texture_cache.h:554
    frame #13: 0x00007ff7e550d181 eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::FillImageViews(this=0x0000026c794fd800, views=size=5, compute=<unavailable>, blacklist=<unavailable>) at texture_cache.h:227
    frame #14: 0x00007ff7e58896e4 eden.exe`Vulkan::GraphicsPipeline::ConfigureImpl<Vulkan::(anonymous namespace)::SimpleStorageSpec>(this=0x0000026f865c1a60, is_indexed=<unavailable>) at vk_graphics_pipeline.cpp:415
    frame #15: 0x00007ff7e5889479 eden.exe`<lambda_1>::operator(pl=<unavailable>, is_indexed=<unavailable>) at vk_graphics_pipeline.h:123 [inlined]
    frame #16: 0x00007ff7e5889474 eden.exe`<lambda_1>::__invoke(pl=<unavailable>, is_indexed=<unavailable>) at vk_graphics_pipeline.h:123
    frame #17: 0x00007ff7e519d2a8 eden.exe`Vulkan::GraphicsPipeline::Configure(this=0x0000026f865c1a60, is_indexed=<unavailable>) at vk_graphics_pipeline.h:105 [inlined]
    frame #18: 0x00007ff7e519d29d eden.exe`Vulkan::RasterizerVulkan::PrepareDraw<`lambda at D:\a\g\g\eden-source\src\video_core\renderer_vulkan\vk_rasterizer.cpp:256:29'>(this=0x0000026c764f9368, is_indexed=<unavailable>, draw_func=0x00000025e28fdb80) at vk_rasterizer.cpp:244
    frame #19: 0x00007ff7e519d1db eden.exe`void Vulkan::RasterizerVulkan::Draw(this=<unavailable>, is_indexed=<unavailable>, instance_count=<unavailable>) at vk_rasterizer.cpp:256
    frame #20: 0x00007ff7e50d9025 eden.exe`void Tegra::HLE_DrawIndexedIndirect::Fallback(this=0x0000026f2af85118, maxwell3d=0x0000026ccbf9eb00, parameters=size=6) at macro.cpp:178
    frame #21: 0x00007ff7e50d8f21 eden.exe`void Tegra::HLE_DrawIndexedIndirect::Execute(this=0x0000026f2af85118, maxwell3d=0x0000026ccbf9eb00, parameters=size=6) at macro.cpp:129
    frame #22: 0x00007ff7e50db607 eden.exe`Tegra::MacroEngine::Execute::<lambda_0>::operator(this=0x00000025e28fde70, acm= Active Type = Tegra::HLE_DrawIndexedIndirect ) at macro.cpp:1362
    frame #23: 0x00007ff7e50dada0 eden.exe`void Tegra::MacroEngine::Execute(this=0x0000026ccbfaa020, maxwell3d=0x0000026ccbf9eb00, method=418, parameters=<unavailable>) at macro.cpp:1392
    frame #24: 0x00007ff7e4ef062b eden.exe`void Tegra::Engines::Maxwell3D::CallMacroMethod(this=0x0000026ccbf9eb00, method=<unavailable>, parameters=<unavailable>) at maxwell_3d.cpp:390
    frame #25: 0x00007ff7e4ef04be eden.exe`void Tegra::Engines::Maxwell3D::ProcessMacro(this=0x0000026ccbf9eb00, method=<unavailable>, base_start=<unavailable>, amount=5, is_last_call=<unavailable>) at maxwell_3d.cpp:223
    frame #26: 0x00007ff7e4ef1673 eden.exe`void Tegra::Engines::Maxwell3D::CallMultiMethod(this=<unavailable>, method=<unavailable>, base_start=0x000001ea398e22f4, amount=<unavailable>, methods_pending=5) at maxwell_3d.cpp:419
    frame #27: 0x00007ff7e4ef535a eden.exe`void Tegra::DmaPusher::CallMultiMethod(this=<unavailable>, base_start=0x000001ea398e22f4, num_methods=<unavailable>) const at dma_pusher.cpp:201
    frame #28: 0x00007ff7e4ef5023 eden.exe`void Tegra::DmaPusher::ProcessCommands(this=0x0000026ccbfba698, commands=size=36) at dma_pusher.cpp:120
    frame #29: 0x00007ff7e4ef4ced eden.exe`bool Tegra::DmaPusher::Step(this=0x0000026ccbfba698) at dma_pusher.cpp:88
    frame #30: 0x00007ff7e4ef49f8 eden.exe`void Tegra::DmaPusher::DispatchCalls(this=0x0000026ccbfba698) at dma_pusher.cpp:40
    frame #31: 0x00007ff7e4ede797 eden.exe`void Tegra::Control::Scheduler::Push(this=<unavailable>, channel=-493895072, entries=0x00000025e28fe2e0) at scheduler.cpp:31
    frame #32: 0x00007ff7e4eedc7b eden.exe`VideoCommon::GPUThread::ThreadManager::StartThread::<lambda_0>::operator(this=<unavailable>, stop_token=stop_token @ 0x00000025e28ffb40) at gpu_thread.cpp:42 [inlined]
    frame #33: 0x00007ff7e4eedae8 eden.exe`std::invoke(_Obj=<unavailable>, _Arg1=0x0000026ccb3edef0) at type_traits:1680 [inlined]
    frame #34: 0x00007ff7e4eedac1 eden.exe`std:🧵:_Invoke<std::tuple<`lambda at D:\a\g\g\eden-source\src\video_core\gpu_thread.cpp:29:27',std::stop_token>,0,1>(_RawVals=0x0000026ccb3edef0) at thread:60
    frame #35: 0x00007ff8e87a37b0 ucrtbase.dll`wcsrchr + 336
    ```

Signed-off-by: lizzie <lizzie@eden-emu.dev>

Co-authored-by: CamilleLaVey <camillelavey99@gmail.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/4115
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
lizzie 2026-06-30 04:33:30 +02:00 committed by crueter
parent 1b482fa99b
commit 2068b5d452
No known key found for this signature in database
GPG key ID: 425ACD2D4830EBC6
21 changed files with 223 additions and 161 deletions

View file

@ -50,8 +50,8 @@ NPad::NPad(Core::HID::HIDCore& hid_core_, KernelHelpers::ServiceContext& service
auto& controller = controller_data[aruid_index][i];
controller.device = hid_core.GetEmulatedControllerByIndex(i);
Core::HID::ControllerUpdateCallback engine_callback{
.on_change = [this, i](Core::HID::ControllerTriggerType type) {
ControllerUpdate(hid_core.kernel, type, i);
.on_change = [this, i, kernel = &hid_core.kernel](Core::HID::ControllerTriggerType type) {
ControllerUpdate(*kernel, type, i);
},
.is_npad_service = true,
};

View file

@ -236,8 +236,11 @@ void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInf
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
HostTranslateInfo normalized_host_info{host_info};
normalized_host_info.ApplyDescriptorLimitPolicy();
IR::Program program;
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, normalized_host_info);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = PostOrder(program.syntax_list.front());
program.stage = env.ShaderStage();
@ -260,9 +263,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
}
if (!host_info.support_geometry_shader_passthrough) {
if (!normalized_host_info.support_geometry_shader_passthrough) {
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
LowerGeometryPassthrough(program, host_info);
LowerGeometryPassthrough(program, normalized_host_info);
}
}
break;
@ -277,16 +280,16 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
RemoveUnreachableBlocks(program);
// Replace instructions before the SSA rewrite
if (!host_info.support_float64) {
if (!normalized_host_info.support_float64) {
Optimization::LowerFp64ToFp32(program);
}
if (!host_info.support_float16) {
if (!normalized_host_info.support_float16) {
Optimization::LowerFp16ToFp32(program);
}
if (!host_info.support_int64) {
if (!normalized_host_info.support_int64) {
Optimization::LowerInt64ToInt32(program);
}
if (!host_info.support_conditional_barrier) {
if (!normalized_host_info.support_conditional_barrier) {
Optimization::ConditionalBarrierPass(program);
}
Optimization::SsaRewritePass(program);
@ -295,8 +298,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::PositionPass(env, program);
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
Optimization::TexturePass(env, program, host_info);
Optimization::GlobalMemoryToStorageBufferPass(program, normalized_host_info);
Optimization::TexturePass(env, program, normalized_host_info);
if (Settings::values.resolution_info.active || Settings::values.rescale_hack.GetValue()) {
Optimization::RescalingPass(program);
@ -306,7 +309,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::VerificationPass(program);
}
Optimization::CollectShaderInfoPass(env, program);
Optimization::LayerPass(program, host_info);
Optimization::LayerPass(program, normalized_host_info);
Optimization::VendorWorkaroundPass(program);
CollectInterpolationInfo(env, program);

View file

@ -6,6 +6,8 @@
#pragma once
#include "common/common_types.h"
namespace Shader {
// Try to keep entries here to a minimum
@ -13,20 +15,52 @@ namespace Shader {
/// Misc information about the host
struct HostTranslateInfo {
static constexpr u32 DEFAULT_DESCRIPTOR_LIMIT = 1024;
u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage
u32 max_per_stage_resources{}; ///< maximum resources per stage
u32 max_descriptor_set_samplers{};
u32 max_descriptor_set_uniform_buffers{};
u32 max_descriptor_set_uniform_buffers_dynamic{};
u32 max_descriptor_set_storage_buffers{};
u32 max_descriptor_set_storage_buffers_dynamic{};
u32 max_descriptor_set_sampled_images{};
u32 max_descriptor_set_storage_images{};
u32 max_descriptor_set_input_attachements{};
bool support_float64{}; ///< True when the device supports 64-bit floats
bool support_float16{}; ///< True when the device supports 16-bit floats
bool support_int64{}; ///< True when the device supports 64-bit integers
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
u32 max_per_stage_descriptor_sampled_images{1024}; ///< maximum sampled descriptors per stage
u32 max_per_stage_resources{4096}; ///< maximum resources per stage
u32 max_descriptor_set_sampled_images{1024}; ///< maximum sampled descriptors per set
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
///< passthrough shaders
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
///< control flow
void ApplyDescriptorLimitPolicy() noexcept {
if (min_ssbo_alignment == 0) {
min_ssbo_alignment = 1;
}
ApplyDescriptorLimitFallback(max_per_stage_descriptor_sampled_images);
ApplyDescriptorLimitFallback(max_per_stage_resources);
ApplyDescriptorLimitFallback(max_descriptor_set_samplers);
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers);
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers_dynamic);
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers);
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers_dynamic);
ApplyDescriptorLimitFallback(max_descriptor_set_sampled_images);
ApplyDescriptorLimitFallback(max_descriptor_set_storage_images);
ApplyDescriptorLimitFallback(max_descriptor_set_input_attachements);
}
private:
static void ApplyDescriptorLimitFallback(u32& limit) noexcept {
if (limit == 0) {
limit = DEFAULT_DESCRIPTOR_LIMIT;
}
}
};
} // namespace Shader

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -545,7 +548,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn
IR::Block* const block{storage_inst.block};
IR::Inst* const inst{storage_inst.inst};
const IR::U32 offset{
StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
StorageOffset(*block, *inst, storage_buffer, u32(host_info.min_ssbo_alignment))};
Replace(*block, *inst, index, offset);
}
}

View file

@ -32,71 +32,62 @@ struct TextureInst {
using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
constexpr u32 DESCRIPTOR_SIZE = 8;
constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
constexpr u32 DYNAMIC_DESCRIPTOR_CBUF_BYTES = 16 * 1024;
constexpr u32 MAX_DYNAMIC_DESCRIPTOR_COUNT = 1024;
constexpr u32 DESCRIPTOR_SIZE_SHIFT = u32(std::countr_zero(DESCRIPTOR_SIZE));
constexpr u32 DESCRIPTOR_MAX_COUNT = 1024;
u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) {
const IR::Inst* const inst{dynamic_offset.InstRecursive()};
if (!inst || inst->GetOpcode() != IR::Opcode::ShiftLeftLogical32) {
const IR::Inst* const inst = dynamic_offset.InstRecursive();
if (!inst || inst->GetOpcode() != IR::Opcode::ShiftLeftLogical32)
return DESCRIPTOR_SIZE_SHIFT;
}
const IR::Value shift{inst->Arg(1)};
if (!shift.IsImmediate()) {
const IR::Value shift = inst->Arg(1);
if (!shift.IsImmediate())
return DESCRIPTOR_SIZE_SHIFT;
}
const u32 size_shift{shift.U32()};
return size_shift >= DESCRIPTOR_SIZE_SHIFT && size_shift < 31 ? size_shift
: DESCRIPTOR_SIZE_SHIFT;
const u32 size_shift = shift.U32();
return size_shift >= DESCRIPTOR_SIZE_SHIFT && size_shift < 31 ? size_shift : DESCRIPTOR_SIZE_SHIFT;
}
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift) {
if (size_shift >= 31 || base_offset >= DYNAMIC_DESCRIPTOR_CBUF_BYTES) {
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) {
auto const descriptor_limit = (std::max)(1U, max_descriptors);
auto const max_cbuf_bytes = 16 * descriptor_limit;
if (size_shift >= 31 || base_offset >= max_cbuf_bytes)
return 1;
}
const u32 stride{1U << size_shift};
const u32 available{DYNAMIC_DESCRIPTOR_CBUF_BYTES - base_offset};
if (available < DESCRIPTOR_SIZE) {
auto const stride = 1U << size_shift;
auto const available = max_cbuf_bytes - base_offset;
if (available < DESCRIPTOR_SIZE)
return 1;
}
const u32 available_count{1U + (available - DESCRIPTOR_SIZE) / stride};
return std::min(MAX_DYNAMIC_DESCRIPTOR_COUNT, available_count);
auto const available_count = 1U + (available - DESCRIPTOR_SIZE) / stride;
return std::min(descriptor_limit, available_count);
}
u32 SaturatingSub(u32 lhs, u32 rhs) {
return lhs > rhs ? lhs - rhs : 0;
}
template <typename Descriptors>
u32 StaticDescriptorCount(const Descriptors& descriptors) {
u32 count{};
for (const auto& desc : descriptors) {
if (desc.count <= 1) {
count += desc.count;
}
}
return count;
template <typename T>
[[nodiscard]] u32 StaticDescriptorCount(T const& descriptors) noexcept {
return std::accumulate(descriptors.cbegin(), descriptors.cend(), 0U, [](auto const& acc, auto const& e) {
return acc + (e.count <= 1 ? e.count : 0);
});
}
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info,
u32 dynamic_arrays) {
if (dynamic_arrays == 0) {
return MAX_DYNAMIC_DESCRIPTOR_COUNT;
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) {
auto const sampled_limit = (std::max)(1U, std::min(host_info.max_per_stage_descriptor_sampled_images,
host_info.max_descriptor_set_sampled_images));
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
if (dynamic_arrays > 0) {
auto const sampled_static_count = StaticDescriptorCount(info.texture_buffer_descriptors) + StaticDescriptorCount(info.texture_descriptors);
auto const resource_static_count =
NumDescriptors(info.constant_buffer_descriptors)
+ NumDescriptors(info.storage_buffers_descriptors)
+ sampled_static_count + NumDescriptors(info.image_buffer_descriptors)
+ NumDescriptors(info.image_descriptors);
auto const sampled_budget = SaturatingSub(sampled_limit, sampled_static_count);
auto const resource_budget = SaturatingSub(resource_limit, resource_static_count);
auto const sampled_cap = sampled_budget / dynamic_arrays;
auto const resource_cap = resource_budget / dynamic_arrays;
return (std::max)(1U, (std::min)(sampled_cap, resource_cap));
}
const u32 sampled_static_count{StaticDescriptorCount(info.texture_buffer_descriptors) +
StaticDescriptorCount(info.texture_descriptors)};
const u32 resource_static_count{
NumDescriptors(info.constant_buffer_descriptors) +
NumDescriptors(info.storage_buffers_descriptors) + sampled_static_count +
NumDescriptors(info.image_buffer_descriptors) + NumDescriptors(info.image_descriptors)};
const u32 sampled_limit{std::min(host_info.max_per_stage_descriptor_sampled_images,
host_info.max_descriptor_set_sampled_images)};
const u32 sampled_budget{SaturatingSub(sampled_limit, sampled_static_count)};
const u32 resource_budget{SaturatingSub(host_info.max_per_stage_resources,
resource_static_count)};
const u32 sampled_cap{sampled_budget / dynamic_arrays};
const u32 resource_cap{resource_budget / dynamic_arrays};
return std::max(1U, std::min({MAX_DYNAMIC_DESCRIPTOR_COUNT, sampled_cap, resource_cap}));
return (std::min)({DESCRIPTOR_MAX_COUNT, sampled_limit, resource_limit});
}
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
@ -304,21 +295,23 @@ static inline bool IsTexturePixelFormatIntegerCached(Environment& env,
}
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env);
static inline std::optional<ConstBufferAddr> TrackCached(const IR::Value& v, Environment& env) {
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env, const HostTranslateInfo& host_info);
static inline std::optional<ConstBufferAddr> TrackCached(const IR::Value& v, Environment& env, const HostTranslateInfo& host_info) {
if (const IR::Inst* key = v.InstRecursive()) {
if (auto it = env.track_cache.find(key); it != env.track_cache.end()) return it->second;
auto found = Track(v, env);
auto found = Track(v, env, host_info);
if (found) env.track_cache.emplace(key, *found);
return found;
}
return Track(v, env);
return Track(v, env, host_info);
}
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env);
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const HostTranslateInfo& host_info);
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env) {
return IR::BreadthFirstSearch(value, [&env](const IR::Inst* inst) { return TryGetConstBuffer(inst, env); });
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env, const HostTranslateInfo& host_info) {
return IR::BreadthFirstSearch(value, [&env, &host_info](const IR::Inst* inst) {
return TryGetConstBuffer(inst, env, host_info);
});
}
std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
@ -342,13 +335,13 @@ std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
return ReadCbufCached(env, index_number, offset_number);
}
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env) {
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const HostTranslateInfo& host_info) {
switch (inst->GetOpcode()) {
default:
return std::nullopt;
case IR::Opcode::BitwiseOr32: {
std::optional lhs{TrackCached(inst->Arg(0), env)};
std::optional rhs{TrackCached(inst->Arg(1), env)};
std::optional lhs{TrackCached(inst->Arg(0), env, host_info)};
std::optional rhs{TrackCached(inst->Arg(1), env, host_info)};
if (!lhs || !rhs) {
return std::nullopt;
}
@ -378,12 +371,11 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
if (!shift.IsImmediate()) {
return std::nullopt;
}
std::optional lhs{TrackCached(inst->Arg(0), env)};
std::optional lhs{TrackCached(inst->Arg(0), env, host_info)};
if (lhs) {
lhs->shift_left = shift.U32();
}
return lhs;
break;
}
case IR::Opcode::BitwiseAnd32: {
IR::Value op1{inst->Arg(0)};
@ -407,7 +399,7 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
return std::nullopt;
} while (false);
}
std::optional lhs{TrackCached(op1, env)};
std::optional lhs{TrackCached(op1, env, host_info)};
if (lhs) {
lhs->shift_left = static_cast<u32>(std::countr_zero(op2.U32()));
}
@ -453,7 +445,10 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
} else {
return std::nullopt;
}
const u32 size_shift{DynamicDescriptorSizeShift(dynamic_offset)};
auto const size_shift = DynamicDescriptorSizeShift(dynamic_offset);
auto const sampled_limit = (std::max)(1U, (std::min)(host_info.max_per_stage_descriptor_sampled_images,
host_info.max_descriptor_set_sampled_images));
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
return ConstBufferAddr{
.index = index.U32(),
.offset = base_offset,
@ -462,15 +457,15 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
.secondary_offset = 0,
.secondary_shift_left = 0,
.dynamic_offset = dynamic_offset,
.count = DynamicDescriptorCount(base_offset, size_shift),
.count = DynamicDescriptorCount(base_offset, size_shift, (std::min)({DESCRIPTOR_MAX_COUNT, sampled_limit, resource_limit})),
.has_secondary = false,
};
}
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst, const HostTranslateInfo& host_info) {
ConstBufferAddr addr;
if (IsBindless(inst)) {
const std::optional<ConstBufferAddr> track_addr{TrackCached(inst.Arg(0), env)};
const std::optional<ConstBufferAddr> track_addr{TrackCached(inst.Arg(0), env, host_info)};
if (!track_addr) {
throw NotImplementedException("Failed to track bindless texture constant buffer");
@ -506,15 +501,15 @@ u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) {
return lhs_raw | rhs_raw;
}
[[maybe_unused]]TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
[[maybe_unused]] TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
return env.ReadTextureType(GetTextureHandle(env, cbuf));
}
[[maybe_unused]]TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) {
[[maybe_unused]] TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) {
return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf));
}
[[maybe_unused]]bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) {
[[maybe_unused]] bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) {
return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf));
}
@ -675,7 +670,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
if (!IsTextureInstruction(inst)) {
continue;
}
to_replace.push_back(MakeInst(env, block, inst));
to_replace.push_back(MakeInst(env, block, inst, host_info));
}
}
// Sort instructions to visit textures by constant buffer index, then by offset
@ -689,8 +684,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
program.info.texture_descriptors,
program.info.image_descriptors,
};
const u32 sampled_dynamic_cap{
DynamicSampledTextureCap(program.info, host_info, DynamicSampledTextureArrayCount(to_replace))};
const u32 sampled_dynamic_cap = DynamicSampledTextureCap(program.info, host_info, DynamicSampledTextureArrayCount(to_replace));
for (TextureInst& texture_inst : to_replace) {
// TODO: Handle arrays
IR::Inst* const inst{texture_inst.inst};

View file

@ -92,7 +92,6 @@ struct Profile {
bool has_broken_robust{};
u64 min_ssbo_alignment{};
u32 max_user_clip_distances{};
};

View file

@ -245,16 +245,31 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances),
},
host_info{
.support_float64 = true,
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.max_per_stage_descriptor_sampled_images =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_per_stage_resources = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_samplers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_uniform_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_uniform_buffers_dynamic =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_buffers_dynamic =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_sampled_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_storage_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.max_descriptor_set_input_attachements =
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
.support_float64 = true,
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
} {
host_info.ApplyDescriptorLimitPolicy();
if (use_asynchronous_shaders) {
workers = CreateWorkers();
}

View file

@ -22,6 +22,15 @@ namespace Vulkan {
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
[[nodiscard]] inline u32 NumDescriptorEntries(const Shader::Info& info) {
return Shader::NumDescriptors(info.constant_buffer_descriptors) +
Shader::NumDescriptors(info.storage_buffers_descriptors) +
Shader::NumDescriptors(info.texture_buffer_descriptors) +
Shader::NumDescriptors(info.image_buffer_descriptors) +
Shader::NumDescriptors(info.texture_descriptors) +
Shader::NumDescriptors(info.image_descriptors);
}
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}

View file

@ -326,7 +326,7 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -384,7 +384,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -429,7 +429,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
}
const size_t compare_size = compare_to_zero ? 8 : 24;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size);
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32));
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -498,7 +498,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
static constexpr size_t DISPATCH_SIZE = 2048U;
size_t runs_to_do = std::min<size_t>(current_runs, DISPATCH_SIZE);
current_runs -= runs_to_do;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 3);
compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64));
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64));
compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64));
@ -600,7 +600,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const u32 num_dispatches_z = image.info.resources.layers;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
@ -821,7 +821,7 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
pc.blocks_dim[1] = blocks_y;
pc.blocks_dim[2] = z_count; // Only process the count
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 3);
compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0,
image.runtime->swizzle_table_size);
compute_pass_descriptor_queue.AddBuffer(swizzled.buffer,
@ -989,7 +989,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image,
ASSERT(copy.dst_subresource.base_layer == 0);
ASSERT(copy.dst_subresource.num_layers == 1);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(scheduler, 2);
compute_pass_descriptor_queue.AddImage(
src_image.StorageImageView(copy.src_subresource.base_level));
compute_pass_descriptor_queue.AddImage(

View file

@ -45,6 +45,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk
}
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
num_descriptor_entries = NumDescriptorEntries(info);
auto func{[this, &scheduler, &descriptor_pool, shader_notify, pipeline_statistics] {
DescriptorLayoutBuilder builder{device};
@ -113,7 +114,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
BufferCache& buffer_cache, TextureCache& texture_cache) {
guest_descriptor_queue.Acquire();
guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries);
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
buffer_cache.UnbindComputeStorageBuffers();

View file

@ -53,6 +53,7 @@ private:
vk::PipelineCache& pipeline_cache;
GuestDescriptorQueue& guest_descriptor_queue;
Shader::Info info;
u32 num_descriptor_entries{};
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};

View file

@ -268,6 +268,7 @@ GraphicsPipeline::GraphicsPipeline(
num_textures += Shader::NumDescriptors(info->texture_descriptors);
num_image_elements += Shader::NumDescriptors(info->texture_descriptors);
num_image_elements += Shader::NumDescriptors(info->image_descriptors);
num_descriptor_entries += NumDescriptorEntries(*info);
}
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
@ -473,7 +474,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
guest_descriptor_queue.Acquire();
guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries);
RescalingPushConstant rescaling;
RenderAreaPushConstant render_area;

View file

@ -159,6 +159,7 @@ private:
std::array<Shader::Info, NUM_STAGES> stage_infos;
std::array<u32, 5> enabled_uniform_buffer_masks{};
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
u32 num_descriptor_entries{};
size_t num_image_elements{};
u32 num_textures{};
bool fragment_has_color0_output{};

View file

@ -439,10 +439,21 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
.has_broken_robust =
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
.max_user_clip_distances = device.GetMaxUserClipDistances(),
.max_user_clip_distances = device.GetMaxUserClipDistances()
};
host_info = Shader::HostTranslateInfo{
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
.max_per_stage_resources = device.GetMaxPerStageResources(),
.max_descriptor_set_samplers = device.GetMaxDescriptorSetSamplers(),
.max_descriptor_set_uniform_buffers = device.GetMaxDescriptorSetUniformBuffers(),
.max_descriptor_set_uniform_buffers_dynamic = device.GetMaxDescriptorSetUniformBuffersDynamic(),
.max_descriptor_set_storage_buffers = device.GetMaxDescriptorSetStorageBuffers(),
.max_descriptor_set_storage_buffers_dynamic = device.GetMaxDescriptorSetStorageBuffersDynamic(),
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
.max_descriptor_set_storage_images = device.GetMaxDescriptorSetStorageImages(),
.max_descriptor_set_input_attachements = device.GetMaxDescriptorSetInputAttachments(),
.support_float64 = device.IsFloat64Supported(),
.support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
@ -451,13 +462,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
.support_snorm_render_buffer = true,
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
.max_per_stage_resources = device.GetMaxPerStageResources(),
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
};
host_info.ApplyDescriptorLimitPolicy();
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",

View file

@ -203,7 +203,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
: gpu{gpu_}, device_memory{device_memory_}, device{device_},
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
guest_descriptor_queue(device), compute_pass_descriptor_queue(device),
blit_image(device, scheduler, state_tracker, descriptor_pool), render_pass_cache(device),
texture_cache_runtime{
device, scheduler, memory_allocator, staging_pool,

View file

@ -155,15 +155,14 @@ void Scheduler::WaitWorker() {
}
void Scheduler::DispatchWork() {
if (chunk->Empty()) {
return;
if (chunk && !chunk->Empty()) {
{
std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
}
event_cv.notify_all();
AcquireNewChunk();
}
{
std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
}
event_cv.notify_all();
AcquireNewChunk();
}
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {

View file

@ -7,6 +7,7 @@
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/logging.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@ -15,8 +16,9 @@
namespace Vulkan {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_)
: device{device_}
{
payload_start = payload.data();
payload_cursor = payload.data();
}
@ -31,13 +33,15 @@ void UpdateDescriptorQueue::TickFrame() {
payload_cursor = payload_start;
}
void UpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call might use.
static constexpr size_t MIN_ENTRIES = 0x400;
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
void UpdateDescriptorQueue::Acquire(Scheduler& scheduler, size_t required_entries) {
static constexpr size_t DEFAULT_REQUIRED_ENTRIES = 0x400;
const size_t reserve = required_entries > 0 ? required_entries : DEFAULT_REQUIRED_ENTRIES;
ASSERT_MSG(reserve < FRAME_PAYLOAD_SIZE, "Descriptor reservation {} >= frame capacity {}",
reserve, FRAME_PAYLOAD_SIZE);
const size_t used = static_cast<size_t>(std::distance(payload_start, payload_cursor));
if (used + reserve >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow (used={}, reserve={}, capacity={})",
used, reserve, FRAME_PAYLOAD_SIZE);
scheduler.WaitWorker();
payload_cursor = payload_start;
}

View file

@ -34,12 +34,11 @@ class UpdateDescriptorQueue final {
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
public:
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
explicit UpdateDescriptorQueue(const Device& device_);
~UpdateDescriptorQueue();
void TickFrame();
void Acquire();
void Acquire(Scheduler& scheduler, size_t required_entries = 0);
const DescriptorUpdateEntry* UpdateData() const noexcept {
return upload_start;
@ -75,8 +74,6 @@ public:
private:
const Device& device;
Scheduler& scheduler;
size_t frame_index{0};
DescriptorUpdateEntry* payload_cursor = nullptr;
DescriptorUpdateEntry* payload_start = nullptr;

View file

@ -321,32 +321,23 @@ public:
return properties.properties.limits.maxPushConstantsSize;
}
/// Returns the maximum size for shared memory.
u32 GetMaxComputeSharedMemorySize() const {
return properties.properties.limits.maxComputeSharedMemorySize;
}
/// Returns the maximum number of dynamic storage buffer descriptors per set.
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
}
/// Returns the maximum number of dynamic uniform buffer descriptors per set.
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
}
u32 GetMaxPerStageDescriptorSampledImages() const {
return properties.properties.limits.maxPerStageDescriptorSampledImages;
}
u32 GetMaxPerStageResources() const {
return properties.properties.limits.maxPerStageResources;
}
u32 GetMaxDescriptorSetSampledImages() const {
return properties.properties.limits.maxDescriptorSetSampledImages;
}
#define FN_MAX_LIMIT_LIST \
FN_MAX_LIMIT_ELEM(ComputeSharedMemorySize) \
FN_MAX_LIMIT_ELEM(PerStageDescriptorSampledImages) \
FN_MAX_LIMIT_ELEM(PerStageResources) \
FN_MAX_LIMIT_ELEM(DescriptorSetSamplers) \
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffers) \
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffersDynamic) \
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffers) \
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffersDynamic) \
FN_MAX_LIMIT_ELEM(DescriptorSetSampledImages) \
FN_MAX_LIMIT_ELEM(DescriptorSetStorageImages) \
FN_MAX_LIMIT_ELEM(DescriptorSetInputAttachments)
#define FN_MAX_LIMIT_ELEM(name) \
u32 GetMax##name() const { return properties.properties.limits.max##name; }
FN_MAX_LIMIT_LIST
#undef FN_MAX_LIMIT_ELEM
#undef FN_MAX_LIMIT_LIST
/// Returns float control properties of the device.
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {

View file

@ -44,6 +44,7 @@ int IrShaderRecompilerImpl(int argc, char *argv[]) {
host_info.support_geometry_shader_passthrough = true;
host_info.support_conditional_barrier = true;
host_info.min_ssbo_alignment = 0;
host_info.ApplyDescriptorLimitPolicy();
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
auto const dumped_ir = Shader::IR::DumpProgram(program);
std::printf("%s\n", dumped_ir.c_str());

View file

@ -52,6 +52,7 @@ int SpirvShaderRecompilerImpl(int argc, char *argv[]) {
host_info.support_geometry_shader_passthrough = true;
host_info.support_conditional_barrier = true;
host_info.min_ssbo_alignment = 0;
host_info.ApplyDescriptorLimitPolicy();
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
// IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,