mirror of
https://git.eden-emu.dev/eden-emu/eden.git
synced 2026-06-30 07:19:31 -04:00
[vk] Clamp dynamic descriptors based on device limits (#4115)
Should fix this specific crash ``` * thread #82, name = 'GPU', stop reason = Exception 0xc0000005 encountered at address 0x7ff7e5193e89: Access violation reading location 0x00000098 * frame #0: 0x00007ff7e5193e89 eden.exe`std::unique_ptr<Vulkan::Scheduler::CommandChunk,std::default_delete<Vulkan::Scheduler::CommandChunk> >::operator->(this=<unavailable>) at memory:3453 [inlined] frame #1: 0x00007ff7e5193e81 eden.exe`void Vulkan::Scheduler::DispatchWork(this=0x0000000000000000) at vk_scheduler.cpp:146 frame #2: 0x00007ff7e5193d8f eden.exe`void Vulkan::Scheduler::WaitWorker(this=<unavailable>) at vk_scheduler.cpp:133 frame #3: 0x00007ff7e54f472e eden.exe`void Vulkan::UpdateDescriptorQueue::Acquire(this=0x0000026c77cfb4b8) at vk_update_descriptor.cpp:41 frame #4: 0x00007ff7e5537a43 eden.exe`void Vulkan::ASTCDecoderPass::Assemble(this=0x0000026c794fba10, image=0x0000026cfb794850, map=0x00000025e28fcb30, swizzles=<unavailable>) at vk_compute_pass.cpp:603 frame #5: 0x00007ff7e55043e1 eden.exe`void Vulkan::TextureCacheRuntime::AccelerateImageUpload(this=<unavailable>, image=0x0000026cfb794850, map=0x00000025e28fcb30, swizzles=size=10, z_start=0, z_count=0) at vk_texture_cache.cpp:2482 frame #6: 0x00007ff7e5516592 eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::UploadImageContents<struct Vulkan::StagingBufferRef>(this=<unavailable>, image=0x0000026cfb794850, staging=0x00000025e28fcb30) at texture_cache.h:1147 frame #7: 0x00007ff7e551523d eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::RefreshContents(this=0x0000026c794fd800, image=0x0000026cfb794850, image_id=(index = 3801072224)) at texture_cache.h:1133 frame #8: 0x00007ff7e5517b50 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::JoinImages(this=0x0000026c794fd800, info=<unavailable>, gpu_addr=25374426112, cpu_addr=2254181376) at texture_cache.h:1644 frame #9: 0x00007ff7e5516ff6 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::InsertImage(this=0x0000026c794fd800, info=0x00000025e28fd270, gpu_addr=25374426112, options=0x0) at texture_cache.h:1513 frame #10: 0x00007ff7e5516a60 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::FindOrInsertImage(this=0x0000026c794fd800, info=0x00000025e28fd270, gpu_addr=25374426112, options=0x0) at texture_cache.h:1194 frame #11: 0x00007ff7e5515a13 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::CreateImageView(this=0x0000026c794fd800, config=0x00000025e28fd370) at texture_cache.h:1173 frame #12: 0x00007ff7e550cd64 eden.exe`struct Common::SlotId VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::VisitImageView(this=0x0000026c794fd800, index=4586, compute=<unavailable>) at texture_cache.h:554 frame #13: 0x00007ff7e550d181 eden.exe`void VideoCommon::TextureCache<struct Vulkan::TextureCacheParams>::FillImageViews(this=0x0000026c794fd800, views=size=5, compute=<unavailable>, blacklist=<unavailable>) at texture_cache.h:227 frame #14: 0x00007ff7e58896e4 eden.exe`Vulkan::GraphicsPipeline::ConfigureImpl<Vulkan::(anonymous namespace)::SimpleStorageSpec>(this=0x0000026f865c1a60, is_indexed=<unavailable>) at vk_graphics_pipeline.cpp:415 frame #15: 0x00007ff7e5889479 eden.exe`<lambda_1>::operator(pl=<unavailable>, is_indexed=<unavailable>) at vk_graphics_pipeline.h:123 [inlined] frame #16: 0x00007ff7e5889474 eden.exe`<lambda_1>::__invoke(pl=<unavailable>, is_indexed=<unavailable>) at vk_graphics_pipeline.h:123 frame #17: 0x00007ff7e519d2a8 eden.exe`Vulkan::GraphicsPipeline::Configure(this=0x0000026f865c1a60, is_indexed=<unavailable>) at vk_graphics_pipeline.h:105 [inlined] frame #18: 0x00007ff7e519d29d eden.exe`Vulkan::RasterizerVulkan::PrepareDraw<`lambda at D:\a\g\g\eden-source\src\video_core\renderer_vulkan\vk_rasterizer.cpp:256:29'>(this=0x0000026c764f9368, is_indexed=<unavailable>, draw_func=0x00000025e28fdb80) at vk_rasterizer.cpp:244 frame #19: 0x00007ff7e519d1db eden.exe`void Vulkan::RasterizerVulkan::Draw(this=<unavailable>, is_indexed=<unavailable>, instance_count=<unavailable>) at vk_rasterizer.cpp:256 frame #20: 0x00007ff7e50d9025 eden.exe`void Tegra::HLE_DrawIndexedIndirect::Fallback(this=0x0000026f2af85118, maxwell3d=0x0000026ccbf9eb00, parameters=size=6) at macro.cpp:178 frame #21: 0x00007ff7e50d8f21 eden.exe`void Tegra::HLE_DrawIndexedIndirect::Execute(this=0x0000026f2af85118, maxwell3d=0x0000026ccbf9eb00, parameters=size=6) at macro.cpp:129 frame #22: 0x00007ff7e50db607 eden.exe`Tegra::MacroEngine::Execute::<lambda_0>::operator(this=0x00000025e28fde70, acm= Active Type = Tegra::HLE_DrawIndexedIndirect ) at macro.cpp:1362 frame #23: 0x00007ff7e50dada0 eden.exe`void Tegra::MacroEngine::Execute(this=0x0000026ccbfaa020, maxwell3d=0x0000026ccbf9eb00, method=418, parameters=<unavailable>) at macro.cpp:1392 frame #24: 0x00007ff7e4ef062b eden.exe`void Tegra::Engines::Maxwell3D::CallMacroMethod(this=0x0000026ccbf9eb00, method=<unavailable>, parameters=<unavailable>) at maxwell_3d.cpp:390 frame #25: 0x00007ff7e4ef04be eden.exe`void Tegra::Engines::Maxwell3D::ProcessMacro(this=0x0000026ccbf9eb00, method=<unavailable>, base_start=<unavailable>, amount=5, is_last_call=<unavailable>) at maxwell_3d.cpp:223 frame #26: 0x00007ff7e4ef1673 eden.exe`void Tegra::Engines::Maxwell3D::CallMultiMethod(this=<unavailable>, method=<unavailable>, base_start=0x000001ea398e22f4, amount=<unavailable>, methods_pending=5) at maxwell_3d.cpp:419 frame #27: 0x00007ff7e4ef535a eden.exe`void Tegra::DmaPusher::CallMultiMethod(this=<unavailable>, base_start=0x000001ea398e22f4, num_methods=<unavailable>) const at dma_pusher.cpp:201 frame #28: 0x00007ff7e4ef5023 eden.exe`void Tegra::DmaPusher::ProcessCommands(this=0x0000026ccbfba698, commands=size=36) at dma_pusher.cpp:120 frame #29: 0x00007ff7e4ef4ced eden.exe`bool Tegra::DmaPusher::Step(this=0x0000026ccbfba698) at dma_pusher.cpp:88 frame #30: 0x00007ff7e4ef49f8 eden.exe`void Tegra::DmaPusher::DispatchCalls(this=0x0000026ccbfba698) at dma_pusher.cpp:40 frame #31: 0x00007ff7e4ede797 eden.exe`void Tegra::Control::Scheduler::Push(this=<unavailable>, channel=-493895072, entries=0x00000025e28fe2e0) at scheduler.cpp:31 frame #32: 0x00007ff7e4eedc7b eden.exe`VideoCommon::GPUThread::ThreadManager::StartThread::<lambda_0>::operator(this=<unavailable>, stop_token=stop_token @ 0x00000025e28ffb40) at gpu_thread.cpp:42 [inlined] frame #33: 0x00007ff7e4eedae8 eden.exe`std::invoke(_Obj=<unavailable>, _Arg1=0x0000026ccb3edef0) at type_traits:1680 [inlined] frame #34: 0x00007ff7e4eedac1 eden.exe`std:🧵:_Invoke<std::tuple<`lambda at D:\a\g\g\eden-source\src\video_core\gpu_thread.cpp:29:27',std::stop_token>,0,1>(_RawVals=0x0000026ccb3edef0) at thread:60 frame #35: 0x00007ff8e87a37b0 ucrtbase.dll`wcsrchr + 336 ``` Signed-off-by: lizzie <lizzie@eden-emu.dev> Co-authored-by: CamilleLaVey <camillelavey99@gmail.com> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/4115 Reviewed-by: MaranBr <maranbr@eden-emu.dev> Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
This commit is contained in:
parent
1b482fa99b
commit
2068b5d452
21 changed files with 223 additions and 161 deletions
|
|
@ -50,8 +50,8 @@ NPad::NPad(Core::HID::HIDCore& hid_core_, KernelHelpers::ServiceContext& service
|
|||
auto& controller = controller_data[aruid_index][i];
|
||||
controller.device = hid_core.GetEmulatedControllerByIndex(i);
|
||||
Core::HID::ControllerUpdateCallback engine_callback{
|
||||
.on_change = [this, i](Core::HID::ControllerTriggerType type) {
|
||||
ControllerUpdate(hid_core.kernel, type, i);
|
||||
.on_change = [this, i, kernel = &hid_core.kernel](Core::HID::ControllerTriggerType type) {
|
||||
ControllerUpdate(*kernel, type, i);
|
||||
},
|
||||
.is_npad_service = true,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -236,8 +236,11 @@ void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInf
|
|||
|
||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
|
||||
HostTranslateInfo normalized_host_info{host_info};
|
||||
normalized_host_info.ApplyDescriptorLimitPolicy();
|
||||
|
||||
IR::Program program;
|
||||
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
|
||||
program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, normalized_host_info);
|
||||
program.blocks = GenerateBlocks(program.syntax_list);
|
||||
program.post_order_blocks = PostOrder(program.syntax_list.front());
|
||||
program.stage = env.ShaderStage();
|
||||
|
|
@ -260,9 +263,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
|
||||
}
|
||||
|
||||
if (!host_info.support_geometry_shader_passthrough) {
|
||||
if (!normalized_host_info.support_geometry_shader_passthrough) {
|
||||
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
|
||||
LowerGeometryPassthrough(program, host_info);
|
||||
LowerGeometryPassthrough(program, normalized_host_info);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
@ -277,16 +280,16 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
RemoveUnreachableBlocks(program);
|
||||
|
||||
// Replace instructions before the SSA rewrite
|
||||
if (!host_info.support_float64) {
|
||||
if (!normalized_host_info.support_float64) {
|
||||
Optimization::LowerFp64ToFp32(program);
|
||||
}
|
||||
if (!host_info.support_float16) {
|
||||
if (!normalized_host_info.support_float16) {
|
||||
Optimization::LowerFp16ToFp32(program);
|
||||
}
|
||||
if (!host_info.support_int64) {
|
||||
if (!normalized_host_info.support_int64) {
|
||||
Optimization::LowerInt64ToInt32(program);
|
||||
}
|
||||
if (!host_info.support_conditional_barrier) {
|
||||
if (!normalized_host_info.support_conditional_barrier) {
|
||||
Optimization::ConditionalBarrierPass(program);
|
||||
}
|
||||
Optimization::SsaRewritePass(program);
|
||||
|
|
@ -295,8 +298,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
|
||||
Optimization::PositionPass(env, program);
|
||||
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
|
||||
Optimization::TexturePass(env, program, host_info);
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program, normalized_host_info);
|
||||
Optimization::TexturePass(env, program, normalized_host_info);
|
||||
|
||||
if (Settings::values.resolution_info.active || Settings::values.rescale_hack.GetValue()) {
|
||||
Optimization::RescalingPass(program);
|
||||
|
|
@ -306,7 +309,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
Optimization::VerificationPass(program);
|
||||
}
|
||||
Optimization::CollectShaderInfoPass(env, program);
|
||||
Optimization::LayerPass(program, host_info);
|
||||
Optimization::LayerPass(program, normalized_host_info);
|
||||
Optimization::VendorWorkaroundPass(program);
|
||||
|
||||
CollectInterpolationInfo(env, program);
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
// Try to keep entries here to a minimum
|
||||
|
|
@ -13,20 +15,52 @@ namespace Shader {
|
|||
|
||||
/// Misc information about the host
|
||||
struct HostTranslateInfo {
|
||||
static constexpr u32 DEFAULT_DESCRIPTOR_LIMIT = 1024;
|
||||
|
||||
u64 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
u32 max_per_stage_descriptor_sampled_images{}; ///< maximum sampled descriptors per stage
|
||||
u32 max_per_stage_resources{}; ///< maximum resources per stage
|
||||
u32 max_descriptor_set_samplers{};
|
||||
u32 max_descriptor_set_uniform_buffers{};
|
||||
u32 max_descriptor_set_uniform_buffers_dynamic{};
|
||||
u32 max_descriptor_set_storage_buffers{};
|
||||
u32 max_descriptor_set_storage_buffers_dynamic{};
|
||||
u32 max_descriptor_set_sampled_images{};
|
||||
u32 max_descriptor_set_storage_images{};
|
||||
u32 max_descriptor_set_input_attachements{};
|
||||
bool support_float64{}; ///< True when the device supports 64-bit floats
|
||||
bool support_float16{}; ///< True when the device supports 16-bit floats
|
||||
bool support_int64{}; ///< True when the device supports 64-bit integers
|
||||
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
|
||||
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
|
||||
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
|
||||
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
|
||||
u32 max_per_stage_descriptor_sampled_images{1024}; ///< maximum sampled descriptors per stage
|
||||
u32 max_per_stage_resources{4096}; ///< maximum resources per stage
|
||||
u32 max_descriptor_set_sampled_images{1024}; ///< maximum sampled descriptors per set
|
||||
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
|
||||
///< passthrough shaders
|
||||
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
|
||||
///< control flow
|
||||
|
||||
void ApplyDescriptorLimitPolicy() noexcept {
|
||||
if (min_ssbo_alignment == 0) {
|
||||
min_ssbo_alignment = 1;
|
||||
}
|
||||
ApplyDescriptorLimitFallback(max_per_stage_descriptor_sampled_images);
|
||||
ApplyDescriptorLimitFallback(max_per_stage_resources);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_samplers);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_uniform_buffers_dynamic);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_storage_buffers_dynamic);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_sampled_images);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_storage_images);
|
||||
ApplyDescriptorLimitFallback(max_descriptor_set_input_attachements);
|
||||
}
|
||||
|
||||
private:
|
||||
static void ApplyDescriptorLimitFallback(u32& limit) noexcept {
|
||||
if (limit == 0) {
|
||||
limit = DEFAULT_DESCRIPTOR_LIMIT;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
|
@ -545,7 +548,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn
|
|||
IR::Block* const block{storage_inst.block};
|
||||
IR::Inst* const inst{storage_inst.inst};
|
||||
const IR::U32 offset{
|
||||
StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
|
||||
StorageOffset(*block, *inst, storage_buffer, u32(host_info.min_ssbo_alignment))};
|
||||
Replace(*block, *inst, index, offset);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,71 +32,62 @@ struct TextureInst {
|
|||
using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
|
||||
|
||||
constexpr u32 DESCRIPTOR_SIZE = 8;
|
||||
constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
|
||||
constexpr u32 DYNAMIC_DESCRIPTOR_CBUF_BYTES = 16 * 1024;
|
||||
constexpr u32 MAX_DYNAMIC_DESCRIPTOR_COUNT = 1024;
|
||||
constexpr u32 DESCRIPTOR_SIZE_SHIFT = u32(std::countr_zero(DESCRIPTOR_SIZE));
|
||||
constexpr u32 DESCRIPTOR_MAX_COUNT = 1024;
|
||||
|
||||
u32 DynamicDescriptorSizeShift(const IR::U32& dynamic_offset) {
|
||||
const IR::Inst* const inst{dynamic_offset.InstRecursive()};
|
||||
if (!inst || inst->GetOpcode() != IR::Opcode::ShiftLeftLogical32) {
|
||||
const IR::Inst* const inst = dynamic_offset.InstRecursive();
|
||||
if (!inst || inst->GetOpcode() != IR::Opcode::ShiftLeftLogical32)
|
||||
return DESCRIPTOR_SIZE_SHIFT;
|
||||
}
|
||||
const IR::Value shift{inst->Arg(1)};
|
||||
if (!shift.IsImmediate()) {
|
||||
const IR::Value shift = inst->Arg(1);
|
||||
if (!shift.IsImmediate())
|
||||
return DESCRIPTOR_SIZE_SHIFT;
|
||||
}
|
||||
const u32 size_shift{shift.U32()};
|
||||
return size_shift >= DESCRIPTOR_SIZE_SHIFT && size_shift < 31 ? size_shift
|
||||
: DESCRIPTOR_SIZE_SHIFT;
|
||||
const u32 size_shift = shift.U32();
|
||||
return size_shift >= DESCRIPTOR_SIZE_SHIFT && size_shift < 31 ? size_shift : DESCRIPTOR_SIZE_SHIFT;
|
||||
}
|
||||
|
||||
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift) {
|
||||
if (size_shift >= 31 || base_offset >= DYNAMIC_DESCRIPTOR_CBUF_BYTES) {
|
||||
u32 DynamicDescriptorCount(u32 base_offset, u32 size_shift, u32 max_descriptors) {
|
||||
auto const descriptor_limit = (std::max)(1U, max_descriptors);
|
||||
auto const max_cbuf_bytes = 16 * descriptor_limit;
|
||||
if (size_shift >= 31 || base_offset >= max_cbuf_bytes)
|
||||
return 1;
|
||||
}
|
||||
const u32 stride{1U << size_shift};
|
||||
const u32 available{DYNAMIC_DESCRIPTOR_CBUF_BYTES - base_offset};
|
||||
if (available < DESCRIPTOR_SIZE) {
|
||||
auto const stride = 1U << size_shift;
|
||||
auto const available = max_cbuf_bytes - base_offset;
|
||||
if (available < DESCRIPTOR_SIZE)
|
||||
return 1;
|
||||
}
|
||||
const u32 available_count{1U + (available - DESCRIPTOR_SIZE) / stride};
|
||||
return std::min(MAX_DYNAMIC_DESCRIPTOR_COUNT, available_count);
|
||||
auto const available_count = 1U + (available - DESCRIPTOR_SIZE) / stride;
|
||||
return std::min(descriptor_limit, available_count);
|
||||
}
|
||||
|
||||
u32 SaturatingSub(u32 lhs, u32 rhs) {
|
||||
return lhs > rhs ? lhs - rhs : 0;
|
||||
}
|
||||
|
||||
template <typename Descriptors>
|
||||
u32 StaticDescriptorCount(const Descriptors& descriptors) {
|
||||
u32 count{};
|
||||
for (const auto& desc : descriptors) {
|
||||
if (desc.count <= 1) {
|
||||
count += desc.count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
template <typename T>
|
||||
[[nodiscard]] u32 StaticDescriptorCount(T const& descriptors) noexcept {
|
||||
return std::accumulate(descriptors.cbegin(), descriptors.cend(), 0U, [](auto const& acc, auto const& e) {
|
||||
return acc + (e.count <= 1 ? e.count : 0);
|
||||
});
|
||||
}
|
||||
|
||||
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info,
|
||||
u32 dynamic_arrays) {
|
||||
if (dynamic_arrays == 0) {
|
||||
return MAX_DYNAMIC_DESCRIPTOR_COUNT;
|
||||
u32 DynamicSampledTextureCap(const Info& info, const HostTranslateInfo& host_info, u32 dynamic_arrays) {
|
||||
auto const sampled_limit = (std::max)(1U, std::min(host_info.max_per_stage_descriptor_sampled_images,
|
||||
host_info.max_descriptor_set_sampled_images));
|
||||
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
|
||||
if (dynamic_arrays > 0) {
|
||||
auto const sampled_static_count = StaticDescriptorCount(info.texture_buffer_descriptors) + StaticDescriptorCount(info.texture_descriptors);
|
||||
auto const resource_static_count =
|
||||
NumDescriptors(info.constant_buffer_descriptors)
|
||||
+ NumDescriptors(info.storage_buffers_descriptors)
|
||||
+ sampled_static_count + NumDescriptors(info.image_buffer_descriptors)
|
||||
+ NumDescriptors(info.image_descriptors);
|
||||
auto const sampled_budget = SaturatingSub(sampled_limit, sampled_static_count);
|
||||
auto const resource_budget = SaturatingSub(resource_limit, resource_static_count);
|
||||
auto const sampled_cap = sampled_budget / dynamic_arrays;
|
||||
auto const resource_cap = resource_budget / dynamic_arrays;
|
||||
return (std::max)(1U, (std::min)(sampled_cap, resource_cap));
|
||||
}
|
||||
const u32 sampled_static_count{StaticDescriptorCount(info.texture_buffer_descriptors) +
|
||||
StaticDescriptorCount(info.texture_descriptors)};
|
||||
const u32 resource_static_count{
|
||||
NumDescriptors(info.constant_buffer_descriptors) +
|
||||
NumDescriptors(info.storage_buffers_descriptors) + sampled_static_count +
|
||||
NumDescriptors(info.image_buffer_descriptors) + NumDescriptors(info.image_descriptors)};
|
||||
const u32 sampled_limit{std::min(host_info.max_per_stage_descriptor_sampled_images,
|
||||
host_info.max_descriptor_set_sampled_images)};
|
||||
const u32 sampled_budget{SaturatingSub(sampled_limit, sampled_static_count)};
|
||||
const u32 resource_budget{SaturatingSub(host_info.max_per_stage_resources,
|
||||
resource_static_count)};
|
||||
const u32 sampled_cap{sampled_budget / dynamic_arrays};
|
||||
const u32 resource_cap{resource_budget / dynamic_arrays};
|
||||
return std::max(1U, std::min({MAX_DYNAMIC_DESCRIPTOR_COUNT, sampled_cap, resource_cap}));
|
||||
return (std::min)({DESCRIPTOR_MAX_COUNT, sampled_limit, resource_limit});
|
||||
}
|
||||
|
||||
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
||||
|
|
@ -304,21 +295,23 @@ static inline bool IsTexturePixelFormatIntegerCached(Environment& env,
|
|||
}
|
||||
|
||||
|
||||
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env);
|
||||
static inline std::optional<ConstBufferAddr> TrackCached(const IR::Value& v, Environment& env) {
|
||||
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env, const HostTranslateInfo& host_info);
|
||||
static inline std::optional<ConstBufferAddr> TrackCached(const IR::Value& v, Environment& env, const HostTranslateInfo& host_info) {
|
||||
if (const IR::Inst* key = v.InstRecursive()) {
|
||||
if (auto it = env.track_cache.find(key); it != env.track_cache.end()) return it->second;
|
||||
auto found = Track(v, env);
|
||||
auto found = Track(v, env, host_info);
|
||||
if (found) env.track_cache.emplace(key, *found);
|
||||
return found;
|
||||
}
|
||||
return Track(v, env);
|
||||
return Track(v, env, host_info);
|
||||
}
|
||||
|
||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env);
|
||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const HostTranslateInfo& host_info);
|
||||
|
||||
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env) {
|
||||
return IR::BreadthFirstSearch(value, [&env](const IR::Inst* inst) { return TryGetConstBuffer(inst, env); });
|
||||
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env, const HostTranslateInfo& host_info) {
|
||||
return IR::BreadthFirstSearch(value, [&env, &host_info](const IR::Inst* inst) {
|
||||
return TryGetConstBuffer(inst, env, host_info);
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
|
||||
|
|
@ -342,13 +335,13 @@ std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
|
|||
return ReadCbufCached(env, index_number, offset_number);
|
||||
}
|
||||
|
||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env) {
|
||||
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const HostTranslateInfo& host_info) {
|
||||
switch (inst->GetOpcode()) {
|
||||
default:
|
||||
return std::nullopt;
|
||||
case IR::Opcode::BitwiseOr32: {
|
||||
std::optional lhs{TrackCached(inst->Arg(0), env)};
|
||||
std::optional rhs{TrackCached(inst->Arg(1), env)};
|
||||
std::optional lhs{TrackCached(inst->Arg(0), env, host_info)};
|
||||
std::optional rhs{TrackCached(inst->Arg(1), env, host_info)};
|
||||
if (!lhs || !rhs) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
|
@ -378,12 +371,11 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
if (!shift.IsImmediate()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
std::optional lhs{TrackCached(inst->Arg(0), env)};
|
||||
std::optional lhs{TrackCached(inst->Arg(0), env, host_info)};
|
||||
if (lhs) {
|
||||
lhs->shift_left = shift.U32();
|
||||
}
|
||||
return lhs;
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::BitwiseAnd32: {
|
||||
IR::Value op1{inst->Arg(0)};
|
||||
|
|
@ -407,7 +399,7 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
return std::nullopt;
|
||||
} while (false);
|
||||
}
|
||||
std::optional lhs{TrackCached(op1, env)};
|
||||
std::optional lhs{TrackCached(op1, env, host_info)};
|
||||
if (lhs) {
|
||||
lhs->shift_left = static_cast<u32>(std::countr_zero(op2.U32()));
|
||||
}
|
||||
|
|
@ -453,7 +445,10 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
const u32 size_shift{DynamicDescriptorSizeShift(dynamic_offset)};
|
||||
auto const size_shift = DynamicDescriptorSizeShift(dynamic_offset);
|
||||
auto const sampled_limit = (std::max)(1U, (std::min)(host_info.max_per_stage_descriptor_sampled_images,
|
||||
host_info.max_descriptor_set_sampled_images));
|
||||
auto const resource_limit = (std::max)(1U, host_info.max_per_stage_resources);
|
||||
return ConstBufferAddr{
|
||||
.index = index.U32(),
|
||||
.offset = base_offset,
|
||||
|
|
@ -462,15 +457,15 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
.secondary_offset = 0,
|
||||
.secondary_shift_left = 0,
|
||||
.dynamic_offset = dynamic_offset,
|
||||
.count = DynamicDescriptorCount(base_offset, size_shift),
|
||||
.count = DynamicDescriptorCount(base_offset, size_shift, (std::min)({DESCRIPTOR_MAX_COUNT, sampled_limit, resource_limit})),
|
||||
.has_secondary = false,
|
||||
};
|
||||
}
|
||||
|
||||
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
|
||||
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst, const HostTranslateInfo& host_info) {
|
||||
ConstBufferAddr addr;
|
||||
if (IsBindless(inst)) {
|
||||
const std::optional<ConstBufferAddr> track_addr{TrackCached(inst.Arg(0), env)};
|
||||
const std::optional<ConstBufferAddr> track_addr{TrackCached(inst.Arg(0), env, host_info)};
|
||||
|
||||
if (!track_addr) {
|
||||
throw NotImplementedException("Failed to track bindless texture constant buffer");
|
||||
|
|
@ -506,15 +501,15 @@ u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) {
|
|||
return lhs_raw | rhs_raw;
|
||||
}
|
||||
|
||||
[[maybe_unused]]TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
|
||||
[[maybe_unused]] TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
|
||||
return env.ReadTextureType(GetTextureHandle(env, cbuf));
|
||||
}
|
||||
|
||||
[[maybe_unused]]TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) {
|
||||
[[maybe_unused]] TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) {
|
||||
return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf));
|
||||
}
|
||||
|
||||
[[maybe_unused]]bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) {
|
||||
[[maybe_unused]] bool IsTexturePixelFormatInteger(Environment& env, const ConstBufferAddr& cbuf) {
|
||||
return env.IsTexturePixelFormatInteger(GetTextureHandle(env, cbuf));
|
||||
}
|
||||
|
||||
|
|
@ -675,7 +670,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
if (!IsTextureInstruction(inst)) {
|
||||
continue;
|
||||
}
|
||||
to_replace.push_back(MakeInst(env, block, inst));
|
||||
to_replace.push_back(MakeInst(env, block, inst, host_info));
|
||||
}
|
||||
}
|
||||
// Sort instructions to visit textures by constant buffer index, then by offset
|
||||
|
|
@ -689,8 +684,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
|
|||
program.info.texture_descriptors,
|
||||
program.info.image_descriptors,
|
||||
};
|
||||
const u32 sampled_dynamic_cap{
|
||||
DynamicSampledTextureCap(program.info, host_info, DynamicSampledTextureArrayCount(to_replace))};
|
||||
const u32 sampled_dynamic_cap = DynamicSampledTextureCap(program.info, host_info, DynamicSampledTextureArrayCount(to_replace));
|
||||
for (TextureInst& texture_inst : to_replace) {
|
||||
// TODO: Handle arrays
|
||||
IR::Inst* const inst{texture_inst.inst};
|
||||
|
|
|
|||
|
|
@ -92,7 +92,6 @@ struct Profile {
|
|||
bool has_broken_robust{};
|
||||
|
||||
u64 min_ssbo_alignment{};
|
||||
|
||||
u32 max_user_clip_distances{};
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -245,16 +245,31 @@ ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
std::min<u32>(device.GetMaxUserClipDistances(), Maxwell::Regs::NumClipDistances),
|
||||
},
|
||||
host_info{
|
||||
.support_float64 = true,
|
||||
.support_float16 = false,
|
||||
.support_int64 = device.HasShaderInt64(),
|
||||
.needs_demote_reorder = device.IsAmd(),
|
||||
.support_snorm_render_buffer = false,
|
||||
.support_viewport_index_layer = device.HasVertexViewportLayer(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
||||
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
|
||||
.max_per_stage_descriptor_sampled_images =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_per_stage_resources = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_samplers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_uniform_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_uniform_buffers_dynamic =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_buffers = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_buffers_dynamic =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_sampled_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_storage_images = Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.max_descriptor_set_input_attachements =
|
||||
Shader::HostTranslateInfo::DEFAULT_DESCRIPTOR_LIMIT,
|
||||
.support_float64 = true,
|
||||
.support_float16 = false,
|
||||
.support_int64 = device.HasShaderInt64(),
|
||||
.needs_demote_reorder = device.IsAmd(),
|
||||
.support_snorm_render_buffer = false,
|
||||
.support_viewport_index_layer = device.HasVertexViewportLayer(),
|
||||
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
} {
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
if (use_asynchronous_shaders) {
|
||||
workers = CreateWorkers();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,15 @@ namespace Vulkan {
|
|||
|
||||
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
|
||||
|
||||
[[nodiscard]] inline u32 NumDescriptorEntries(const Shader::Info& info) {
|
||||
return Shader::NumDescriptors(info.constant_buffer_descriptors) +
|
||||
Shader::NumDescriptors(info.storage_buffers_descriptors) +
|
||||
Shader::NumDescriptors(info.texture_buffer_descriptors) +
|
||||
Shader::NumDescriptors(info.image_buffer_descriptors) +
|
||||
Shader::NumDescriptors(info.texture_descriptors) +
|
||||
Shader::NumDescriptors(info.image_descriptors);
|
||||
}
|
||||
|
||||
class DescriptorLayoutBuilder {
|
||||
public:
|
||||
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
|
||||
|
|
|
|||
|
|
@ -326,7 +326,7 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
|
|||
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
|
||||
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
|
@ -384,7 +384,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
|||
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
|
@ -429,7 +429,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
|
|||
}
|
||||
const size_t compare_size = compare_to_zero ? 8 : 24;
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32));
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
|
@ -498,7 +498,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
|
|||
static constexpr size_t DISPATCH_SIZE = 2048U;
|
||||
size_t runs_to_do = std::min<size_t>(current_runs, DISPATCH_SIZE);
|
||||
current_runs -= runs_to_do;
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 3);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64));
|
||||
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64));
|
||||
compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64));
|
||||
|
|
@ -600,7 +600,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
|||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
|
||||
const u32 num_dispatches_z = image.info.resources.layers;
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||
|
|
@ -821,7 +821,7 @@ void BlockLinearUnswizzle3DPass::UnswizzleChunk(
|
|||
pc.blocks_dim[1] = blocks_y;
|
||||
pc.blocks_dim[2] = z_count; // Only process the count
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 3);
|
||||
compute_pass_descriptor_queue.AddBuffer(*image.runtime->swizzle_table_buffer, 0,
|
||||
image.runtime->swizzle_table_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(swizzled.buffer,
|
||||
|
|
@ -989,7 +989,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image,
|
|||
ASSERT(copy.dst_subresource.base_layer == 0);
|
||||
ASSERT(copy.dst_subresource.num_layers == 1);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(scheduler, 2);
|
||||
compute_pass_descriptor_queue.AddImage(
|
||||
src_image.StorageImageView(copy.src_subresource.base_level));
|
||||
compute_pass_descriptor_queue.AddImage(
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk
|
|||
}
|
||||
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
|
||||
uniform_buffer_sizes.begin());
|
||||
num_descriptor_entries = NumDescriptorEntries(info);
|
||||
|
||||
auto func{[this, &scheduler, &descriptor_pool, shader_notify, pipeline_statistics] {
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
|
|
@ -113,7 +114,7 @@ ComputePipeline::ComputePipeline(const Device& device_, Scheduler& scheduler, vk
|
|||
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
|
||||
BufferCache& buffer_cache, TextureCache& texture_cache) {
|
||||
guest_descriptor_queue.Acquire();
|
||||
guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries);
|
||||
|
||||
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ private:
|
|||
vk::PipelineCache& pipeline_cache;
|
||||
GuestDescriptorQueue& guest_descriptor_queue;
|
||||
Shader::Info info;
|
||||
u32 num_descriptor_entries{};
|
||||
|
||||
VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
|
||||
|
||||
|
|
|
|||
|
|
@ -268,6 +268,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
||||
num_image_elements += Shader::NumDescriptors(info->texture_descriptors);
|
||||
num_image_elements += Shader::NumDescriptors(info->image_descriptors);
|
||||
num_descriptor_entries += NumDescriptorEntries(*info);
|
||||
}
|
||||
fragment_has_color0_output = stage_infos[NUM_STAGES - 1].stores_frag_color[0];
|
||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
||||
|
|
@ -473,7 +474,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
buffer_cache.UpdateGraphicsBuffers(is_indexed);
|
||||
buffer_cache.BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
guest_descriptor_queue.Acquire();
|
||||
guest_descriptor_queue.Acquire(scheduler, num_descriptor_entries);
|
||||
|
||||
RescalingPushConstant rescaling;
|
||||
RenderAreaPushConstant render_area;
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ private:
|
|||
std::array<Shader::Info, NUM_STAGES> stage_infos;
|
||||
std::array<u32, 5> enabled_uniform_buffer_masks{};
|
||||
VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
|
||||
u32 num_descriptor_entries{};
|
||||
size_t num_image_elements{};
|
||||
u32 num_textures{};
|
||||
bool fragment_has_color0_output{};
|
||||
|
|
|
|||
|
|
@ -439,10 +439,21 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
.has_broken_robust =
|
||||
device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
|
||||
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
|
||||
.max_user_clip_distances = device.GetMaxUserClipDistances(),
|
||||
.max_user_clip_distances = device.GetMaxUserClipDistances()
|
||||
};
|
||||
|
||||
host_info = Shader::HostTranslateInfo{
|
||||
.min_ssbo_alignment = device.GetStorageBufferAlignment(),
|
||||
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
|
||||
.max_per_stage_resources = device.GetMaxPerStageResources(),
|
||||
.max_descriptor_set_samplers = device.GetMaxDescriptorSetSamplers(),
|
||||
.max_descriptor_set_uniform_buffers = device.GetMaxDescriptorSetUniformBuffers(),
|
||||
.max_descriptor_set_uniform_buffers_dynamic = device.GetMaxDescriptorSetUniformBuffersDynamic(),
|
||||
.max_descriptor_set_storage_buffers = device.GetMaxDescriptorSetStorageBuffers(),
|
||||
.max_descriptor_set_storage_buffers_dynamic = device.GetMaxDescriptorSetStorageBuffersDynamic(),
|
||||
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
|
||||
.max_descriptor_set_storage_images = device.GetMaxDescriptorSetStorageImages(),
|
||||
.max_descriptor_set_input_attachements = device.GetMaxDescriptorSetInputAttachments(),
|
||||
.support_float64 = device.IsFloat64Supported(),
|
||||
.support_float16 = device.IsFloat16Supported(),
|
||||
.support_int64 = device.IsShaderInt64Supported(),
|
||||
|
|
@ -451,13 +462,10 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
|
|||
driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY,
|
||||
.support_snorm_render_buffer = true,
|
||||
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
|
||||
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
|
||||
.max_per_stage_descriptor_sampled_images = device.GetMaxPerStageDescriptorSampledImages(),
|
||||
.max_per_stage_resources = device.GetMaxPerStageResources(),
|
||||
.max_descriptor_set_sampled_images = device.GetMaxDescriptorSetSampledImages(),
|
||||
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
|
||||
.support_conditional_barrier = device.SupportsConditionalBarriers(),
|
||||
};
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
|
||||
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
|
||||
LOG_WARNING(Render_Vulkan, "maxVertexInputAttributes is too low: {} < {}",
|
||||
|
|
|
|||
|
|
@ -203,7 +203,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
: gpu{gpu_}, device_memory{device_memory_}, device{device_},
|
||||
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
|
||||
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
|
||||
guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
|
||||
guest_descriptor_queue(device), compute_pass_descriptor_queue(device),
|
||||
blit_image(device, scheduler, state_tracker, descriptor_pool), render_pass_cache(device),
|
||||
texture_cache_runtime{
|
||||
device, scheduler, memory_allocator, staging_pool,
|
||||
|
|
|
|||
|
|
@ -155,15 +155,14 @@ void Scheduler::WaitWorker() {
|
|||
}
|
||||
|
||||
void Scheduler::DispatchWork() {
|
||||
if (chunk->Empty()) {
|
||||
return;
|
||||
if (chunk && !chunk->Empty()) {
|
||||
{
|
||||
std::scoped_lock ql{queue_mutex};
|
||||
work_queue.push(std::move(chunk));
|
||||
}
|
||||
event_cv.notify_all();
|
||||
AcquireNewChunk();
|
||||
}
|
||||
{
|
||||
std::scoped_lock ql{queue_mutex};
|
||||
work_queue.push(std::move(chunk));
|
||||
}
|
||||
event_cv.notify_all();
|
||||
AcquireNewChunk();
|
||||
}
|
||||
|
||||
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include <variant>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
|
|
@ -15,8 +16,9 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
|
||||
: device{device_}, scheduler{scheduler_} {
|
||||
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_)
|
||||
: device{device_}
|
||||
{
|
||||
payload_start = payload.data();
|
||||
payload_cursor = payload.data();
|
||||
}
|
||||
|
|
@ -31,13 +33,15 @@ void UpdateDescriptorQueue::TickFrame() {
|
|||
payload_cursor = payload_start;
|
||||
}
|
||||
|
||||
void UpdateDescriptorQueue::Acquire() {
|
||||
// Minimum number of entries required.
|
||||
// This is the maximum number of entries a single draw call might use.
|
||||
static constexpr size_t MIN_ENTRIES = 0x400;
|
||||
|
||||
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
|
||||
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
|
||||
void UpdateDescriptorQueue::Acquire(Scheduler& scheduler, size_t required_entries) {
|
||||
static constexpr size_t DEFAULT_REQUIRED_ENTRIES = 0x400;
|
||||
const size_t reserve = required_entries > 0 ? required_entries : DEFAULT_REQUIRED_ENTRIES;
|
||||
ASSERT_MSG(reserve < FRAME_PAYLOAD_SIZE, "Descriptor reservation {} >= frame capacity {}",
|
||||
reserve, FRAME_PAYLOAD_SIZE);
|
||||
const size_t used = static_cast<size_t>(std::distance(payload_start, payload_cursor));
|
||||
if (used + reserve >= FRAME_PAYLOAD_SIZE) {
|
||||
LOG_WARNING(Render_Vulkan, "Payload overflow (used={}, reserve={}, capacity={})",
|
||||
used, reserve, FRAME_PAYLOAD_SIZE);
|
||||
scheduler.WaitWorker();
|
||||
payload_cursor = payload_start;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,12 +34,11 @@ class UpdateDescriptorQueue final {
|
|||
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
|
||||
|
||||
public:
|
||||
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
|
||||
explicit UpdateDescriptorQueue(const Device& device_);
|
||||
~UpdateDescriptorQueue();
|
||||
|
||||
void TickFrame();
|
||||
|
||||
void Acquire();
|
||||
void Acquire(Scheduler& scheduler, size_t required_entries = 0);
|
||||
|
||||
const DescriptorUpdateEntry* UpdateData() const noexcept {
|
||||
return upload_start;
|
||||
|
|
@ -75,8 +74,6 @@ public:
|
|||
|
||||
private:
|
||||
const Device& device;
|
||||
Scheduler& scheduler;
|
||||
|
||||
size_t frame_index{0};
|
||||
DescriptorUpdateEntry* payload_cursor = nullptr;
|
||||
DescriptorUpdateEntry* payload_start = nullptr;
|
||||
|
|
|
|||
|
|
@ -321,32 +321,23 @@ public:
|
|||
return properties.properties.limits.maxPushConstantsSize;
|
||||
}
|
||||
|
||||
/// Returns the maximum size for shared memory.
|
||||
u32 GetMaxComputeSharedMemorySize() const {
|
||||
return properties.properties.limits.maxComputeSharedMemorySize;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of dynamic storage buffer descriptors per set.
|
||||
u32 GetMaxDescriptorSetStorageBuffersDynamic() const {
|
||||
return properties.properties.limits.maxDescriptorSetStorageBuffersDynamic;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of dynamic uniform buffer descriptors per set.
|
||||
u32 GetMaxDescriptorSetUniformBuffersDynamic() const {
|
||||
return properties.properties.limits.maxDescriptorSetUniformBuffersDynamic;
|
||||
}
|
||||
|
||||
u32 GetMaxPerStageDescriptorSampledImages() const {
|
||||
return properties.properties.limits.maxPerStageDescriptorSampledImages;
|
||||
}
|
||||
|
||||
u32 GetMaxPerStageResources() const {
|
||||
return properties.properties.limits.maxPerStageResources;
|
||||
}
|
||||
|
||||
u32 GetMaxDescriptorSetSampledImages() const {
|
||||
return properties.properties.limits.maxDescriptorSetSampledImages;
|
||||
}
|
||||
#define FN_MAX_LIMIT_LIST \
|
||||
FN_MAX_LIMIT_ELEM(ComputeSharedMemorySize) \
|
||||
FN_MAX_LIMIT_ELEM(PerStageDescriptorSampledImages) \
|
||||
FN_MAX_LIMIT_ELEM(PerStageResources) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetSamplers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetUniformBuffersDynamic) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffers) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageBuffersDynamic) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetSampledImages) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetStorageImages) \
|
||||
FN_MAX_LIMIT_ELEM(DescriptorSetInputAttachments)
|
||||
#define FN_MAX_LIMIT_ELEM(name) \
|
||||
u32 GetMax##name() const { return properties.properties.limits.max##name; }
|
||||
FN_MAX_LIMIT_LIST
|
||||
#undef FN_MAX_LIMIT_ELEM
|
||||
#undef FN_MAX_LIMIT_LIST
|
||||
|
||||
/// Returns float control properties of the device.
|
||||
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ int IrShaderRecompilerImpl(int argc, char *argv[]) {
|
|||
host_info.support_geometry_shader_passthrough = true;
|
||||
host_info.support_conditional_barrier = true;
|
||||
host_info.min_ssbo_alignment = 0;
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
|
||||
auto const dumped_ir = Shader::IR::DumpProgram(program);
|
||||
std::printf("%s\n", dumped_ir.c_str());
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ int SpirvShaderRecompilerImpl(int argc, char *argv[]) {
|
|||
host_info.support_geometry_shader_passthrough = true;
|
||||
host_info.support_conditional_barrier = true;
|
||||
host_info.min_ssbo_alignment = 0;
|
||||
host_info.ApplyDescriptorLimitPolicy();
|
||||
auto program = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
|
||||
|
||||
// IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue