diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 275364f27..618d295b2 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -58,6 +58,15 @@ set(SHADER_FILES
     scaling/FSR/ffx_fsr1.h
     scaling/SharpBilinear/OpenGL/opengl_sharpbilinear.vert
     scaling/SharpBilinear/OpenGL/opengl_sharpbilinear.frag
+    scaling/FSR/Vulkan/vulkan_fsr_pass0.vert
+    scaling/FSR/Vulkan/vulkan_fsr_pass0_part1.frag
+    scaling/FSR/Vulkan/vulkan_fsr_pass0_part2.frag
+    scaling/FSR/Vulkan/vulkan_fsr_pass1.vert
+    scaling/FSR/Vulkan/vulkan_fsr_pass1_part1.frag
+    scaling/FSR/Vulkan/vulkan_fsr_pass1_part2.frag
+    scaling/FSR/Vulkan/vulkan_fsr_pass1_part3.frag
+    scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear.vert
+    scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear.frag
     full_screen_triangle.vert
     opengl_present.frag
     opengl_present.vert
diff --git a/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0.vert b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0.vert
new file mode 100644
index 000000000..98dfdf69f
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0.vert
@@ -0,0 +1,26 @@
+// FSR - [EASU] EDGE ADAPTIVE SPATIAL UPSAMPLING
+// Vertex stage: passes the vertex position and texture coordinate through unchanged.
+#version 450 core
+#extension GL_ARB_separate_shader_objects : enable
+
+layout(location = 0) in vec2 vert_position;
+layout(location = 1) in vec2 vert_tex_coord;
+layout(location = 0) out vec2 frag_tex_coord;
+
+layout (push_constant, std140) uniform DrawInfo {
+    mat4 modelview_matrix;
+    vec4 i_resolution;
+    vec4 o_resolution;
+    int screen_id_l;
+    int screen_id_r;
+    int layer;
+    int reverse_interlaced;
+    int convert_colors;
+};
+
+void main()
+{
+    gl_Position = vec4(vert_position, 0.0, 1.0);
+    frag_tex_coord = vert_tex_coord;
+}
+
diff --git a/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_part1.frag b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_part1.frag
new file mode 100644
index 000000000..e84ee683e
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_part1.frag
@@ -0,0 +1,32 @@
+// FSR - [EASU] EDGE ADAPTIVE SPATIAL UPSAMPLING
+// SM 4.0 compatible: no textureGather, direct texelFetch of 12 unique texels.
+#version 450 core
+#extension GL_ARB_separate_shader_objects : enable
+
+layout(location = 0) in vec2 frag_tex_coord;
+layout(location = 0) out vec4 color;
+layout (set = 0, binding = 0) uniform sampler2D color_texture;
+
+layout (push_constant, std140) uniform DrawInfo {
+    mat4 modelview_matrix;
+    vec4 i_resolution;
+    vec4 o_resolution;
+    int screen_id_l;
+    int screen_id_r;
+    int layer;
+    int reverse_interlaced;
+    int convert_colors;
+};
+
+#define A_GPU 1
+#define A_GLSL 1
+// #include "ffx_a.h"
+
+// FSR_EASU_F is intentionally NOT defined for this shader.
+// Only FsrEasuCon is needed (it compiles under A_GPU alone);
+// the EASU filter logic is inlined below so that the
+// textureGather-based callback system is avoided entirely.
+// This yields 12 texelFetch calls instead of the original
+// 12 textureGather calls (4 gathers x 3 channels), and is
+// faster than emulating gathers with 48 individual fetches.
+// #include "ffx_fsr1.h"
diff --git a/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_part2.frag b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_part2.frag
new file mode 100644
index 000000000..faae0c453
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_part2.frag
@@ -0,0 +1,196 @@
+void main() {
+    // --- Setup constants (same as original) ---
+    AU4 con0, con1, con2, con3;
+    FsrEasuCon(con0, con1, con2, con3,
+               i_resolution.x, i_resolution.y,
+               i_resolution.x, i_resolution.y,
+               o_resolution.x, o_resolution.y);
+
+    AU2 gxy = AU2(frag_tex_coord.xy * o_resolution.xy); // Integer pixel position in output.
+
+    // --- Get position of 'f' (the center texel of the kernel) ---
+    AF2 pp = AF2(gxy) * AF2_AU2(con0.xy) + AF2_AU2(con0.zw);
+    AF2 fp = floor(pp);
+    pp -= fp;
+
+    // --- Fetch all 12 unique texels directly as RGB ---
+    // The 12-tap kernel layout relative to 'fp':
+    //     b c          (0,-1) (1,-1)
+    //   e f g h    (-1,0) (0,0) (1,0) (2,0)
+    //   i j k l    (-1,1) (0,1) (1,1) (2,1)
+    //     n o          (0, 2) (1, 2)
+    ivec2 sp = ivec2(fp);
+    // texelFetch ignores the sampler address mode and an out-of-range fetch is
+    // undefined, so every tap is clamped to the texture (clamp-to-edge behavior).
+    ivec2 sp_max = textureSize(color_texture, 0) - ivec2(1);
+    AF3 b = texelFetch(color_texture, clamp(sp + ivec2( 0,-1), ivec2(0), sp_max), 0).rgb;
+    AF3 c = texelFetch(color_texture, clamp(sp + ivec2( 1,-1), ivec2(0), sp_max), 0).rgb;
+    AF3 e = texelFetch(color_texture, clamp(sp + ivec2(-1, 0), ivec2(0), sp_max), 0).rgb;
+    AF3 f = texelFetch(color_texture, clamp(sp + ivec2( 0, 0), ivec2(0), sp_max), 0).rgb;
+    AF3 g = texelFetch(color_texture, clamp(sp + ivec2( 1, 0), ivec2(0), sp_max), 0).rgb;
+    AF3 h = texelFetch(color_texture, clamp(sp + ivec2( 2, 0), ivec2(0), sp_max), 0).rgb;
+    AF3 i = texelFetch(color_texture, clamp(sp + ivec2(-1, 1), ivec2(0), sp_max), 0).rgb;
+    AF3 j = texelFetch(color_texture, clamp(sp + ivec2( 0, 1), ivec2(0), sp_max), 0).rgb;
+    AF3 k = texelFetch(color_texture, clamp(sp + ivec2( 1, 1), ivec2(0), sp_max), 0).rgb;
+    AF3 l = texelFetch(color_texture, clamp(sp + ivec2( 2, 1), ivec2(0), sp_max), 0).rgb;
+    AF3 n = texelFetch(color_texture, clamp(sp + ivec2( 0, 2), ivec2(0), sp_max), 0).rgb;
+    AF3 o = texelFetch(color_texture, clamp(sp + ivec2( 1, 2), ivec2(0), sp_max), 0).rgb;
+
+    // --- Approximate luma (luma times 2, in 2 FMA/MAD) ---
+    AF1 bL = b.b * AF1_(0.5) + (b.r * AF1_(0.5) + b.g);
+    AF1 cL = c.b * AF1_(0.5) + (c.r * AF1_(0.5) + c.g);
+    AF1 eL = e.b * AF1_(0.5) + (e.r * AF1_(0.5) + e.g);
+    AF1 fL = f.b * AF1_(0.5) + (f.r * AF1_(0.5) + f.g);
+    AF1 gL = g.b * AF1_(0.5) + (g.r * AF1_(0.5) + g.g);
+    AF1 hL = h.b * AF1_(0.5) + (h.r * AF1_(0.5) + h.g);
+    AF1 iL = i.b * AF1_(0.5) + (i.r * AF1_(0.5) + i.g);
+    AF1 jL = j.b * AF1_(0.5) + (j.r * AF1_(0.5) + j.g);
+    AF1 kL = k.b * AF1_(0.5) + (k.r * AF1_(0.5) + k.g);
+    AF1 lL = l.b * AF1_(0.5) + (l.r * AF1_(0.5) + l.g);
+    AF1 nL = n.b * AF1_(0.5) + (n.r * AF1_(0.5) + n.g);
+    AF1 oL = o.b * AF1_(0.5) + (o.r * AF1_(0.5) + o.g);
+
+    // --- Accumulate direction and length ---
+    // Inlined FsrEasuSetF for each of the 4 bilinear quadrants.
+    // Each quadrant computes gradient direction and edge length
+    // from its 5-tap cross pattern centered on the quadrant's
+    // nearest texel.
+    //
+    // Quadrant layout (bilinear weights):
+    //   s=(1-x)(1-y)  t=x(1-y)
+    //   u=(1-x)y      v=xy
+    //
+    // Cross pattern for each quadrant:
+    //   s: center=f, left=e, right=g, up=b, down=j
+    //   t: center=g, left=f, right=h, up=c, down=k
+    //   u: center=j, left=i, right=k, up=f, down=n
+    //   v: center=k, left=j, right=l, up=g, down=o
+
+    AF2 dir = AF2_(0.0);
+    AF1 len = AF1_(0.0);
+
+    // Quadrant s
+    {
+        AF1 w = (AF1_(1.0) - pp.x) * (AF1_(1.0) - pp.y);
+        AF1 dc = gL - fL; AF1 cb = fL - eL;
+        AF1 lenX = max(abs(dc), abs(cb));
+        lenX = APrxLoRcpF1(lenX);
+        AF1 dirX = gL - eL;
+        dir.x += dirX * w;
+        lenX = ASatF1(abs(dirX) * lenX); lenX *= lenX; len += lenX * w;
+        AF1 ec = jL - fL; AF1 ca = fL - bL;
+        AF1 lenY = max(abs(ec), abs(ca));
+        lenY = APrxLoRcpF1(lenY);
+        AF1 dirY = jL - bL;
+        dir.y += dirY * w;
+        lenY = ASatF1(abs(dirY) * lenY); lenY *= lenY; len += lenY * w;
+    }
+    // Quadrant t
+    {
+        AF1 w = pp.x * (AF1_(1.0) - pp.y);
+        AF1 dc = hL - gL; AF1 cb = gL - fL;
+        AF1 lenX = max(abs(dc), abs(cb));
+        lenX = APrxLoRcpF1(lenX);
+        AF1 dirX = hL - fL;
+        dir.x += dirX * w;
+        lenX = ASatF1(abs(dirX) * lenX); lenX *= lenX; len += lenX * w;
+        AF1 ec = kL - gL; AF1 ca = gL - cL;
+        AF1 lenY = max(abs(ec), abs(ca));
+        lenY = APrxLoRcpF1(lenY);
+        AF1 dirY = kL - cL;
+        dir.y += dirY * w;
+        lenY = ASatF1(abs(dirY) * lenY); lenY *= lenY; len += lenY * w;
+    }
+    // Quadrant u
+    {
+        AF1 w = (AF1_(1.0) - pp.x) * pp.y;
+        AF1 dc = kL - jL; AF1 cb = jL - iL;
+        AF1 lenX = max(abs(dc), abs(cb));
+        lenX = APrxLoRcpF1(lenX);
+        AF1 dirX = kL - iL;
+        dir.x += dirX * w;
+        lenX = ASatF1(abs(dirX) * lenX); lenX *= lenX; len += lenX * w;
+        AF1 ec = nL - jL; AF1 ca = jL - fL;
+        AF1 lenY = max(abs(ec), abs(ca));
+        lenY = APrxLoRcpF1(lenY);
+        AF1 dirY = nL - fL;
+        dir.y += dirY * w;
+        lenY = ASatF1(abs(dirY) * lenY); lenY *= lenY; len += lenY * w;
+    }
+    // Quadrant v
+    {
+        AF1 w = pp.x * pp.y;
+        AF1 dc = lL - kL; AF1 cb = kL - jL;
+        AF1 lenX = max(abs(dc), abs(cb));
+        lenX = APrxLoRcpF1(lenX);
+        AF1 dirX = lL - jL;
+        dir.x += dirX * w;
+        lenX = ASatF1(abs(dirX) * lenX); lenX *= lenX; len += lenX * w;
+        AF1 ec = oL - kL; AF1 ca = kL - gL;
+        AF1 lenY = max(abs(ec), abs(ca));
+        lenY = APrxLoRcpF1(lenY);
+        AF1 dirY = oL - gL;
+        dir.y += dirY * w;
+        lenY = ASatF1(abs(dirY) * lenY); lenY *= lenY; len += lenY * w;
+    }
+
+    // --- Normalize direction ---
+    AF2 dir2 = dir * dir;
+    AF1 dirR = dir2.x + dir2.y;
+    AP1 zro = dirR < AF1_(1.0 / 32768.0);
+    dirR = APrxLoRsqF1(dirR);
+    dirR = zro ? AF1_(1.0) : dirR;
+    dir.x = zro ? AF1_(1.0) : dir.x;
+    dir *= AF2_(dirR);
+
+    // --- Shape length ---
+    len = len * AF1_(0.5);
+    len *= len;
+    AF1 stretch = (dir.x * dir.x + dir.y * dir.y) * APrxLoRcpF1(max(abs(dir.x), abs(dir.y)));
+    AF2 len2 = AF2(AF1_(1.0) + (stretch - AF1_(1.0)) * len, AF1_(1.0) + AF1_(-0.5) * len);
+    AF1 lob = AF1_(0.5) + AF1_((1.0 / 4.0 - 0.04) - 0.5) * len;
+    AF1 clp = APrxLoRcpF1(lob);
+
+    // --- Min/max of 4 nearest (f, g, j, k) for de-ringing ---
+    AF3 min4 = min(min(f, g), min(j, k));
+    AF3 max4 = max(max(f, g), max(j, k));
+
+    // --- Accumulate 12 taps (inlined FsrEasuTapF) ---
+    AF3 aC = AF3_(0.0);
+    AF1 aW = AF1_(0.0);
+
+    // Macro for the Lanczos-like kernel evaluation per tap.
+    // Rotates offset by direction, applies anisotropic scaling,
+    // evaluates the approximated windowed Lanczos kernel, accumulates.
+    #define FSR_EASU_TAP(OFF_X, OFF_Y, COLOR) { \
+        AF2 v; \
+        v.x = ((OFF_X) - pp.x) * dir.x + ((OFF_Y) - pp.y) * dir.y; \
+        v.y = ((OFF_X) - pp.x) * (-dir.y) + ((OFF_Y) - pp.y) * dir.x; \
+        v *= len2; \
+        AF1 d2 = min(v.x * v.x + v.y * v.y, clp); \
+        AF1 wB = AF1_(2.0 / 5.0) * d2 + AF1_(-1.0); \
+        AF1 wA = lob * d2 + AF1_(-1.0); \
+        wB *= wB; wA *= wA; \
+        wB = AF1_(25.0 / 16.0) * wB + AF1_(-(25.0 / 16.0 - 1.0)); \
+        AF1 w = wB * wA; \
+        aC += (COLOR) * w; aW += w; }
+
+    FSR_EASU_TAP( 0.0, -1.0, b) // b
+    FSR_EASU_TAP( 1.0, -1.0, c) // c
+    FSR_EASU_TAP(-1.0,  1.0, i) // i
+    FSR_EASU_TAP( 0.0,  1.0, j) // j
+    FSR_EASU_TAP( 0.0,  0.0, f) // f
+    FSR_EASU_TAP(-1.0,  0.0, e) // e
+    FSR_EASU_TAP( 1.0,  1.0, k) // k
+    FSR_EASU_TAP( 2.0,  1.0, l) // l
+    FSR_EASU_TAP( 2.0,  0.0, h) // h
+    FSR_EASU_TAP( 1.0,  0.0, g) // g
+    FSR_EASU_TAP( 1.0,  2.0, o) // o
+    FSR_EASU_TAP( 0.0,  2.0, n) // n
+
+    #undef FSR_EASU_TAP
+
+    // --- Normalize and de-ring ---
+    AF3 pix = min(max4, max(min4, aC * AF3_(ARcpF1(aW))));
+    color = vec4(pix, 1.0);
+}
diff --git a/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1.vert b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1.vert
new file mode 100644
index 000000000..ea8e46919
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1.vert
@@ -0,0 +1,26 @@
+// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING
+//? #version 450
+#version 450 core
+#extension GL_ARB_separate_shader_objects : enable
+
+layout(location = 0) in vec2 vert_position;
+layout(location = 1) in vec2 vert_tex_coord;
+layout(location = 0) out vec2 frag_tex_coord;
+
+layout (push_constant, std140) uniform DrawInfo {
+    mat4 modelview_matrix;
+    vec4 i_resolution;
+    vec4 o_resolution;
+    int screen_id_l;
+    int screen_id_r;
+    int layer;
+    int reverse_interlaced;
+    int convert_colors;
+};
+
+void main()
+{
+    gl_Position = vec4(vert_position, 0.0, 1.0); // Position is passed through untransformed.
+    frag_tex_coord = vert_tex_coord;
+}
+
diff --git a/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part1.frag b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part1.frag
new file mode 100644
index 000000000..13af4f4c8
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part1.frag
@@ -0,0 +1,22 @@
+// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING
+#version 450 core
+#extension GL_ARB_separate_shader_objects : enable
+
+layout(location = 0) in vec2 frag_tex_coord;
+layout(location = 0) out vec4 color;
+layout (set = 0, binding = 0) uniform sampler2D color_texture;
+layout (push_constant, std140) uniform DrawInfo {
+    mat4 modelview_matrix;
+    vec4 i_resolution;
+    vec4 o_resolution;
+    int screen_id_l;
+    int screen_id_r;
+    int layer;
+    int reverse_interlaced;
+    int convert_colors;
+    float FSR_SHARPENING; // Passed to FsrRcasCon() as the sharpness value.
+};
+
+#define A_GPU 1
+#define A_GLSL 1
+// #include "ffx_a.h"
diff --git a/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part2.frag b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part2.frag
new file mode 100644
index 000000000..47e4739cb
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part2.frag
@@ -0,0 +1,7 @@
+#define FSR_RCAS_F 1
+AU4 con0;
+
+AF4 FsrRcasLoadF(ASU2 p) { return AF4(texelFetch(color_texture, clamp(p, ASU2(0), textureSize(color_texture, 0) - ASU2(1)), 0)); } // Clamped: out-of-range texelFetch is undefined.
+void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {}
+
+// #include "ffx_fsr1.h"
diff --git a/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part3.frag b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part3.frag
new file mode 100644
index 000000000..2bb741071
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part3.frag
@@ -0,0 +1,9 @@
+void main() {
+    FsrRcasCon(con0, FSR_SHARPENING);
+
+    AU2 gxy = AU2(frag_tex_coord.xy * o_resolution.xy); // Integer pixel position in output.
+    AF3 Gamma2Color = AF3(0, 0, 0);
+    FsrRcasF(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, gxy, con0);
+
+    color = vec4(Gamma2Color, 1.0);
+}
diff --git a/src/video_core/host_shaders/scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear.frag b/src/video_core/host_shaders/scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear.frag
new file mode 100644
index 000000000..9bc8773b7
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear.frag
@@ -0,0 +1,61 @@
+/*
+    Author: KojoZero (modified from rsn8887's shader)
+    License: Public domain
+
+    This is an integer prescale filter that should be combined
+    with a bilinear hardware filtering (GL_BILINEAR filter or some such) to achieve
+    a smooth scaling result with minimum blur. This is good for pixel graphics
+    that are scaled by non-integer factors.
+
+    This is a modified version of rsn8887's shader which has been modified to scale
+    until above the output resolution, rather than right below the output resolution.
+
+    The prescale factor and texel coordinates are precalculated
+    in the vertex shader for speed.
+*/
+#version 450 core
+#extension GL_ARB_separate_shader_objects : enable
+
+
+layout(location = 0) in vec2 frag_tex_coord;
+layout(location = 1) in vec2 precalc_texel;
+layout(location = 2) in vec2 precalc_scale;
+layout(location = 0) out vec4 color;
+layout (set = 0, binding = 0) uniform sampler2D color_texture;
+layout (push_constant, std140) uniform DrawInfo {
+    mat4 modelview_matrix;
+    vec4 i_resolution;
+    vec4 o_resolution;
+    int screen_id_l;
+    int screen_id_r;
+    int layer;
+    int reverse_interlaced;
+    int convert_colors;
+};
+
+vec3 LinearTosRGB(vec3 c) {
+    return mix(c * 12.92, 1.055 * pow(c, vec3(1.0/2.4)) - 0.055, step(0.0031308, c));
+}
+
+void main()
+{
+    vec2 texel = precalc_texel;
+    vec2 scale = precalc_scale;
+    vec2 texel_floored = floor(texel);
+    vec2 s = fract(texel);
+    vec2 region_range = 0.5 - 0.5 / scale;
+
+    // Figure out where in the texel to sample to get correct pre-scaled bilinear.
+    // Uses the hardware bilinear interpolator to avoid having to sample 4 times manually.
+
+    vec2 center_dist = s - 0.5;
+    vec2 f = (center_dist - clamp(center_dist, -region_range, region_range)) * scale + 0.5;
+
+    vec2 mod_texel = texel_floored + f;
+
+    vec4 pixel = vec4(texture(color_texture, mod_texel / i_resolution.xy).rgb, 1.0);
+    if (convert_colors == 2){
+        pixel = vec4(LinearTosRGB(pixel.rgb), pixel.a);
+    }
+    color = pixel;
+}
\ No newline at end of file
diff --git a/src/video_core/host_shaders/scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear.vert b/src/video_core/host_shaders/scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear.vert
new file mode 100644
index 000000000..d1910dd04
--- /dev/null
+++ b/src/video_core/host_shaders/scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear.vert
@@ -0,0 +1,27 @@
+#version 450 core
+#extension GL_ARB_separate_shader_objects : enable
+
+layout(location = 0) in vec2 vert_position;
+layout(location = 1) in vec2 vert_tex_coord;
+layout(location = 0) out vec2 frag_tex_coord;
+layout(location = 1) out vec2 precalc_texel;
+layout(location = 2) out vec2 precalc_scale;
+layout (push_constant, std140) uniform DrawInfo {
+    mat4 modelview_matrix;
+    vec4 i_resolution;
+    vec4 o_resolution;
+    int screen_id_l;
+    int screen_id_r;
+    int layer;
+    int reverse_interlaced;
+    int convert_colors;
+};
+
+void main()
+{
+    vec4 position = vec4(vert_position, 0.0, 1.0) * modelview_matrix;
+    gl_Position = vec4(position.x, position.y, 0.0, 1.0);
+    frag_tex_coord = vert_tex_coord;
+    precalc_scale = ceil(o_resolution.xy / i_resolution.xy); // Prescale factor, rounded up to an integer.
+    precalc_texel = vert_tex_coord.xy * i_resolution.xy; // Texture coordinate in input texels.
+}
\ No newline at end of file
diff --git a/src/video_core/host_shaders/scaling/vulkan_area_sampling.frag b/src/video_core/host_shaders/scaling/vulkan_area_sampling.frag
index 347bc8af0..953034395 100644
--- a/src/video_core/host_shaders/scaling/vulkan_area_sampling.frag
+++ b/src/video_core/host_shaders/scaling/vulkan_area_sampling.frag
@@ -1,4 +1,6 @@
 //? #version 460 core
+#version 450 core
+#extension GL_ARB_separate_shader_objects : enable
 
 layout(location = 0) in vec2 frag_tex_coord;
 layout(location = 0) out vec4 color;
diff --git a/src/video_core/host_shaders/scaling/vulkan_area_sampling.vert b/src/video_core/host_shaders/scaling/vulkan_area_sampling.vert
index 2117c62fc..e65f78cd2 100644
--- a/src/video_core/host_shaders/scaling/vulkan_area_sampling.vert
+++ b/src/video_core/host_shaders/scaling/vulkan_area_sampling.vert
@@ -1,4 +1,7 @@
 //? #version 460
+#version 450 core
+#extension GL_ARB_separate_shader_objects : enable
+
 layout(location = 0) in vec2 vert_position;
 layout(location = 1) in vec2 vert_tex_coord;
 layout(location = 0) out vec2 frag_tex_coord;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 986ffdc24..fc89f4208 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -45,6 +45,17 @@
 #include "video_core/host_shaders/antialiasing/SearchTex.h"
 #include "video_core/host_shaders/scaling/vulkan_area_sampling_frag.h"
 #include "video_core/host_shaders/scaling/vulkan_area_sampling_vert.h"
+#include "video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_vert.h"
+#include "video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_part1_frag.h"
+#include "video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass0_part2_frag.h"
+#include "video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_vert.h"
+#include "video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part1_frag.h"
+#include "video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part2_frag.h"
+#include "video_core/host_shaders/scaling/FSR/Vulkan/vulkan_fsr_pass1_part3_frag.h"
+#include "video_core/host_shaders/scaling/FSR/ffx_a_h.h"
+#include "video_core/host_shaders/scaling/FSR/ffx_fsr1_h.h"
+#include "video_core/host_shaders/scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear_vert.h"
+#include "video_core/host_shaders/scaling/SharpBilinear/Vulkan/vulkan_sharpbilinear_frag.h"
 
 #include
 
@@ -959,6 +970,36 @@ void RendererVulkan::CompileShaders() {
 
     post_frag_shaders_texture[4] =
         Compile(smaa_pass_2_shader_frag_data, vk::ShaderStageFlagBits::eFragment, device);
+    // The FSR fragment shaders are stitched together from several embedded sources. Join them
+    // with explicit newlines so a part that lacks a trailing newline (e.g. one ending in a `//`
+    // comment) cannot swallow the first line of the part that follows it.
+    std::string FSR_PASS_0_shader_frag_data = std::string(HostShaders::VULKAN_FSR_PASS0_PART1_FRAG);
+    FSR_PASS_0_shader_frag_data += "\n" + std::string(HostShaders::FFX_A_H);
+    FSR_PASS_0_shader_frag_data += "\n" + std::string(HostShaders::FFX_FSR1_H);
+    FSR_PASS_0_shader_frag_data += "\n" + std::string(HostShaders::VULKAN_FSR_PASS0_PART2_FRAG);
+    std::string FSR_PASS_0_shader_vert_data = std::string(HostShaders::VULKAN_FSR_PASS0_VERT);
+    post_vert_shaders_texture[5] =
+        Compile(FSR_PASS_0_shader_vert_data, vk::ShaderStageFlagBits::eVertex, device);
+    post_frag_shaders_texture[5] =
+        Compile(FSR_PASS_0_shader_frag_data, vk::ShaderStageFlagBits::eFragment, device);
+
+    std::string FSR_PASS_1_shader_frag_data = std::string(HostShaders::VULKAN_FSR_PASS1_PART1_FRAG);
+    FSR_PASS_1_shader_frag_data += "\n" + std::string(HostShaders::FFX_A_H);
+    FSR_PASS_1_shader_frag_data += "\n" + std::string(HostShaders::VULKAN_FSR_PASS1_PART2_FRAG);
+    FSR_PASS_1_shader_frag_data += "\n" + std::string(HostShaders::FFX_FSR1_H);
+    FSR_PASS_1_shader_frag_data += "\n" + std::string(HostShaders::VULKAN_FSR_PASS1_PART3_FRAG);
+    std::string FSR_PASS_1_shader_vert_data = std::string(HostShaders::VULKAN_FSR_PASS1_VERT);
+    post_vert_shaders_texture[6] =
+        Compile(FSR_PASS_1_shader_vert_data, vk::ShaderStageFlagBits::eVertex, device);
+    post_frag_shaders_texture[6] =
+        Compile(FSR_PASS_1_shader_frag_data, vk::ShaderStageFlagBits::eFragment, device);
+
+    std::string SharpBilinear_shader_frag_data = std::string(HostShaders::VULKAN_SHARPBILINEAR_FRAG);
+    std::string SharpBilinear_shader_vert_data = std::string(HostShaders::VULKAN_SHARPBILINEAR_VERT);
+    post_vert_shaders_screen[1] =
+        Compile(SharpBilinear_shader_vert_data, vk::ShaderStageFlagBits::eVertex, device);
+    post_frag_shaders_screen[1] =
+        Compile(SharpBilinear_shader_frag_data, vk::ShaderStageFlagBits::eFragment, device);
 
     auto properties = instance.GetPhysicalDevice().getProperties();
     for (std::size_t i = 0; i < present_samplers.size(); i++) {
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index c14f49c67..24ba36e8e 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -92,15 +92,15 @@ struct PresentUniformData {
     int layer = 0;
     int reverse_interlaced = 0;
     int convert_colors;
+    float FSR_SHARPENING = 0.0f; // Read by the FSR RCAS pass; defaulted so it is never garbage.
 };
-static_assert(sizeof(PresentUniformData) == 116,
+static_assert(sizeof(PresentUniformData) == 120,
               "PresentUniformData does not structure in shader!");
 
 class RendererVulkan : public VideoCore::RendererBase {
     static constexpr std::size_t PRESENT_PIPELINES = 3;
-    static constexpr std::size_t POST_PIPELINES_SCREEN = 1;
-    static constexpr std::size_t POST_PIPELINES_TEXTURE = 5;
-    static constexpr std::size_t POST_SHADERS = 8;
+    static constexpr std::size_t POST_PIPELINES_SCREEN = 2;
+    static constexpr std::size_t POST_PIPELINES_TEXTURE = 7;
 public:
     explicit RendererVulkan(Core::System& system, Pica::PicaCore& pica, Frontend::EmuWindow& window,
                             Frontend::EmuWindow* secondary_window);