From 05adddb04269f87277b6b479a15c31984311b595 Mon Sep 17 00:00:00 2001 From: crashGG Date: Wed, 13 May 2026 21:08:47 +0300 Subject: [PATCH] Enhance MMPX, Fixed some defects in the original algorithm by morphological analysis Enhance MMPX, Fixed some defects in the original algorithm by morphological analysis Set mmpx scaling to 2x --- .../host_shaders/texture_filtering/mmpx.frag | 1146 +++++++++++++++-- .../renderer_opengl/gl_blit_helper.cpp | 39 +- 2 files changed, 1103 insertions(+), 82 deletions(-) diff --git a/src/video_core/host_shaders/texture_filtering/mmpx.frag b/src/video_core/host_shaders/texture_filtering/mmpx.frag index 26f192e51..ca4b593bd 100644 --- a/src/video_core/host_shaders/texture_filtering/mmpx.frag +++ b/src/video_core/host_shaders/texture_filtering/mmpx.frag @@ -1,6 +1,18 @@ // Copyright 2023 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. +/* MMPX.glc + Copyright 2020 Morgan McGuire & Mara Gagiu. + Provided under the Open Source MIT license https://opensource.org/licenses/MIT + + MMPXEnhanced v3.0.1 + This shader is an enhanced iteration of MMPX.glc + It improves the visual quality while preserving the pixel-art aesthetic by + identifying and analyzing specific geometric shapes, effectively resolving + the artifacts found in the original algorithm. + © 2025-2026 by crashGG. + +*/ +// Performs 2x upscaling. + //? #version 430 core precision mediump float; @@ -8,124 +20,1096 @@ precision mediump float; layout(location = 0) in vec2 tex_coord; layout(location = 0) out vec4 frag_color; layout(binding = 0) uniform sampler2D tex; +/////////////////////////////////////////// -#define src(x, y) texture(tex, coord + vec2(x, y) * 1.0 / source_size) +// RGB visual weight + alpha segmentation +float luma(vec4 col) { + + // Use the CRT-era BT.601 standard. Clamp the result to [0.0 - 0.999] + float rgbsum =min(dot(col.rgb, vec3(0.299, 0.587, 0.114)), 0.999); + + // Alpha weighting can be removed for subsequent fractional bit extraction + float alphafactor = + (col.a > 0.854102) ? 0.0 : // Upper segment of two short golden ratio divisions + (col.a > 0.618034) ? 2.0 : // One golden ratio division + (col.a > 0.381966) ? 4.0 : // Lower segment of one short golden ratio division + (col.a > 0.145898) ? 6.0 : // Lower segment of two short golden ratio divisions + (col.a > 0.002) ? 8.0 : 10.0; // Fully transparent + + return rgbsum + alphafactor; -float luma(vec4 col){ - return dot(col.rgb, vec3(0.2126, 0.7152, 0.0722)) * (1.0 - col.a); } -bool same(vec4 B, vec4 A0){ - return all(equal(B, A0)); +/* Constant Definitions: +0.145898 : Two short golden ratio divisions of 1.0 +0.0638587 : Squared value after two short golden ratio divisions of RGB Euclidean distance +0.4377 : Squared value after one short golden ratio division of RGB Euclidean distance +0.75 : Squared half of RGB Euclidean distance +*/ + +bool simb(vec4 col1, vec4 col2) { + + highp vec4 diff = col1 - col2; + + float maxdiff = max(diff.r, max(diff.g, diff.b)); + float mindiff = min(diff.r, min(diff.g, diff.b)); + + // Luminance base weight: both colors must satisfy > 0.078 (0.234÷3) + float weight = step(0.234, min(col1.r+col1.g+col1.b, col2.r+col2.g+col2.b)); + // Transparency base weight: both colors must be fully opaque // xxx.alpha + float weight2 = step(0.998, min(col1.a, col2.a)); + + // Find the most opposite channel: if one positive and one negative, take the one with the smallest absolute value; 0 for same sign + // Filter same-sign cases using max(0.0, ...) + // Skip team_rebel if either pixel has luminance < 0.078 or is not fully opaque + float team_rebel = min(max(0.0, maxdiff), max(0.0, -mindiff)) * weight * weight2; // xxx.alpha + float finaldist = (maxdiff - mindiff) + team_rebel; + + highp float dot_diff = dot(diff.rgb, diff.rgb); //xxx.alpha + + // Equivalent to (finaldist ÷ 0.145898 )² + highp float factor = (finaldist * finaldist) * 46.9787; + + // Mask = 0.0 when two alphas are close, dot_diff[0-3] is valid at this time + float alpha_match_mask = step(0.145898, abs(diff.a)); + + return dot_diff < mix(0.0638587, 0.0, factor) - alpha_match_mask*5.0; // xxx.alpha } -bool notsame(vec4 B, vec4 A0){ - return any(notEqual(B, A0)); +bool sim(vec4 col1, vec4 col2) { + + highp vec4 diff = col1 - col2; + + // RGB color difference variation range (max_diff - min_diff) + float delta_range = max(diff.r, max(diff.g, diff.b)) - min(diff.r, min(diff.g, diff.b)); + + highp float dot_diff = dot(diff.rgb, diff.rgb); //xxx.alpha + + // Equivalent to (delta_range ÷ 0.382 )² + highp float factor = (delta_range * delta_range) * 6.8541; + + // Both near transparent: both_near_trans=1.0, return true unconditionally + float both_near_trans = step(max(col1.a, col2.a), 0.381966); + // Mask = 0.0 when two alphas are close, dot_diff[0-3]< mix() takes effect at this time + float alpha_match_mask = step(0.381966, abs(diff.a)); + + return dot_diff < mix(0.0638587, 0.0, factor) - alpha_match_mask*5.0 + both_near_trans*10.0; // xxx.alpha } -bool all_eq2(vec4 B, vec4 A0, vec4 A1) { - return (same(B,A0) && same(B,A1)); +float mixGate(vec4 col1, vec4 col2) { + + highp vec4 diff = col1 - col2; + + // RGB color difference variation range (max_diff - min_diff) + float delta_range = max(diff.r, max(diff.g, diff.b)) - min(diff.r, min(diff.g, diff.b)); + + highp float dot_diff = dot(diff.rgb, diff.rgb); //xxx.alpha + + // Equivalent to (delta_range ÷ 0.618 )² + highp float factor = (delta_range * delta_range) * 2.618034; + + // Mask = 0.0 when alpha difference < 0.5, dot_diff[0-3]< mix() takes effect at this time + float alpha_match_mask = step(0.5, abs(diff.a)); + + return step(dot_diff , mix(0.75, 0.0, factor) - alpha_match_mask*5.0); // xxx.alpha } -bool all_eq3(vec4 B, vec4 A0, vec4 A1, vec4 A2) { - return (same(B,A0) && same(B,A1) && same(B,A2)); + +#define eq(a,b) all(lessThan(abs(a-b), vec4(0.01, 0.01, 0.01, 0.145898))) + +#define neq(a,b) !eq(a,b) + +#define all_eq2(a, b1, b2) \ + ( eq(a,b1) && eq(a,b2)) + +#define all_eq3(a, b1, b2, b3) \ + ( eq(a,b1) && eq(a,b2) && eq(a,b3)) + +#define all_eq4(a, b1, b2, b3, b4) \ + ( eq(a,b1) && eq(a,b2) && eq(a,b3) && eq(a,b4)) + +#define any_eq2(a, b1, b2) (eq(a,b1)||eq(a,b2)) +#define any_eq3(a, b1, b2, b3) (eq(a,b1)||eq(a,b2)||eq(a,b3)) +// Better than a!=b1 && a!=b2 +#define none_eq2(a, b1, b2) !any_eq2(a, b1, b2) + + +// Total int2 difference allowed for three channels +//#define vec_neq(a,b) (dot(abs(a-b), vec4(1.0)) > 0.01) +// Int2 difference allowed per channel for three channels +//#define vec_eq(a,b) all(lessThan(abs(a-b), vec4(0.01))) +#define vec_neq(a, b) !eq(a,b) + + +// Pre-define +//const vec4 testcolor = vec4(1.0, 0.0, 1.0, 1.0); // Magenta +//const vec4 testcolor2 = vec4(0.0, 1.0, 1.0, 1.0); // Cyan +//const vec4 testcolor3 = vec4(1.0, 1.0, 0.0, 1.0); // Yellow +//const vec4 testcolor4 = vec4(1.0, 1.0, 1.0, 1.0); // White +const vec4 slopOFF = vec4(2.0); +const vec4 slopeBAD = vec4(4.0); +const vec4 theEXIT = vec4(8.0); + +#define mixXE mix(vX,vE,mixFactor) +#define mixXEoff mixXE+slopOFF +#define Xoff vX+slopOFF +//#define checkblack(col) ((col).g < 0.078 && (col).r < 0.1 && (col).b < 0.1) +#define checkblack(col) all(lessThan((col).rgb, vec3(0.1, 0.078, 0.1))) +#define checkwhite(col) all(greaterThan((col).rgb, vec3(0.92, 0.92, 0.92))) + +//pin zz +// Concave + Cross weak mixing (weak blend / no blend) +vec4 admixC(vec4 vX, vec4 vE) { + // Weak blending. Mix enabled? 0.618 if true, else 1.0 + float mixFactor = mixGate(vX, vE) * (-0.381966) + 1.0; + + return mixXE; } -bool all_eq4(vec4 B, vec4 A0, vec4 A1, vec4 A2, vec4 A3) { - return (same(B,A0) && same(B,A1) && same(B,A2) && same(B,A3)); +// K-type forced weak blending +vec4 admixK(vec4 vX, vec4 vE) { + vec4 diff = vX - vE; + // mixFactor slides from 0.5 to 1.0 based on point set distance, quadratic curve, steeper closer to 1.0 + float mixFactor = dot(diff.rgb, diff.rgb) * 0.16666 + 0.5; // xxx.alpha + // mixFactor slides linearly from 0.5 to 1.0 based on Euclidean distance + //float mixFactor = distance(vX, vE) * 0.28867 + 0.5; + return mixXE; } -bool any_eq3(vec4 B, vec4 A0, vec4 A1, vec4 A2) { - return (same(B,A0) || same(B,A1) || same(B,A2)); +// L-type 2:1 slope, extension of main corner +// Practice: This rule requires 4 pixels on the strict slope to be identical. Otherwise, various glitches will occur! +vec4 admixL(vec4 vX, vec4 vE, vec4 vS) { + + // The original eqX,E check would catch many duplicate pixels, now the main thread has been filtered by slopeok. + + // If target X is different from reference S (sample), it means it has been blended once, return directly without re-blending + if (vec_neq(vX, vS)) return vX; + + float mixFactor = 0.381966 * mixGate(vX,vE); + + return mixXE; } -bool none_eq2(vec4 B, vec4 A0, vec4 A1) { - return (notsame(B,A0) && notsame(B,A1)); -} +#define vE E +#define vB B +#define vD D +#define vF F +#define vH H +#define vA A +#define vC C +#define vG G +#define vI I -bool none_eq4(vec4 B, vec4 A0, vec4 A1, vec4 A2, vec4 A3) { - return (notsame(B,A0) && notsame(B,A1) && notsame(B,A2) && notsame(B,A3)); +/************************************************************************************************************************************** + * main slope + X cross-processing mechanism * + ******************************************************************************************************************************** zz */ +vec4 admixX( vec4 A, vec4 B, vec4 C, vec4 D, vec4 E, vec4 F, vec4 G, vec4 H, vec4 I + , vec4 P, vec4 PA, vec4 PC, vec4 Q, vec4 QA, vec4 QG, vec4 R, vec4 RC, vec4 RI, vec4 S, vec4 SG, vec4 SI, vec4 AA, vec4 CC, vec4 GG + , float El, float Bl, float Dl, float Fl, float Hl + //, vec4 vE, vec4 vB, vec4 vD, vec4 vC, vec4 vG + ) { + + + bool eq_B_C = eq(B,C); + bool eq_D_G = eq(D,G); + + // Exit if clamped by bilateral straight walls + if (eq_B_C && eq_D_G) return slopeBAD; + + + //Pre-declare + bool eq_B_P; bool eq_B_PA; bool eq_B_PC; + bool eq_D_Q; bool eq_D_QA; bool eq_D_QG; + bool eq_E_F; bool eq_E_H; bool eq_A_AA; + + vec4 vX; + float mixFactor; + + bool eq_E_C = eq(E,C); + bool eq_E_G = eq(E,G); + bool eq_A_P = eq(A,P); + bool eq_A_Q = eq(A,Q); + bool comboE3 = eq_E_C && eq_E_G; + bool comboA3 = eq_A_P && eq_A_Q; + + // Remove alpha channel weighting xxx.alpha + Bl = fract(Bl); + Dl = fract(Dl); + El = fract(El); + Fl = fract(Fl); + Hl = fract(Hl); + +/*========================================= + B != D + ==================================== zz */ +if (neq(B,D)){ + + // E-A equality violates preset logic, exit + if (eq(E,A)) return slopeBAD; + + // B-D unconnected? Removed + + // B and D are different, and the difference is larger than either side to center E, exit + float diffBD = abs(Bl-Dl); + if (diffBD > El-Bl || diffBD > El-Dl) return slopeBAD; + + + // X is the blend of B and D + vX = mix(vB, vD, 0.5); + vX.a = min(vB.a, vD.a); // xxx.alpha + + mixFactor = 0.381966 * mixGate(vX,vE) * step(0.002, vE.a); // xxx.alpha + + eq_B_PC = eq(B,PC); + eq_D_QG = eq(D,QG); + + // Strong trend collection + if (none_eq2(A,B,D)){ + if (comboA3) return mixXEoff; + if ( eq_A_P && eq_B_PC && !eq_B_C ) return mixXEoff; + if ( eq_A_Q && eq_D_QG && !eq_D_G ) return mixXEoff; + + // Double slope clamping BD, note the matching direction + if ( eq_A_P && eq_E_G ) return mixXEoff; + if ( eq_A_Q && eq_E_C ) return mixXEoff; + + // Hollow L inner curve + if ( eq_E_C && eq_D_G ) return mixXEoff; + if ( eq_E_G && eq_B_C ) return mixXEoff; + +} + // E-side three-pixel alignment + if ( comboE3 ) return mixXEoff; + + // Original rule, add a slope condition + if ( eq_E_C && eq_B_PC && neq(B,P)) return mixXEoff; + if ( eq_E_G && eq_D_QG && neq(D,Q)) return mixXEoff; + + eq_E_F = eq(E,F); + + // F - H + if (eq(F,H)) { + + // Double slope (exclude single pixel surrounded by C, BD different colors, loose connection) + if ( eq_E_C && !eq_D_G && (!eq_E_F||neq(E,P)) ) return mixXEoff; + if ( eq_E_G && !eq_B_C && (!eq_E_F||neq(E,Q)) ) return mixXEoff; + + // F+ H+ extension + if ( !eq_E_F && eq_B_PC && eq(F,RC) ) return mixXEoff; + if ( !eq_E_F && eq_D_QG && eq(H,SG) ) return mixXEoff; + } + + return slopeBAD; +} // B != D + + + /********* B == D *********/ + + // Avoid font edges being squeezed by black background on three sides + bool Xisblack = checkblack(vB); + if ( Xisblack && El >0.5 && (Fl<0.078 || Hl<0.078) ) return theEXIT; + + vX = vB; + vX.a = min(vB.a, vD.a); // xxx.alpha + + mixFactor = 0.381966 * mixGate(vX,vE) * step(0.002, vE.a); // xxx.alpha + + bool B_slope; bool B_tower; bool B_wall; + bool D_slope; bool D_tower; bool D_wall; + bool En3; + #define En4square En3&&eq(E,I) + +/*=================================================== + E - A Cross + ============================================== zz */ +if (eq(E,A)) { + + // Special pattern: Dithering pattern + // Target: Forced blending + + eq_E_F = eq(E,F); + eq_E_H = eq(E,H); + + bool Eisblack = checkblack(vE); + + // 1. Dithering pattern center + if ( comboE3 && !eq_E_F && !eq_E_H && eq(E,I) ) { + + // Exit if center E is black (The King of Fighters '96 energy gauge, The Punisher's belt) to avoid excessive contrast blending + if (Eisblack) return theEXIT; + // Practice 1: Do not catch black B points, that is normal logic entry + // Mix enabled? 0.381966 if true, else 0.618034 + mixFactor = 0.618034 * (1.0 - mixFactor); + return mixXEoff; + } + + eq_A_AA = eq(A,AA); + + // 2. Dithering pattern edge + if ( comboA3 && eq_A_AA && none_eq2(A,PA,QA) ) { + if (Eisblack) return theEXIT; + // Mix enabled? 0.381966 if true, else 0.618034 + mixFactor = 0.618034 * (1.0 - mixFactor); + // Layered gradient edges, use strong blending + if ( neq(B,PA) && eq(PA,QA) ) return mixXEoff; + // Remaining perfect cross, must be dithering edge, use weak blending. + // Practice: No need to specify health bar border cases separately. + // Base weak blending. Mix enabled? 0.618 if true, else 0.854 + // Note: mixFactor has been modified once in the upper layer + mixFactor += 0.236068; + return mixXEoff; + } + + // xxx.alpha + if (vE.a<0.002 && !Xisblack) return vX; + + eq_B_PC = eq(B,PC); + eq_B_PA = eq(B,PA); + eq_D_QG = eq(D,QG); + eq_D_QA = eq(D,QA); + + // No need to judge Eisblack for the following two + // 3. Semi-dithering, usually shadow expression on outline edges, use weak blending + + if ( comboE3 && comboA3 && + (eq_B_PC || eq_D_QG) && eq_D_QA && eq_B_PA) { + // Base weak blending. Mix enabled? 0.618 if true, else 0.854 + mixFactor = mixFactor * (-0.618034) + 0.8541; + return mixXEoff; + } + + // 4. Quarter dithering, easy to cause ugly small tail effect (Guile's plane in SF2, Dino Crisis select screen) + + if ( comboE3 && eq_A_P + && eq_B_PA && eq_D_QA && eq_D_QG + && eq_E_H + ) {// Base weak blending. Mix enabled? 0.618 if true, else 0.854 + mixFactor = mixFactor * (-0.618034) + 0.8541; + return mixXEoff; + } + + if ( comboE3 && eq_A_Q + && eq_B_PA && eq_D_QA && eq_B_PC + && eq_E_F + ) {// Base weak blending. Mix enabled? 0.618 if true, else 0.854 + mixFactor = mixFactor * (-0.618034) + 0.8541; + return mixXEoff; + } + + + // A-side three-pixel alignment, strong pattern, must come after dithering rules. + if (comboA3) return Xoff; + + // E-side three-pixel alignment (must come after comboA3) + if (comboE3) return mixXEoff; + + eq_B_P = eq(B, P); + eq_D_Q = eq(D, Q); + + B_slope = eq_B_PC && !eq_B_P && !eq_B_C && !eq_B_PA; + D_slope = eq_D_QG && !eq_D_Q && !eq_D_G && !eq_D_QA; + + B_wall = eq_B_C && !eq_B_PC && !eq_B_P; // Remove one misalignment check + D_wall = eq_D_G && !eq_D_QG && !eq_D_Q; // Remove one misalignment check + + B_tower = eq_B_P && !eq_B_PC && !eq_B_C && !eq_B_PA; + D_tower = eq_D_Q && !eq_D_QG && !eq_D_G && !eq_D_QA; + + + if ( B_slope && eq_E_G ) return mixXEoff; + if ( D_slope && eq_E_C ) return mixXEoff; + + +// E B D regional chessboard scoring rules + + float scoreE = 0.0; float scoreB = 0.0; float scoreD = 0.0; float scoreZ = 0.0; + +// E Zone + if (eq_E_C) { + scoreE += 1.0 +float(eq(F,H)) +float(B_slope); + scoreE -= float(all_eq2(E,P,PC)&&!D_wall); + } + + if (eq_E_G) { + scoreE += 1.0 +float(eq(F,H)) +float(D_slope); + scoreE -= float(all_eq2(E,Q,QG)&&!B_wall); + } + + // Higher priority than rectangle + scoreE += float(B_slope && eq_A_Q || D_slope && eq_A_P); + + En3 = eq_E_F && eq_E_H; + + // Clear 4/6 rectangle, exit early, not participate in final Z long slope judgment + if ( scoreE<0.1 && mixFactor<0.1 && En4square && eq(E,S)==eq(E,SI) && eq(E,R)==eq(E,RI) ) return theEXIT; + + // No points for En3 + //if ( scoreE==0 && En3 ) scoreE += 1; + + // Single bar + if ( scoreE<0.1 && !En3 && neq(E,I) ) { + if ( B_wall && eq_E_F ) return theEXIT; + if ( D_wall && eq_E_H ) return theEXIT; + } + + // Lower priority than single bar + scoreE += float(B_slope && eq_A_P || D_slope && eq_A_Q); + + if ( !En3 && eq(F,H) ) { + if (Eisblack) return slopeBAD; //Single black pixel + // slope+ eq_F_H combination cannot be used, it will create bubbles with inner L in BD area + //scoreE += float(B_slope&&neq(C,F))+float(D_slope&&neq(G,H)); + + bool condZ1 = B_wall && (eq(F,R) || eq(F,RC) || eq(G,H) || eq(F,I)); + bool condZ2 = D_wall && (eq(C,F) || eq(H,SG) || eq(H,S) || eq(F,I)); + scoreZ = float(condZ1 || condZ2); + } + + +// B Zone + + if (eq_B_PA) { + scoreB -= 1.0 +float(eq(P,C)) +float(eq_A_AA); + } + + if (eq(P,C)){ + scoreB -= float(eq_A_AA); + // Important. Prevent clone patterns on this side caused by Z bonus only from F==H! + // Equivalent: if (scoreE==0) scoreZ = 0; + scoreZ *= float(scoreE < 0.1); + } + +// D Zone + + if (eq_D_QA) { + scoreD -= 1.0 +float(eq(G,Q)) +float(eq_A_AA); + } + + if (eq(G,Q)){ + scoreD -= float(eq_A_AA); + // Same logic as B zone + scoreZ *= float(scoreE < 0.1); + } + + float scoreFinal = scoreE + scoreB + scoreD + scoreZ ; + + // Long slope pattern: return vX without blending when forming a long gentle slope with no deductions in B and D zones + scoreFinal += float(min(scoreB,scoreD) > -0.1 && (B_wall && D_tower || B_tower && D_wall)) *2.0; + + // Set mixFactor to 0 and return vX without blending if scoreFinal >= 2 + mixFactor *= (1.0 - step(1.9, scoreFinal)); + // Return mixXE if scoreFinal >= 1, else slopeBAD + return mixXE + slopeBAD*(1.0 - step(0.9, scoreFinal)); + +} // E == A + + +/*=============================================== + Main Rule E - C - G + ========================================== zz */ + + if (eq_E_C ) { + if (comboA3) return vX; + if (comboE3) return mixXE; + if (all_eq2(B,A,PA) && all_eq3(E,F,P,PC)) return theEXIT; + return mixXE; + } + + if (eq_E_G) { + if (comboA3) return vX; + if (comboE3) return mixXE; + if (all_eq2(D,A,QA) && all_eq3(E,H,Q,QG)) return theEXIT; + return mixXE; + } + + +/*========================================================= + F - H / B+ D+ Extension New Rules + ==================================================== zz */ + + // This section is the remainder after the previous filtering, the central En4square and BD are naturally wall-isolated logically + // B-D unconnected? No longer needed after the new "double slope rule" processing. + // Experience 1: Hollow L(1+2) flattens the inner side but not the outer side + // Experience 2: "厂" shape edges flatten the outer side but not the inner side + + // No need to participate in subsequent calculations if E is hollow and has no diagonal connection + if (vE.a<0.002) return theEXIT; // xxx.alpha + + bool eq_A_B = eq(A,B); + bool eq_F_H = eq(F,H); + + eq_B_P = eq(B,P); + eq_B_PC = eq(B,PC); + eq_B_PA = eq(B,PA); + eq_D_Q = eq(D,Q); + eq_D_QG = eq(D,QG); + eq_D_QA = eq(D,QA); + + B_slope = eq_B_PC && !eq_B_P && !eq_B_C; + D_slope = eq_D_QG && !eq_D_Q && !eq_D_G; + B_tower = eq_B_P && !eq_B_PC && !eq_B_C && !eq_B_PA; + D_tower = eq_D_Q && !eq_D_QG && !eq_D_G && !eq_D_QA; + B_wall = eq_B_C && !eq_B_PC && !eq_B_P; + D_wall = eq_D_G && !eq_D_QG && !eq_D_Q; + + +// 1. B-D hollow slope + if (!eq_A_B) { + + // A-side three-pixel alignment, high priority + // Note: comboA3 cannot be used without A!=B in this section + if (comboA3) return Xoff; + + if ( (B_slope||B_tower) && (D_slope||D_tower) ) return Xoff; + + if ( B_slope && eq_A_P ) return mixXEoff; + if ( D_slope && eq_A_Q ) return mixXEoff; + + if ( (B_slope || D_slope) && eq_F_H ) return mixXEoff; + + if ( B_slope && eq(H,SG) ) return mixXEoff; + if ( D_slope && eq(F,RC) ) return mixXEoff; + + if ( B_slope && eq_A_Q && eq(Q,QG) ) return mixXEoff; + if ( D_slope && eq_A_P && eq(P,PC) ) return mixXEoff; + + } + + + + bool sim_EC = sim(vE, vC); + bool sim_EG = sim(vE, vG); + + // Exit if center E is a single high-contrast pixel + // Tighten threshold if E is bright + float E_lumDiff = mix(0.381966, 0.145898, max((El - 0.8541),0.0) * 6.8541); + + // Large difference from surroundings (lower priority than slope detection) // xxx.alpha + if ( mixFactor<0.1 && !sim_EC && !sim_EG && E.a>0.381966 && neq(E,I) && abs(El-Fl)>E_lumDiff && abs(El-Hl)>E_lumDiff ) return slopeBAD; + + + eq_E_F = eq(E,F); + eq_E_H = eq(E,H); + + // long slope special trend + // Note: let squares pass, judged by subsequent En4square + if ( eq_B_C && eq_D_Q ) { + if ( eq(P,PC) && eq(A,QA) && !eq_D_QG && eq_E_F && !eq_E_H && eq(H,I)) return theEXIT; + if ( eq_A_B ) return slopeBAD; + if ( B_wall && D_tower && eq_E_F) return vX; + return mixXEoff; + } + + if ( eq(D,G) && eq(B,P)) { + if ( eq(Q,QG) && eq(A,PA) && !eq_B_PC && eq_E_H && !eq_E_F && eq(F,I)) return theEXIT; + if ( eq_A_B ) return slopeBAD; + if ( B_tower && D_wall && eq_E_H) return vX; + return mixXEoff; + } + + + En3 = eq_E_F && eq_E_H; + + // Wall-enclosed 4-pixel square (En3 && eq(E,I)) + if ( En4square ) { // This square detection needs to be placed after the previous rule + // Exit for solid L inner enclosure (some font edges, building corners) + // L inner enclosure (hollow corner) / high-contrast independent clear 4-pixel square / 6-pixel rectangle (no need to judge eq(G,H), eq(C,F) for rectangle edges, impossible) + if ( ( eq_B_C || eq_D_G) && eq_A_B) return theEXIT; + if ( ( eq_B_C || eq_D_G || mixFactor<0.1) && (eq(E,S) == eq(E, SI) && eq(E,R) == eq(E, RI)) ) return theEXIT; + return mixXEoff; + } + + // xxx.alpha + if (vE.a<0.381966) return mixXEoff; + + // BD-side solid non-wall pattern + if (!eq_B_C && !eq_D_G ) { + // BD-side semi-solid 1 F-H required + if ( comboA3 && eq_F_H ) return Xoff; + + // BD-side semi-solid 2 (add a "definitely round" trend judgment) + if ( comboA3&&eq_B_PC&&eq(C,CC) ) return Xoff; + if ( comboA3&&eq_D_QG&&eq(G,GG) ) return Xoff; + + // BD unconnected and non-En3, exit (Practice: required for this branch section) + if ( !eq_B_P && !eq_B_PC && !eq_D_Q && !eq_D_QG && !En3 ) return slopeBAD; + + // 3 diagonal gradients after excluding the above + if (eq_A_Q&&sim_EC) return mixXEoff; + if (eq_A_P&&sim_EG) return mixXEoff; + if (sim_EC&&sim_EG ) return mixXEoff; + } + + // Wall-enclosed triangle (remove one solid corner, leave the rest to the next rule) + if ( En3 && eq_A_B) return theEXIT; + + // F - H + // Principle: Connect L inner curve, not L outer curve + if (eq_F_H) { + + // F-H three-pixel pattern, huge boost! Prioritize A==B + if ( eq_B_PC&&eq(F,RC) || eq_D_QG&&eq(H,SG) ) return mixXEoff; + + if (eq_A_B) return slopeBAD; + + if ( eq_B_C || eq_D_G) return mixXEoff; + if ( eq_B_PC || eq_D_QG) return mixXEoff; + + } + + return slopeBAD; + +} // admixX + + +vec4 admixS( vec4 A, vec4 B, vec4 C, vec4 D, vec4 E, vec4 F, vec4 G, vec4 H, vec4 I + , vec4 R, vec4 RC, vec4 RI, vec4 S, vec4 SG, vec4 SI, vec4 II, vec4 CC + //, vec4 vE, vec4 vF, vec4 vC + ) { + + // A B C . + // QD 🄴 🅵 🆁 Zone 4 + // 🅶 🅷 I + // S + + + if (any_eq2(F,C,I)) return vE; + + // Exclude contralateral damage patterns + if ( (eq(F,RI) || eq(G,S) || eq(R, RI)) && neq(R,I) ) return vE; + + if (eq(H, S) && none_eq2(H,I,SG)) return vE; + + if ( eq(R, RC) || eq(G,SG) ) return vE; + + // D==E==C pattern, need to extend one more pixel in the trend direction when E is white (Guile's face in Street Fighter 2) + if ( checkwhite(vE) && all_eq2(E,C,D) && none_eq2(E,RC,CC)) return vE; + + // Old contralateral trend judgment + // if ( none_eq2(I,H,S) && (neq(SI,RI) || eq(I,II)) ) return vE; + + + #define vX vF + float mixFactor = 0.381966 * mixGate(vX,vE) * step(0.002, vE.a); // xxx.alpha + + if ( eq(E,C) && (eq(E,D)||eq(B,D)) ) return mixXE; + + bool sim_E_C = sim(vE,vC); + + if ( sim_E_C && eq(E,D) && eq(B,C) ) return mixXE; + + if ( (sim_E_C || mixFactor>0.1) && all_eq2(B,C,D) ) return mixXE; + + return vE; } void main() { + // Get actual pixel dimensions of texture vec2 source_size = vec2(textureSize(tex, 0)); - vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5); - vec2 coord = tex_coord - pos / source_size; + // Precompute reciprocal for performance + vec2 inv_source_size = 1.0 / source_size; + // Calculate sub-pixel offset within pixel grid - vec4 E = src(0.0,0.0); + vec2 pos = fract(tex_coord * source_size); + // Force sampling coordinate to pixel geometric center - vec4 A = src(-1.0,-1.0); - vec4 B = src(0.0,-1.0); - vec4 C = src(1.0,-1.0); + vec2 coord = tex_coord - (pos - vec2(0.5)) * inv_source_size; - vec4 D = src(-1.0,0.0); - vec4 F = src(1.0,0.0); + // Map UV to float pixel coordinates + vec2 pixelPos = tex_coord * source_size; + // Integer index of current pixel (0-based) + // Use floor (not round) for consistent coordinate system + ivec2 currPixel = ivec2(floor(pixelPos)); + // Convert texture size to integer vector + ivec2 texSize = ivec2(source_size); - vec4 G = src(-1.0,1.0); - vec4 H = src(0.0,1.0); - vec4 I = src(1.0,1.0); + // Boundary check before texture sample; return transparent if out of bounds + #define checkp(c, d) (currPixel.x+int(c)>=0 && currPixel.x+int(c)=0 && currPixel.y+int(d)=Dl) || same(E,A)) && any_eq3(E,A,C,G) && ((El=Bl) || same(E,C)) && any_eq3(E,A,C,I) && ((El=Hl) || same(E,G)) && any_eq3(E,A,G,I) && ((El=Fl) || same(E,I)) && any_eq3(E,C,G,I) && ((El0.002 && vD.a>0.002) && // xxx.alpha + (!eq_E_B && !eq_E_D && !oppoPix) && (!eq_D_H && !eq_B_F) + && (eq(E,A) || El>=Dl&&El>=Bl) && ( (El 7.1); // theEXIT + J = (J.b > 3.1) ? vE : // Restore vE for slopeBAD、theEXIT + (J.b > 1.1) ? (J - 2.0) :// slopeoff + J; // Normal pixel [0-1.0] + } +// B - F + if ( !slope1 && (vB.a>0.002 && vF.a>0.002) + && (!eq_E_B && !eq_E_F && !oppoPix) && (!eq_B_D && !eq_F_H) + && (eq(E,C) || El>=Bl&&El>=Fl) && ( (El 7.1); + K = (K.b > 3.1) ? vE : + (K.b > 1.1) ? (K - 2.0) : + K; + } +// D - H + if ( !slope1 && !skiprest && (vD.a>0.002 && vH.a>0.002) + && (!eq_E_D && !eq_E_H && !oppoPix) && (!eq_F_H && !eq_B_D) + && (eq(E,G) || El>=Hl&&El>=Dl) && ((El 7.1); + L = (L.b > 3.1) ? vE : + (L.b > 1.1) ? (L - 2.0) : + L; + } +// F - H + if ( !slope2 && !slope3 && !skiprest && (vF.a>0.002 && vH.a>0.002) + && (!eq_E_F && !eq_E_H && !oppoPix) && (!eq_B_F && !eq_D_H) + && (eq(E,I) || El>=Fl&&El>=Hl) && ((El 7.1); + M = (M.b > 3.1) ? vE : + (M.b > 1.1) ? (M - 2.0) : + M; + } + + +// long gentle 2:1 slope (P100) + + if (slope4ok) { //zone4 long slope + // Original rule extension 1. Pass adjacent pixel comparison as the third parameter of adminxL to ensure no double blending + // Original rule extension 2. No L pattern can appear again within the interval of opposite two pixels unless forming a wall + if (all_eq2(R,F,G) && neq(R, RC) && (neq(Q,G)||eq(Q, QA))) {L=admixL(M,L,vH); skiprest = true;} + // vertical + if (all_eq2(S,H,C) && neq(S, SG) && (neq(P,C)||eq(P, PA))) {K=admixL(M,K,vF); skiprest = true;} + } + + if (slope3ok) { //zone3 long slope + // horizontal + if (all_eq2(Q,D,I) && neq(Q, QA) && (neq(R,I)||eq(R, RC))) {M=admixL(L,M,vH); skiprest = true;} + // vertical + if (all_eq2(S,H,A) && neq(S, SI) && (neq(A,P)||eq(P, PC))) {J=admixL(L,J,vD); skiprest = true;} + } + + if (slope2ok) { //zone2 long slope + // horizontal + if (all_eq2(R,F,A) && neq(R, RI) && (neq(A,Q)||eq(Q, QG))) {J=admixL(K,J,vB); skiprest = true;} + // vertical + if (all_eq2(P,B,I) && neq(P, PA) && (neq(I,S)||eq(S, SG))) {M=admixL(K,M,vF); skiprest = true;} + } + + if (slope1ok) { //zone1 long slope + // horizontal + if (all_eq2(Q,D,C) && neq(Q, QG) && (neq(C,R)||eq(R, RI))) {K=admixL(J,K,vB); skiprest = true;} + // vertical + if (all_eq2(P,B,G) && neq(P, PC) && (neq(G,S)||eq(S, SI))) {L=admixL(J,L,vD); skiprest = true;} + } + +// Longslope can exit after forming, basically no sawslope will form on the diagonal +// Note: sawslope entry cannot exclude diagonal slope (including slopeok) +if (!skiprest && !oppoPix) { + + + // horizontal bottom + if (!eq_E_H && none_eq2(H,A,C)) { + + // A B C ・ + // Q D 🄴 🅵 🆁 Zone 4 + // 🅶🅷 I + // S + // (!slope3 && D!=H) needs to be connected like this to completely exclude the trend + if ( (!slope2 && !eq_B_F) && (!slope3 && !eq_D_H) && (!slope4end && !eq_F_H) && vF.a>0.002 && + !eq_E_F && eq(R,H) && eq(F,G) ) { + M = admixS( A, B, C, D, E, F, G, H, I + , R, RC, RI, S, SG, SI, II, CC + //, vE, vF, vC + ); + skiprest = true;} + + // ・ A B C + // 🆀 🅳 🄴 F R Zone 3 + // G 🅷 🅸 + // S + if ( !skiprest && (!slope1 && !eq_B_D) && (!slope4 && !eq_F_H) && (!slope3end && !eq_D_H) && vD.a>0.002 && + !eq_E_D && eq(Q,H) && eq(D,I) ) { + L = admixS( C, B, A, F, E, D, I, H, G + , Q, QA, QG, S, SI, SG, GG, AA + //, vE, vD, vA + ); + skiprest = true;} } - if (notsame(F,D)) { - if (notsame(D,I) && notsame(D,E) && notsame(D,C)) { - if (all_eq3(D,A,H,S) && none_eq2(D,B,src(1.0,2.0))) J=mix(L, J, 0.5); - if (all_eq3(D,G,B,P) && none_eq2(D,H,src(1.0,2.0))) L=mix(J, L, 0.5); - } + // horizontal up + if ( !skiprest && !eq_E_B && none_eq2(B,G,I)) { + + // P + // 🅐 🅑 C + // QD 🄴 🅵 🆁 Zone 2 + // G H I . + if ( (!slope1 && !eq_B_D) && (!slope4 && !eq_F_H) && (!slope2end && !eq_B_F) && vF.a>0.002 && + !eq_E_F && eq(B,R) && eq(A,F) ) { + K = admixS( G, H, I, D, E, F, A, B, C + , R, RI, RC, P, PA, PC, CC, II + //, vE, vF, vI + ); + skiprest = true;} + + // P + // A 🅑 🅲 + // 🆀 🅳 🄴 F R Zone 1 + // . G H I + if ( !skiprest && (!slope2 && !eq_B_F) && (!slope3 && !eq_D_H) && (!slope1end && !eq_B_D) && vD.a>0.002 && + !eq_E_D && eq(B,Q) && eq(C,D) ) { + J = admixS( I, H, G, F, E, D, C, B, A + , Q, QG, QA, P, PC, PA, AA, GG + //, vE, vD, vG + ); + skiprest = true;} - if (notsame(F,E) && notsame(F,A) && notsame(F,G)) { - if (all_eq3(F,C,H,S) && none_eq2(F,B,src(-1.0,2.0))) K=mix(M, K, 0.5); - if (all_eq3(F,I,B,P) && none_eq2(F,H,src(-1.0,-2.0))) M=mix(K, M, 0.5); - } } - vec2 a = fract(tex_coord * source_size); - vec4 colour = (a.x < 0.5) ? (a.y < 0.5 ? J : L) : (a.y < 0.5 ? K : M); - frag_color = colour; + // vertical left + if ( !skiprest && !eq_E_D && none_eq2(D,C,I) ) { + + // 🅐 B C + // Q 🅳 🄴 F R + // G 🅷 I Zone 3 + // 🆂 ・ + if ( (!slope1 && !eq_B_D) && (!slope4 && !eq_F_H) && (!slope3end && !eq_D_H) && vH.a>0.002 && + !eq_E_H && eq(D,S) && eq(A,H) ) { + L = admixS( C, F, I, B, E, H, A, D, G + , S, SI, SG, Q, QA, QG, GG, II + //, vE, vH, vI + ); + skiprest = true;} + + // 🅟 ・ + // A 🅑 C + // Q 🅳 🄴 F R Zone 1 + // 🅶 HI + if ( !skiprest && (!slope3 && !eq_D_H) && (!slope2 && !eq_B_F) && (!slope1end && !eq_B_D) && vB.a>0.002 && + !eq_E_B && eq(P,D) && eq(B,G) ) { + J = admixS( I, F, C, H, E, B, G, D, A + , P, PC, PA, Q, QG, QA, AA, CC + //, vE, vB, vC + ); + skiprest = true;} + + } + + // vertical right + if ( !skiprest && !eq_E_F && none_eq2(F,A,G) ) { // right + + // A B 🅲 + // Q D 🄴 🅵 R + // G 🅷 I Zone 4 + // . 🆂 + if ( (!slope2 && !eq_B_F) && (!slope3 && !eq_D_H) && (!slope4end && !eq_F_H) && vH.a>0.002 && + !eq_E_H && eq(S,F) && eq(H,C) ) { + M = admixS( A, D, G, B, E, H, C, F + , I, S, SG, SI, R, RC, RI, II, GG + //, vE, vH, vG + ); + skiprest = true;} + + // ・ 🅟 + // A 🅑 C + // Q D 🄴 🅵 R Zone 2 + // G H 🅸 + if ( !skiprest && (!slope1 && !eq_B_D) && (!slope4 && !eq_F_H) && (!slope2end && !eq_B_F) && vB.a>0.002 && + !eq_E_B && eq(P,F) && eq(B,I) ) { + K = admixS( G, D, A, H, E, B, I, F, C + , P, PA, PC, R, RI, RC, CC, AA + //, vE, vB, vA + ); + skiprest = true;} + + } // vertical right +} // sawslope + +// Sawslope can exit after forming, old scheme: skiprest||slopeBAD (will also use slopeOFF (weak pattern) and slopok (strong pattern) but the effect is average) +skiprest = skiprest||slope1||slope2||slope3||slope4||vE.a<0.002||vB.a<0.002||vD.a<0.002||vF.a<0.002||vH.a<0.002; + +/************************************************** + "Concave + Cross" Shape(P100) + *************************************************/ +// Use approximate pixels at the far end of the cross star, useful for some horizontal line + jagged and layered gradient patterns. E.g. glowing text in Street Fighter III 3rd Strike opening, Japanese-style houses in SFZ3MIX, opening of Garou: Mark of the Wolves + +vec4 vT; // Temporary T + +if (!skiprest && + BlFl) && neq(F,src(+3.0, 0.0)) ) {K=admixK(vF,K); M=K;skiprest=true;} // RIGHT +if (!skiprest && !eq_E_D&&eq_E_F&&eq_B_D&&eq_D_H && all_eq2(E,A,G) && (eq(E,R)||El>Dl) && neq(D,src(-3.0, 0.0)) ) {J=admixK(vD,J); L=J;skiprest=true;} // LEFT +if (!skiprest && !eq_E_H&&eq_E_B&&eq_D_H&&eq_F_H && all_eq2(E,G,I) && (eq(E,P)||El>Hl) && neq(H,src(0.0, +3.0)) ) {L=admixK(vH,L); M=L;skiprest=true;} // BOTTOM +if (!skiprest && !eq_E_B&&eq_E_H&&eq_B_D&&eq_B_F && all_eq2(E,A,C) && (eq(E,S)||El>Bl) && neq(B,src(0.0, -3.0)) ) {J=admixK(vB,J); K=J;} // TOP + +} + //final write + + frag_color = (pos.x < 0.5) ? (pos.y < 0.5 ? J : L) : (pos.y < 0.5 ? K : M); + +} \ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_blit_helper.cpp b/src/video_core/renderer_opengl/gl_blit_helper.cpp index 30a502316..869f75706 100644 --- a/src/video_core/renderer_opengl/gl_blit_helper.cpp +++ b/src/video_core/renderer_opengl/gl_blit_helper.cpp @@ -271,13 +271,50 @@ void BlitHelper::FilterXbrz(Surface& surface, const VideoCore::TextureBlit& blit } void BlitHelper::FilterMMPX(Surface& surface, const VideoCore::TextureBlit& blit) { + static constexpr u8 internal_scale_factor = 2; const OpenGLState prev_state = OpenGLState::GetCurState(); SCOPE_EXIT({ prev_state.Apply(); }); + + const auto temp_rect{blit.src_rect * internal_scale_factor}; + const u32 tw = temp_rect.GetWidth(); + const u32 th = temp_rect.GetHeight(); + + const auto& tuple = surface.Tuple(); + TempTexture INTERMEDIATE; + INTERMEDIATE.fbo.Create(); + INTERMEDIATE.tex.Create(); + + glBindTexture(GL_TEXTURE_2D, INTERMEDIATE.tex.handle); + glTexStorage2D(GL_TEXTURE_2D, 1, tuple.internal_format, tw, th); + state.texture_units[0].texture_2d = surface.Handle(0); + state.texture_units[0].sampler = nearest_sampler.handle; + + state.texture_units[1].texture_2d = INTERMEDIATE.tex.handle; + + state.draw.draw_framebuffer = INTERMEDIATE.fbo.handle; + + state.Apply(); + + // MMPX SetParams(mmpx_program, surface.RealExtent(false), blit.src_rect); - Draw(mmpx_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); + Draw(mmpx_program, INTERMEDIATE.tex.handle, INTERMEDIATE.fbo.handle, 0, temp_rect); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, INTERMEDIATE.fbo.handle); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw_fbo.handle); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, surface.Handle(), blit.dst_level); + + // LINEAR + glBlitFramebuffer(0, 0, tw, th, + blit.dst_rect.left, blit.dst_rect.bottom, blit.dst_rect.right, blit.dst_rect.top, + GL_COLOR_BUFFER_BIT, GL_LINEAR); + + // clean + state.texture_units[0].texture_2d = 0; + state.texture_units[1].texture_2d = 0; } + void BlitHelper::SetParams(OGLProgram& program, const VideoCore::Extent& src_extent, Common::Rectangle src_rect) { glProgramUniform2f(