mirror of
https://github.com/azahar-emu/azahar.git
synced 2026-06-05 18:23:39 -04:00
shader_jit: Emit LG2/EX2 subroutines on-demand (#2046)
Some checks are pending
citra-build / source (push) Waiting to run
citra-build / linux-x86_64 (appimage) (push) Waiting to run
citra-build / linux-x86_64 (appimage-wayland) (push) Waiting to run
citra-build / linux-x86_64 (gcc-nopch) (push) Waiting to run
citra-build / linux-arm64 (clang) (push) Waiting to run
citra-build / linux-arm64 (gcc-nopch) (push) Waiting to run
citra-build / macos (push) Waiting to run
citra-build / windows (msvc) (push) Waiting to run
citra-build / windows (msys2) (push) Waiting to run
citra-build / android (googleplay) (push) Waiting to run
citra-build / android (vanilla) (push) Waiting to run
citra-build / docker (push) Waiting to run
citra-format / clang-format (push) Waiting to run
citra-libretro / android (push) Waiting to run
citra-libretro / linux (push) Waiting to run
citra-libretro / windows (push) Waiting to run
citra-libretro / macos (arm64) (push) Waiting to run
citra-libretro / macos (x86_64) (push) Waiting to run
citra-libretro / ios (push) Waiting to run
citra-libretro / tvos (push) Waiting to run
citra-transifex / transifex (push) Waiting to run
Some checks are pending
citra-build / source (push) Waiting to run
citra-build / linux-x86_64 (appimage) (push) Waiting to run
citra-build / linux-x86_64 (appimage-wayland) (push) Waiting to run
citra-build / linux-x86_64 (gcc-nopch) (push) Waiting to run
citra-build / linux-arm64 (clang) (push) Waiting to run
citra-build / linux-arm64 (gcc-nopch) (push) Waiting to run
citra-build / macos (push) Waiting to run
citra-build / windows (msvc) (push) Waiting to run
citra-build / windows (msys2) (push) Waiting to run
citra-build / android (googleplay) (push) Waiting to run
citra-build / android (vanilla) (push) Waiting to run
citra-build / docker (push) Waiting to run
citra-format / clang-format (push) Waiting to run
citra-libretro / android (push) Waiting to run
citra-libretro / linux (push) Waiting to run
citra-libretro / windows (push) Waiting to run
citra-libretro / macos (arm64) (push) Waiting to run
citra-libretro / macos (x86_64) (push) Waiting to run
citra-libretro / ios (push) Waiting to run
citra-libretro / tvos (push) Waiting to run
citra-transifex / transifex (push) Waiting to run
Rather than emitting these subroutine functions for _every_ shader, only emit the subroutines when the `LG2` and `EX2` instructions are actually used. This saves a good chunk of memory across all shaders. Inspired by Tanuki3DS.
This commit is contained in:
parent
37b6c91de6
commit
91128d6625
4 changed files with 40 additions and 43 deletions
|
|
@ -508,6 +508,7 @@ void JitShader::Compile_DPH(Instruction instr) {
|
||||||
void JitShader::Compile_EX2(Instruction instr) {
|
void JitShader::Compile_EX2(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
STR(X30, SP, POST_INDEXED, -16);
|
STR(X30, SP, POST_INDEXED, -16);
|
||||||
|
exp2_used = true;
|
||||||
BL(exp2_subroutine);
|
BL(exp2_subroutine);
|
||||||
LDR(X30, SP, PRE_INDEXED, 16);
|
LDR(X30, SP, PRE_INDEXED, 16);
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
|
|
@ -516,6 +517,7 @@ void JitShader::Compile_EX2(Instruction instr) {
|
||||||
void JitShader::Compile_LG2(Instruction instr) {
|
void JitShader::Compile_LG2(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
STR(X30, SP, POST_INDEXED, -16);
|
STR(X30, SP, POST_INDEXED, -16);
|
||||||
|
log2_used = true;
|
||||||
BL(log2_subroutine);
|
BL(log2_subroutine);
|
||||||
LDR(X30, SP, PRE_INDEXED, 16);
|
LDR(X30, SP, PRE_INDEXED, 16);
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
|
|
@ -994,6 +996,14 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||||
// Compile entire program
|
// Compile entire program
|
||||||
Compile_Block(static_cast<u32>(program_code->size()));
|
Compile_Block(static_cast<u32>(program_code->size()));
|
||||||
|
|
||||||
|
// Compile utility functions
|
||||||
|
if (log2_used) {
|
||||||
|
Compile_Log2(log2_subroutine);
|
||||||
|
}
|
||||||
|
if (exp2_used) {
|
||||||
|
Compile_Exp2(exp2_subroutine);
|
||||||
|
}
|
||||||
|
|
||||||
// Free memory that's no longer needed
|
// Free memory that's no longer needed
|
||||||
program_code = nullptr;
|
program_code = nullptr;
|
||||||
swizzle_data = nullptr;
|
swizzle_data = nullptr;
|
||||||
|
|
@ -1021,18 +1031,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||||
code_vec.shrink_to_fit();
|
code_vec.shrink_to_fit();
|
||||||
}
|
}
|
||||||
|
|
||||||
JitShader::JitShader() : oaknut::VectorCodeGenerator(code_vec) {
|
JitShader::JitShader() : oaknut::VectorCodeGenerator(code_vec) {}
|
||||||
CompilePrelude();
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitShader::CompilePrelude() {
|
|
||||||
log2_subroutine = CompilePrelude_Log2();
|
|
||||||
exp2_subroutine = CompilePrelude_Exp2();
|
|
||||||
}
|
|
||||||
|
|
||||||
Label JitShader::CompilePrelude_Log2() {
|
|
||||||
Label subroutine;
|
|
||||||
|
|
||||||
|
void JitShader::Compile_Log2(Label subroutine) {
|
||||||
// We perform this approximation by first performing a range reduction into the range
|
// We perform this approximation by first performing a range reduction into the range
|
||||||
// [1.0, 2.0). A minimax polynomial which was fit for the function log2(x) / (x - 1) is then
|
// [1.0, 2.0). A minimax polynomial which was fit for the function log2(x) / (x - 1) is then
|
||||||
// evaluated. We multiply the result by (x - 1) then restore the result into the appropriate
|
// evaluated. We multiply the result by (x - 1) then restore the result into the appropriate
|
||||||
|
|
@ -1136,13 +1137,9 @@ Label JitShader::CompilePrelude_Log2() {
|
||||||
DUP(SRC1.S4(), SRC1.Selem()[0]);
|
DUP(SRC1.S4(), SRC1.Selem()[0]);
|
||||||
|
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
return subroutine;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Label JitShader::CompilePrelude_Exp2() {
|
void JitShader::Compile_Exp2(Label subroutine) {
|
||||||
Label subroutine;
|
|
||||||
|
|
||||||
// This approximation first performs a range reduction into the range [-0.5, 0.5). A minimax
|
// This approximation first performs a range reduction into the range [-0.5, 0.5). A minimax
|
||||||
// polynomial which was fit for the function exp2(x) is then evaluated. We then restore the
|
// polynomial which was fit for the function exp2(x) is then evaluated. We then restore the
|
||||||
// result into the appropriate range.
|
// result into the appropriate range.
|
||||||
|
|
@ -1241,8 +1238,6 @@ Label JitShader::CompilePrelude_Exp2() {
|
||||||
DUP(SRC1.S4(), SRC1.Selem()[0]);
|
DUP(SRC1.S4(), SRC1.Selem()[0]);
|
||||||
|
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
return subroutine;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Pica::Shader
|
} // namespace Pica::Shader
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright 2023 Citra Emulator Project
|
// Copyright Citra Emulator Project / Azahar Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
|
@ -123,9 +123,8 @@ private:
|
||||||
/**
|
/**
|
||||||
* Emits data and code for utility functions.
|
* Emits data and code for utility functions.
|
||||||
*/
|
*/
|
||||||
void CompilePrelude();
|
void Compile_Log2(oaknut::Label subroutine);
|
||||||
oaknut::Label CompilePrelude_Log2();
|
void Compile_Exp2(oaknut::Label subroutine);
|
||||||
oaknut::Label CompilePrelude_Exp2();
|
|
||||||
|
|
||||||
const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
|
const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
|
||||||
const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
|
const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
|
||||||
|
|
@ -146,6 +145,10 @@ private:
|
||||||
using CompiledShader = void(const void* setup, void* state, const std::byte* start_addr);
|
using CompiledShader = void(const void* setup, void* state, const std::byte* start_addr);
|
||||||
CompiledShader* program = nullptr;
|
CompiledShader* program = nullptr;
|
||||||
|
|
||||||
|
/// Library functions, emitted as used
|
||||||
|
bool log2_used : 1 = false;
|
||||||
|
bool exp2_used : 1 = false;
|
||||||
|
|
||||||
oaknut::Label log2_subroutine;
|
oaknut::Label log2_subroutine;
|
||||||
oaknut::Label exp2_subroutine;
|
oaknut::Label exp2_subroutine;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -511,12 +511,14 @@ void JitShader::Compile_DPH(Instruction instr) {
|
||||||
|
|
||||||
void JitShader::Compile_EX2(Instruction instr) {
|
void JitShader::Compile_EX2(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
|
exp2_used = true;
|
||||||
call(exp2_subroutine);
|
call(exp2_subroutine);
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitShader::Compile_LG2(Instruction instr) {
|
void JitShader::Compile_LG2(Instruction instr) {
|
||||||
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
|
||||||
|
log2_used = true;
|
||||||
call(log2_subroutine);
|
call(log2_subroutine);
|
||||||
Compile_DestEnable(instr, SRC1);
|
Compile_DestEnable(instr, SRC1);
|
||||||
}
|
}
|
||||||
|
|
@ -1038,6 +1040,14 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||||
// Compile entire program
|
// Compile entire program
|
||||||
Compile_Block(static_cast<u32>(program_code->size()));
|
Compile_Block(static_cast<u32>(program_code->size()));
|
||||||
|
|
||||||
|
// Compile utility functions
|
||||||
|
if (log2_used) {
|
||||||
|
Compile_Log2(log2_subroutine);
|
||||||
|
}
|
||||||
|
if (exp2_used) {
|
||||||
|
Compile_Exp2(exp2_subroutine);
|
||||||
|
}
|
||||||
|
|
||||||
// Free memory that's no longer needed
|
// Free memory that's no longer needed
|
||||||
program_code = nullptr;
|
program_code = nullptr;
|
||||||
swizzle_data = nullptr;
|
swizzle_data = nullptr;
|
||||||
|
|
@ -1050,18 +1060,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||||
LOG_DEBUG(HW_GPU, "Compiled shader size={}", getSize());
|
LOG_DEBUG(HW_GPU, "Compiled shader size={}", getSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {
|
JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {}
|
||||||
CompilePrelude();
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitShader::CompilePrelude() {
|
|
||||||
log2_subroutine = CompilePrelude_Log2();
|
|
||||||
exp2_subroutine = CompilePrelude_Exp2();
|
|
||||||
}
|
|
||||||
|
|
||||||
Xbyak::Label JitShader::CompilePrelude_Log2() {
|
|
||||||
Xbyak::Label subroutine;
|
|
||||||
|
|
||||||
|
void JitShader::Compile_Log2(Xbyak::Label subroutine) {
|
||||||
// SSE does not have a log instruction, thus we must approximate.
|
// SSE does not have a log instruction, thus we must approximate.
|
||||||
// We perform this approximation by first performing a range reduction into the range [1.0, 2.0).
|
// We perform this approximation by first performing a range reduction into the range [1.0, 2.0).
|
||||||
// A minimax polynomial which was fit for the function log2(x) / (x - 1) is then evaluated.
|
// A minimax polynomial which was fit for the function log2(x) / (x - 1) is then evaluated.
|
||||||
|
|
@ -1163,12 +1164,9 @@ Xbyak::Label JitShader::CompilePrelude_Log2() {
|
||||||
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
|
|
||||||
return subroutine;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Xbyak::Label JitShader::CompilePrelude_Exp2() {
|
void JitShader::Compile_Exp2(Xbyak::Label subroutine) {
|
||||||
Xbyak::Label subroutine;
|
|
||||||
|
|
||||||
// SSE does not have an exp instruction, thus we must approximate.
|
// SSE does not have an exp instruction, thus we must approximate.
|
||||||
// We perform this approximation by first performing a range reduction into the range [-0.5, 0.5).
|
// We perform this approximation by first performing a range reduction into the range [-0.5, 0.5).
|
||||||
|
|
@ -1271,8 +1269,6 @@ Xbyak::Label JitShader::CompilePrelude_Exp2() {
|
||||||
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
|
|
||||||
return subroutine;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Pica::Shader
|
} // namespace Pica::Shader
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// Copyright 2015 Citra Emulator Project
|
// Copyright Citra Emulator Project / Azahar Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
|
@ -115,9 +115,8 @@ private:
|
||||||
/**
|
/**
|
||||||
* Emits data and code for utility functions.
|
* Emits data and code for utility functions.
|
||||||
*/
|
*/
|
||||||
void CompilePrelude();
|
void Compile_Log2(Xbyak::Label subroutine);
|
||||||
Xbyak::Label CompilePrelude_Log2();
|
void Compile_Exp2(Xbyak::Label subroutine);
|
||||||
Xbyak::Label CompilePrelude_Exp2();
|
|
||||||
|
|
||||||
const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
|
const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
|
||||||
const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
|
const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
|
||||||
|
|
@ -138,6 +137,10 @@ private:
|
||||||
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
|
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
|
||||||
CompiledShader* program = nullptr;
|
CompiledShader* program = nullptr;
|
||||||
|
|
||||||
|
/// Library functions, emitted as used
|
||||||
|
bool log2_used : 1 = false;
|
||||||
|
bool exp2_used : 1 = false;
|
||||||
|
|
||||||
Xbyak::Label log2_subroutine;
|
Xbyak::Label log2_subroutine;
|
||||||
Xbyak::Label exp2_subroutine;
|
Xbyak::Label exp2_subroutine;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue