eden/src/dynarmic/tests/rsqrt_test.cpp
crueter 51b170b470
[cmake] refactor: Use CPM over submodules (#143)
Transfers the majority of submodules and large externals to CPM, using source archives rather than full Git clones. Not only does this save massive amounts of clone and configure time, but dependencies are grabbed on-demand rather than being required by default. Additionally, CPM will (generally) automatically search for system dependencies, though certain dependencies have options to control this.

Testing shows gains ranging from 5x to 10x in terms of overall clone/configure time.

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/143
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
2025-08-04 04:50:14 +02:00

155 lines
4.2 KiB
C++

// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2021 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_test_macros.hpp>
#include <fmt/printf.h>
#include "dynarmic/common/common_types.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
// rsqrt_* routines exercised by the tests and benchmarks in this file. They are
// declared with C linkage and are not defined in this file; each takes one u32
// and returns one u32.
// NOTE(review): the u32 values are presumably raw single-precision bit
// patterns (the test inputs look like IEEE-754 encodings) -- confirm against
// the implementations.

// Benchmarked under the label "Inaccurate"; not compared against the reference.
extern "C" u32 rsqrt_inaccurate(u32);
// Benchmarked as "Full divss"; also checked for exact equality in Test().
extern "C" u32 rsqrt_full(u32);
// Benchmarked as "Full divss (GPR)"; also checked for exact equality in Test().
extern "C" u32 rsqrt_full_gpr(u32);
// Benchmarked as "Full divss (NB)"; not checked by Test().
extern "C" u32 rsqrt_full_nb(u32);
// Benchmarked as "Full divss (NB2)"; not checked by Test().
extern "C" u32 rsqrt_full_nb2(u32);
// Benchmarked as "Full divss (NB + GPR)"; not checked by Test().
extern "C" u32 rsqrt_full_nb_gpr(u32);
// Benchmarked as "One Newton iteration"; also checked for exact equality in Test().
extern "C" u32 rsqrt_newton(u32);
// Benchmarked as "Ugly Hack"; also checked for exact equality in Test().
extern "C" u32 rsqrt_hack(u32);
// Allows the code below to spell FP::... without a Dynarmic:: qualifier.
using namespace Dynarmic;
// Reference implementation with C linkage: evaluates FP::FPRSqrtEstimate on
// `value` with a freshly default-constructed FPCR and FPSR and returns the
// result. The FPSR object is local, so anything written to it is discarded.
extern "C" u32 rsqrt_fallback(u32 value) {
    FP::FPCR control;
    FP::FPSR status;
    const u32 estimate = FP::FPRSqrtEstimate(value, control, status);
    return estimate;
}
// Underscore-prefixed twin of rsqrt_fallback with C linkage; forwards `value`
// unchanged and returns whatever rsqrt_fallback returns.
// NOTE(review): presumably provided for callers that reference the symbol with
// a leading underscore -- confirm against the external implementations.
extern "C" u32 _rsqrt_fallback(u32 value) {
    const u32 estimate = rsqrt_fallback(value);
    return estimate;
}
// Checks one input against the reference.
//
// Computes FP::FPRSqrtEstimate(value) with default-constructed FPCR/FPSR and
// compares it for exact equality with rsqrt_full, rsqrt_full_gpr, rsqrt_newton
// and rsqrt_hack. On any mismatch a line with the input and all five results
// (hex) is printed, then the REQUIREs report the failing comparison(s).
void Test(u32 value) {
    FP::FPCR fpcr;
    FP::FPSR fpsr;
    const u32 expect = FP::FPRSqrtEstimate(value, fpcr, fpsr);

    const u32 full = rsqrt_full(value);
    const u32 full_gpr = rsqrt_full_gpr(value);
    const u32 newton = rsqrt_newton(value);
    const u32 hack = rsqrt_hack(value);

    const bool all_match = expect == full
                        && expect == full_gpr
                        && expect == newton
                        && expect == hack;
    if (all_match) {
        // Skip the REQUIREs on the passing path: the caller invokes this for
        // every 32-bit input, so assertions are only evaluated on a mismatch.
        return;
    }

    fmt::print("{:08x} = {:08x} : {:08x} : {:08x} : {:08x} : {:08x}\n", value, expect, full, full_gpr, newton, hack);
    REQUIRE(expect == full);
    REQUIRE(expect == full_gpr);
    REQUIRE(expect == newton);
    REQUIRE(expect == hack);
}
// Runs Test() on a handful of hand-picked inputs first, then exhaustively on
// every 32-bit value. Tagged "[.]", so it is only run when selected explicitly.
TEST_CASE("RSqrt Tests", "[fp][.]") {
    // Hand-picked inputs, checked up front in this order.
    constexpr u32 edge_cases[] = {
        0x00000000,
        0x80000000,
        0x7f8b7201,
        0x7f800000,
        0x7fc00000,
        0xff800000,
        0xffc00000,
        0xff800001,
    };
    for (const u32 edge_case : edge_cases) {
        Test(edge_case);
    }

    // Exhaustive sweep 0x00000000..0xffffffff: the counter wraps back to zero
    // after the last value, which ends the loop.
    u32 value = 0;
    do {
        Test(value);
    } while (++value != 0);
}
TEST_CASE("Benchmark RSqrt", "[fp][.]") {
BENCHMARK("Inaccurate") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_inaccurate(value);
}
return total;
};
BENCHMARK("Full divss") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full(value);
}
return total;
};
BENCHMARK("Full divss (GPR)") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full_gpr(value);
}
return total;
};
BENCHMARK("Full divss (NB)") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full_nb(value);
}
return total;
};
BENCHMARK("Full divss (NB2)") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full_nb2(value);
}
return total;
};
BENCHMARK("Full divss (NB + GPR)") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_full_nb_gpr(value);
}
return total;
};
BENCHMARK("One Newton iteration") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_newton(value);
}
return total;
};
BENCHMARK("Ugly Hack") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_hack(value);
}
return total;
};
BENCHMARK("Softfloat") {
u64 total = 0;
for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
const u32 value = static_cast<u32>(i);
total += rsqrt_fallback(value);
}
return total;
};
}