From 4ac3eb45c028b06cfd43ecca1e4648c644dc4920 Mon Sep 17 00:00:00 2001 From: Rodrigo Iglesias <8595185+RigleGit@users.noreply.github.com> Date: Tue, 28 Apr 2026 17:38:33 +0200 Subject: [PATCH] macOS: normalize SDMC directory filenames --- src/common/string_util.cpp | 62 ++++++++++++++++++++++++++++++ src/common/string_util.h | 2 + src/core/file_sys/disk_archive.cpp | 20 ++++++---- src/tests/common/file_util.cpp | 11 ++++++ 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 1637c01f1..8fd9a437b 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -20,6 +20,12 @@ #include #endif +#if defined(__APPLE__) +#include +#endif + + + namespace Common { /// Make a char lowercase @@ -164,6 +170,62 @@ std::u16string UTF8ToUTF16(std::string_view input) { return boost::locale::conv::utf_to_utf(input.data(), input.data() + input.size()); } +// macOS filesystems may expose decomposed Unicode names through directory listings. +// Normalize to NFC before passing names to guest APIs that expect stable text. +std::string NormalizeUTF8ToNFC(std::string_view input) { + const std::string fallback(input); + +#if defined(__APPLE__) + //Core Foundation string + CFStringRef source = CFStringCreateWithBytes( + kCFAllocatorDefault, + reinterpret_cast(input.data()), + static_cast(input.size()), + kCFStringEncodingUTF8, + false); + + if (source == nullptr) { + return fallback; + } + + // Mutable copy of the source string + CFMutableStringRef normalized = + CFStringCreateMutableCopy(kCFAllocatorDefault, 0, source); + CFRelease(source); + + if (normalized == nullptr) { + return fallback; + } + // Normalize the string to NFC form + CFStringNormalize(normalized, kCFStringNormalizationFormC); + + const CFIndex max_size = + CFStringGetMaximumSizeForEncoding( + CFStringGetLength(normalized), + kCFStringEncodingUTF8) + 1; // +1 for null terminator + + std::string output(static_cast(max_size), '\0'); + + // Convert the normalized string back to UTF-8 + const bool converted = CFStringGetCString( + normalized, + &output[0], + max_size, + kCFStringEncodingUTF8); + + CFRelease(normalized); + + if (!converted) { + return fallback; + } + + output.resize(std::strlen(output.c_str())); + return output; +#else + return fallback; +#endif +} + #ifdef _WIN32 static std::wstring CPToUTF16(u32 code_page, const std::string& input) { const auto size = diff --git a/src/common/string_util.h b/src/common/string_util.h index 342434928..2d5355d2d 100644 --- a/src/common/string_util.h +++ b/src/common/string_util.h @@ -49,6 +49,8 @@ void BuildCompleteFilename(std::string& _CompleteFilename, const std::string& _P [[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input); [[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input); +// Returns UTF-8 normalized to NFC on platforms that need explicit Unicode normalization. +[[nodiscard]] std::string NormalizeUTF8ToNFC(std::string_view input); #ifdef _WIN32 [[nodiscard]] std::string UTF16ToUTF8(const std::wstring& input); diff --git a/src/core/file_sys/disk_archive.cpp b/src/core/file_sys/disk_archive.cpp index a7ae5e92e..3e81bf42c 100644 --- a/src/core/file_sys/disk_archive.cpp +++ b/src/core/file_sys/disk_archive.cpp @@ -4,10 +4,12 @@ #include #include +#include #include "common/archives.h" #include "common/common_types.h" #include "common/file_util.h" #include "common/logging/log.h" +#include "common/string_util.h" #include "core/file_sys/disk_archive.h" #include "core/file_sys/errors.h" @@ -62,22 +64,25 @@ u32 DiskDirectory::Read(const u32 count, Entry* entries) { while (entries_read < count && children_iterator != directory.children.cend()) { const FileUtil::FSTEntry& file = *children_iterator; - const std::string& filename = file.virtualName; + // Directory entries are exposed to the guest as UTF-16. Normalize host UTF-8 names first + // so host Unicode normalization differences do not leak into guest-visible SDMC paths. + const std::string filename = Common::NormalizeUTF8ToNFC(file.virtualName); + const std::u16string filename_utf16 = Common::UTF8ToUTF16(filename); Entry& entry = entries[entries_read]; LOG_TRACE(Service_FS, "File {}: size={} dir={}", filename, file.size, file.isDirectory); - // TODO(Link Mauve): use a proper conversion to UTF-16. - for (std::size_t j = 0; j < FILENAME_LENGTH; ++j) { - entry.filename[j] = filename[j]; - if (!filename[j]) - break; + std::fill(std::begin(entry.filename), std::end(entry.filename), u'\0'); + + const std::size_t copy_length = std::min(filename_utf16.size(), FILENAME_LENGTH - 1); + for (std::size_t j = 0; j < copy_length; ++j) { + entry.filename[j] = filename_utf16[j]; } FileUtil::SplitFilename83(filename, entry.short_name, entry.extension); entry.is_directory = file.isDirectory; - entry.is_hidden = (filename[0] == '.'); + entry.is_hidden = (!filename.empty() && filename[0] == '.'); entry.is_read_only = 0; entry.file_size = file.size; @@ -93,4 +98,5 @@ u32 DiskDirectory::Read(const u32 count, Entry* entries) { return entries_read; } + } // namespace FileSys diff --git a/src/tests/common/file_util.cpp b/src/tests/common/file_util.cpp index bd7fcbdd9..90b6a4be0 100644 --- a/src/tests/common/file_util.cpp +++ b/src/tests/common/file_util.cpp @@ -24,3 +24,14 @@ TEST_CASE("SplitFilename83 Sanity", "[common]") { REQUIRE(std::memcmp(short_name.data(), expected_short_name.data(), short_name.size()) == 0); REQUIRE(std::memcmp(extension.data(), expected_extension.data(), extension.size()) == 0); } + +TEST_CASE("NormalizeUTF8ToNFC Sanity", "[common]") { + const std::string decomposed = "i\xCC\x81"; + const std::string composed = "\xC3\xAD"; + +#if defined(__APPLE__) + REQUIRE(Common::NormalizeUTF8ToNFC(decomposed) == composed); +#else + REQUIRE(Common::NormalizeUTF8ToNFC(decomposed) == decomposed); +#endif +}