macOS: normalize SDMC directory filenames

This commit is contained in:
Rodrigo Iglesias 2026-04-28 17:38:33 +02:00
parent ec6a0dd1c8
commit 4ac3eb45c0
4 changed files with 88 additions and 7 deletions

View file

@ -20,6 +20,12 @@
#include <windows.h>
#endif
#if defined(__APPLE__)
#include <CoreFoundation/CFString.h>
#endif
namespace Common {
/// Make a char lowercase
@ -164,6 +170,62 @@ std::u16string UTF8ToUTF16(std::string_view input) {
return boost::locale::conv::utf_to_utf<char16_t>(input.data(), input.data() + input.size());
}
// macOS filesystems may expose decomposed Unicode names through directory listings.
// Normalize to NFC before passing names to guest APIs that expect stable text.
//
// Returns the NFC form of `input` (interpreted as UTF-8) on Apple platforms. On every other
// platform, or whenever a Core Foundation call fails, `input` is returned unchanged.
std::string NormalizeUTF8ToNFC(std::string_view input) {
#if defined(__APPLE__)
    if (input.empty()) {
        return {};
    }

    // Wrap the raw UTF-8 bytes in an immutable Core Foundation string.
    CFStringRef source = CFStringCreateWithBytes(
        kCFAllocatorDefault, reinterpret_cast<const UInt8*>(input.data()),
        static_cast<CFIndex>(input.size()), kCFStringEncodingUTF8, false);
    if (source == nullptr) {
        // The bytes could not be turned into a CFString; leave them as they are.
        return std::string(input);
    }

    // CFStringNormalize works in place, so it needs a mutable copy (max length 0 = unbounded).
    CFMutableStringRef normalized = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, source);
    CFRelease(source);
    if (normalized == nullptr) {
        return std::string(input);
    }

    CFStringNormalize(normalized, kCFStringNormalizationFormC);

    // Size the output buffer for the worst case, then let CFStringGetBytes report how many
    // bytes were really written. Using the reported byte count (instead of scanning for a
    // NUL terminator) keeps the result exact even if the text contains a NUL character.
    const CFIndex length = CFStringGetLength(normalized);
    const CFIndex max_size = CFStringGetMaximumSizeForEncoding(length, kCFStringEncodingUTF8);
    if (max_size == kCFNotFound) {
        CFRelease(normalized);
        return std::string(input);
    }

    std::string output(static_cast<std::size_t>(max_size), '\0');
    CFIndex used_bytes = 0;
    const CFIndex converted_chars = CFStringGetBytes(
        normalized, CFRangeMake(0, length), kCFStringEncodingUTF8,
        0,     // no loss byte: stop instead of substituting unconvertible characters
        false, // no BOM / external representation
        reinterpret_cast<UInt8*>(output.data()), max_size, &used_bytes);
    CFRelease(normalized);

    // Anything short of a full conversion is treated as a failure.
    if (converted_chars != length) {
        return std::string(input);
    }

    output.resize(static_cast<std::size_t>(used_bytes));
    return output;
#else
    // No explicit normalization is performed on this platform.
    return std::string(input);
#endif
}
#ifdef _WIN32
static std::wstring CPToUTF16(u32 code_page, const std::string& input) {
const auto size =

View file

@ -49,6 +49,8 @@ void BuildCompleteFilename(std::string& _CompleteFilename, const std::string& _P
[[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input);
[[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input);
/// Returns a copy of the UTF-8 string `input` normalized to Unicode NFC.
/// Normalization is only performed on Apple platforms; on every other platform, or if the
/// conversion fails, the input is returned unchanged.
[[nodiscard]] std::string NormalizeUTF8ToNFC(std::string_view input);
#ifdef _WIN32
[[nodiscard]] std::string UTF16ToUTF8(const std::wstring& input);

View file

@ -4,10 +4,12 @@
#include <algorithm>
#include <memory>
#include <iterator>
#include "common/archives.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/file_sys/disk_archive.h"
#include "core/file_sys/errors.h"
@ -62,22 +64,25 @@ u32 DiskDirectory::Read(const u32 count, Entry* entries) {
while (entries_read < count && children_iterator != directory.children.cend()) {
const FileUtil::FSTEntry& file = *children_iterator;
const std::string& filename = file.virtualName;
// Directory entries are exposed to the guest as UTF-16. Normalize host UTF-8 names first
// so host Unicode normalization differences do not leak into guest-visible SDMC paths.
const std::string filename = Common::NormalizeUTF8ToNFC(file.virtualName);
const std::u16string filename_utf16 = Common::UTF8ToUTF16(filename);
Entry& entry = entries[entries_read];
LOG_TRACE(Service_FS, "File {}: size={} dir={}", filename, file.size, file.isDirectory);
// TODO(Link Mauve): use a proper conversion to UTF-16.
for (std::size_t j = 0; j < FILENAME_LENGTH; ++j) {
entry.filename[j] = filename[j];
if (!filename[j])
break;
std::fill(std::begin(entry.filename), std::end(entry.filename), u'\0');
const std::size_t copy_length = std::min(filename_utf16.size(), FILENAME_LENGTH - 1);
for (std::size_t j = 0; j < copy_length; ++j) {
entry.filename[j] = filename_utf16[j];
}
FileUtil::SplitFilename83(filename, entry.short_name, entry.extension);
entry.is_directory = file.isDirectory;
entry.is_hidden = (filename[0] == '.');
entry.is_hidden = (!filename.empty() && filename[0] == '.');
entry.is_read_only = 0;
entry.file_size = file.size;
@ -93,4 +98,5 @@ u32 DiskDirectory::Read(const u32 count, Entry* entries) {
return entries_read;
}
} // namespace FileSys

View file

@ -24,3 +24,14 @@ TEST_CASE("SplitFilename83 Sanity", "[common]") {
REQUIRE(std::memcmp(short_name.data(), expected_short_name.data(), short_name.size()) == 0);
REQUIRE(std::memcmp(extension.data(), expected_extension.data(), extension.size()) == 0);
}
TEST_CASE("NormalizeUTF8ToNFC Sanity", "[common]") {
    // "i" followed by U+0301 COMBINING ACUTE ACCENT: the decomposed (NFD) spelling.
    const std::string decomposed = "i\xCC\x81";
    // U+00ED LATIN SMALL LETTER I WITH ACUTE: the composed (NFC) spelling.
    const std::string composed = "\xC3\xAD";
    const std::string ascii = "file.txt";

    // These hold on every platform: input that is empty, pure ASCII or already composed
    // must come back unchanged.
    REQUIRE(Common::NormalizeUTF8ToNFC("").empty());
    REQUIRE(Common::NormalizeUTF8ToNFC(ascii) == ascii);
    REQUIRE(Common::NormalizeUTF8ToNFC(composed) == composed);

#if defined(__APPLE__)
    // Apple builds perform the normalization.
    REQUIRE(Common::NormalizeUTF8ToNFC(decomposed) == composed);
#else
    // Other builds pass the input through untouched.
    REQUIRE(Common::NormalizeUTF8ToNFC(decomposed) == decomposed);
#endif
}