mirror of
https://github.com/azahar-emu/azahar.git
synced 2026-06-08 03:33:44 -04:00
macOS: normalize SDMC directory filenames
This commit is contained in:
parent
ec6a0dd1c8
commit
4ac3eb45c0
4 changed files with 88 additions and 7 deletions
|
|
@ -20,6 +20,12 @@
|
|||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <CoreFoundation/CFString.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
namespace Common {
|
||||
|
||||
/// Make a char lowercase
|
||||
|
|
@ -164,6 +170,62 @@ std::u16string UTF8ToUTF16(std::string_view input) {
|
|||
return boost::locale::conv::utf_to_utf<char16_t>(input.data(), input.data() + input.size());
|
||||
}
|
||||
|
||||
// macOS filesystems may expose decomposed Unicode names through directory listings.
// Normalize to NFC before passing names to guest APIs that expect stable text.

/// Normalizes UTF-8 text to Unicode Normalization Form C (NFC).
///
/// On macOS the conversion is performed with Core Foundation. On every other platform the
/// input is returned unchanged. The input is also returned unchanged when it cannot be decoded
/// as UTF-8 or when any Core Foundation step fails, so callers always get usable text back.
///
/// @param input UTF-8 encoded text.
/// @return NFC-normalized UTF-8 on macOS; otherwise (or on failure) a copy of `input`.
std::string NormalizeUTF8ToNFC(std::string_view input) {
#if defined(__APPLE__)
    if (input.empty()) {
        return {};
    }

    // Decode the UTF-8 bytes into an immutable Core Foundation string.
    CFStringRef source = CFStringCreateWithBytes(
        kCFAllocatorDefault, reinterpret_cast<const UInt8*>(input.data()),
        static_cast<CFIndex>(input.size()), kCFStringEncodingUTF8, false);
    if (source == nullptr) {
        return std::string(input);
    }

    // CFStringNormalize works in place, so a mutable copy is required.
    CFMutableStringRef normalized = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, source);
    CFRelease(source);
    if (normalized == nullptr) {
        return std::string(input);
    }

    CFStringNormalize(normalized, kCFStringNormalizationFormC);

    const CFIndex length = CFStringGetLength(normalized);
    const CFIndex max_size = CFStringGetMaximumSizeForEncoding(length, kCFStringEncodingUTF8);
    if (max_size == kCFNotFound) {
        CFRelease(normalized);
        return std::string(input);
    }

    // Convert back to UTF-8. CFStringGetBytes reports the exact number of bytes written, so the
    // result does not depend on NUL termination and embedded NUL characters are preserved.
    std::string output(static_cast<std::size_t>(max_size), '\0');
    CFIndex used_bytes = 0;
    const CFIndex converted_chars = CFStringGetBytes(
        normalized, CFRangeMake(0, length), kCFStringEncodingUTF8, 0, false,
        reinterpret_cast<UInt8*>(&output[0]), max_size, &used_bytes);
    CFRelease(normalized);

    // A short count means part of the string could not be converted.
    if (converted_chars != length) {
        return std::string(input);
    }

    output.resize(static_cast<std::size_t>(used_bytes));
    return output;
#else
    return std::string(input);
#endif
}
|
||||
|
||||
#ifdef _WIN32
|
||||
static std::wstring CPToUTF16(u32 code_page, const std::string& input) {
|
||||
const auto size =
|
||||
|
|
|
|||
|
|
@ -49,6 +49,8 @@ void BuildCompleteFilename(std::string& _CompleteFilename, const std::string& _P
|
|||
|
||||
[[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input);
|
||||
[[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input);
|
||||
// Returns the UTF-8 input normalized to NFC on macOS; on other platforms, or if the conversion fails, the input is returned unchanged.
|
||||
[[nodiscard]] std::string NormalizeUTF8ToNFC(std::string_view input);
|
||||
|
||||
#ifdef _WIN32
|
||||
[[nodiscard]] std::string UTF16ToUTF8(const std::wstring& input);
|
||||
|
|
|
|||
|
|
@ -4,10 +4,12 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <iterator>
|
||||
#include "common/archives.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/string_util.h"
|
||||
#include "core/file_sys/disk_archive.h"
|
||||
#include "core/file_sys/errors.h"
|
||||
|
||||
|
|
@ -62,22 +64,25 @@ u32 DiskDirectory::Read(const u32 count, Entry* entries) {
|
|||
|
||||
while (entries_read < count && children_iterator != directory.children.cend()) {
|
||||
const FileUtil::FSTEntry& file = *children_iterator;
|
||||
const std::string& filename = file.virtualName;
|
||||
// Directory entries are exposed to the guest as UTF-16. Normalize host UTF-8 names first
|
||||
// so host Unicode normalization differences do not leak into guest-visible SDMC paths.
|
||||
const std::string filename = Common::NormalizeUTF8ToNFC(file.virtualName);
|
||||
const std::u16string filename_utf16 = Common::UTF8ToUTF16(filename);
|
||||
Entry& entry = entries[entries_read];
|
||||
|
||||
LOG_TRACE(Service_FS, "File {}: size={} dir={}", filename, file.size, file.isDirectory);
|
||||
|
||||
// TODO(Link Mauve): use a proper conversion to UTF-16.
|
||||
for (std::size_t j = 0; j < FILENAME_LENGTH; ++j) {
|
||||
entry.filename[j] = filename[j];
|
||||
if (!filename[j])
|
||||
break;
|
||||
std::fill(std::begin(entry.filename), std::end(entry.filename), u'\0');
|
||||
|
||||
const std::size_t copy_length = std::min(filename_utf16.size(), FILENAME_LENGTH - 1);
|
||||
for (std::size_t j = 0; j < copy_length; ++j) {
|
||||
entry.filename[j] = filename_utf16[j];
|
||||
}
|
||||
|
||||
FileUtil::SplitFilename83(filename, entry.short_name, entry.extension);
|
||||
|
||||
entry.is_directory = file.isDirectory;
|
||||
entry.is_hidden = (filename[0] == '.');
|
||||
entry.is_hidden = (!filename.empty() && filename[0] == '.');
|
||||
entry.is_read_only = 0;
|
||||
entry.file_size = file.size;
|
||||
|
||||
|
|
@ -93,4 +98,5 @@ u32 DiskDirectory::Read(const u32 count, Entry* entries) {
|
|||
return entries_read;
|
||||
}
|
||||
|
||||
|
||||
} // namespace FileSys
|
||||
|
|
|
|||
|
|
@ -24,3 +24,14 @@ TEST_CASE("SplitFilename83 Sanity", "[common]") {
|
|||
REQUIRE(std::memcmp(short_name.data(), expected_short_name.data(), short_name.size()) == 0);
|
||||
REQUIRE(std::memcmp(extension.data(), expected_extension.data(), extension.size()) == 0);
|
||||
}
|
||||
|
||||
TEST_CASE("NormalizeUTF8ToNFC Sanity", "[common]") {
    // "i" followed by U+0301 COMBINING ACUTE ACCENT (decomposed spelling of U+00ED).
    const std::string decomposed = "i\xCC\x81";
    // U+00ED LATIN SMALL LETTER I WITH ACUTE (precomposed spelling).
    const std::string composed = "\xC3\xAD";

#if defined(__APPLE__)
    // macOS performs real normalization: the decomposed pair collapses to one code point.
    REQUIRE(Common::NormalizeUTF8ToNFC(decomposed) == composed);
#else
    // Other platforms return the input unchanged.
    REQUIRE(Common::NormalizeUTF8ToNFC(decomposed) == decomposed);
#endif

    // Text that is already NFC, plain ASCII, and the empty string are unchanged everywhere.
    REQUIRE(Common::NormalizeUTF8ToNFC(composed) == composed);
    REQUIRE(Common::NormalizeUTF8ToNFC("plain.txt") == "plain.txt");
    REQUIRE(Common::NormalizeUTF8ToNFC("").empty());
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue