Skip to content

Commit

Permalink
Initial support for compiling on ARM64. (#788)
Browse files Browse the repository at this point in the history
  • Loading branch information
squidbus authored Sep 9, 2024
1 parent adfb3af commit 411449c
Show file tree
Hide file tree
Showing 12 changed files with 166 additions and 25 deletions.
31 changes: 27 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ endif()

option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)

# First, determine whether to use CMAKE_OSX_ARCHITECTURES or CMAKE_SYSTEM_PROCESSOR.
if (APPLE AND CMAKE_OSX_ARCHITECTURES)
set(BASE_ARCHITECTURE "${CMAKE_OSX_ARCHITECTURES}")
else()
set(BASE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
endif()

# Next, match common architecture strings down to a known common value.
if (BASE_ARCHITECTURE MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
set(ARCHITECTURE "x86_64")
elseif (BASE_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
set(ARCHITECTURE "arm64")
else()
message(FATAL_ERROR "Unsupported CPU architecture: ${BASE_ARCHITECTURE}")
endif()

# This function should be passed a list of all files in a target. It will automatically generate file groups
# following the directory hierarchy, so that the layout of the files in IDEs matches the one in the filesystem.
function(create_target_directory_groups target_name)
Expand Down Expand Up @@ -309,6 +325,7 @@ set(COMMON src/common/logging/backend.cpp
src/common/logging/text_formatter.h
src/common/logging/types.h
src/common/alignment.h
src/common/arch.h
src/common/assert.cpp
src/common/assert.h
src/common/bit_field.h
Expand Down Expand Up @@ -358,8 +375,6 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/aerolib/aerolib.h
src/core/address_space.cpp
src/core/address_space.h
src/core/cpu_patches.cpp
src/core/cpu_patches.h
src/core/crypto/crypto.cpp
src/core/crypto/crypto.h
src/core/crypto/keys.h
Expand Down Expand Up @@ -417,6 +432,12 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/virtual_memory.h
)

if (ARCHITECTURE STREQUAL "x86_64")
set(CORE ${CORE}
src/core/cpu_patches.cpp
src/core/cpu_patches.h)
endif()

set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/profile.h
src/shader_recompiler/recompiler.cpp
Expand Down Expand Up @@ -678,8 +699,10 @@ if (APPLE)
target_link_libraries(shadps4 PRIVATE ${MOLTENVK})
endif()

# Reserve system-managed memory space.
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
if (ARCHITECTURE STREQUAL "x86_64")
# Reserve system-managed memory space.
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
endif()

# Replacement for std::chrono::time_zone
target_link_libraries(shadps4 PRIVATE date::date-tz)
Expand Down
1 change: 0 additions & 1 deletion externals/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ else()
endif()

if (NOT TARGET FFmpeg::ffmpeg)
set(ARCHITECTURE "x86_64")
add_subdirectory(ffmpeg-core)
add_library(FFmpeg::ffmpeg ALIAS ffmpeg)
endif()
Expand Down
10 changes: 10 additions & 0 deletions src/common/arch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#if defined(__x86_64__) || defined(_M_X64)
#define ARCH_X86_64 1
#elif defined(__aarch64__) || defined(_M_ARM64)
#define ARCH_ARM64 1
#endif
7 changes: 7 additions & 0 deletions src/common/assert.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/arch.h"
#include "common/assert.h"
#include "common/logging/backend.h"

#if defined(ARCH_X86_64)
#define Crash() __asm__ __volatile__("int $3")
#elif defined(ARCH_ARM64)
#define Crash() __asm__ __volatile__("brk 0")
#else
#error "Missing Crash() implementation for target CPU architecture."
#endif

void assert_fail_impl() {
Common::Log::Stop();
Expand Down
17 changes: 17 additions & 0 deletions src/common/rdtsc.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#pragma once

#include "common/arch.h"

#ifdef _MSC_VER
#include <intrin.h>
#endif
Expand All @@ -13,22 +15,37 @@ namespace Common {

#ifdef _MSC_VER
__forceinline static u64 FencedRDTSC() {
#ifdef ARCH_X86_64
_mm_lfence();
_ReadWriteBarrier();
const u64 result = __rdtsc();
_mm_lfence();
_ReadWriteBarrier();
return result;
#else
#error "Missing FencedRDTSC() implementation for target CPU architecture."
#endif
}
#else
static inline u64 FencedRDTSC() {
#ifdef ARCH_X86_64
u64 eax;
u64 edx;
asm volatile("lfence\n\t"
"rdtsc\n\t"
"lfence\n\t"
: "=a"(eax), "=d"(edx));
return (edx << 32) | eax;
#elif defined(ARCH_ARM64)
u64 ret;
asm volatile("isb\n\t"
"mrs %0, cntvct_el0\n\t"
"isb\n\t"
: "=r"(ret)::"memory");
return ret;
#else
#error "Missing FencedRDTSC() implementation for target CPU architecture."
#endif
}
#endif

Expand Down
39 changes: 29 additions & 10 deletions src/core/address_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,22 @@

#include <boost/icl/separate_interval_set.hpp>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "core/address_space.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/memory.h"
#include "libraries/error_codes.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <fcntl.h>
#include <sys/mman.h>
#endif
#include "libraries/error_codes.h"

#ifdef __APPLE__
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Reserve space for the system address space using a zerofill section.
asm(".zerofill GUEST_SYSTEM,GUEST_SYSTEM,__guest_system,0xFBFC00000");
#endif
Expand Down Expand Up @@ -308,12 +309,12 @@ struct AddressSpace::Impl {

constexpr int protection_flags = PROT_READ | PROT_WRITE;
constexpr int base_map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
#ifdef __APPLE__
// On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF
// and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. We can allocate the system
// managed region, as well as system reserved if reduced in size slightly, but we cannot map
// the user region where we want, so we must let the OS put it wherever possible and hope
// the game won't rely on its location.
#if defined(__APPLE__) && defined(ARCH_X86_64)
// On ARM64 Macs under Rosetta 2, we run into limitations due to the commpage from
// 0xFC0000000 - 0xFFFFFFFFF and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF.
// We can allocate the system managed region, as well as system reserved if reduced in size
// slightly, but we cannot map the user region where we want, so we must let the OS put it
// wherever possible and hope the game won't rely on its location.
system_managed_base = reinterpret_cast<u8*>(
mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), system_managed_size, protection_flags,
base_map_flags | MAP_FIXED, -1, 0));
Expand All @@ -325,12 +326,22 @@ struct AddressSpace::Impl {
protection_flags, base_map_flags, -1, 0));
#else
const auto virtual_size = system_managed_size + system_reserved_size + user_size;
#if defined(ARCH_X86_64)
const auto virtual_base =
reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), virtual_size,
protection_flags, base_map_flags | MAP_FIXED, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = reinterpret_cast<u8*>(SYSTEM_RESERVED_MIN);
user_base = reinterpret_cast<u8*>(USER_MIN);
#else
// Map memory wherever possible and instruction translation can handle offsetting to the
// base.
const auto virtual_base = reinterpret_cast<u8*>(
mmap(nullptr, virtual_size, protection_flags, base_map_flags, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = virtual_base + SYSTEM_RESERVED_MIN - SYSTEM_MANAGED_MIN;
user_base = virtual_base + USER_MIN - SYSTEM_MANAGED_MIN;
#endif
#endif
if (system_managed_base == MAP_FAILED || system_reserved_base == MAP_FAILED ||
user_base == MAP_FAILED) {
Expand Down Expand Up @@ -430,9 +441,11 @@ struct AddressSpace::Impl {
if (write) {
flags |= PROT_WRITE;
}
#ifdef ARCH_X86_64
if (execute) {
flags |= PROT_EXEC;
}
#endif
int ret = mprotect(reinterpret_cast<void*>(virtual_addr), size, flags);
ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
}
Expand Down Expand Up @@ -463,8 +476,14 @@ AddressSpace::~AddressSpace() = default;

void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr phys_addr,
bool is_exec) {
return impl->Map(virtual_addr, phys_addr, size,
is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
#if ARCH_X86_64
const auto prot = is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
#else
// On non-native architectures, we can simplify things by ignoring the execute flag for the
// canonical copy of the memory and rely on the JIT to map translated code as executable.
constexpr auto prot = PAGE_READWRITE;
#endif
return impl->Map(virtual_addr, phys_addr, size, prot);
}

void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot,
Expand Down
3 changes: 2 additions & 1 deletion src/core/address_space.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#pragma once

#include <memory>
#include "common/arch.h"
#include "common/enum.h"
#include "common/types.h"

Expand All @@ -23,7 +24,7 @@ constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL;
#ifdef __APPLE__
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Can only comfortably reserve the first 0x7C0000000 of system reserved space.
constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL;
#else
Expand Down
3 changes: 3 additions & 0 deletions src/core/libraries/kernel/thread_management.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <thread>

#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/logging/log.h"
Expand Down Expand Up @@ -989,7 +990,9 @@ static void cleanup_thread(void* arg) {
static void* run_thread(void* arg) {
auto* thread = static_cast<ScePthread>(arg);
Common::SetCurrentThreadName(thread->name.c_str());
#ifdef ARCH_X86_64
Core::InitializeThreadPatchStack();
#endif
auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->InitTlsForThread(false);
void* ret = nullptr;
Expand Down
7 changes: 7 additions & 0 deletions src/core/linker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/config.h"
#include "common/logging/log.h"
Expand All @@ -27,6 +28,7 @@ static PS4_SYSV_ABI void ProgramExitFunc() {
}

static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
#ifdef ARCH_X86_64
// reinterpret_cast<entry_func_t>(addr)(params, exit_func); // can't be used, stack has to have
// a specific layout
asm volatile("andq $-16, %%rsp\n" // Align to 16 bytes
Expand All @@ -46,6 +48,9 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
:
: "r"(addr), "r"(params), "r"(exit_func)
: "rax", "rsi", "rdi");
#else
UNIMPLEMENTED_MSG("Missing RunMainEntry() implementation for target CPU architecture.");
#endif
}

Linker::Linker() : memory{Memory::Instance()} {}
Expand Down Expand Up @@ -85,7 +90,9 @@ void Linker::Execute() {

// Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread");
#ifdef ARCH_X86_64
InitializeThreadPatchStack();
#endif
Libraries::Kernel::pthreadInitSelfMainThread();
InitTlsForThread(true);

Expand Down
3 changes: 3 additions & 0 deletions src/core/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <xbyak/xbyak.h>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/logging/log.h"
#ifdef ENABLE_QT_GUI
Expand Down Expand Up @@ -134,9 +135,11 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
LOG_INFO(Core_Linker, "segment_mode ..........: {}", segment_mode);

add_segment(elf_pheader[i]);
#ifdef ARCH_X86_64
if (elf_pheader[i].p_flags & PF_EXEC) {
PatchInstructions(segment_addr, segment_file_size, c);
}
#endif
break;
}
case PT_DYNAMIC:
Expand Down
Loading

0 comments on commit 411449c

Please sign in to comment.