From: Dimitris Panokostas Date: Thu, 21 May 2026 13:06:23 +0000 (+0200) Subject: Fix Windows ARM64 JIT startup X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=898ddc7c96809c713afd058c054f1bb835a7b00d;p=francis%2Fwinuae.git Fix Windows ARM64 JIT startup Enable the ARM64 JIT build path, keep natmem above 4GB, reserve the full direct-memory window, commit read-only natmem gaps, and install a Windows ARM64 vectored exception handler for JIT access faults. Add a GitHub Actions Windows ARM64 binary job that installs the ARM64 MSVC tools and builds winuae_msvc.vcxproj directly without ARM64-building host generator projects. Validated with ARM64 Release and FullRelease MSBuild builds, plus VMware Windows ARM64 runtime logs showing the handler catching ramsey_low probes instead of crashing. --- diff --git a/.github/workflows/build-winuae-binary.yml b/.github/workflows/build-winuae-binary.yml index fd973cd4..f97dc13e 100644 --- a/.github/workflows/build-winuae-binary.yml +++ b/.github/workflows/build-winuae-binary.yml @@ -108,3 +108,51 @@ jobs: with: name: WinUAE x64 path: D:\Amiga + + Build-WinUAE-ARM64-binary: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v4 + + - name: Add MSBuild to PATH + uses: microsoft/setup-msbuild@v2 + + - name: Install Visual Studio Build Tools + shell: pwsh + run: | + Write-Host "Installing VS 2026 Build Tools..." 
+ choco install visualstudio2026buildtools ` + --package-parameters "--add Microsoft.VisualStudio.Workload.VCTools --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --add Microsoft.VisualStudio.Component.VC.Tools.ARM64 --add Microsoft.VisualStudio.Component.VC.ATL --add Microsoft.VisualStudio.Component.VC.ATL.ARM64 --add Microsoft.VisualStudio.Component.VC.ATLMFC --includeRecommended --quiet" ` + -y --ignore-package-exit-codes=3010 + Write-Host "VS 2026 Build Tools installation completed" + + # Running roughly step 4 of README.md + - name: Download WinUAE includes and libs + shell: powershell + run: Invoke-WebRequest -Uri "https://download.abime.net/winuae/files/b/winuaeinclibs.zip" -OutFile "winuaeinclibs.zip" + + - name: Unpack WinUAE includes and libs to C:\dev + uses: ihiroky/extract-action@v1 + with: + file_path: winuaeinclibs.zip + extract_dir: C:\dev + + # Running roughly step 7 of README.md + - name: Add NASM to PATH + uses: ilammy/setup-nasm@v1.5.1 + + # Running roughly step 12 of README.md + - name: Build ARM64 FullRelease + shell: cmd + working-directory: ${{env.GITHUB_WORKSPACE}} + # Add additional options to the MSBuild command line here (like platform or verbosity level). 
+ # See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\18\BuildTools\Common7\Tools\VsDevCmd.bat" + msbuild /m /p:Platform=ARM64 /p:Configuration=FullRelease /p:BuildProjectReferences=false ${{env.SOLUTION_FILE_PATH}}\winuae_msvc.vcxproj + + - uses: actions/upload-artifact@v4 + with: + name: WinUAE ARM64 + path: D:\Amiga diff --git a/fpp_native.cpp b/fpp_native.cpp index 83b69e0c..01517945 100644 --- a/fpp_native.cpp +++ b/fpp_native.cpp @@ -393,7 +393,11 @@ static void fp_from_exten(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wr #endif } #else // if !USE_LONG_DOUBLE +#ifdef CPU_AARCH64 +void fp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +#else static void fp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +#endif { if (!currprefs.cachesize || !currprefs.compfpu) { floatx80 fx80; diff --git a/include/memory.h b/include/memory.h index 13568469..da665628 100644 --- a/include/memory.h +++ b/include/memory.h @@ -53,7 +53,7 @@ typedef void (REGPARAM3 *mem_put_func)(uaecptr, uae_u32) REGPARAM; typedef uae_u8 *(REGPARAM3 *xlate_func)(uaecptr) REGPARAM; typedef int (REGPARAM3 *check_func)(uaecptr, uae_u32) REGPARAM; -extern uae_u32 max_z3fastmem, max_physmem; +extern size_t max_z3fastmem, max_physmem; extern uae_u32 wait_cpu_cycle_read (uaecptr addr, int mode); extern void wait_cpu_cycle_write (uaecptr addr, int mode, uae_u32 v); @@ -881,7 +881,8 @@ extern shmpiece *shm_start; extern uae_u8* natmem_offset; extern uae_u8 *natmem_reserved; -extern uae_u32 natmem_reserved_size; +extern size_t natmem_reserved_size; +void commit_natmem_gaps(void); #endif diff --git a/include/newcpu.h b/include/newcpu.h index 9f93a427..db9405c5 100644 --- a/include/newcpu.h +++ b/include/newcpu.h @@ -183,6 +183,13 @@ typedef struct #endif } fpdata; +#ifdef CPU_AARCH64 +#ifdef JIT +#include "jit/comptbl.h" +#include "jit/compemu.h" +#endif +#endif + struct 
regstruct { uae_u32 regs[16]; @@ -277,6 +284,20 @@ struct regstruct int ce020_tail; evt_t ce020_tail_cycles; int memory_waitstate_cycles; + +#ifdef CPU_AARCH64 +#ifdef JIT + /* store scratch regs also in this struct to avoid load of mem pointer */ + uae_u32 scratchregs[VREGS - S1]; + fpu_register scratchfregs[VFREGS - 8]; + uae_u32 jit_exception; + + /* pointer to real arrays/structs for easier access in JIT */ + uae_u32* raw_cputbl_count; + uintptr mem_banks; + uintptr cache_tags; +#endif +#endif }; extern struct regstruct regs; diff --git a/include/uae/vm.h b/include/uae/vm.h index 047b709c..210df2c4 100644 --- a/include/uae/vm.h +++ b/include/uae/vm.h @@ -33,14 +33,14 @@ void *uae_vm_alloc(uae_u32 size); void *uae_vm_alloc(uae_u32 size, int flags); #endif -void *uae_vm_alloc(uae_u32 size, int flags, int protect); -bool uae_vm_protect(void *address, int size, int protect); -bool uae_vm_free(void *address, int size); - -void *uae_vm_reserve(uae_u32 size, int flags); -void *uae_vm_reserve_fixed(void *address, uae_u32 size, int flags); -void *uae_vm_commit(void *address, uae_u32 size, int protect); -bool uae_vm_decommit(void *address, uae_u32 size); +void *uae_vm_alloc(size_t size, int flags, int protect); +bool uae_vm_protect(void *address, size_t size, int protect); +bool uae_vm_free(void *address, size_t size); + +void *uae_vm_reserve(size_t size, int flags); +void *uae_vm_reserve_fixed(void *address, size_t size, int flags); +void *uae_vm_commit(void *address, size_t size, int protect); +bool uae_vm_decommit(void *address, size_t size); int uae_vm_page_size(void); diff --git a/jit/arm/compemu_arm.h b/jit/arm/compemu_arm.h index bca82c0a..47c85015 100644 --- a/jit/arm/compemu_arm.h +++ b/jit/arm/compemu_arm.h @@ -389,8 +389,25 @@ typedef fptype fpu_register; /* Flags for Bernie during development/debugging. 
Should go away eventually */ #define DISTRUST_CONSISTENT_MEM 0 +void jit_abort(const char *format,...); void jit_abort(const TCHAR *format,...); +#if defined(CPU_AARCH64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define JIT_HAS_BUS_ERROR_RECOVERY 1 +#if defined(_WIN32) && defined(D) +#define WINUAE_RESTORE_D_AFTER_SETJMP +#pragma push_macro("D") +#undef D +#endif +#include <setjmp.h> +#ifdef WINUAE_RESTORE_D_AFTER_SETJMP +#pragma pop_macro("D") +#undef WINUAE_RESTORE_D_AFTER_SETJMP +#endif +extern jmp_buf jit_bus_error_jmpbuf; +extern volatile bool jit_in_compiled_code; +#endif + #ifdef CPU_64_BIT static inline uae_u32 check_uae_p32(uintptr address, const char* file, int line) { diff --git a/jit/arm/compemu_support_arm.cpp b/jit/arm/compemu_support_arm.cpp index 099d0107..af327a33 100644 --- a/jit/arm/compemu_support_arm.cpp +++ b/jit/arm/compemu_support_arm.cpp @@ -956,6 +956,380 @@ void invalidate_block(blockinfo* bi) remove_deps(bi); } +#if defined(_WIN32) && defined(CPU_AARCH64) +static void* installed_arm64_vector_handler; + +enum transfer_type_t { + TYPE_UNKNOWN, + TYPE_LOAD, + TYPE_STORE +}; + +enum type_size_t { + SIZE_UNKNOWN, + SIZE_BYTE, + SIZE_WORD, + SIZE_INT +}; + +static bool windows_arm64_jit_pc(uintptr pc) +{ + return (compiled_code && pc >= (uintptr)compiled_code && pc < (uintptr)current_compile_p) || + (popallspace && pc >= (uintptr)popallspace && pc < (uintptr)(popallspace + POPALLSPACE_SIZE)); +} + +static int delete_trigger(blockinfo* bi, void* pc) +{ + while (bi) { + if (bi->handler && (uae_u8*)bi->direct_handler <= pc && (uae_u8*)bi->nexthandler > pc) { + write_log(_T("JIT: Deleted trigger (%p < %p < %p) %p\n"), + bi->handler, pc, bi->nexthandler, bi->pc_p); + invalidate_block(bi); + raise_in_cl_list(bi); + set_special(0); + return 1; + } + bi = bi->next; + } + return 0; +} + +static int windows_arm64_exception_size(const int transfer_size) +{ + switch (transfer_size) { + case SIZE_BYTE: + return sz_byte; + case SIZE_WORD: + 
return sz_word; + case SIZE_INT: + default: + return sz_long; + } +} + +static void windows_arm64_jit_bus_error(const uaecptr amiga_addr, const bool read, const int transfer_size) +{ + exception2_setup(regs.opcode, amiga_addr, read, windows_arm64_exception_size(transfer_size), regs.s ? 4 : 0); + flush_icache(3); + countdown = 0; + set_special(SPCFLAG_END_COMPILE); + longjmp(jit_bus_error_jmpbuf, 2); +} + +static LONG CALLBACK windows_arm64_jit_exception_handler(PEXCEPTION_POINTERS info) +{ + if (info->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) + return EXCEPTION_CONTINUE_SEARCH; + if (!canbang || currprefs.cachesize == 0) + return EXCEPTION_CONTINUE_SEARCH; + + const uintptr fault_pc = (uintptr)info->ContextRecord->Pc; + if (!windows_arm64_jit_pc(fault_pc)) + return EXCEPTION_CONTINUE_SEARCH; + if (!natmem_offset) + return EXCEPTION_CONTINUE_SEARCH; + + if (info->ExceptionRecord->NumberParameters < 2) + return EXCEPTION_CONTINUE_SEARCH; + const auto access_type = info->ExceptionRecord->ExceptionInformation[0]; + if (access_type != 0 && access_type != 1) + return EXCEPTION_CONTINUE_SEARCH; + const uintptr fault_addr = (uintptr)info->ExceptionRecord->ExceptionInformation[1]; + const uintptr amiga_addr_wide = fault_addr - (uintptr)natmem_offset; + if (amiga_addr_wide > 0xffffffffULL) { + write_log(_T("JIT: Windows ARM64 AV outside natmem PC=%p fault=%p\n"), + (void*)fault_pc, (void*)fault_addr); + return EXCEPTION_CONTINUE_SEARCH; + } + const uaecptr amiga_addr = (uaecptr)amiga_addr_wide; + + if (a3000lmem_bank.allocated_size > 0 && + amiga_addr >= a3000lmem_bank.start - 0x00100000 && + amiga_addr < a3000lmem_bank.start - 0x00100000 + 8) { + write_log(_T("JIT: Windows ARM64 ramsey_low probe at 0x%08x, skipping faulting instruction.\n"), amiga_addr); + info->ContextRecord->Pc += 4; + return EXCEPTION_CONTINUE_EXECUTION; + } + if (a3000hmem_bank.allocated_size > 0 && + amiga_addr >= a3000hmem_bank.start + a3000hmem_bank.allocated_size && + amiga_addr 
< a3000hmem_bank.start + a3000hmem_bank.allocated_size + 8) { + write_log(_T("JIT: Windows ARM64 ramsey_high probe at 0x%08x, skipping faulting instruction.\n"), amiga_addr); + info->ContextRecord->Pc += 4; + return EXCEPTION_CONTINUE_EXECUTION; + } + + addrbank* ab = &get_mem_bank(amiga_addr); + const bool arm64_quarantine_candidate = (ab == &dummy_bank); + const uae_u32 opcode = *(uae_u32*)fault_pc; + transfer_type_t transfer_type = TYPE_UNKNOWN; + int transfer_size = SIZE_UNKNOWN; + + const int rd = opcode & 0x1f; + const int rn = (opcode >> 5) & 0x1f; + const int rm = (opcode >> 16) & 0x1f; + const uae_u32 option = (opcode >> 13) & 0x7; + const uae_u32 sbit = (opcode >> 12) & 0x1; + const uae_u32 imm12 = (opcode >> 10) & 0xfff; + bool reg_indexed = false; + bool unsigned_imm = false; + bool unscaled_imm = false; + + uae_u32 masked_op = opcode & 0xffe00c00; + switch (masked_op) { + case 0x38000000: // STURB_wXi + transfer_size = SIZE_BYTE; + transfer_type = TYPE_STORE; + unscaled_imm = true; + break; + case 0x38200800: // STRB_wXx + transfer_size = SIZE_BYTE; + transfer_type = TYPE_STORE; + reg_indexed = true; + break; + case 0x38400000: // LDURB_wXi + transfer_size = SIZE_BYTE; + transfer_type = TYPE_LOAD; + unscaled_imm = true; + break; + case 0x78000000: // STURH_wXi + transfer_size = SIZE_WORD; + transfer_type = TYPE_STORE; + unscaled_imm = true; + break; + case 0x78200800: // STRH_wXx + transfer_size = SIZE_WORD; + transfer_type = TYPE_STORE; + reg_indexed = true; + break; + case 0x78400000: // LDURH_wXi + transfer_size = SIZE_WORD; + transfer_type = TYPE_LOAD; + unscaled_imm = true; + break; + case 0xb8000000: // STUR_wXi + transfer_size = SIZE_INT; + transfer_type = TYPE_STORE; + unscaled_imm = true; + break; + case 0xb8200800: // STR_wXx + transfer_size = SIZE_INT; + transfer_type = TYPE_STORE; + reg_indexed = true; + break; + case 0xb8400000: // LDUR_wXi + transfer_size = SIZE_INT; + transfer_type = TYPE_LOAD; + unscaled_imm = true; + break; + case 
0x38600800: // LDRB_wXx + transfer_size = SIZE_BYTE; + transfer_type = TYPE_LOAD; + reg_indexed = true; + break; + case 0x78600800: // LDRH_wXx + transfer_size = SIZE_WORD; + transfer_type = TYPE_LOAD; + reg_indexed = true; + break; + case 0xb8600800: // LDR_wXx + transfer_size = SIZE_INT; + transfer_type = TYPE_LOAD; + reg_indexed = true; + break; + default: + break; + } + + if (transfer_size == SIZE_UNKNOWN) { + masked_op = opcode & 0xffc00000; + switch (masked_op) { + case 0x39000000: // STRB_wXi + transfer_size = SIZE_BYTE; + transfer_type = TYPE_STORE; + unsigned_imm = true; + break; + case 0x79000000: // STRH_wXi + transfer_size = SIZE_WORD; + transfer_type = TYPE_STORE; + unsigned_imm = true; + break; + case 0xb9000000: // STR_wXi + transfer_size = SIZE_INT; + transfer_type = TYPE_STORE; + unsigned_imm = true; + break; + case 0x39400000: // LDRB_wXi + transfer_size = SIZE_BYTE; + transfer_type = TYPE_LOAD; + unsigned_imm = true; + break; + case 0x79400000: // LDRH_wXi + transfer_size = SIZE_WORD; + transfer_type = TYPE_LOAD; + unsigned_imm = true; + break; + case 0xb9400000: // LDR_wXi + transfer_size = SIZE_INT; + transfer_type = TYPE_LOAD; + unsigned_imm = true; + break; + default: + break; + } + } + + const auto get_reg_w = [&](const int reg) -> uae_u32 { + if (reg == 31) + return 0; + return (uae_u32)info->ContextRecord->X[reg]; + }; + const auto get_reg_x = [&](const int reg) -> uae_u64 { + if (reg == 31) + return 0; + return (uae_u64)info->ContextRecord->X[reg]; + }; + const auto get_base_x = [&](const int reg) -> uae_u64 { + if (reg == 31) + return (uae_u64)info->ContextRecord->Sp; + return get_reg_x(reg); + }; + const auto set_reg_w = [&](const int reg, const uae_u32 value) { + if (reg == 31) + return; + info->ContextRecord->X[reg] = value; + }; + + if (transfer_size != SIZE_UNKNOWN) { + const uae_u64 rn_val = get_base_x(rn); + uae_u64 rm_x = 0; + uae_u64 offset = 0; + const int scale_bits = transfer_size == SIZE_WORD ? 
1 : transfer_size == SIZE_INT ? 2 : 0; + if (reg_indexed) { + const uae_u32 shift = sbit ? (uae_u32)scale_bits : 0; + rm_x = get_reg_x(rm); + offset = rm_x; + switch (option) { + case 0b010: // UXTW + offset = (uae_u64)(uae_u32)rm_x << shift; + break; + case 0b011: // LSL + offset = rm_x << shift; + break; + case 0b110: { // SXTW + uae_s64 s = (uae_s64)(uae_s32)(uae_u32)rm_x; + if (shift) + s <<= shift; + offset = (uae_u64)s; + break; + } + case 0b111: { // SXTX + uae_s64 s = (uae_s64)rm_x; + if (shift) + s <<= shift; + offset = (uae_u64)s; + break; + } + default: + offset = rm_x << shift; + break; + } + } else if (unsigned_imm) { + offset = (uae_u64)imm12 << scale_bits; + } else if (unscaled_imm) { + const uae_u32 imm9 = (opcode >> 12) & 0x1ff; + const auto signed_imm9 = (uae_s64)(imm9 & 0x100 ? (int)imm9 - 0x200 : (int)imm9); + offset = (uae_u64)signed_imm9; + } + + const uae_u64 eff_addr = rn_val + offset; + if (eff_addr != (uae_u64)fault_addr) { + write_log(_T("JIT: Windows ARM64 EA mismatch fault=%016llx ea=%016llx opcode=%08x\n"), + (unsigned long long)fault_addr, (unsigned long long)eff_addr, opcode); + if (arm64_quarantine_candidate && jit_in_compiled_code) { + windows_arm64_jit_bus_error(amiga_addr, transfer_type == TYPE_LOAD, transfer_size); + } + return EXCEPTION_CONTINUE_SEARCH; + } + + if (ab == &dummy_bank) { + if (transfer_type == TYPE_LOAD) { + set_reg_w(rd, 0); + write_log(_T("JIT: Windows ARM64 dummy_bank load at %08x, returning 0 to x%d\n"), amiga_addr, rd); + } else { + write_log(_T("JIT: Windows ARM64 dummy_bank store at %08x ignored\n"), amiga_addr); + } + } else if (transfer_type == TYPE_LOAD) { + uae_u32 newval = get_reg_w(rd); + switch (transfer_size) { + case SIZE_BYTE: + newval = (uae_u8)get_byte_jit(amiga_addr); + break; + case SIZE_WORD: + newval = do_byteswap_16((uae_u16)get_word_jit(amiga_addr)); + break; + case SIZE_INT: + newval = do_byteswap_32(get_long_jit(amiga_addr)); + break; + default: + break; + } + set_reg_w(rd, newval); + } 
else { + const uae_u32 regval = get_reg_w(rd); + switch (transfer_size) { + case SIZE_BYTE: + put_byte_jit(amiga_addr, regval); + break; + case SIZE_WORD: + put_word_jit(amiga_addr, do_byteswap_16((uae_u16)regval)); + break; + case SIZE_INT: + put_long_jit(amiga_addr, do_byteswap_32(regval)); + break; + default: + break; + } + } + + info->ContextRecord->Pc += 4; + countdown = 0; + set_special(SPCFLAG_END_COMPILE); + if (arm64_quarantine_candidate) { + flush_icache(3); + } + + bool deleted = delete_trigger(active, (void*)fault_pc); + if (!deleted) { + deleted = delete_trigger(dormant, (void*)fault_pc); + } + if (!deleted) { + set_special(0); + } + return EXCEPTION_CONTINUE_EXECUTION; + } + + if (arm64_quarantine_candidate && jit_in_compiled_code) { + write_log(_T("JIT: Windows ARM64 unhandled insn 0x%08x at unmapped %08x, returning to interpreter.\n"), + opcode, amiga_addr); + const bool read = access_type == 0; + windows_arm64_jit_bus_error(amiga_addr, read, transfer_size); + } + + write_log(_T("JIT: Windows ARM64 unhandled access violation at PC=%p fault=%p amiga=%08x opcode=%08x bank=%s\n"), + (void*)fault_pc, (void*)fault_addr, amiga_addr, opcode, + ab && ab->name ? 
ab->name : _T("NONE")); + return EXCEPTION_CONTINUE_SEARCH; +} + +static void install_windows_arm64_jit_exception_handler(void) +{ + if (!installed_arm64_vector_handler) { + write_log(_T("JIT: Installing Windows ARM64 vectored exception handler\n")); + installed_arm64_vector_handler = AddVectoredExceptionHandler(0, windows_arm64_jit_exception_handler); + } +} +#endif + static inline void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uintptr target) { blockinfo* tbi = get_blockinfo_addr((void*)target); @@ -1226,7 +1600,7 @@ static inline void emit_block(const uae_u8* block, uae_u32 blocklen) static inline uae_u32 reverse32(uae_u32 v) { - return uae_bswap_32(v); + return do_byteswap_32(v); } static void set_target(uae_u8* t) @@ -2119,6 +2493,10 @@ void compiler_init(void) flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard; +#if defined(_WIN32) && defined(CPU_AARCH64) + install_windows_arm64_jit_exception_handler(); +#endif + initialized = true; #ifdef PROFILE_UNTRANSLATED_INSNS @@ -3446,7 +3824,7 @@ int failure; static inline unsigned int get_opcode_cft_map(unsigned int f) { - return uae_bswap_16(f); + return do_byteswap_16(f); } #define DO_GET_OPCODE(a) (get_opcode_cft_map((uae_u16)*(a))) diff --git a/jit/compemu.cpp b/jit/compemu.cpp index 42fda8b9..f3534b2b 100644 --- a/jit/compemu.cpp +++ b/jit/compemu.cpp @@ -1,4 +1,4 @@ -#if defined(CPU_AARCH64) +#if defined(CPU_AARCH64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "arm/compemu_arm.cpp" #else #include "sysconfig.h" diff --git a/jit/compemu.h b/jit/compemu.h index 09c4e21a..b4b30f3a 100644 --- a/jit/compemu.h +++ b/jit/compemu.h @@ -1,4 +1,4 @@ -#if defined(CPU_AARCH64) +#if defined(CPU_AARCH64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "arm/compemu_arm.h" #else /* diff --git a/jit/compemu_fpp.cpp b/jit/compemu_fpp.cpp index c1c0576b..cda42187 100644 --- a/jit/compemu_fpp.cpp +++ b/jit/compemu_fpp.cpp @@ -1,4 +1,4 @@ -#if 
defined(CPU_AARCH64) +#if defined(CPU_AARCH64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "arm/compemu_fpp_arm.cpp" #else /* diff --git a/jit/compemu_support.cpp b/jit/compemu_support.cpp index 3202eb35..3e78fc8d 100644 --- a/jit/compemu_support.cpp +++ b/jit/compemu_support.cpp @@ -1,4 +1,4 @@ -#if defined(CPU_AARCH64) +#if defined(CPU_AARCH64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "arm/compemu_support_arm.cpp" #else /* diff --git a/jit/compstbl.cpp b/jit/compstbl.cpp index e272ed8b..f24b1357 100644 --- a/jit/compstbl.cpp +++ b/jit/compstbl.cpp @@ -1,4 +1,4 @@ -#if defined(CPU_AARCH64) +#if defined(CPU_AARCH64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #include "arm/compstbl_arm.cpp" #else #include "sysconfig.h" diff --git a/memory.cpp b/memory.cpp index ca37c6dd..6d3d038f 100644 --- a/memory.cpp +++ b/memory.cpp @@ -3381,6 +3381,11 @@ void memory_reset (void) if (mem_hardreset) { memory_clear (); } +#if defined(NATMEM_OFFSET) && defined(CPU_AARCH64) + if (canbang) { + commit_natmem_gaps(); + } +#endif write_log (_T("memory init end\n")); } diff --git a/newcpu.cpp b/newcpu.cpp index 98d46922..4c99d687 100644 --- a/newcpu.cpp +++ b/newcpu.cpp @@ -62,6 +62,12 @@ /* Need to have these somewhere */ bool check_prefs_changed_comp (bool checkonly) { return false; } #endif + +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) +jmp_buf jit_bus_error_jmpbuf; +volatile bool jit_in_compiled_code = false; +#endif + /* For faster JIT cycles handling */ int pissoff = 0; @@ -5464,6 +5470,17 @@ static void custom_reset_cpu(bool hardreset, bool keyboardreset) #ifdef JIT /* Completely different run_2 replacement */ +#ifdef CPU_AARCH64 +void execute_exception(uae_u32 cycles) +{ + countdown -= cycles; + Exception_cpu(regs.jit_exception); + regs.jit_exception = 0; + cpu_cycles = adjust_cycles(4 * CYCLE_UNIT / 2); + do_cycles(cpu_cycles); +} +#endif + void do_nothing (void) { if 
(!currprefs.cpu_thread) { @@ -5584,12 +5601,29 @@ static void cpu_thread_run_jit(void *v) #endif { for (;;) { +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + { + int bus_error_exc = setjmp(jit_bus_error_jmpbuf); + if (bus_error_exc != 0) { + jit_in_compiled_code = false; + Exception(bus_error_exc); + continue; + } + } + jit_in_compiled_code = true; +#endif ((compiled_handler*)(pushall_call_handler))(); /* Whenever we return from that, we should check spcflags */ if (regs.spcflags || cpu_thread_ilvl > 0) { +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + jit_in_compiled_code = false; +#endif if (do_specialties_thread()) { break; } +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + jit_in_compiled_code = true; +#endif } } } @@ -5604,6 +5638,9 @@ static void cpu_thread_run_jit(void *v) } #endif cpu_thread_active = 0; +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + jit_in_compiled_code = false; +#endif } #endif @@ -5625,19 +5662,38 @@ static void m68k_run_jit(void) for (;;) { #ifdef USE_STRUCTURED_EXCEPTION_HANDLING __try { +#endif +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + { + int bus_error_exc = setjmp(jit_bus_error_jmpbuf); + if (bus_error_exc != 0) { + jit_in_compiled_code = false; + Exception(bus_error_exc); + } + } + jit_in_compiled_code = true; #endif for (;;) { ((compiled_handler*)(pushall_call_handler))(); /* Whenever we return from that, we should check spcflags */ check_uae_int_request(); if (regs.spcflags) { +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + jit_in_compiled_code = false; +#endif if (do_specialties(0)) { STOPTRY; return; } +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + jit_in_compiled_code = true; +#endif } // If T0, T1 or M got set: run normal emulation loop if (regs.t0 || regs.t1 || regs.m) { +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + jit_in_compiled_code = false; +#endif flush_icache(3); struct regstruct *r = &regs; bool exit = false; @@ -5654,6 +5710,9 @@ static void m68k_run_jit(void) } } unset_special(SPCFLAG_END_COMPILE); +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + 
jit_in_compiled_code = true; +#endif } } #ifdef USE_STRUCTURED_EXCEPTION_HANDLING @@ -7831,7 +7890,11 @@ void exception3_write(uae_u32 opcode, uaecptr addr, int size, uae_u32 val, int f void exception2_setup(uae_u32 opcode, uaecptr addr, bool read, int size, uae_u32 fc) { +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + last_addr_for_exception_3 = jit_in_compiled_code ? regs.instruction_pc : m68k_getpc(); +#else last_addr_for_exception_3 = m68k_getpc(); +#endif last_fault_for_exception_3 = addr; last_writeaccess_for_exception_3 = read == 0; last_op_for_exception_3 = opcode; @@ -7873,6 +7936,11 @@ void hardware_exception2(uaecptr addr, uae_u32 v, bool read, bool ins, int size) } // Non-MMU exception2_setup(regs.opcode, addr, read, size, fc); +#if defined(JIT_HAS_BUS_ERROR_RECOVERY) + if (jit_in_compiled_code) { + longjmp(jit_bus_error_jmpbuf, 2); + } +#endif THROW(2); } } diff --git a/od-win32/machdep/m68k.h b/od-win32/machdep/m68k.h index cea10e77..798c8529 100644 --- a/od-win32/machdep/m68k.h +++ b/od-win32/machdep/m68k.h @@ -13,7 +13,54 @@ extern int cctrue(int cc); -#ifndef SAHF_SETO_PROFITABLE +#if defined(CPU_AARCH64) + +struct flag_struct { + union { + uae_u64 cznv; + uae_u64 nzcv; + }; + uae_u64 x; +}; + +extern struct flag_struct regflags; + +/* + * ARM64 JIT stores the 68k CZNV flags in the host NZCV bit positions. 
+ */ + +#define FLAGBIT_N 31 +#define FLAGBIT_Z 30 +#define FLAGBIT_C 29 +#define FLAGBIT_V 28 +#define FLAGBIT_X 0 + +#define FLAGVAL_N (1u << FLAGBIT_N) +#define FLAGVAL_Z (1u << FLAGBIT_Z) +#define FLAGVAL_C (1u << FLAGBIT_C) +#define FLAGVAL_V (1u << FLAGBIT_V) +#define FLAGVAL_X (1u << FLAGBIT_X) + +#define SET_NFLG(y) (regflags.nzcv = (regflags.nzcv & ~FLAGVAL_N) | (((y) & 1) << FLAGBIT_N)) +#define SET_ZFLG(y) (regflags.nzcv = (regflags.nzcv & ~FLAGVAL_Z) | (((y) & 1) << FLAGBIT_Z)) +#define SET_CFLG(y) (regflags.nzcv = (regflags.nzcv & ~FLAGVAL_C) | (((y) & 1) << FLAGBIT_C)) +#define SET_VFLG(y) (regflags.nzcv = (regflags.nzcv & ~FLAGVAL_V) | (((y) & 1) << FLAGBIT_V)) +#define SET_XFLG(y) (regflags.x = ((y) & 1)) + +#define GET_NFLG() ((regflags.nzcv >> FLAGBIT_N) & 1) +#define GET_ZFLG() ((regflags.nzcv >> FLAGBIT_Z) & 1) +#define GET_CFLG() ((regflags.nzcv >> FLAGBIT_C) & 1) +#define GET_VFLG() ((regflags.nzcv >> FLAGBIT_V) & 1) +#define GET_XFLG() ((regflags.x) & 1) + +#define CLEAR_CZNV() (regflags.nzcv = 0) +#define GET_CZNV() (regflags.nzcv) +#define IOR_CZNV(X) (regflags.nzcv |= (X)) +#define SET_CZNV(X) (regflags.nzcv = (X)) + +#define COPY_CARRY() (regflags.x = (regflags.nzcv >> FLAGBIT_C) & 1) + +#elif !defined(SAHF_SETO_PROFITABLE) struct flag_struct { #if defined(CPU_x86_64) diff --git a/od-win32/mman.cpp b/od-win32/mman.cpp index 6e04d291..eb4c998e 100644 --- a/od-win32/mman.cpp +++ b/od-win32/mman.cpp @@ -12,6 +12,8 @@ #include "gfxboard.h" #include "cpuboard.h" #include "gui.h" +#include <algorithm> +#include <vector> #ifdef WINUAE #include "win32.h" #endif @@ -20,8 +22,8 @@ #define WIN32_NATMEM_TEST 0 -uae_u32 max_z3fastmem; -uae_u32 max_physmem; +size_t max_z3fastmem; +size_t max_physmem; /* BARRIER is used in case Amiga memory is access across memory banks, * for example move.l $1fffffff,d0 when $10000000-$1fffffff is mapped and @@ -36,7 +38,7 @@ uae_u32 max_physmem; static struct uae_shmid_ds shmids[MAX_SHMID]; uae_u8 *natmem_reserved, *natmem_offset; 
-uae_u32 natmem_reserved_size; +size_t natmem_reserved_size; static uae_u8 *p96mem_offset; static int p96mem_size; static uae_u32 p96base_offset; @@ -191,7 +193,7 @@ bool preinit_shm (void) if ((uae_u64)max_allowed_mman * 1024 * 1024 > size64) max_allowed_mman = (uae_u32)(size64 / (1024 * 1024)); - uae_u32 natmem_size = (max_allowed_mman + 1) * 1024 * 1024; + size_t natmem_size = (max_allowed_mman + 1) * (size_t)1024 * 1024; if (natmem_size < 17 * 1024 * 1024) natmem_size = 17 * 1024 * 1024; @@ -202,13 +204,27 @@ bool preinit_shm (void) if (natmem_size > 0xc0000000) { natmem_size = 0xc0000000; } +#if defined(CPU_AARCH64) + /* Windows ARM64 JIT direct memory can address the full 68040 32-bit + * address space through natmem_offset + m68k_addr. Reserve the whole + * range so later gap commits can make unmapped reads safe. */ + if (natmem_size < 0x100000000ULL) + natmem_size = 0x100000000ULL; +#endif write_log (_T("MMAN: Total physical RAM %llu MB, all RAM %llu MB\n"), totalphys64 >> 20, total64 >> 20); - write_log(_T("MMAN: Attempting to reserve: %u MB\n"), natmem_size >> 20); + write_log(_T("MMAN: Attempting to reserve: %zu MB\n"), natmem_size >> 20); #if 1 - natmem_reserved = (uae_u8 *) uae_vm_reserve(natmem_size, UAE_VM_32BIT | UAE_VM_WRITE_WATCH); + int vm_flags = UAE_VM_32BIT | UAE_VM_WRITE_WATCH; +#if defined(CPU_AARCH64) + /* Keep Windows ARM64 natmem above 4GB. A low mapping at 0x80000000 + * can make normal Kickstart host PCs look like signed 32-bit values + * if any ARM64 path sign-extends a word-sized pointer component. 
*/ + vm_flags &= ~UAE_VM_32BIT; +#endif + natmem_reserved = (uae_u8 *) uae_vm_reserve(natmem_size, vm_flags); #else natmem_size = 0x20000000; natmem_reserved = (uae_u8 *) uae_vm_reserve_fixed( @@ -217,7 +233,7 @@ bool preinit_shm (void) if (!natmem_reserved) { if (natmem_size <= 768 * 1024 * 1024) { - uae_u32 p = 0x78000000 - natmem_size; + uae_u32 p = 0x78000000 - (uae_u32)natmem_size; for (;;) { natmem_reserved = (uae_u8*) VirtualAlloc((void*)(intptr_t)p, natmem_size, MEM_RESERVE | MEM_WRITE_WATCH, PAGE_READWRITE); if (natmem_reserved) @@ -256,8 +272,8 @@ bool preinit_shm (void) max_z3fastmem = natmem_size; } max_physmem = natmem_size; - write_log (_T("MMAN: Reserved %p-%p (0x%08x %dM)\n"), - natmem_reserved, (uae_u8 *) natmem_reserved + natmem_reserved_size, + write_log (_T("MMAN: Reserved %p-%p (0x%zx %zuM)\n"), + natmem_reserved, natmem_reserved + natmem_reserved_size, natmem_reserved_size, natmem_reserved_size / (1024 * 1024)); clear_shm (); @@ -413,11 +429,11 @@ static int doinit_shm (void) // rtg outside of natmem? if (start_rtg > 0 && start_rtg < 0xffffffff && end_rtg > natmem_reserved_size) { if (jit_direct_compatible_memory) { - write_log(_T("MMAN: VRAM outside of natmem (%08x > %08x), switching off JIT Direct.\n"), end_rtg, natmem_reserved_size); + write_log(_T("MMAN: VRAM outside of natmem (%08x > %zx), switching off JIT Direct.\n"), end_rtg, natmem_reserved_size); jit_direct_compatible_memory = false; } if (end_rtg - start_rtg > natmem_reserved_size) { - write_log(_T("MMAN: VRAMs don't fit in natmem space! (%08x > %08x)\n"), end_rtg - start_rtg, natmem_reserved_size); + write_log(_T("MMAN: VRAMs don't fit in natmem space! 
(%08x > %zx)\n"), end_rtg - start_rtg, natmem_reserved_size); notify_user(NUMSG_NOMEMORY); return -1; } @@ -455,7 +471,7 @@ static int doinit_shm (void) ab->jit_read_flag = 0; ab->jit_write_flag = 0; if (aci->start + aci->size > natmem_reserved_size) { - write_log(_T("%s %08x-%08x: not JIT direct capable (>%08x)!\n"), ab->name, aci->start, aci->start + aci->size - 1, natmem_reserved_size); + write_log(_T("%s %08x-%08x: not JIT direct capable (>%zx)!\n"), ab->name, aci->start, aci->start + aci->size - 1, natmem_reserved_size); ab->flags |= ABFLAG_ALLOCINDIRECT; ab->jit_read_flag = S_READ; ab->jit_write_flag = S_WRITE; @@ -1012,6 +1028,100 @@ void protect_roms(bool protect) } } +/* + * Commit dummy read-only pages for unmapped gaps in the natmem reservation. + * + * ARM64 JIT direct memory can access natmem_offset + m68k_addr for normal + * memory reads. Actual RAM/ROM banks are committed separately; the gaps stay + * reserved but inaccessible unless we back them here. Read-only gaps preserve + * dummy-bank read behavior without creating writable phantom RAM. 
+ */
+void commit_natmem_gaps(void)
+{
+	if (!canbang || !natmem_reserved || !natmem_reserved_size)
+		return; // nothing to back: direct memory is off or no natmem reservation exists
+
+	struct range {
+		size_t start; // byte offset from natmem_reserved, inclusive
+		size_t end; // byte offset from natmem_reserved, exclusive
+	};
+
+	std::vector<range> committed; // ranges already covered by attached shm segments
+	for (int i = 0; i < MAX_SHMID; i++) {
+		if (shmids[i].key == -1 || !shmids[i].attached || !shmids[i].natmembase)
+			continue;
+
+		uae_u8 *host_addr = (uae_u8*)shmids[i].attached;
+		if (host_addr < natmem_reserved || host_addr >= natmem_reserved + natmem_reserved_size)
+			continue; // segment does not start inside the natmem reservation
+
+		size_t offset = (size_t)(host_addr - natmem_reserved);
+		size_t size = shmids[i].size;
+		if (offset + size > natmem_reserved_size)
+			size = natmem_reserved_size - offset; // clamp to the end of the reservation
+		if (size > 0)
+			committed.push_back({ offset, offset + size });
+	}
+
+	std::sort(committed.begin(), committed.end(), // order by start so one pass can merge overlaps
+		[](const range& a, const range& b) { return a.start < b.start; });
+
+	std::vector<range> merged; // sorted, non-overlapping union of the committed ranges
+	for (const auto& r : committed) {
+		if (!merged.empty() && r.start <= merged.back().end) {
+			if (r.end > merged.back().end)
+				merged.back().end = r.end;
+		} else {
+			merged.push_back(r);
+		}
+	}
+
+	uae_u8 fill_byte = currprefs.cs_unmapped_space == 2 ? 0xff : 0x00;
+	size_t page_mask = (size_t)uae_vm_page_size() - 1;
+	size_t total_gap = 0;
+	size_t pos = 0; // end offset of the last range handled so far
+
+	for (const auto& r : merged) {
+		if (r.start > pos) {
+			size_t gap_start = (pos + page_mask) & ~page_mask; // round up to a page boundary
+			size_t gap_end = r.start & ~page_mask; // round down to a page boundary
+			if (gap_end > gap_start) {
+				size_t gap_size = gap_end - gap_start;
+				uae_u8 *addr = natmem_reserved + gap_start;
+				if (uae_vm_commit(addr, gap_size, UAE_VM_READ_WRITE)) { // writable first so it can be filled
+					if (fill_byte) // zero fill is skipped; presumably fresh pages already read as zero
+						memset(addr, fill_byte, gap_size);
+					uae_vm_protect(addr, gap_size, UAE_VM_READ); // then drop to read-only
+					total_gap += gap_size;
+					write_log(_T("MMAN: Committed gap %zx-%zx (%zuK) fill=0x%02x [read-only]\n"),
+						gap_start, gap_end, gap_size >> 10, fill_byte);
+				}
+			}
+		}
+		pos = r.end;
+	}
+
+	if (pos < natmem_reserved_size) { // gap between the last range and the end of the reservation
+		size_t gap_start = (pos + page_mask) & ~page_mask;
+		size_t gap_end = natmem_reserved_size & ~page_mask;
+		if (gap_end > gap_start) {
+			size_t gap_size = gap_end - gap_start;
+			uae_u8 *addr = natmem_reserved + gap_start;
+			if (uae_vm_commit(addr, gap_size, UAE_VM_READ_WRITE)) {
+				if (fill_byte)
+					memset(addr, fill_byte, gap_size);
+				uae_vm_protect(addr, gap_size, UAE_VM_READ);
+				total_gap += gap_size;
+				write_log(_T("MMAN: Committed trailing gap %zx-%zx (%zuK) fill=0x%02x [read-only]\n"),
+					gap_start, gap_end, gap_size >> 10, fill_byte);
+			}
+		}
+	}
+
+	if (total_gap > 0)
+		write_log(_T("MMAN: Total gap pages committed: %zuK\n"), total_gap >> 10);
+}
+
 // Mark indirect regions (indirect VRAM) as non-accessible when JIT direct is active.
 // Beginning of region might have barrier region which is not marked as non-accessible,
 // allowing JIT direct to think it is directly accessible VRAM.
diff --git a/od-win32/win32gui.cpp b/od-win32/win32gui.cpp index 6fe6262d..de06e98c 100644 --- a/od-win32/win32gui.cpp +++ b/od-win32/win32gui.cpp @@ -10249,11 +10249,12 @@ static void setfastram_selectmenu(HWND hDlg, int mode) enable_for_memorydlg(hDlg); } -extern uae_u32 natmem_reserved_size; +extern size_t natmem_reserved_size; static void setmax32bitram (HWND hDlg) { TCHAR tmp[256], tmp2[256]; - uae_u32 size32 = 0, z3size_uae = 0, z3size_real = 0; + uae_u32 size32 = 0; + size_t z3size_uae = 0, z3size_real = 0; z3size_uae = natmem_reserved_size >= expamem_z3_pointer_uae ? natmem_reserved_size - expamem_z3_pointer_uae : 0; z3size_real = natmem_reserved_size >= expamem_z3_pointer_real ? natmem_reserved_size - expamem_z3_pointer_real : 0; @@ -10274,7 +10275,10 @@ static void setmax32bitram (HWND hDlg) size32 -= first; WIN32GUI_LoadUIString(IDS_MEMINFO, tmp2, sizeof(tmp2) / sizeof(TCHAR)); - _stprintf (tmp, tmp2, size32 / (1024 * 1024), (natmem_reserved_size - 256 * 1024 * 1024) / (1024 * 1024), z3size_uae / (1024 * 1024), z3size_real / (1024 * 1024)); + _stprintf (tmp, tmp2, size32 / (1024 * 1024), + (uae_u32)((natmem_reserved_size - 256 * 1024 * 1024) / (1024 * 1024)), + (uae_u32)(z3size_uae / (1024 * 1024)), + (uae_u32)(z3size_real / (1024 * 1024))); SetDlgItemText (hDlg, IDC_MAX32RAM, tmp); } diff --git a/od-win32/winuae_msvc15/winuae_msvc.vcxproj b/od-win32/winuae_msvc15/winuae_msvc.vcxproj index 77a074d8..2ee8f01b 100644 --- a/od-win32/winuae_msvc15/winuae_msvc.vcxproj +++ b/od-win32/winuae_msvc15/winuae_msvc.vcxproj @@ -700,7 +700,7 @@ NotSet $(OutDir)$(TargetName)$(TargetExt) true - %(AdditionalLibraryDirectories);$(SolutionDir)\..\lib\ + C:\dev\lib\arm64;%(AdditionalLibraryDirectories);$(SolutionDir)\..\lib\ MSVCRT.lib;%(IgnoreSpecificDefaultLibraries);MSVCRT ws2_32.dll;msacm32.dll;wtsapi32.dll;dsound.dll;Iphlpapi.dll;portaudio_arm64.dll;%(DelayLoadDLLs) true @@ -928,7 +928,7 @@ NotSet $(OutDir)$(TargetName)$(TargetExt) true - 
%(AdditionalLibraryDirectories);$(SolutionDir)\..\lib\ + C:\dev\lib\arm64;%(AdditionalLibraryDirectories);$(SolutionDir)\..\lib\ %(IgnoreSpecificDefaultLibraries);MSVCRT ws2_32.dll;msacm32.dll;wtsapi32.dll;dsound.dll;Iphlpapi.dll;portaudio_arm64.dll;%(DelayLoadDLLs) true @@ -1244,7 +1244,7 @@ NotSet $(OutDir)$(TargetName)$(TargetExt) true - %(AdditionalLibraryDirectories);$(SolutionDir)\..\lib\ + C:\dev\lib\arm64;%(AdditionalLibraryDirectories);$(SolutionDir)\..\lib\ %(IgnoreSpecificDefaultLibraries);MSVCRT ws2_32.dll;msacm32.dll;wtsapi32.dll;dsound.dll;Iphlpapi.dll;portaudio_arm64.dll;%(DelayLoadDLLs) true diff --git a/vm.cpp b/vm.cpp index d45168c9..d844b4c6 100644 --- a/vm.cpp +++ b/vm.cpp @@ -38,7 +38,7 @@ struct alloc_size { void *address; - uae_u32 size; + size_t size; }; #define MAX_ALLOCATIONS 2048 @@ -47,9 +47,9 @@ struct alloc_size { * could be awkward if/when you want to allocate page-aligned memory. */ static struct alloc_size alloc_sizes[MAX_ALLOCATIONS]; -static void add_allocation(void *address, uae_u32 size) +static void add_allocation(void *address, size_t size) { - uae_log("VM: add_allocation %p (%d)\n", address, size); + uae_log("VM: add_allocation %p (%zu)\n", address, size); for (int i = 0; i < MAX_ALLOCATIONS; i++) { if (alloc_sizes[i].address == NULL) { alloc_sizes[i].address = address; @@ -60,7 +60,7 @@ static void add_allocation(void *address, uae_u32 size) abort(); } -static uae_u32 find_allocation(void *address) +static size_t find_allocation(void *address) { for (int i = 0; i < MAX_ALLOCATIONS; i++) { if (alloc_sizes[i].address == address) { @@ -70,12 +70,12 @@ static uae_u32 find_allocation(void *address) abort(); } -static uae_u32 remove_allocation(void *address) +static size_t remove_allocation(void *address) { for (int i = 0; i < MAX_ALLOCATIONS; i++) { if (alloc_sizes[i].address == address) { alloc_sizes[i].address = NULL; - uae_u32 size = alloc_sizes[i].size; + size_t size = alloc_sizes[i].size; alloc_sizes[i].size = 0; return 
size; } @@ -133,7 +133,7 @@ int uae_vm_page_size(void) return page_size; } -static void *uae_vm_alloc_with_flags(uae_u32 size, int flags, int protect) +static void *uae_vm_alloc_with_flags(size_t size, int flags, int protect) { void *address = NULL; static bool first_allocation = true; @@ -143,7 +143,7 @@ static void *uae_vm_alloc_with_flags(uae_u32 size, int flags, int protect) first_allocation = false; } #ifdef LOG_ALLOCATIONS - uae_log("VM: Allocate 0x%-8x bytes [%d] (%s)\n", + uae_log("VM: Allocate 0x%-8zx bytes [%d] (%s)\n", size, flags, protect_description(protect)); #endif @@ -208,7 +208,7 @@ static void *uae_vm_alloc_with_flags(uae_u32 size, int flags, int protect) } if (address == NULL) { - uae_log("VM: uae_vm_alloc(%u, %d, %d) mmap failed (%d)\n", + uae_log("VM: uae_vm_alloc(%zu, %d, %d) mmap failed (%d)\n", size, flags, protect, errno); return NULL; } @@ -221,27 +221,27 @@ static void *uae_vm_alloc_with_flags(uae_u32 size, int flags, int protect) return address; } -void *uae_vm_alloc(uae_u32 size, int flags, int protect) +void *uae_vm_alloc(size_t size, int flags, int protect) { return uae_vm_alloc_with_flags(size, flags, protect); } -static bool do_protect(void *address, int size, int protect) +static bool do_protect(void *address, size_t size, int protect) { #ifdef TRACK_ALLOCATIONS - uae_u32 allocated_size = find_allocation(address); + size_t allocated_size = find_allocation(address); assert(allocated_size == size); #endif #ifdef _WIN32 DWORD old; if (VirtualProtect(address, size, protect_to_native(protect), &old) == 0) { - uae_log("VM: uae_vm_protect(%p, %d, %d) VirtualProtect failed (%lu)\n", + uae_log("VM: uae_vm_protect(%p, %zu, %d) VirtualProtect failed (%lu)\n", address, size, protect, GetLastError()); return false; } #else if (mprotect(address, size, protect_to_native(protect)) != 0) { - uae_log("VM: uae_vm_protect(%p, %d, %d) mprotect failed (%d)\n", + uae_log("VM: uae_vm_protect(%p, %zu, %d) mprotect failed (%d)\n", address, size, protect, 
errno); return false; } @@ -249,22 +249,22 @@ static bool do_protect(void *address, int size, int protect) return true; } -bool uae_vm_protect(void *address, int size, int protect) +bool uae_vm_protect(void *address, size_t size, int protect) { return do_protect(address, size, protect); } -static bool do_free(void *address, int size) +static bool do_free(void *address, size_t size) { #ifdef TRACK_ALLOCATIONS - uae_u32 allocated_size = remove_allocation(address); + size_t allocated_size = remove_allocation(address); assert(allocated_size == size); #endif #ifdef _WIN32 return VirtualFree(address, 0, MEM_RELEASE) != 0; #else if (munmap(address, size) != 0) { - uae_log("VM: uae_vm_free(%p, %d) munmap failed (%d)\n", + uae_log("VM: uae_vm_free(%p, %zu) munmap failed (%d)\n", address, size, errno); return false; } @@ -272,20 +272,20 @@ static bool do_free(void *address, int size) return true; } -bool uae_vm_free(void *address, int size) +bool uae_vm_free(void *address, size_t size) { - uae_log("VM: Free 0x%-8x bytes at %p\n", size, address); + uae_log("VM: Free 0x%-8zx bytes at %p\n", size, address); return do_free(address, size); } -static void *try_reserve(uintptr_t try_addr, uae_u32 size, int flags) +static void *try_reserve(uintptr_t try_addr, size_t size, int flags) { void *address = NULL; if (try_addr) { - uae_log("VM: Reserve 0x%-8x bytes, try address 0x%llx\n", + uae_log("VM: Reserve 0x%-8zx bytes, try address 0x%llx\n", size, (uae_u64) try_addr); } else { - uae_log("VM: Reserve 0x%-8x bytes\n", size); + uae_log("VM: Reserve 0x%-8zx bytes\n", size); } #ifdef _WIN32 int va_type = MEM_RESERVE; @@ -308,7 +308,7 @@ static void *try_reserve(uintptr_t try_addr, uae_u32 size, int flags) if (flags & UAE_VM_32BIT) { uintptr_t end = (uintptr_t) address + size; if (address && end > (uintptr_t) 0x100000000ULL) { - uae_log("VM: Reserve 0x%-8x bytes, got address 0x%llx (> 32-bit)\n", + uae_log("VM: Reserve 0x%-8zx bytes, got address 0x%llx (> 32-bit)\n", size, (uae_u64) 
(uintptr_t) address); #ifdef _WIN32 VirtualFree(address, 0, MEM_RELEASE); @@ -322,11 +322,20 @@ static void *try_reserve(uintptr_t try_addr, uae_u32 size, int flags) return address; } -void *uae_vm_reserve(uae_u32 size, int flags) +void *uae_vm_reserve(size_t size, int flags) { void *address = NULL; #ifdef _WIN32 +#if defined(CPU_AARCH64) + if ((flags & UAE_VM_32BIT) == 0) { + address = try_reserve(0x100000000ULL, size, flags); + } + if (address == NULL) { + address = try_reserve(0x80000000, size, flags); + } +#else address = try_reserve(0x80000000, size, flags); +#endif if (address == NULL && (flags & UAE_VM_32BIT)) { if (size <= 768 * 1024 * 1024) { address = try_reserve(0x78000000 - size, size, flags); @@ -358,35 +367,35 @@ void *uae_vm_reserve(uae_u32 size, int flags) } #endif if (address) { - uae_log("VM: Reserve 0x%-8x bytes, got address 0x%llx\n", + uae_log("VM: Reserve 0x%-8zx bytes, got address 0x%llx\n", size, (uae_u64) (uintptr_t) address); } else { - uae_log("VM: Reserve 0x%-8x bytes failed!\n", size); + uae_log("VM: Reserve 0x%-8zx bytes failed!\n", size); } return address; } -void *uae_vm_reserve_fixed(void *want_addr, uae_u32 size, int flags) +void *uae_vm_reserve_fixed(void *want_addr, size_t size, int flags) { void *address = NULL; - uae_log("VM: Reserve 0x%-8x bytes at %p (fixed)\n", size, want_addr); + uae_log("VM: Reserve 0x%-8zx bytes at %p (fixed)\n", size, want_addr); address = try_reserve((uintptr_t) want_addr, size, flags); if (address == NULL) { - uae_log("VM: Reserve 0x%-8x bytes at %p failed!\n", size, want_addr); + uae_log("VM: Reserve 0x%-8zx bytes at %p failed!\n", size, want_addr); return NULL; } if (address != want_addr) { do_free(address, size); return NULL; } - uae_log("VM: Reserve 0x%-8x bytes, got address 0x%llx\n", + uae_log("VM: Reserve 0x%-8zx bytes, got address 0x%llx\n", size, (uae_u64) (uintptr_t) address); return address; } -void *uae_vm_commit(void *address, uae_u32 size, int protect) +void *uae_vm_commit(void *address, 
size_t size, int protect) { - uae_log("VM: Commit 0x%-8x bytes at %p (%s)\n", + uae_log("VM: Commit 0x%-8zx bytes at %p (%s)\n", size, address, protect_description(protect)); #ifdef _WIN32 int va_type = MEM_COMMIT ; @@ -402,9 +411,9 @@ void *uae_vm_commit(void *address, uae_u32 size, int protect) return address; } -bool uae_vm_decommit(void *address, uae_u32 size) +bool uae_vm_decommit(void *address, size_t size) { - uae_log("VM: Decommit 0x%-8x bytes at %p\n", size, address); + uae_log("VM: Decommit 0x%-8zx bytes at %p\n", size, address); #ifdef _WIN32 return VirtualFree (address, size, MEM_DECOMMIT) != 0; #else