From 363e80f3734cb283dbe639ec2899e8f6001fbb18 Mon Sep 17 00:00:00 2001
From: Frode Solheim
Date: Wed, 16 Sep 2015 20:46:06 +0200
Subject: [PATCH] JIT: Windows / 64-bit fixes

---
 fpp.cpp                   |  2 ++
 jit/codegen_x86.cpp       |  4 ++--
 jit/codegen_x86.h         |  8 +++----
 jit/compemu.h             | 17 +++++++++++---
 jit/exception_handler.cpp | 23 +++++++++++++++++--
 newcpu.cpp                |  2 ++
 od-win32/mman.cpp         | 47 +++++++++++++++++++++++++--------------
 od-win32/sysconfig.h      |  1 -
 od-win32/writelog.cpp     | 13 ++++++++++-
 vm.cpp                    | 28 +++++++++++++++++++----
 10 files changed, 111 insertions(+), 34 deletions(-)

diff --git a/fpp.cpp b/fpp.cpp
index 36870322..a9fc38c9 100644
--- a/fpp.cpp
+++ b/fpp.cpp
@@ -574,7 +574,9 @@ static void native_set_fpucw (uae_u32 m68k_cw)
 #endif /* USE_X86_FPUCW */
 #endif
 #else
+#ifndef _MSC_VER
 #warning NATIVE_FPUCW not enabled
+#endif
 #endif
 	}
 }
diff --git a/jit/codegen_x86.cpp b/jit/codegen_x86.cpp
index dfd7f52b..74ddaff7 100644
--- a/jit/codegen_x86.cpp
+++ b/jit/codegen_x86.cpp
@@ -70,7 +70,7 @@ only target, and it's easier this way... */
 #define REG_RESULT EAX_INDEX
 
 /* The registers subroutines take their first and second argument in */
-#if defined(_WIN32)
+#ifdef _MSC_VER
 /* Handle the _fastcall parameters of ECX and EDX */
 #define REG_PAR1 ECX_INDEX
 #define REG_PAR2 EDX_INDEX
@@ -83,7 +83,7 @@ only target, and it's easier this way... */
 #endif
 
 #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
-#if defined(_WIN32)
+#ifdef _MSC_VER
 #define REG_PC_TMP ECX_INDEX
 #else
 #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
diff --git a/jit/codegen_x86.h b/jit/codegen_x86.h
index bd1f83a7..6743392d 100644
--- a/jit/codegen_x86.h
+++ b/jit/codegen_x86.h
@@ -241,10 +241,10 @@ typedef unsigned short _us;
 typedef signed int _sl;
 typedef unsigned int _ul;
 
-#define _UC(X) ((_uc )(unsigned long)(X))
-#define _US(X) ((_us )(unsigned long)(X))
-#define _SL(X) ((_sl )(unsigned long)(X))
-#define _UL(X) ((_ul )(unsigned long)(X))
+#define _UC(X) ((_uc )(uintptr_t)(X))
+#define _US(X) ((_us )(uintptr_t)(X))
+#define _SL(X) ((_sl )(uintptr_t)(X))
+#define _UL(X) ((_ul )(uintptr_t)(X))
 
 #define _PUC(X) ((_uc *)(X))
 #define _PUS(X) ((_us *)(X))
diff --git a/jit/compemu.h b/jit/compemu.h
index c8b95fa1..acf0dfce 100644
--- a/jit/compemu.h
+++ b/jit/compemu.h
@@ -79,6 +79,9 @@ union cacheline {
 #error implementation in progress
 #endif
 
+#ifdef UAE
+/* Temporarily disabled due to some issues on x86-64 */
+#else
 /* (gb) When on, this option can save save up to 30% compilation time
  * when many lazy flushes occur (e.g. apps in MacOS 8.x).
  */
@@ -89,6 +92,7 @@ union cacheline {
 
 /* Use code inlining, aka follow-up of constant jumps */
 #define USE_INLINING 1
+#endif
 
 /* Inlining requires the chained checksuming information */
 #if USE_INLINING
@@ -486,23 +490,30 @@ STATIC_INLINE int end_block(uae_u16 opcode)
 #ifdef _WIN32
 LONG WINAPI EvalException(LPEXCEPTION_POINTERS info);
 #if defined(_MSC_VER) && !defined(NO_WIN32_EXCEPTION_HANDLER)
+#ifdef _WIN64
+/* Structured exception handling is table based for Windows x86-64, so
+ * Windows will not be able to find the exception handler. */
+#else
 #define USE_STRUCTURED_EXCEPTION_HANDLING
 #endif
 #endif
+#endif
 #endif
 
 #endif /* COMPEMU_H */
 
 #ifdef CPU_64_BIT
-static inline uae_u32 check_uae_p32(uae_u64 address)
+static inline uae_u32 check_uae_p32(uae_u64 address, const char *file, int line)
 {
-	if (address > 0xffffffffLL) {
+	if (address > (uintptr_t) 0xffffffff) {
+		write_log("JIT: 64-bit pointer (0x%llx) at %s:%d (fatal)\n",
+			address, file, line);
 		abort();
 	}
 	return (uae_u32) address;
 }
 
-#define uae_p32(x) (check_uae_p32((uae_u64)(x)))
+#define uae_p32(x) (check_uae_p32((uae_u64)(x), __FILE__, __LINE__))
 #else
 #define uae_p32(x) ((uae_u32)(x))
 #endif
diff --git a/jit/exception_handler.cpp b/jit/exception_handler.cpp
index 813840e8..ecff5eda 100644
--- a/jit/exception_handler.cpp
+++ b/jit/exception_handler.cpp
@@ -135,7 +135,12 @@ static int handle_access(uintptr_t fault_addr, CONTEXT_T context)
 {
 	uae_u8 *fault_pc = (uae_u8 *) CONTEXT_PC(context);
 #ifdef CPU_64_BIT
-	if (fault_addr > 0xffffffff) {
+#if 0
+	if ((fault_addr & 0xffffffff00000000) == 0xffffffff00000000) {
+		fault_addr &= 0xffffffff;
+	}
+#endif
+	if (fault_addr > (uintptr_t) 0xffffffff) {
 		return 0;
 	}
 #endif
@@ -402,6 +407,14 @@ LONG WINAPI EvalException(LPEXCEPTION_POINTERS info)
 	return EXCEPTION_CONTINUE_SEARCH;
 }
 
+static void *installed_vector_handler;
+
+static LONG CALLBACK JITVectoredHandler(PEXCEPTION_POINTERS info)
+{
+//	write_log(_T("JitVectoredHandler\n"));
+	return EvalException(info);
+}
+
 #elif defined(HAVE_CONTEXT_T)
 
 static void sigsegv_handler(int signum, siginfo_t *info, void *context)
@@ -432,8 +445,14 @@ static void install_exception_handler(void)
 #ifdef USE_STRUCTURED_EXCEPTION_HANDLING
 	/* Structured exception handler is installed in main.cpp */
 #elif defined(_WIN32)
-	write_log (_T("JIT: Installing unhandled exception filter\n"));
+#if 1
+	write_log(_T("JIT: Installing vectored exception handler\n"));
+	installed_vector_handler = AddVectoredExceptionHandler(
+		0, JITVectoredHandler);
+#else
+	write_log(_T("JIT: Installing unhandled exception filter\n"));
 	SetUnhandledExceptionFilter(EvalException);
+#endif
 #elif defined(HAVE_CONTEXT_T)
 	write_log (_T("JIT: Installing segfault handler\n"));
 	struct sigaction act;
diff --git a/newcpu.cpp b/newcpu.cpp
index c3fb4892..079f8116 100644
--- a/newcpu.cpp
+++ b/newcpu.cpp
@@ -1294,6 +1294,8 @@ static void build_cpufunctbl (void)
 		opcnt, lvl,
 		currprefs.cpu_cycle_exact ? -1 : currprefs.cpu_compatible ? 1 : 0, currprefs.address_space_24);
 #ifdef JIT
+	write_log(_T("JIT: &countdown = %p\n"), &countdown);
+	write_log(_T("JIT: &build_comp = %p\n"), &build_comp);
 	build_comp ();
 #endif
 
diff --git a/od-win32/mman.cpp b/od-win32/mman.cpp
index da4890c8..e4a2e949 100644
--- a/od-win32/mman.cpp
+++ b/od-win32/mman.cpp
@@ -206,22 +206,34 @@ bool preinit_shm (void)
 
 	write_log (_T("Total physical RAM %lluM, all RAM %lluM. Attempting to reserve: %uM.\n"), totalphys64 >> 20, total64 >> 20, natmem_size >> 20);
 	natmem_offset_allocated = 0;
-	if (natmem_size <= 768 * 1024 * 1024) {
-		uae_u32 p = 0x78000000 - natmem_size;
-		for (;;) {
-			natmem_offset_allocated = (uae_u8*) VirtualAlloc((void*)(intptr_t)p, natmem_size, MEM_RESERVE | MEM_WRITE_WATCH, PAGE_READWRITE);
-			if (natmem_offset_allocated)
-				break;
-			p -= 128 * 1024 * 1024;
-			if (p <= 128 * 1024 * 1024)
-				break;
+
+#ifdef _WIN64
+	natmem_offset_allocated = (uae_u8*) VirtualAlloc((void*)(uintptr_t)0x80000000, 0x80000000, MEM_RESERVE | MEM_WRITE_WATCH, PAGE_READWRITE);
+	if (natmem_offset_allocated) {
+		natmem_size = 0x80000000;
+	}
+#endif
+
+	if (!natmem_offset_allocated) {
+		if (natmem_size <= 768 * 1024 * 1024) {
+			uae_u32 p = 0x78000000 - natmem_size;
+			for (;;) {
+				natmem_offset_allocated = (uae_u8*) VirtualAlloc((void*)(intptr_t)p, natmem_size, MEM_RESERVE | MEM_WRITE_WATCH, PAGE_READWRITE);
+				if (natmem_offset_allocated)
+					break;
+				p -= 128 * 1024 * 1024;
+				if (p <= 128 * 1024 * 1024)
+					break;
+			}
 		}
 	}
 	if (!natmem_offset_allocated) {
 		DWORD vaflags = MEM_RESERVE | MEM_WRITE_WATCH;
 #ifdef _WIN32
+#ifndef _WIN64
 		if (!os_vista)
 			vaflags |= MEM_TOP_DOWN;
+#endif
 #endif
 		for (;;) {
 			natmem_offset_allocated = (uae_u8*)VirtualAlloc (NULL, natmem_size, vaflags, PAGE_READWRITE);
@@ -241,13 +253,14 @@ bool preinit_shm (void)
 		}
 	}
 	natmem_offset = natmem_offset_allocated;
-	if (natmem_size <= 257 * 1024 * 1024)
+	if (natmem_size <= 257 * 1024 * 1024) {
 		max_z3fastmem = 0;
-	else
+	} else {
 		max_z3fastmem = natmem_size;
-	write_log (_T("Reserved: 0x%p-0x%p (%08x %dM)\n"),
-		natmem_offset, (uae_u8*)natmem_offset + natmem_size,
-		natmem_size, natmem_size >> 20);
+	}
+	write_log (_T("NATMEM: Reserved %p-%p (0x%08x %dM)\n"),
+		natmem_offset, (uae_u8 *) natmem_offset + natmem_size,
+		natmem_size, natmem_size / (1024 * 1024));
 
 	clear_shm ();
 
@@ -442,9 +455,9 @@ static int doinit_shm (void)
 	if (!natmem_offset) {
 		write_log (_T("NATMEM: No special area could be allocated! err=%d\n"), GetLastError ());
 	} else {
-		write_log (_T("NATMEM: Our special area: 0x%p-0x%p (%08x %dM)\n"),
-			natmem_offset, (uae_u8*)natmem_offset + natmemsize,
-			natmemsize, natmemsize >> 20);
+		write_log(_T("NATMEM: Our special area: %p-%p (0x%08x %dM)\n"),
+			natmem_offset, (uae_u8*)natmem_offset + natmemsize,
+			natmemsize, natmemsize / (1024 * 1024));
 		if (changed_prefs.rtgmem_size)
 			write_log (_T("NATMEM: P96 special area: 0x%p-0x%p (%08x %dM)\n"),
 				p96mem_offset, (uae_u8*)p96mem_offset + changed_prefs.rtgmem_size,
diff --git a/od-win32/sysconfig.h b/od-win32/sysconfig.h
index f7db535c..0d3999ec 100644
--- a/od-win32/sysconfig.h
+++ b/od-win32/sysconfig.h
@@ -153,7 +153,6 @@
 #ifdef WIN64
 #undef X86_MSVC_ASSEMBLY_MEMACCESS
 #undef X86_MSVC_ASSEMBLY
-#undef JIT
 #define X64_MSVC_ASSEMBLY
 #define SIZEOF_VOID_P 8
 #else
diff --git a/od-win32/writelog.cpp b/od-win32/writelog.cpp
index d6fe438c..f0857c63 100644
--- a/od-win32/writelog.cpp
+++ b/od-win32/writelog.cpp
@@ -573,6 +573,17 @@ void write_log (const TCHAR *format, ...)
 			_ftprintf (debugfile, _T("%s"), ts);
 		_ftprintf (debugfile, _T("%s"), bufp);
 	}
+
+#if 0
+	static int is_debugger_present = -1;
+	if (is_debugger_present == -1) {
+		is_debugger_present = IsDebuggerPresent();
+	}
+	if (is_debugger_present) {
+		OutputDebugString(bufp);
+	}
+#endif
+
 	lfdetected = 0;
 	if (_tcslen (bufp) > 0 && bufp[_tcslen (bufp) - 1] == '\n')
 		lfdetected = 1;
@@ -693,4 +704,4 @@ void jit_abort(const char *format, ...)
 	jit_abort(_T("%s"), b);
 	xfree(b);
 	va_end(parms);
-}
\ No newline at end of file
+}
diff --git a/vm.cpp b/vm.cpp
index c531beef..7d170034 100644
--- a/vm.cpp
+++ b/vm.cpp
@@ -118,9 +118,25 @@ static void *uae_vm_alloc_with_flags(uae_u32 size, int flags, int protect)
 	void *address = NULL;
 	uae_log("uae_vm_alloc(%u, %d, %d)\n", size, flags, protect);
 #ifdef _WIN32
-	int va_type = MEM_COMMIT;
+	int va_type = MEM_COMMIT | MEM_RESERVE;
 	int va_protect = protect_to_native(protect);
-	address = VirtualAlloc(NULL, size, va_type, va_protect);
+#ifdef CPU_64_BIT
+	if (flags & UAE_VM_32BIT) {
+		/* Stupid algorithm to find available space, but should
+		 * work well enough when there is not a lot of allocations. */
+		uae_u8 *p = (uae_u8 *) 0x50000000;
+		while (address == NULL) {
+			address = VirtualAlloc(p, size, va_type, va_protect);
+			p += uae_vm_page_size();
+			if (p > (void*) 0x60000000) {
+				break;
+			}
+		}
+	}
+#endif
+	if (!address) {
+		address = VirtualAlloc(NULL, size, va_type, va_protect);
+	}
 #else
 	//size = size < uae_vm_page_size() ? uae_vm_page_size() : size;
 	int mmap_flags = MAP_PRIVATE | MAP_ANON;
@@ -132,11 +148,15 @@ static void *uae_vm_alloc_with_flags(uae_u32 size, int flags, int protect)
 #endif
 	address = mmap(0, size, mmap_prot, mmap_flags, -1, 0);
 	if (address == MAP_FAILED) {
-		uae_log("uae_vm_alloc(%u, %d, %d) mmap failed (%d)\n",
-			size, flags, protect, errno);
+		address = NULL;
 		return NULL;
 	}
 #endif
+	if (address == NULL) {
+		uae_log("uae_vm_alloc(%u, %d, %d) mmap failed (%d)\n",
+			size, flags, protect, errno);
+		return NULL;
+	}
 #ifdef TRACK_ALLOCATIONS
 	add_allocation(address, size);
 #endif
-- 
2.47.3
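
Note on the exception-handler change in jit/exception_handler.cpp: as the new comment in jit/compemu.h says, structured exception handling on 64-bit Windows is table based, and the JIT emits code without unwind tables, so a frame-based __try/__except (or the unhandled exception filter) cannot be relied on to catch access violations raised from translated code. A vectored exception handler is invoked first-chance, before SEH dispatch, and does not depend on unwind information. The sketch below is a minimal, self-contained illustration of that registration pattern only; it is not part of the patch, and the function and variable names are illustrative placeholders for what EvalException and install_exception_handler do in the real code.

#include <windows.h>

/* Illustrative stand-in for the JIT's real handler (EvalException in the
 * patch). Return EXCEPTION_CONTINUE_EXECUTION after fixing up the faulting
 * context, or EXCEPTION_CONTINUE_SEARCH to pass the exception on. */
static LONG CALLBACK jit_vectored_handler(PEXCEPTION_POINTERS info)
{
	if (info->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
		return EXCEPTION_CONTINUE_SEARCH;
	/* A real handler decodes the instruction at the faulting PC
	 * (info->ContextRecord->Rip on x86-64) and emulates or redirects
	 * the memory access before resuming. */
	return EXCEPTION_CONTINUE_SEARCH;
}

static PVOID vectored_handler_cookie;

static void install_jit_handler(void)
{
	/* First argument 0 = register last in the vectored-handler chain,
	 * matching the patch; pass a non-zero value to be called first. */
	vectored_handler_cookie = AddVectoredExceptionHandler(0, jit_vectored_handler);
}

static void remove_jit_handler(void)
{
	if (vectored_handler_cookie)
		RemoveVectoredExceptionHandler(vectored_handler_cookie);
}

Unlike SetUnhandledExceptionFilter, which only runs after normal dispatch has failed to find a handler (and is bypassed under a debugger), a vectored handler sees every exception before any frame- or table-based handler runs, which is what the JIT needs for its routine memory-access faults.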