From: Dimitris Panokostas Date: Sat, 30 May 2026 16:49:28 +0000 (+0200) Subject: jit/x86-64: fix FPU FLDCW codegen crashes on 64-bit hosts X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=d1a17dd4255a628f38c1bd5dd66c6902d347922b;p=francis%2Fwinuae.git jit/x86-64: fix FPU FLDCW codegen crashes on 64-bit hosts The x86-64 JIT could crash when compiling FPU rounding-mode changes (FLDCW with an indexed operand) because of two defects in codegen_x86.cpp: 1. x86_64_rex() dereferenced its 'b' (REX.B) pointer unconditionally and ignored 'r'/'x'. raw_fldcw_m_indexed() passes its index register in the 'x' (REX.X) slot with b == NULL, so the function read through a NULL pointer; an index register in r8-r15 would also have been mis-encoded. x86_64_rex() now null-guards each pointer and emits REX.R/X/B (and W). 2. raw_fldcw_m_indexed() loaded the 64-bit base of the x87 control-word table into RAX. RAX is allocatable (not in always_used[]) and may hold a live m68k value mid-block, and a LOWFUNC cannot declare a register clobber, so the allocator's view of RAX was silently corrupted; a later access through that register (e.g. MOVEM.L ...,-(An)) then dereferenced the table pointer as an m68k address and faulted. The base is now materialized in a push/pop-preserved scratch register chosen to differ from the index register. Only x86-64 is affected; the 32-bit path uses a direct FLDCW [index + disp32] encoding and ARM64 uses different codegen. 
--- diff --git a/jit/codegen_x86.cpp b/jit/codegen_x86.cpp index b49753f3..e87997ca 100644 --- a/jit/codegen_x86.cpp +++ b/jit/codegen_x86.cpp @@ -236,18 +236,37 @@ static inline void x86_64_addr32(void) #endif } -static inline void x86_64_rex(bool /* w */, uae_u32 * /* r */, uae_u32 * /* x */, uae_u32 *b) +static inline void x86_64_rex(bool w, uae_u32 *r, uae_u32 *x, uae_u32 *b) { #ifdef CPU_x86_64 int rex_byte = 0x40; - if (*b >= R8_INDEX) { + /* Each of r/x/b is optional: a NULL pointer means that REX field is not + used by this instruction. The previous version dereferenced 'b' + unconditionally and ignored 'r'/'x' entirely, which crashed when a + caller (e.g. raw_fldcw_m_indexed, which only needs REX.X for its index + register) passed b == NULL. */ + if (w) { + rex_byte |= 8; /* REX.W */ + } + if (r && *r >= R8_INDEX) { + *r -= R8_INDEX; + rex_byte |= 4; /* REX.R */ + } + if (x && *x >= R8_INDEX) { + *x -= R8_INDEX; + rex_byte |= 2; /* REX.X */ + } + if (b && *b >= R8_INDEX) { *b -= R8_INDEX; - rex_byte |= 1; + rex_byte |= 1; /* REX.B */ } if (rex_byte != 0x40) { emit_byte(rex_byte); } #else + UNUSED(w); + UNUSED(r); + UNUSED(x); UNUSED(b); #endif } @@ -2426,15 +2445,22 @@ LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) LOWFUNC(NONE,READ,2,raw_fldcw_m_indexed,(R4 index, MEMR base)) { #if X86_TARGET_64BIT - /* x86-64: [index + disp32] can't reach 64-bit addresses. - Load base into RAX, use FLDCW [RAX + index*1] via SIB encoding. */ - MOVQir(base, X86_RAX); + /* x86-64: [index + disp32] can't reach a 64-bit base address, so the base + must be materialized in a register, then used as FLDCW [scratch + index*1] + via SIB encoding. RAX/RCX are allocatable (not in always_used[]) and may + hold a live m68k value here, and this LOWFUNC has no way to declare a + register clobber - so preserve the scratch register across the FLDCW. + The scratch is chosen to differ from the index register. */ + int scratch = (_rR(index) == _rR(X86_RAX)) ? 
X86_RCX : X86_RAX; + PUSHQr(scratch); + MOVQir(base, scratch); x86_64_prefix(false, false, NULL, &index, NULL); emit_byte(0xd9); /* ModRM: mod=00, reg=5 (FLDCW), rm=100 (SIB follows) */ emit_byte(0x2c); - /* SIB: scale=00 (x1), index=index_reg, base=RAX (000) */ - emit_byte((_r(index) << 3) | 0x00); + /* SIB: scale=00 (x1), index=index_reg, base=scratch */ + emit_byte((_r(index) << 3) | _r(scratch)); + POPQr(scratch); #else x86_64_prefix(true, false, NULL, NULL, &index); emit_byte(0xd9);