From 97b1908fdce4ec6b08947372c77bd011296d063d Mon Sep 17 00:00:00 2001 From: Frode Solheim Date: Mon, 12 Oct 2015 20:43:40 +0200 Subject: [PATCH] JIT: Fixed RSP inc/dec, fixed Windows x64 reg saving and stack shadow space --- jit/codegen_x86.cpp | 32 ++++++++++++++++++++++++++++++-- jit/compemu_midfunc_x86.cpp | 6 ++++++ jit/compemu_support.cpp | 6 ++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/jit/codegen_x86.cpp b/jit/codegen_x86.cpp index 5b824e02..b5cfb05d 100644 --- a/jit/codegen_x86.cpp +++ b/jit/codegen_x86.cpp @@ -96,6 +96,14 @@ only target, and it's easier this way... */ #define STACK_ALIGN 16 #define STACK_OFFSET sizeof(void *) +#ifdef _WIN64 +/* In the Microsoft x64 calling convention, it's the caller's responsibility + * to allocate 32 bytes of "shadow space" on the stack right before calling + * the function (regardless of the actual number of parameters used). */ +#define STACK_SHADOW_SPACE 32 +#else +#define STACK_SHADOW_SPACE 0 +#endif uae_s8 always_used[]={4,-1}; #if defined(CPU_x86_64) @@ -124,9 +132,19 @@ uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}; by pushing, even though they are "saved" across function calls */ #if defined(CPU_x86_64) +#ifdef _WIN64 +/* https://msdn.microsoft.com/en-us/library/6t169e9c.aspx: + * "The registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 are + * considered nonvolatile and must be saved and restored by a function that + * uses them". Also saving r11 for now (see comment below). */ +static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1}; +#else /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */ /* preserve r11 because it's generally used to hold pointers to functions */ +/* FIXME: not really sure what the point of saving r11 is (??). If functions + * cannot assume callee preserves it, it will not be used across calls anyway? 
*/ static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1}; +#endif #else /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */ static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; @@ -3484,12 +3502,22 @@ static inline void raw_load_flagx(uae_u32 target, uae_u32 r) static inline void raw_dec_sp(int off) { - if (off) raw_sub_l_ri(ESP_INDEX,off); + if (off) { +#ifdef CPU_x86_64 + emit_byte(0x48); /* REX prefix */ +#endif + raw_sub_l_ri(ESP_INDEX,off); + } } static inline void raw_inc_sp(int off) { - if (off) raw_add_l_ri(ESP_INDEX,off); + if (off) { +#ifdef CPU_x86_64 + emit_byte(0x48); /* REX prefix */ +#endif + raw_add_l_ri(ESP_INDEX,off); + } } static inline void raw_push_regs_to_preserve(void) { diff --git a/jit/compemu_midfunc_x86.cpp b/jit/compemu_midfunc_x86.cpp index 8263a524..f228830f 100644 --- a/jit/compemu_midfunc_x86.cpp +++ b/jit/compemu_midfunc_x86.cpp @@ -234,7 +234,9 @@ MENDFUNC(2,mov_l_rm,(W4 d, IMM s)) MIDFUNC(1,call_r,(RR4 r)) /* Clobbering is implicit */ { r=readreg(r,4); + raw_dec_sp(STACK_SHADOW_SPACE); raw_call_r(r); + raw_inc_sp(STACK_SHADOW_SPACE); unlock2(r); } MENDFUNC(1,call_r,(RR4 r)) /* Clobbering is implicit */ @@ -2313,7 +2315,9 @@ MIDFUNC(5,call_r_11,(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)) unlock2(r); prepare_for_call_2(); + raw_dec_sp(STACK_SHADOW_SPACE); raw_call_r(r); + raw_inc_sp(STACK_SHADOW_SPACE); #if USE_NORMAL_CALLING_CONVENTION raw_inc_sp(4); @@ -2351,7 +2355,9 @@ MIDFUNC(5,call_r_02,(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)) unlock2(in1); unlock2(in2); prepare_for_call_2(); + raw_dec_sp(STACK_SHADOW_SPACE); raw_call_r(r); + raw_inc_sp(STACK_SHADOW_SPACE); #if USE_NORMAL_CALLING_CONVENTION raw_inc_sp(8); #endif diff --git a/jit/compemu_support.cpp b/jit/compemu_support.cpp index 66a7b07d..433a6437 100644 --- a/jit/compemu_support.cpp +++ b/jit/compemu_support.cpp @@ -4488,7 +4488,9 @@ static void compile_block(cpu_history* pc_hist, int blocklen) 
compemu_raw_jcc_b_oponly(NATIVE_CC_GT); uae_s8 *branchadd=(uae_s8*)get_target(); skip_byte(); + raw_dec_sp(STACK_SHADOW_SPACE); compemu_raw_call((uintptr)cpu_do_check_ticks); + raw_inc_sp(STACK_SHADOW_SPACE); *branchadd=(uintptr)get_target()-((uintptr)branchadd+1); #endif @@ -4533,7 +4535,9 @@ static void compile_block(cpu_history* pc_hist, int blocklen) prepare_for_call_1(); unlock2(arg); prepare_for_call_2(); + raw_dec_sp(STACK_SHADOW_SPACE); compemu_raw_call((uintptr)m68k_record_step); + raw_inc_sp(STACK_SHADOW_SPACE); } #endif @@ -4571,7 +4575,9 @@ static void compile_block(cpu_history* pc_hist, int blocklen) #endif compemu_raw_mov_l_mi((uintptr)&regs.pc_p, (uintptr)pc_hist[i].location); + raw_dec_sp(STACK_SHADOW_SPACE); compemu_raw_call((uintptr)cputbl[opcode]); + raw_inc_sp(STACK_SHADOW_SPACE); #ifdef PROFILE_UNTRANSLATED_INSNS // raw_cputbl_count[] is indexed with plain opcode (in m68k order) compemu_raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1); -- 2.47.3