git.unchartedbackwaters.co.uk Git - francis/winuae.git/commitdiff
JIT: Fixed RSP inc/dec, fixed Windows x64 reg saving and stack shadow space
author Frode Solheim <frode@fs-uae.net>
Mon, 12 Oct 2015 18:43:40 +0000 (20:43 +0200)
committer Frode Solheim <frode@fs-uae.net>
Mon, 12 Oct 2015 18:43:40 +0000 (20:43 +0200)
jit/codegen_x86.cpp
jit/compemu_midfunc_x86.cpp
jit/compemu_support.cpp

index 5b824e0234f93d616046b4c0d82470d58dce7539..b5cfb05d7e6e3076b64902c18402653490d3f3ae 100644 (file)
@@ -96,6 +96,14 @@ only target, and it's easier this way... */
 
 #define STACK_ALIGN            16
 #define STACK_OFFSET   sizeof(void *)
+#ifdef _WIN64
+/* In the Microsoft x64 calling convention, it's the caller's responsibility
+ * to allocate 32 bytes of "shadow space" on the stack right before calling
+ * the function (regardless of the actual number of parameters used). */
+#define STACK_SHADOW_SPACE 32
+#else
+#define STACK_SHADOW_SPACE 0
+#endif
 
 uae_s8 always_used[]={4,-1};
 #if defined(CPU_x86_64)
@@ -124,9 +132,19 @@ uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
      by pushing, even though they are "saved" across function calls
 */
 #if defined(CPU_x86_64)
+#ifdef _WIN64
+/* https://msdn.microsoft.com/en-us/library/6t169e9c.aspx:
+ * "The registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 are
+ * considered nonvolatile and must be saved and restored by a function that
+ * uses them". Also saving r11 for now (see comment below). */
+static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1};
+#else
 /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
 /* preserve r11 because it's generally used to hold pointers to functions */
+/* FIXME: not really sure what the point of saving r11 is (??). If functions
+ * cannot assume the callee preserves it, it will not be used across calls anyway? */
 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
+#endif
 #else
 /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
 static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
@@ -3484,12 +3502,22 @@ static inline void raw_load_flagx(uae_u32 target, uae_u32 r)
 
 static inline void raw_dec_sp(int off)
 {
-       if (off) raw_sub_l_ri(ESP_INDEX,off);
+       if (off) {
+#ifdef CPU_x86_64
+               emit_byte(0x48); /* REX prefix */
+#endif
+               raw_sub_l_ri(ESP_INDEX,off);
+       }
 }
 
 static inline void raw_inc_sp(int off)
 {
-       if (off) raw_add_l_ri(ESP_INDEX,off);
+       if (off) {
+#ifdef CPU_x86_64
+               emit_byte(0x48); /* REX prefix */
+#endif
+               raw_add_l_ri(ESP_INDEX,off);
+       }
 }
 
 static inline void raw_push_regs_to_preserve(void) {
index 8263a524f6a61572d70d76c3d4c2502906397efa..f228830f70566a48374d3ddc00c5ca7c5692a0ed 100644 (file)
@@ -234,7 +234,9 @@ MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
 MIDFUNC(1,call_r,(RR4 r)) /* Clobbering is implicit */
 {
        r=readreg(r,4);
+       raw_dec_sp(STACK_SHADOW_SPACE);
        raw_call_r(r);
+       raw_inc_sp(STACK_SHADOW_SPACE);
        unlock2(r);
 }
 MENDFUNC(1,call_r,(RR4 r)) /* Clobbering is implicit */
@@ -2313,7 +2315,9 @@ MIDFUNC(5,call_r_11,(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize))
        unlock2(r);
 
        prepare_for_call_2();
+       raw_dec_sp(STACK_SHADOW_SPACE);
        raw_call_r(r);
+       raw_inc_sp(STACK_SHADOW_SPACE);
 
 #if USE_NORMAL_CALLING_CONVENTION
        raw_inc_sp(4);
@@ -2351,7 +2355,9 @@ MIDFUNC(5,call_r_02,(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2))
        unlock2(in1);
        unlock2(in2);
        prepare_for_call_2();
+       raw_dec_sp(STACK_SHADOW_SPACE);
        raw_call_r(r);
+       raw_inc_sp(STACK_SHADOW_SPACE);
 #if USE_NORMAL_CALLING_CONVENTION
        raw_inc_sp(8);
 #endif
index 66a7b07dc3723c48d4c34ac797ae9811a2311d56..433a643742229c10637c74cf32e80734c9e79037 100644 (file)
@@ -4488,7 +4488,9 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
                        compemu_raw_jcc_b_oponly(NATIVE_CC_GT);
                        uae_s8 *branchadd=(uae_s8*)get_target();
                        skip_byte();
+                       raw_dec_sp(STACK_SHADOW_SPACE);
                        compemu_raw_call((uintptr)cpu_do_check_ticks);
+                       raw_inc_sp(STACK_SHADOW_SPACE);
                        *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
 #endif
 
@@ -4533,7 +4535,9 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
                                                prepare_for_call_1();
                                                unlock2(arg);
                                                prepare_for_call_2();
+                                               raw_dec_sp(STACK_SHADOW_SPACE);
                                                compemu_raw_call((uintptr)m68k_record_step);
+                                               raw_inc_sp(STACK_SHADOW_SPACE);
                                        }
 #endif
 
@@ -4571,7 +4575,9 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
 #endif
                                                compemu_raw_mov_l_mi((uintptr)&regs.pc_p,
                                                        (uintptr)pc_hist[i].location);
+                                               raw_dec_sp(STACK_SHADOW_SPACE);
                                                compemu_raw_call((uintptr)cputbl[opcode]);
+                                               raw_inc_sp(STACK_SHADOW_SPACE);
 #ifdef PROFILE_UNTRANSLATED_INSNS
                                                // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
                                                compemu_raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);