From: Toni Wilen <twilen@winuae.net>
Date: Thu, 13 Mar 2014 17:12:56 +0000 (+0200)
Subject: FPU status bits emulated more accurately, partial support for arithmetic exceptions... 
X-Git-Tag: 2800~21
X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=c5d0dac8bfb9c67993a461c10dc157ee8eb736d2;p=francis%2Fwinuae.git

FPU status bits emulated more accurately, partial support for arithmetic exceptions and other small updates.
---

diff --git a/fpp.cpp b/fpp.cpp
index a6df488b..1a3a1d64 100644
--- a/fpp.cpp
+++ b/fpp.cpp
@@ -11,10 +11,15 @@
 
 #include <math.h>
 #include <float.h>
+#include <fenv.h>
 
 #include "sysconfig.h"
 #include "sysdeps.h"
 
+#ifdef _MSC_VER
+#pragma fenv_access(on)
+#endif
+
 #include "options.h"
 #include "memory.h"
 #include "custom.h"
@@ -113,12 +118,24 @@ static double *fp_nan    = (double *)dhex_nan;
 #endif
 double fp_1e8 = 1.0e8;
 float  fp_1e0 = 1, fp_1e1 = 10, fp_1e2 = 100, fp_1e4 = 10000;
+static bool fpu_mmu_fixup;
 
 #define FFLAG_Z	    0x4000
 #define FFLAG_N	    0x0100
 #define FFLAG_NAN   0x0400
 
-#define MAKE_FPSR(r)  (regs).fp_result.fp=(r)
+STATIC_INLINE void MAKE_FPSR (fptype *fp)
+{
+	int status = fetestexcept (FE_ALL_EXCEPT);
+	if (status)
+		regs.fp_result_status |= status;
+	regs.fp_result.fp = *fp;
+}
+
+STATIC_INLINE void CLEAR_STATUS (void)
+{
+	feclearexcept (FE_ALL_EXCEPT);
+}
 
 static void fpnan (fpdata *fpd)
 {
@@ -152,27 +169,17 @@ static __inline__ void native_set_fpucw (uae_u32 m68k_cw)
 #ifdef _WIN32
 	static int ex = 0;
 	// RN, RZ, RM, RP
-	static unsigned int fp87_round[4] = { _RC_NEAR, _RC_CHOP, _RC_DOWN, _RC_UP };
+	static const unsigned int fp87_round[4] = { _RC_NEAR, _RC_CHOP, _RC_DOWN, _RC_UP };
 	// Extend X, Single S, Double D, Undefined
-	static unsigned int fp87_prec[4] = { _PC_64 , _PC_24 , _PC_53, 0 };
+	static const unsigned int fp87_prec[4] = { _PC_64 , _PC_24 , _PC_53, 0 };
 	
-#if 0
-	if (m68k_cw & (0x0100 | 0x0200))
-		ex |= _EM_INEXACT;
-	if (m68k_cw & (0x0400))
-		ex |= _EM_ZERODIVIDE;
-	if (m68k_cw & (0x0800))
-		ex |= _EM_UNDERFLOW;
-	if (m68k_cw & (0x1000))
-		ex |= _EM_OVERFLOW;
-#endif
 #ifdef WIN64
 	_controlfp (ex | fp87_round[(m68k_cw >> 4) & 3], _MCW_RC);
 #else
 	_control87 (ex | fp87_round[(m68k_cw >> 4) & 3] | fp87_prec[(m68k_cw >> 6) & 3], _MCW_RC | _MCW_PC);
 #endif
 #else
-static uae_u16 x87_cw_tab[] = {
+static const uae_u16 x87_cw_tab[] = {
 	0x137f, 0x1f7f, 0x177f, 0x1b7f,	/* Extended */
 	0x107f, 0x1c7f, 0x147f, 0x187f,	/* Single */
 	0x127f, 0x1e7f, 0x167f, 0x1a7f,	/* Double */
@@ -229,6 +236,11 @@ static void fpu_format_error (void)
 #define FPU_EXP_UNIMP_DATATYPE_PACKED_POST 3
 #define FPU_EXP_UNIMP_EA 4
 
+static void fpu_arithmetic_exception (uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr oldpc, int type, fpdata *src, int reg)
+{
+	// TODO
+}
+
 static void fpu_op_unimp (uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr oldpc, int type, fpdata *src, int reg)
 {
 	/* 68040 unimplemented/68060 FPU disabled exception.
@@ -314,13 +326,6 @@ static void fpu_op_unimp (uae_u16 opcode, uae_u16 extra, uae_u32 ea, uaecptr old
 			x_put_long (m68k_areg (regs, 7), ea);
 			m68k_areg (regs, 7) -= 2;
 			x_put_word (m68k_areg (regs, 7), 0x2000 + vector * 4);
-		} else { // FPU_EXP_UNIMP_DATATYPE_POST
-			// PC = next instruction
-			vector = 55;
-			m68k_areg (regs, 7) -= 4;
-			x_put_long (m68k_areg (regs, 7), ea);
-			m68k_areg (regs, 7) -= 2;
-			x_put_word (m68k_areg (regs, 7), 0x3000 + vector * 4);
 		}
 	}
 	oldpc = newpc;
@@ -544,7 +549,6 @@ static void fpu_null (void)
 	regs.fpcr = 0;
 	regs.fpsr = 0;
 	regs.fpiar = 0;
-	regs.fpsr_highbyte = 0;
 	fpclear (&regs.fp_result);
 	for (int i = 0; i < 8; i++)
 		fpnan (&regs.fp[i]);
@@ -597,22 +601,39 @@ STATIC_INLINE tointtype toint (fptype src, fptype minval, fptype maxval)
 uae_u32 get_fpsr (void)
 {
 	uae_u32 answer = regs.fpsr & 0x00ffffff;
+
+	// exception status byte
+	if (regs.fp_result_status & FE_INEXACT)
+		answer |= 1 << 9;
+	if (regs.fp_result_status & FE_DIVBYZERO)
+		answer |= 1 << 10;
+	if (regs.fp_result_status & FE_UNDERFLOW)
+		answer |= 1 << 11;
+	if (regs.fp_result_status & FE_OVERFLOW)
+		answer |= 1 << 12;
+	if (regs.fp_result_status & FE_INVALID)
+		answer |= 1 << 13;
+
+	// accrued exception byte
+	answer |= (answer >> 6) & (0x80 | 0x40 | 0x20 | 0x10 | 0x08);
+
+	// condition code byte
 #ifdef HAVE_ISNAN
 	if (isnan (regs.fp_result.fp))
-		answer |= 0x01000000;
+		answer |= 1 << 24;
 	else
 #endif
 	{
 		if (regs.fp_result.fp == 0)
-			answer |= 0x04000000;
+			answer |= 1 << 26;
 		else if (regs.fp_result.fp < 0)
-			answer |= 0x08000000;
+			answer |= 1 << 27;
 #ifdef _MSC_VER
 		if (!_finite (regs.fp_result.fp))
-			answer |= 0x02000000;
+			answer |= 1 << 25;
 #elif HAVE_ISINF
 		if (_isinf (regs.fp_result.fp))
-			answer |= 0x02000000;
+			answer |= 1 << 25;
 #endif
 	}
 	return answer;
@@ -621,10 +642,10 @@ uae_u32 get_fpsr (void)
 STATIC_INLINE void set_fpsr (uae_u32 x)
 {
 	regs.fpsr = x;
+	regs.fp_result_status = 0;
 
-	if (x & 0x01000000) {
+	if (x & 0x01000000)
 		fpset (&regs.fp_result, *fp_nan);
-	}
 	else if (x & 0x04000000)
 		fpset (&regs.fp_result, 0);
 	else if (x & 0x08000000)
@@ -674,7 +695,7 @@ static fptype to_pack (uae_u32 *wrd)
 	*cp++ = ((wrd[1] >> 8) & 0xf) + '0';
 	*cp++ = ((wrd[1] >> 4) & 0xf) + '0';
 	*cp++ = ((wrd[1] >> 0) & 0xf) + '0';
-	*cp++ = ((wrd[1] >> 28) & 0xf) + '0';
+	*cp++ = ((wrd[2] >> 28) & 0xf) + '0';
 	*cp++ = ((wrd[2] >> 24) & 0xf) + '0';
 	*cp++ = ((wrd[2] >> 20) & 0xf) + '0';
 	*cp++ = ((wrd[2] >> 16) & 0xf) + '0';
@@ -801,10 +822,20 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old
 			ad = m68k_areg (regs, reg);
 			break;
 		case 3:
+			if (currprefs.mmu_model) {
+				mmufixup[0].reg = reg;
+				mmufixup[0].value = m68k_areg (regs, reg);
+				fpu_mmu_fixup = true;
+			}
 			ad = m68k_areg (regs, reg);
 			m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size];
 			break;
 		case 4:
+			if (currprefs.mmu_model) {
+				mmufixup[0].reg = reg;
+				mmufixup[0].value = m68k_areg (regs, reg);
+				fpu_mmu_fixup = true;
+			}
 			m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size];
 			ad = m68k_areg (regs, reg);
 			break;
@@ -980,10 +1011,20 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
 			ad = m68k_areg (regs, reg);
 			break;
 		case 3:
+			if (currprefs.mmu_model) {
+				mmufixup[1].reg = reg;
+				mmufixup[1].value = m68k_areg (regs, reg);
+				fpu_mmu_fixup = true;
+			}
 			ad = m68k_areg (regs, reg);
 			m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size];
 			break;
 		case 4:
+			if (currprefs.mmu_model) {
+				mmufixup[1].reg = reg;
+				mmufixup[1].value = m68k_areg (regs, reg);
+				fpu_mmu_fixup = true;
+			}
 			m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size];
 			ad = m68k_areg (regs, reg);
 			break;
@@ -1237,7 +1278,7 @@ void fpuop_dbcc (uae_u32 opcode, uae_u16 extra)
 	regs.fpiar = pc - 4;
 	maybe_idle_state ();
 	cc = fpp_cond (extra & 0x3f);
-	if (cc == -1) {
+	if (cc < 0) {
 		fpu_op_illg (opcode, extra, regs.fpiar);
 	} else if (!cc) {
 		int reg = opcode & 0x7;
@@ -1277,7 +1318,7 @@ void fpuop_scc (uae_u32 opcode, uae_u16 extra)
 	regs.fpiar = pc;
 	maybe_idle_state ();
 	cc = fpp_cond (extra & 0x3f);
-	if (cc == -1) {
+	if (cc < 0) {
 		fpu_op_illg (opcode, extra, regs.fpiar);
 	} else if ((opcode & 0x38) == 0) {
 		m68k_dreg (regs, opcode & 7) = (m68k_dreg (regs, opcode & 7) & ~0xff) | (cc ? 0xff : 0x00);
@@ -1301,7 +1342,7 @@ void fpuop_trapcc (uae_u32 opcode, uaecptr oldpc, uae_u16 extra)
 	regs.fpiar = oldpc;
 	maybe_idle_state ();
 	cc = fpp_cond (extra & 0x3f);
-	if (cc == -1) {
+	if (cc < 0) {
 		fpu_op_illg (opcode, extra, oldpc);
 	} else if (cc) {
 		Exception (7);
@@ -1323,7 +1364,7 @@ void fpuop_bcc (uae_u32 opcode, uaecptr oldpc, uae_u32 extra)
 	regs.fpiar = oldpc - 2;
 	maybe_idle_state ();
 	cc = fpp_cond (opcode & 0x3f);
-	if (cc == -1) {
+	if (cc < 0) {
 		fpu_op_illg (opcode, extra, oldpc - 2);
 	} else if (cc) {
 		if ((opcode & 0x40) == 0)
@@ -1775,6 +1816,7 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 	fpdata srcd;
 	uaecptr pc = m68k_getpc () - 4;
 	uaecptr ad = 0;
+	bool sgl;
 
 #if DEBUG_FPP
 	if (!isinrom ())
@@ -2002,6 +2044,7 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 					return;
 				if (fault_if_unimplemented_680x0 (opcode, extra, ad, pc, &srcd, reg))
 					return;
+				CLEAR_STATUS ();
 				switch (extra & 0x7f)
 				{
 					case 0x00:
@@ -2074,7 +2117,7 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 						fpu_noinst (opcode, pc);
 						return;
 				}
-				MAKE_FPSR (regs.fp[reg].fp);
+				MAKE_FPSR (&regs.fp[reg].fp);
 				return;
 			}
 
@@ -2096,6 +2139,8 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 
 			regs.fpiar =  pc;
 
+			CLEAR_STATUS ();
+			sgl = false;
 			switch (extra & 0x7f)
 			{
 				case 0x00: /* FMOVE */
@@ -2103,8 +2148,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 				case 0x44: /* Same for all other cases that have three choices */
 					regs.fp[reg].fp = src;        /* Brian King was here. */
 					/*<ea> to register needs FPSR updated. See Motorola 68K Manual. */
-					if ((extra & 0x44) == 0x40)
-						fround (reg);
 					break;
 				case 0x01: /* FINT */
 					/* need to take the current rounding mode into account */
@@ -2150,8 +2193,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 				case 0x41: /* FSSQRT */
 				case 0x45: /* FDSQRT */
 					regs.fp[reg].fp = sqrt (src);
-					if ((extra & 0x44) == 0x40)
-						fround (reg);
 					break;
 				case 0x06: /* FLOGNP1 */
 					regs.fp[reg].fp = log (src + 1.0);
@@ -2169,11 +2210,7 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 					regs.fp[reg].fp = asin (src);
 					break;
 				case 0x0d: /* FATANH */
-#if 1	/* The BeBox doesn't have atanh, and it isn't in the HPUX libm either */
-					regs.fp[reg].fp = 0.5 * log ((1 + src) / (1 - src));
-#else
 					regs.fp[reg].fp = atanh (src);
-#endif
 					break;
 				case 0x0e: /* FSIN */
 					regs.fp[reg].fp = sin (src);
@@ -2203,8 +2240,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 				case 0x58: /* FSABS */
 				case 0x5c: /* FDABS */
 					regs.fp[reg].fp = src < 0 ? -src : src;
-					if ((extra & 0x44) == 0x40)
-						fround (reg);
 					break;
 				case 0x19: /* FCOSH */
 					regs.fp[reg].fp = cosh (src);
@@ -2213,8 +2248,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 				case 0x5a: /* FSNEG */
 				case 0x5e: /* FDNEG */
 					regs.fp[reg].fp = -src;
-					if ((extra & 0x44) == 0x40)
-						fround (reg);
 					break;
 				case 0x1c: /* FACOS */
 					regs.fp[reg].fp = acos (src);
@@ -2247,8 +2280,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 				case 0x60: /* FSDIV */
 				case 0x64: /* FDDIV */
 					regs.fp[reg].fp /= src;
-					if ((extra & 0x44) == 0x40)
-						fround (reg);
 					break;
 				case 0x21: /* FMOD */
 					{
@@ -2260,19 +2291,15 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 				case 0x62: /* FSADD */
 				case 0x66: /* FDADD */
 					regs.fp[reg].fp += src;
-					if ((extra & 0x44) == 0x40)
-						fround (reg);
 					break;
 				case 0x23: /* FMUL */
 				case 0x63: /* FSMUL */
 				case 0x67: /* FDMUL */
 					regs.fp[reg].fp *= src;
-					if ((extra & 0x44) == 0x40)
-						fround (reg);
 					break;
 				case 0x24: /* FSGLDIV */
 					regs.fp[reg].fp /= src;
-					fround (reg);
+					sgl = true;
 					break;
 				case 0x25: /* FREM */
 					{
@@ -2291,14 +2318,12 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 					break;
 				case 0x27: /* FSGLMUL */
 					regs.fp[reg].fp *= src;
-					fround (reg);
+					sgl = true;
 					break;
 				case 0x28: /* FSUB */
 				case 0x68: /* FSSUB */
 				case 0x6c: /* FDSUB */
 					regs.fp[reg].fp -= src;
-					if ((extra & 0x44) == 0x40)
-						fround (reg);
 					break;
 				case 0x30: /* FSINCOS */
 				case 0x31:
@@ -2315,18 +2340,21 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 					{
 						fptype tmp = regs.fp[reg].fp - src;
 						regs.fpsr = 0;
-						MAKE_FPSR (tmp);
+						MAKE_FPSR (&tmp);
 					}
 					return;
 				case 0x3a: /* FTST */
 					regs.fpsr = 0;
-					MAKE_FPSR (src);
+					MAKE_FPSR (&src);
 					return;
 				default:
 					fpu_noinst (opcode, pc);
 					return;
 			}
-			MAKE_FPSR (regs.fp[reg].fp);
+			// round to float?
+			if (sgl || (extra & 0x44) == 0x40)
+				fround (reg);
+			MAKE_FPSR (&regs.fp[reg].fp);
 			return;
 		default:
 		break;
@@ -2336,23 +2364,31 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
 
 void fpuop_arithmetic (uae_u32 opcode, uae_u16 extra)
 {
-#if 0
-	if (opcode == 0xf210 && extra == 0x7c00) {
-//		activate_debugger();
-//		return;
-		write_log (_T("*"));
-	}
-#endif
-	regs.fpsr_highbyte = 0;
 	regs.fpu_state = 1;
 	regs.fp_exception = false;
+	fpu_mmu_fixup = false;
 	fpuop_arithmetic2 (opcode, extra);
-	if (regs.fpsr_highbyte) {
-		regs.fpsr &= 0xffff00ff;
-		regs.fpsr |= regs.fpsr_highbyte;
-		regs.fpsr |= regs.fpsr_highbyte << 8;
-		write_log (_T("FPU exception: %04x\n"), regs.fpsr);
+	if (fpu_mmu_fixup) {
+		mmufixup[0].reg = -1;
+		mmufixup[1].reg = -1;
+	}
+#if 0
+	// Any exception status bit and matching exception enable bits set?
+	if ((regs.fpcr >> 8) & (regs.fpsr >> 8)) {
+		uae_u32 mask = regs.fpcr >> 8;
+		int vector = 0;
+		for (int i = 7; i >= 0; i--) {
+			if (mask & (1 << i)) {
+				if (i > 0)
+					i--;
+				vector = i + 48;
+				break;
+			}
+		}
+		// logging only so far
+		write_log (_T("FPU exception: %08x %d!\n"), regs.fpsr, vector);
 	}
+#endif
 }
 
 void fpu_reset (void)
@@ -2364,11 +2400,6 @@ void fpu_reset (void)
 	fpux_restore (NULL);
 }
 
-void fpp_setexcept (uae_u16 mask)
-{
-	regs.fpsr_highbyte |= mask >> 8;
-}
-
 uae_u8 *restore_fpu (uae_u8 *src)
 {
 	int i;
@@ -2426,3 +2457,6 @@ uae_u8 *save_fpu (int *len, uae_u8 *dstptr)
 	return dstbak;
 }
 
+#ifdef _MSC_VER
+#pragma fenv_access(off)
+#endif
diff --git a/od-win32/md-fpp.h b/od-win32/md-fpp.h
index d50bb74d..42c1b9a8 100644
--- a/od-win32/md-fpp.h
+++ b/od-win32/md-fpp.h
@@ -173,7 +173,7 @@ STATIC_INLINE void from_double(double src, uae_u32 * wrd1, uae_u32 * wrd2)
 }
 #endif
 
-static double twoto32 = 4294967296.0;
+static const double twoto32 = 4294967296.0;
 #ifndef HAVE_to_exten
 #define HAVE_to_exten
 STATIC_INLINE void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)