From 2ced8d598d8c822141568e2f39a3deaee95b1906 Mon Sep 17 00:00:00 2001
From: Toni Wilen <twilen@winuae.net>
Date: Wed, 13 Sep 2023 20:10:00 +0300
Subject: [PATCH] cputester FPU double/float mode

---
 cputest/asm.S          | 36 +++++++++++++++++++++++++++++++-----
 cputest/cputestgen.ini | 12 +++++++++---
 cputest/main.c         | 37 ++++++++++++++++++++++++++++++-------
 3 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/cputest/asm.S b/cputest/asm.S
index e8e6b15f..fa10af8a 100644
--- a/cputest/asm.S
+++ b/cputest/asm.S
@@ -26,6 +26,8 @@
 	.globl _cyclereg_address6
 	.globl _berrcopy
 	.globl _fpucomp
+	.globl _fpucompzero
+	.globl _initfpu
 
 | must match main.c
 S_DREG = 0
@@ -53,17 +55,41 @@ S_NEXT = S_FSAVE+216
 
 asm_start:
 
+_initfpu:			| void initfpu(void); main.c calls it before running an FPU opcode test
+	moveq #0,d0
+	fmove.l d0,fpcr		| write 0 to the FPU control register
+	rts
+
+
+	| in: 4(sp) -> three extended values: v1, v2, limit
+	| out: d0 = 1 if abs(v1 - v2) <= limit, else 0
+_fpucompzero:
+	move.l 4(sp),a0
+	fmovem.x (a0),fp0-fp2		| fp0 = v1, fp1 = v2, fp2 = limit
+	fmove.x fp0,fp3
+	fsub.x fp1,fp3			| fp3 = v1 - v2
+	fabs.x fp3
+	moveq #1,d0
+	fcmp.x fp3,fp2			| compare limit against abs(v1 - v2)
+	fbge .larger0			| limit >= difference: return 1
+	moveq #0,d0
+.larger0:
+	rts
+
+
 	| v1, v2, limit
 	| abs((v2 - v1) / v1) > limit
 _fpucomp:
 	move.l 4(sp),a0
 	fmovem.x (a0),fp0-fp2
-	fsub.x fp0,fp1
-	fdiv.x fp0,fp1
-	fabs.x fp1
+	fmove.x fp0,fp3
+	fsub.x fp1,fp3			| fp3 = v1 - v2
+	fmove.x fp3,fp4
+	fdiv.x fp0,fp4			| fp4 = (v1 - v2) / v1
+	fabs.x fp4			| abs last: v1 may be negative, ratio must not be
 	moveq #1,d0
-	fcmp.x fp1,fp2
-	fbgt .larger
+	fcmp.x fp4,fp2			| compare limit against the relative difference
+	fbge .larger			| limit >= relative difference: return 1
 	moveq #0,d0
 .larger:
 	rts
diff --git a/cputest/cputestgen.ini b/cputest/cputestgen.ini
index c9680242..a5b10341 100644
--- a/cputest/cputestgen.ini
+++ b/cputest/cputestgen.ini
@@ -1,7 +1,7 @@
 [cputest]
 
 ; CPU model (68000, 68020, 68030, 68040 or 68060).
-cpu=68000
+cpu=68040
 
 ; CPU address space.
 ; If 24-bit, tester will assume upper 8-bits of addresses gets ignored.
@@ -18,12 +18,17 @@ fpu=
 
 ; 1 = all instructions are supported (for example FSxxx and FDxx if 6888x, all normally
 ; unimplemented (software emulated) if 68040/68060
-fpu_no_unimplemented=0
+;fpu_unimplemented=1
+
+; 0 = test inputs are never unnormals. 1 = test inputs can be unnormals.
+;fpu_unnormals=0
 
 ; Don't generate tests that create result that has larger or smaller 16-bit extended double exponent.
 ; Min exponent >0 does not prevent zero results.
+; fpu_max_precision: 1 = float, 2 = double, default = extended.
 fpu_min_exponent=
 fpu_max_exponent=
+;fpu_max_precision=2
 
 ; Write generated instructions to standard output. Always disabled in "all" mode.
 verbose=1
@@ -521,7 +526,8 @@ verbose=1
 cpu=68020-68060
 fpu=68882
 feature_sr_mask=0xc000
-exceptions=-48,-49,-50,-51,-52,-53,-54
+exceptions=-48,-49,-50,-51,-52,-53,-54,-55,-11
+feature_instruction_size=B,W,L,S,D,X
 min_opcode_test_rounds=5000
 mode=fmove,fsmove,fdmove,fint,fintrz,fneg,fsneg,fdneg,fabs,fsabs,fdabs,fdiv,fsdiv,fddiv,fadd,fsadd,fdadd,fmul,fsmul,fdmul,fsgldiv,fsglmul,fsub,fssub,fdsub,fcmp,ftst,fsqrt
 
diff --git a/cputest/main.c b/cputest/main.c
index 0c9e01f3..0e3c7372 100644
--- a/cputest/main.c
+++ b/cputest/main.c
@@ -253,6 +253,13 @@ static uae_u32 fpucomp(void *v)
 {
 	return 0;
 }
+static uae_u32 fpucompzero(void *v) // stub for the #if branch that does not use the asm.S version; v is ignored
+{
+	return 0; // always 0, so the near-zero check in fpucheckextra() never reports a match here
+}
+static void initfpu(void) // stub counterpart of _initfpu in asm.S: intentionally does nothing
+{
+}
 static void *error_vector;
 #else
 
@@ -279,7 +286,9 @@ extern void setcpu(uae_u32, uae_u32*, uae_u32*);
 extern void flushcache(uae_u32);
 extern void *error_vector;
 extern void berrcopy(void*, void*, uae_u32, uae_u32);
-extern uae_u32 fpucomp(void*);
+extern uae_u32 fpucomp(void *); // asm.S _fpucomp: relative-difference check on {v1, v2, limit}
+extern uae_u32 fpucompzero(void *); // asm.S _fpucompzero: absolute-difference check on {v1, v2, limit}
+extern void initfpu(void); // asm.S _initfpu: writes 0 to FPCR
 
 #endif
 static uae_u32 exceptiontableinuse;
@@ -2415,6 +2424,8 @@ static int check_cycles(int exc, short extratrace, short extrag2w1, struct regis
 // not returning identical values (6888x algorithms are unknown)
 static short fpucheckextra(struct fpureg *f1, struct fpureg *f2)
 {
+	uae_u32 vx[9];
+
 	if (!is_fpu_adjust)
 		return 0;
 
@@ -2440,15 +2451,25 @@ static short fpucheckextra(struct fpureg *f1, struct fpureg *f2)
 		}
 		return 1;
 	}
-	// Zero: both must match
+
+	// One zero: other must be close enough to zero
 	if ((!exp1 && !m1[0] && !m1[1]) || (!exp2 && !m2[0] && !m2[1])) {
+		vx[0] = f1->exp << 16;
+		vx[1] = f1->m[0];
+		vx[2] = f1->m[1];
+		vx[3] = f2->exp << 16;
+		vx[4] = f2->m[0];
+		vx[5] = f2->m[1];
+		vx[6] = (16383 - 10) << 16;
+		vx[7] = 0x80000000;
+		vx[8] = 0x00000000;
+		if (fpucompzero(vx)) {
+			fpu_approx++;
+			return 1;
+		}
 		return 0;
 	}
-	if ((!exp1 && !m1[0] && !m1[1]) && (!exp2 && !m2[0] && !m2[1])) {
-		return 1;
-	}
 
-	uae_u32 vx[9];
 	vx[0] = f1->exp << 16;
 	vx[1] = f1->m[0];
 	vx[2] = f1->m[1];
@@ -3219,7 +3240,9 @@ static void process_test(uae_u8 *p)
 		opcode_memory_end = (uae_u8*)endpc;
 
 		int fpumode = fpu_model && (opcode_memory[0] & 0xf0) == 0xf0;
-
+		if (fpumode) {
+			initfpu();
+		}
 		copyregs(&last_regs, &cur_regs, fpumode);
 
 		uae_u32 originalopcodeend = (NOP_OPCODE << 16) | ILLG_OPCODE;
-- 
2.47.3