From c9af3b4ad0d6373ffdb1751e5a7b0615d1787a81 Mon Sep 17 00:00:00 2001 From: Toni Wilen Date: Mon, 6 Mar 2017 18:02:40 +0200 Subject: [PATCH] FPU update. --- fpp.cpp | 181 +++++++++++----------- fpp_native.cpp | 291 ++++++++++++++++++++++------------- fpp_softfloat.cpp | 308 ++++++++++++++++++++++--------------- include/fpp.h | 19 +-- newcpu.cpp | 21 +-- softfloat/softfloat.cpp | 328 ++++++++++++++++++++++++++++++++++++---- softfloat/softfloat.h | 16 +- 7 files changed, 796 insertions(+), 368 deletions(-) diff --git a/fpp.cpp b/fpp.cpp index 0c7a05bf..46623643 100644 --- a/fpp.cpp +++ b/fpp.cpp @@ -75,7 +75,7 @@ FPP_A fpp_round64; FPP_AB fpp_int; FPP_AB fpp_sinh; FPP_AB fpp_intrz; -FPP_AB fpp_sqrt; +FPP_ABP fpp_sqrt; FPP_AB fpp_lognp1; FPP_AB fpp_etoxm1; FPP_AB fpp_tanh; @@ -90,28 +90,28 @@ FPP_AB fpp_tentox; FPP_AB fpp_logn; FPP_AB fpp_log10; FPP_AB fpp_log2; -FPP_AB fpp_abs; +FPP_ABP fpp_abs; FPP_AB fpp_cosh; -FPP_AB fpp_neg; +FPP_ABP fpp_neg; FPP_AB fpp_acos; FPP_AB fpp_cos; FPP_AB fpp_getexp; FPP_AB fpp_getman; -FPP_AB fpp_div; +FPP_ABP fpp_div; FPP_ABQS fpp_mod; -FPP_AB fpp_add; -FPP_AB fpp_mul; +FPP_ABP fpp_add; +FPP_ABP fpp_mul; FPP_ABQS fpp_rem; FPP_AB fpp_scale; -FPP_AB fpp_sub; +FPP_ABP fpp_sub; FPP_AB fpp_sgldiv; FPP_AB fpp_sglmul; FPP_AB fpp_cmp; FPP_AB fpp_tst; -FPP_AB fpp_move; +FPP_ABP fpp_move; #define DEBUG_FPP 0 -#define EXCEPTION_FPP 1 +#define EXCEPTION_FPP 0 STATIC_INLINE int isinrom (void) { @@ -427,17 +427,9 @@ static void fpsr_check_arithmetic_exception(uae_u32 mask, fpdata *src, uae_u32 o } } else if (regs.fp_exp_pend == 53) { // OVFL fpp_get_internal_overflow(&eo); - if (((regs.fpcr >> 6) & 3) == 1) - fpp_round32(&eo); - if (((regs.fpcr >> 6) & 3) >= 2) - fpp_round64(&eo); fpp_from_exten_fmovem(&eo, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]); } else if (regs.fp_exp_pend == 51) { // UNFL fpp_get_internal_underflow(&eo); - if (((regs.fpcr >> 6) & 3) == 1) - fpp_round32(&eo); - if (((regs.fpcr >> 6) & 3) >= 2) - 
fpp_round64(&eo); fpp_from_exten_fmovem(&eo, &fsave_data.eo[0], &fsave_data.eo[1], &fsave_data.eo[2]); } // else INEX1, INEX2: do nothing @@ -558,12 +550,17 @@ static int fpsr_set_bsun(void) return 0; } -void fpsr_set_quotient(uae_u64 quot, uae_s8 sign) +static void fpsr_set_quotient(uae_u64 quot, uae_u8 sign) { regs.fpsr &= 0x0f00fff8; regs.fpsr |= (quot << 16) & FPSR_QUOT_LSB; regs.fpsr |= sign ? FPSR_QUOT_SIGN : 0; } +static void fpsr_get_quotient(uae_u64 *quot, uae_u8 *sign) +{ + *quot = (regs.fpsr & FPSR_QUOT_LSB) >> 16; + *sign = (regs.fpsr & FPSR_QUOT_SIGN) ? 1 : 0; +} uae_u32 fpp_get_fpsr (void) { @@ -2618,7 +2615,7 @@ static uaecptr fmovem2fpp (uaecptr ad, uae_u32 list, int incr, int regdir) return ad; } -static bool arithmetic(fpdata *src, fpdata *dst, int extra) +static bool fp_arithmetic(fpdata *src, fpdata *dst, int extra) { uae_u64 q = 0; uae_u8 s = 0; @@ -2626,122 +2623,145 @@ static bool arithmetic(fpdata *src, fpdata *dst, int extra) switch (extra & 0x7f) { case 0x00: /* FMOVE */ - case 0x40: - case 0x44: - fpp_move(src, dst); + fpp_move(dst, src, 0); + break; + case 0x40: /* FSMOVE */ + fpp_move(dst, src, 32); + break; + case 0x44: /* FDMOVE */ + fpp_move(dst, src, 64); break; case 0x01: /* FINT */ - fpp_int(src, dst); + fpp_int(dst, src); break; case 0x02: /* FSINH */ - fpp_sinh(src, dst); + fpp_sinh(dst, src); break; case 0x03: /* FINTRZ */ - fpp_intrz(src, dst); + fpp_intrz(dst, src); break; case 0x04: /* FSQRT */ + fpp_sqrt(dst, src, 0); + break; case 0x41: /* FSSQRT */ + fpp_sqrt(dst, src, 32); + break; case 0x45: /* FDSQRT */ - fpp_sqrt(src, dst); + fpp_sqrt(dst, src, 64); break; case 0x06: /* FLOGNP1 */ - fpp_lognp1(src, dst); + fpp_lognp1(dst, src); break; case 0x08: /* FETOXM1 */ - fpp_etoxm1(src, dst); + fpp_etoxm1(dst, src); break; case 0x09: /* FTANH */ - fpp_tanh(src, dst); + fpp_tanh(dst, src); break; case 0x0a: /* FATAN */ - fpp_atan(src, dst); + fpp_atan(dst, src); break; case 0x0c: /* FASIN */ - fpp_asin(src, dst); + fpp_asin(dst, 
src); break; case 0x0d: /* FATANH */ - fpp_atanh(src, dst); + fpp_atanh(dst, src); break; case 0x0e: /* FSIN */ - fpp_sin(src, dst); + fpp_sin(dst, src); break; case 0x0f: /* FTAN */ - fpp_tan(src, dst); + fpp_tan(dst, src); break; case 0x10: /* FETOX */ - fpp_etox(src, dst); + fpp_etox(dst, src); break; case 0x11: /* FTWOTOX */ - fpp_twotox(src, dst); + fpp_twotox(dst, src); break; case 0x12: /* FTENTOX */ - fpp_tentox(src, dst); + fpp_tentox(dst, src); break; case 0x14: /* FLOGN */ - fpp_logn(src, dst); + fpp_logn(dst, src); break; case 0x15: /* FLOG10 */ - fpp_log10(src, dst); + fpp_log10(dst, src); break; case 0x16: /* FLOG2 */ - fpp_log2(src, dst); + fpp_log2(dst, src); break; case 0x18: /* FABS */ + fpp_abs(dst, src, 0); + break; case 0x58: /* FSABS */ + fpp_abs(dst, src, 32); + break; case 0x5c: /* FDABS */ - fpp_abs(src, dst); + fpp_abs(dst, src, 64); break; case 0x19: /* FCOSH */ - fpp_cosh(src, dst); + fpp_cosh(dst, src); break; case 0x1a: /* FNEG */ + fpp_neg(dst, src, 0); + break; case 0x5a: /* FSNEG */ + fpp_neg(dst, src, 32); + break; case 0x5e: /* FDNEG */ - fpp_neg(src, dst); + fpp_neg(dst, src, 64); break; case 0x1c: /* FACOS */ - fpp_acos(src, dst); + fpp_acos(dst, src); break; case 0x1d: /* FCOS */ - fpp_cos(src, dst); + fpp_cos(dst, src); break; case 0x1e: /* FGETEXP */ - fpp_getexp(src, dst); + fpp_getexp(dst, src); break; case 0x1f: /* FGETMAN */ - fpp_getman(src, dst); + fpp_getman(dst, src); break; case 0x20: /* FDIV */ + fpp_div(dst, src, 0); + break; case 0x60: /* FSDIV */ + fpp_div(dst, src, 32); + break; case 0x64: /* FDDIV */ - fpp_div(dst, src); + fpp_div(dst, src, 64); break; case 0x21: /* FMOD */ + fpsr_get_quotient(&q, &s); fpp_mod(dst, src, &q, &s); - if (fpsr_make_status()) - return false; fpsr_set_quotient(q, s); break; case 0x22: /* FADD */ + fpp_add(dst, src, 0); + break; case 0x62: /* FSADD */ + fpp_add(dst, src, 32); + break; case 0x66: /* FDADD */ - fpp_add(dst, src); + fpp_add(dst, src, 64); break; case 0x23: /* FMUL */ + 
fpp_mul(dst, src, 0); + break; case 0x63: /* FSMUL */ + fpp_mul(dst, src, 32); + break; case 0x67: /* FDMUL */ - fpp_mul(dst, src); + fpp_mul(dst, src, 64); break; case 0x24: /* FSGLDIV */ fpp_sgldiv(dst, src); - fpsr_set_result(dst); - if (fpsr_make_status()) - return false; - return true; + break; case 0x25: /* FREM */ + fpsr_get_quotient(&q, &s); fpp_rem(dst, src, &q, &s); - if (fpsr_make_status()) - return false; fpsr_set_quotient(q, s); break; case 0x26: /* FSCALE */ @@ -2749,14 +2769,15 @@ static bool arithmetic(fpdata *src, fpdata *dst, int extra) break; case 0x27: /* FSGLMUL */ fpp_sglmul(dst, src); - fpsr_set_result(dst); - if (fpsr_make_status()) - return false; - return true; + break; case 0x28: /* FSUB */ + fpp_sub(dst, src, 0); + break; case 0x68: /* FSSUB */ + fpp_sub(dst, src, 32); + break; case 0x6c: /* FDSUB */ - fpp_sub(dst, src); + fpp_sub(dst, src, 64); break; case 0x30: /* FSINCOS */ case 0x31: /* FSINCOS */ @@ -2766,28 +2787,22 @@ static bool arithmetic(fpdata *src, fpdata *dst, int extra) case 0x35: /* FSINCOS */ case 0x36: /* FSINCOS */ case 0x37: /* FSINCOS */ - fpp_cos(src, dst); - if (((regs.fpcr >> 6) & 3) == 1) - fpp_round_single(dst); - else if (((regs.fpcr >> 6) & 3) == 2) - fpp_round_double(dst); + fpp_cos(dst, src); regs.fp[extra & 7] = *dst; - fpp_sin(src, dst); + fpp_sin(dst, src); break; case 0x38: /* FCMP */ { fpp_cmp(dst, src); + fpsr_make_status(); fpsr_set_result(dst); - if (fpsr_make_status()) - return false; return false; } case 0x3a: /* FTST */ { fpp_tst(dst, src); + fpsr_make_status(); fpsr_set_result(dst); - if (fpsr_make_status()) - return false; return false; } default: @@ -2795,17 +2810,6 @@ static bool arithmetic(fpdata *src, fpdata *dst, int extra) return false; } - // must check instruction rounding overrides first - if ((extra & 0x44) == 0x40) { - fpp_round_single(dst); - } else if ((extra & 0x44) == 0x44) { - fpp_round_double(dst); - } else if (((regs.fpcr >> 6) & 3) == 1) { - fpp_round_single(dst); - } else if 
(((regs.fpcr >> 6) & 3) == 2) { - fpp_round_double(dst); - } - fpsr_set_result(dst); if (fpsr_make_status()) @@ -3083,11 +3087,13 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) if (regs.fp_unimp_pend) return; - v = arithmetic(&src, &dst, extra); + v = fp_arithmetic(&src, &dst, extra); + + fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad); + if (v) regs.fp[reg] = dst; - fpsr_check_arithmetic_exception(0, &src, opcode, extra, ad); return; default: break; @@ -3145,6 +3151,9 @@ void fpu_reset (void) fpp_set_fpcr (0); fpp_set_fpsr (0); fpux_restore (NULL); + // reset precision + fpp_set_mode(0x00000080 | 0x00000010); + fpp_set_mode(0x00000000); } uae_u8 *restore_fpu (uae_u8 *src) diff --git a/fpp_native.cpp b/fpp_native.cpp index 8df7b92e..19cdcb02 100644 --- a/fpp_native.cpp +++ b/fpp_native.cpp @@ -122,6 +122,9 @@ static const double twoto32 = 4294967296.0; #define FPCR_PRECISION_EXTENDED 0x00000000 static struct float_status fs; +static uae_u32 fpu_mode_control = 0; +static int fpu_prec; +static int temp_prec; #if defined(CPU_i386) || defined(CPU_x86_64) @@ -212,32 +215,44 @@ static void native_set_fpucw(uae_u32 m68k_cw) /* Functions for setting host/library modes and getting status */ static void fp_set_mode(uae_u32 mode_control) { + if (mode_control == fpu_mode_control) + return; switch(mode_control & FPCR_ROUNDING_PRECISION) { case FPCR_PRECISION_EXTENDED: // X +#ifdef USE_LONG_DOUBLE + fpu_prec = 80; +#else + fpu_prec = 64; +#endif break; case FPCR_PRECISION_SINGLE: // S + fpu_prec = 32; break; case FPCR_PRECISION_DOUBLE: // D default: // undefined + fpu_prec = 64; break; } #ifdef USE_HOST_ROUNDING - switch(mode_control & FPCR_ROUNDING_MODE) { - case FPCR_ROUND_NEAR: // to neareset - fesetround(FE_TONEAREST); - break; - case FPCR_ROUND_ZERO: // to zero - fesetround(FE_TOWARDZERO); - break; - case FPCR_ROUND_MINF: // to minus - fesetround(FE_DOWNWARD); - break; - case FPCR_ROUND_PINF: // to plus - fesetround(FE_UPWARD); - break; - } + 
if ((mode_control & FPCR_ROUNDING_MODE) != (fpu_mode_control & FPCR_ROUNDING_MODE)) { + switch(mode_control & FPCR_ROUNDING_MODE) { + case FPCR_ROUND_NEAR: // to neareset + fesetround(FE_TONEAREST); + break; + case FPCR_ROUND_ZERO: // to zero + fesetround(FE_TOWARDZERO); + break; + case FPCR_ROUND_MINF: // to minus + fesetround(FE_DOWNWARD); + break; + case FPCR_ROUND_PINF: // to plus + fesetround(FE_UPWARD); + break; + } + } native_set_fpucw(mode_control); #endif + fpu_mode_control = mode_control; } @@ -264,30 +279,6 @@ static void fp_clear_status(void) feclearexcept (FE_ALL_EXCEPT); } -static const TCHAR *fp_print(fpdata *fpd) -{ - static TCHAR fs[32]; - bool n, d; - - n = signbit(fpd->fp) ? 1 : 0; - d = isnormal(fpd->fp) ? 0 : 1; - - if(isinf(fpd->fp)) { - _stprintf(fs, _T("%c%s"), n ? '-' : '+', _T("inf")); - } else if(isnan(fpd->fp)) { - _stprintf(fs, _T("%c%s"), n ? '-' : '+', _T("nan")); - } else { - if(n) - fpd->fp *= -1.0; -#if USE_LONG_DOUBLE - _stprintf(fs, _T("#%Le"), fpd->fp); -#else - _stprintf(fs, _T("#%e"), fpd->fp); -#endif - } - return fs; -} - /* Functions for detecting float type */ static bool fp_is_snan(fpdata *fpd) { @@ -602,19 +593,95 @@ static void fp_round_double(fpdata *fpd) #endif } +static const TCHAR *fp_print(fpdata *fpd, int mode) +{ + static TCHAR fsout[32]; + bool n, d; + + if (mode < 0) { + uae_u32 w1, w2, w3; + fp_from_exten(fpd, &w1, &w2, &w3); + _stprintf(fsout, _T("%04X-%08X-%08X"), w1 >> 16, w2, w3); + return fsout; + } + + n = signbit(fpd->fp) ? 1 : 0; + d = isnormal(fpd->fp) ? 0 : 1; + + if(isinf(fpd->fp)) { + _stprintf(fsout, _T("%c%s"), n ? '-' : '+', _T("inf")); + } else if(isnan(fpd->fp)) { + _stprintf(fsout, _T("%c%s"), n ? 
'-' : '+', _T("nan")); + } else { + if(n) + fpd->fp *= -1.0; +#if USE_LONG_DOUBLE + _stprintf(fsout, _T("#%Le"), fpd->fp); +#else + _stprintf(fsout, _T("#%e"), fpd->fp); +#endif + } + if (mode == 0 || mode > _tcslen(fsout)) + return fsout; + fsout[mode] = 0; + return fsout; +} + +static void fp_set_prec(int prec) +{ +#if 0 + temp_fpu_mode_control = fpu_mode_control; + if (prec && fpu_prec > prec) { + fpu_mode_control &= ~FPCR_ROUNDING_PRECISION; + switch (prec) + { + case 80: + fpu_mode_control |= FPCR_PRECISION_EXTENDED; + break; + case 64: + default: + fpu_mode_control |= FPCR_PRECISION_DOUBLE; + break; + case 32: + fpu_mode_control |= FPCR_PRECISION_SINGLE; + break; + } + fp_set_mode(fpu_mode_control); + } +#endif + temp_prec = prec; +} +static void fp_reset_prec(fpdata *fpd) +{ +#if 0 + fp_set_mode(temp_fpu_mode_control); +#else + int prec = temp_prec; + if (temp_prec == 0) + prec = fpu_prec; + if (prec == 64) { + fp_round_double(fpd); + } else if (prec == 32) { + fp_round_single(fpd); + } +#endif +} + /* Arithmetic functions */ -static void fp_move(fpdata *src, fpdata *dst) +static void fp_move(fpdata *a, fpdata *b, int prec) { - dst->fp = src->fp; + fp_set_prec(prec); + a->fp = b->fp; + fp_reset_prec(a); } #ifdef USE_LONG_DOUBLE -STATIC_INLINE fptype fp_int(fpdata *a, fpdata *dst) +STATIC_INLINE fptype fp_int(fpdata *a, fpdata *b) { #ifdef USE_HOST_ROUNDING - dst->fp = rintl(a->dst); + a->fp = rintl(b->dst); #else switch (regs.fpcr & FPCR_ROUNDING_MODE) { @@ -668,42 +735,44 @@ STATIC_INLINE fptype fp_rem(fptype a, fptype b, uae_u64 *q, uae_s8 *s) #else // if !USE_LONG_DOUBLE -static void fp_int(fpdata *fpd, fpdata *dst) +static void fp_int(fpdata *a, fpdata *b) { - fptype a = fpd->fp; + fptype bb = b->fp; #ifdef USE_HOST_ROUNDING - dst->fp = rintl(a); + a->fp = rintl(bb); #else switch (regs.fpcr & FPCR_ROUNDING_MODE) { case FPCR_ROUND_NEAR: - dst->fp = fp_round_to_nearest(a); + a->fp = fp_round_to_nearest(bb); case FPCR_ROUND_ZERO: - dst->fp = 
fp_round_to_zero(a); + a->fp = fp_round_to_zero(bb); case FPCR_ROUND_MINF: - dst->fp = fp_round_to_minus_infinity(a); + a->fp = fp_round_to_minus_infinity(bb); case FPCR_ROUND_PINF: - dst->fp = fp_round_to_plus_infinity(a); + a->fp = fp_round_to_plus_infinity(bb); default: /* never reached */ break; } #endif } -static void fp_getexp(fpdata *a, fpdata *dst) +static void fp_getexp(fpdata *a, fpdata *b) { int expon; - frexpl(a->fp, &expon); - dst->fp = (double) (expon - 1); + frexpl(b->fp, &expon); + a->fp = (double) (expon - 1); } -static void fp_getman(fpdata *a, fpdata *dst) +static void fp_getman(fpdata *a, fpdata *b) { int expon; - dst->fp = frexpl(a->fp, &expon) * 2.0; + a->fp = frexpl(b->fp, &expon) * 2.0; } -static void fp_div(fpdata *a, fpdata *b) +static void fp_div(fpdata *a, fpdata *b, int prec) { + fp_set_prec(prec); a->fp = a->fp / b->fp; + fp_reset_prec(a); } static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) { @@ -747,113 +816,124 @@ static void fp_scale(fpdata *a, fpdata *b) #endif // !USE_LONG_DOUBLE -static void fp_sinh(fpdata *a, fpdata *dst) +static void fp_sinh(fpdata *a, fpdata *b) { - dst->fp = sinhl(a->fp); + a->fp = sinhl(b->fp); } -static void fp_intrz(fpdata *fpd, fpdata *dst) +static void fp_intrz(fpdata *a, fpdata *b) { #ifdef USE_HOST_ROUNDING - dst->fp = truncl(fpd->fp); + a->fp = truncl(b->fp); #else - dst->fp = fp_round_to_zero (fpd->fp); + a->fp = fp_round_to_zero (b->fp); #endif } -static void fp_sqrt(fpdata *a, fpdata *dst) +static void fp_sqrt(fpdata *a, fpdata *b, int prec) { - dst->fp = sqrtl(a->fp); + fp_set_prec(prec); + a->fp = sqrtl(b->fp); + fp_reset_prec(a); } -static void fp_lognp1(fpdata *a, fpdata *dst) +static void fp_lognp1(fpdata *a, fpdata *b) { - dst->fp = log1pl(a->fp); + a->fp = log1pl(b->fp); } -static void fp_etoxm1(fpdata *a, fpdata *dst) +static void fp_etoxm1(fpdata *a, fpdata *b) { - dst->fp = expm1l(a->fp); + a->fp = expm1l(b->fp); } -static void fp_tanh(fpdata *a, fpdata *dst) +static void 
fp_tanh(fpdata *a, fpdata *b) { - dst->fp = tanhl(a->fp); + a->fp = tanhl(b->fp); } -static void fp_atan(fpdata *a, fpdata *dst) +static void fp_atan(fpdata *a, fpdata *b) { - dst->fp = atanl(a->fp); + a->fp = atanl(b->fp); } -static void fp_atanh(fpdata *a, fpdata *dst) +static void fp_atanh(fpdata *a, fpdata *b) { - dst->fp = atanhl(a->fp); + a->fp = atanhl(b->fp); } -static void fp_sin(fpdata *a, fpdata *dst) +static void fp_sin(fpdata *a, fpdata *b) { - dst->fp = sinl(a->fp); + a->fp = sinl(b->fp); } -static void fp_asin(fpdata *a, fpdata *dst) +static void fp_asin(fpdata *a, fpdata *b) { - dst->fp = asinl(a->fp); + a->fp = asinl(b->fp); } -static void fp_tan(fpdata *a, fpdata *dst) +static void fp_tan(fpdata *a, fpdata *b) { - dst->fp = tanl(a->fp); + a->fp = tanl(b->fp); } -static void fp_etox(fpdata *a, fpdata *dst) +static void fp_etox(fpdata *a, fpdata *b) { - dst->fp = expl(a->fp); + a->fp = expl(b->fp); } -static void fp_twotox(fpdata *a, fpdata *dst) +static void fp_twotox(fpdata *a, fpdata *b) { - dst->fp = powl(2.0, a->fp); + a->fp = powl(2.0, b->fp); } -static void fp_tentox(fpdata *a, fpdata *dst) +static void fp_tentox(fpdata *a, fpdata *b) { - dst->fp = powl(10.0, a->fp); + a->fp = powl(10.0, b->fp); } -static void fp_logn(fpdata *a, fpdata *dst) +static void fp_logn(fpdata *a, fpdata *b) { - dst->fp = logl(a->fp); + a->fp = logl(b->fp); } -static void fp_log10(fpdata *a, fpdata *dst) +static void fp_log10(fpdata *a, fpdata *b) { - dst->fp = log10l(a->fp); + a->fp = log10l(b->fp); } -static void fp_log2(fpdata *a, fpdata *dst) +static void fp_log2(fpdata *a, fpdata *b) { - dst->fp = log2l(a->fp); + a->fp = log2l(b->fp); } -static void fp_abs(fpdata *a, fpdata *dst) +static void fp_abs(fpdata *a, fpdata *b, int prec) { - dst->fp = a->fp < 0.0 ? -a->fp : a->fp; + fp_set_prec(prec); + a->fp = b->fp < 0.0 ? 
-b->fp : b->fp; + fp_reset_prec(a); } -static void fp_cosh(fpdata *a, fpdata *dst) +static void fp_cosh(fpdata *a, fpdata *b) { - dst->fp = coshl(a->fp); + a->fp = coshl(b->fp); } -static void fp_neg(fpdata *a, fpdata *dst) +static void fp_neg(fpdata *a, fpdata *b, int prec) { - dst->fp = -a->fp; + fp_set_prec(prec); + a->fp = -b->fp; + fp_reset_prec(a); } -static void fp_acos(fpdata *a, fpdata *dst) +static void fp_acos(fpdata *a, fpdata *b) { - dst->fp = acosl(a->fp); + a->fp = acosl(b->fp); } -static void fp_cos(fpdata *a, fpdata *dst) +static void fp_cos(fpdata *a, fpdata *b) { - dst->fp = cosl(a->fp); + a->fp = cosl(b->fp); } -static void fp_sub(fpdata *a, fpdata *b) +static void fp_sub(fpdata *a, fpdata *b, int prec) { + fp_set_prec(prec); a->fp = a->fp - b->fp; + fp_reset_prec(a); } -static void fp_add(fpdata *a, fpdata *b) +static void fp_add(fpdata *a, fpdata *b, int prec) { + fp_set_prec(prec); a->fp = a->fp + b->fp; + fp_reset_prec(a); } -static void fp_mul(fpdata *a, fpdata *b) +static void fp_mul(fpdata *a, fpdata *b, int prec) { + fp_set_prec(prec); a->fp = a->fp * b->fp; + fp_reset_prec(a); } static void fp_sglmul(fpdata *a, fpdata *b) { - // not exact a->fp = a->fp * b->fp; fpp_round32(a); } @@ -910,8 +990,7 @@ static void fp_cmp(fpdata *a, fpdata *b) if (!b_neg) v = -1.0; } else { - fpp_sub(a, b); - v = a->fp; + v = a->fp - b->fp; fp_clear_status(); } a->fp = v; diff --git a/fpp_softfloat.cpp b/fpp_softfloat.cpp index be98f617..862c35e9 100644 --- a/fpp_softfloat.cpp +++ b/fpp_softfloat.cpp @@ -43,9 +43,22 @@ static struct float_status fs; /* Functions for setting host/library modes and getting status */ static void fp_set_mode(uae_u32 mode_control) { - set_floatx80_rounding_precision(80, &fs); set_float_detect_tininess(float_tininess_before_rounding, &fs); - switch(mode_control & FPCR_ROUNDING_MODE) { + + switch(mode_control & FPCR_ROUNDING_PRECISION) { + case FPCR_PRECISION_SINGLE: // single + set_floatx80_rounding_precision(32, &fs); + break; + 
default: // double + case FPCR_PRECISION_DOUBLE: // double + set_floatx80_rounding_precision(64, &fs); + break; + case FPCR_PRECISION_EXTENDED: // extended + set_floatx80_rounding_precision(80, &fs); + break; + } + + switch(mode_control & FPCR_ROUNDING_MODE) { case FPCR_ROUND_NEAR: // to neareset set_float_rounding_mode(float_round_nearest_even, &fs); break; @@ -59,7 +72,6 @@ static void fp_set_mode(uae_u32 mode_control) set_float_rounding_mode(float_round_up, &fs); break; } - return; } static void fp_get_status(uae_u32 *status) @@ -83,7 +95,7 @@ STATIC_INLINE void fp_clear_status(void) } -static const TCHAR *fp_print(fpdata *fpd) +static const TCHAR *fp_print(fpdata *fpd, int mode) { static TCHAR fsout[32]; flag n, u, d; @@ -91,6 +103,11 @@ static const TCHAR *fp_print(fpdata *fpd) int i; floatx80 *fx = &fpd->fpx; + if (mode < 0) { + _stprintf(fsout, _T("%04X-%08X-%08X"), fx->high, (uae_u32)(fx->low >> 32), (uae_u32)fx->low); + return fsout; + } + n = floatx80_is_negative(*fx); u = floatx80_is_unnormal(*fx); d = floatx80_is_denormal(*fx); @@ -121,6 +138,9 @@ static const TCHAR *fp_print(fpdata *fpd) _stprintf(fsout, _T("%c%#.17e%s%s"), n?'-':'+', result, u ? _T("U") : _T(""), d ? 
_T("D") : _T("")); #endif } + if (mode == 0 || mode > _tcslen(fsout)) + return fsout; + fsout[mode] = 0; return fsout; } @@ -334,7 +354,8 @@ static void fp_get_internal_overflow(fpdata *fpd) } fpd->fpx.high = ((uint16_t)floatx80_internal_exp) & 0x7fff; - fpd->fpx.low = floatx80_internal_sig0; + fpd->fpx.high |= ((uint16_t)floatx80_internal_sign) << 15; + fpd->fpx.low = floatx80_internal_sig; } static void fp_get_internal_underflow(fpdata *fpd) @@ -346,7 +367,8 @@ static void fp_get_internal_underflow(fpdata *fpd) } fpd->fpx.high = ((uint16_t)floatx80_internal_exp) & 0x7fff; - fpd->fpx.low = floatx80_internal_sig0; + fpd->fpx.high |= ((uint16_t)floatx80_internal_sign) << 15; + fpd->fpx.low = floatx80_internal_sig; } static void fp_get_exceptional_operand_grs(uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3, uae_u32 *grs) @@ -359,6 +381,26 @@ static void fp_get_exceptional_operand_grs(uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *grs |= (floatx80_internal_sig1 & 0x3fffffffffffffffULL) ? 1 : 0; } +static void fp_get_internal_unmodified(uae_u32 *grs, fpdata *fpd) +{ + uint64_t roundbits; + + fpd->fpx.high = ((uint16_t)floatx80_internal_exp) & 0x7fff; + fpd->fpx.high |= ((uint16_t)floatx80_internal_sign) << 15; + fpd->fpx.low = floatx80_internal_sig0; + + shift64RightJamming(floatx80_internal_sig1, 61, &roundbits); + *grs = (uae_u32)roundbits; +} + +static void fp_get_internal(fpdata *fpd) +{ + + fpd->fpx.high = ((int16_t)floatx80_internal_exp) & 0x7fff; + fpd->fpx.high |= ((int16_t)floatx80_internal_sign) << 15; + fpd->fpx.low = floatx80_internal_sig; +} + /* Functions for rounding */ static floatx80 fp_to_sgl(floatx80 a) @@ -395,134 +437,105 @@ static void fp_round_double(fpdata *fpd) /* Arithmetic functions */ -static void fp_move(fpdata *src, fpdata *dst) -{ - dst->fpx = floatx80_move(src->fpx, &fs); -} - -static void fp_int(fpdata *a, fpdata *dst) +static void fp_int(fpdata *a, fpdata *b) { - dst->fpx = floatx80_round_to_int(a->fpx, &fs); + a->fpx = 
floatx80_round_to_int(b->fpx, &fs); } -static void fp_intrz(fpdata *a, fpdata *dst) +static void fp_intrz(fpdata *a, fpdata *b) { - dst->fpx = floatx80_round_to_int_toward_zero(a->fpx, &fs); + a->fpx = floatx80_round_to_int_toward_zero(b->fpx, &fs); } -static void fp_sqrt(fpdata *a, fpdata *dst) -{ - dst->fpx = floatx80_sqrt(a->fpx, &fs); -} -static void fp_lognp1(fpdata *a, fpdata *dst) +static void fp_lognp1(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_lognp1_check(a->fpx, &e, &fs); + a->fpx = floatx80_lognp1_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); - fpa = log(a->fp + 1.0); + fpa = log(fpa + 1.0); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_sin(fpdata *a, fpdata *dst) +static void fp_sin(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_sin_check(a->fpx, &e, &fs); + a->fpx = floatx80_sin_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = sin(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_tan(fpdata *a, fpdata *dst) +static void fp_tan(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_tan_check(a->fpx, &e, &fs); + a->fpx = floatx80_tan_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = tan(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_logn(fpdata *a, fpdata *dst) +static void fp_logn(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_logn_check(a->fpx, &e, &fs); + a->fpx = floatx80_logn_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = log(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_log10(fpdata *a, fpdata *dst) +static void fp_log10(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_log10_check(a->fpx, &e, &fs); + a->fpx = floatx80_log10_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = 
log10(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_log2(fpdata *a, fpdata *dst) +static void fp_log2(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_log2_check(a->fpx, &e, &fs); + a->fpx = floatx80_log2_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = log2(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_abs(fpdata *a, fpdata *dst) -{ - dst->fpx = floatx80_abs(a->fpx, &fs); -} -static void fp_neg(fpdata *a, fpdata *dst) -{ - dst->fpx = floatx80_neg(a->fpx, &fs); -} -static void fp_cos(fpdata *a, fpdata *dst) +static void fp_cos(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_sin_check(a->fpx, &e, &fs); + a->fpx = floatx80_sin_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = cos(fpa); - from_native(fpa, dst); -} -static void fp_getexp(fpdata *a, fpdata *dst) -{ - dst->fpx = floatx80_getexp(a->fpx, &fs); + from_native(fpa, a); } -static void fp_getman(fpdata *a, fpdata *dst) +static void fp_getexp(fpdata *a, fpdata *b) { - dst->fpx = floatx80_getman(a->fpx, &fs); + a->fpx = floatx80_getexp(b->fpx, &fs); } -static void fp_div(fpdata *a, fpdata *b) +static void fp_getman(fpdata *a, fpdata *b) { - a->fpx = floatx80_div(a->fpx, b->fpx, &fs); + a->fpx = floatx80_getman(b->fpx, &fs); } static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) { a->fpx = floatx80_mod(a->fpx, b->fpx, q, s, &fs); } -static void fp_add(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_add(a->fpx, b->fpx, &fs); -} -static void fp_mul(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_mul(a->fpx, b->fpx, &fs); -} static void fp_sgldiv(fpdata *a, fpdata *b) { a->fpx = floatx80_sgldiv(a->fpx, b->fpx, &fs); @@ -539,10 +552,6 @@ static void fp_scale(fpdata *a, fpdata *b) { a->fpx = floatx80_scale(a->fpx, b->fpx, &fs); } -static void fp_sub(fpdata *a, fpdata *b) -{ - a->fpx = floatx80_sub(a->fpx, b->fpx, &fs); -} static void 
fp_cmp(fpdata *a, fpdata *b) { a->fpx = floatx80_cmp(a->fpx, b->fpx, &fs); @@ -552,128 +561,189 @@ static void fp_tst(fpdata *a, fpdata *b) a->fpx = floatx80_tst(b->fpx, &fs); } +#define SETPREC \ + uint8_t oldprec = fs.floatx80_rounding_precision; \ + if (prec > 0) \ + set_floatx80_rounding_precision(prec, &fs); + +#define RESETPREC \ + if (prec > 0) \ + set_floatx80_rounding_precision(oldprec, &fs); + + +/* Functions with fixed precision */ +static void fp_move(fpdata *a, fpdata *b, int prec) +{ + SETPREC + a->fpx = floatx80_move(b->fpx, &fs); + RESETPREC +} +static void fp_abs(fpdata *a, fpdata *b, int prec) +{ + SETPREC + a->fpx = floatx80_abs(b->fpx, &fs); + RESETPREC +} +static void fp_neg(fpdata *a, fpdata *b, int prec) +{ + SETPREC + a->fpx = floatx80_neg(b->fpx, &fs); + RESETPREC +} +static void fp_add(fpdata *a, fpdata *b, int prec) +{ + SETPREC + a->fpx = floatx80_add(a->fpx, b->fpx, &fs); + RESETPREC +} +static void fp_sub(fpdata *a, fpdata *b, int prec) +{ + SETPREC + a->fpx = floatx80_sub(a->fpx, b->fpx, &fs); + RESETPREC +} +static void fp_mul(fpdata *a, fpdata *b, int prec) +{ + SETPREC + a->fpx = floatx80_mul(a->fpx, b->fpx, &fs); + RESETPREC +} +static void fp_div(fpdata *a, fpdata *b, int prec) +{ + SETPREC + a->fpx = floatx80_div(a->fpx, b->fpx, &fs); + RESETPREC +} +static void fp_sqrt(fpdata *a, fpdata *b, int prec) +{ + SETPREC + a->fpx = floatx80_sqrt(b->fpx, &fs); + RESETPREC +} + + /* FIXME: create softfloat functions for following arithmetics */ -static void fp_sinh(fpdata *a, fpdata *dst) +static void fp_sinh(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_sinh_check(a->fpx, &e, &fs); + a->fpx = floatx80_sinh_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = sinhl(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_etoxm1(fpdata *a, fpdata *dst) +static void fp_etoxm1(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_etoxm1_check(a->fpx, 
&e, &fs); + a->fpx = floatx80_etoxm1_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = expl(fpa) - 1.0; - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_tanh(fpdata *a, fpdata *dst) +static void fp_tanh(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_tanh_check(a->fpx, &e, &fs); + a->fpx = floatx80_tanh_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = tanhl(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_atan(fpdata *a, fpdata *dst) +static void fp_atan(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_atan_check(a->fpx, &e, &fs); + a->fpx = floatx80_atan_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = atanl(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_asin(fpdata *a, fpdata *dst) +static void fp_asin(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_asin_check(a->fpx, &e, &fs); + a->fpx = floatx80_asin_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = asinl(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_atanh(fpdata *a, fpdata *dst) +static void fp_atanh(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_atanh_check(a->fpx, &e, &fs); + a->fpx = floatx80_atanh_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = atanhl(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_etox(fpdata *a, fpdata *dst) +static void fp_etox(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_etox_check(a->fpx, &e, &fs); + a->fpx = floatx80_etox_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = expl(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_twotox(fpdata *a, fpdata *dst) +static void fp_twotox(fpdata *a, fpdata *b) { 
fptype fpa; flag e = 0; - dst->fpx = floatx80_twotox_check(a->fpx, &e, &fs); + a->fpx = floatx80_twotox_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = powl(2.0, fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_tentox(fpdata *a, fpdata *dst) +static void fp_tentox(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_tentox_check(a->fpx, &e, &fs); + a->fpx = floatx80_tentox_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = powl(10.0, fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_cosh(fpdata *a, fpdata *dst) +static void fp_cosh(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_cosh_check(a->fpx, &e, &fs); + a->fpx = floatx80_cosh_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = coshl(fpa); - from_native(fpa, dst); + from_native(fpa, a); } -static void fp_acos(fpdata *a, fpdata *dst) +static void fp_acos(fpdata *a, fpdata *b) { fptype fpa; flag e = 0; - dst->fpx = floatx80_acos_check(a->fpx, &e, &fs); + a->fpx = floatx80_acos_check(b->fpx, &e, &fs); if (e) return; - to_native(&fpa, a); + to_native(&fpa, b); fpa = acosl(fpa); - from_native(fpa, dst); + from_native(fpa, a); } static void fp_normalize(fpdata *a) diff --git a/include/fpp.h b/include/fpp.h index 4f2f8835..81d62e43 100644 --- a/include/fpp.h +++ b/include/fpp.h @@ -19,6 +19,7 @@ extern void init_fpucw_x87(void); typedef void (*FPP_ABQS)(fpdata*, fpdata*, uae_u64*, uae_u8*); typedef void (*FPP_AB)(fpdata*, fpdata*); +typedef void (*FPP_ABP)(fpdata*, fpdata*, int); typedef void (*FPP_A)(fpdata*); typedef bool (*FPP_IS)(fpdata*); @@ -44,7 +45,7 @@ typedef void (*FPP_FROM_EXTEN)(fpdata*, uae_u32*, uae_u32*, uae_u32*); typedef void (*FPP_PACK)(uae_u32*, uae_u32*, uae_u32*); typedef void (*FPP_PACKG)(uae_u32*, uae_u32*, uae_u32*, uae_u32*); -typedef const TCHAR* (*FPP_PRINT)(fpdata*); +typedef const TCHAR* 
(*FPP_PRINT)(fpdata*,int); extern FPP_PRINT fpp_print; @@ -89,7 +90,7 @@ extern FPP_PACKG fpp_get_exceptional_operand_grs; extern FPP_AB fpp_int; extern FPP_AB fpp_sinh; extern FPP_AB fpp_intrz; -extern FPP_AB fpp_sqrt; +extern FPP_ABP fpp_sqrt; extern FPP_AB fpp_lognp1; extern FPP_AB fpp_etoxm1; extern FPP_AB fpp_tanh; @@ -104,22 +105,22 @@ extern FPP_AB fpp_tentox; extern FPP_AB fpp_logn; extern FPP_AB fpp_log10; extern FPP_AB fpp_log2; -extern FPP_AB fpp_abs; +extern FPP_ABP fpp_abs; extern FPP_AB fpp_cosh; -extern FPP_AB fpp_neg; +extern FPP_ABP fpp_neg; extern FPP_AB fpp_acos; extern FPP_AB fpp_cos; extern FPP_AB fpp_getexp; extern FPP_AB fpp_getman; -extern FPP_AB fpp_div; +extern FPP_ABP fpp_div; extern FPP_ABQS fpp_mod; -extern FPP_AB fpp_add; -extern FPP_AB fpp_mul; +extern FPP_ABP fpp_add; +extern FPP_ABP fpp_mul; extern FPP_ABQS fpp_rem; extern FPP_AB fpp_scale; -extern FPP_AB fpp_sub; +extern FPP_ABP fpp_sub; extern FPP_AB fpp_sgldiv; extern FPP_AB fpp_sglmul; extern FPP_AB fpp_cmp; extern FPP_AB fpp_tst; -extern FPP_AB fpp_move; +extern FPP_ABP fpp_move; diff --git a/newcpu.cpp b/newcpu.cpp index 8efec75d..b5e26d37 100644 --- a/newcpu.cpp +++ b/newcpu.cpp @@ -1790,7 +1790,7 @@ static uaecptr ShowEA (void *f, uaecptr pc, uae_u16 opcode, int reg, amodes mode { fpdata fp; fpp_to_single(&fp, get_ilong_debug(pc)); - _stprintf(buffer, _T("#%s"), fpp_print(&fp)); + _stprintf(buffer, _T("#%s"), fpp_print(&fp, 0)); pc += 4; } break; @@ -1798,7 +1798,7 @@ static uaecptr ShowEA (void *f, uaecptr pc, uae_u16 opcode, int reg, amodes mode { fpdata fp; fpp_to_double(&fp, get_ilong_debug(pc), get_ilong_debug(pc + 4)); - _stprintf(buffer, _T("#%s"), fpp_print(&fp)); + _stprintf(buffer, _T("#%s"), fpp_print(&fp, 0)); pc += 8; } break; @@ -1806,7 +1806,7 @@ static uaecptr ShowEA (void *f, uaecptr pc, uae_u16 opcode, int reg, amodes mode { fpdata fp; fpp_to_exten(&fp, get_ilong_debug(pc), get_ilong_debug(pc + 4), get_ilong_debug(pc + 8)); - _stprintf(buffer, _T("#%s"), 
fpp_print(&fp)); + _stprintf(buffer, _T("#%s"), fpp_print(&fp, 0)); pc += 12; break; } @@ -2570,13 +2570,13 @@ static void Exception_build_stack_frame_common (uae_u32 oldpc, uae_u32 currpc, u Exception_build_stack_frame(oldpc, regs.instruction_pc, regs.mmu_ssw, nr, 0x0); } else if (nr >= 48 && nr <= 55) { if (regs.fpu_exp_pre) { - if (currprefs.cpu_model == 68060 && nr == 55 && (regs.fp_unimp_pend & 2)) { // packed decimal real + if (currprefs.cpu_model == 68060 && nr == 55 && regs.fp_unimp_pend == 2) { // packed decimal real Exception_build_stack_frame(regs.fp_ea, regs.instruction_pc, 0, nr, 0x2); } else { Exception_build_stack_frame(oldpc, regs.instruction_pc, 0, nr, 0x0); } } else { /* post-instruction */ - if (currprefs.cpu_model == 68060 && nr == 55 && (regs.fp_unimp_pend & 2)) { // packed decimal real + if (currprefs.cpu_model == 68060 && nr == 55 && regs.fp_unimp_pend == 2) { // packed decimal real Exception_build_stack_frame(regs.fp_ea, currpc, 0, nr, 0x2); } else { Exception_build_stack_frame(oldpc, currpc, 0, nr, 0x3); @@ -6336,13 +6336,16 @@ void m68k_dumpstate (uaecptr pc, uaecptr *nextpc) #ifdef FPUEMU if (currprefs.fpu_model) { uae_u32 fpsr; - for (i = 0; i < 8; i++){ - console_out_f (_T("FP%d: %g "), i, regs.fp[i].fp); - if ((i & 3) == 3) + for (i = 0; i < 8; i++) { + if (!(i & 1)) + console_out_f(_T("%d: "), i); + console_out_f (_T("%s "), fpp_print(&regs.fp[i], -1)); + console_out_f (_T("%s "), fpp_print(&regs.fp[i], 0)); + if (i & 1) console_out_f (_T("\n")); } fpsr = fpp_get_fpsr (); - console_out_f (_T("FPSR: %04X FPCR: %08x FPIAR: %08x N=%d Z=%d I=%d NAN=%d\n"), + console_out_f (_T("FPSR: %08X FPCR: %08x FPIAR: %08x N=%d Z=%d I=%d NAN=%d\n"), fpsr, regs.fpcr, regs.fpiar, (fpsr & 0x8000000) != 0, (fpsr & 0x4000000) != 0, diff --git a/softfloat/softfloat.cpp b/softfloat/softfloat.cpp index 410a9436..2b093df1 100644 --- a/softfloat/softfloat.cpp +++ b/softfloat/softfloat.cpp @@ -101,7 +101,9 @@ this code that are retained. 
| double-precision floating-point value for external use. *----------------------------------------------------------------------------*/ flag floatx80_internal_sign = 0; -uint32_t floatx80_internal_exp = 0; +int32_t floatx80_internal_exp = 0; +uint64_t floatx80_internal_sig = 0; +int32_t floatx80_internal_exp0 = 0; uint64_t floatx80_internal_sig0 = 0; uint64_t floatx80_internal_sig1 = 0; @@ -109,32 +111,126 @@ uint64_t floatx80_internal_sig1 = 0; | Function for storing sign, exponent and significand of extended | double-precision floating-point intermediate result for external use. *----------------------------------------------------------------------------*/ -static void saveFloatx80Internal( flag zSign, uint32_t zExp, uint64_t zSig0, uint64_t zSig1 ) +floatx80 roundSaveFloatx80Internal( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) { + int64_t roundIncrement, roundMask, roundBits; + flag increment; + + if ( roundingPrecision == 80 ) { + goto precision80; + } else if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } else { + goto precision80; + } + + zSig0 |= ( zSig1 != 0 ); + if ( status->float_rounding_mode != float_round_nearest_even ) { + if ( status->float_rounding_mode == float_round_to_zero ) { + roundIncrement = 0; + } else { + roundIncrement = roundMask; + if ( zSign ) { + if ( status->float_rounding_mode == float_round_up ) roundIncrement = 0; + } else { + if ( status->float_rounding_mode == float_round_down ) roundIncrement = 0; + } + } + } + + roundBits = zSig0 & roundMask; + + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( status->float_rounding_mode == float_round_nearest_even && 
( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + +precision80: + increment = ( (int64_t) zSig1 < 0 ); + if ( status->float_rounding_mode != float_round_nearest_even ) { + if ( status->float_rounding_mode == float_round_to_zero ) { + increment = 0; + } else { + if ( zSign ) { + increment = ( status->float_rounding_mode == float_round_down ) && zSig1; + } else { + increment = ( status->float_rounding_mode == float_round_up ) && zSig1; + } + } + } + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } else { + zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & ( status->float_rounding_mode == float_round_nearest_even ) ); + } + } else { + if ( zSig0 == 0 ) zExp = 0; + } + return packFloatx80( zSign, zExp, zSig0 ); +} + +static void saveFloatx80Internal( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) +{ + floatx80 z = roundSaveFloatx80Internal( status->floatx80_rounding_precision, zSign, zExp, zSig0, zSig1, status ); + floatx80 z0 = roundSaveFloatx80Internal( 80, zSign, zExp, zSig0, zSig1, status ); + + floatx80_internal_sign = zSign; + floatx80_internal_exp = extractFloatx80Exp( z ); + floatx80_internal_sig = extractFloatx80Frac( z ); + floatx80_internal_exp0 = extractFloatx80Exp( z0 ); + floatx80_internal_sig0 = extractFloatx80Frac( z0 ); + floatx80_internal_sig1 = zSig1; +} + +static void saveFloat64Internal( flag zSign, int16_t zExp, uint64_t zSig, float_status *status ) +{ + floatx80 z = roundSaveFloatx80Internal( 64, zSign, zExp + 0x3C01, zSig<<1, 0, status ); + floatx80_internal_sign = zSign; - floatx80_internal_exp = zExp; - floatx80_internal_sig0 = zSig0; - floatx80_internal_sig1 = zSig1; + floatx80_internal_exp = extractFloatx80Exp( z ); + floatx80_internal_sig = extractFloatx80Frac( z ); + floatx80_internal_exp0 = zExp + 0x3C01; + floatx80_internal_sig0 
= zSig<<1; + floatx80_internal_sig1 = 0; } -static void saveFloat64Internal( flag zSign, uint16_t zExp, uint64_t zSig, float_status *status ) +static void saveFloat32Internal( flag zSign, int16_t zExp, uint32_t zSig, float_status *status ) { - floatx80 z = roundAndPackFloatx80( 64, zSign, 0x3FFF, zSig<<1, 0, status ); + floatx80 z = roundSaveFloatx80Internal( 32, zSign, zExp + 0x3F81, ( (uint64_t) zSig )<<33, 0, status ); floatx80_internal_sign = zSign; - floatx80_internal_exp = zExp + 0x3C01; - floatx80_internal_sig0 = extractFloatx80Frac( z ); + floatx80_internal_exp = extractFloatx80Exp( z ); + floatx80_internal_sig = extractFloatx80Frac( z ); + floatx80_internal_exp0 = zExp + 0x3F81; + floatx80_internal_sig0 = ( (uint64_t) zSig )<<33; floatx80_internal_sig1 = 0; } -static void saveFloat32Internal( flag zSign, uint16_t zExp, uint32_t zSig, float_status *status ) +void saveFloatx80InternalSgl( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) { - floatx80 z = roundAndPackFloatx80( 32, zSign, 0x3FFF, ( (uint64_t) zSig )<<33, 0, status ); + floatx80 z = roundSaveFloatx80Internal( 32, zSign, zExp, zSig0, zSig1, status ); floatx80_internal_sign = zSign; - floatx80_internal_exp = zExp + 0x3F81; - floatx80_internal_sig0 = extractFloatx80Frac( z ); - floatx80_internal_sig1 = 0; + floatx80_internal_exp = extractFloatx80Exp( z ); + floatx80_internal_sig = extractFloatx80Frac( z ); + floatx80_internal_exp0 = zExp; + floatx80_internal_sig0 = zSig0; + floatx80_internal_sig1 = zSig1; } /*---------------------------------------------------------------------------- @@ -831,6 +927,7 @@ floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ +#ifndef SOFTFLOAT_68K floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) @@ -902,7 +999,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, #ifdef SOFTFLOAT_68K if ( isTiny ) { float_raise( float_flag_underflow, status ); - saveFloatx80Internal( zSign, zExp, zSig0, zSig1 ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); } shift64RightJamming( zSig0, -zExp, &zSig0 ); #else @@ -982,7 +1079,7 @@ if (roundBits) { float_raise(float_flag_overflow | float_flag_inexact, status); #else float_raise( float_flag_overflow, status ); - saveFloatx80Internal( zSign, zExp, zSig0, zSig1 ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status ); #endif if ( ( roundingMode == float_round_to_zero ) @@ -1011,7 +1108,7 @@ if (roundBits) { #ifdef SOFTFLOAT_68K if ( isTiny ) { float_raise( float_flag_underflow, status ); - saveFloatx80Internal( zSign, zExp, zSig0, zSig1 ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); } shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 ); #else @@ -1070,6 +1167,160 @@ if (roundBits) { } +#else // SOFTFLOAT_68K + +floatx80 roundAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) +{ + int8_t roundingMode; + flag roundNearestEven, increment; + int64_t roundIncrement, roundMask, roundBits; + int32_t expOffset; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 80 ) goto precision80; + if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + expOffset = 0x3C00; + } else if ( roundingPrecision == 32 ) { + roundIncrement = 
LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + expOffset = 0x3F80; + } else { + goto precision80; + } + zSig0 |= ( zSig1 != 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } else { + roundIncrement = roundMask; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig0 & roundMask; + if ( ( ( 0x7FFE - expOffset ) < zExp ) || + ( ( zExp == ( 0x7FFE - expOffset ) ) && ( zSig0 + roundIncrement < zSig0 ) ) ) { + float_raise( float_flag_overflow, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); + if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE - expOffset, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( zExp < ( expOffset + 1 ) ) { + float_raise( float_flag_underflow, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); + shift64RightJamming( zSig0, -( zExp - ( expOffset + 1 ) ), &zSig0 ); + zExp = expOffset + 1; + roundBits = zSig0 & roundMask; + if ( roundBits ) float_raise( float_flag_inexact, status ); + zSig0 += roundIncrement; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + if ( roundBits ) { + float_raise( float_flag_inexact, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); + } + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == 
roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); +precision80: + increment = ( (int64_t) zSig1 < 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + } + if ( 0x7FFE <= (uint32_t) zExp ) { + if ( ( 0x7FFE < zExp ) || + ( ( zExp == 0x7FFE ) && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) && increment ) + ) { + roundMask = 0; + float_raise( float_flag_overflow, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); + if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( zExp < 0 ) { + float_raise( float_flag_underflow, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); + shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 ); + zExp = 0; + if ( zSig1 ) float_raise( float_flag_inexact, status ); + if ( roundNearestEven ) { + increment = ( (int64_t) zSig1 < 0 ); + } else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + if ( increment ) { + ++zSig0; + zSig0 &= + ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + } + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( zSig1 ) { + float_raise( float_flag_inexact, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); + } + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 
0x8000000000000000 ); + } else { + zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + } + } else { + if ( zSig0 == 0 ) zExp = 0; + } + return packFloatx80( zSign, zExp, zSig0 ); + +} + +#endif + #ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous floatx80 roundAndPackFloatx80Sgl( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) { @@ -1103,7 +1354,7 @@ floatx80 roundAndPackFloatx80Sgl( flag zSign, int32_t zExp, uint64_t zSig0, uint || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) ) { float_raise( float_flag_overflow, status ); - saveFloatx80Internal( zSign, zExp, zSig0, zSig1 ); + saveFloatx80InternalSgl( zSign, zExp, zSig0, zSig1, status ); if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status ); if ( ( roundingMode == float_round_to_zero ) || ( zSign && ( roundingMode == float_round_up ) ) @@ -1121,12 +1372,12 @@ floatx80 roundAndPackFloatx80Sgl( flag zSign, int32_t zExp, uint64_t zSig0, uint || ( zSig0 <= zSig0 + roundIncrement ); if ( isTiny ) { float_raise( float_flag_underflow, status ); - saveFloatx80Internal( zSign, zExp, zSig0, zSig1 ); + saveFloatx80InternalSgl( zSign, zExp, zSig0, zSig1, status ); } shift64RightJamming( zSig0, -zExp, &zSig0 ); zExp = 0; roundBits = zSig0 & roundMask; - if ( roundBits ) status->float_exception_flags |= float_flag_inexact; + if ( roundBits ) float_raise ( float_flag_inexact, status ); zSig0 += roundIncrement; if ( roundNearestEven && ( roundBits == roundIncrement ) ) { roundMask |= roundIncrement<<1; @@ -1135,7 +1386,10 @@ floatx80 roundAndPackFloatx80Sgl( flag zSign, int32_t zExp, uint64_t zSig0, uint return packFloatx80( zSign, zExp, zSig0 ); } } - if ( roundBits ) status->float_exception_flags |= float_flag_inexact; + if ( roundBits ) { + float_raise( float_flag_inexact, status ); + saveFloatx80InternalSgl( zSign, zExp, zSig0, zSig1, status ); + } zSig0 += roundIncrement; if ( zSig0 < roundIncrement ) { ++zExp; @@ -2475,9 +2729,8 @@ floatx80 
floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_statu bSig = extractFloatx80Frac( b ); bExp = extractFloatx80Exp( b ); bSign = extractFloatx80Sign( b ); - *q = 0; - *s = 0; - if ( aExp == 0x7FFF ) { + + if ( aExp == 0x7FFF ) { if ( (uint64_t) ( aSig0<<1 ) || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { return propagateFloatx80NaN( a, b, status ); @@ -2486,6 +2739,8 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_statu } if ( bExp == 0x7FFF ) { if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + *s = (aSign != bSign); + *q = 0; return a; } if ( bExp == 0 ) { @@ -2500,7 +2755,11 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_statu } if ( aExp == 0 ) { #ifdef SOFTFLOAT_68K - if ( aSig0 == 0 ) return a; + if ( aSig0 == 0 ) { + *s = (aSign != bSign); + *q = 0; + return a; + } #else if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; #endif @@ -2585,9 +2844,8 @@ floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_statu bSig = extractFloatx80Frac( b ); bExp = extractFloatx80Exp( b ); bSign = extractFloatx80Sign( b ); - *q = 0; - *s = 0; - if ( aExp == 0x7FFF ) { + + if ( aExp == 0x7FFF ) { if ( (uint64_t) ( aSig0<<1 ) || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { return propagateFloatx80NaN( a, b, status ); @@ -2596,6 +2854,8 @@ floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_statu } if ( bExp == 0x7FFF ) { if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + *s = (aSign != bSign); + *q = 0; return a; } if ( bExp == 0 ) { @@ -2610,7 +2870,11 @@ floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_statu } if ( aExp == 0 ) { #ifdef SOFTFLOAT_68K - if ( aSig0 == 0 ) return a; + if ( aSig0 == 0 ) { + *s = (aSign != bSign); + *q = 0; + return a; + } #else if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; #endif @@ -2835,8 +3099,8 @@ floatx80 floatx80_scale(floatx80 a, 
floatx80 b, float_status *status) if ( bExp < 0x3FFF ) return a; - if ( 0x400E < bExp ) { - aExp = bSign ? -0x4000 : 0x7FFF; + if ( 0x400F < bExp ) { + aExp = bSign ? -0x6001 : 0xE000; return roundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); } @@ -2985,7 +3249,7 @@ floatx80 floatx80_move( floatx80 a, float_status *status ) if ( aSig == 0 ) return a; normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); } - return a; + return roundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); } #endif // End of addition for Previous diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index 6fe6c44c..707f3d56 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -207,16 +207,12 @@ enum { *----------------------------------------------------------------------------*/ extern flag floatx80_internal_sign; -extern uint32_t floatx80_internal_exp; +extern int32_t floatx80_internal_exp; +extern uint64_t floatx80_internal_sig; +extern int32_t floatx80_internal_exp0; extern uint64_t floatx80_internal_sig0; extern uint64_t floatx80_internal_sig1; -/*---------------------------------------------------------------------------- - | Function for storing sign, exponent and significand of extended - | double-precision floating-point intermediate result for external use. - *----------------------------------------------------------------------------*/ -void saveFloatx80Internal( flag zSign, uint32_t zExp, uint64_t zSig0, uint64_t zSig1 ); - typedef struct float_status { signed char float_detect_tininess; signed char float_rounding_mode; @@ -230,6 +226,12 @@ typedef struct float_status { flag snan_bit_is_one; } float_status; +/*---------------------------------------------------------------------------- + | Function for storing sign, exponent and significand of extended + | double-precision floating-point intermediate result for external use. 
+ *----------------------------------------------------------------------------*/ +void saveFloatx80Internal( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ); + static inline void set_float_detect_tininess(int val, float_status *status) { status->float_detect_tininess = val; -- 2.47.3