From: Toni Wilen Date: Fri, 27 Jan 2017 18:18:43 +0000 (+0200) Subject: Softfloat/FPU update. X-Git-Tag: 3500~112 X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=9e55624fd03d2cc4f28c56a6108207a979f29464;p=francis%2Fwinuae.git Softfloat/FPU update. --- diff --git a/fpp.cpp b/fpp.cpp index d8d1e8ce..b0089dba 100644 --- a/fpp.cpp +++ b/fpp.cpp @@ -65,6 +65,8 @@ FPP_FROM_DOUBLE fpp_from_double_x; FPP_TO_EXTEN fpp_to_exten_x; FPP_FROM_EXTEN fpp_from_exten_x; +FPP_A fpp_normalize; + FPP_A fpp_roundsgl; FPP_A fpp_rounddbl; FPP_A fpp_round32; @@ -101,6 +103,8 @@ FPP_AB fpp_mul; FPP_ABQS fpp_rem; FPP_AB fpp_scale; FPP_AB fpp_sub; +FPP_AB fpp_sgldiv; +FPP_AB fpp_sglmul; #define DEBUG_FPP 0 #define EXCEPTION_FPP 1 @@ -195,11 +199,13 @@ static void normalize_exten(uae_u32 *pwrd1, uae_u32 *pwrd2, uae_u32 *pwrd3) void to_single(fpdata *fpd, uae_u32 wrd1) { - // automatically fix denormals if 6888x +#if 0 // now done in get_fp_value + // automatically fix denormals if 6888x if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) fpp_to_single_xn(fpd, wrd1); else - fpp_to_single_x(fpd, wrd1); +#endif + fpp_to_single_x(fpd, wrd1); } static uae_u32 from_single(fpdata *fpd) { @@ -207,11 +213,13 @@ static uae_u32 from_single(fpdata *fpd) } void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) { +#if 0 // now done in get_fp_value // automatically fix denormals if 6888x if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) fpp_to_double_xn(fpd, wrd1, wrd2); else - fpp_to_double_x(fpd, wrd1, wrd2); +#endif + fpp_to_double_x(fpd, wrd1, wrd2); } static void from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2) { @@ -220,10 +228,12 @@ static void from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2) void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) { +#if 0 // now done in get_fp_value // automatically fix unnormals if 6888x if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) { normalize_exten(&wrd1, &wrd2, &wrd3); } +#endif fpp_to_exten_x(fpd, wrd1, wrd2, wrd3); } static void to_exten_fmovem(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) @@ -737,17 +747,63 @@ static bool fault_if_unimplemented_680x0 (uae_u16 opcode, uae_u16 extra, uaecptr return true; } uae_u16 v = extra & 0x7f; + /* 68040/68060 only variants. 6888x = F-line exception. */ switch (v) { + case 0x00: /* FMOVE */ + case 0x40: /* FSMOVE */ + case 0x44: /* FDMOVE */ + case 0x04: /* FSQRT */ + case 0x41: /* FSSQRT */ + case 0x45: /* FDSQRT */ + case 0x18: /* FABS */ + case 0x58: /* FSABS */ + case 0x5c: /* FDABS */ + case 0x1a: /* FNEG */ + case 0x5a: /* FSNEG */ + case 0x5e: /* FDNEG */ + case 0x20: /* FDIV */ + case 0x60: /* FSDIV */ + case 0x64: /* FDDIV */ + case 0x22: /* FADD */ + case 0x62: /* FSADD */ + case 0x66: /* FDADD */ + case 0x23: /* FMUL */ + case 0x63: /* FSMUL */ + case 0x67: /* FDMUL */ + case 0x24: /* FSGLDIV */ + case 0x27: /* FSGLMUL */ + case 0x28: /* FSUB */ + case 0x68: /* FSSUB */ + case 0x6c: /* FDSUB */ + case 0x38: /* FCMP */ + case 0x3a: /* FTST */ + return false; case 0x01: /* FINT */ case 0x03: /* FINTRZ */ // Unimplemented only in 68040. - if (currprefs.cpu_model == 68040) { - fpu_op_unimp (opcode, extra, ea, oldpc, FPU_EXP_UNIMP_INS, src, reg, -1); - return true; + if(currprefs.cpu_model != 68040) { + return false; } - return false; + default: + fpu_op_unimp (opcode, extra, ea, oldpc, FPU_EXP_UNIMP_INS, src, reg, -1); + return true; + } + } + return false; +} + +static bool fault_if_unimplemented_6888x (uae_u16 opcode, uae_u16 extra, uaecptr oldpc) +{ + if ((currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) && currprefs.fpu_no_unimplemented) { + uae_u16 v = extra & 0x7f; + switch(v) + { + case 0x00: /* FMOVE */ + case 0x01: /* FINT */ case 0x02: /* FSINH */ + case 0x03: /* FINTRZ */ + case 0x04: /* FSQRT */ case 0x06: /* FLOGNP1 */ case 0x08: /* FETOXM1 */ case 0x09: /* FTANH */ @@ -762,11 +818,22 @@ static bool fault_if_unimplemented_680x0 (uae_u16 opcode, uae_u16 extra, uaecptr case 0x14: /* FLOGN */ case 0x15: /* FLOG10 */ case 0x16: /* FLOG2 */ + case 0x18: /* FABS */ case 0x19: /* FCOSH */ + case 0x1a: /* FNEG */ case 0x1c: /* FACOS */ case 0x1d: /* FCOS */ case 0x1e: /* FGETEXP */ case 0x1f: /* FGETMAN */ + case 0x20: /* FDIV */ + case 0x21: /* FMOD */ + case 0x22: /* FADD */ + case 0x23: /* FMUL */ + case 0x24: /* FSGLDIV */ + case 0x25: /* FREM */ + case 0x26: /* FSCALE */ + case 0x27: /* FSGLMUL */ + case 0x28: /* FSUB */ case 0x30: /* FSINCOS */ case 0x31: /* FSINCOS */ case 0x32: /* FSINCOS */ @@ -775,37 +842,12 @@ static bool fault_if_unimplemented_680x0 (uae_u16 opcode, uae_u16 extra, uaecptr case 0x35: /* FSINCOS */ case 0x36: /* FSINCOS */ case 0x37: /* FSINCOS */ - case 0x21: /* FMOD */ - case 0x25: /* FREM */ - case 0x26: /* FSCALE */ - fpu_op_unimp (opcode, extra, ea, oldpc, FPU_EXP_UNIMP_INS, src, reg, -1); - return true; - } - } - return false; -} - -static bool fault_if_unimplemented_6888x (uae_u16 opcode, uae_u16 extra, uaecptr oldpc) -{ - if ((currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) && currprefs.fpu_no_unimplemented) { - uae_u16 v = extra & 0x7f; - /* 68040/68060 only variants. 6888x = F-line exception. */ - switch (v) - { - case 0x62: /* FSADD */ - case 0x66: /* FDADD */ - case 0x68: /* FSSUB */ - case 0x6c: /* FDSUB */ - case 0x5a: /* FSNEG */ - case 0x5e: /* FDNEG */ - case 0x58: /* FSABS */ - case 0x5c: /* FDABS */ - case 0x63: /* FSMUL */ - case 0x67: /* FDMUL */ - case 0x41: /* FSSQRT */ - case 0x45: /* FDSQRT */ - fpu_noinst (opcode, oldpc); - return true; + case 0x38: /* FCMP */ + case 0x3a: /* FTST */ + return false; + default: + fpu_noinst (opcode, oldpc); + return true; } } return false; @@ -1126,22 +1168,28 @@ static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor) } // 68040/060 does not support denormals -static bool fault_if_no_denormal_support_pre(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size) +static bool normalize_or_fault_if_no_denormal_support_pre(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size) { - if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented && currprefs.fpu_softfloat) { - if (fpp_is_unnormal(fpd) || fpp_is_denormal(fpd)) { - fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_PRE, fpd, -1, size); + if (fpp_is_unnormal(fpd) || fpp_is_denormal(fpd)) { + if (currprefs.cpu_model >= 68040 && currprefs.fpu_model) { + fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_PRE, fpd, -1, size); return true; + } else { + fpp_normalize(fpd); + return false; } } return false; } -static bool fault_if_no_denormal_support_post(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size) +static bool normalize_or_fault_if_no_denormal_support_post(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size) { - if (currprefs.fpu_softfloat && currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { - if (fpp_is_unnormal(fpd) || fpp_is_denormal(fpd)) { + if (fpp_is_unnormal(fpd) || fpp_is_denormal(fpd)) { + if (currprefs.cpu_model >= 68040 && currprefs.fpu_model) { fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_POST, fpd, -1, size); return true; + } else { + fpp_normalize(fpd); + return false; } } return false; @@ -1160,7 +1208,7 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old if (fault_if_no_fpu (opcode, extra, 0, oldpc)) return -1; *src = regs.fp[(extra >> 10) & 7]; - if (fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 2)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 2)) return -1; return 1; } @@ -1183,7 +1231,7 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old break; case 1: to_single (src, m68k_dreg (regs, reg)); - if (fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 0)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 0)) return -1; break; default: @@ -1281,7 +1329,7 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old break; case 1: to_single (src, (doext ? exts[0] : x_cp_get_long (ad))); - if (fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 0)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 0)) return -1; break; case 2: @@ -1293,7 +1341,7 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old ad += 4; wrd3 = (doext ? exts[2] : x_cp_get_long (ad)); to_exten (src, wrd1, wrd2, wrd3); - if (fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 2)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 2)) return -1; } break; @@ -1326,7 +1374,7 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old ad += 4; wrd2 = (doext ? exts[1] : x_cp_get_long (ad)); to_double (src, wrd1, wrd2); - if (fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 1)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, 0, oldpc, src, 1)) return -1; } break; @@ -1439,19 +1487,19 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o switch (size) { case 0: - if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) return 1; x_cp_put_long(ad, (uae_u32)fpp_to_int(value, 2)); break; case 1: - if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) return -1; x_cp_put_long(ad, from_single(value)); break; case 2: { uae_u32 wrd1, wrd2, wrd3; - if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) return 1; from_exten(value, &wrd1, &wrd2, &wrd3); x_cp_put_long (ad, wrd1); @@ -1481,14 +1529,14 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o } break; case 4: - if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) return 1; x_cp_put_word(ad, (uae_s16)fpp_to_int(value, 1)); break; case 5: { uae_u32 wrd1, wrd2; - if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 1)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 1)) return -1; from_double(value, &wrd1, &wrd2); x_cp_put_long (ad, wrd1); @@ -1497,7 +1545,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o } break; case 6: - if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) + if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) return 1; x_cp_put_byte(ad, (uae_s8)fpp_to_int(value, 0)); break; @@ -2218,7 +2266,6 @@ static uaecptr fmovem2fpp (uaecptr ad, uae_u32 list, int incr, int regdir) static bool arithmetic(fpdata *src, int reg, int extra) { - bool sgl = false; uae_u64 q = 0; uae_u8 s = 0; fpdata *dst = ®s.fp[reg]; @@ -2336,9 +2383,9 @@ static bool arithmetic(fpdata *src, int reg, int extra) fpp_mul(dst, src); break; case 0x24: /* FSGLDIV */ - fpp_div(dst, src); - sgl = true; - break; + fpp_sgldiv(dst, src); + fpsr_set_result(dst); + return true; case 0x25: /* FREM */ fpp_rem(dst, src, &q, &s); fpsr_set_quotient(q, s); @@ -2347,22 +2394,22 @@ static bool arithmetic(fpdata *src, int reg, int extra) fpp_scale(dst, src); break; case 0x27: /* FSGLMUL */ - fpp_mul(dst, src); - sgl = true; - break; + fpp_sglmul(dst, src); + fpsr_set_result(dst); + return true; case 0x28: /* FSUB */ case 0x68: /* FSSUB */ case 0x6c: /* FDSUB */ fpp_sub(dst, src); break; case 0x30: /* FSINCOS */ - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: + case 0x31: /* FSINCOS */ + case 0x32: /* FSINCOS */ + case 0x33: /* FSINCOS */ + case 0x34: /* FSINCOS */ + case 0x35: /* FSINCOS */ + case 0x36: /* FSINCOS */ + case 0x37: /* FSINCOS */ fpp_cos(src, ®s.fp[extra & 7]); fpp_sin(src, dst); if (((regs.fpcr >> 6) & 3) == 1) @@ -2387,19 +2434,17 @@ static bool arithmetic(fpdata *src, int reg, int extra) } // must check instruction rounding overrides first - if (sgl) { - fpp_roundsgl(®s.fp[reg]); - } else if ((extra & 0x44) == 0x40) { - fpp_round32(®s.fp[reg]); + if ((extra & 0x44) == 0x40) { + fpp_round32(dst); } else if ((extra & 0x44) == 0x44) { - fpp_round64(®s.fp[reg]); + fpp_round64(dst); } else if (((regs.fpcr >> 6) & 3) == 1) { - fpp_round32(®s.fp[reg]); + fpp_round32(dst); } else if (((regs.fpcr >> 6) & 3) == 2) { - fpp_round64(®s.fp[reg]); + fpp_round64(dst); } - fpsr_set_result(®s.fp[reg]); + fpsr_set_result(dst); return true; } @@ -2650,6 +2695,11 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) regs.fpiar = pc; + if((extra & 0x30) == 0x20 || (extra & 0x7f) == 0x38) { // dyadic operation + if(normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, pc, ®s.fp[reg], 2)) + return; + } + fpsr_clear_status(); v = arithmetic(&srcd, reg, extra); if (!v) diff --git a/fpp_native.cpp b/fpp_native.cpp index 456bcbf0..5a4728b0 100644 --- a/fpp_native.cpp +++ b/fpp_native.cpp @@ -810,6 +810,20 @@ static void fp_mul(fpdata *a, fpdata *b) { a->fp = a->fp * b->fp; } +static void fp_sglmul(fpdata *a, fpdata *b) +{ + a->fp = a->fp * b->fp; + fpp_roundsgl(a); +} +static void fp_sgldiv(fpdata *a, fpdata *b) +{ + a->fp = a->fp / b->fp; + fpp_roundsgl(a); +} + +static void fp_normalize(fpdata *a) +{ +} void fp_init_native(void) { @@ -849,6 +863,8 @@ void fp_init_native(void) fpp_round32 = fp_round32; fpp_round64 = fp_round64; + fpp_normalize = fp_normalize; + fpp_int = fp_int; fpp_sinh = fp_sinh; fpp_intrz = fp_intrz; @@ -881,4 +897,6 @@ void fp_init_native(void) fpp_rem = fp_rem; fpp_scale = fp_scale; fpp_sub = fp_sub; + fpp_sgldiv = fp_sgldiv; + fpp_sglmul = fp_sglmul; } diff --git a/fpp_softfloat.cpp b/fpp_softfloat.cpp index 62be8aaa..c092ffd7 100644 --- a/fpp_softfloat.cpp +++ b/fpp_softfloat.cpp @@ -114,10 +114,11 @@ static const TCHAR *fp_print(fpdata *fpd) result += (fptype) 1.0 / (((uae_u64)1)<<(63-i)); } } - result *= powl(2.0, (fx->high&0x7FFF) - 0x3FFF); #if USE_LONG_DOUBLE + result *= powl(2.0, (fx->high&0x7FFF) - 0x3FFF); _stprintf(fsout, _T("%c%#.17Le%s%s"), n?'-':'+', result, u ? _T("U") : _T(""), d ? _T("D") : _T("")); #else + result *= pow(2.0, (fx->high&0x7FFF) - 0x3FFF); _stprintf(fsout, _T("%c%#.17e%s%s"), n?'-':'+', result, u ? _T("U") : _T(""), d ? _T("D") : _T("")); #endif } @@ -173,7 +174,7 @@ static bool fp_is_unnormal(fpdata *fpd) } /* Functions for converting between float formats */ -static const long double twoto32 = 4294967296.0; +static const fptype twoto32 = 4294967296.0; static void to_native(fptype *fp, fpdata *fpd) { @@ -187,18 +188,31 @@ static void to_native(fptype *fp, fpdata *fpd) return; } if (fp_is_nan(fpd)) { +#if USE_LONG_DOUBLE *fp = sqrtl(-1); +#else + *fp = sqrt(-1); +#endif return; } if (fp_is_infinity(fpd)) { - //*fp = fp_is_neg(fpd) ? logl(0.0) : (1.0/0.0); + double zero = 0.0; +#if USE_LONG_DOUBLE + *fp = fp_is_neg(fpd) ? logl(0.0) : (1.0 / zero); +#else + *fp = fp_is_neg(fpd) ? log(0.0) : (1.0 / zero); +#endif return; } frac = (fptype)fpd->fpx.low / (fptype)(twoto32 * 2147483648.0); if (fp_is_neg(fpd)) frac = -frac; +#if USE_LONG_DOUBLE *fp = ldexpl (frac, expon - 16383); +#else + *fp = ldexp (frac, expon - 16383); +#endif } static void from_native(fptype fp, fpdata *fpd) @@ -228,7 +242,11 @@ static void from_native(fptype fp, fpdata *fpd) if (fp < 0.0) fp = -fp; - frac = frexpl (fp, &expon); +#if USE_LONG_DOUBLE + frac = frexpl (fp, &expon); +#else + frac = frexp (fp, &expon); +#endif frac += 0.5 / (twoto32 * twoto32); if (frac >= 1.0) { frac /= 2.0; @@ -394,13 +412,36 @@ static void fp_log2(fpdata *a, fpdata *dst) fpa = log2(fpa); from_native(fpa, dst); } + +static inline int32_t extractFloatx80Exp( floatx80 a ) +{ + return a.high & 0x7FFF; +} +static inline uint64_t extractFloatx80Frac( floatx80 a ) +{ + return a.low; +} + + static void fp_abs(fpdata *a, fpdata *dst) { - dst->fpx = floatx80_abs(a->fpx); + uint64_t aSig = extractFloatx80Frac(a->fpx); + int32_t aExp = extractFloatx80Exp(a->fpx); + if (aExp == 0x7FFF && (uint64_t)(aSig << 1)) { + dst->fpx = propagateFloatx80NaN(a->fpx, a->fpx, &fs); + return; + } + dst->fpx = floatx80_abs(a->fpx); } static void fp_neg(fpdata *a, fpdata *dst) { - dst->fpx = floatx80_chs(a->fpx); + uint64_t aSig = extractFloatx80Frac(a->fpx); + int32_t aExp = extractFloatx80Exp(a->fpx); + if (aExp == 0x7FFF && (uint64_t)(aSig << 1)) { + dst->fpx = propagateFloatx80NaN(a->fpx, a->fpx, &fs); + return; + } + dst->fpx = floatx80_chs(a->fpx); } static void fp_cos(fpdata *a, fpdata *dst) { @@ -433,6 +474,14 @@ static void fp_mul(fpdata *a, fpdata *b) { a->fpx = floatx80_mul(a->fpx, b->fpx, &fs); } +static void fp_sgldiv(fpdata *a, fpdata *b) +{ + a->fpx = floatx80_sgldiv(a->fpx, b->fpx, &fs); +} +static void fp_sglmul(fpdata *a, fpdata *b) +{ + a->fpx = floatx80_sglmul(a->fpx, b->fpx, &fs); +} static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) { a->fpx = floatx80_rem(a->fpx, b->fpx, q, s, &fs); @@ -527,6 +576,11 @@ static void fp_acos(fpdata *a, fpdata *dst) from_native(fpa, dst); } +static void fp_normalize(fpdata *a) +{ + a->fpx = floatx80_normalize(a->fpx); +} + void fp_init_softfloat(void) { float_status fsx = { 0 }; @@ -575,6 +629,8 @@ void fp_init_softfloat(void) fpp_round32 = fp_round32; fpp_round64 = fp_round64; + fpp_normalize = fp_normalize; + fpp_int = fp_int; fpp_sinh = fp_sinh; fpp_intrz = fp_intrz; @@ -607,5 +663,7 @@ void fp_init_softfloat(void) fpp_rem = fp_rem; fpp_scale = fp_scale; fpp_sub = fp_sub; + fpp_sgldiv = fp_sgldiv; + fpp_sglmul = fp_sglmul; } diff --git a/include/fpp.h b/include/fpp.h index bdc5bf26..0eed12d6 100644 --- a/include/fpp.h +++ b/include/fpp.h @@ -73,6 +73,8 @@ extern FPP_A fpp_rounddbl; extern FPP_A fpp_round32; extern FPP_A fpp_round64; +extern FPP_A fpp_normalize; + extern FPP_AB fpp_int; extern FPP_AB fpp_sinh; extern FPP_AB fpp_intrz; @@ -105,3 +107,5 @@ extern FPP_AB fpp_mul; extern FPP_ABQS fpp_rem; extern FPP_AB fpp_scale; extern FPP_AB fpp_sub; +extern FPP_AB fpp_sgldiv; +extern FPP_AB fpp_sglmul; diff --git a/softfloat/softfloat.cpp b/softfloat/softfloat.cpp index 64c435d3..52fae6af 100644 --- a/softfloat/softfloat.cpp +++ b/softfloat/softfloat.cpp @@ -1005,6 +1005,89 @@ static floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, } +#ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous +floatx80 roundAndPackFloatx80Sgl( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) +{ + int8_t roundingMode; + flag roundNearestEven, isTiny; + int64_t roundIncrement, roundMask, roundBits; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + zSig0 |= ( zSig1 != 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = roundMask; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig0 & roundMask; + + if ( 0x7FFE <= (uint32_t) zExp ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + float_raise( float_flag_overflow | float_flag_inexact, status ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, LIT64( 0xFFFFFFFFFFFFFFFF ) ); + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + + if ( zExp < 0 ) { + isTiny = + ( status->float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig0 <= zSig0 + roundIncrement ); + shift64RightJamming( zSig0, -zExp, &zSig0 ); + zExp = 0; + roundBits = zSig0 & roundMask; + if ( isTiny && roundBits ) float_raise( float_flag_underflow, status ); + if ( roundBits ) status->float_exception_flags |= float_flag_inexact; + if ( ( zSig0 & ~roundMask ) == 0 ) { + zSig0 = ( roundIncrement != roundMask ); + return packFloatx80( zSign, zExp, zSig0 ); + } + zSig0 += roundIncrement; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( roundBits ) status->float_exception_flags |= float_flag_inexact; + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + +} +#endif // End of Addition for Previous + + /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', @@ -5192,6 +5275,33 @@ floatx80 floatx80_round64( floatx80 a, float_status *status ) return roundAndPackFloatx80(64, aSign, aExp, aSig, 0, status); } + +floatx80 floatx80_normalize( floatx80 a ) +{ + flag aSign; + int16_t aExp; + uint64_t aSig; + int8_t shiftCount; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF || aExp == 0 ) return a; + if ( aSig == 0 ) return packFloatx80(aSign, 0, 0); + + shiftCount = countLeadingZeros64( aSig ); + + if ( shiftCount > aExp ) { + shiftCount = aExp; + aExp = 0; + } else { + aExp -= shiftCount; + } + aSig <<= shiftCount; + + return packFloatx80( aSign, aExp, aSig ); +} #endif // end of addition for Previous /*---------------------------------------------------------------------------- @@ -5360,11 +5470,9 @@ static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, bExp = extractFloatx80Exp( b ); #ifdef SOFTFLOAT_68K if ( aExp == 0 ) { - if ( aSig == 0 ) return b; normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } if ( bExp == 0 ) { - if ( bSig == 0 ) return a; normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } #endif @@ -5411,7 +5519,10 @@ static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, } #endif zExp = aExp; - goto shiftRight1; +#ifdef SOFTFLOAT_68K + if ( aSig == 0 || bSig == 0 ) goto roundAndPack; +#endif + goto shiftRight1; } zSig0 = aSig + bSig; if ( (int64_t) zSig0 < 0 ) goto roundAndPack; @@ -5608,6 +5719,62 @@ floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) zSign, zExp, zSig0, zSig1, status); } +#ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous +floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status ) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); + } + if ( ( bExp | bSig ) == 0 ) goto invalid; + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + aSig &= LIT64( 0xFFFFFF0000000000 ); + bSig &= LIT64( 0xFFFFFF0000000000 ); + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (uint64_t) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + return roundAndPackFloatx80Sgl( zSign, zExp, zSig0, zSig1, status ); + +} +#endif // End of addition for Previous + + /*---------------------------------------------------------------------------- | Returns the result of dividing the extended double-precision floating-point | value `a' by the corresponding value `b'. The operation is performed @@ -5693,6 +5860,83 @@ floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) zSign, zExp, zSig0, zSig1, status); } +#ifdef SOFTFLOAT_68K // 21-01-2017: Addition for Previous +floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status ) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + uint64_t rem0, rem1, rem2, term0, term1, term2; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + goto invalid; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + return packFloatx80( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero, status ); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + aSig &= LIT64( 0xFFFFFF0000000000 ); + bSig &= LIT64( 0xFFFFFF0000000000 ); + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); + } + zSig1 |= ( ( rem1 | rem2 ) != 0 ); + } + return roundAndPackFloatx80Sgl( zSign, zExp, zSig0, zSig1, status ); + +} +#endif // End of addition for Previous + + /*---------------------------------------------------------------------------- | Returns the remainder of the extended double-precision floating-point value | `a' with respect to the corresponding value `b'. The operation is performed diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index e65beeeb..e9eeeaac 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -622,6 +622,8 @@ floatx80 floatx80_getexp( floatx80 a, float_status *status); floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status); +floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status); +floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status); /*---------------------------------------------------------------------------- | Software IEC/IEEE extended double-precision operations. @@ -651,6 +653,8 @@ floatx80 floatx80_scalbn(floatx80, int, float_status *status); //flag floatx80_is_unnormal( floatx80 a ); //flag floatx80_is_denormal( floatx80 a ); +floatx80 floatx80_normalize(floatx80); + static inline floatx80 floatx80_abs(floatx80 a) { a.high &= 0x7fff;