From: Toni Wilen Date: Sun, 5 Feb 2017 18:35:29 +0000 (+0200) Subject: FPU update 19 merge. X-Git-Tag: 3500~107 X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=5130b054e672bc46ea9f7ce82fe77975d773cd2e;p=francis%2Fwinuae.git FPU update 19 merge. --- diff --git a/fpp.cpp b/fpp.cpp index c0a2fe36..c4b5c39b 100644 --- a/fpp.cpp +++ b/fpp.cpp @@ -65,8 +65,8 @@ FPP_FROM_EXTEN fpp_from_exten_fmovem; FPP_A fpp_normalize; -FPP_A fpp_roundsgl; -FPP_A fpp_rounddbl; +FPP_A fpp_round_single; +FPP_A fpp_round_double; FPP_A fpp_round32; FPP_A fpp_round64; FPP_AB fpp_int; @@ -272,14 +272,19 @@ void fpsr_set_exception(uae_u32 exception) { regs.fpsr |= exception; } -static void fpsr_check_exception(void) +static void fpsr_check_exception(uae_u32 mask) { + uae_u32 exception; + bool nonmaskable; // Any exception status bit and matching exception enable bits set? - uae_u32 exception = (regs.fpsr >> 8) & (regs.fpcr >> 8); + exception = (regs.fpsr >> 8) & (regs.fpcr >> 8); + // Add 68040/68060 nonmaskable exceptions + if (currprefs.cpu_model >= 68040 && currprefs.fpu_model) + exception |= (regs.fpsr & (FPSR_OVFL | FPSR_UNFL | mask)) >> 8; if (exception) { int vector = 0; - int vtable[8] = { 49, 49, 50, 51, 53, 52, 54, 48 }; + static const int vtable[8] = { 49, 49, 50, 51, 53, 52, 54, 48 }; int i; // BSUN is handled separately for (i = 6; i >= 0; i--) { @@ -289,7 +294,9 @@ static void fpsr_check_exception(void) } } // logging only so far - write_log (_T("FPU exception: FPSR: %08x, FPCR: %04x (vector: %d)!\n"), regs.fpsr, regs.fpcr, vector); + nonmaskable = (exception != ((regs.fpsr >> 8) & (regs.fpcr >> 8))); + write_log (_T("FPU %s exception: FPSR: %08x, FPCR: %04x (vector: %d)!\n"), + nonmaskable ? _T("nonmaskable") : _T(""), regs.fpsr, regs.fpcr, vector); } } @@ -321,6 +328,8 @@ static void fpsr_clear_status(void) static uae_u32 fpsr_make_status(void) { + uae_u32 exception; + // get external status fpp_get_status(®s.fpsr); @@ -336,7 +345,11 @@ static uae_u32 fpsr_make_status(void) if (regs.fpsr & (FPSR_OVFL | FPSR_INEX2 | FPSR_INEX1)) regs.fpsr |= FPSR_AE_INEX; // INEX = INEX1 || INEX2 || OVFL - return (regs.fpsr & regs.fpcr & (FPSR_SNAN | FPSR_OPERR | FPSR_DZ)); + // return exceptions that interrupt calculation + exception = regs.fpsr & regs.fpcr & (FPSR_SNAN | FPSR_OPERR | FPSR_DZ); + if (currprefs.cpu_model >= 68040 && currprefs.fpu_model) + exception |= regs.fpsr & (FPSR_OVFL | FPSR_UNFL); + return exception; } static int fpsr_set_bsun(void) @@ -482,9 +495,9 @@ bool fpu_get_constant(fpdata *fpd, int cr) fpp_to_exten_fmovem(fpd, f[0], f[1], f[2]); if (((regs.fpcr >> 6) & 3) == 1) - fpp_roundsgl(fpd); + fpp_round32(fpd); if (((regs.fpcr >> 6) & 3) >= 2) - fpp_rounddbl(fpd); + fpp_round64(fpd); fpsr_set_result(fpd); @@ -915,13 +928,13 @@ static void to_pack (fpdata *fpd, uae_u32 *wrd) if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) { // infinity has extended exponent and all 0 packed fraction // nans are copies bit by bit - fpp_to_exten_fmovem(fpd, wrd[0], wrd[1], wrd[2]); + fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); return; } if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) { // exponent is not cared about, if mantissa is zero wrd[0] &= 0x80000000; - fpp_to_exten_fmovem(fpd, wrd[0], wrd[1], wrd[2]); + fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); return; } @@ -971,13 +984,13 @@ static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor) fptype fp; if (fpp_is_nan (src)) { - // copied bit by bit, no conversion - fpp_from_exten_fmovem(src, &wrd[0], &wrd[1], &wrd[2]); + // copy bit by bit, handle signaling nan + fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); return; } if (fpp_is_infinity (src)) { // extended exponent and all 0 packed fraction - fpp_from_exten_fmovem(src, &wrd[0], &wrd[1], &wrd[2]); + fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); wrd[1] = wrd[2] = 0; return; } @@ -1111,7 +1124,7 @@ static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor) static bool normalize_or_fault_if_no_denormal_support_pre(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size) { if (fpp_is_unnormal(fpd) || fpp_is_denormal(fpd)) { - if (currprefs.cpu_model >= 68040 && currprefs.fpu_model) { + if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_PRE, fpd, -1, size); return true; } else { @@ -1124,12 +1137,24 @@ static bool normalize_or_fault_if_no_denormal_support_pre(uae_u16 opcode, uae_u1 static bool normalize_or_fault_if_no_denormal_support_post(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size) { if (fpp_is_unnormal(fpd) || fpp_is_denormal(fpd)) { - if (currprefs.cpu_model >= 68040 && currprefs.fpu_model) { + if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_POST, fpd, -1, size); return true; } else { fpp_normalize(fpd); - return false; + //return true; FIXME: enable this once exception works + } + } + return false; +} + +static bool fault_if_68040_integer_nonmaskable(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd) +{ + if (currprefs.cpu_model == 68040 && currprefs.fpu_model) { + fpsr_make_status(); + if (regs.fpsr & (FPSR_SNAN | FPSR_OPERR)) { + fpsr_check_exception(FPSR_SNAN | FPSR_OPERR); + //return true; FIXME: enable this once exception works } } return false; @@ -1301,6 +1326,7 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old if (fault_if_4060 (opcode, extra, adold, oldpc, FPU_EXP_UNIMP_DATATYPE_PACKED_PRE, NULL, wrd)) return -1; to_pack (src, wrd); + fpp_normalize(src); return 1; } break; @@ -1338,12 +1364,14 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o if (!isinrom ()) write_log (_T("PUTFP: %f %04X %04X\n"), value, opcode, extra); #endif +#if 0 if (!(extra & 0x4000)) { if (fault_if_no_fpu (opcode, extra, 0, oldpc)) return 1; regs.fp[(extra >> 10) & 7] = *value; return 1; } +#endif reg = opcode & 7; mode = (opcode >> 3) & 7; size = (extra >> 10) & 7; @@ -1351,18 +1379,27 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o switch (mode) { case 0: + if (normalize_or_fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) + return -1; + switch (size) { case 6: m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 0) & 0xff) | (m68k_dreg (regs, reg) & ~0xff))); + if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) + return -1; break; case 4: m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 1) & 0xffff) | (m68k_dreg (regs, reg) & ~0xffff))); + if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) + return -1; break; case 0: m68k_dreg (regs, reg) = (uae_u32)fpp_to_int (value, 2); + if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) + return -1; break; case 1: m68k_dreg (regs, reg) = fpp_from_single (value); @@ -1428,8 +1465,10 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o { case 0: if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) - return 1; + return -1; x_cp_put_long(ad, (uae_u32)fpp_to_int(value, 2)); + if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) + return -1; break; case 1: if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) @@ -1440,7 +1479,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o { uae_u32 wrd1, wrd2, wrd3; if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) - return 1; + return -1; fpp_from_exten(value, &wrd1, &wrd2, &wrd3); x_cp_put_long (ad, wrd1); ad += 4; @@ -1455,11 +1494,12 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o uae_u32 wrd[3]; int kfactor; if (fault_if_4060 (opcode, extra, ad, oldpc, FPU_EXP_UNIMP_DATATYPE_PACKED_POST, value, NULL)) - return 1; + return -1; kfactor = size == 7 ? m68k_dreg (regs, (extra >> 4) & 7) : extra; kfactor &= 127; if (kfactor & 64) kfactor |= ~63; + fpp_normalize(value); from_pack (value, wrd, kfactor); x_cp_put_long (ad, wrd[0]); ad += 4; @@ -1470,8 +1510,10 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o break; case 4: if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) - return 1; + return -1; x_cp_put_word(ad, (uae_s16)fpp_to_int(value, 1)); + if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) + return -1; break; case 5: { @@ -1486,8 +1528,10 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o break; case 6: if (normalize_or_fault_if_no_denormal_support_pre(opcode, extra, ad, oldpc, value, 2)) - return 1; + return -1; x_cp_put_byte(ad, (uae_s8)fpp_to_int(value, 0)); + if (fault_if_68040_integer_nonmaskable(opcode, extra, ad, oldpc, value)) + return -1; break; default: return 0; @@ -2352,9 +2396,9 @@ static bool arithmetic(fpdata *src, fpdata *dst, int extra) case 0x37: /* FSINCOS */ fpp_cos(src, dst); if (((regs.fpcr >> 6) & 3) == 1) - fpp_round32(dst); + fpp_round_single(dst); else if (((regs.fpcr >> 6) & 3) == 2) - fpp_round64(dst); + fpp_round_double(dst); regs.fp[extra & 7] = *dst; fpp_sin(src, dst); break; @@ -2381,13 +2425,13 @@ static bool arithmetic(fpdata *src, fpdata *dst, int extra) // must check instruction rounding overrides first if ((extra & 0x44) == 0x40) { - fpp_round32(dst); + fpp_round_single(dst); } else if ((extra & 0x44) == 0x44) { - fpp_round64(dst); + fpp_round_double(dst); } else if (((regs.fpcr >> 6) & 3) == 1) { - fpp_round32(dst); + fpp_round_single(dst); } else if (((regs.fpcr >> 6) & 3) == 2) { - fpp_round64(dst); + fpp_round_double(dst); } if (fpsr_make_status()) @@ -2415,9 +2459,16 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) switch ((extra >> 13) & 0x7) { case 3: - if (put_fp_value (®s.fp[(extra >> 7) & 7], opcode, extra, pc) == 0) - fpu_noinst (opcode, pc); + fpsr_clear_status(); + src = regs.fp[(extra >> 7) & 7]; + v = put_fp_value (&src, opcode, extra, pc); + if (v <= 0) { + if (v == 0) + fpu_noinst (opcode, pc); + return; + } fpsr_make_status(); + fpsr_check_exception(0); return; case 4: @@ -2621,6 +2672,7 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) fpsr_clear_status(); fpu_get_constant(®s.fp[reg], extra); fpsr_make_status(); + fpsr_check_exception(0); return; } @@ -2650,11 +2702,11 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) return; } - fpsr_clear_status(); v = arithmetic(&src, &dst, extra); if (v) regs.fp[reg] = dst; - fpsr_check_exception(); + + fpsr_check_exception(0); return; default: break; @@ -2671,23 +2723,6 @@ void fpuop_arithmetic (uae_u32 opcode, uae_u16 extra) if (fpu_mmu_fixup) { mmufixup[0].reg = -1; } - if (currprefs.fpu_softfloat) { - // Any exception status bit and matching exception enable bits set? - if ((regs.fpcr >> 8) & (regs.fpsr >> 8)) { - uae_u32 mask = regs.fpcr >> 8; - int vector = 0; - for (int i = 7; i >= 0; i--) { - if (mask & (1 << i)) { - if (i > 0) - i--; - vector = i + 48; - break; - } - } - // logging only so far - write_log (_T("FPU exception: %08x %d!\n"), regs.fpsr, vector); - } - } } void fpu_reset (void) @@ -2702,9 +2737,10 @@ void fpu_reset (void) init_fpucw_x87(); #endif - regs.fpcr = regs.fpsr = regs.fpiar = 0; + regs.fpiar = 0; regs.fpu_exp_state = 0; - fpsr_make_status(); + fpp_set_fpcr (0); + fpp_set_fpsr (0); fpux_restore (NULL); } diff --git a/fpp_native.cpp b/fpp_native.cpp index c9fcf2fa..efc125d1 100644 --- a/fpp_native.cpp +++ b/fpp_native.cpp @@ -256,6 +256,7 @@ static void fp_get_status(uae_u32 *status) if (exp_flags & FE_INVALID) *status |= FPSR_OPERR; } + /* FIXME: how to detect SNAN? */ } static void fp_clear_status(void) @@ -266,11 +267,24 @@ static void fp_clear_status(void) static const TCHAR *fp_print(fpdata *fpd) { static TCHAR fs[32]; + bool n, d; + + n = signbit(fpd->fp) ? 1 : 0; + d = isnormal(fpd->fp) ? 0 : 1; + + if(isinf(fpd->fp)) { + _stprintf(fs, _T("%c%s"), n ? '-' : '+', _T("inf")); + } else if(isnan(fpd->fp)) { + _stprintf(fs, _T("%c%s"), n ? '-' : '+', _T("nan")); + } else { + if(n) + fpd->fp *= -1.0; #if USE_LONG_DOUBLE - _stprintf(fs, _T("#%Le"), fpd->fp); + _stprintf(fs, _T("#%Le"), fpd->fp); #else - _stprintf(fs, _T("#%e"), fpd->fp); + _stprintf(fs, _T("#%e"), fpd->fp); #endif + } return fs; } @@ -294,7 +308,7 @@ static bool fp_is_infinity (fpdata *fpd) } static bool fp_is_zero(fpdata *fpd) { - return (fpd->fp == 0.0); + return fpd->fp == 0.0; } static bool fp_is_neg(fpdata *fpd) { @@ -547,7 +561,7 @@ static void fp_from_int(fpdata *fpd, uae_s32 src) /* Functions for rounding */ // round to float with extended precision exponent -static void fp_roundsgl(fpdata *fpd) +static void fp_round32(fpdata *fpd) { int expon; float mant; @@ -561,7 +575,7 @@ static void fp_roundsgl(fpdata *fpd) } // round to double with extended precision exponent -static void fp_rounddbl(fpdata *fpd) +static void fp_round64(fpdata *fpd) { int expon; double mant; @@ -575,13 +589,13 @@ static void fp_rounddbl(fpdata *fpd) } // round to float -static void fp_round32(fpdata *fpd) +static void fp_round_single(fpdata *fpd) { fpd->fp = (float) fpd->fp; } // round to double -static void fp_round64(fpdata *fpd) +static void fp_round_double(fpdata *fpd) { #ifdef USE_LONG_DOUBLE fpd->fp = (double) fpd->fp; @@ -590,6 +604,11 @@ static void fp_round64(fpdata *fpd) /* Arithmetic functions */ +static void fp_move(fpdata *src, fpdata *dst) +{ + dst->fp = src->fp; +} + #ifdef USE_LONG_DOUBLE STATIC_INLINE fptype fp_int(fpdata *a, fpdata *dst) @@ -653,7 +672,7 @@ static void fp_int(fpdata *fpd, fpdata *dst) { fptype a = fpd->fp; #ifdef USE_HOST_ROUNDING - dst->fp = rint(a); + dst->fp = rintl(a); #else switch (regs.fpcr & FPCR_ROUNDING_MODE) { @@ -674,13 +693,13 @@ static void fp_int(fpdata *fpd, fpdata *dst) static void fp_getexp(fpdata *a, fpdata *dst) { int expon; - frexp(a->fp, &expon); + frexpl(a->fp, &expon); dst->fp = (double) (expon - 1); } static void fp_getman(fpdata *a, fpdata *dst) { int expon; - dst->fp = frexp(a->fp, &expon) * 2.0; + dst->fp = frexpl(a->fp, &expon) * 2.0; } static void fp_div(fpdata *a, fpdata *b) { @@ -690,7 +709,7 @@ static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) { fptype quot; #ifdef USE_HOST_ROUNDING - quot = trunc(a->fp / b->fp); + quot = truncl(a->fp / b->fp); #else quot = fp_round_to_zero(a->fp / b->fp); #endif @@ -701,13 +720,13 @@ static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) *s = 0; } *q = (uae_u64)quot; - a->fp = fmod(a->fp, b->fp); + a->fp = fmodl(a->fp, b->fp); } static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) { fptype quot; #ifdef USE_HOST_ROUNDING - quot = round(a->fp / b->fp); + quot = roundl(a->fp / b->fp); #else quot = fp_round_to_nearest(a->fp / b->fp); #endif @@ -718,87 +737,87 @@ static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) *s = 0; } *q = (uae_u64)quot; - a->fp = remainder(a->fp, b->fp); + a->fp = remainderl(a->fp, b->fp); } static void fp_scale(fpdata *a, fpdata *b) { - a->fp = ldexp(a->fp, (int)b->fp); + a->fp = ldexpl(a->fp, (int)b->fp); } #endif // !USE_LONG_DOUBLE static void fp_sinh(fpdata *a, fpdata *dst) { - dst->fp = sinh(a->fp); + dst->fp = sinhl(a->fp); } static void fp_intrz(fpdata *fpd, fpdata *dst) { #ifdef USE_HOST_ROUNDING - dst->fp = trunc(fpd->fp); + dst->fp = truncl(fpd->fp); #else dst->fp = fp_round_to_zero (fpd->fp); #endif } static void fp_sqrt(fpdata *a, fpdata *dst) { - dst->fp = sqrt(a->fp); + dst->fp = sqrtl(a->fp); } static void fp_lognp1(fpdata *a, fpdata *dst) { - dst->fp = log(a->fp + 1.0); + dst->fp = logl(a->fp + 1.0); } static void fp_etoxm1(fpdata *a, fpdata *dst) { - dst->fp = exp(a->fp) - 1.0; + dst->fp = expl(a->fp) - 1.0; } static void fp_tanh(fpdata *a, fpdata *dst) { - dst->fp = tanh(a->fp); + dst->fp = tanhl(a->fp); } static void fp_atan(fpdata *a, fpdata *dst) { - dst->fp = atan(a->fp); + dst->fp = atanl(a->fp); } static void fp_atanh(fpdata *a, fpdata *dst) { - dst->fp = atanh(a->fp); + dst->fp = atanhl(a->fp); } static void fp_sin(fpdata *a, fpdata *dst) { - dst->fp = sin(a->fp); + dst->fp = sinl(a->fp); } static void fp_asin(fpdata *a, fpdata *dst) { - dst->fp = asin(a->fp); + dst->fp = asinl(a->fp); } static void fp_tan(fpdata *a, fpdata *dst) { - dst->fp = tan(a->fp); + dst->fp = tanl(a->fp); } static void fp_etox(fpdata *a, fpdata *dst) { - dst->fp = exp(a->fp); + dst->fp = expl(a->fp); } static void fp_twotox(fpdata *a, fpdata *dst) { - dst->fp = pow(2.0, a->fp); + dst->fp = powl(2.0, a->fp); } static void fp_tentox(fpdata *a, fpdata *dst) { - dst->fp = pow(10.0, a->fp); + dst->fp = powl(10.0, a->fp); } static void fp_logn(fpdata *a, fpdata *dst) { - dst->fp = log(a->fp); + dst->fp = logl(a->fp); } static void fp_log10(fpdata *a, fpdata *dst) { - dst->fp = log10(a->fp); + dst->fp = log10l(a->fp); } static void fp_log2(fpdata *a, fpdata *dst) { - dst->fp = log2(a->fp); + dst->fp = log2l(a->fp); } static void fp_abs(fpdata *a, fpdata *dst) { @@ -806,7 +825,7 @@ static void fp_abs(fpdata *a, fpdata *dst) } static void fp_cosh(fpdata *a, fpdata *dst) { - dst->fp = cosh(a->fp); + dst->fp = coshl(a->fp); } static void fp_neg(fpdata *a, fpdata *dst) { @@ -814,11 +833,11 @@ static void fp_neg(fpdata *a, fpdata *dst) } static void fp_acos(fpdata *a, fpdata *dst) { - dst->fp = acos(a->fp); + dst->fp = acosl(a->fp); } static void fp_cos(fpdata *a, fpdata *dst) { - dst->fp = cos(a->fp); + dst->fp = cosl(a->fp); } static void fp_sub(fpdata *a, fpdata *b) { @@ -836,13 +855,13 @@ static void fp_sglmul(fpdata *a, fpdata *b) { // not exact a->fp = a->fp * b->fp; - fpp_roundsgl(a); + fpp_round32(a); } static void fp_sgldiv(fpdata *a, fpdata *b) { // not exact a->fp = a->fp / b->fp; - fpp_roundsgl(a); + fpp_round32(a); } static void fp_normalize(fpdata *a) @@ -900,12 +919,7 @@ static void fp_cmp(fpdata *a, fpdata *b) static void fp_tst(fpdata *a, fpdata *b) { - a->fpx = b->fpx; -} - -static void fp_move(fpdata *src, fpdata *dst) -{ - dst->fp = src->fp; + a->fp = b->fp; } void fp_init_native(void) @@ -942,8 +956,8 @@ void fp_init_native(void) fpp_to_exten_fmovem = fp_to_exten; fpp_from_exten_fmovem = fp_from_exten; - fpp_roundsgl = fp_roundsgl; - fpp_rounddbl = fp_rounddbl; + fpp_round_single = fp_round_single; + fpp_round_double = fp_round_double; fpp_round32 = fp_round32; fpp_round64 = fp_round64; diff --git a/fpp_softfloat.cpp b/fpp_softfloat.cpp index af4843f0..7043ebab 100644 --- a/fpp_softfloat.cpp +++ b/fpp_softfloat.cpp @@ -63,20 +63,18 @@ static void fp_set_mode(uae_u32 mode_control) static void fp_get_status(uae_u32 *status) { - if (fs.float_exception_flags & float_flag_signaling) { + if (fs.float_exception_flags & float_flag_signaling) *status |= FPSR_SNAN; - } else { - if (fs.float_exception_flags & float_flag_invalid) - *status |= FPSR_OPERR; - if (fs.float_exception_flags & float_flag_divbyzero) - *status |= FPSR_DZ; - if (fs.float_exception_flags & float_flag_overflow) - *status |= FPSR_OVFL; - if (fs.float_exception_flags & float_flag_underflow) - *status |= FPSR_UNFL; - if (fs.float_exception_flags & float_flag_inexact) - *status |= FPSR_INEX2; - } + if (fs.float_exception_flags & float_flag_invalid) + *status |= FPSR_OPERR; + if (fs.float_exception_flags & float_flag_divbyzero) + *status |= FPSR_DZ; + if (fs.float_exception_flags & float_flag_overflow) + *status |= FPSR_OVFL; + if (fs.float_exception_flags & float_flag_underflow) + *status |= FPSR_UNFL; + if (fs.float_exception_flags & float_flag_inexact) + *status |= FPSR_INEX2; } STATIC_INLINE void fp_clear_status(void) { @@ -104,7 +102,7 @@ static const TCHAR *fp_print(fpdata *fpd) #endif } else if (floatx80_is_infinity(*fx)) { _stprintf(fsout, _T("%c%s"), n?'-':'+', _T("inf")); - } else if (floatx80_is_signaling_nan(*fx, &fs)) { + } else if (floatx80_is_signaling_nan(*fx)) { _stprintf(fsout, _T("%c%s"), n?'-':'+', _T("snan")); } else if (floatx80_is_any_nan(*fx)) { _stprintf(fsout, _T("%c%s"), n?'-':'+', _T("nan")); @@ -128,7 +126,7 @@ static const TCHAR *fp_print(fpdata *fpd) /* Functions for detecting float type */ static bool fp_is_snan(fpdata *fpd) { - return floatx80_is_signaling_nan(fpd->fpx, &fs) != 0; + return floatx80_is_signaling_nan(fpd->fpx) != 0; } static bool fp_unset_snan(fpdata *fpd) { @@ -160,15 +158,6 @@ static bool fp_is_unnormal(fpdata *fpd) return floatx80_is_unnormal(fpd->fpx) != 0; } -static inline int32_t extractFloatx80Exp( floatx80 a ) -{ - return a.high & 0x7FFF; -} -static inline uint64_t extractFloatx80Frac( floatx80 a ) -{ - return a.low; -} - /* Functions for converting between float formats */ static const fptype twoto32 = 4294967296.0; @@ -275,7 +264,7 @@ static void from_native(fptype fp, fpdata *fpd) static void to_single(fpdata *fpd, uae_u32 wrd1) { - float32 f = wrd1; + float32 f = wrd1; fpd->fpx = float32_to_floatx80_allowunnormal(f, &fs); } static uae_u32 from_single(fpdata *fpd) @@ -335,7 +324,6 @@ static void from_int(fpdata *fpd, uae_s32 src) fpd->fpx = int32_to_floatx80(src, &fs); } - /* Functions for rounding */ static floatx80 fp_to_sgl(floatx80 a) @@ -347,33 +335,27 @@ static floatx80 fp_to_sgl(floatx80 a) } // round to float with extended precision exponent -static void fp_roundsgl(fpdata *fpd) +static void fp_round32(fpdata *fpd) { fpd->fpx = floatx80_round32(fpd->fpx, &fs); } // round to double with extended precision exponent -static void fp_rounddbl(fpdata *fpd) +static void fp_round64(fpdata *fpd) { fpd->fpx = floatx80_round64(fpd->fpx, &fs); } // round to float -static void fp_round32(fpdata *fpd) +static void fp_round_single(fpdata *fpd) { - if (fp_is_nan(fpd)) - return; - float32 f = floatx80_to_float32(fpd->fpx, &fs); - fpd->fpx = float32_to_floatx80(f, &fs); + fpd->fpx = floatx80_round_to_float32(fpd->fpx, &fs); } // round to double -static void fp_round64(fpdata *fpd) +static void fp_round_double(fpdata *fpd) { - if (fp_is_nan(fpd)) - return; - float64 f = floatx80_to_float64(fpd->fpx, &fs); - fpd->fpx = float64_to_floatx80(f, &fs); + fpd->fpx = floatx80_round_to_float64(fpd->fpx, &fs); } /* Arithmetic functions */ @@ -399,22 +381,32 @@ static void fp_sqrt(fpdata *a, fpdata *dst) static void fp_lognp1(fpdata *a, fpdata *dst) { fptype fpa; - if (to_native_checked(&fpa, a, dst)) + flag e = 0; + dst->fpx = floatx80_lognp1_check(a->fpx, &e, &fs); + if (e) return; + to_native(&fpa, a); fpa = log(a->fp + 1.0); from_native(fpa, dst); } static void fp_sin(fpdata *a, fpdata *dst) { fptype fpa; - if (to_native_checked(&fpa, a, dst)) + flag e = 0; + dst->fpx = floatx80_sin_check(a->fpx, &e, &fs); + if (e) return; + to_native(&fpa, a); fpa = sin(fpa); from_native(fpa, dst); } static void fp_tan(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_tan_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = tan(fpa); from_native(fpa, dst); @@ -422,6 +414,10 @@ static void fp_tan(fpdata *a, fpdata *dst) static void fp_logn(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_logn_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = log(fpa); from_native(fpa, dst); @@ -429,6 +425,10 @@ static void fp_logn(fpdata *a, fpdata *dst) static void fp_log10(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_log10_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = log10(fpa); from_native(fpa, dst); @@ -436,6 +436,10 @@ static void fp_log10(fpdata *a, fpdata *dst) static void fp_log2(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_log2_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = log2(fpa); from_native(fpa, dst); @@ -443,27 +447,19 @@ static void fp_log2(fpdata *a, fpdata *dst) static void fp_abs(fpdata *a, fpdata *dst) { - uint64_t aSig = extractFloatx80Frac(a->fpx); - int32_t aExp = extractFloatx80Exp(a->fpx); - if (aExp == 0x7FFF && (uint64_t)(aSig << 1)) { - dst->fpx = propagateFloatx80NaN(a->fpx, a->fpx, &fs); - return; - } - dst->fpx = floatx80_abs(a->fpx); + dst->fpx = floatx80_abs(a->fpx, &fs); } static void fp_neg(fpdata *a, fpdata *dst) { - uint64_t aSig = extractFloatx80Frac(a->fpx); - int32_t aExp = extractFloatx80Exp(a->fpx); - if (aExp == 0x7FFF && (uint64_t)(aSig << 1)) { - dst->fpx = propagateFloatx80NaN(a->fpx, a->fpx, &fs); - return; - } - dst->fpx = floatx80_chs(a->fpx); + dst->fpx = floatx80_neg(a->fpx, &fs); } static void fp_cos(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_sin_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = cos(fpa); from_native(fpa, dst); @@ -526,6 +522,10 @@ static void fp_tst(fpdata *a, fpdata *b) static void fp_sinh(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_sinh_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = sinhl(fpa); from_native(fpa, dst); @@ -533,6 +533,10 @@ static void fp_sinh(fpdata *a, fpdata *dst) static void fp_etoxm1(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_etoxm1_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = expl(fpa) - 1.0; from_native(fpa, dst); @@ -540,6 +544,10 @@ static void fp_etoxm1(fpdata *a, fpdata *dst) static void fp_tanh(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_tanh_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = tanhl(fpa); from_native(fpa, dst); @@ -547,6 +555,10 @@ static void fp_tanh(fpdata *a, fpdata *dst) static void fp_atan(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_atan_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = atanl(fpa); from_native(fpa, dst); @@ -554,6 +566,10 @@ static void fp_atan(fpdata *a, fpdata *dst) static void fp_asin(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_asin_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = asinl(fpa); from_native(fpa, dst); @@ -561,6 +577,10 @@ static void fp_asin(fpdata *a, fpdata *dst) static void fp_atanh(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_atanh_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = atanhl(fpa); from_native(fpa, dst); @@ -568,6 +588,10 @@ static void fp_atanh(fpdata *a, fpdata *dst) static void fp_etox(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_etox_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = expl(fpa); from_native(fpa, dst); @@ -575,6 +599,10 @@ static void fp_etox(fpdata *a, fpdata *dst) static void fp_twotox(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_twotox_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = powl(2.0, fpa); from_native(fpa, dst); @@ -582,6 +610,10 @@ static void fp_twotox(fpdata *a, fpdata *dst) static void fp_tentox(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_tentox_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = powl(10.0, fpa); from_native(fpa, dst); @@ -589,6 +621,10 @@ static void fp_tentox(fpdata *a, fpdata *dst) static void fp_cosh(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_cosh_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = coshl(fpa); from_native(fpa, dst); @@ -596,6 +632,10 @@ static void fp_cosh(fpdata *a, fpdata *dst) static void fp_acos(fpdata *a, fpdata *dst) { fptype fpa; + flag e = 0; + dst->fpx = floatx80_acos_check(a->fpx, &e, &fs); + if (e) + return; to_native(&fpa, a); fpa = acosl(fpa); from_native(fpa, dst); @@ -641,8 +681,8 @@ void fp_init_softfloat(void) fpp_to_exten_fmovem = to_exten_fmovem; fpp_from_exten_fmovem = from_exten_fmovem; - fpp_roundsgl = fp_roundsgl; - fpp_rounddbl = fp_rounddbl; + fpp_round_single = fp_round_single; + fpp_round_double = fp_round_double; fpp_round32 = fp_round32; fpp_round64 = fp_round64; diff --git a/include/fpp.h b/include/fpp.h index 8a952bbf..b410c281 100644 --- a/include/fpp.h +++ b/include/fpp.h @@ -72,8 +72,8 @@ extern FPP_FROM_EXTEN fpp_from_exten; extern FPP_TO_EXTEN fpp_to_exten_fmovem; extern FPP_FROM_EXTEN fpp_from_exten_fmovem; -extern FPP_A fpp_roundsgl; -extern FPP_A fpp_rounddbl; +extern FPP_A fpp_round_single; +extern FPP_A fpp_round_double; extern FPP_A fpp_round32; extern FPP_A fpp_round64; diff --git a/softfloat/SOFTFLOAT-MACROS.H b/softfloat/SOFTFLOAT-MACROS.H index 9cc6158c..a84956e6 100644 --- a/softfloat/SOFTFLOAT-MACROS.H +++ b/softfloat/SOFTFLOAT-MACROS.H @@ -669,7 +669,7 @@ static uint32_t estimateSqrt32(int aExp, uint32_t a) | `a'. If `a' is zero, 32 is returned. *----------------------------------------------------------------------------*/ -static int8_t countLeadingZeros32( uint32_t a ) +static inline int8_t countLeadingZeros32( uint32_t a ) { #if SOFTFLOAT_GNUC_PREREQ(3, 4) if (a) { @@ -717,7 +717,7 @@ static int8_t countLeadingZeros32( uint32_t a ) | `a'. If `a' is zero, 64 is returned. *----------------------------------------------------------------------------*/ -static int8_t countLeadingZeros64( uint64_t a ) +static inline int8_t countLeadingZeros64( uint64_t a ) { #if SOFTFLOAT_GNUC_PREREQ(3, 4) if (a) { diff --git a/softfloat/softfloat-specialize.h b/softfloat/softfloat-specialize.h index 962dff2c..1586f3f7 100644 --- a/softfloat/softfloat-specialize.h +++ b/softfloat/softfloat-specialize.h @@ -79,86 +79,17 @@ this code that are retained. * version 2 or later. See the COPYING file in the top-level directory. */ -#if defined(TARGET_XTENSA) -/* Define for architectures which deviate from IEEE in not supporting - * signaling NaNs (so all NaNs are treated as quiet). - */ -#define NO_SIGNALING_NANS 1 -#endif - /*---------------------------------------------------------------------------- -| The pattern for a default generated half-precision NaN. +| Returns 1 if the extended double-precision floating-point value `a' is a +| NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -static float16 float16_default_nan(float_status *status) -{ -#if defined(TARGET_ARM) - return const_float16(0x7E00); -#else - if (status->snan_bit_is_one) { - return const_float16(0x7DFF); - } else { -#if defined(TARGET_MIPS) - return const_float16(0x7E00); -#else - return const_float16(0xFE00); -#endif - } -#endif -} -/*---------------------------------------------------------------------------- -| The pattern for a default generated single-precision NaN. -*----------------------------------------------------------------------------*/ -static float32 float32_default_nan(float_status *status) +static inline flag floatx80_is_nan( floatx80 a ) { -#if defined(TARGET_SPARC) - return const_float32(0x7FFFFFFF); -#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \ - defined(TARGET_XTENSA) || defined(TARGET_S390X) || defined(TARGET_TRICORE) - return const_float32(0x7FC00000); -#else - if (status->snan_bit_is_one) { - return const_float32(0x7FBFFFFF); - } else { -#if defined(TARGET_MIPS) - return const_float32(0x7FC00000); -#else - return const_float32(0xFFC00000); -#endif - } -#endif -} -/*---------------------------------------------------------------------------- -| The pattern for a default generated double-precision NaN. -*----------------------------------------------------------------------------*/ -static float64 float64_default_nan(float_status *status) -{ -#if defined(TARGET_SPARC) - return const_float64(LIT64(0x7FFFFFFFFFFFFFFF)); -#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \ - defined(TARGET_S390X) - return const_float64(LIT64(0x7FF8000000000000)); -#else - if (status->snan_bit_is_one) { - return const_float64(LIT64(0x7FF7FFFFFFFFFFFF)); - } else { -#if defined(TARGET_MIPS) - return const_float64(LIT64(0x7FF8000000000000)); -#else - return const_float64(LIT64(0xFFF8000000000000)); -#endif - } -#endif -} + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (uint64_t) ( a.low<<1 ); -/*---------------------------------------------------------------------------- -| The pattern for a default generated extended double-precision NaN. The -| `high' and `low' values hold the most- and least-significant bits, -| respectively. -*----------------------------------------------------------------------------*/ -#define floatx80_default_nan_high 0xFFFF -#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) +} /*---------------------------------------------------------------------------- | The pattern for a default generated extended double-precision NaN. @@ -177,27 +108,6 @@ static floatx80 floatx80_default_nan(float_status *status) return r; } -/*---------------------------------------------------------------------------- -| The pattern for a default generated quadruple-precision NaN. -*----------------------------------------------------------------------------*/ -static float128 float128_default_nan(float_status *status) -{ - float128 r; - - if (status->snan_bit_is_one) { - r.low = LIT64(0xFFFFFFFFFFFFFFFF); - r.high = LIT64(0x7FFF7FFFFFFFFFFF); - } else { - r.low = LIT64(0x0000000000000000); -#if defined(TARGET_S390X) - r.high = LIT64(0x7FFF800000000000); -#else - r.high = LIT64(0xFFFF800000000000); -#endif - } - return r; -} - /*---------------------------------------------------------------------------- | Raises the exceptions specified by `flags'. Floating-point traps can be | defined here if desired. It is currently not possible for such a trap @@ -218,130 +128,16 @@ typedef struct { uint64_t high, low; } commonNaNT; -#ifdef NO_SIGNALING_NANS -static int float16_is_quiet_nan(float16 a_, float_status *status) -{ - return float16_is_any_nan(a_); -} - -static int float16_is_signaling_nan(float16 a_, float_status *status) -{ - return 0; -} -#else /*---------------------------------------------------------------------------- -| Returns 1 if the half-precision floating-point value `a' is a quiet -| NaN; otherwise returns 0. +| Returns 1 if the single-precision floating-point value `a' is a NaN; +| otherwise returns 0. *----------------------------------------------------------------------------*/ -static int float16_is_quiet_nan(float16 a_, float_status *status) +static inline flag float32_is_nan( float32 a ) { - uint16_t a = float16_val(a_); - if (status->snan_bit_is_one) { - return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); - } else { - return ((a & ~0x8000) >= 0x7C80); - } -} -/*---------------------------------------------------------------------------- -| Returns 1 if the half-precision floating-point value `a' is a signaling -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ + return ( 0xFF000000 < (uint32_t) ( a<<1 ) ); -static int float16_is_signaling_nan(float16 a_, float_status *status) -{ - uint16_t a = float16_val(a_); - if (status->snan_bit_is_one) { - return ((a & ~0x8000) >= 0x7C80); - } else { - return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF); - } -} -#endif - -/*---------------------------------------------------------------------------- -| Returns a quiet NaN if the half-precision floating point value `a' is a -| signaling NaN; otherwise returns `a'. -*----------------------------------------------------------------------------*/ -static float16 float16_maybe_silence_nan(float16 a_, float_status *status) -{ - if (float16_is_signaling_nan(a_, status)) { - if (status->snan_bit_is_one) { - return float16_default_nan(status); - } else { - uint16_t a = float16_val(a_); - a |= (1 << 9); - return make_float16(a); - } - } - return a_; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the half-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float16ToCommonNaN(float16 a, float_status *status) -{ - commonNaNT z; - - if (float16_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = float16_val(a) >> 15; - z.low = 0; - z.high = ((uint64_t) float16_val(a)) << 54; - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the half- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float16 commonNaNToFloat16(commonNaNT a, float_status *status) -{ - uint16_t mantissa = a.high >> 54; - - if (status->default_nan_mode) { - return float16_default_nan(status); - } - - if (mantissa) { - return make_float16(((((uint16_t) a.sign) << 15) - | (0x1F << 10) | mantissa)); - } else { - return float16_default_nan(status); - } -} - -#ifdef NO_SIGNALING_NANS -static int float32_is_quiet_nan(float32 a_, float_status *status) -{ - return float32_is_any_nan(a_); -} - -static int float32_is_signaling_nan(float32 a_, float_status *status) -{ - return 0; -} -#else -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is a quiet -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -static int float32_is_quiet_nan(float32 a_, float_status *status) -{ - uint32_t a = float32_val(a_); - if (status->snan_bit_is_one) { - return (((a >> 22) & 0x1FF) == 0x1FE) && (a & 0x003FFFFF); - } else { - return ((uint32_t)(a << 1) >= 0xFF800000); - } } /*---------------------------------------------------------------------------- @@ -349,34 +145,11 @@ static int float32_is_quiet_nan(float32 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -static int float32_is_signaling_nan(float32 a_, float_status *status) +static inline flag float32_is_signaling_nan( float32 a ) { - uint32_t a = float32_val(a_); - if (status->snan_bit_is_one) { - return ((uint32_t)(a << 1) >= 0xFF800000); - } else { - return (((a >> 22) & 0x1FF) == 0x1FE) && (a & 0x003FFFFF); - } -} -#endif -/*---------------------------------------------------------------------------- -| Returns a quiet NaN if the single-precision floating point value `a' is a -| signaling NaN; otherwise returns `a'. -*----------------------------------------------------------------------------*/ + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); -static float32 float32_maybe_silence_nan(float32 a_, float_status *status) -{ - if (float32_is_signaling_nan(a_, status)) { - if (status->snan_bit_is_one) { - return float32_default_nan(status); - } else { - uint32_t a = float32_val(a_); - a |= (1 << 22); - return make_float32(a); - } - } - return a_; } /*---------------------------------------------------------------------------- @@ -385,17 +158,16 @@ static float32 float32_maybe_silence_nan(float32 a_, float_status *status) | exception is raised. *----------------------------------------------------------------------------*/ -static commonNaNT float32ToCommonNaN(float32 a, float_status *status) +static commonNaNT float32ToCommonNaN( float32 a, float_status *status ) { commonNaNT z; - if (float32_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = float32_val(a) >> 31; + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_signaling, status ); + z.sign = a>>31; z.low = 0; - z.high = ((uint64_t)float32_val(a)) << 41; + z.high = ( (uint64_t) a )<<41; return z; + } /*---------------------------------------------------------------------------- @@ -403,377 +175,49 @@ static commonNaNT float32ToCommonNaN(float32 a, float_status *status) | precision floating-point format. *----------------------------------------------------------------------------*/ -static float32 commonNaNToFloat32(commonNaNT a, float_status *status) +static float32 commonNaNToFloat32( commonNaNT a ) { - uint32_t mantissa = a.high >> 41; - if (status->default_nan_mode) { - return float32_default_nan(status); - } + return ( ( (uint32_t) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); - if (mantissa) { - return make_float32( - (((uint32_t)a.sign) << 31) | 0x7F800000 | (a.high >> 41)); - } else { - return float32_default_nan(status); - } } -/*---------------------------------------------------------------------------- -| Select which NaN to propagate for a two-input operation. -| IEEE754 doesn't specify all the details of this, so the -| algorithm is target-specific. -| The routine is passed various bits of information about the -| two NaNs and should return 0 to select NaN a and 1 for NaN b. -| Note that signalling NaNs are always squashed to quiet NaNs -| by the caller, by calling floatXX_maybe_silence_nan() before -| returning them. -| -| aIsLargerSignificand is only valid if both a and b are NaNs -| of some kind, and is true if a has the larger significand, -| or if both a and b have the same significand but a is -| positive but b is negative. It is only needed for the x87 -| tie-break rule. -*----------------------------------------------------------------------------*/ - -#if defined(TARGET_ARM) -static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, - flag aIsLargerSignificand) -{ - /* ARM mandated NaN propagation rules: take the first of: - * 1. A if it is signaling - * 2. B if it is signaling - * 3. A (quiet) - * 4. B (quiet) - * A signaling NaN is always quietened before returning it. - */ - if (aIsSNaN) { - return 0; - } else if (bIsSNaN) { - return 1; - } else if (aIsQNaN) { - return 0; - } else { - return 1; - } -} -#elif defined(TARGET_MIPS) -static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, - flag aIsLargerSignificand) -{ - /* According to MIPS specifications, if one of the two operands is - * a sNaN, a new qNaN has to be generated. This is done in - * floatXX_maybe_silence_nan(). For qNaN inputs the specifications - * says: "When possible, this QNaN result is one of the operand QNaN - * values." In practice it seems that most implementations choose - * the first operand if both operands are qNaN. In short this gives - * the following rules: - * 1. A if it is signaling - * 2. B if it is signaling - * 3. A (quiet) - * 4. B (quiet) - * A signaling NaN is always silenced before returning it. - */ - if (aIsSNaN) { - return 0; - } else if (bIsSNaN) { - return 1; - } else if (aIsQNaN) { - return 0; - } else { - return 1; - } -} -#elif defined(TARGET_PPC) || defined(TARGET_XTENSA) -static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, - flag aIsLargerSignificand) -{ - /* PowerPC propagation rules: - * 1. A if it sNaN or qNaN - * 2. B if it sNaN or qNaN - * A signaling NaN is always silenced before returning it. - */ - if (aIsSNaN || aIsQNaN) { - return 0; - } else { - return 1; - } -} -#else -static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, - flag aIsLargerSignificand) -{ - /* This implements x87 NaN propagation rules: - * SNaN + QNaN => return the QNaN - * two SNaNs => return the one with the larger significand, silenced - * two QNaNs => return the one with the larger significand - * SNaN and a non-NaN => return the SNaN, silenced - * QNaN and a non-NaN => return the QNaN - * - * If we get down to comparing significands and they are the same, - * return the NaN with the positive sign bit (if any). - */ - if (aIsSNaN) { - if (bIsSNaN) { - return aIsLargerSignificand ? 0 : 1; - } - return bIsQNaN ? 1 : 0; - } else if (aIsQNaN) { - if (bIsSNaN || !bIsQNaN) { - return 0; - } else { - return aIsLargerSignificand ? 0 : 1; - } - } else { - return 1; - } -} -#endif - -/*---------------------------------------------------------------------------- -| Select which NaN to propagate for a three-input operation. -| For the moment we assume that no CPU needs the 'larger significand' -| information. -| Return values : 0 : a; 1 : b; 2 : c; 3 : default-NaN -*----------------------------------------------------------------------------*/ -#if defined(TARGET_ARM) -static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, - flag cIsQNaN, flag cIsSNaN, flag infzero, - float_status *status) -{ - /* For ARM, the (inf,zero,qnan) case sets InvalidOp and returns - * the default NaN - */ - if (infzero && cIsQNaN) { - float_raise(float_flag_invalid, status); - return 3; - } - - /* This looks different from the ARM ARM pseudocode, because the ARM ARM - * puts the operands to a fused mac operation (a*b)+c in the order c,a,b. - */ - if (cIsSNaN) { - return 2; - } else if (aIsSNaN) { - return 0; - } else if (bIsSNaN) { - return 1; - } else if (cIsQNaN) { - return 2; - } else if (aIsQNaN) { - return 0; - } else { - return 1; - } -} -#elif defined(TARGET_MIPS) -static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, - flag cIsQNaN, flag cIsSNaN, flag infzero, - float_status *status) -{ - /* For MIPS, the (inf,zero,qnan) case sets InvalidOp and returns - * the default NaN - */ - if (infzero) { - float_raise(float_flag_invalid, status); - return 3; - } - - if (status->snan_bit_is_one) { - /* Prefer sNaN over qNaN, in the a, b, c order. */ - if (aIsSNaN) { - return 0; - } else if (bIsSNaN) { - return 1; - } else if (cIsSNaN) { - return 2; - } else if (aIsQNaN) { - return 0; - } else if (bIsQNaN) { - return 1; - } else { - return 2; - } - } else { - /* Prefer sNaN over qNaN, in the c, a, b order. */ - if (cIsSNaN) { - return 2; - } else if (aIsSNaN) { - return 0; - } else if (bIsSNaN) { - return 1; - } else if (cIsQNaN) { - return 2; - } else if (aIsQNaN) { - return 0; - } else { - return 1; - } - } -} -#elif defined(TARGET_PPC) -static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, - flag cIsQNaN, flag cIsSNaN, flag infzero, - float_status *status) -{ - /* For PPC, the (inf,zero,qnan) case sets InvalidOp, but we prefer - * to return an input NaN if we have one (ie c) rather than generating - * a default NaN - */ - if (infzero) { - float_raise(float_flag_invalid, status); - return 2; - } - - /* If fRA is a NaN return it; otherwise if fRB is a NaN return it; - * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB - */ - if (aIsSNaN || aIsQNaN) { - return 0; - } else if (cIsSNaN || cIsQNaN) { - return 2; - } else { - return 1; - } -} -#else -/* A default implementation: prefer a to b to c. - * This is unlikely to actually match any real implementation. - */ -static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, - flag cIsQNaN, flag cIsSNaN, flag infzero, - float_status *status) -{ - if (aIsSNaN || aIsQNaN) { - return 0; - } else if (bIsSNaN || bIsQNaN) { - return 1; - } else { - return 2; - } -} -#endif - /*---------------------------------------------------------------------------- | Takes two single-precision floating-point values `a' and `b', one of which | is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a | signaling NaN, the invalid exception is raised. *----------------------------------------------------------------------------*/ -static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) +static float32 propagateFloat32NaN( float32 a, float32 b, float_status *status ) { - flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; - flag aIsLargerSignificand; - uint32_t av, bv; - - aIsQuietNaN = float32_is_quiet_nan(a, status); - aIsSignalingNaN = float32_is_signaling_nan(a, status); - bIsQuietNaN = float32_is_quiet_nan(b, status); - bIsSignalingNaN = float32_is_signaling_nan(b, status); - av = float32_val(a); - bv = float32_val(b); - - if (aIsSignalingNaN | bIsSignalingNaN) { - float_raise(float_flag_invalid, status); - } + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - if (status->default_nan_mode) { - return float32_default_nan(status); + aIsNaN = float32_is_nan( a ); + aIsSignalingNaN = float32_is_signaling_nan( a ); + bIsNaN = float32_is_nan( b ); + bIsSignalingNaN = float32_is_signaling_nan( b ); + a |= 0x00400000; + b |= 0x00400000; + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; } - - if ((uint32_t)(av << 1) < (uint32_t)(bv << 1)) { - aIsLargerSignificand = 0; - } else if ((uint32_t)(bv << 1) < (uint32_t)(av << 1)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (av < bv) ? 1 : 0; + else { + return b; } - if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, - aIsLargerSignificand)) { - return float32_maybe_silence_nan(b, status); - } else { - return float32_maybe_silence_nan(a, status); - } } /*---------------------------------------------------------------------------- -| Takes three single-precision floating-point values `a', `b' and `c', one of -| which is a NaN, and returns the appropriate NaN result. If any of `a', -| `b' or `c' is a signaling NaN, the invalid exception is raised. -| The input infzero indicates whether a*b was 0*inf or inf*0 (in which case -| obviously c is a NaN, and whether to propagate c or some other NaN is -| implementation defined). +| Returns 1 if the double-precision floating-point value `a' is a NaN; +| otherwise returns 0. *----------------------------------------------------------------------------*/ -static float32 propagateFloat32MulAddNaN(float32 a, float32 b, - float32 c, flag infzero, - float_status *status) +static inline flag float64_is_nan( float64 a ) { - flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, - cIsQuietNaN, cIsSignalingNaN; - int which; - - aIsQuietNaN = float32_is_quiet_nan(a, status); - aIsSignalingNaN = float32_is_signaling_nan(a, status); - bIsQuietNaN = float32_is_quiet_nan(b, status); - bIsSignalingNaN = float32_is_signaling_nan(b, status); - cIsQuietNaN = float32_is_quiet_nan(c, status); - cIsSignalingNaN = float32_is_signaling_nan(c, status); - - if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) { - float_raise(float_flag_invalid, status); - } - - which = pickNaNMulAdd(aIsQuietNaN, aIsSignalingNaN, - bIsQuietNaN, bIsSignalingNaN, - cIsQuietNaN, cIsSignalingNaN, infzero, status); - - if (status->default_nan_mode) { - /* Note that this check is after pickNaNMulAdd so that function - * has an opportunity to set the Invalid flag. - */ - return float32_default_nan(status); - } - switch (which) { - case 0: - return float32_maybe_silence_nan(a, status); - case 1: - return float32_maybe_silence_nan(b, status); - case 2: - return float32_maybe_silence_nan(c, status); - case 3: - default: - return float32_default_nan(status); - } -} + return ( LIT64( 0xFFE0000000000000 ) < (uint64_t) ( a<<1 ) ); -#ifdef NO_SIGNALING_NANS -static int float64_is_quiet_nan(float64 a_, float_status *status) -{ - return float64_is_any_nan(a_); -} - -int float64_is_signaling_nan(float64 a_, float_status *status) -{ - return 0; -} -#else -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is a quiet -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -static int float64_is_quiet_nan(float64 a_, float_status *status) -{ - uint64_t a = float64_val(a_); - if (status->snan_bit_is_one) { - return (((a >> 51) & 0xFFF) == 0xFFE) - && (a & 0x0007FFFFFFFFFFFFULL); - } else { - return ((a << 1) >= 0xFFF0000000000000ULL); - } } /*---------------------------------------------------------------------------- @@ -781,35 +225,13 @@ static int float64_is_quiet_nan(float64 a_, float_status *status) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -static int float64_is_signaling_nan(float64 a_, float_status *status) +static inline flag float64_is_signaling_nan( float64 a ) { - uint64_t a = float64_val(a_); - if (status->snan_bit_is_one) { - return ((a << 1) >= 0xFFF0000000000000ULL); - } else { - return (((a >> 51) & 0xFFF) == 0xFFE) - && (a & LIT64(0x0007FFFFFFFFFFFF)); - } -} -#endif -/*---------------------------------------------------------------------------- -| Returns a quiet NaN if the double-precision floating point value `a' is a -| signaling NaN; otherwise returns `a'. -*----------------------------------------------------------------------------*/ + return + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); -static float64 float64_maybe_silence_nan(float64 a_, float_status *status) -{ - if (float64_is_signaling_nan(a_, status)) { - if (status->snan_bit_is_one) { - return float64_default_nan(status); - } else { - uint64_t a = float64_val(a_); - a |= LIT64(0x0008000000000000); - return make_float64(a); - } - } - return a_; } /*---------------------------------------------------------------------------- @@ -822,7 +244,7 @@ static commonNaNT float64ToCommonNaN(float64 a, float_status *status) { commonNaNT z; - if (float64_is_signaling_nan(a, status)) { + if (float64_is_signaling_nan(a)) { float_raise(float_flag_invalid, status); } z.sign = float64_val(a) >> 63; @@ -838,20 +260,10 @@ static commonNaNT float64ToCommonNaN(float64 a, float_status *status) static float64 commonNaNToFloat64(commonNaNT a, float_status *status) { - uint64_t mantissa = a.high >> 12; - - if (status->default_nan_mode) { - return float64_default_nan(status); - } - - if (mantissa) { - return make_float64( - (((uint64_t) a.sign) << 63) - | LIT64(0x7FF0000000000000) - | (a.high >> 12)); - } else { - return float64_default_nan(status); - } + return + ( ( (uint64_t) a.sign )<<63 ) + | LIT64( 0x7FF8000000000000 ) + | ( a.high>>12 ); } /*---------------------------------------------------------------------------- @@ -860,192 +272,41 @@ static float64 commonNaNToFloat64(commonNaNT a, float_status *status) | signaling NaN, the invalid exception is raised. *----------------------------------------------------------------------------*/ -static float64 propagateFloat64NaN(float64 a, float64 b, float_status *status) -{ - flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; - flag aIsLargerSignificand; - uint64_t av, bv; - - aIsQuietNaN = float64_is_quiet_nan(a, status); - aIsSignalingNaN = float64_is_signaling_nan(a, status); - bIsQuietNaN = float64_is_quiet_nan(b, status); - bIsSignalingNaN = float64_is_signaling_nan(b, status); - av = float64_val(a); - bv = float64_val(b); - - if (aIsSignalingNaN | bIsSignalingNaN) { - float_raise(float_flag_invalid, status); - } - - if (status->default_nan_mode) { - return float64_default_nan(status); - } - - if ((uint64_t)(av << 1) < (uint64_t)(bv << 1)) { - aIsLargerSignificand = 0; - } else if ((uint64_t)(bv << 1) < (uint64_t)(av << 1)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (av < bv) ? 1 : 0; - } - - if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, - aIsLargerSignificand)) { - return float64_maybe_silence_nan(b, status); - } else { - return float64_maybe_silence_nan(a, status); - } -} - -/*---------------------------------------------------------------------------- -| Takes three double-precision floating-point values `a', `b' and `c', one of -| which is a NaN, and returns the appropriate NaN result. If any of `a', -| `b' or `c' is a signaling NaN, the invalid exception is raised. -| The input infzero indicates whether a*b was 0*inf or inf*0 (in which case -| obviously c is a NaN, and whether to propagate c or some other NaN is -| implementation defined). -*----------------------------------------------------------------------------*/ - -static float64 propagateFloat64MulAddNaN(float64 a, float64 b, - float64 c, flag infzero, - float_status *status) +static float64 propagateFloat64NaN( float64 a, float64 b, float_status *status ) { - flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, - cIsQuietNaN, cIsSignalingNaN; - int which; - - aIsQuietNaN = float64_is_quiet_nan(a, status); - aIsSignalingNaN = float64_is_signaling_nan(a, status); - bIsQuietNaN = float64_is_quiet_nan(b, status); - bIsSignalingNaN = float64_is_signaling_nan(b, status); - cIsQuietNaN = float64_is_quiet_nan(c, status); - cIsSignalingNaN = float64_is_signaling_nan(c, status); - - if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) { - float_raise(float_flag_invalid, status); - } - - which = pickNaNMulAdd(aIsQuietNaN, aIsSignalingNaN, - bIsQuietNaN, bIsSignalingNaN, - cIsQuietNaN, cIsSignalingNaN, infzero, status); + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - if (status->default_nan_mode) { - /* Note that this check is after pickNaNMulAdd so that function - * has an opportunity to set the Invalid flag. - */ - return float64_default_nan(status); + aIsNaN = float64_is_nan( a ); + aIsSignalingNaN = float64_is_signaling_nan( a ); + bIsNaN = float64_is_nan( b ); + bIsSignalingNaN = float64_is_signaling_nan( b ); + a |= LIT64( 0x0008000000000000 ); + b |= LIT64( 0x0008000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; } - - switch (which) { - case 0: - return float64_maybe_silence_nan(a, status); - case 1: - return float64_maybe_silence_nan(b, status); - case 2: - return float64_maybe_silence_nan(c, status); - case 3: - default: - return float64_default_nan(status); + else { + return b; } -} - -#ifdef NO_SIGNALING_NANS -static int floatx80_is_quiet_nan(floatx80 a_, float_status *status) -{ - return floatx80_is_any_nan(a_); -} -static int floatx80_is_signaling_nan(floatx80 a_, float_status *status) -{ - return 0; } -#else -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is a -| quiet NaN; otherwise returns 0. This slightly differs from the same -| function for other types as floatx80 has an explicit bit. -*----------------------------------------------------------------------------*/ - -static int floatx80_is_quiet_nan(floatx80 a, float_status *status) -{ - if (status->snan_bit_is_one) { - uint64_t aLow; - - aLow = a.low & ~0x4000000000000000ULL; - return ((a.high & 0x7FFF) == 0x7FFF) - && (aLow << 1) - && (a.low == aLow); - } else { - return ((a.high & 0x7FFF) == 0x7FFF) - && (LIT64(0x8000000000000000) <= ((uint64_t)(a.low << 1))); - } -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is a -| signaling NaN; otherwise returns 0. This slightly differs from the same -| function for other types as floatx80 has an explicit bit. -*----------------------------------------------------------------------------*/ - -static int floatx80_is_signaling_nan(floatx80 a, float_status *status) -{ - if (status->snan_bit_is_one) { - return ((a.high & 0x7FFF) == 0x7FFF) - && ((a.low << 1) >= 0x8000000000000000ULL); - } else { - uint64_t aLow; - - aLow = a.low & ~LIT64(0x4000000000000000); - return ((a.high & 0x7FFF) == 0x7FFF) - && (uint64_t)(aLow << 1) - && (a.low == aLow); - } -} -#endif - -/*---------------------------------------------------------------------------- -| Returns a quiet NaN if the extended double-precision floating point value -| `a' is a signaling NaN; otherwise returns `a'. -*----------------------------------------------------------------------------*/ - -static floatx80 floatx80_maybe_silence_nan(floatx80 a, float_status *status) -{ - if (floatx80_is_signaling_nan(a, status)) { - if (status->snan_bit_is_one) { - a = floatx80_default_nan(status); - } else { - a.low |= LIT64(0xC000000000000000); - return a; - } - } - return a; -} - /*---------------------------------------------------------------------------- | Returns the result of converting the extended double-precision floating- | point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the | invalid exception is raised. *----------------------------------------------------------------------------*/ -static commonNaNT floatx80ToCommonNaN(floatx80 a, float_status *status) +static commonNaNT floatx80ToCommonNaN( floatx80 a, float_status *status ) { - floatx80 dflt; commonNaNT z; - if (floatx80_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - if (a.low >> 63) { - z.sign = a.high >> 15; - z.low = 0; - z.high = a.low << 1; - } else { - dflt = floatx80_default_nan(status); - z.sign = dflt.high >> 15; - z.low = 0; - z.high = dflt.low << 1; - } + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_signaling, status ); + z.sign = a.high>>15; + z.low = 0; + z.high = a.low<<1; return z; + } /*---------------------------------------------------------------------------- @@ -1056,17 +317,12 @@ static commonNaNT floatx80ToCommonNaN(floatx80 a, float_status *status) static floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) { floatx80 z; - - if (status->default_nan_mode) { - return floatx80_default_nan(status); - } - - if (a.high >> 1) { - z.low = LIT64(0x8000000000000000) | a.high >> 1; - z.high = (((uint16_t)a.sign) << 15) | 0x7FFF; - } else { - z = floatx80_default_nan(status); - } +#ifdef SOFTFLOAT_68K + z.low = LIT64( 0x4000000000000000 ) | ( a.high>>1 ); +#else + z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); +#endif + z.high = ( ( (int16_t) a.sign )<<15 ) | 0x7FFF; return z; } @@ -1076,178 +332,65 @@ static floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) | `b' is a signaling NaN, the invalid exception is raised. *----------------------------------------------------------------------------*/ -static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, - float_status *status) +static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b, float_status *status ) { - flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; - flag aIsLargerSignificand; - - aIsQuietNaN = floatx80_is_quiet_nan(a, status); - aIsSignalingNaN = floatx80_is_signaling_nan(a, status); - bIsQuietNaN = floatx80_is_quiet_nan(b, status); - bIsSignalingNaN = floatx80_is_signaling_nan(b, status); - - if (aIsSignalingNaN | bIsSignalingNaN) { - float_raise(float_flag_invalid, status); - } - - if (status->default_nan_mode) { - return floatx80_default_nan(status); - } + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - if (a.low < b.low) { - aIsLargerSignificand = 0; - } else if (b.low < a.low) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (a.high < b.high) ? 1 : 0; - } - - if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, - aIsLargerSignificand)) { - return floatx80_maybe_silence_nan(b, status); - } else { - return floatx80_maybe_silence_nan(a, status); - } -} - -#ifdef NO_SIGNALING_NANS -int float128_is_quiet_nan(float128 a_, float_status *status) -{ - return float128_is_any_nan(a_); -} - -int float128_is_signaling_nan(float128 a_, float_status *status) -{ - return 0; -} + aIsNaN = floatx80_is_nan( a ); + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsNaN = floatx80_is_nan( b ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); +#ifdef SOFTFLOAT_68K + a.low |= LIT64( 0x4000000000000000 ); + b.low |= LIT64( 0x4000000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); + return aIsNaN ? a : b; #else -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is a quiet -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -static int float128_is_quiet_nan(float128 a, float_status *status) -{ - if (status->snan_bit_is_one) { - return (((a.high >> 47) & 0xFFFF) == 0xFFFE) - && (a.low || (a.high & 0x00007FFFFFFFFFFFULL)); - } else { - return ((a.high << 1) >= 0xFFFF000000000000ULL) - && (a.low || (a.high & 0x0000FFFFFFFFFFFFULL)); + a.low |= LIT64( 0xC000000000000000 ); + b.low |= LIT64( 0xC000000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; } -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is a -| signaling NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -static int float128_is_signaling_nan(float128 a, float_status *status) -{ - if (status->snan_bit_is_one) { - return ((a.high << 1) >= 0xFFFF000000000000ULL) - && (a.low || (a.high & 0x0000FFFFFFFFFFFFULL)); - } else { - return (((a.high >> 47) & 0xFFFF) == 0xFFFE) - && (a.low || (a.high & LIT64(0x00007FFFFFFFFFFF))); + else { + return b; } -} #endif -/*---------------------------------------------------------------------------- -| Returns a quiet NaN if the quadruple-precision floating point value `a' is -| a signaling NaN; otherwise returns `a'. -*----------------------------------------------------------------------------*/ - -static float128 float128_maybe_silence_nan(float128 a, float_status *status) -{ - if (float128_is_signaling_nan(a, status)) { - if (status->snan_bit_is_one) { - a = float128_default_nan(status); - } else { - a.high |= LIT64(0x0000800000000000); - return a; - } - } - return a; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float128ToCommonNaN(float128 a, float_status *status) -{ - commonNaNT z; - - if (float128_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = a.high >> 63; - shortShift128Left(a.high, a.low, 16, &z.high, &z.low); - return z; } +#ifdef SOFTFLOAT_68K /*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the quadruple- -| precision floating-point format. -*----------------------------------------------------------------------------*/ + | Takes extended double-precision floating-point NaN `a' and returns the + | appropriate NaN result. If `a' is a signaling NaN, the invalid exception + | is raised. + *----------------------------------------------------------------------------*/ -static float128 commonNaNToFloat128(commonNaNT a, float_status *status) +static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) { - float128 z; - - if (status->default_nan_mode) { - return float128_default_nan(status); - } - - shift128Right(a.high, a.low, 16, &z.high, &z.low); - z.high |= (((uint64_t)a.sign) << 63) | LIT64(0x7FFF000000000000); - return z; + if ( floatx80_is_signaling_nan( a ) ) + float_raise( float_flag_signaling, status ); + a.low |= LIT64( 0x4000000000000000 ); + + return a; } +#endif /*---------------------------------------------------------------------------- -| Takes two quadruple-precision floating-point values `a' and `b', one of -| which is a NaN, and returns the appropriate NaN result. If either `a' or -| `b' is a signaling NaN, the invalid exception is raised. +| Returns 1 if the extended double-precision floating-point value `a' is a +| signaling NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -static float128 propagateFloat128NaN(float128 a, float128 b, - float_status *status) +static flag floatx80_is_signaling_nan( floatx80 a ) { - flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; - flag aIsLargerSignificand; - - aIsQuietNaN = float128_is_quiet_nan(a, status); - aIsSignalingNaN = float128_is_signaling_nan(a, status); - bIsQuietNaN = float128_is_quiet_nan(b, status); - bIsSignalingNaN = float128_is_signaling_nan(b, status); - - if (aIsSignalingNaN | bIsSignalingNaN) { - float_raise(float_flag_invalid, status); - } + uint64_t aLow; - if (status->default_nan_mode) { - return float128_default_nan(status); - } + aLow = a.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (uint64_t) ( aLow<<1 ) + && ( a.low == aLow ); - if (lt128(a.high << 1, a.low, b.high << 1, b.low)) { - aIsLargerSignificand = 0; - } else if (lt128(b.high << 1, b.low, a.high << 1, a.low)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (a.high < b.high) ? 1 : 0; - } - - if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, - aIsLargerSignificand)) { - return float128_maybe_silence_nan(b, status); - } else { - return float128_maybe_silence_nan(a, status); - } } // 28-12-2016: Added for Previous: diff --git a/softfloat/softfloat.cpp b/softfloat/softfloat.cpp index 4883d1a8..d61934f9 100644 --- a/softfloat/softfloat.cpp +++ b/softfloat/softfloat.cpp @@ -106,33 +106,6 @@ this code that are retained. *----------------------------------------------------------------------------*/ #include "softfloat-specialize.h" -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the half-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint32_t extractFloat16Frac(float16 a) -{ - return float16_val(a) & 0x3ff; -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the half-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline int extractFloat16Exp(float16 a) -{ - return (float16_val(a) >> 10) & 0x1f; -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline flag extractFloat16Sign(float16 a) -{ - return float16_val(a)>>15; -} - /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 | and 7, and returns the properly rounded 32-bit integer corresponding to the @@ -326,61 +299,6 @@ static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, } -/*---------------------------------------------------------------------------- -| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and -| `absZ1', with binary point between bits 63 and 64 (between the input words), -| and returns the properly rounded 64-bit unsigned integer corresponding to the -| input. Ordinarily, the fixed-point input is simply rounded to an integer, -| with the inexact exception raised if the input cannot be represented exactly -| as an integer. However, if the fixed-point input is too large, the invalid -| exception is raised and the largest unsigned integer is returned. -*----------------------------------------------------------------------------*/ - -static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0, - uint64_t absZ1, float_status *status) -{ - int8_t roundingMode; - flag roundNearestEven, increment; - - roundingMode = status->float_rounding_mode; - roundNearestEven = (roundingMode == float_round_nearest_even); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)absZ1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && absZ1; - break; - case float_round_down: - increment = zSign && absZ1; - break; - default: - abort(); - } - if (increment) { - ++absZ0; - if (absZ0 == 0) { - float_raise(float_flag_invalid, status); - return LIT64(0xFFFFFFFFFFFFFFFF); - } - absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); - } - - if (zSign && absZ0) { - float_raise(float_flag_invalid, status); - return 0; - } - - if (absZ1) { - status->float_exception_flags |= float_flag_inexact; - } - return absZ0; -} - /*---------------------------------------------------------------------------- | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -414,21 +332,6 @@ static inline flag extractFloat32Sign( float32 a ) } -/*---------------------------------------------------------------------------- -| If `a' is denormal and we are in flush-to-zero mode then set the -| input-denormal exception and return zero. Otherwise just return the value. -*----------------------------------------------------------------------------*/ -float32 float32_squash_input_denormal(float32 a, float_status *status) -{ - if (status->flush_inputs_to_zero) { - if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) { - //float_raise(float_flag_input_denormal, status); - return make_float32(float32_val(a) & 0x80000000); - } - } - return a; -} - /*---------------------------------------------------------------------------- | Normalizes the subnormal single-precision floating-point value represented | by the denormalized significand `aSig'. The normalized exponent and @@ -771,7 +674,7 @@ static float64 | value `a'. *----------------------------------------------------------------------------*/ -static inline uint64_t extractFloatx80Frac( floatx80 a ) +uint64_t extractFloatx80Frac( floatx80 a ) { return a.low; @@ -783,7 +686,7 @@ static inline uint64_t extractFloatx80Frac( floatx80 a ) | value `a'. *----------------------------------------------------------------------------*/ -static inline int32_t extractFloatx80Exp( floatx80 a ) +int32_t extractFloatx80Exp( floatx80 a ) { return a.high & 0x7FFF; @@ -795,7 +698,7 @@ static inline int32_t extractFloatx80Exp( floatx80 a ) | `a'. *----------------------------------------------------------------------------*/ -static inline flag extractFloatx80Sign( floatx80 a ) +flag extractFloatx80Sign( floatx80 a ) { return a.high>>15; @@ -809,8 +712,7 @@ static inline flag extractFloatx80Sign( floatx80 a ) | `zSigPtr', respectively. *----------------------------------------------------------------------------*/ -static void - normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr ) +void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr ) { int8_t shiftCount; @@ -828,7 +730,7 @@ static void | extended double-precision floating-point value, returning the result. *----------------------------------------------------------------------------*/ -static inline floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig ) +floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig ) { floatx80 z; @@ -862,7 +764,7 @@ static inline floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig ) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) { @@ -1008,7 +910,7 @@ static floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, ) { return packFloatx80( zSign, 0x7FFE, ~ roundMask ); } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); } #ifdef SOFTFLOAT_68K if ( zExp < 0 ) { @@ -1124,7 +1026,7 @@ floatx80 roundAndPackFloatx80Sgl( flag zSign, int32_t zExp, uint64_t zSig0, uint ) { return packFloatx80( zSign, 0x7FFE, LIT64( 0xFFFFFFFFFFFFFFFF ) ); } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); } if ( zExp < 0 ) { @@ -1198,6661 +1100,1501 @@ static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, } /*---------------------------------------------------------------------------- -| Returns the least-significant 64 fraction bits of the quadruple-precision -| floating-point value `a'. +| Returns the result of converting the 32-bit two's complement integer `a' +| to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. *----------------------------------------------------------------------------*/ -static inline uint64_t extractFloat128Frac1( float128 a ) +floatx80 int32_to_floatx80(int32_t a, float_status *status) { + flag zSign; + uint32_t absA; + int8_t shiftCount; + uint64_t zSig; - return a.low; + if ( a == 0 ) return packFloatx80( 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 32; + zSig = absA; + return packFloatx80( zSign, 0x403E - shiftCount, zSig<>48 ) & 0x7FFF; - + flag aSign; + int16_t aExp; + uint32_t aSig; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + if (aExp == 0xFF) { + aSig |= 0x00800000; + return packFloatx80( aSign, 0x7FFF, ( (uint64_t) aSig )<<40 ); + } + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + return packFloatx80(aSign, 0x3F81, ((uint64_t) aSig) << 40); + } + aSig |= 0x00800000; + return packFloatx80(aSign, aExp + 0x3F80, ((uint64_t)aSig) << 40); + } +#endif // end of addition for Previous /*---------------------------------------------------------------------------- -| Returns the sign bit of the quadruple-precision floating-point value `a'. +| Returns the result of converting the double-precision floating-point value +| `a' to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. *----------------------------------------------------------------------------*/ -static inline flag extractFloat128Sign( float128 a ) +floatx80 float64_to_floatx80(float64 a, float_status *status) { + flag aSign; + int aExp; + uint64_t aSig; - return a.high>>63; + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a, status ), status ); + return packFloatx80( aSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); } -/*---------------------------------------------------------------------------- -| Normalizes the subnormal quadruple-precision floating-point value -| represented by the denormalized significand formed by the concatenation of -| `aSig0' and `aSig1'. The normalized exponent is stored at the location -| pointed to by `zExpPtr'. The most significant 49 bits of the normalized -| significand are stored at the location pointed to by `zSig0Ptr', and the -| least significant 64 bits of the normalized significand are stored at the -| location pointed to by `zSig1Ptr'. -*----------------------------------------------------------------------------*/ - -static void - normalizeFloat128Subnormal( - uint64_t aSig0, - uint64_t aSig1, - int32_t *zExpPtr, - uint64_t *zSig0Ptr, - uint64_t *zSig1Ptr - ) +#ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous +floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status ) { - int8_t shiftCount; - - if ( aSig0 == 0 ) { - shiftCount = countLeadingZeros64( aSig1 ) - 15; - if ( shiftCount < 0 ) { - *zSig0Ptr = aSig1>>( - shiftCount ); - *zSig1Ptr = aSig1<<( shiftCount & 63 ); - } - else { - *zSig0Ptr = aSig1<>32); + } + float_raise( float_flag_invalid, status ); + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } +#else + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; +#endif + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32(aSign, aSig, status); } -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and extended significand formed by the concatenation of `zSig0', `zSig1', -| and `zSig2', and returns the proper quadruple-precision floating-point value -| corresponding to the abstract input. Ordinarily, the abstract value is -| simply rounded and packed into the quadruple-precision format, with the -| inexact exception raised if the abstract input cannot be represented -| exactly. However, if the abstract value is too large, the overflow and -| inexact exceptions are raised and an infinity or maximal finite value is -| returned. If the abstract value is too small, the input value is rounded to -| a subnormal number, and the underflow and inexact exceptions are raised if -| the abstract input cannot be represented exactly as a subnormal quadruple- -| precision floating-point number. -| The input significand must be normalized or smaller. If the input -| significand is not normalized, `zExp' must be 0; in that case, the result -| returned is a subnormal number, and it must not require rounding. In the -| usual case that the input significand is normalized, `zExp' must be 1 less -| than the ``true'' floating-point exponent. The handling of underflow and -| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float128 roundAndPackFloat128(flag zSign, int32_t zExp, - uint64_t zSig0, uint64_t zSig1, - uint64_t zSig2, float_status *status) +#ifdef SOFTFLOAT_68K // 30-01-2017: Addition for Previous +int16_t floatx80_to_int16( floatx80 a, float_status *status) { - int8_t roundingMode; - flag roundNearestEven, increment, isTiny; - - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig2 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig2; - break; - case float_round_down: - increment = zSign && zSig2; - break; - default: - abort(); - } - if ( 0x7FFD <= (uint32_t) zExp ) { - if ( ( 0x7FFD < zExp ) - || ( ( zExp == 0x7FFD ) - && eq128( - LIT64( 0x0001FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ), - zSig0, - zSig1 - ) - && increment - ) - ) { - float_raise(float_flag_overflow | float_flag_inexact, status); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! zSign && ( roundingMode == float_round_down ) ) - ) { - return - packFloat128( - zSign, - 0x7FFE, - LIT64( 0x0000FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ) - ); - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - //float_raise(float_flag_output_denormal, status); - return packFloat128(zSign, 0, 0, 0); - } - isTiny = - (status->float_detect_tininess - == float_tininess_before_rounding) - || ( zExp < -1 ) - || ! increment - || lt128( - zSig0, - zSig1, - LIT64( 0x0001FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ) - ); - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); - zExp = 0; - if (isTiny && zSig2) { - float_raise(float_flag_underflow, status); - } - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig2 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig2; - break; - case float_round_down: - increment = zSign && zSig2; - break; - default: - abort(); - } + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + float_raise( float_flag_invalid, status ); + if ( (uint64_t) ( aSig<<1 ) ) { + a = propagateFloatx80NaNOneArg( a, status ); + if ( a.low == aSig ) float_raise( float_flag_invalid, status ); + return (int16_t)(a.low>>48); } + return aSign ? (int16_t) 0x8000 : 0x7FFF; } - if (zSig2) { - status->float_exception_flags |= float_flag_inexact; - } - if ( increment ) { - add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); - } - else { - if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; - } - return packFloat128( zSign, zExp, zSig0, zSig1 ); - + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt16( aSign, aSig, status ); + } - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand formed by the concatenation of `zSig0' and `zSig1', and -| returns the proper quadruple-precision floating-point value corresponding -| to the abstract input. This routine is just like `roundAndPackFloat128' -| except that the input significand has fewer bits and does not have to be -| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- -| point exponent. -*----------------------------------------------------------------------------*/ - -static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, - uint64_t zSig0, uint64_t zSig1, - float_status *status) +int8_t floatx80_to_int8( floatx80 a, float_status *status) { - int8_t shiftCount; - uint64_t zSig2; - - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = countLeadingZeros64( zSig0 ) - 15; - if ( 0 <= shiftCount ) { - zSig2 = 0; - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - } - else { - shift128ExtraRightJamming( - zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) { + a = propagateFloatx80NaNOneArg( a, status ); + if ( a.low == aSig ) float_raise( float_flag_invalid, status ); + return (int8_t)(a.low>>56); + } + float_raise( float_flag_invalid, status ); + return aSign ? (int8_t) 0x80 : 0x7F; } - zExp -= shiftCount; - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); - + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt8( aSign, aSig, status ); + } +#endif // End of addition for Previous -/*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the single-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 int32_to_float32(int32_t a, float_status *status) -{ - flag zSign; - - if ( a == 0 ) return float32_zero; - if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); - zSign = ( a < 0 ); - return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status); -} /*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the double-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic, except that the conversion is always rounded +| toward zero. If `a' is a NaN, the largest positive integer is returned. +| Otherwise, if the conversion overflows, the largest integer with the same +| sign as `a' is returned. *----------------------------------------------------------------------------*/ -float64 int32_to_float64(int32_t a, float_status *status) +int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) { - flag zSign; - uint32_t absA; - int8_t shiftCount; - uint64_t zSig; + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig, savedASig; + int32_t z; - if ( a == 0 ) return float64_zero; - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros32( absA ) + 21; - zSig = absA; - return packFloat64( zSign, 0x432 - shiftCount, zSig<float_exception_flags |= float_flag_inexact; + } + return 0; + } + shiftCount = 0x403E - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<float_exception_flags |= float_flag_inexact; + } + return z; } /*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 64-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, +| the largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -floatx80 int32_to_floatx80(int32_t a, float_status *status) +int64_t floatx80_to_int64(floatx80 a, float_status *status) { - flag zSign; - uint32_t absA; - int8_t shiftCount; - uint64_t zSig; + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig, aSigExtra; - if ( a == 0 ) return packFloatx80( 0, 0, 0 ); - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros32( absA ) + 32; - zSig = absA; - return packFloatx80( zSign, 0x403E - shiftCount, zSig<floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); + } + return a; + } +#endif -/*---------------------------------------------------------------------------- -| Returns the result of converting the 64-bit two's complement integer `a' -| to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 int64_to_floatx80(int64_t a, float_status *status) +#ifdef SOFTFLOAT_68K // 30-01-2016: Added for Previous +floatx80 floatx80_round32( floatx80 a, float_status *status ) { - flag zSign; - uint64_t absA; - int8_t shiftCount; - - if ( a == 0 ) return packFloatx80( 0, 0, 0 ); - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros64( absA ); - return packFloatx80( zSign, 0x403E - shiftCount, absA<= 0) { - return packFloat32(0, 0x95 - shiftcount, a << shiftcount); + shift64RightJamming( aSig, 33, &aSig ); + aExp -= 0x3F81; + z = roundAndPackFloat32( zSign, aExp, aSig, status ); + + zSig = extractFloat32Frac( z ); + zExp = extractFloat32Exp( z ); + if ( zExp == 0xFF ) { + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); } - /* Otherwise we need to do a round-and-pack. roundAndPackFloat32() - * expects the binary point between bits 30 and 29, hence the + 7. - */ - shiftcount += 7; - if (shiftcount < 0) { - shift64RightJamming(a, -shiftcount, &a); - } else { - a <<= shiftcount; + if ( zExp == 0 ) { + if ( zSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloat32Subnormal( zSig, &zExp, &zSig ); } - - return roundAndPackFloat32(0, 0x9c - shiftcount, a, status); + zSig |= 0x00800000; + return packFloatx80( zSign, zExp + 0x3F80, ( (uint64_t) zSig )<<40 ); } - -/*---------------------------------------------------------------------------- -| Returns the result of converting the 64-bit unsigned integer `a' -| to the double-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 uint64_to_float64(uint64_t a, float_status *status) + +floatx80 floatx80_round_to_float64( floatx80 a, float_status *status ) { - int exp = 0x43C; - int shiftcount; + int32_t aExp; + uint64_t aSig, zSig; + flag zSign; + int32_t zExp; + float64 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + zSign = extractFloatx80Sign( a ); - if (a == 0) { - return float64_zero; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); + return a; } + if ( aExp == 0 && aSig == 0 ) return a; - shiftcount = countLeadingZeros64(a) - 1; - if (shiftcount < 0) { - shift64RightJamming(a, -shiftcount, &a); - } else { - a <<= shiftcount; + shift64RightJamming( aSig, 1, &aSig ); + aExp -= 0x3C01; + z = roundAndPackFloat64( zSign, aExp, aSig, status ); + + zSig = extractFloat64Frac( z ); + zExp = extractFloat64Exp( z ); + if ( zExp == 0x7FF ) { + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); } - return roundAndPackFloat64(0, exp - shiftcount, a, status); -} - -#if 0 -/*---------------------------------------------------------------------------- -| Returns the result of converting the 64-bit unsigned integer `a' -| to the quadruple-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 uint64_to_float128(uint64_t a, float_status *status) -{ - if (a == 0) { - return float128_zero; + if ( zExp == 0 ) { + if ( zSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloat64Subnormal( zSig, &zExp, &zSig ); } - return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status); -} -#endif + zSig |= LIT64( 0x0010000000000000 ); + return packFloatx80( zSign, zExp + 0x3C00, zSig<<11 ); -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ +} + -int32_t float32_to_int32(float32 a, float_status *status) +floatx80 floatx80_normalize( floatx80 a ) { flag aSign; - int aExp; - int shiftCount; - uint32_t aSig; - uint64_t aSig64; - - a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( ( aExp == 0xFF ) && aSig ) aSign = 0; - if ( aExp ) aSig |= 0x00800000; - shiftCount = 0xAF - aExp; - aSig64 = aSig; - aSig64 <<= 32; - if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); - return roundAndPackInt32(aSign, aSig64, status); - + int16_t aExp; + uint64_t aSig; + int8_t shiftCount; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF || aExp == 0 ) return a; + if ( aSig == 0 ) return packFloatx80(aSign, 0, 0); + + shiftCount = countLeadingZeros64( aSig ); + + if ( shiftCount > aExp ) shiftCount = aExp; + + aExp -= shiftCount; + aSig <<= shiftCount; + + return packFloatx80( aSign, aExp, aSig ); } +#endif // end of addition for Previous /*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. +| Rounds the extended double-precision floating-point value `a' to an integer, +| and returns the result as an extended quadruple-precision floating-point +| value. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -int32_t float32_to_int32_round_to_zero(float32 a, float_status *status) +floatx80 floatx80_round_to_int(floatx80 a, float_status *status) { flag aSign; - int aExp; - int shiftCount; - uint32_t aSig; - int32_t z; - a = float32_squash_input_denormal(a, status); + int32_t aExp; + uint64_t lastBitMask, roundBitsMask; + floatx80 z; - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = aExp - 0x9E; - if ( 0 <= shiftCount ) { - if ( float32_val(a) != 0xCF000000 ) { - float_raise(float_flag_invalid, status); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { + return propagateFloatx80NaNOneArg(a, status); } - return (int32_t) 0x80000000; + return a; } - else if ( aExp <= 0x7E ) { - if (aExp | aSig) { - status->float_exception_flags |= float_flag_inexact; + if ( aExp < 0x3FFF ) { + if ( ( aExp == 0 ) + #ifdef SOFTFLOAT_68K + && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { +#else + && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { +#endif + return a; } - return 0; + status->float_exception_flags |= float_flag_inexact; + aSign = extractFloatx80Sign( a ); + switch (status->float_rounding_mode) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) + ) { + return + packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000)); + } + break; + case float_round_down: + return + aSign ? + packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) + : packFloatx80( 0, 0, 0 ); + case float_round_up: + return + aSign ? packFloatx80( 1, 0, 0 ) + : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + return packFloatx80( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + switch (status->float_rounding_mode) { + case float_round_nearest_even: + z.low += lastBitMask>>1; + if ((z.low & roundBitsMask) == 0) { + z.low &= ~lastBitMask; + } + break; + case float_round_ties_away: + z.low += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + case float_round_down: + if (extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + default: + abort(); + } + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); } - aSig = ( aSig | 0x00800000 )<<8; - z = aSig>>( - shiftCount ); - if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) { + if (z.low != a.low) { status->float_exception_flags |= float_flag_inexact; } - if ( aSign ) z = - z; return z; } -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 16-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int16_t float32_to_int16_round_to_zero(float32 a, float_status *status) -{ +#ifdef SOFTFLOAT_68K // 09-01-2017: Added for Previous +floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status) +{ flag aSign; - int aExp; - int shiftCount; - uint32_t aSig; - int32_t z; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = aExp - 0x8E; - if ( 0 <= shiftCount ) { - if ( float32_val(a) != 0xC7000000 ) { - float_raise(float_flag_invalid, status); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { - return 0x7FFF; - } + int32_t aExp; + uint64_t lastBitMask, roundBitsMask; + floatx80 z; + + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { + return propagateFloatx80NaNOneArg( a, status ); } - return (int32_t) 0xffff8000; + return a; } - else if ( aExp <= 0x7E ) { - if ( aExp | aSig ) { - status->float_exception_flags |= float_flag_inexact; + if ( aExp < 0x3FFF ) { + if ( ( aExp == 0 ) +#ifdef SOFTFLOAT_68K + && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { +#else + && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { +#endif + return a; } - return 0; - } - shiftCount -= 0x10; - aSig = ( aSig | 0x00800000 )<<8; - z = aSig>>( - shiftCount ); - if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) { status->float_exception_flags |= float_flag_inexact; + aSign = extractFloatx80Sign( a ); + return packFloatx80( aSign, 0, 0 ); } - if ( aSign ) { - z = - z; + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); } + if ( z.low != a.low ) status->float_exception_flags |= float_flag_inexact; return z; - + } +#endif // End of addition for Previous /*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 64-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. +| Returns the result of adding the absolute values of the extended double- +| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is +| negated before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -int64_t float32_to_int64(float32 a, float_status *status) +static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, + float_status *status) { - flag aSign; - int aExp; - int shiftCount; - uint32_t aSig; - uint64_t aSig64, aSigExtra; - a = float32_squash_input_denormal(a, status); + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + int32_t expDiff; - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = 0xBE - aExp; - if ( shiftCount < 0 ) { - float_raise(float_flag_invalid, status); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); +#ifdef SOFTFLOAT_68K + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } +#endif + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return a; } - return (int64_t) LIT64( 0x8000000000000000 ); +#ifndef SOFTFLOAT_68K + if ( bExp == 0 ) --expDiff; +#endif + shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + zExp = aExp; } - if ( aExp ) aSig |= 0x00800000; - aSig64 = aSig; - aSig64 <<= 40; - shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); - return roundAndPackInt64(aSign, aSig64, aSigExtra, status); - + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } +#ifndef SOFTFLOAT_68K + if ( aExp == 0 ) ++expDiff; +#endif + shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN(a, b, status); + } + return a; + } + zSig1 = 0; + zSig0 = aSig + bSig; + #ifndef SOFTFLOAT_68K + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); + goto roundAndPack; + } +#endif + zExp = aExp; +#ifdef SOFTFLOAT_68K + if ( aSig == 0 || bSig == 0 ) goto roundAndPack; +#endif + goto shiftRight1; + } + zSig0 = aSig + bSig; + if ( (int64_t) zSig0 < 0 ) goto roundAndPack; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= LIT64( 0x8000000000000000 ); + ++zExp; + roundAndPack: + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); } /*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 64-bit unsigned integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| unsigned integer is returned. Otherwise, if the conversion overflows, the -| largest unsigned integer is returned. If the 'a' is negative, the result -| is rounded and zero is returned; values that do not round to zero will -| raise the inexact exception flag. +| Returns the result of subtracting the absolute values of the extended +| double-precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -uint64_t float32_to_uint64(float32 a, float_status *status) +static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, + float_status *status) { - flag aSign; - int aExp; - int shiftCount; - uint32_t aSig; - uint64_t aSig64, aSigExtra; - a = float32_squash_input_denormal(a, status); + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + int32_t expDiff; - aSig = extractFloat32Frac(a); - aExp = extractFloat32Exp(a); - aSign = extractFloat32Sign(a); - if ((aSign) && (aExp > 126)) { - float_raise(float_flag_invalid, status); - if (float32_is_any_nan(a)) { - return LIT64(0xFFFFFFFFFFFFFFFF); - } else { - return 0; + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN(a, b, status); } + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); } - shiftCount = 0xBE - aExp; - if (aExp) { - aSig |= 0x00800000; + #ifndef SOFTFLOAT_68K + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } +#endif + zSig1 = 0; + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0); + bExpBigger: + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); } - if (shiftCount < 0) { - float_raise(float_flag_invalid, status); - return LIT64(0xFFFFFFFFFFFFFFFF); +#ifndef SOFTFLOAT_68K + if ( aExp == 0 ) ++expDiff; +#endif + shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + bBigger: + sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return a; } - - aSig64 = aSig; - aSig64 <<= 40; - shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra); - return roundAndPackUint64(aSign, aSig64, aSigExtra, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 64-bit unsigned integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. If -| `a' is a NaN, the largest unsigned integer is returned. Otherwise, if the -| conversion overflows, the largest unsigned integer is returned. If the -| 'a' is negative, the result is rounded and zero is returned; values that do -| not round to zero will raise the inexact flag. -*----------------------------------------------------------------------------*/ - -uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *status) -{ - signed char current_rounding_mode = status->float_rounding_mode; - set_float_rounding_mode(float_round_to_zero, status); - int64_t v = float32_to_uint64(a, status); - set_float_rounding_mode(current_rounding_mode, status); - return v; +#ifndef SOFTFLOAT_68K + if ( bExp == 0 ) --expDiff; +#endif + shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + aBigger: + sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); } /*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 64-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. If -| `a' is a NaN, the largest positive integer is returned. Otherwise, if the -| conversion overflows, the largest integer with the same sign as `a' is -| returned. +| Returns the result of adding the extended double-precision floating-point +| values `a' and `b'. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -int64_t float32_to_int64_round_to_zero(float32 a, float_status *status) +floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { - flag aSign; - int aExp; - int shiftCount; - uint32_t aSig; - uint64_t aSig64; - int64_t z; - a = float32_squash_input_denormal(a, status); + flag aSign, bSign; - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = aExp - 0xBE; - if ( 0 <= shiftCount ) { - if ( float32_val(a) != 0xDF000000 ) { - float_raise(float_flag_invalid, status); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (int64_t) LIT64( 0x8000000000000000 ); + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); } - else if ( aExp <= 0x7E ) { - if (aExp | aSig) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return addFloatx80Sigs(a, b, aSign, status); } - aSig64 = aSig | 0x00800000; - aSig64 <<= 40; - z = aSig64>>( - shiftCount ); - if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) { - status->float_exception_flags |= float_flag_inexact; + else { + return subFloatx80Sigs(a, b, aSign, status); } - if ( aSign ) z = - z; - return z; } /*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the double-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| Returns the result of subtracting the extended double-precision floating- +| point values `a' and `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 float32_to_float64(float32 a, float_status *status) +floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { - flag aSign; - int aExp; - uint32_t aSig; - a = float32_squash_input_denormal(a, status); + flag aSign, bSign; - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if (aSig) { - return commonNaNToFloat64(float32ToCommonNaN(a, status), status); - } - return packFloat64( aSign, 0x7FF, 0 ); + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return subFloatx80Sigs(a, b, aSign, status); + } + else { + return addFloatx80Sigs(a, b, aSign, status); } - return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 ); } /*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| Returns the result of multiplying the extended double-precision floating- +| point values `a' and `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 float32_to_floatx80(float32 a, float_status *status) +floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) { - flag aSign; - int aExp; - uint32_t aSig; + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; - a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - aSig |= 0x00800000; - return packFloatx80( aSign, 0x7FFF, ( (uint64_t) aSig )<<40 ); + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN(a, b, status); + } + if ( ( bExp | bSig ) == 0 ) goto invalid; + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); } if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } - aSig |= 0x00800000; - return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 ); - -} - -#ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous -floatx80 float32_to_floatx80_allowunnormal(float32 a , float_status *status) -{ - flag aSign; - int16_t aExp; - uint32_t aSig; - - aSig = extractFloat32Frac(a); - aExp = extractFloat32Exp(a); - aSign = extractFloat32Sign(a); - if (aExp == 0xFF) { - if (aSig) return commonNaNToFloatx80(float32ToCommonNaN(a, status), status); - return packFloatx80(aSign, 0x7FFF, LIT64(0x8000000000000000)); + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } - if (aExp == 0) { - if (aSig == 0) return packFloatx80(aSign, 0, 0); - return packFloatx80(aSign, 0x3F81, ((uint64_t) aSig) << 40); + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (int64_t) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; } - aSig |= 0x00800000; - return packFloatx80(aSign, aExp + 0x3F80, ((uint64_t)aSig) << 40); - + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); } -#endif // end of addition for Previous - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the double-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ -float128 float32_to_float128(float32 a, float_status *status) +#ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous +floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status ) { - flag aSign; - int aExp; - uint32_t aSig; - - a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if (aSig) { - return commonNaNToFloat128(float32ToCommonNaN(a, status), status); - } - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 ); - + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); + } + if ( ( bExp | bSig ) == 0 ) goto invalid; + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + aSig &= LIT64( 0xFFFFFF0000000000 ); + bSig &= LIT64( 0xFFFFFF0000000000 ); + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (int64_t) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + return roundAndPackFloatx80Sgl( zSign, zExp, zSig0, zSig1, status ); + } +#endif // End of addition for Previous + /*---------------------------------------------------------------------------- -| Rounds the single-precision floating-point value `a' to an integer, and -| returns the result as a single-precision floating-point value. The -| operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. +| Returns the result of dividing the extended double-precision floating-point +| value `a' by the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 float32_round_to_int(float32 a, float_status *status) +floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) { - flag aSign; - int aExp; - uint32_t lastBitMask, roundBitsMask; - uint32_t z; - a = float32_squash_input_denormal(a, status); + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + uint64_t rem0, rem1, rem2, term0, term1, term2; - aExp = extractFloat32Exp( a ); - if ( 0x96 <= aExp ) { - if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { - return propagateFloat32NaN(a, a, status); - } - return a; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); } - if ( aExp <= 0x7E ) { - if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a; - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloat32Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { - return packFloat32( aSign, 0x7F, 0 ); - } - break; - case float_round_ties_away: - if (aExp == 0x7E) { - return packFloat32(aSign, 0x7F, 0); + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); } - break; - case float_round_down: - return make_float32(aSign ? 0xBF800000 : 0); - case float_round_up: - return make_float32(aSign ? 0x80000000 : 0x3F800000); + goto invalid; } - return packFloat32( aSign, 0, 0 ); + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); } - lastBitMask = 1; - lastBitMask <<= 0x96 - aExp; - roundBitsMask = lastBitMask - 1; - z = float32_val(a); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z += lastBitMask>>1; - if ((z & roundBitsMask) == 0) { - z &= ~lastBitMask; - } - break; - case float_round_ties_away: - z += lastBitMask >> 1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloat32Sign(make_float32(z))) { - z += roundBitsMask; + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); } - break; - case float_round_down: - if (extractFloat32Sign(make_float32(z))) { - z += roundBitsMask; + return packFloatx80( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + float_raise(float_flag_divbyzero, status); + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); } - break; - default: - abort(); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } - z &= ~ roundBitsMask; - if (z != float32_val(a)) { - status->float_exception_flags |= float_flag_inexact; + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } - return make_float32(z); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the single-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float32 addFloat32Sigs(float32 a, float32 b, flag zSign, - float_status *status) -{ - int aExp, bExp, zExp; - uint32_t aSig, bSig, zSig; - int expDiff; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - expDiff = aExp - bExp; - aSig <<= 6; - bSig <<= 6; - if ( 0 < expDiff ) { - if ( aExp == 0xFF ) { - if (aSig) { - return propagateFloat32NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= 0x20000000; - } - shift32RightJamming( bSig, expDiff, &bSig ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0xFF ) { - if (bSig) { - return propagateFloat32NaN(a, b, status); - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= 0x20000000; - } - shift32RightJamming( aSig, - expDiff, &aSig ); - zExp = bExp; - } - else { - if ( aExp == 0xFF ) { - if (aSig | bSig) { - return propagateFloat32NaN(a, b, status); - } - return a; - } - if ( aExp == 0 ) { - if (status->flush_to_zero) { -// if (aSig | bSig) { -// float_raise(float_flag_output_denormal, status); -// } - return packFloat32(zSign, 0, 0); - } - return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); - } - zSig = 0x40000000 + aSig + bSig; - zExp = aExp; - goto roundAndPack; - } - aSig |= 0x20000000; - zSig = ( aSig + bSig )<<1; - --zExp; - if ( (int32_t) zSig < 0 ) { - zSig = aSig + bSig; + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); ++zExp; } - roundAndPack: - return roundAndPackFloat32(zSign, zExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the single- -| precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float32 subFloat32Sigs(float32 a, float32 b, flag zSign, - float_status *status) -{ - int aExp, bExp, zExp; - uint32_t aSig, bSig, zSig; - int expDiff; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - expDiff = aExp - bExp; - aSig <<= 7; - bSig <<= 7; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0xFF ) { - if (aSig | bSig) { - return propagateFloat32NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloat32(status->float_rounding_mode == float_round_down, 0, 0); - bExpBigger: - if ( bExp == 0xFF ) { - if (bSig) { - return propagateFloat32NaN(a, b, status); - } - return packFloat32( zSign ^ 1, 0xFF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= 0x40000000; + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); } - shift32RightJamming( aSig, - expDiff, &aSig ); - bSig |= 0x40000000; - bBigger: - zSig = bSig - aSig; - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0xFF ) { - if (aSig) { - return propagateFloat32NaN(a, b, status); + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); } - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= 0x40000000; - } - shift32RightJamming( bSig, expDiff, &bSig ); - aSig |= 0x40000000; - aBigger: - zSig = aSig - bSig; - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the single-precision floating-point values `a' -| and `b'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_add(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign == bSign ) { - return addFloat32Sigs(a, b, aSign, status); - } - else { - return subFloat32Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the single-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_sub(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign == bSign ) { - return subFloat32Sigs(a, b, aSign, status); - } - else { - return addFloat32Sigs(a, b, aSign, status); + zSig1 |= ( ( rem1 | rem2 ) != 0 ); } - + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); } -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the single-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_mul(float32 a, float32 b, float_status *status) +#ifdef SOFTFLOAT_68K // 21-01-2017: Addition for Previous +floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status ) { - flag aSign, bSign, zSign; - int aExp, bExp, zExp; - uint32_t aSig, bSig; - uint64_t zSig64; - uint32_t zSig; - - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0xFF ) { - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { - return propagateFloat32NaN(a, b, status); - } - if ( ( bExp | bSig ) == 0 ) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( bExp == 0xFF ) { - if (bSig) { - return propagateFloat32NaN(a, b, status); - } - if ( ( aExp | aSig ) == 0 ) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x7F; - aSig = ( aSig | 0x00800000 )<<7; - bSig = ( bSig | 0x00800000 )<<8; - shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 ); - zSig = zSig64; - if ( 0 <= (int32_t) ( zSig<<1 ) ) { - zSig <<= 1; - --zExp; - } - return roundAndPackFloat32(zSign, zExp, zSig, status); + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + uint64_t rem0, rem1, rem2, term0, term1, term2; + floatx80 z; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + goto invalid; + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + return packFloatx80( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; + } + float_raise( float_flag_divbyzero, status ); + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); + } + zSig1 |= ( ( rem1 | rem2 ) != 0 ); + } + return roundAndPackFloatx80Sgl( zSign, zExp, zSig0, zSig1, status ); + } +#endif // End of addition for Previous + /*---------------------------------------------------------------------------- -| Returns the result of dividing the single-precision floating-point value `a' -| by the corresponding value `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_div(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign, zSign; - int aExp, bExp, zExp; - uint32_t aSig, bSig, zSig; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0xFF ) { - if (aSig) { - return propagateFloat32NaN(a, b, status); - } - if ( bExp == 0xFF ) { - if (bSig) { - return propagateFloat32NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( bExp == 0xFF ) { - if (bSig) { - return propagateFloat32NaN(a, b, status); - } - return packFloat32( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloat32( zSign, 0xFF, 0 ); - } - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x7D; - aSig = ( aSig | 0x00800000 )<<7; - bSig = ( bSig | 0x00800000 )<<8; - if ( bSig <= ( aSig + aSig ) ) { - aSig >>= 1; - ++zExp; - } - zSig = ( ( (uint64_t) aSig )<<32 ) / bSig; - if ( ( zSig & 0x3F ) == 0 ) { - zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 ); - } - return roundAndPackFloat32(zSign, zExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the remainder of the single-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_rem(float32 a, float32 b, float_status *status) -{ - flag aSign, zSign; - int aExp, bExp, expDiff; - uint32_t aSig, bSig; - uint32_t q; - uint64_t aSig64, bSig64, q64; - uint32_t alternateASig; - int32_t sigMean; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - if ( aExp == 0xFF ) { - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { - return propagateFloat32NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - if ( bExp == 0xFF ) { - if (bSig) { - return propagateFloat32NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig |= 0x00800000; - bSig |= 0x00800000; - if ( expDiff < 32 ) { - aSig <<= 8; - bSig <<= 8; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - if ( 0 < expDiff ) { - q = ( ( (uint64_t) aSig )<<32 ) / bSig; - q >>= 32 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - } - else { - if ( bSig <= aSig ) aSig -= bSig; - aSig64 = ( (uint64_t) aSig )<<40; - bSig64 = ( (uint64_t) bSig )<<40; - expDiff -= 64; - while ( 0 < expDiff ) { - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - aSig64 = - ( ( bSig * q64 )<<38 ); - expDiff -= 62; - } - expDiff += 64; - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - q = q64>>( 64 - expDiff ); - bSig <<= 6; - aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (int32_t) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (int32_t) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the single-precision floating-point values -| `a' and `b' then adding 'c', with no intermediate rounding step after the -| multiplication. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic 754-2008. -| The flags argument allows the caller to select negation of the -| addend, the intermediate product, or the final result. (The difference -| between this and having the caller do a separate negation is that negating -| externally will flip the sign bit on NaNs.) -*----------------------------------------------------------------------------*/ - -float32 float32_muladd(float32 a, float32 b, float32 c, int flags, - float_status *status) -{ - flag aSign, bSign, cSign, zSign; - int aExp, bExp, cExp, pExp, zExp, expDiff; - uint32_t aSig, bSig, cSig; - flag pInf, pZero, pSign; - uint64_t pSig64, cSig64, zSig64; - uint32_t pSig; - int shiftcount; - flag signflip, infzero; - - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - c = float32_squash_input_denormal(c, status); - aSig = extractFloat32Frac(a); - aExp = extractFloat32Exp(a); - aSign = extractFloat32Sign(a); - bSig = extractFloat32Frac(b); - bExp = extractFloat32Exp(b); - bSign = extractFloat32Sign(b); - cSig = extractFloat32Frac(c); - cExp = extractFloat32Exp(c); - cSign = extractFloat32Sign(c); - - infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) || - (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0)); - - /* It is implementation-defined whether the cases of (0,inf,qnan) - * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN - * they return if they do), so we have to hand this information - * off to the target-specific pick-a-NaN routine. - */ - if (((aExp == 0xff) && aSig) || - ((bExp == 0xff) && bSig) || - ((cExp == 0xff) && cSig)) { - return propagateFloat32MulAddNaN(a, b, c, infzero, status); - } - - if (infzero) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - - if (flags & float_muladd_negate_c) { - cSign ^= 1; - } - - signflip = (flags & float_muladd_negate_result) ? 1 : 0; - - /* Work out the sign and type of the product */ - pSign = aSign ^ bSign; - if (flags & float_muladd_negate_product) { - pSign ^= 1; - } - pInf = (aExp == 0xff) || (bExp == 0xff); - pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0); - - if (cExp == 0xff) { - if (pInf && (pSign ^ cSign)) { - /* addition of opposite-signed infinities => InvalidOperation */ - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - /* Otherwise generate an infinity of the same sign */ - return packFloat32(cSign ^ signflip, 0xff, 0); - } - - if (pInf) { - return packFloat32(pSign ^ signflip, 0xff, 0); - } - - if (pZero) { - if (cExp == 0) { - if (cSig == 0) { - /* Adding two exact zeroes */ - if (pSign == cSign) { - zSign = pSign; - } else if (status->float_rounding_mode == float_round_down) { - zSign = 1; - } else { - zSign = 0; - } - return packFloat32(zSign ^ signflip, 0, 0); - } - /* Exact zero plus a denorm */ - if (status->flush_to_zero) { - //float_raise(float_flag_output_denormal, status); - return packFloat32(cSign ^ signflip, 0, 0); - } - } - /* Zero plus something non-zero : just return the something */ - if (flags & float_muladd_halve_result) { - if (cExp == 0) { - normalizeFloat32Subnormal(cSig, &cExp, &cSig); - } - /* Subtract one to halve, and one again because roundAndPackFloat32 - * wants one less than the true exponent. - */ - cExp -= 2; - cSig = (cSig | 0x00800000) << 7; - return roundAndPackFloat32(cSign ^ signflip, cExp, cSig, status); - } - return packFloat32(cSign ^ signflip, cExp, cSig); - } - - if (aExp == 0) { - normalizeFloat32Subnormal(aSig, &aExp, &aSig); - } - if (bExp == 0) { - normalizeFloat32Subnormal(bSig, &bExp, &bSig); - } - - /* Calculate the actual result a * b + c */ - - /* Multiply first; this is easy. */ - /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f - * because we want the true exponent, not the "one-less-than" - * flavour that roundAndPackFloat32() takes. - */ - pExp = aExp + bExp - 0x7e; - aSig = (aSig | 0x00800000) << 7; - bSig = (bSig | 0x00800000) << 8; - pSig64 = (uint64_t)aSig * bSig; - if ((int64_t)(pSig64 << 1) >= 0) { - pSig64 <<= 1; - pExp--; - } - - zSign = pSign ^ signflip; - - /* Now pSig64 is the significand of the multiply, with the explicit bit in - * position 62. - */ - if (cExp == 0) { - if (!cSig) { - /* Throw out the special case of c being an exact zero now */ - shift64RightJamming(pSig64, 32, &pSig64); - pSig = pSig64; - if (flags & float_muladd_halve_result) { - pExp--; - } - return roundAndPackFloat32(zSign, pExp - 1, - pSig, status); - } - normalizeFloat32Subnormal(cSig, &cExp, &cSig); - } - - cSig64 = (uint64_t)cSig << (62 - 23); - cSig64 |= LIT64(0x4000000000000000); - expDiff = pExp - cExp; - - if (pSign == cSign) { - /* Addition */ - if (expDiff > 0) { - /* scale c to match p */ - shift64RightJamming(cSig64, expDiff, &cSig64); - zExp = pExp; - } else if (expDiff < 0) { - /* scale p to match c */ - shift64RightJamming(pSig64, -expDiff, &pSig64); - zExp = cExp; - } else { - /* no scaling needed */ - zExp = cExp; - } - /* Add significands and make sure explicit bit ends up in posn 62 */ - zSig64 = pSig64 + cSig64; - if ((int64_t)zSig64 < 0) { - shift64RightJamming(zSig64, 1, &zSig64); - } else { - zExp--; - } - } else { - /* Subtraction */ - if (expDiff > 0) { - shift64RightJamming(cSig64, expDiff, &cSig64); - zSig64 = pSig64 - cSig64; - zExp = pExp; - } else if (expDiff < 0) { - shift64RightJamming(pSig64, -expDiff, &pSig64); - zSig64 = cSig64 - pSig64; - zExp = cExp; - zSign ^= 1; - } else { - zExp = pExp; - if (cSig64 < pSig64) { - zSig64 = pSig64 - cSig64; - } else if (pSig64 < cSig64) { - zSig64 = cSig64 - pSig64; - zSign ^= 1; - } else { - /* Exact zero */ - zSign = signflip; - if (status->float_rounding_mode == float_round_down) { - zSign ^= 1; - } - return packFloat32(zSign, 0, 0); - } - } - --zExp; - /* Normalize to put the explicit bit back into bit 62. */ - shiftcount = countLeadingZeros64(zSig64) - 1; - zSig64 <<= shiftcount; - zExp -= shiftcount; - } - if (flags & float_muladd_halve_result) { - zExp--; - } - - shift64RightJamming(zSig64, 32, &zSig64); - return roundAndPackFloat32(zSign, zExp, zSig64, status); -} - - -/*---------------------------------------------------------------------------- -| Returns the square root of the single-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_sqrt(float32 a, float_status *status) -{ - flag aSign; - int aExp, zExp; - uint32_t aSig, zSig; - uint64_t rem, term; - a = float32_squash_input_denormal(a, status); - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if (aSig) { - return propagateFloat32NaN(a, float32_zero, status); - } - if ( ! aSign ) return a; - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - if ( aSign ) { - if ( ( aExp | aSig ) == 0 ) return a; - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return float32_zero; - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; - aSig = ( aSig | 0x00800000 )<<8; - zSig = estimateSqrt32( aExp, aSig ) + 2; - if ( ( zSig & 0x7F ) <= 5 ) { - if ( zSig < 2 ) { - zSig = 0x7FFFFFFF; - goto roundAndPack; - } - aSig >>= aExp & 1; - term = ( (uint64_t) zSig ) * zSig; - rem = ( ( (uint64_t) aSig )<<32 ) - term; - while ( (int64_t) rem < 0 ) { - --zSig; - rem += ( ( (uint64_t) zSig )<<1 ) | 1; - } - zSig |= ( rem != 0 ); - } - shift32RightJamming( zSig, 1, &zSig ); - roundAndPack: - return roundAndPackFloat32(0, zExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the binary exponential of the single-precision floating-point value -| `a'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -| -| Uses the following identities: -| -| 1. ------------------------------------------------------------------------- -| x x*ln(2) -| 2 = e -| -| 2. ------------------------------------------------------------------------- -| 2 3 4 5 n -| x x x x x x x -| e = 1 + --- + --- + --- + --- + --- + ... + --- + ... -| 1! 2! 3! 4! 5! n! -*----------------------------------------------------------------------------*/ - -static const float64 float32_exp2_coefficients[15] = -{ - const_float64( 0x3ff0000000000000ll ), /* 1 */ - const_float64( 0x3fe0000000000000ll ), /* 2 */ - const_float64( 0x3fc5555555555555ll ), /* 3 */ - const_float64( 0x3fa5555555555555ll ), /* 4 */ - const_float64( 0x3f81111111111111ll ), /* 5 */ - const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ - const_float64( 0x3f2a01a01a01a01all ), /* 7 */ - const_float64( 0x3efa01a01a01a01all ), /* 8 */ - const_float64( 0x3ec71de3a556c734ll ), /* 9 */ - const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ - const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ - const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ - const_float64( 0x3de6124613a86d09ll ), /* 13 */ - const_float64( 0x3da93974a8c07c9dll ), /* 14 */ - const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ -}; - -float32 float32_exp2(float32 a, float_status *status) -{ - flag aSign; - int aExp; - uint32_t aSig; - float64 r, x, xn; - int i; - a = float32_squash_input_denormal(a, status); - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - - if ( aExp == 0xFF) { - if (aSig) { - return propagateFloat32NaN(a, float32_zero, status); - } - return (aSign) ? float32_zero : a; - } - if (aExp == 0) { - if (aSig == 0) return float32_one; - } - - float_raise(float_flag_inexact, status); - - /* ******************************* */ - /* using float64 for approximation */ - /* ******************************* */ - x = float32_to_float64(a, status); - x = float64_mul(x, float64_ln2, status); - - xn = x; - r = float64_one; - for (i = 0 ; i < 15 ; i++) { - float64 f; - - f = float64_mul(xn, float32_exp2_coefficients[i], status); - r = float64_add(r, f, status); - - xn = float64_mul(xn, x, status); - } - - return float64_to_float32(r, status); -} - -/*---------------------------------------------------------------------------- -| Returns the binary log of the single-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -float32 float32_log2(float32 a, float_status *status) -{ - flag aSign, zSign; - int aExp; - uint32_t aSig, zSig, i; - - a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - if ( aSign ) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - if ( aExp == 0xFF ) { - if (aSig) { - return propagateFloat32NaN(a, float32_zero, status); - } - return a; - } - - aExp -= 0x7F; - aSig |= 0x00800000; - zSign = aExp < 0; - zSig = aExp << 23; - - for (i = 1 << 22; i > 0; i >>= 1) { - aSig = ( (uint64_t)aSig * aSig ) >> 23; - if ( aSig & 0x01000000 ) { - aSig >>= 1; - zSig |= i; - } - } - - if ( zSign ) - zSig = -zSig; - - return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_eq(float32 a, float32 b, float_status *status) -{ - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - av = float32_val(a); - bv = float32_val(b); - return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_le(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_lt(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_unordered(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_eq_quiet(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return ( float32_val(a) == float32_val(b) ) || - ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 ); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_le_quiet(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_lt_quiet(float32 a, float32 b, float_status *status) -{ - flag aSign, bSign; - uint32_t av, bv; - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - av = float32_val(a); - bv = float32_val(b); - if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float32_unordered_quiet(float32 a, float32 b, float_status *status) -{ - a = float32_squash_input_denormal(a, status); - b = float32_squash_input_denormal(b, status); - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if (float32_is_signaling_nan(a, status) - || float32_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t float64_to_int32(float64 a, float_status *status) -{ - flag aSign; - int aExp; - int shiftCount; - uint64_t aSig; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; - if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x42C - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32(aSign, aSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int32_t float64_to_int32_round_to_zero(float64 a, float_status *status) -{ - flag aSign; - int aExp; - int shiftCount; - uint64_t aSig, savedASig; - int32_t z; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( 0x41E < aExp ) { - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FF ) { - if (aExp || aSig) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x433 - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 16-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int16_t float64_to_int16_round_to_zero(float64 a, float_status *status) -{ - flag aSign; - int aExp; - int shiftCount; - uint64_t aSig, savedASig; - int32_t z; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( 0x40E < aExp ) { - if ( ( aExp == 0x7FF ) && aSig ) { - aSign = 0; - } - goto invalid; - } - else if ( aExp < 0x3FF ) { - if ( aExp || aSig ) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x433 - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) { - z = - z; - } - if ( ( (int16_t)z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? (int32_t) 0xffff8000 : 0x7FFF; - } - if ( ( aSig<float_exception_flags |= float_flag_inexact; - } - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 64-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64_t float64_to_int64(float64 a, float_status *status) -{ - flag aSign; - int aExp; - int shiftCount; - uint64_t aSig, aSigExtra; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x433 - aExp; - if ( shiftCount <= 0 ) { - if ( 0x43E < aExp ) { - float_raise(float_flag_invalid, status); - if ( ! aSign - || ( ( aExp == 0x7FF ) - && ( aSig != LIT64( 0x0010000000000000 ) ) ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (int64_t) LIT64( 0x8000000000000000 ); - } - aSigExtra = 0; - aSig <<= - shiftCount; - } - else { - shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); - } - return roundAndPackInt64(aSign, aSig, aSigExtra, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 64-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int64_t float64_to_int64_round_to_zero(float64 a, float_status *status) -{ - flag aSign; - int aExp; - int shiftCount; - uint64_t aSig; - int64_t z; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); - shiftCount = aExp - 0x433; - if ( 0 <= shiftCount ) { - if ( 0x43E <= aExp ) { - if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) { - float_raise(float_flag_invalid, status); - if ( ! aSign - || ( ( aExp == 0x7FF ) - && ( aSig != LIT64( 0x0010000000000000 ) ) ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (int64_t) LIT64( 0x8000000000000000 ); - } - z = aSig<float_exception_flags |= float_flag_inexact; - } - return 0; - } - z = aSig>>( - shiftCount ); - if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { - status->float_exception_flags |= float_flag_inexact; - } - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the single-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float64_to_float32(float64 a, float_status *status) -{ - flag aSign; - int aExp; - uint64_t aSig; - uint32_t zSig; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - return commonNaNToFloat32(float64ToCommonNaN(a, status), status); - } - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 22, &aSig ); - zSig = aSig; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x381; - } - return roundAndPackFloat32(aSign, aExp, zSig, status); - -} - - -/*---------------------------------------------------------------------------- -| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a -| half-precision floating-point value, returning the result. After being -| shifted into the proper positions, the three fields are simply added -| together to form the result. This means that any integer portion of `zSig' -| will be added into the exponent. Since a properly normalized significand -| will have an integer portion equal to 1, the `zExp' input should be 1 less -| than the desired result exponent whenever `zSig' is a complete, normalized -| significand. -*----------------------------------------------------------------------------*/ -static float16 packFloat16(flag zSign, int zExp, uint16_t zSig) -{ - return make_float16( - (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig); -} - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper half-precision floating- -| point value corresponding to the abstract input. Ordinarily, the abstract -| value is simply rounded and packed into the half-precision format, with -| the inexact exception raised if the abstract input cannot be represented -| exactly. However, if the abstract value is too large, the overflow and -| inexact exceptions are raised and an infinity or maximal finite value is -| returned. If the abstract value is too small, the input value is rounded to -| a subnormal number, and the underflow and inexact exceptions are raised if -| the abstract input cannot be represented exactly as a subnormal half- -| precision floating-point number. -| The `ieee' flag indicates whether to use IEEE standard half precision, or -| ARM-style "alternative representation", which omits the NaN and Inf -| encodings in order to raise the maximum representable exponent by one. -| The input significand `zSig' has its binary point between bits 22 -| and 23, which is 13 bits to the left of the usual location. This shifted -| significand must be normalized or smaller. If `zSig' is not normalized, -| `zExp' must be 0; in that case, the result returned is a subnormal number, -| and it must not require rounding. In the usual case that `zSig' is -| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. -| Note the slightly odd position of the binary point in zSig compared with the -| other roundAndPackFloat functions. This should probably be fixed if we -| need to implement more float16 routines than just conversion. -| The handling of underflow and overflow follows the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float16 roundAndPackFloat16(flag zSign, int zExp, - uint32_t zSig, flag ieee, - float_status *status) -{ - int maxexp = ieee ? 29 : 30; - uint32_t mask; - uint32_t increment; - bool rounding_bumps_exp; - bool is_tiny = false; - - /* Calculate the mask of bits of the mantissa which are not - * representable in half-precision and will be lost. - */ - if (zExp < 1) { - /* Will be denormal in halfprec */ - mask = 0x00ffffff; - if (zExp >= -11) { - mask >>= 11 + zExp; - } - } else { - /* Normal number in halfprec */ - mask = 0x00001fff; - } - - switch (status->float_rounding_mode) { - case float_round_nearest_even: - increment = (mask + 1) >> 1; - if ((zSig & mask) == increment) { - increment = zSig & (increment << 1); - } - break; - case float_round_ties_away: - increment = (mask + 1) >> 1; - break; - case float_round_up: - increment = zSign ? 0 : mask; - break; - case float_round_down: - increment = zSign ? mask : 0; - break; - default: /* round_to_zero */ - increment = 0; - break; - } - - rounding_bumps_exp = (zSig + increment >= 0x01000000); - - if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) { - if (ieee) { - float_raise(float_flag_overflow | float_flag_inexact, status); - return packFloat16(zSign, 0x1f, 0); - } else { - float_raise(float_flag_invalid, status); - return packFloat16(zSign, 0x1f, 0x3ff); - } - } - - if (zExp < 0) { - /* Note that flush-to-zero does not affect half-precision results */ - is_tiny = - (status->float_detect_tininess == float_tininess_before_rounding) - || (zExp < -1) - || (!rounding_bumps_exp); - } - if (zSig & mask) { - float_raise(float_flag_inexact, status); - if (is_tiny) { - float_raise(float_flag_underflow, status); - } - } - - zSig += increment; - if (rounding_bumps_exp) { - zSig >>= 1; - zExp++; - } - - if (zExp < -10) { - return packFloat16(zSign, 0, 0); - } - if (zExp < 0) { - zSig >>= -zExp; - zExp = 0; - } - return packFloat16(zSign, zExp, zSig >> 13); -} - -static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr, - uint32_t *zSigPtr) -{ - int8_t shiftCount = countLeadingZeros32(aSig) - 21; - *zSigPtr = aSig << shiftCount; - *zExpPtr = 1 - shiftCount; -} - -/* Half precision floats come in two formats: standard IEEE and "ARM" format. - The latter gains extra exponent range by omitting the NaN/Inf encodings. */ - -float32 float16_to_float32(float16 a, flag ieee, float_status *status) -{ - flag aSign; - int aExp; - uint32_t aSig; - - aSign = extractFloat16Sign(a); - aExp = extractFloat16Exp(a); - aSig = extractFloat16Frac(a); - - if (aExp == 0x1f && ieee) { - if (aSig) { - return commonNaNToFloat32(float16ToCommonNaN(a, status), status); - } - return packFloat32(aSign, 0xff, 0); - } - if (aExp == 0) { - if (aSig == 0) { - return packFloat32(aSign, 0, 0); - } - - normalizeFloat16Subnormal(aSig, &aExp, &aSig); - aExp--; - } - return packFloat32( aSign, aExp + 0x70, aSig << 13); -} - -float16 float32_to_float16(float32 a, flag ieee, float_status *status) -{ - flag aSign; - int aExp; - uint32_t aSig; - - a = float32_squash_input_denormal(a, status); - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if (aSig) { - /* Input is a NaN */ - if (!ieee) { - float_raise(float_flag_invalid, status); - return packFloat16(aSign, 0, 0); - } - return commonNaNToFloat16( - float32ToCommonNaN(a, status), status); - } - /* Infinity */ - if (!ieee) { - float_raise(float_flag_invalid, status); - return packFloat16(aSign, 0x1f, 0x3ff); - } - return packFloat16(aSign, 0x1f, 0); - } - if (aExp == 0 && aSig == 0) { - return packFloat16(aSign, 0, 0); - } - /* Decimal point between bits 22 and 23. Note that we add the 1 bit - * even if the input is denormal; however this is harmless because - * the largest possible single-precision denormal is still smaller - * than the smallest representable half-precision denormal, and so we - * will end up ignoring aSig and returning via the "always return zero" - * codepath. - */ - aSig |= 0x00800000; - aExp -= 0x71; - - return roundAndPackFloat16(aSign, aExp, aSig, ieee, status); -} - -float64 float16_to_float64(float16 a, flag ieee, float_status *status) -{ - flag aSign; - int aExp; - uint32_t aSig; - - aSign = extractFloat16Sign(a); - aExp = extractFloat16Exp(a); - aSig = extractFloat16Frac(a); - - if (aExp == 0x1f && ieee) { - if (aSig) { - return commonNaNToFloat64( - float16ToCommonNaN(a, status), status); - } - return packFloat64(aSign, 0x7ff, 0); - } - if (aExp == 0) { - if (aSig == 0) { - return packFloat64(aSign, 0, 0); - } - - normalizeFloat16Subnormal(aSig, &aExp, &aSig); - aExp--; - } - return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42); -} - -float16 float64_to_float16(float64 a, flag ieee, float_status *status) -{ - flag aSign; - int aExp; - uint64_t aSig; - uint32_t zSig; - - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac(a); - aExp = extractFloat64Exp(a); - aSign = extractFloat64Sign(a); - if (aExp == 0x7FF) { - if (aSig) { - /* Input is a NaN */ - if (!ieee) { - float_raise(float_flag_invalid, status); - return packFloat16(aSign, 0, 0); - } - return commonNaNToFloat16( - float64ToCommonNaN(a, status), status); - } - /* Infinity */ - if (!ieee) { - float_raise(float_flag_invalid, status); - return packFloat16(aSign, 0x1f, 0x3ff); - } - return packFloat16(aSign, 0x1f, 0); - } - shift64RightJamming(aSig, 29, &aSig); - zSig = aSig; - if (aExp == 0 && zSig == 0) { - return packFloat16(aSign, 0, 0); - } - /* Decimal point between bits 22 and 23. Note that we add the 1 bit - * even if the input is denormal; however this is harmless because - * the largest possible single-precision denormal is still smaller - * than the smallest representable half-precision denormal, and so we - * will end up ignoring aSig and returning via the "always return zero" - * codepath. - */ - zSig |= 0x00800000; - aExp -= 0x3F1; - - return roundAndPackFloat16(aSign, aExp, zSig, ieee, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 float64_to_floatx80(float64 a, float_status *status) -{ - flag aSign; - int aExp; - uint64_t aSig; - - a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - return packFloatx80( aSign, 0x7FFF, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - return - packFloatx80( - aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); - -} - -#ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous -floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status ) -{ - flag aSign; - int16_t aExp; - uint64_t aSig; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a, status ), status ); - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - return packFloatx80( aSign, 0x3C01, aSig<<11); - } - return - packFloatx80( - aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); - -} -#endif // end of addition for Previous - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the quadruple-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float64_to_float128(float64 a, float_status *status) -{ - flag aSign; - int aExp; - uint64_t aSig, zSig0, zSig1; - - a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - return commonNaNToFloat128(float64ToCommonNaN(a, status), status); - } - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Rounds the double-precision floating-point value `a' to an integer, and -| returns the result as a double-precision floating-point value. The -| operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_round_to_int(float64 a, float_status *status) -{ - flag aSign; - int aExp; - uint64_t lastBitMask, roundBitsMask; - uint64_t z; - a = float64_squash_input_denormal(a, status); - - aExp = extractFloat64Exp( a ); - if ( 0x433 <= aExp ) { - if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { - return propagateFloat64NaN(a, a, status); - } - return a; - } - if ( aExp < 0x3FF ) { - if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a; - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloat64Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { - return packFloat64( aSign, 0x3FF, 0 ); - } - break; - case float_round_ties_away: - if (aExp == 0x3FE) { - return packFloat64(aSign, 0x3ff, 0); - } - break; - case float_round_down: - return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0); - case float_round_up: - return make_float64( - aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 )); - } - return packFloat64( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x433 - aExp; - roundBitsMask = lastBitMask - 1; - z = float64_val(a); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z += lastBitMask >> 1; - if ((z & roundBitsMask) == 0) { - z &= ~lastBitMask; - } - break; - case float_round_ties_away: - z += lastBitMask >> 1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloat64Sign(make_float64(z))) { - z += roundBitsMask; - } - break; - case float_round_down: - if (extractFloat64Sign(make_float64(z))) { - z += roundBitsMask; - } - break; - default: - abort(); - } - z &= ~ roundBitsMask; - if (z != float64_val(a)) { - status->float_exception_flags |= float_flag_inexact; - } - return make_float64(z); - -} - -float64 float64_trunc_to_int(float64 a, float_status *status) -{ - int oldmode; - float64 res; - oldmode = status->float_rounding_mode; - status->float_rounding_mode = float_round_to_zero; - res = float64_round_to_int(a, status); - status->float_rounding_mode = oldmode; - return res; -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the double-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float64 addFloat64Sigs(float64 a, float64 b, flag zSign, - float_status *status) -{ - int aExp, bExp, zExp; - uint64_t aSig, bSig, zSig; - int expDiff; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - expDiff = aExp - bExp; - aSig <<= 9; - bSig <<= 9; - if ( 0 < expDiff ) { - if ( aExp == 0x7FF ) { - if (aSig) { - return propagateFloat64NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= LIT64( 0x2000000000000000 ); - } - shift64RightJamming( bSig, expDiff, &bSig ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FF ) { - if (bSig) { - return propagateFloat64NaN(a, b, status); - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= LIT64( 0x2000000000000000 ); - } - shift64RightJamming( aSig, - expDiff, &aSig ); - zExp = bExp; - } - else { - if ( aExp == 0x7FF ) { - if (aSig | bSig) { - return propagateFloat64NaN(a, b, status); - } - return a; - } - if ( aExp == 0 ) { - if (status->flush_to_zero) { -// if (aSig | bSig) { -// float_raise(float_flag_output_denormal, status); -// } - return packFloat64(zSign, 0, 0); - } - return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); - } - zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; - zExp = aExp; - goto roundAndPack; - } - aSig |= LIT64( 0x2000000000000000 ); - zSig = ( aSig + bSig )<<1; - --zExp; - if ( (int64_t) zSig < 0 ) { - zSig = aSig + bSig; - ++zExp; - } - roundAndPack: - return roundAndPackFloat64(zSign, zExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the double- -| precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float64 subFloat64Sigs(float64 a, float64 b, flag zSign, - float_status *status) -{ - int aExp, bExp, zExp; - uint64_t aSig, bSig, zSig; - int expDiff; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - expDiff = aExp - bExp; - aSig <<= 10; - bSig <<= 10; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FF ) { - if (aSig | bSig) { - return propagateFloat64NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloat64(status->float_rounding_mode == float_round_down, 0, 0); - bExpBigger: - if ( bExp == 0x7FF ) { - if (bSig) { - return propagateFloat64NaN(a, b, status); - } - return packFloat64( zSign ^ 1, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= LIT64( 0x4000000000000000 ); - } - shift64RightJamming( aSig, - expDiff, &aSig ); - bSig |= LIT64( 0x4000000000000000 ); - bBigger: - zSig = bSig - aSig; - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FF ) { - if (aSig) { - return propagateFloat64NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= LIT64( 0x4000000000000000 ); - } - shift64RightJamming( bSig, expDiff, &bSig ); - aSig |= LIT64( 0x4000000000000000 ); - aBigger: - zSig = aSig - bSig; - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the double-precision floating-point values `a' -| and `b'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_add(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign == bSign ) { - return addFloat64Sigs(a, b, aSign, status); - } - else { - return subFloat64Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the double-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_sub(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign == bSign ) { - return subFloat64Sigs(a, b, aSign, status); - } - else { - return addFloat64Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the double-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_mul(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign, zSign; - int aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FF ) { - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { - return propagateFloat64NaN(a, b, status); - } - if ( ( bExp | bSig ) == 0 ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( bExp == 0x7FF ) { - if (bSig) { - return propagateFloat64NaN(a, b, status); - } - if ( ( aExp | aSig ) == 0 ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FF; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - zSig0 |= ( zSig1 != 0 ); - if ( 0 <= (int64_t) ( zSig0<<1 ) ) { - zSig0 <<= 1; - --zExp; - } - return roundAndPackFloat64(zSign, zExp, zSig0, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of dividing the double-precision floating-point value `a' -| by the corresponding value `b'. The operation is performed according to -| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_div(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign, zSign; - int aExp, bExp, zExp; - uint64_t aSig, bSig, zSig; - uint64_t rem0, rem1; - uint64_t term0, term1; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FF ) { - if (aSig) { - return propagateFloat64NaN(a, b, status); - } - if ( bExp == 0x7FF ) { - if (bSig) { - return propagateFloat64NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( bExp == 0x7FF ) { - if (bSig) { - return propagateFloat64NaN(a, b, status); - } - return packFloat64( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloat64( zSign, 0x7FF, 0 ); - } - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FD; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - if ( bSig <= ( aSig + aSig ) ) { - aSig >>= 1; - ++zExp; - } - zSig = estimateDiv128To64( aSig, 0, bSig ); - if ( ( zSig & 0x1FF ) <= 2 ) { - mul64To128( bSig, zSig, &term0, &term1 ); - sub128( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig |= ( rem1 != 0 ); - } - return roundAndPackFloat64(zSign, zExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the remainder of the double-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_rem(float64 a, float64 b, float_status *status) -{ - flag aSign, zSign; - int aExp, bExp, expDiff; - uint64_t aSig, bSig; - uint64_t q, alternateASig; - int64_t sigMean; - - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - if ( aExp == 0x7FF ) { - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { - return propagateFloat64NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( bExp == 0x7FF ) { - if (bSig) { - return propagateFloat64NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - aSig = - ( ( bSig>>2 ) * q ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - q >>= 64 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (int64_t) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (int64_t) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the double-precision floating-point values -| `a' and `b' then adding 'c', with no intermediate rounding step after the -| multiplication. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic 754-2008. -| The flags argument allows the caller to select negation of the -| addend, the intermediate product, or the final result. (The difference -| between this and having the caller do a separate negation is that negating -| externally will flip the sign bit on NaNs.) -*----------------------------------------------------------------------------*/ - -float64 float64_muladd(float64 a, float64 b, float64 c, int flags, - float_status *status) -{ - flag aSign, bSign, cSign, zSign; - int aExp, bExp, cExp, pExp, zExp, expDiff; - uint64_t aSig, bSig, cSig; - flag pInf, pZero, pSign; - uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1; - int shiftcount; - flag signflip, infzero; - - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - c = float64_squash_input_denormal(c, status); - aSig = extractFloat64Frac(a); - aExp = extractFloat64Exp(a); - aSign = extractFloat64Sign(a); - bSig = extractFloat64Frac(b); - bExp = extractFloat64Exp(b); - bSign = extractFloat64Sign(b); - cSig = extractFloat64Frac(c); - cExp = extractFloat64Exp(c); - cSign = extractFloat64Sign(c); - - infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) || - (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0)); - - /* It is implementation-defined whether the cases of (0,inf,qnan) - * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN - * they return if they do), so we have to hand this information - * off to the target-specific pick-a-NaN routine. - */ - if (((aExp == 0x7ff) && aSig) || - ((bExp == 0x7ff) && bSig) || - ((cExp == 0x7ff) && cSig)) { - return propagateFloat64MulAddNaN(a, b, c, infzero, status); - } - - if (infzero) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - - if (flags & float_muladd_negate_c) { - cSign ^= 1; - } - - signflip = (flags & float_muladd_negate_result) ? 1 : 0; - - /* Work out the sign and type of the product */ - pSign = aSign ^ bSign; - if (flags & float_muladd_negate_product) { - pSign ^= 1; - } - pInf = (aExp == 0x7ff) || (bExp == 0x7ff); - pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0); - - if (cExp == 0x7ff) { - if (pInf && (pSign ^ cSign)) { - /* addition of opposite-signed infinities => InvalidOperation */ - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - /* Otherwise generate an infinity of the same sign */ - return packFloat64(cSign ^ signflip, 0x7ff, 0); - } - - if (pInf) { - return packFloat64(pSign ^ signflip, 0x7ff, 0); - } - - if (pZero) { - if (cExp == 0) { - if (cSig == 0) { - /* Adding two exact zeroes */ - if (pSign == cSign) { - zSign = pSign; - } else if (status->float_rounding_mode == float_round_down) { - zSign = 1; - } else { - zSign = 0; - } - return packFloat64(zSign ^ signflip, 0, 0); - } - /* Exact zero plus a denorm */ - if (status->flush_to_zero) { -// float_raise(float_flag_output_denormal, status); - return packFloat64(cSign ^ signflip, 0, 0); - } - } - /* Zero plus something non-zero : just return the something */ - if (flags & float_muladd_halve_result) { - if (cExp == 0) { - normalizeFloat64Subnormal(cSig, &cExp, &cSig); - } - /* Subtract one to halve, and one again because roundAndPackFloat64 - * wants one less than the true exponent. - */ - cExp -= 2; - cSig = (cSig | 0x0010000000000000ULL) << 10; - return roundAndPackFloat64(cSign ^ signflip, cExp, cSig, status); - } - return packFloat64(cSign ^ signflip, cExp, cSig); - } - - if (aExp == 0) { - normalizeFloat64Subnormal(aSig, &aExp, &aSig); - } - if (bExp == 0) { - normalizeFloat64Subnormal(bSig, &bExp, &bSig); - } - - /* Calculate the actual result a * b + c */ - - /* Multiply first; this is easy. */ - /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff - * because we want the true exponent, not the "one-less-than" - * flavour that roundAndPackFloat64() takes. - */ - pExp = aExp + bExp - 0x3fe; - aSig = (aSig | LIT64(0x0010000000000000))<<10; - bSig = (bSig | LIT64(0x0010000000000000))<<11; - mul64To128(aSig, bSig, &pSig0, &pSig1); - if ((int64_t)(pSig0 << 1) >= 0) { - shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1); - pExp--; - } - - zSign = pSign ^ signflip; - - /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit - * bit in position 126. - */ - if (cExp == 0) { - if (!cSig) { - /* Throw out the special case of c being an exact zero now */ - shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1); - if (flags & float_muladd_halve_result) { - pExp--; - } - return roundAndPackFloat64(zSign, pExp - 1, - pSig1, status); - } - normalizeFloat64Subnormal(cSig, &cExp, &cSig); - } - - /* Shift cSig and add the explicit bit so [cSig0:cSig1] is the - * significand of the addend, with the explicit bit in position 126. - */ - cSig0 = cSig << (126 - 64 - 52); - cSig1 = 0; - cSig0 |= LIT64(0x4000000000000000); - expDiff = pExp - cExp; - - if (pSign == cSign) { - /* Addition */ - if (expDiff > 0) { - /* scale c to match p */ - shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1); - zExp = pExp; - } else if (expDiff < 0) { - /* scale p to match c */ - shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1); - zExp = cExp; - } else { - /* no scaling needed */ - zExp = cExp; - } - /* Add significands and make sure explicit bit ends up in posn 126 */ - add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); - if ((int64_t)zSig0 < 0) { - shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1); - } else { - zExp--; - } - shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1); - if (flags & float_muladd_halve_result) { - zExp--; - } - return roundAndPackFloat64(zSign, zExp, zSig1, status); - } else { - /* Subtraction */ - if (expDiff > 0) { - shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1); - sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); - zExp = pExp; - } else if (expDiff < 0) { - shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1); - sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1); - zExp = cExp; - zSign ^= 1; - } else { - zExp = pExp; - if (lt128(cSig0, cSig1, pSig0, pSig1)) { - sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); - } else if (lt128(pSig0, pSig1, cSig0, cSig1)) { - sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1); - zSign ^= 1; - } else { - /* Exact zero */ - zSign = signflip; - if (status->float_rounding_mode == float_round_down) { - zSign ^= 1; - } - return packFloat64(zSign, 0, 0); - } - } - --zExp; - /* Do the equivalent of normalizeRoundAndPackFloat64() but - * starting with the significand in a pair of uint64_t. - */ - if (zSig0) { - shiftcount = countLeadingZeros64(zSig0) - 1; - shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1); - if (zSig1) { - zSig0 |= 1; - } - zExp -= shiftcount; - } else { - shiftcount = countLeadingZeros64(zSig1); - if (shiftcount == 0) { - zSig0 = (zSig1 >> 1) | (zSig1 & 1); - zExp -= 63; - } else { - shiftcount--; - zSig0 = zSig1 << shiftcount; - zExp -= (shiftcount + 64); - } - } - if (flags & float_muladd_halve_result) { - zExp--; - } - return roundAndPackFloat64(zSign, zExp, zSig0, status); - } -} - -/*---------------------------------------------------------------------------- -| Returns the square root of the double-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_sqrt(float64 a, float_status *status) -{ - flag aSign; - int aExp, zExp; - uint64_t aSig, zSig, doubleZSig; - uint64_t rem0, rem1, term0, term1; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - return propagateFloat64NaN(a, a, status); - } - if ( ! aSign ) return a; - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( aSign ) { - if ( ( aExp | aSig ) == 0 ) return a; - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return float64_zero; - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; - aSig |= LIT64( 0x0010000000000000 ); - zSig = estimateSqrt32( aExp, aSig>>21 ); - aSig <<= 9 - ( aExp & 1 ); - zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); - if ( ( zSig & 0x1FF ) <= 5 ) { - doubleZSig = zSig<<1; - mul64To128( zSig, zSig, &term0, &term1 ); - sub128( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig; - doubleZSig -= 2; - add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); - } - zSig |= ( ( rem0 | rem1 ) != 0 ); - } - return roundAndPackFloat64(0, zExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the binary log of the double-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -float64 float64_log2(float64 a, float_status *status) -{ - flag aSign, zSign; - int aExp; - uint64_t aSig, aSig0, aSig1, zSig, i; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - if ( aSign ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( aExp == 0x7FF ) { - if (aSig) { - return propagateFloat64NaN(a, float64_zero, status); - } - return a; - } - - aExp -= 0x3FF; - aSig |= LIT64( 0x0010000000000000 ); - zSign = aExp < 0; - zSig = (uint64_t)aExp << 52; - for (i = 1LL << 51; i > 0; i >>= 1) { - mul64To128( aSig, aSig, &aSig0, &aSig1 ); - aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 ); - if ( aSig & LIT64( 0x0020000000000000 ) ) { - aSig >>= 1; - zSig |= i; - } - } - - if ( zSign ) - zSig = -zSig; - return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status); -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The invalid exception is raised -| if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_eq(float64 a, float64 b, float_status *status) -{ - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - av = float64_val(a); - bv = float64_val(b); - return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_le(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_lt(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_unordered(float64 a, float64 b, float_status *status) -{ - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception.The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_eq_quiet(float64 a, float64 b, float_status *status) -{ - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - av = float64_val(a); - bv = float64_val(b); - return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_le_quiet(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); - return ( av == bv ) || ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_lt_quiet(float64 a, float64 b, float_status *status) -{ - flag aSign, bSign; - uint64_t av, bv; - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - av = float64_val(a); - bv = float64_val(b); - if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); - return ( av != bv ) && ( aSign ^ ( av < bv ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float64_unordered_quiet(float64 a, float64 b, float_status *status) -{ - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if (float64_is_signaling_nan(a, status) - || float64_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, the -| largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t floatx80_to_int32(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); -#ifdef SOFTFLOAT_68K - if ( aExp == 0x7FFF ) { - float_raise( float_flag_invalid, status ); - if ( (uint64_t) ( aSig<<1 ) ) return (int32_t)(aSig>>32); - return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; - } -#else - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; -#endif - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32(aSign, aSig, status); - -} - -#ifdef SOFTFLOAT_68K // 30-01-2017: Addition for Previous -int16_t floatx80_to_int16( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - float_raise( float_flag_invalid, status ); - if ( (uint64_t) ( aSig<<1 ) ) return (int16_t)(aSig>>48); - return aSign ? (int16_t) 0x8000 : 0x7FFF; - } - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt16( aSign, aSig, status ); - -} -int8_t floatx80_to_int8( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - float_raise( float_flag_invalid, status ); - if ( (uint64_t) ( aSig<<1 ) ) return (int8_t)(aSig>>56); - return aSign ? (int8_t) 0x80 : 0x7F; - } - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt8( aSign, aSig, status ); - -} -#endif // End of addition for Previous - - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig, savedASig; - int32_t z; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if (aExp || aSig) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - shiftCount = 0x403E - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, -| the largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig, aSigExtra; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1ULL << 63; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = 0x403E - aExp; - if ( shiftCount <= 0 ) { - if ( shiftCount ) { - float_raise(float_flag_invalid, status); - if ( ! aSign - || ( ( aExp == 0x7FFF ) - && ( aSig != LIT64( 0x8000000000000000 ) ) ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (int64_t) LIT64( 0x8000000000000000 ); - } - aSigExtra = 0; - } - else { - shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); - } - return roundAndPackInt64(aSign, aSig, aSigExtra, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - int64_t z; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1ULL << 63; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = aExp - 0x403E; - if ( 0 <= shiftCount ) { - aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); - if ( ( a.high != 0xC03E ) || aSig ) { - float_raise(float_flag_invalid, status); - if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (int64_t) LIT64( 0x8000000000000000 ); - } - else if ( aExp < 0x3FFF ) { - if (aExp | aSig) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - z = aSig>>( - shiftCount ); - if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { - status->float_exception_flags |= float_flag_inexact; - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the single-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 floatx80_to_float32(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status); - } - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 33, &aSig ); - if ( aExp || aSig ) aExp -= 0x3F81; - return roundAndPackFloat32(aSign, aExp, aSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 floatx80_to_float64(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig, zSig; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); - } - return packFloat64( aSign, 0x7FF, 0 ); - } - shift64RightJamming( aSig, 1, &zSig ); - if ( aExp || aSig ) aExp -= 0x3C01; - return roundAndPackFloat64(aSign, aExp, zSig, status); - -} - -#ifdef SOFTFLOAT_68K // 31-01-2017 -/*---------------------------------------------------------------------------- - | Returns the result of converting the extended double-precision floating- - | point value `a' to the extended double-precision floating-point format. - | The conversion is performed according to the IEC/IEEE Standard for Binary - | Floating-Point Arithmetic. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_to_floatx80( floatx80 a, float_status *status ) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) { - return propagateFloatx80NaN( a, a, status ); - } - if ( aExp == 0 && aSig != 0 ) { - return normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); - } - return a; - -} -#endif - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the quadruple-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 floatx80_to_float128(floatx80 a, float_status *status) -{ - flag aSign; - int aExp; - uint64_t aSig, zSig0, zSig1; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { - return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status); - } -#ifdef SOFTFLOAT_68K - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } -#endif - shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp, zSig0, zSig1 ); - -} - -#ifdef SOFTFLOAT_68K // 30-01-2016: Added for Previous -floatx80 floatx80_round32( floatx80 a, float_status *status ) -{ - flag aSign; - int16_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - return roundAndPackFloatx80(32, aSign, aExp, aSig, 0, status); - -} - -floatx80 floatx80_round64( floatx80 a, float_status *status ) -{ - flag aSign; - int16_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - return roundAndPackFloatx80(64, aSign, aExp, aSig, 0, status); - -} - -floatx80 floatx80_normalize( floatx80 a ) -{ - flag aSign; - int16_t aExp; - uint64_t aSig; - int8_t shiftCount; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF || aExp == 0 ) return a; - if ( aSig == 0 ) return packFloatx80(aSign, 0, 0); - - shiftCount = countLeadingZeros64( aSig ); - - if ( shiftCount > aExp ) shiftCount = aExp; - - aExp -= shiftCount; - aSig <<= shiftCount; - - return packFloatx80( aSign, aExp, aSig ); -} -#endif // end of addition for Previous - -/*---------------------------------------------------------------------------- -| Rounds the extended double-precision floating-point value `a' to an integer, -| and returns the result as an extended quadruple-precision floating-point -| value. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_round_to_int(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t lastBitMask, roundBitsMask; - floatx80 z; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aExp = extractFloatx80Exp( a ); - if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaN(a, a, status); - } - return a; - } - if ( aExp < 0x3FFF ) { - if ( ( aExp == 0 ) - #ifdef SOFTFLOAT_68K - && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { -#else - && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { -#endif - return a; - } - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloatx80Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) - ) { - return - packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - break; - case float_round_ties_away: - if (aExp == 0x3FFE) { - return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000)); - } - break; - case float_round_down: - return - aSign ? - packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) - : packFloatx80( 0, 0, 0 ); - case float_round_up: - return - aSign ? packFloatx80( 1, 0, 0 ) - : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - return packFloatx80( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x403E - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z.low += lastBitMask>>1; - if ((z.low & roundBitsMask) == 0) { - z.low &= ~lastBitMask; - } - break; - case float_round_ties_away: - z.low += lastBitMask >> 1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloatx80Sign(z)) { - z.low += roundBitsMask; - } - break; - case float_round_down: - if (extractFloatx80Sign(z)) { - z.low += roundBitsMask; - } - break; - default: - abort(); - } - z.low &= ~ roundBitsMask; - if ( z.low == 0 ) { - ++z.high; - z.low = LIT64( 0x8000000000000000 ); - } - if (z.low != a.low) { - status->float_exception_flags |= float_flag_inexact; - } - return z; - -} - -#ifdef SOFTFLOAT_68K // 09-01-2017: Added for Previous -floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t lastBitMask, roundBitsMask; - floatx80 z; - - aExp = extractFloatx80Exp( a ); - if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaN( a, a, status ); - } - return a; - } - if ( aExp < 0x3FFF ) { - if ( ( aExp == 0 ) -#ifdef SOFTFLOAT_68K - && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { -#else - && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { -#endif - return a; - } - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloatx80Sign( a ); - return packFloatx80( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x403E - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - z.low &= ~ roundBitsMask; - if ( z.low == 0 ) { - ++z.high; - z.low = LIT64( 0x8000000000000000 ); - } - if ( z.low != a.low ) status->float_exception_flags |= float_flag_inexact; - return z; - -} -#endif // End of addition for Previous - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the extended double- -| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is -| negated before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - int32_t expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); -#ifdef SOFTFLOAT_68K - if ( aExp == 0 ) { - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } -#endif - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } -#ifndef SOFTFLOAT_68K - if ( bExp == 0 ) --expDiff; -#endif - shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } -#ifndef SOFTFLOAT_68K - if ( aExp == 0 ) ++expDiff; -#endif - shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - zSig1 = 0; - zSig0 = aSig + bSig; - #ifndef SOFTFLOAT_68K - if ( aExp == 0 ) { - normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); - goto roundAndPack; - } -#endif - zExp = aExp; -#ifdef SOFTFLOAT_68K - if ( aSig == 0 || bSig == 0 ) goto roundAndPack; -#endif - goto shiftRight1; - } - zSig0 = aSig + bSig; - if ( (int64_t) zSig0 < 0 ) goto roundAndPack; - shiftRight1: - shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= LIT64( 0x8000000000000000 ); - ++zExp; - roundAndPack: - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the extended -| double-precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - int32_t expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - #ifndef SOFTFLOAT_68K - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } -#endif - zSig1 = 0; - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0); - bExpBigger: - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } -#ifndef SOFTFLOAT_68K - if ( aExp == 0 ) ++expDiff; -#endif - shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - bBigger: - sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } -#ifndef SOFTFLOAT_68K - if ( bExp == 0 ) --expDiff; -#endif - shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - aBigger: - sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the extended double-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return addFloatx80Sigs(a, b, aSign, status); - } - else { - return subFloatx80Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return subFloatx80Sigs(a, b, aSign, status); - } - else { - return addFloatx80Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN(a, b, status); - } - if ( ( bExp | bSig ) == 0 ) goto invalid; - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FFE; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); -} - -#ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous -floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status ) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b, status ); - } - if ( ( bExp | bSig ) == 0 ) goto invalid; - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise( float_flag_invalid, status ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - aSig &= LIT64( 0xFFFFFF0000000000 ); - bSig &= LIT64( 0xFFFFFF0000000000 ); - zExp = aExp + bExp - 0x3FFE; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - return roundAndPackFloatx80Sgl( zSign, zExp, zSig0, zSig1, status ); - -} -#endif // End of addition for Previous - - -/*---------------------------------------------------------------------------- -| Returns the result of dividing the extended double-precision floating-point -| value `a' by the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - uint64_t rem0, rem1, rem2, term0, term1, term2; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FFE; - rem1 = 0; - if ( bSig <= aSig ) { - shift128Right( aSig, 0, 1, &aSig, &rem1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig, rem1, bSig ); - mul64To128( bSig, zSig0, &term0, &term1 ); - sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { - mul64To128( bSig, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); - } - zSig1 |= ( ( rem1 | rem2 ) != 0 ); - } - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); -} - -#ifdef SOFTFLOAT_68K // 21-01-2017: Addition for Previous -floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status ) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - uint64_t rem0, rem1, rem2, term0, term1, term2; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - goto invalid; - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - return packFloatx80( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise( float_flag_invalid, status ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - float_raise( float_flag_divbyzero, status ); - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - zExp = aExp - bExp + 0x3FFE; - rem1 = 0; - if ( bSig <= aSig ) { - shift128Right( aSig, 0, 1, &aSig, &rem1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig, rem1, bSig ); - mul64To128( bSig, zSig0, &term0, &term1 ); - sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { - mul64To128( bSig, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); - } - zSig1 |= ( ( rem1 | rem2 ) != 0 ); - } - return roundAndPackFloatx80Sgl( zSign, zExp, zSig0, zSig1, status ); - -} -#endif // End of addition for Previous - - -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -#ifndef SOFTFLOAT_68K -floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t q, term0, term1, alternateASig0, alternateASig1; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - bSig |= LIT64( 0x8000000000000000 ); - zSign = aSign; - expDiff = aExp - bExp; - aSig1 = 0; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); - expDiff = 0; - } - q = ( bSig <= aSig0 ); - if ( q ) aSig0 -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - mul64To128( bSig, q, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - q >>= 64 - expDiff; - mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++q; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - } - else { - term1 = 0; - term0 = bSig; - } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status); - -} -#else // 09-01-2017: Modified version for Previous -floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t qTemp, term0, term1, alternateASig0, alternateASig1; - floatx80 z; - - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - *q = 0; - *s = 0; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b, status ); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise( float_flag_invalid, status ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { -#ifdef SOFTFLOAT_68K - if ( aSig0 == 0 ) return a; -#else - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; -#endif - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - bSig |= LIT64( 0x8000000000000000 ); - zSign = aSign; - expDiff = aExp - bExp; - *s = (aSign != bSign); - aSig1 = 0; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); - expDiff = 0; - } - qTemp = ( bSig <= aSig0 ); - if ( qTemp ) aSig0 -= bSig; - *q = ( expDiff > 63 ) ? 0 : ( qTemp< 63 ) ? 0 : ( qTemp<>= 64 - expDiff; - mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++qTemp; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - *q += qTemp; - } - else { - term1 = 0; - term0 = bSig; - } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( qTemp & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; - ++*q; - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status ); - -} -#endif // End of modification - - -#ifdef SOFTFLOAT_68K // 08-01-2017: Added for Previous -/*---------------------------------------------------------------------------- - | Returns the modulo remainder of the extended double-precision floating-point - | value `a' with respect to the corresponding value `b'. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) -{ - flag aSign, bSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig; - uint64_t qTemp, term0, term1; - floatx80 z; - - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - *q = 0; - *s = 0; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b, status ); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise( float_flag_invalid, status ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { -#ifdef SOFTFLOAT_68K - if ( aSig0 == 0 ) return a; -#else - if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; -#endif - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - bSig |= LIT64( 0x8000000000000000 ); - zSign = aSign; - expDiff = aExp - bExp; - *s = (aSign != bSign); - aSig1 = 0; - if ( expDiff < 0 ) return a; - qTemp = ( bSig <= aSig0 ); - if ( qTemp ) aSig0 -= bSig; - *q = ( expDiff > 63 ) ? 0 : ( qTemp< 63 ) ? 0 : ( qTemp<>= 64 - expDiff; - mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++qTemp; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - *q += qTemp; - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status ); - -} -#endif // end of addition for Previous - - -/*---------------------------------------------------------------------------- -| Returns the square root of the extended double-precision floating-point -| value `a'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_sqrt(floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp, zExp; - uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig0 << 1)) { - return propagateFloatx80NaN(a, a, status); - } - if ( ! aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 ) == 0 ) return a; - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - if ( aExp == 0 ) { - if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; - zSig0 = estimateSqrt32( aExp, aSig0>>32 ); - shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= doubleZSig0; - return roundAndPackFloatx80(status->floatx80_rounding_precision, - 0, zExp, zSig0, zSig1, status); -} - - -#ifdef SOFTFLOAT_68K // 07-01-2017: Added for Previous -/*---------------------------------------------------------------------------- - | Returns the mantissa of the extended double-precision floating-point - | value `a'. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_getman( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a, status ); - float_raise( float_flag_invalid, status ); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 0x3FFF, aSig, 0, status); -} - -/*---------------------------------------------------------------------------- - | Returns the exponent of the extended double-precision floating-point - | value `a' as an extended double-precision value. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_getexp( floatx80 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a, status ); - float_raise( float_flag_invalid, status ); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - return int32_to_floatx80(aExp - 0x3FFF, status); -} - -/*---------------------------------------------------------------------------- - | Scales extended double-precision floating-point value in operand `a' by - | value `b'. The function truncates the value in the second operand 'b' to - | an integral value and adds that value to the exponent of the operand 'a'. - | The operation performed according to the IEC/IEEE Standard for Binary - | Floating-Point Arithmetic. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - int32_t aExp, bExp, shiftCount; - uint64_t aSig, bSig; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - bSign = extractFloatx80Sign(b); - - if ( bExp == 0x7FFF ) { - if ( (uint64_t) ( bSig<<1 ) || - ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b, status ); - } - float_raise( float_flag_invalid, status ); - a.low = floatx80_default_nan_low; - a.high = floatx80_default_nan_high; - return a; - } - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0); - if ( bExp < 0x3FFF ) return a; - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - - if ( bExp < 0x3FFF ) return a; - - if ( 0x400E < bExp ) { - aExp = bSign ? -0x4000 : 0x7FFF; - return roundAndPackFloatx80( - status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); - } - - shiftCount = 0x403E - bExp; - bSig >>= shiftCount; - aExp = bSign ? ( aExp - bSig ) : ( aExp + bSig ); - - return roundAndPackFloatx80( - status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status); - -} - -/*---------------------------------------------------------------------------- - | Returns the result of comparing the extended double-precision floating- - | point values `a' and `b'. The result is abstracted for matching the - | corresponding condition codes. - *----------------------------------------------------------------------------*/ - -floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status ) -{ - flag aSign, bSign; - int32_t aExp, bExp; - uint64_t aSig, bSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - - if ( ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) || - ( bExp == 0x7FFF && (uint64_t) ( bSig<<1 ) ) ) { - if ( floatx80_is_signaling_nan( a, status ) || floatx80_is_signaling_nan( b, status ) ) - float_raise( float_flag_signaling, status ); - return packFloatx80(0, 0x7FFF, floatx80_default_nan_low); } - - if ( bExp < aExp ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - if ( aExp < bExp ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) ); - - if ( aExp == 0x7FFF ) { - if ( aSign == bSign ) return packFloatx80( aSign, 0, 0 ); - return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - - if ( bSig < aSig ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - if ( aSig < bSig ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) ); - - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - - if ( aSign == bSign ) return packFloatx80( 0, 0, 0 ); - - return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - -} - -floatx80 floatx80_tst( floatx80 a, float_status *status ) -{ - if ( floatx80_is_signaling_nan( a, status ) ) - float_raise( float_flag_signaling, status ); - return a; -} - -floatx80 floatx80_move( floatx80 a, float_status *status ) -{ - flag aSign; - int32_t aExp; - uint64_t aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a, status ); - return a; - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); - } - return a; -} - -#endif // End of addition for Previous - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is equal -| to the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_eq(floatx80 a, floatx80 b, float_status *status) -{ - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than or equal to the corresponding value `b', and 0 otherwise. The -| invalid exception is raised if either operand is a NaN. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_le(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_lt(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point values `a' and `b' -| cannot be compared, and 0 otherwise. The invalid exception is raised if -| either operand is a NaN. The comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -int floatx80_unordered(floatx80 a, floatx80 b, float_status *status) -{ - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) - || (extractFloatx80Exp(a) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(a) << 1)) - || (extractFloatx80Exp(b) == 0x7FFF - && (uint64_t) (extractFloatx80Frac(b) << 1)) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status) -{ - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs -| do not cause an exception. Otherwise, the comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause -| an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 0; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point values `a' and `b' -| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception. -| The comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) -{ - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return 1; - } - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if (floatx80_is_signaling_nan(a, status) - || floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - -#ifdef FLOATX128 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32_t float128_to_int32(float128 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; - if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); - aSig0 |= ( aSig1 != 0 ); - shiftCount = 0x4028 - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); - return roundAndPackInt32(aSign, aSig0, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. If -| `a' is a NaN, the largest positive integer is returned. Otherwise, if the -| conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1, savedASig; - int32_t z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - aSig0 |= ( aSig1 != 0 ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if (aExp || aSig0) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - aSig0 |= LIT64( 0x0001000000000000 ); - shiftCount = 0x402F - aExp; - savedASig = aSig0; - aSig0 >>= shiftCount; - z = aSig0; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig0<float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 64-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64_t float128_to_int64(float128 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); - shiftCount = 0x402F - aExp; - if ( shiftCount <= 0 ) { - if ( 0x403E < aExp ) { - float_raise(float_flag_invalid, status); - if ( ! aSign - || ( ( aExp == 0x7FFF ) - && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) - ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (int64_t) LIT64( 0x8000000000000000 ); - } - shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); - } - else { - shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); - } - return roundAndPackInt64(aSign, aSig0, aSig1, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 64-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) -{ - flag aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - int64_t z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); - shiftCount = aExp - 0x402F; - if ( 0 < shiftCount ) { - if ( 0x403E <= aExp ) { - aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); - if ( ( a.high == LIT64( 0xC03E000000000000 ) ) - && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { - if (aSig1) { - status->float_exception_flags |= float_flag_inexact; - } - } - else { - float_raise(float_flag_invalid, status); - if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (int64_t) LIT64( 0x8000000000000000 ); - } - z = ( aSig0<>( ( - shiftCount ) & 63 ) ); - if ( (uint64_t) ( aSig1<float_exception_flags |= float_flag_inexact; - } - } - else { - if ( aExp < 0x3FFF ) { - if ( aExp | aSig0 | aSig1 ) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - z = aSig0>>( - shiftCount ); - if ( aSig1 - || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) { - status->float_exception_flags |= float_flag_inexact; - } - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the single-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float128_to_float32(float128 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - uint32_t zSig; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat32(float128ToCommonNaN(a, status), status); - } - return packFloat32( aSign, 0xFF, 0 ); - } - aSig0 |= ( aSig1 != 0 ); - shift64RightJamming( aSig0, 18, &aSig0 ); - zSig = aSig0; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x3F81; - } - return roundAndPackFloat32(aSign, aExp, zSig, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float128_to_float64(float128 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat64(float128ToCommonNaN(a, status), status); - } - return packFloat64( aSign, 0x7FF, 0 ); - } - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - aSig0 |= ( aSig1 != 0 ); - if ( aExp || aSig0 ) { - aSig0 |= LIT64( 0x4000000000000000 ); - aExp -= 0x3C01; - } - return roundAndPackFloat64(aSign, aExp, aSig0, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the extended double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 float128_to_floatx80(float128 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloatx80(float128ToCommonNaN(a, status), status); - } - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - else { - aSig0 |= LIT64( 0x0001000000000000 ); - } - shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); - return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status); - -} - -/*---------------------------------------------------------------------------- -| Rounds the quadruple-precision floating-point value `a' to an integer, and -| returns the result as a quadruple-precision floating-point value. The -| operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float128_round_to_int(float128 a, float_status *status) -{ - flag aSign; - int32_t aExp; - uint64_t lastBitMask, roundBitsMask; - float128 z; - - aExp = extractFloat128Exp( a ); - if ( 0x402F <= aExp ) { - if ( 0x406F <= aExp ) { - if ( ( aExp == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) - ) { - return propagateFloat128NaN(a, a, status); - } - return a; - } - lastBitMask = 1; - lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; - roundBitsMask = lastBitMask - 1; - z = a; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( lastBitMask ) { - add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else { - if ( (int64_t) z.low < 0 ) { - ++z.high; - if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1; - } - } - break; - case float_round_ties_away: - if (lastBitMask) { - add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low); - } else { - if ((int64_t) z.low < 0) { - ++z.high; - } - } - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloat128Sign(z)) { - add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); - } - break; - case float_round_down: - if (extractFloat128Sign(z)) { - add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); - } - break; - default: - abort(); - } - z.low &= ~ roundBitsMask; - } - else { - if ( aExp < 0x3FFF ) { - if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a; - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloat128Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) - && ( extractFloat128Frac0( a ) - | extractFloat128Frac1( a ) ) - ) { - return packFloat128( aSign, 0x3FFF, 0, 0 ); - } - break; - case float_round_ties_away: - if (aExp == 0x3FFE) { - return packFloat128(aSign, 0x3FFF, 0, 0); - } - break; - case float_round_down: - return - aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) - : packFloat128( 0, 0, 0, 0 ); - case float_round_up: - return - aSign ? packFloat128( 1, 0, 0, 0 ) - : packFloat128( 0, 0x3FFF, 0, 0 ); - } - return packFloat128( aSign, 0, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x402F - aExp; - roundBitsMask = lastBitMask - 1; - z.low = 0; - z.high = a.high; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z.high += lastBitMask>>1; - if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { - z.high &= ~ lastBitMask; - } - break; - case float_round_ties_away: - z.high += lastBitMask>>1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloat128Sign(z)) { - z.high |= ( a.low != 0 ); - z.high += roundBitsMask; - } - break; - case float_round_down: - if (extractFloat128Sign(z)) { - z.high |= (a.low != 0); - z.high += roundBitsMask; - } - break; - default: - abort(); - } - z.high &= ~ roundBitsMask; - } - if ( ( z.low != a.low ) || ( z.high != a.high ) ) { - status->float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the quadruple-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float128 addFloat128Sigs(float128 a, float128 b, flag zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - int32_t expDiff; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= LIT64( 0x0001000000000000 ); - } - shift128ExtraRightJamming( - bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= LIT64( 0x0001000000000000 ); - } - shift128ExtraRightJamming( - aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN(a, b, status); - } - return a; - } - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - if ( aExp == 0 ) { - if (status->flush_to_zero) { - if (zSig0 | zSig1) { - float_raise(float_flag_output_denormal, status); - } - return packFloat128(zSign, 0, 0, 0); - } - return packFloat128( zSign, 0, zSig0, zSig1 ); - } - zSig2 = 0; - zSig0 |= LIT64( 0x0002000000000000 ); - zExp = aExp; - goto shiftRight1; - } - aSig0 |= LIT64( 0x0001000000000000 ); - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - --zExp; - if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; - ++zExp; - shiftRight1: - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - roundAndPack: - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the quadruple- -| precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float128 subFloat128Sigs(float128 a, float128 b, flag zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; - int32_t expDiff; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig0 < aSig0 ) goto aBigger; - if ( aSig0 < bSig0 ) goto bBigger; - if ( bSig1 < aSig1 ) goto aBigger; - if ( aSig1 < bSig1 ) goto bBigger; - return packFloat128(status->float_rounding_mode == float_round_down, - 0, 0, 0); - bExpBigger: - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= LIT64( 0x4000000000000000 ); - } - shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - bSig0 |= LIT64( 0x4000000000000000 ); - bBigger: - sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= LIT64( 0x4000000000000000 ); - } - shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); - aSig0 |= LIT64( 0x4000000000000000 ); - aBigger: - sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1, - status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the quadruple-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float128_add(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return addFloat128Sigs(a, b, aSign, status); - } - else { - return subFloat128Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the quadruple-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float128_sub(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return subFloat128Sigs(a, b, aSign, status); - } - else { - return addFloat128Sigs(a, b, aSign, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the quadruple-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_mul(float128 a, float128 b, float_status *status) +#ifndef SOFTFLOAT_68K +floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); - zSign = aSign ^ bSign; + flag aSign, zSign; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig; + uint64_t q, term0, term1, alternateASig0, alternateASig1; + + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN(a, b, status); + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN(a, b, status); } - if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; - return packFloat128( zSign, 0x7FFF, 0, 0 ); + goto invalid; } if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); } - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { invalid: float_raise(float_flag_invalid, status); - return float128_default_nan(status); + return floatx80_default_nan(status); } - return packFloat128( zSign, 0x7FFF, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - zExp = aExp + bExp - 0x4000; - aSig0 |= LIT64( 0x0001000000000000 ); - shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); - mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); - add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zSig2 |= ( zSig3 != 0 ); - if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - ++zExp; + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + aSig1 = 0; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); + expDiff = 0; + } + q = ( bSig <= aSig0 ); + if ( q ) aSig0 -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + mul64To128( bSig, q, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++q; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + } + } + else { + term1 = 0; + term0 = bSig; + } + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; } - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); + return + normalizeRoundAndPackFloatx80( + 80, zSign, bExp + expDiff, aSig0, aSig1, status); } - -/*---------------------------------------------------------------------------- -| Returns the result of dividing the quadruple-precision floating-point value -| `a' by the corresponding value `b'. The operation is performed according to -| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float128_div(float128 a, float128 b, float_status *status) +#else // 09-01-2017: Modified version for Previous +floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) { flag aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); - zSign = aSign ^ bSign; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig; + uint64_t qTemp, term0, term1, alternateASig0, alternateASig1; + floatx80 z; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + *q = 0; + *s = 0; if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - goto invalid; + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); } - return packFloat128( zSign, 0x7FFF, 0, 0 ); + goto invalid; } if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - return packFloat128( zSign, 0, 0, 0 ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + return a; } if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloat128( zSign, 0x7FFF, 0, 0 ); + if ( bSig == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = aExp - bExp + 0x3FFD; - shortShift128Left( - aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); - shortShift128Left( - bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); - if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { - shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); - ++zExp; +#ifdef SOFTFLOAT_68K + if ( aSig0 == 0 ) return a; +#else + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; +#endif + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } - zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); - mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); - sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + *s = (aSign != bSign); + aSig1 = 0; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); + expDiff = 0; } - zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); - if ( ( zSig1 & 0x3FFF ) <= 4 ) { - mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); - sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + qTemp = ( bSig <= aSig0 ); + if ( qTemp ) aSig0 -= bSig; + *q = ( expDiff > 63 ) ? 0 : ( qTemp< 63 ) ? 0 : ( qTemp<>= 64 - expDiff; + mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++qTemp; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + *q += qTemp; } - shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); - + else { + term1 = 0; + term0 = bSig; + } + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( qTemp & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + ++*q; + } + return + normalizeRoundAndPackFloatx80( + 80, zSign, bExp + expDiff, aSig0, aSig1, status ); + } +#endif // End of modification + +#ifdef SOFTFLOAT_68K // 08-01-2017: Added for Previous /*---------------------------------------------------------------------------- -| Returns the remainder of the quadruple-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ + | Returns the modulo remainder of the extended double-precision floating-point + | value `a' with respect to the corresponding value `b'. + *----------------------------------------------------------------------------*/ -float128 float128_rem(float128 a, float128 b, float_status *status) +floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) { - flag aSign, zSign; + flag aSign, bSign, zSign; int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; - uint64_t allZero, alternateASig0, alternateASig1, sigMean1; - int64_t sigMean0; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); + uint64_t aSig0, aSig1, bSig; + uint64_t qTemp, term0, term1; + floatx80 z; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + *q = 0; + *s = 0; if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN(a, b, status); + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); } goto invalid; } if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); return a; } if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); + if ( bSig == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + z.low = floatx80_default_nan_low; + z.high = floatx80_default_nan_high; + return z; } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return a; - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); +#ifdef SOFTFLOAT_68K + if ( aSig0 == 0 ) return a; +#else + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; +#endif + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; expDiff = aExp - bExp; - if ( expDiff < -1 ) return a; - shortShift128Left( - aSig0 | LIT64( 0x0001000000000000 ), - aSig1, - 15 - ( expDiff < 0 ), - &aSig0, - &aSig1 - ); - shortShift128Left( - bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); - q = le128( bSig0, bSig1, aSig0, aSig1 ); - if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + *s = (aSign != bSign); + aSig1 = 0; + if ( expDiff < 0 ) return a; + qTemp = ( bSig <= aSig0 ); + if ( qTemp ) aSig0 -= bSig; + *q = ( expDiff > 63 ) ? 0 : ( qTemp<>= - expDiff; - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - expDiff += 52; - if ( expDiff < 0 ) { - shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - } - else { - shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); + qTemp = estimateDiv128To64( aSig0, aSig1, bSig ); + qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0; + mul64To128( bSig, qTemp, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); + *q = ( expDiff > 63 ) ? 0 : ( qTemp<>= 64 - expDiff; + mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++qTemp; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); } - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); - } - else { - shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - } - do { - alternateASig0 = aSig0; - alternateASig1 = aSig1; - ++q; - sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - } while ( 0 <= (int64_t) aSig0 ); - add128( - aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 ); - if ( ( sigMean0 < 0 ) - || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; + *q += qTemp; } - zSign = ( (int64_t) aSig0 < 0 ); - if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); - return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1, - status); + return + normalizeRoundAndPackFloatx80( + 80, zSign, bExp + expDiff, aSig0, aSig1, status ); + } +#endif // end of addition for Previous + /*---------------------------------------------------------------------------- -| Returns the square root of the quadruple-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. +| Returns the square root of the extended double-precision floating-point +| value `a'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_sqrt(float128 a, float_status *status) +floatx80 floatx80_sqrt(floatx80 a, float_status *status) { flag aSign; int32_t aExp, zExp; - uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; + uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, a, status); + if ((uint64_t)(aSig0 << 1)) { + return propagateFloatx80NaNOneArg(a, status); } if ( ! aSign ) return a; goto invalid; } if ( aSign ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; + if ( ( aExp | aSig0 ) == 0 ) return a; invalid: float_raise(float_flag_invalid, status); - return float128_default_nan(status); + return floatx80_default_nan(status); } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; - aSig0 |= LIT64( 0x0001000000000000 ); - zSig0 = estimateSqrt32( aExp, aSig0>>17 ); - shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; + zSig0 = estimateSqrt32( aExp, aSig0>>32 ); + shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); doubleZSig0 = zSig0<<1; mul64To128( zSig0, zSig0, &term0, &term1 ); @@ -7863,7 +2605,7 @@ float128 float128_sqrt(float128 a, float_status *status) add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); } zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & 0x1FFF ) <= 5 ) { + if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { if ( zSig1 == 0 ) zSig1 = 1; mul64To128( doubleZSig0, zSig1, &term1, &term2 ); sub128( rem1, 0, term1, term2, &rem1, &rem2 ); @@ -7876,910 +2618,272 @@ float128 float128_sqrt(float128 a, float_status *status) term2 |= doubleZSig0; add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_eq(float128 a, float128 b, float_status *status) -{ - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The invalid -| exception is raised if either operand is a NaN. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_le(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. The comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_lt(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. The invalid exception is raised if either -| operand is a NaN. The comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_unordered(float128 a, float128 b, float_status *status) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise(float_flag_invalid, status); - return 1; - } - return 0; -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. The comparison is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_eq_quiet(float128 a, float128 b, float_status *status) -{ - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_le_quiet(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_lt_quiet(float128 a, float128 b, float_status *status) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot -| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -int float128_unordered_quiet(float128 a, float128 b, float_status *status) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if (float128_is_signaling_nan(a, status) - || float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return 1; - } - return 0; -} - -/* misc functions */ -float32 uint32_to_float32(uint32_t a, float_status *status) -{ - return int64_to_float32(a, status); -} - -float64 uint32_to_float64(uint32_t a, float_status *status) -{ - return int64_to_float64(a, status); -} - -uint32_t float32_to_uint32(float32 a, float_status *status) -{ - int64_t v; - uint32_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float32_to_int64(a, status); - if (v < 0) { - res = 0; - } else if (v > 0xffffffff) { - res = 0xffffffff; - } else { - return v; - } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *status) -{ - int64_t v; - uint32_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float32_to_int64_round_to_zero(a, status); - if (v < 0) { - res = 0; - } else if (v > 0xffffffff) { - res = 0xffffffff; - } else { - return v; - } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -int16_t float32_to_int16(float32 a, float_status *status) -{ - int32_t v; - int16_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float32_to_int32(a, status); - if (v < -0x8000) { - res = -0x8000; - } else if (v > 0x7fff) { - res = 0x7fff; - } else { - return v; - } - - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -uint16_t float32_to_uint16(float32 a, float_status *status) -{ - int32_t v; - uint16_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float32_to_int32(a, status); - if (v < 0) { - res = 0; - } else if (v > 0xffff) { - res = 0xffff; - } else { - return v; - } - - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *status) -{ - int64_t v; - uint16_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float32_to_int64_round_to_zero(a, status); - if (v < 0) { - res = 0; - } else if (v > 0xffff) { - res = 0xffff; - } else { - return v; - } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -uint32_t float64_to_uint32(float64 a, float_status *status) -{ - uint64_t v; - uint32_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float64_to_uint64(a, status); - if (v > 0xffffffff) { - res = 0xffffffff; - } else { - return v; - } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *status) -{ - uint64_t v; - uint32_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float64_to_uint64_round_to_zero(a, status); - if (v > 0xffffffff) { - res = 0xffffffff; - } else { - return v; - } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -int16_t float64_to_int16(float64 a, float_status *status) -{ - int64_t v; - int16_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float64_to_int32(a, status); - if (v < -0x8000) { - res = -0x8000; - } else if (v > 0x7fff) { - res = 0x7fff; - } else { - return v; - } - - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -uint16_t float64_to_uint16(float64 a, float_status *status) -{ - int64_t v; - uint16_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float64_to_int32(a, status); - if (v < 0) { - res = 0; - } else if (v > 0xffff) { - res = 0xffff; - } else { - return v; - } - - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *status) -{ - int64_t v; - uint16_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float64_to_int64_round_to_zero(a, status); - if (v < 0) { - res = 0; - } else if (v > 0xffff) { - res = 0xffff; - } else { - return v; - } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 64-bit unsigned integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. If the conversion overflows, the -| largest unsigned integer is returned. If 'a' is negative, the value is -| rounded and zero is returned; negative values that do not round to zero -| will raise the inexact exception. -*----------------------------------------------------------------------------*/ - -uint64_t float64_to_uint64(float64 a, float_status *status) -{ - flag aSign; - int aExp; - int shiftCount; - uint64_t aSig, aSigExtra; - a = float64_squash_input_denormal(a, status); - - aSig = extractFloat64Frac(a); - aExp = extractFloat64Exp(a); - aSign = extractFloat64Sign(a); - if (aSign && (aExp > 1022)) { - float_raise(float_flag_invalid, status); - if (float64_is_any_nan(a)) { - return LIT64(0xFFFFFFFFFFFFFFFF); - } else { - return 0; - } - } - if (aExp) { - aSig |= LIT64(0x0010000000000000); - } - shiftCount = 0x433 - aExp; - if (shiftCount <= 0) { - if (0x43E < aExp) { - float_raise(float_flag_invalid, status); - return LIT64(0xFFFFFFFFFFFFFFFF); - } - aSigExtra = 0; - aSig <<= -shiftCount; - } else { - shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra); - } - return roundAndPackUint64(aSign, aSig, aSigExtra, status); -} - -uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status) -{ - signed char current_rounding_mode = status->float_rounding_mode; - set_float_rounding_mode(float_round_to_zero, status); - int64_t v = float64_to_uint64(a, status); - set_float_rounding_mode(current_rounding_mode, status); - return v; -} - -#define COMPARE(s, nan_exp) \ -static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\ - int is_quiet, float_status *status) \ -{ \ - flag aSign, bSign; \ - uint ## s ## _t av, bv; \ - a = float ## s ## _squash_input_denormal(a, status); \ - b = float ## s ## _squash_input_denormal(b, status); \ - \ - if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \ - extractFloat ## s ## Frac( a ) ) || \ - ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \ - extractFloat ## s ## Frac( b ) )) { \ - if (!is_quiet || \ - float ## s ## _is_signaling_nan(a, status) || \ - float ## s ## _is_signaling_nan(b, status)) { \ - float_raise(float_flag_invalid, status); \ - } \ - return float_relation_unordered; \ - } \ - aSign = extractFloat ## s ## Sign( a ); \ - bSign = extractFloat ## s ## Sign( b ); \ - av = float ## s ## _val(a); \ - bv = float ## s ## _val(b); \ - if ( aSign != bSign ) { \ - if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \ - /* zero case */ \ - return float_relation_equal; \ - } else { \ - return 1 - (2 * aSign); \ - } \ - } else { \ - if (av == bv) { \ - return float_relation_equal; \ - } else { \ - return 1 - 2 * (aSign ^ ( av < bv )); \ - } \ - } \ -} \ - \ -int float ## s ## _compare(float ## s a, float ## s b, float_status *status) \ -{ \ - return float ## s ## _compare_internal(a, b, 0, status); \ -} \ - \ -int float ## s ## _compare_quiet(float ## s a, float ## s b, \ - float_status *status) \ -{ \ - return float ## s ## _compare_internal(a, b, 1, status); \ -} - -COMPARE(32, 0xff) -COMPARE(64, 0x7ff) - -static inline int floatx80_compare_internal(floatx80 a, floatx80 b, - int is_quiet, float_status *status) -{ - flag aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return float_relation_unordered; - } - if (( ( extractFloatx80Exp( a ) == 0x7fff ) && - ( extractFloatx80Frac( a )<<1 ) ) || - ( ( extractFloatx80Exp( b ) == 0x7fff ) && - ( extractFloatx80Frac( b )<<1 ) )) { - if (!is_quiet || - floatx80_is_signaling_nan(a, status) || - floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return float_relation_unordered; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - - if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) && - ( ( a.low | b.low ) == 0 ) ) { - /* zero case */ - return float_relation_equal; - } else { - return 1 - (2 * aSign); - } - } else { - if (a.low == b.low && a.high == b.high) { - return float_relation_equal; - } else { - return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); - } - } -} - -int floatx80_compare(floatx80 a, floatx80 b, float_status *status) -{ - return floatx80_compare_internal(a, b, 0, status); -} - -int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status) -{ - return floatx80_compare_internal(a, b, 1, status); -} - -static inline int float128_compare_internal(float128 a, float128 b, - int is_quiet, float_status *status) -{ - flag aSign, bSign; - - if (( ( extractFloat128Exp( a ) == 0x7fff ) && - ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || - ( ( extractFloat128Exp( b ) == 0x7fff ) && - ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) { - if (!is_quiet || - float128_is_signaling_nan(a, status) || - float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return float_relation_unordered; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) { - /* zero case */ - return float_relation_equal; - } else { - return 1 - (2 * aSign); - } - } else { - if (a.low == b.low && a.high == b.high) { - return float_relation_equal; - } else { - return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); - } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); } + shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= doubleZSig0; + return roundAndPackFloatx80(status->floatx80_rounding_precision, + 0, zExp, zSig0, zSig1, status); } -int float128_compare(float128 a, float128 b, float_status *status) -{ - return float128_compare_internal(a, b, 0, status); -} - -int float128_compare_quiet(float128 a, float128 b, float_status *status) -{ - return float128_compare_internal(a, b, 1, status); -} - -/* min() and max() functions. These can't be implemented as - * 'compare and pick one input' because that would mishandle - * NaNs and +0 vs -0. - * - * minnum() and maxnum() functions. These are similar to the min() - * and max() functions but if one of the arguments is a QNaN and - * the other is numerical then the numerical argument is returned. - * minnum() and maxnum correspond to the IEEE 754-2008 minNum() - * and maxNum() operations. min() and max() are the typical min/max - * semantics provided by many CPUs which predate that specification. - * - * minnummag() and maxnummag() functions correspond to minNumMag() - * and minNumMag() from the IEEE-754 2008. - */ -#define MINMAX(s) \ -static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \ - int ismin, int isieee, \ - int ismag, \ - float_status *status) \ -{ \ - flag aSign, bSign; \ - uint ## s ## _t av, bv, aav, abv; \ - a = float ## s ## _squash_input_denormal(a, status); \ - b = float ## s ## _squash_input_denormal(b, status); \ - if (float ## s ## _is_any_nan(a) || \ - float ## s ## _is_any_nan(b)) { \ - if (isieee) { \ - if (float ## s ## _is_quiet_nan(a, status) && \ - !float ## s ##_is_any_nan(b)) { \ - return b; \ - } else if (float ## s ## _is_quiet_nan(b, status) && \ - !float ## s ## _is_any_nan(a)) { \ - return a; \ - } \ - } \ - return propagateFloat ## s ## NaN(a, b, status); \ - } \ - aSign = extractFloat ## s ## Sign(a); \ - bSign = extractFloat ## s ## Sign(b); \ - av = float ## s ## _val(a); \ - bv = float ## s ## _val(b); \ - if (ismag) { \ - aav = float ## s ## _abs(av); \ - abv = float ## s ## _abs(bv); \ - if (aav != abv) { \ - if (ismin) { \ - return (aav < abv) ? a : b; \ - } else { \ - return (aav < abv) ? b : a; \ - } \ - } \ - } \ - if (aSign != bSign) { \ - if (ismin) { \ - return aSign ? a : b; \ - } else { \ - return aSign ? b : a; \ - } \ - } else { \ - if (ismin) { \ - return (aSign ^ (av < bv)) ? a : b; \ - } else { \ - return (aSign ^ (av < bv)) ? b : a; \ - } \ - } \ -} \ - \ -float ## s float ## s ## _min(float ## s a, float ## s b, \ - float_status *status) \ -{ \ - return float ## s ## _minmax(a, b, 1, 0, 0, status); \ -} \ - \ -float ## s float ## s ## _max(float ## s a, float ## s b, \ - float_status *status) \ -{ \ - return float ## s ## _minmax(a, b, 0, 0, 0, status); \ -} \ - \ -float ## s float ## s ## _minnum(float ## s a, float ## s b, \ - float_status *status) \ -{ \ - return float ## s ## _minmax(a, b, 1, 1, 0, status); \ -} \ - \ -float ## s float ## s ## _maxnum(float ## s a, float ## s b, \ - float_status *status) \ -{ \ - return float ## s ## _minmax(a, b, 0, 1, 0, status); \ -} \ - \ -float ## s float ## s ## _minnummag(float ## s a, float ## s b, \ - float_status *status) \ -{ \ - return float ## s ## _minmax(a, b, 1, 1, 1, status); \ -} \ - \ -float ## s float ## s ## _maxnummag(float ## s a, float ## s b, \ - float_status *status) \ -{ \ - return float ## s ## _minmax(a, b, 0, 1, 1, status); \ -} - -MINMAX(32) -MINMAX(64) +#ifdef SOFTFLOAT_68K // 07-01-2017: Added for Previous +/*---------------------------------------------------------------------------- + | Returns the mantissa of the extended double-precision floating-point + | value `a'. + *----------------------------------------------------------------------------*/ -/* Multiply A by 2 raised to the power N. */ -float32 float32_scalbn(float32 a, int n, float_status *status) +floatx80 floatx80_getman( floatx80 a, float_status *status) { flag aSign; - int16_t aExp; - uint32_t aSig; - - a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - - if ( aExp == 0xFF ) { - if ( aSig ) { - return propagateFloat32NaN(a, a, status); - } - return a; - } - if (aExp != 0) { - aSig |= 0x00800000; - } else if (aSig == 0) { + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); + float_raise( float_flag_invalid, status ); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; return a; - } else { - aExp++; } - - if (n > 0x200) { - n = 0x200; - } else if (n < -0x200) { - n = -0x200; + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } - - aExp += n - 1; - aSig <<= 7; - return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status); + + return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 0x3FFF, aSig, 0, status); } -float64 float64_scalbn(float64 a, int n, float_status *status) +/*---------------------------------------------------------------------------- + | Returns the exponent of the extended double-precision floating-point + | value `a' as an extended double-precision value. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_getexp( floatx80 a, float_status *status) { flag aSign; - int16_t aExp; + int32_t aExp; uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); + float_raise( float_flag_invalid, status ); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return int32_to_floatx80(aExp - 0x3FFF, status); +} - a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); +/*---------------------------------------------------------------------------- + | Scales extended double-precision floating-point value in operand `a' by + | value `b'. The function truncates the value in the second operand 'b' to + | an integral value and adds that value to the exponent of the operand 'a'. + | The operation performed according to the IEC/IEEE Standard for Binary + | Floating-Point Arithmetic. + *----------------------------------------------------------------------------*/ - if ( aExp == 0x7FF ) { - if ( aSig ) { - return propagateFloat64NaN(a, a, status); +floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, bSign; + int32_t aExp, bExp, shiftCount; + uint64_t aSig, bSig; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + bSign = extractFloatx80Sign(b); + + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) || + ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); } + float_raise( float_flag_invalid, status ); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; return a; } - if (aExp != 0) { - aSig |= LIT64( 0x0010000000000000 ); - } else if (aSig == 0) { - return a; - } else { - aExp++; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } - - if (n > 0x1000) { - n = 0x1000; - } else if (n < -0x1000) { - n = -0x1000; + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0); + if ( bExp < 0x3FFF ) return a; + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } - - aExp += n - 1; - aSig <<= 10; - return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status); + + if ( bExp < 0x3FFF ) return a; + + if ( 0x400E < bExp ) { + aExp = bSign ? -0x4000 : 0x7FFF; + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); + } + + shiftCount = 0x403E - bExp; + bSig >>= shiftCount; + aExp = bSign ? ( aExp - bSig ) : ( aExp + bSig ); + + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status); + } + +/*----------------------------------------------------------------------------- + | Calculates the absolute value of the extended double-precision floating-point + | value `a'. The operation is performed according to the IEC/IEEE Standard + | for Binary Floating-Point Arithmetic. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_abs(floatx80 a, float_status *status) +{ + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + + if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) { + return propagateFloatx80NaNOneArg( a, status ); + } + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } -floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) + return roundAndPackFloatx80( + status->floatx80_rounding_precision, 0, aExp, aSig, 0, status ); + +} + +/*----------------------------------------------------------------------------- + | Changes the sign of the extended double-precision floating-point value 'a'. + | The operation is performed according to the IEC/IEEE Standard for Binary + | Floating-Point Arithmetic. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_neg(floatx80 a, float_status *status) { flag aSign; int32_t aExp; uint64_t aSig; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) { + return propagateFloatx80NaNOneArg( a, status ); + } + + aSign = !aSign; + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } + + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); + +} + +/*---------------------------------------------------------------------------- + | Returns the result of comparing the extended double-precision floating- + | point values `a' and `b'. The result is abstracted for matching the + | corresponding condition codes. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status ) +{ + flag aSign, bSign; + int32_t aExp, bExp; + uint64_t aSig, bSig; + aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); - + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + + if ( ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) || + ( bExp == 0x7FFF && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( packFloatx80( 0, aExp, aSig ), + packFloatx80( 0, bExp, bSig ), status ); + } + + if ( bExp < aExp ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + if ( aExp < bExp ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) ); + if ( aExp == 0x7FFF ) { - if ( aSig<<1 ) { - return propagateFloatx80NaN(a, a, status); - } - return a; - } - - if (aExp == 0) { - if (aSig == 0) { - return a; - } - aExp++; - } - - if (n > 0x10000) { - n = 0x10000; - } else if (n < -0x10000) { - n = -0x10000; + if ( aSign == bSign ) return packFloatx80( aSign, 0, 0 ); + return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); } + + if ( bSig < aSig ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + if ( aSig < bSig ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) ); + + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + + if ( aSign == bSign ) return packFloatx80( 0, 0, 0 ); + + return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + +} - aExp += n; - return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, - aSign, aExp, aSig, 0, status); +floatx80 floatx80_tst( floatx80 a, float_status *status ) +{ + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + + if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) + return propagateFloatx80NaNOneArg( a, status ); + return a; } -float128 float128_scalbn(float128 a, int n, float_status *status) +floatx80 floatx80_move( floatx80 a, float_status *status ) { flag aSign; int32_t aExp; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return propagateFloat128NaN(a, a, status); - } - return a; - } - if (aExp != 0) { - aSig0 |= LIT64( 0x0001000000000000 ); - } else if (aSig0 == 0 && aSig1 == 0) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); return a; - } else { - aExp++; } - - if (n > 0x10000) { - n = 0x10000; - } else if (n < -0x10000) { - n = -0x10000; + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); } - - aExp += n - 1; - return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1 - , status); - + return a; } -#endif +#endif // End of addition for Previous diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index 6dfaf616..3ec8ebc7 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -280,11 +280,34 @@ static inline flag get_default_nan_mode(float_status *status) *----------------------------------------------------------------------------*/ void float_raise(uint8_t flags, float_status *status); + +/*---------------------------------------------------------------------------- + | The pattern for a default generated single-precision NaN. + *----------------------------------------------------------------------------*/ +#define float32_default_nan 0x7FFFFFFF + +/*---------------------------------------------------------------------------- + | The pattern for a default generated double-precision NaN. + *----------------------------------------------------------------------------*/ +#define float64_default_nan LIT64( 0x7FFFFFFFFFFFFFFF ) + +/*---------------------------------------------------------------------------- + | The pattern for a default generated extended double-precision NaN. The + | `high' and `low' values hold the most- and least-significant bits, + | respectively. + *----------------------------------------------------------------------------*/ +#define floatx80_default_nan_high 0x7FFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/*---------------------------------------------------------------------------- + | The pattern for a default generated extended double-precision infinity. + *----------------------------------------------------------------------------*/ +#define floatx80_default_infinity_low LIT64( 0x0000000000000000 ) + /*---------------------------------------------------------------------------- | If `a' is denormal and we are in flush-to-zero mode then set the | input-denormal exception and return zero. Otherwise just return the value. *----------------------------------------------------------------------------*/ -float32 float32_squash_input_denormal(float32 a, float_status *status); float64 float64_squash_input_denormal(float64 a, float_status *status); /*---------------------------------------------------------------------------- @@ -305,294 +328,22 @@ enum { /*---------------------------------------------------------------------------- | Software IEC/IEEE integer-to-floating-point conversion routines. *----------------------------------------------------------------------------*/ -float32 int32_to_float32(int32_t, float_status *status); -float64 int32_to_float64(int32_t, float_status *status); -float32 uint32_to_float32(uint32_t, float_status *status); -float64 uint32_to_float64(uint32_t, float_status *status); -floatx80 int32_to_floatx80(int32_t, float_status *status); -float128 int32_to_float128(int32_t, float_status *status); -float32 int64_to_float32(int64_t, float_status *status); -float64 int64_to_float64(int64_t, float_status *status); -floatx80 int64_to_floatx80(int64_t, float_status *status); -float128 int64_to_float128(int64_t, float_status *status); -float32 uint64_to_float32(uint64_t, float_status *status); -float64 uint64_to_float64(uint64_t, float_status *status); -float128 uint64_to_float128(uint64_t, float_status *status); - -/* We provide the int16 versions for symmetry of API with float-to-int */ -static inline float32 int16_to_float32(int16_t v, float_status *status) -{ - return int32_to_float32(v, status); -} - -static inline float32 uint16_to_float32(uint16_t v, float_status *status) -{ - return uint32_to_float32(v, status); -} - -static inline float64 int16_to_float64(int16_t v, float_status *status) -{ - return int32_to_float64(v, status); -} -static inline float64 uint16_to_float64(uint16_t v, float_status *status) -{ - return uint32_to_float64(v, status); -} - -/*---------------------------------------------------------------------------- -| Software half-precision conversion routines. -*----------------------------------------------------------------------------*/ -float16 float32_to_float16(float32, flag, float_status *status); -float32 float16_to_float32(float16, flag, float_status *status); -float16 float64_to_float16(float64 a, flag ieee, float_status *status); -float64 float16_to_float64(float16 a, flag ieee, float_status *status); - -/*---------------------------------------------------------------------------- -| Software half-precision operations. -*----------------------------------------------------------------------------*/ -int float16_is_quiet_nan(float16, float_status *status); -int float16_is_signaling_nan(float16, float_status *status); -float16 float16_maybe_silence_nan(float16, float_status *status); - -static inline int float16_is_any_nan(float16 a) -{ - return ((float16_val(a) & ~0x8000) > 0x7c00); -} - -/*---------------------------------------------------------------------------- -| The pattern for a default generated half-precision NaN. -*----------------------------------------------------------------------------*/ -float16 float16_default_nan(float_status *status); +floatx80 int32_to_floatx80(int32_t, float_status *status); /*---------------------------------------------------------------------------- | Software IEC/IEEE single-precision conversion routines. *----------------------------------------------------------------------------*/ -int16_t float32_to_int16(float32, float_status *status); -uint16_t float32_to_uint16(float32, float_status *status); -int16_t float32_to_int16_round_to_zero(float32, float_status *status); -uint16_t float32_to_uint16_round_to_zero(float32, float_status *status); -int32_t float32_to_int32(float32, float_status *status); -int32_t float32_to_int32_round_to_zero(float32, float_status *status); -uint32_t float32_to_uint32(float32, float_status *status); -uint32_t float32_to_uint32_round_to_zero(float32, float_status *status); -int64_t float32_to_int64(float32, float_status *status); -uint64_t float32_to_uint64(float32, float_status *status); -uint64_t float32_to_uint64_round_to_zero(float32, float_status *status); -int64_t float32_to_int64_round_to_zero(float32, float_status *status); -float64 float32_to_float64(float32, float_status *status); floatx80 float32_to_floatx80(float32, float_status *status); -float128 float32_to_float128(float32, float_status *status); - floatx80 float32_to_floatx80_allowunnormal(float32, float_status *status); -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision operations. -*----------------------------------------------------------------------------*/ -float32 float32_round_to_int(float32, float_status *status); -float32 float32_add(float32, float32, float_status *status); -float32 float32_sub(float32, float32, float_status *status); -float32 float32_mul(float32, float32, float_status *status); -float32 float32_div(float32, float32, float_status *status); -float32 float32_rem(float32, float32, float_status *status); -float32 float32_muladd(float32, float32, float32, int, float_status *status); -float32 float32_sqrt(float32, float_status *status); -float32 float32_exp2(float32, float_status *status); -float32 float32_log2(float32, float_status *status); -int float32_eq(float32, float32, float_status *status); -int float32_le(float32, float32, float_status *status); -int float32_lt(float32, float32, float_status *status); -int float32_unordered(float32, float32, float_status *status); -int float32_eq_quiet(float32, float32, float_status *status); -int float32_le_quiet(float32, float32, float_status *status); -int float32_lt_quiet(float32, float32, float_status *status); -int float32_unordered_quiet(float32, float32, float_status *status); -int float32_compare(float32, float32, float_status *status); -int float32_compare_quiet(float32, float32, float_status *status); -float32 float32_min(float32, float32, float_status *status); -float32 float32_max(float32, float32, float_status *status); -float32 float32_minnum(float32, float32, float_status *status); -float32 float32_maxnum(float32, float32, float_status *status); -float32 float32_minnummag(float32, float32, float_status *status); -float32 float32_maxnummag(float32, float32, float_status *status); -int float32_is_quiet_nan(float32, float_status *status); -int float32_is_signaling_nan(float32, float_status *status); -float32 float32_maybe_silence_nan(float32, float_status *status); -float32 float32_scalbn(float32, int, float_status *status); - -static inline float32 float32_abs(float32 a) -{ - /* Note that abs does *not* handle NaN specially, nor does - * it flush denormal inputs to zero. - */ - return make_float32(float32_val(a) & 0x7fffffff); -} - -static inline float32 float32_chs(float32 a) -{ - /* Note that chs does *not* handle NaN specially, nor does - * it flush denormal inputs to zero. - */ - return make_float32(float32_val(a) ^ 0x80000000); -} - -static inline int float32_is_infinity(float32 a) -{ - return (float32_val(a) & 0x7fffffff) == 0x7f800000; -} - -static inline int float32_is_neg(float32 a) -{ - return float32_val(a) >> 31; -} - -static inline int float32_is_zero(float32 a) -{ - return (float32_val(a) & 0x7fffffff) == 0; -} - -static inline int float32_is_any_nan(float32 a) -{ - return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL); -} - -static inline int float32_is_zero_or_denormal(float32 a) -{ - return (float32_val(a) & 0x7f800000) == 0; -} - -static inline float32 float32_set_sign(float32 a, int sign) -{ - return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31)); -} - -#define float32_zero make_float32(0) -#define float32_one make_float32(0x3f800000) -#define float32_ln2 make_float32(0x3f317218) -#define float32_pi make_float32(0x40490fdb) -#define float32_half make_float32(0x3f000000) -#define float32_infinity make_float32(0x7f800000) - - -/*---------------------------------------------------------------------------- -| The pattern for a default generated single-precision NaN. -*----------------------------------------------------------------------------*/ -float32 float32_default_nan(float_status *status); - /*---------------------------------------------------------------------------- | Software IEC/IEEE double-precision conversion routines. *----------------------------------------------------------------------------*/ -int16_t float64_to_int16(float64, float_status *status); -uint16_t float64_to_uint16(float64, float_status *status); -int16_t float64_to_int16_round_to_zero(float64, float_status *status); -uint16_t float64_to_uint16_round_to_zero(float64, float_status *status); -int32_t float64_to_int32(float64, float_status *status); -int32_t float64_to_int32_round_to_zero(float64, float_status *status); -uint32_t float64_to_uint32(float64, float_status *status); -uint32_t float64_to_uint32_round_to_zero(float64, float_status *status); -int64_t float64_to_int64(float64, float_status *status); -int64_t float64_to_int64_round_to_zero(float64, float_status *status); -uint64_t float64_to_uint64(float64 a, float_status *status); -uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status); -float32 float64_to_float32(float64, float_status *status); floatx80 float64_to_floatx80(float64, float_status *status); -float128 float64_to_float128(float64, float_status *status); floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status ); -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision operations. -*----------------------------------------------------------------------------*/ -float64 float64_round_to_int(float64, float_status *status); -float64 float64_trunc_to_int(float64, float_status *status); -float64 float64_add(float64, float64, float_status *status); -float64 float64_sub(float64, float64, float_status *status); -float64 float64_mul(float64, float64, float_status *status); -float64 float64_div(float64, float64, float_status *status); -float64 float64_rem(float64, float64, float_status *status); -float64 float64_muladd(float64, float64, float64, int, float_status *status); -float64 float64_sqrt(float64, float_status *status); -float64 float64_log2(float64, float_status *status); -int float64_eq(float64, float64, float_status *status); -int float64_le(float64, float64, float_status *status); -int float64_lt(float64, float64, float_status *status); -int float64_unordered(float64, float64, float_status *status); -int float64_eq_quiet(float64, float64, float_status *status); -int float64_le_quiet(float64, float64, float_status *status); -int float64_lt_quiet(float64, float64, float_status *status); -int float64_unordered_quiet(float64, float64, float_status *status); -int float64_compare(float64, float64, float_status *status); -int float64_compare_quiet(float64, float64, float_status *status); -float64 float64_min(float64, float64, float_status *status); -float64 float64_max(float64, float64, float_status *status); -float64 float64_minnum(float64, float64, float_status *status); -float64 float64_maxnum(float64, float64, float_status *status); -float64 float64_minnummag(float64, float64, float_status *status); -float64 float64_maxnummag(float64, float64, float_status *status); -int float64_is_quiet_nan(float64 a, float_status *status); -int float64_is_signaling_nan(float64, float_status *status); -float64 float64_maybe_silence_nan(float64, float_status *status); -float64 float64_scalbn(float64, int, float_status *status); - -static inline float64 float64_abs(float64 a) -{ - /* Note that abs does *not* handle NaN specially, nor does - * it flush denormal inputs to zero. - */ - return make_float64(float64_val(a) & 0x7fffffffffffffffLL); -} - -static inline float64 float64_chs(float64 a) -{ - /* Note that chs does *not* handle NaN specially, nor does - * it flush denormal inputs to zero. - */ - return make_float64(float64_val(a) ^ 0x8000000000000000LL); -} - -static inline int float64_is_infinity(float64 a) -{ - return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL; -} - -static inline int float64_is_neg(float64 a) -{ - return float64_val(a) >> 63; -} - -static inline int float64_is_zero(float64 a) -{ - return (float64_val(a) & 0x7fffffffffffffffLL) == 0; -} - -static inline int float64_is_any_nan(float64 a) -{ - return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL); -} - -static inline int float64_is_zero_or_denormal(float64 a) -{ - return (float64_val(a) & 0x7ff0000000000000LL) == 0; -} - -static inline float64 float64_set_sign(float64 a, int sign) -{ - return make_float64((float64_val(a) & 0x7fffffffffffffffULL) - | ((int64_t)sign << 63)); -} - -#define float64_zero make_float64(0) -#define float64_one make_float64(0x3ff0000000000000LL) -#define float64_ln2 make_float64(0x3fe62e42fefa39efLL) -#define float64_pi make_float64(0x400921fb54442d18LL) -#define float64_half make_float64(0x3fe0000000000000LL) -#define float64_infinity make_float64(0x7ff0000000000000LL) - -/*---------------------------------------------------------------------------- -| The pattern for a default generated double-precision NaN. -*----------------------------------------------------------------------------*/ -float64 float64_default_nan(float_status *status); - /*---------------------------------------------------------------------------- | Software IEC/IEEE extended double-precision conversion routines. *----------------------------------------------------------------------------*/ @@ -609,33 +360,68 @@ float64 floatx80_to_float64(floatx80, float_status *status); #ifdef SOFTFLOAT_68K floatx80 floatx80_to_floatx80( floatx80, float_status *status); #endif -float128 floatx80_to_float128(floatx80, float_status *status); + +uint64_t extractFloatx80Frac( floatx80 a ); +int32_t extractFloatx80Exp( floatx80 a ); +flag extractFloatx80Sign( floatx80 a ); floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status); +floatx80 floatx80_round_to_float32( floatx80, float_status *status ); +floatx80 floatx80_round_to_float64( floatx80, float_status *status ); floatx80 floatx80_round32( floatx80, float_status *status); floatx80 floatx80_round64( floatx80, float_status *status); -int floatx80_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, float_status *status); -int floatx80_fsin(floatx80 *a, float_status *status); -int floatx80_fcos(floatx80 *a, float_status *status); -int floatx80_ftan(floatx80 *a, float_status *status); - -floatx80 floatx80_flognp1(floatx80 a, float_status *status); -floatx80 floatx80_flogn(floatx80 a, float_status *status);; -floatx80 floatx80_flog2(floatx80 a, float_status *status); -floatx80 floatx80_flog10(floatx80 a, float_status *status); - -floatx80 floatx80_getman( floatx80 a, float_status *status); -floatx80 floatx80_getexp( floatx80 a, float_status *status); +#ifdef SOFTFLOAT_68K +flag floatx80_is_zero( floatx80 ); +flag floatx80_is_infinity( floatx80 ); +flag floatx80_is_negative( floatx80 ); +flag floatx80_is_denormal( floatx80 ); +flag floatx80_is_unnormal( floatx80 ); +flag floatx80_is_normal( floatx80 ); + +// functions are in softfloat.c +floatx80 floatx80_move( floatx80 a, float_status *status ); +floatx80 floatx80_abs( floatx80 a, float_status *status ); +floatx80 floatx80_neg( floatx80 a, float_status *status ); +floatx80 floatx80_getexp( floatx80 a, float_status *status ); +floatx80 floatx80_getman( floatx80 a, float_status *status ); +floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status ); floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); -floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status); -floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status); -floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status); -floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status); +floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status ); +floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status ); +floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status ); floatx80 floatx80_tst( floatx80 a, float_status *status ); -floatx80 floatx80_move( floatx80 a, float_status *status); +// functions are in softfloat_extension.c +floatx80 floatx80_acos_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_asin_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_atan_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_atanh_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_cos_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_cosh_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_etox_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_etoxm1_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_log10_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_log2_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_logn_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_lognp1_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_sin_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_sinh_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_tan_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_tanh_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_tentox_check(floatx80 a, flag *e, float_status *status); +floatx80 floatx80_twotox_check(floatx80 a, flag *e, float_status *status); +#endif + +// functions originally internal to softfloat.c +void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr ); +floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig ); +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); + +// functions are in softfloat-specialize.h +floatx80 propagateFloatx80NaNOneArg( floatx80 a, float_status *status ); +floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b, float_status *status ); /*---------------------------------------------------------------------------- | Software IEC/IEEE extended double-precision operations. *----------------------------------------------------------------------------*/ @@ -644,40 +430,10 @@ floatx80 floatx80_add(floatx80, floatx80, float_status *status); floatx80 floatx80_sub(floatx80, floatx80, float_status *status); floatx80 floatx80_mul(floatx80, floatx80, float_status *status); floatx80 floatx80_div(floatx80, floatx80, float_status *status); -//floatx80 floatx80_rem(floatx80, floatx80, float_status *status); floatx80 floatx80_sqrt(floatx80, float_status *status); -int floatx80_eq(floatx80, floatx80, float_status *status); -int floatx80_le(floatx80, floatx80, float_status *status); -int floatx80_lt(floatx80, floatx80, float_status *status); -int floatx80_unordered(floatx80, floatx80, float_status *status); -int floatx80_eq_quiet(floatx80, floatx80, float_status *status); -int floatx80_le_quiet(floatx80, floatx80, float_status *status); -int floatx80_lt_quiet(floatx80, floatx80, float_status *status); -int floatx80_unordered_quiet(floatx80, floatx80, float_status *status); -int floatx80_compare(floatx80, floatx80, float_status *status); -int floatx80_compare_quiet(floatx80, floatx80, float_status *status); -int floatx80_is_quiet_nan(floatx80, float_status *status); -int floatx80_is_signaling_nan(floatx80, float_status *status); -floatx80 floatx80_maybe_silence_nan(floatx80, float_status *status); -floatx80 floatx80_scalbn(floatx80, int, float_status *status); - -//flag floatx80_is_unnormal( floatx80 a ); -//flag floatx80_is_denormal( floatx80 a ); - +flag floatx80_is_signaling_nan(floatx80); floatx80 floatx80_normalize(floatx80); -static inline floatx80 floatx80_abs(floatx80 a) -{ - a.high &= 0x7fff; - return a; -} - -static inline floatx80 floatx80_chs(floatx80 a) -{ - a.high ^= 0x8000; - return a; -} - static inline int floatx80_is_zero_or_denormal(floatx80 a) { return (a.high & 0x7fff) == 0; @@ -715,85 +471,4 @@ static inline bool floatx80_invalid_encoding(floatx80 a) *----------------------------------------------------------------------------*/ floatx80 floatx80_default_nan(float_status *status); -/*---------------------------------------------------------------------------- -| Software IEC/IEEE quadruple-precision conversion routines. -*----------------------------------------------------------------------------*/ -int32_t float128_to_int32(float128, float_status *status); -int32_t float128_to_int32_round_to_zero(float128, float_status *status); -int64_t float128_to_int64(float128, float_status *status); -int64_t float128_to_int64_round_to_zero(float128, float_status *status); -float32 float128_to_float32(float128, float_status *status); -float64 float128_to_float64(float128, float_status *status); -floatx80 float128_to_floatx80(float128, float_status *status); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE quadruple-precision operations. -*----------------------------------------------------------------------------*/ -float128 float128_round_to_int(float128, float_status *status); -float128 float128_add(float128, float128, float_status *status); -float128 float128_sub(float128, float128, float_status *status); -float128 float128_mul(float128, float128, float_status *status); -float128 float128_div(float128, float128, float_status *status); -float128 float128_rem(float128, float128, float_status *status); -float128 float128_sqrt(float128, float_status *status); -int float128_eq(float128, float128, float_status *status); -int float128_le(float128, float128, float_status *status); -int float128_lt(float128, float128, float_status *status); -int float128_unordered(float128, float128, float_status *status); -int float128_eq_quiet(float128, float128, float_status *status); -int float128_le_quiet(float128, float128, float_status *status); -int float128_lt_quiet(float128, float128, float_status *status); -int float128_unordered_quiet(float128, float128, float_status *status); -int float128_compare(float128, float128, float_status *status); -int float128_compare_quiet(float128, float128, float_status *status); -int float128_is_quiet_nan(float128, float_status *status); -int float128_is_signaling_nan(float128, float_status *status); -float128 float128_maybe_silence_nan(float128, float_status *status); -float128 float128_scalbn(float128, int, float_status *status); - -static inline float128 float128_abs(float128 a) -{ - a.high &= 0x7fffffffffffffffLL; - return a; -} - -static inline float128 float128_chs(float128 a) -{ - a.high ^= 0x8000000000000000LL; - return a; -} - -static inline int float128_is_infinity(float128 a) -{ - return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0; -} - -static inline int float128_is_neg(float128 a) -{ - return a.high >> 63; -} - -static inline int float128_is_zero(float128 a) -{ - return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0; -} - -static inline int float128_is_zero_or_denormal(float128 a) -{ - return (a.high & 0x7fff000000000000LL) == 0; -} - -static inline int float128_is_any_nan(float128 a) -{ - return ((a.high >> 48) & 0x7fff) == 0x7fff && - ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0)); -} - -#define float128_zero make_float128(0, 0) - -/*---------------------------------------------------------------------------- -| The pattern for a default generated quadruple-precision NaN. -*----------------------------------------------------------------------------*/ -float128 float128_default_nan(float_status *status); - #endif /* SOFTFLOAT_H */ diff --git a/softfloat/softfloat_extension.cpp b/softfloat/softfloat_extension.cpp new file mode 100644 index 00000000..432d4e14 --- /dev/null +++ b/softfloat/softfloat_extension.cpp @@ -0,0 +1,658 @@ + + +#define SOFTFLOAT_68K + +#include +#include + +/*============================================================================ + +This C source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +=============================================================================*/ + +#include "softfloat/softfloat.h" +#include "softfloat/softfloat-specialize.h" + +/*---------------------------------------------------------------------------- +| Methods for detecting special conditions for mathematical functions +| supported by MC68881 and MC68862 mathematical coprocessor. +*----------------------------------------------------------------------------*/ + +#define pi_sig0 LIT64(0xc90fdaa22168c234) +#define pi_sig1 LIT64(0xc4c6628b80dc1cd1) + +#define pi_exp 0x4000 +#define piby2_exp 0x3FFF +#define piby4_exp 0x3FFE + +#define one_exp 0x3FFF +#define one_sig LIT64(0x8000000000000000) + +/*---------------------------------------------------------------------------- + | Arc cosine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_acos_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { + return propagateFloatx80NaNOneArg(a, status); + } + + if (aExp > one_exp || (aExp == one_exp && aSig > one_sig)) { + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + if (aExp == 0) { + if (aSig == 0) return roundAndPackFloatx80(status->floatx80_rounding_precision, + 0, piby2_exp, pi_sig0, pi_sig1, status); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Arc sine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_asin_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { + return propagateFloatx80NaNOneArg(a, status); + } + + if (aExp > one_exp || (aExp == one_exp && aSig > one_sig)) { + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Arc tangent + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_atan_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + return roundAndPackFloatx80(status->floatx80_rounding_precision, + aSign, piby2_exp, pi_sig0, pi_sig1, status); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Hyperbolic arc tangent + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_atanh_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { + return propagateFloatx80NaNOneArg(a, status); + } + + if (aExp >= one_exp) { + if (aExp == one_exp && aSig == one_sig) { + float_raise(float_flag_divbyzero, status); + packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low); + } + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Cosine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_cos_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(0, one_exp, one_sig); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Hyperbolic cosine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_cosh_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(0, one_exp, one_sig); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | e to x + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_etox_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) return packFloatx80(0, 0, 0); + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(0, one_exp, one_sig); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | e to x minus 1 + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_etoxm1_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) return packFloatx80(aSign, one_exp, one_sig); + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Log base 10 + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_log10_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) propagateFloatx80NaNOneArg(a, status); + if (aSign == 0) + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) { + float_raise(float_flag_divbyzero, status); + return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); + } + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + if (aSign) { + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Log base 2 + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_log2_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) propagateFloatx80NaNOneArg(a, status); + if (aSign == 0) + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) { + float_raise(float_flag_divbyzero, status); + return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); + } + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + if (aSign) { + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Log base e + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_logn_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) propagateFloatx80NaNOneArg(a, status); + if (aSign == 0) + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) { + float_raise(float_flag_divbyzero, status); + return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); + } + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + if (aSign) { + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Log base e of x plus 1 + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_lognp1_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) propagateFloatx80NaNOneArg(a, status); + if (aSign) { + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + if (aSign && aExp >= one_exp) { + if (aExp == one_exp && aSig == one_sig) { + float_raise(float_flag_divbyzero, status); + packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low); /* NaN? */ + } + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Sine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_sin_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Hyperbolic sine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_sinh_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + return packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Tangent + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_tan_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + float_raise(float_flag_invalid, status); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | Hyperbolic tangent + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_tanh_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + return packFloatx80(aSign, one_exp, one_sig); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | 10 to x + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_tentox_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) return packFloatx80(0, 0, 0); + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(0, one_exp, one_sig); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +} + +/*---------------------------------------------------------------------------- + | 2 to x + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_twotox_check(floatx80 a, flag *e, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + *e = 1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) return packFloatx80(0, 0, 0); + return packFloatx80(0, 0x7FFF, floatx80_default_infinity_low); + } + + if (aExp == 0) { + if (aSig == 0) return packFloatx80(0, one_exp, one_sig); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + *e = 0; + return a; +}