From ec9dd093d7beece3e2b34507255cc862cc0f26ec Mon Sep 17 00:00:00 2001 From: Toni Wilen Date: Sat, 21 Jan 2017 10:37:06 +0200 Subject: [PATCH] Softfloat update, switched back to QEMU softfloat, merged Previous additions, separated native and softfloat implementations. --- fpp.cpp | 2106 ++---- fpp_native.cpp | 884 +++ fpp_softfloat.cpp | 611 ++ include/fpp-ieee-be.h | 72 - include/fpp-unknown.h | 146 - include/fpp.h | 107 + include/newcpu.h | 4 +- softfloat/README.txt | 78 - softfloat/SOFTFLOAT-MACROS.H | 388 +- softfloat/fpu_constant.h | 80 - softfloat/fsincos.cpp | 652 -- softfloat/fyl2x.cpp | 494 -- softfloat/mamesf.h | 72 - softfloat/milieu.h | 42 - softfloat/softfloat-specialize.h | 1342 +++- softfloat/softfloat.cpp | 10334 +++++++++++++++++++---------- softfloat/softfloat.h | 1057 +-- 17 files changed, 10753 insertions(+), 7716 deletions(-) create mode 100644 fpp_native.cpp create mode 100644 fpp_softfloat.cpp delete mode 100644 include/fpp-ieee-be.h delete mode 100644 include/fpp-unknown.h create mode 100644 include/fpp.h delete mode 100644 softfloat/README.txt delete mode 100644 softfloat/fpu_constant.h delete mode 100644 softfloat/fsincos.cpp delete mode 100644 softfloat/fyl2x.cpp delete mode 100644 softfloat/mamesf.h delete mode 100644 softfloat/milieu.h diff --git a/fpp.cpp b/fpp.cpp index c6a873bb..d8d1e8ce 100644 --- a/fpp.cpp +++ b/fpp.cpp @@ -17,12 +17,6 @@ #include "sysconfig.h" #include "sysdeps.h" -#ifdef _MSC_VER -#pragma fenv_access(on) -#endif - -#define USE_HOST_ROUNDING - #include "options.h" #include "memory.h" #include "uae/attributes.h" @@ -30,7 +24,7 @@ #include "custom.h" #include "events.h" #include "newcpu.h" -#include "md-fpp.h" +#include "fpp.h" #include "savestate.h" #include "cpu_prefetch.h" #include "cpummu.h" @@ -39,6 +33,75 @@ #include "softfloat/softfloat.h" +FPP_PRINT fpp_print; + +FPP_IS fpp_is_snan; +FPP_IS fpp_unset_snan; +FPP_IS fpp_is_nan; +FPP_IS fpp_is_infinity; +FPP_IS fpp_is_zero; +FPP_IS fpp_is_neg; +FPP_IS 
fpp_is_denormal; +FPP_IS fpp_is_unnormal; + +FPP_GET_STATUS fpp_get_status; +FPP_CLEAR_STATUS fpp_clear_status; +FPP_SET_MODE fpp_set_mode; + +FPP_FROM_NATIVE fpp_from_native; +FPP_TO_NATIVE fpp_to_native; + +FPP_TO_INT fpp_to_int; +FPP_FROM_INT fpp_from_int; + +FPP_TO_SINGLE fpp_to_single_xn; +FPP_TO_SINGLE fpp_to_single_x; +FPP_FROM_SINGLE fpp_from_single_x; + +FPP_TO_DOUBLE fpp_to_double_xn; +FPP_TO_DOUBLE fpp_to_double_x; +FPP_FROM_DOUBLE fpp_from_double_x; + +FPP_TO_EXTEN fpp_to_exten_x; +FPP_FROM_EXTEN fpp_from_exten_x; + +FPP_A fpp_roundsgl; +FPP_A fpp_rounddbl; +FPP_A fpp_round32; +FPP_A fpp_round64; +FPP_AB fpp_int; +FPP_AB fpp_sinh; +FPP_AB fpp_intrz; +FPP_AB fpp_sqrt; +FPP_AB fpp_lognp1; +FPP_AB fpp_etoxm1; +FPP_AB fpp_tanh; +FPP_AB fpp_atan; +FPP_AB fpp_atanh; +FPP_AB fpp_sin; +FPP_AB fpp_asin; +FPP_AB fpp_tan; +FPP_AB fpp_etox; +FPP_AB fpp_twotox; +FPP_AB fpp_tentox; +FPP_AB fpp_logn; +FPP_AB fpp_log10; +FPP_AB fpp_log2; +FPP_AB fpp_abs; +FPP_AB fpp_cosh; +FPP_AB fpp_neg; +FPP_AB fpp_acos; +FPP_AB fpp_cos; +FPP_AB fpp_getexp; +FPP_AB fpp_getman; +FPP_AB fpp_div; +FPP_ABQS fpp_mod; +FPP_AB fpp_add; +FPP_AB fpp_mul; +FPP_ABQS fpp_rem; +FPP_AB fpp_scale; +FPP_AB fpp_sub; + #define DEBUG_FPP 0 #define EXCEPTION_FPP 1 @@ -47,755 +110,112 @@ STATIC_INLINE int isinrom (void) return (munge24 (m68k_getpc ()) & 0xFFF80000) == 0xF80000 && !currprefs.mmu_model; } -static uae_u32 xhex_pi[] ={0x2168c235, 0xc90fdaa2, 0x4000}; -uae_u32 xhex_exp_1[] ={0xa2bb4a9a, 0xadf85458, 0x4000}; -static uae_u32 xhex_l2_e[] ={0x5c17f0bc, 0xb8aa3b29, 0x3fff}; -static uae_u32 xhex_ln_2[] ={0xd1cf79ac, 0xb17217f7, 0x3ffe}; -uae_u32 xhex_ln_10[] ={0xaaa8ac17, 0x935d8ddd, 0x4000}; -uae_u32 xhex_l10_2[] ={0xfbcff798, 0x9a209a84, 0x3ffd}; -uae_u32 xhex_l10_e[] ={0x37287195, 0xde5bd8a9, 0x3ffd}; -uae_u32 xhex_1e16[] ={0x04000000, 0x8e1bc9bf, 0x4034}; -uae_u32 xhex_1e32[] ={0x2b70b59e, 0x9dc5ada8, 0x4069}; -uae_u32 xhex_1e64[] ={0xffcfa6d5, 0xc2781f49, 0x40d3}; -uae_u32 xhex_1e128[] 
={0x80e98ce0, 0x93ba47c9, 0x41a8}; -uae_u32 xhex_1e256[] ={0x9df9de8e, 0xaa7eebfb, 0x4351}; -uae_u32 xhex_1e512[] ={0xa60e91c7, 0xe319a0ae, 0x46a3}; -uae_u32 xhex_1e1024[]={0x81750c17, 0xc9767586, 0x4d48}; -uae_u32 xhex_1e2048[]={0xc53d5de5, 0x9e8b3b5d, 0x5a92}; -uae_u32 xhex_1e4096[]={0x8a20979b, 0xc4605202, 0x7525}; -static uae_u32 xhex_inf[] ={0x00000000, 0x00000000, 0x7fff}; -static uae_u32 xhex_nan[] ={0xffffffff, 0xffffffff, 0x7fff}; -static uae_u32 xhex_snan[] ={0xffffffff, 0xbfffffff, 0x7fff}; -#if USE_LONG_DOUBLE -static long double *fp_pi = (long double *)xhex_pi; -static long double *fp_exp_1 = (long double *)xhex_exp_1; -static long double *fp_l2_e = (long double *)xhex_l2_e; -static long double *fp_ln_2 = (long double *)xhex_ln_2; -static long double *fp_ln_10 = (long double *)xhex_ln_10; -static long double *fp_l10_2 = (long double *)xhex_l10_2; -static long double *fp_l10_e = (long double *)xhex_l10_e; -static long double *fp_1e16 = (long double *)xhex_1e16; -static long double *fp_1e32 = (long double *)xhex_1e32; -static long double *fp_1e64 = (long double *)xhex_1e64; -static long double *fp_1e128 = (long double *)xhex_1e128; -static long double *fp_1e256 = (long double *)xhex_1e256; -static long double *fp_1e512 = (long double *)xhex_1e512; -static long double *fp_1e1024 = (long double *)xhex_1e1024; -static long double *fp_1e2048 = (long double *)xhex_1e2048; -static long double *fp_1e4096 = (long double *)xhex_1e4096; -static long double *fp_inf = (long double *)xhex_inf; -static long double *fp_nan = (long double *)xhex_nan; -#else -static uae_u32 dhex_pi[] ={0x54442D18, 0x400921FB}; -static uae_u32 dhex_exp_1[] ={0x8B145769, 0x4005BF0A}; -static uae_u32 dhex_l2_e[] ={0x652B82FE, 0x3FF71547}; -static uae_u32 dhex_ln_2[] ={0xFEFA39EF, 0x3FE62E42}; -static uae_u32 dhex_ln_10[] ={0xBBB55516, 0x40026BB1}; -static uae_u32 dhex_l10_2[] ={0x509F79FF, 0x3FD34413}; -static uae_u32 dhex_l10_e[] ={0x1526E50E, 0x3FDBCB7B}; -static uae_u32 dhex_1e16[] 
={0x37E08000, 0x4341C379}; -static uae_u32 dhex_1e32[] ={0xB5056E17, 0x4693B8B5}; -static uae_u32 dhex_1e64[] ={0xE93FF9F5, 0x4D384F03}; -static uae_u32 dhex_1e128[] ={0xF9301D32, 0x5A827748}; -static uae_u32 dhex_1e256[] ={0x7F73BF3C, 0x75154FDD}; -static uae_u32 dhex_inf[] ={0x00000000, 0x7ff00000}; -static uae_u32 dhex_nan[] ={0xffffffff, 0x7fffffff}; -static double *fp_pi = (double *)dhex_pi; -static double *fp_exp_1 = (double *)dhex_exp_1; -static double *fp_l2_e = (double *)dhex_l2_e; -static double *fp_ln_2 = (double *)dhex_ln_2; -static double *fp_ln_10 = (double *)dhex_ln_10; -static double *fp_l10_2 = (double *)dhex_l10_2; -static double *fp_l10_e = (double *)dhex_l10_e; -static double *fp_1e16 = (double *)dhex_1e16; -static double *fp_1e32 = (double *)dhex_1e32; -static double *fp_1e64 = (double *)dhex_1e64; -static double *fp_1e128 = (double *)dhex_1e128; -static double *fp_1e256 = (double *)dhex_1e256; -static double *fp_1e512 = (double *)dhex_inf; -static double *fp_1e1024 = (double *)dhex_inf; -static double *fp_1e2048 = (double *)dhex_inf; -static double *fp_1e4096 = (double *)dhex_inf; -static double *fp_inf = (double *)dhex_inf; -static double *fp_nan = (double *)dhex_nan; -#endif -static const double twoto32 = 4294967296.0; -double fp_1e8 = 1.0e8; -float fp_1e0 = 1, fp_1e1 = 10, fp_1e2 = 100, fp_1e4 = 10000; -static bool fpu_mmu_fixup; - -static floatx80 fxsizes[6]; -static floatx80 fxzero; -static floatx80 fx_1e0, fx_1e1, fx_1e2, fx_1e4, fx_1e8; -static const fptype fsizes[] = { -128.0, 127.0, -32768.0, 32767.0, -2147483648.0, 2147483647.0 }; - -#define FPCR_ROUNDING_MODE 0x00000030 -#define FPCR_ROUND_NEAR 0x00000000 -#define FPCR_ROUND_ZERO 0x00000010 -#define FPCR_ROUND_MINF 0x00000020 -#define FPCR_ROUND_PINF 0x00000030 - -#define FPCR_ROUNDING_PRECISION 0x000000c0 -#define FPCR_PRECISION_SINGLE 0x00000040 -#define FPCR_PRECISION_DOUBLE 0x00000080 -#define FPCR_PRECISION_EXTENDED 0x00000000 - - -#if defined(CPU_i386) || defined(CPU_x86_64) - 
-/* The main motivation for dynamically creating an x86(-64) function in - * memory is because MSVC (x64) does not allow you to use inline assembly, - * and the x86-64 versions of _control87/_controlfp functions only modifies - * SSE2 registers. */ - -static uae_u16 x87_cw = 0; -static uae_u8 *x87_fldcw_code = NULL; -typedef void (uae_cdecl *x87_fldcw_function)(void); - -static void init_fpucw_x87(void) -{ - if (x87_fldcw_code) { - return; - } - x87_fldcw_code = (uae_u8 *) uae_vm_alloc( - uae_vm_page_size(), UAE_VM_32BIT, UAE_VM_READ_WRITE_EXECUTE); - uae_u8 *c = x87_fldcw_code; - /* mov eax,0x0 */ - *(c++) = 0xb8; - *(c++) = 0x00; - *(c++) = 0x00; - *(c++) = 0x00; - *(c++) = 0x00; -#ifdef CPU_x86_64 - /* Address override prefix */ - *(c++) = 0x67; -#endif - /* fldcw WORD PTR [eax+addr] */ - *(c++) = 0xd9; - *(c++) = 0xa8; - *(c++) = (((uintptr_t) &x87_cw) ) & 0xff; - *(c++) = (((uintptr_t) &x87_cw) >> 8) & 0xff; - *(c++) = (((uintptr_t) &x87_cw) >> 16) & 0xff; - *(c++) = (((uintptr_t) &x87_cw) >> 24) & 0xff; - /* ret */ - *(c++) = 0xc3; - /* Write-protect the function */ - uae_vm_protect(x87_fldcw_code, uae_vm_page_size(), UAE_VM_READ_EXECUTE); -} - -static inline void set_fpucw_x87(uae_u32 m68k_cw) -{ -#ifdef _MSC_VER - static int ex = 0; - // RN, RZ, RM, RP - static const unsigned int fp87_round[4] = { _RC_NEAR, _RC_CHOP, _RC_DOWN, _RC_UP }; - // Extend X, Single S, Double D, Undefined - static const unsigned int fp87_prec[4] = { _PC_64, _PC_24, _PC_53, 0 }; - int round = (m68k_cw >> 4) & 3; -#ifdef WIN64 - // x64 only sets SSE2, must also call x87_fldcw_code() to set FPU rounding mode. 
- _controlfp(ex | fp87_round[round], _MCW_RC); -#else - int prec = (m68k_cw >> 6) & 3; - // x86 sets both FPU and SSE2 rounding mode, don't need x87_fldcw_code() - _control87(ex | fp87_round[round] | fp87_prec[prec], _MCW_RC | _MCW_PC); - return; -#endif -#endif - static const uae_u16 x87_cw_tab[] = { - 0x137f, 0x1f7f, 0x177f, 0x1b7f, /* Extended */ - 0x107f, 0x1c7f, 0x147f, 0x187f, /* Single */ - 0x127f, 0x1e7f, 0x167f, 0x1a7f, /* Double */ - 0x137f, 0x1f7f, 0x177f, 0x1b7f /* undefined */ - }; - x87_cw = x87_cw_tab[(m68k_cw >> 4) & 0xf]; -#if defined(X86_MSVC_ASSEMBLY) && 0 - __asm { fldcw word ptr x87_cw } -#elif defined(__GNUC__) && 0 - __asm__("fldcw %0" : : "m" (*&x87_cw)); -#else - ((x87_fldcw_function) x87_fldcw_code)(); -#endif -} - -#endif /* defined(CPU_i386) || defined(CPU_x86_64) */ - -static void native_set_fpucw(uae_u32 m68k_cw) -{ -#if defined(CPU_i386) || defined(CPU_x86_64) - set_fpucw_x87(m68k_cw); -#endif -} - -/* Functions for setting host/library modes and getting status */ -static void set_fp_mode(uae_u32 mode_control) -{ - floatx80_rounding_precision = 80; -#if 0 - switch(mode_control & FPCR_ROUNDING_PRECISION) { - case FPCR_PRECISION_SINGLE: // S - //floatx80_rounding_precision = 32; - break; - case FPCR_PRECISION_DOUBLE: // D - //floatx80_rounding_precision = 64; - break; - case FPCR_PRECISION_EXTENDED: // X - default: // undefined - //floatx80_rounding_precision = 80; - break; - } -#endif -#ifdef USE_HOST_ROUNDING - switch(mode_control & FPCR_ROUNDING_MODE) { - case FPCR_ROUND_NEAR: // to neareset - fesetround(FE_TONEAREST); - break; - case FPCR_ROUND_ZERO: // to zero - fesetround(FE_TOWARDZERO); - break; - case FPCR_ROUND_MINF: // to minus - fesetround(FE_DOWNWARD); - break; - case FPCR_ROUND_PINF: // to plus - fesetround(FE_UPWARD); - break; - } - native_set_fpucw(mode_control); - return; -#endif -} - -static void get_fp_status(uae_u32 *status) -{ - int exp_flags = fetestexcept(FE_ALL_EXCEPT); - if (exp_flags) { - if (exp_flags & 
FE_INEXACT) - *status |= 0x0200; - if (exp_flags & FE_DIVBYZERO) - *status |= 0x0400; - if (exp_flags & FE_UNDERFLOW) - *status |= 0x0800; - if (exp_flags & FE_OVERFLOW) - *status |= 0x1000; - if (exp_flags & FE_INVALID) - *status |= 0x2000; - } -} - -static void clear_fp_status(void) -{ - feclearexcept (FE_ALL_EXCEPT); -} - -static void fp_roundsgl(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - fpd->fpx = floatx80_round32(fpd->fpx); - } else { - int expon; - float mant; -#ifdef USE_LONG_DOUBLE - mant = (float)(frexpl(fpd->fp, &expon) * 2.0); - fpd->fp = ldexpl((fptype)mant, expon - 1); -#else - mant = (float)(frexp(fpd->fp, &expon) * 2.0); - fpd->fp = ldexp((fptype)mant, expon - 1); -#endif - } -} -static void fp_round32(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - float32 f = floatx80_to_float32(fpd->fpx); - fpd->fpx = float32_to_floatx80(f); - } else { - fpd->fp = (float)fpd->fp; - } -} -static void fp_round64(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - float64 f = floatx80_to_float64(fpd->fpx); - fpd->fpx = float64_to_floatx80(f); - } else { -#ifdef USE_LONG_DOUBLE - fpd->fp = (double)fpd->fp; -#else - ; -#endif - } -} - -static fptype fp_mod(fptype a, fptype b, uae_u64 *q, uae_s8 *s) -{ - fptype quot; -#ifdef USE_HOST_ROUNDING - quot = trunc(a / b); -#else - quot = fp_round_to_zero(a / b); -#endif - if (quot < 0.0) { - *s = 1; - quot = -quot; - } else { - *s = 0; - } - *q = (uae_u64)quot; - return fmodl(a, b); -} - -static fptype fp_rem(fptype a, fptype b, uae_u64 *q, uae_s8 *s) -{ - fptype quot; -#ifdef USE_HOST_ROUNDING - quot = round(a / b); -#else - quot = fp_round_to_nearest(a / b); -#endif - if (quot < 0.0) { - *s = 1; - quot = -quot; - } else { - *s = 0; - } - *q = (uae_u64)quot; - return remainderl(a, b); -} - -#ifdef USE_LONG_DOUBLE - -static fptype fp_int(fptype a) -{ -#ifdef USE_HOST_ROUNDING - return rintl(a); -#else - switch (regs.fpcr & FPCR_ROUNDING_MODE) - { - case FPCR_ROUND_NEAR: - return fp_round_to_nearest(a); - case 
FPCR_ROUND_ZERO: - return fp_round_to_zero(a); - case FPCR_ROUND_MINF: - return fp_round_to_minus_infinity(a); - case FPCR_ROUND_PINF: - return fp_round_to_plus_infinity(a); - default: /* never reached */ - return a; - } -#endif -} - -static fptype fp_intrz(fptype a) -{ -#ifdef USE_HOST_ROUNDING - return truncl(a); -#else - return fp_round_to_zero (a); -#endif -} - -#else // if !USE_LONG_DOUBLE - -static fptype fp_int(fptype a) -{ -#ifdef USE_HOST_ROUNDING - return rint(a); -#else - switch (regs.fpcr & FPCR_ROUNDING_MODE) - { - case FPCR_ROUND_NEAR: - return fp_round_to_nearest(a); - case FPCR_ROUND_ZERO: - return fp_round_to_zero(a); - case FPCR_ROUND_MINF: - return fp_round_to_minus_infinity(a); - case FPCR_ROUND_PINF: - return fp_round_to_plus_infinity(a); - default: /* never reached */ - return a; - } -#endif -} -static fptype fp_intrz(fptype a) -{ -#ifdef USE_HOST_ROUNDING - return trunc(a); -#else - return fp_round_to_zero (a); -#endif -} - -#endif - -static void to_native(fptype *fp, fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - int expon; - fptype frac; - floatx80 *fpx = &fpd->fpx; - - expon = fpx->high & 0x7fff; - - if (floatx80_is_zero(*fpx)) { - *fp = floatx80_is_negative(*fpx) ? -0.0 : +0.0; - return; - } - if (floatx80_is_nan(*fpx)) { - *fp = sqrtl(-1); - return; - } - if (floatx80_is_infinity(*fpx)) { - *fp = floatx80_is_negative(*fpx) ? 
logl(0.0) : (1.0/0.0); - return; - } - - frac = (long double)fpx->low / (long double)(twoto32 * 2147483648.0); - if (floatx80_is_negative(*fpx)) - frac = -frac; - *fp = ldexpl (frac, expon - 16383); - } else { - *fp = fpd->fp; - } -} - -static void from_native(fptype fp, fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - int expon; - fptype frac; - floatx80 *fpx = &fpd->fpx; - - if (signbit(fp)) - fpx->high = 0x8000; - else - fpx->high = 0x0000; - - if (isnan(fp)) { - fpx->high |= 0x7fff; - fpx->low = LIT64(0xffffffffffffffff); - return; - } - if (isinf(fp)) { - fpx->high |= 0x7fff; - fpx->low = LIT64(0x0000000000000000); - return; - } - if (fp == 0.0) { - fpx->low = LIT64(0x0000000000000000); - return; - } - if (fp < 0.0) - fp = -fp; - - frac = frexpl (fp, &expon); - frac += 0.5 / (twoto32 * twoto32); - if (frac >= 1.0) { - frac /= 2.0; - expon++; - } - fpx->high |= (expon + 16383 - 1) & 0x7fff; - fpx->low = (bits64)(frac * (long double)(twoto32 * twoto32)); - - while (!(fpx->low & LIT64( 0x8000000000000000))) { - if (fpx->high == 0) { - float_raise( float_flag_denormal ); - break; - } - fpx->low <<= 1; - fpx->high--; - } - } else { - fpd->fp = fp; - } -} - -static void softfloat_set(floatx80 *fx, uae_u32 *f) -{ - fx->high = (uae_u16)f[2]; - fx->low = ((uae_u64)f[1] << 32) | f[0]; -} -static void softfloat_get(floatx80 *fx, uae_u32 *f) -{ - f[2] = fx->high; - f[1] = fx->low >> 32; - f[0] = (uae_u32)fx->low; -} - -static void to_single_xn(fpdata *fpd, uae_u32 wrd1) -{ - if (currprefs.fpu_softfloat) { - float32 f = wrd1; - fpd->fpx = float32_to_floatx80(f); // automatically fix denormals - } else { - union { - float f; - uae_u32 u; - } val; - val.u = wrd1; - fpd->fp = (fptype) val.f; - } -} -static void to_single_x(fpdata *fpd, uae_u32 wrd1) -{ - if (currprefs.fpu_softfloat) { - float32 f = wrd1; - fpd->fpx = float32_to_floatx80_allowunnormal(f); - } else { - union { - float f; - uae_u32 u; - } val; - val.u = wrd1; - fpd->fp = (fptype) val.f; - } -} -static uae_u32 
from_single_x(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - float32 f = floatx80_to_float32(fpd->fpx); - return f; - } else { - union { - float f; - uae_u32 u; - } val; - val.f = (float)fpd->fp; - return val.u; - } -} +struct fpp_cr_entry { + uae_u32 val[3]; + uae_u8 inexact; + uae_s8 rndoff[4]; +}; + +static struct fpp_cr_entry fpp_cr[22] = { + { {0x40000000, 0xc90fdaa2, 0x2168c235}, 1, {0,-1,-1, 0} }, // 0 = pi + { {0x3ffd0000, 0x9a209a84, 0xfbcff798}, 1, {0, 0, 0, 1} }, // 1 = log10(2) + { {0x40000000, 0xadf85458, 0xa2bb4a9a}, 1, {0, 0, 0, 1} }, // 2 = e + { {0x3fff0000, 0xb8aa3b29, 0x5c17f0bc}, 1, {0,-1,-1, 0} }, // 3 = log2(e) + { {0x3ffd0000, 0xde5bd8a9, 0x37287195}, 0, {0, 0, 0, 0} }, // 4 = log10(e) + { {0x00000000, 0x00000000, 0x00000000}, 0, {0, 0, 0, 0} }, // 5 = 0.0 + { {0x3ffe0000, 0xb17217f7, 0xd1cf79ac}, 1, {0,-1,-1, 0} }, // 6 = ln(2) + { {0x40000000, 0x935d8ddd, 0xaaa8ac17}, 1, {0,-1,-1, 0} }, // 7 = ln(10) + { {0x3fff0000, 0x80000000, 0x00000000}, 0, {0, 0, 0, 0} }, // 8 = 1e0 + { {0x40020000, 0xa0000000, 0x00000000}, 0, {0, 0, 0, 0} }, // 9 = 1e1 + { {0x40050000, 0xc8000000, 0x00000000}, 0, {0, 0, 0, 0} }, // 10 = 1e2 + { {0x400c0000, 0x9c400000, 0x00000000}, 0, {0, 0, 0, 0} }, // 11 = 1e4 + { {0x40190000, 0xbebc2000, 0x00000000}, 0, {0, 0, 0, 0} }, // 12 = 1e8 + { {0x40340000, 0x8e1bc9bf, 0x04000000}, 0, {0, 0, 0, 0} }, // 13 = 1e16 + { {0x40690000, 0x9dc5ada8, 0x2b70b59e}, 1, {0,-1,-1, 0} }, // 14 = 1e32 + { {0x40d30000, 0xc2781f49, 0xffcfa6d5}, 1, {0, 0, 0, 1} }, // 15 = 1e64 + { {0x41a80000, 0x93ba47c9, 0x80e98ce0}, 1, {0,-1,-1, 0} }, // 16 = 1e128 + { {0x43510000, 0xaa7eebfb, 0x9df9de8e}, 1, {0,-1,-1, 0} }, // 17 = 1e256 + { {0x46a30000, 0xe319a0ae, 0xa60e91c7}, 1, {0,-1,-1, 0} }, // 18 = 1e512 + { {0x4d480000, 0xc9767586, 0x81750c17}, 1, {0, 0, 0, 1} }, // 19 = 1e1024 + { {0x5a920000, 0x9e8b3b5d, 0xc53d5de5}, 1, {0, 0, 0, 1} }, // 20 = 1e2048 + { {0x75250000, 0xc4605202, 0x8a20979b}, 1, {0,-1,-1, 0} } // 21 = 1e4096 +}; + +#define 
FPP_CR_PI 0 +#define FPP_CR_LOG10_2 1 +#define FPP_CR_E 2 +#define FPP_CR_LOG2_E 3 +#define FPP_CR_LOG10_E 4 +#define FPP_CR_ZERO 5 +#define FPP_CR_LN_2 6 +#define FPP_CR_LN_10 7 +#define FPP_CR_1E0 8 +#define FPP_CR_1E1 9 +#define FPP_CR_1E2 10 +#define FPP_CR_1E4 11 +#define FPP_CR_1E8 12 +#define FPP_CR_1E16 13 +#define FPP_CR_1E32 14 +#define FPP_CR_1E64 15 +#define FPP_CR_1E128 16 +#define FPP_CR_1E256 17 +#define FPP_CR_1E512 18 +#define FPP_CR_1E1024 19 +#define FPP_CR_1E2048 20 +#define FPP_CR_1E4096 21 + +uae_u32 xhex_nan[] ={0x7fff0000, 0xffffffff, 0xffffffff}; -static void to_double_xn(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) -{ - if (currprefs.fpu_softfloat) { - float64 f = ((float64)wrd1 << 32) | wrd2; - fpd->fpx = float64_to_floatx80(f); // automatically fix denormals - } else { - union { - double d; - uae_u32 u[2]; - } val; -#ifdef WORDS_BIGENDIAN - val.u[0] = wrd1; - val.u[1] = wrd2; -#else - val.u[1] = wrd1; - val.u[0] = wrd2; -#endif - fpd->fp = (fptype) val.d; - } -} -static void to_double_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) -{ - if (currprefs.fpu_softfloat) { - float64 f = ((float64)wrd1 << 32) | wrd2; - fpd->fpx = float64_to_floatx80_allowunnormal(f); - } else { - union { - double d; - uae_u32 u[2]; - } val; -#ifdef WORDS_BIGENDIAN - val.u[0] = wrd1; - val.u[1] = wrd2; -#else - val.u[1] = wrd1; - val.u[0] = wrd2; -#endif - fpd->fp = (fptype) val.d; - } -} -static void from_double_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2) -{ - if (currprefs.fpu_softfloat) { - float64 f = floatx80_to_float64(fpd->fpx); - *wrd1 = f >> 32; - *wrd2 = (uae_u32)f; - } else { - union { - double d; - uae_u32 u[2]; - } val; - val.d = (double) fpd->fp; -#ifdef WORDS_BIGENDIAN - *wrd1 = val.u[0]; - *wrd2 = val.u[1]; -#else - *wrd1 = val.u[1]; - *wrd2 = val.u[0]; -#endif - } -} -#ifdef USE_LONG_DOUBLE -static void to_exten_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) -{ - if (currprefs.fpu_softfloat) { - uae_u32 wrd[3] = { wrd1, wrd2, wrd3 }; - 
softfloat_set(&fpd->fpx, wrd); - } else { - union { - fptype ld; - uae_u32 u[3]; - } val; -#if WORDS_BIGENDIAN - val.u[0] = (wrd1 & 0xffff0000) | ((wrd2 & 0xffff0000) >> 16); - val.u[1] = (wrd2 & 0x0000ffff) | ((wrd3 & 0xffff0000) >> 16); - val.u[2] = (wrd3 & 0x0000ffff) << 16; -#else - val.u[0] = wrd3; - val.u[1] = wrd2; - val.u[2] = wrd1 >> 16; -#endif - fpd->fp = val.ld; - } -} -static void from_exten_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) -{ - if (currprefs.fpu_softfloat) { - uae_u32 wrd[3]; - softfloat_get(&fpd->fpx, wrd); - *wrd1 = wrd[0]; - *wrd2 = wrd[1]; - *wrd3 = wrd[2]; - } else { - union { - fptype ld; - uae_u32 u[3]; - } val; - val.ld = *fp; -#if WORDS_BIGENDIAN - *wrd1 = val.u[0] & 0xffff0000; - *wrd2 = ((val.u[0] & 0x0000ffff) << 16) | ((val.u[1] & 0xffff0000) >> 16); - *wrd3 = ((val.u[1] & 0x0000ffff) << 16) | ((val.u[2] & 0xffff0000) >> 16); -#else - *wrd3 = val.u[0]; - *wrd2 = val.u[1]; - *wrd1 = val.u[2] << 16; -#endif - } -} -#else // if !USE_LONG_DOUBLE -static void to_exten_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) -{ - if (currprefs.fpu_softfloat) { - uae_u32 wrd[3] = { wrd1, wrd2, wrd3 }; - softfloat_set(&fpd->fpx, wrd); - } else { - fptype frac; - if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { - fpd->fp = (wrd1 & 0x80000000) ? -0.0 : +0.0; - return; - } - frac = ((fptype)wrd2 + ((fptype)wrd3 / twoto32)) / 2147483648.0; - if (wrd1 & 0x80000000) - frac = -frac; - fpd->fp = ldexp (frac, ((wrd1 >> 16) & 0x7fff) - 16383); - } -} -static void from_exten_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) -{ - if (currprefs.fpu_softfloat) { - uae_u32 wrd[3]; - softfloat_get(&fpd->fpx, wrd); - *wrd1 = wrd[0]; - *wrd2 = wrd[1]; - *wrd3 = wrd[2]; - } else { - int expon; - fptype frac; - fptype v; - - v = fpd->fp; - if (v == 0.0) { - *wrd1 = signbit(v) ? 
0x80000000 : 0; - *wrd2 = 0; - *wrd3 = 0; - return; - } - if (v < 0) { - *wrd1 = 0x80000000; - v = -v; - } else { - *wrd1 = 0; - } - frac = frexp (v, &expon); - frac += 0.5 / (twoto32 * twoto32); - if (frac >= 1.0) { - frac /= 2.0; - expon++; - } - *wrd1 |= (((expon + 16383 - 1) & 0x7fff) << 16); - *wrd2 = (uae_u32) (frac * twoto32); - *wrd3 = (uae_u32) ((frac * twoto32 - *wrd2) * twoto32); - } -} -#endif // !USE_LONG_DOUBLE +static bool fpu_mmu_fixup; static void normalize_exten(uae_u32 *pwrd1, uae_u32 *pwrd2, uae_u32 *pwrd3) { - uae_u32 wrd1 = *pwrd1; - uae_u32 wrd2 = *pwrd2; - uae_u32 wrd3 = *pwrd3; - uae_u16 exp = (wrd1 >> 16) & 0x7fff; - // Normalize if unnormal. - if (exp != 0 && exp != 0x7fff && !(wrd2 & 0x80000000)) { - while (!(wrd2 & 0x80000000) && (wrd2 || wrd3)) { - if (exp == 0) - break; // Result is denormal - wrd2 <<= 1; - if (wrd3 & 0x80000000) - wrd2 |= 1; - wrd3 <<= 1; - exp--; - } - if (!wrd2 && !wrd3) - exp = 0; - *pwrd1 = (wrd1 & 0x80000000) | (exp << 16); - *pwrd2 = wrd2; - *pwrd3 = wrd3; - } -} - -static void from_int(fpdata *fpd, uae_s32 src) -{ - if (currprefs.fpu_softfloat) { - fpd->fpx = int32_to_floatx80(src); - } else { - fpd->fp = (fptype)src; - } -} - -static void fpclear (fpdata *fpd) -{ - from_int(fpd, 0); -} -static void fpset (fpdata *fpd, uae_s32 val) -{ - from_int(fpd, val); + uae_u32 wrd1 = *pwrd1; + uae_u32 wrd2 = *pwrd2; + uae_u32 wrd3 = *pwrd3; + uae_u16 exp = (wrd1 >> 16) & 0x7fff; + // Normalize if unnormal. 
+ if (exp != 0 && exp != 0x7fff && !(wrd2 & 0x80000000)) { + while (!(wrd2 & 0x80000000) && (wrd2 || wrd3)) { + if (exp == 0) + break; // Result is denormal + wrd2 <<= 1; + if (wrd3 & 0x80000000) + wrd2 |= 1; + wrd3 <<= 1; + exp--; + } + if (!wrd2 && !wrd3) + exp = 0; + *pwrd1 = (wrd1 & 0x80000000) | (exp << 16); + *pwrd2 = wrd2; + *pwrd3 = wrd3; + } } void to_single(fpdata *fpd, uae_u32 wrd1) { // automatically fix denormals if 6888x if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) - to_single_xn(fpd, wrd1); + fpp_to_single_xn(fpd, wrd1); else - to_single_x(fpd, wrd1); + fpp_to_single_x(fpd, wrd1); } static uae_u32 from_single(fpdata *fpd) { - return from_single_x(fpd); + return fpp_from_single_x(fpd); } void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) { // automatically fix denormals if 6888x if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) - to_double_xn(fpd, wrd1, wrd2); + fpp_to_double_xn(fpd, wrd1, wrd2); else - to_double_x(fpd, wrd1, wrd2); + fpp_to_double_x(fpd, wrd1, wrd2); } static void from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2) { - from_double_x(fpd, wrd1, wrd2); + fpp_from_double_x(fpd, wrd1, wrd2); } void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) @@ -804,15 +224,15 @@ void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) { normalize_exten(&wrd1, &wrd2, &wrd3); } - to_exten_x(fpd, wrd1, wrd2, wrd3); + fpp_to_exten_x(fpd, wrd1, wrd2, wrd3); } static void to_exten_fmovem(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) { - to_exten_x(fpd, wrd1, wrd2, wrd3); + fpp_to_exten_x(fpd, wrd1, wrd2, wrd3); } static void from_exten(fpdata *fpd, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3) { - from_exten_x(fpd, wrd1, wrd2, wrd3); + fpp_from_exten_x(fpd, wrd1, wrd2, wrd3); } /* Floating Point Control Register (FPCR) @@ -893,230 +313,231 @@ static void from_exten(fpdata *fpd, uae_u32 * wrd1, uae_u32 * 
wrd2, uae_u32 * wr #define FPSR_AE_DZ 0x00000010 #define FPSR_AE_INEX 0x00000008 -static void fpnan (fpdata *fpd) + +static void fpsr_set_exception(uae_u32 exception) { - to_exten(fpd, xhex_nan[0], xhex_nan[1], xhex_nan[2]); + regs.fpsr |= exception; +} +static void fpsr_check_exception(void) +{ + // Any exception status bit and matching exception enable bits set? + uae_u32 exception = (regs.fpsr >> 8) & (regs.fpcr >> 8); + + if (exception) { + int vector = 0; + int vtable[8] = { 49, 49, 50, 51, 53, 52, 54, 48 }; + int i; + for (i = 7; i >= 0; i--) { + if (exception & (1 << i)) { + vector = vtable[i]; + break; + } + } + // logging only so far + write_log (_T("FPU exception: FPSR: %08x, FPCR: %04x (vector: %d)!\n"), regs.fpsr, regs.fpcr, vector); + } +} +static void fpsr_set_result(fpdata *result) +{ + regs.fp_result = *result; + + // condition code byte + regs.fpsr &= 0x00fffff8; // clear cc + if (fpp_is_nan (&regs.fp_result)) { + regs.fpsr |= FPSR_CC_NAN; + } else { + if (fpp_is_zero(&regs.fp_result)) + regs.fpsr |= FPSR_CC_Z; + if (fpp_is_infinity (&regs.fp_result)) + regs.fpsr |= FPSR_CC_I; + } + if (fpp_is_neg(&regs.fp_result)) + regs.fpsr |= FPSR_CC_N; + + // check if result is signaling nan + if (fpp_is_snan(&regs.fp_result)) + regs.fpsr |= FPSR_SNAN; +} +static void fpsr_clear_status(void) +{ + // clear exception status byte only + regs.fpsr &= 0x0fff00f8; + + // clear external status + fpp_clear_status(); } -const TCHAR *fp_print(fpdata *fpd) +static void fpsr_make_status(void) { - static TCHAR fs[32]; - if (currprefs.fpu_softfloat) { - bool n, u, d; - fptype result = 0.0; - int i; - floatx80 *fx = &fpd->fpx; - - n = floatx80_is_negative(*fx); - u = floatx80_is_unnormal(*fx); - d = floatx80_is_denormal(*fx); + // get external status + fpp_get_status(&regs.fpsr); - if (floatx80_is_zero(*fx)) { -#if USE_LONG_DOUBLE - _stprintf(fs, _T("%c%#.17Le%s%s"), n?'-':'+', (fptype) 0.0, u ? _T("U") : _T(""), d ? 
_T("D") : _T("")); -#else - _stprintf(fs, _T("%c%#.17e%s%s"), n?'-':'+', (fptype) 0.0, u ? _T("U") : _T(""), d ? _T("D") : _T("")); -#endif - } else if (floatx80_is_infinity(*fx)) { - _stprintf(fs, _T("%c%s"), n?'-':'+', _T("inf")); - } else if (floatx80_is_signaling_nan(*fx)) { - _stprintf(fs, _T("%c%s"), n?'-':'+', _T("snan")); - } else if (floatx80_is_nan(*fx)) { - _stprintf(fs, _T("%c%s"), n?'-':'+', _T("nan")); - } else { - for (i = 63; i >= 0; i--) { - if (fx->low & (((uae_u64)1)<high&0x7FFF) - 0x3FFF); -#if USE_LONG_DOUBLE - _stprintf(fs, _T("%c%#.17Le%s%s"), n?'-':'+', result, u ? _T("U") : _T(""), d ? _T("D") : _T("")); -#else - _stprintf(fs, _T("%c%#.17e%s%s"), n?'-':'+', result, u ? _T("U") : _T(""), d ? _T("D") : _T("")); -#endif - } - } else { -#if USE_LONG_DOUBLE - _stprintf(fs, _T("#%Le"), fpd->fp); -#else - _stprintf(fs, _T("#%e"), fpd->fp); -#endif - } - return fs; + // update accrued exception byte + if (regs.fpsr & (FPSR_BSUN | FPSR_SNAN | FPSR_OPERR)) + regs.fpsr |= FPSR_AE_IOP; // IOP = BSUN || SNAN || OPERR + if (regs.fpsr & FPSR_OVFL) + regs.fpsr |= FPSR_AE_OVFL; // OVFL = OVFL + if ((regs.fpsr & FPSR_UNFL) && (regs.fpsr & FPSR_INEX2)) + regs.fpsr |= FPSR_AE_UNFL; // UNFL = UNFL && INEX2 + if (regs.fpsr & FPSR_DZ) + regs.fpsr |= FPSR_AE_DZ; // DZ = DZ + if (regs.fpsr & (FPSR_OVFL | FPSR_INEX2 | FPSR_INEX1)) + regs.fpsr |= FPSR_AE_INEX; // INEX = INEX1 || INEX2 || OVFL + + fpsr_check_exception(); } -static bool fpu_get_constant_fp(fpdata *fp, int cr) +static int fpsr_set_bsun(void) { - fptype f; - switch (cr & 0x7f) - { - case 0x00: - f = *fp_pi; - break; - case 0x0b: - f = *fp_l10_2; - break; - case 0x0c: - f = *fp_exp_1; - break; - case 0x0d: - f = *fp_l2_e; - break; - case 0x0e: - f = *fp_l10_e; - break; - case 0x0f: - f = 0.0; - break; - case 0x30: - f = *fp_ln_2; - break; - case 0x31: - f = *fp_ln_10; - break; - case 0x32: - f = (fptype)fp_1e0; - break; - case 0x33: - f = (fptype)fp_1e1; - break; - case 0x34: - f = (fptype)fp_1e2; - 
break; - case 0x35: - f = (fptype)fp_1e4; - break; - case 0x36: - f = (fptype)fp_1e8; - break; - case 0x37: - f = *fp_1e16; - break; - case 0x38: - f = *fp_1e32; - break; - case 0x39: - f = *fp_1e64; - break; - case 0x3a: - f = *fp_1e128; - break; - case 0x3b: - f = *fp_1e256; - break; - case 0x3c: - f = *fp_1e512; - break; - case 0x3d: - f = *fp_1e1024; - break; - case 0x3e: - f = *fp_1e2048; - break; - case 0x3f: - f = *fp_1e4096; - break; - default: - return false; - } - fp->fp = f; - return true; + regs.fpsr |= FPSR_BSUN; + regs.fpsr |= FPSR_AE_IOP; + + if (regs.fpcr & FPSR_BSUN) { + // logging only so far + write_log (_T("FPU exception: BSUN! (FPSR: %08x, FPCR: %04x)\n"), regs.fpsr, regs.fpcr); + return 0; // return 1, once BSUN exception works + } + return 0; } -static bool fpu_get_constant_softfloat(fpdata *fp, int cr) +void fpsr_set_quotient(uae_u64 quot, uae_s8 sign) { - uae_u32 *f = NULL; - floatx80 fx; + regs.fpsr &= 0x0f00fff8; + regs.fpsr |= (quot << 16) & FPSR_QUOT_LSB; + regs.fpsr |= sign ? 
FPSR_QUOT_SIGN : 0; +} - switch (cr & 0x7f) - { - case 0x00: - f = xhex_pi; - break; - case 0x0b: - f = xhex_l10_2; - break; - case 0x0c: - f = xhex_exp_1; - break; - case 0x0d: - f = xhex_l2_e; - break; - case 0x0e: - f = xhex_l10_e; - break; - case 0x0f: - fx = fxzero; - break; - case 0x30: - f = xhex_ln_2; - break; - case 0x31: - f = xhex_ln_10; - break; - case 0x32: - fx = fx_1e0; - break; - case 0x33: - fx = fx_1e1; - break; - case 0x34: - fx = fx_1e2; - break; - case 0x35: - fx = fx_1e4; - break; - case 0x36: - fx = fx_1e8; - break; - case 0x37: - f = xhex_1e16; - break; - case 0x38: - f = xhex_1e32; - break; - case 0x39: - f = xhex_1e64; - break; - case 0x3a: - f = xhex_1e128; - break; - case 0x3b: - f = xhex_1e256; - break; - case 0x3c: - f = xhex_1e512; - break; - case 0x3d: - f = xhex_1e1024; - break; - case 0x3e: - f = xhex_1e2048; - break; - case 0x3f: - f = xhex_1e4096; - break; - default: - return false; - } - if (f) - softfloat_set(&fp->fpx, f); - else - fp->fpx = fx; - return true; +uae_u32 fpp_get_fpsr (void) +{ + return regs.fpsr; +} + +static void fpp_set_fpsr (uae_u32 val) +{ + regs.fpsr = val; +} + +static void fpp_set_fpcr (uae_u32 val) +{ + fpp_set_mode(val); + regs.fpcr = val & 0xffff; +} + +static void fpnan (fpdata *fpd) +{ + to_exten(fpd, xhex_nan[0], xhex_nan[1], xhex_nan[2]); +} + +static void fpclear (fpdata *fpd) +{ + fpp_from_int(fpd, 0); } - -bool fpu_get_constant(fpdata *fp, int cr) +static void fpset (fpdata *fpd, uae_s32 val) { - if (currprefs.fpu_softfloat) { - return fpu_get_constant_softfloat(fp, cr); - } else { - return fpu_get_constant_fp(fp, cr); - } + fpp_from_int(fpd, val); } -typedef uae_s64 tointtype; +bool fpu_get_constant(fpdata *fpd, int cr) +{ + uae_u32 *f = NULL; + uae_u32 entry = 0; + bool valid = true; + + switch (cr & 0x7f) + { + case 0x00: // pi + entry = FPP_CR_PI; + break; + case 0x0b: // log10(2) + entry = FPP_CR_LOG10_2; + break; + case 0x0c: // e + entry = FPP_CR_E; + break; + case 0x0d: // log2(e) + entry 
= FPP_CR_LOG2_E; + break; + case 0x0e: // log10(e) + entry = FPP_CR_LOG10_E; + break; + case 0x0f: // 0.0 + entry = FPP_CR_ZERO; + break; + case 0x30: // ln(2) + entry = FPP_CR_LN_2; + break; + case 0x31: // ln(10) + entry = FPP_CR_LN_10; + break; + case 0x32: // 1e0 + entry = FPP_CR_1E0; + break; + case 0x33: // 1e1 + entry = FPP_CR_1E1; + break; + case 0x34: // 1e2 + entry = FPP_CR_1E2; + break; + case 0x35: // 1e4 + entry = FPP_CR_1E4; + break; + case 0x36: // 1e8 + entry = FPP_CR_1E8; + break; + case 0x37: // 1e16 + entry = FPP_CR_1E16; + break; + case 0x38: // 1e32 + entry = FPP_CR_1E32; + break; + case 0x39: // 1e64 + entry = FPP_CR_1E64; + break; + case 0x3a: // 1e128 + entry = FPP_CR_1E128; + break; + case 0x3b: // 1e256 + entry = FPP_CR_1E256; + break; + case 0x3c: // 1e512 + entry = FPP_CR_1E512; + break; + case 0x3d: // 1e1024 + entry = FPP_CR_1E1024; + break; + case 0x3e: // 1e2048 + entry = FPP_CR_1E2048; + break; + case 0x3f: // 1e4096 + entry = FPP_CR_1E4096; + break; + default: // undefined, return 0.0 + write_log (_T("Undocumented FPU constant access (index %02x)\n"), cr & 0x7f); + valid = false; + entry = FPP_CR_ZERO; + break; + } + + f = fpp_cr[entry].val; + + // if constant is inexact, set inexact bit and round + // note: with valid constants, LSB never wraps + if (fpp_cr[entry].inexact) { + fpsr_set_exception(FPSR_INEX2); + f[2] += fpp_cr[entry].rndoff[(regs.fpcr >> 4) & 3]; + } + + to_exten_fmovem(fpd, f[0], f[1], f[2]); + + if (((regs.fpcr >> 6) & 3) == 1) + fpp_roundsgl(fpd); + if (((regs.fpcr >> 6) & 3) >= 2) + fpp_rounddbl(fpd); + + fpsr_set_result(fpd); + + return valid; +} static void fpu_format_error (void) { @@ -1368,359 +789,108 @@ static bool fault_if_unimplemented_6888x (uae_u16 opcode, uae_u16 extra, uaecptr { if ((currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) && currprefs.fpu_no_unimplemented) { uae_u16 v = extra & 0x7f; - /* 68040/68060 only variants. 6888x = F-line exception.
*/ - switch (v) - { - case 0x62: /* FSADD */ - case 0x66: /* FDADD */ - case 0x68: /* FSSUB */ - case 0x6c: /* FDSUB */ - case 0x5a: /* FSNEG */ - case 0x5e: /* FDNEG */ - case 0x58: /* FSABS */ - case 0x5c: /* FDABS */ - case 0x63: /* FSMUL */ - case 0x67: /* FDMUL */ - case 0x41: /* FSSQRT */ - case 0x45: /* FDSQRT */ - fpu_noinst (opcode, oldpc); - return true; - } - } - return false; -} - -static bool fault_if_60 (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, int type) -{ - if (currprefs.cpu_model == 68060 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { - fpu_op_unimp (opcode, extra, ea, oldpc, type, NULL, -1, -1); - return true; - } - return false; -} - -static bool fault_if_4060 (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, int type, fpdata *src, uae_u32 *pack) -{ - if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { - if (pack) { - regs.exp_pack[0] = pack[0]; - regs.exp_pack[1] = pack[1]; - regs.exp_pack[2] = pack[2]; - } - fpu_op_unimp (opcode, extra, ea, oldpc, type, src, -1, -1); - return true; - } - return false; -} - -static bool fault_if_no_fpu_u (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc) -{ - if (fault_if_no_fpu (opcode, extra, ea, oldpc)) - return true; - if (currprefs.cpu_model == 68060 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { - // 68060 FTRAP, FDBcc or FScc are not implemented. 
- fpu_op_unimp (opcode, extra, ea, oldpc, FPU_EXP_UNIMP_INS, NULL, -1, -1); - return true; - } - return false; -} - -static bool fault_if_no_6888x (uae_u16 opcode, uae_u16 extra, uaecptr oldpc) -{ - if (currprefs.cpu_model < 68040 && currprefs.fpu_model <= 0) { -#if EXCEPTION_FPP - write_log (_T("6888x no FPU: %04X-%04X PC=%08X\n"), opcode, extra, oldpc); -#endif - m68k_setpc (oldpc); - regs.fp_exception = true; - op_illg (opcode); - return true; - } - return false; -} - -static int get_fpu_version (void) -{ - int v = 0; - - switch (currprefs.fpu_model) - { - case 68881: - case 68882: - v = 0x1f; - break; - case 68040: - if (currprefs.fpu_revision == 0x40) - v = 0x40; - else - v = 0x41; - break; - } - return v; -} - -static void fpu_null (void) -{ - regs.fpu_state = 0; - regs.fpu_exp_state = 0; - regs.fpcr = 0; - regs.fpsr = 0; - regs.fpiar = 0; - fpset(&regs.fp_result, 1); - fpclear (&regs.fp_result); - for (int i = 0; i < 8; i++) - fpnan (&regs.fp[i]); -} - -#ifndef USE_HOST_ROUNDING -#ifdef USE_LONG_DOUBLE -#define fp_round_to_minus_infinity(x) floorl(x) -#define fp_round_to_plus_infinity(x) ceill(x) -#define fp_round_to_zero(x) ((x) >= 0.0 ? floorl(x) : ceill(x)) -#define fp_round_to_nearest(x) roundl(x) -#else // if !USE_LONG_DOUBLE -#define fp_round_to_minus_infinity(x) floor(x) -#define fp_round_to_plus_infinity(x) ceil(x) -#define fp_round_to_zero(x) ((x) >= 0.0 ?
floor(x) : ceil(x)) -#define fp_round_to_nearest(x) round(x) -#endif // !USE_LONG_DOUBLE -#endif // USE_HOST_ROUNDING - -static tointtype to_int(fpdata *src, int size) -{ - if (currprefs.fpu_softfloat) { - if (floatx80_lt(src->fpx, fxsizes[size * 2 + 0])) - return floatx80_to_int32(fxsizes[size * 2 + 0]); - if (floatx80_le(fxsizes[size * 2 + 1], src->fpx)) - return floatx80_to_int32(fxsizes[size * 2 + 1]); - return floatx80_to_int32(src->fpx); - } else { - fptype fp = src->fp; - if (fp < fsizes[size * 2 + 0]) - fp = fsizes[size * 2 + 0]; - if (fp > fsizes[size * 2 + 1]) - fp = fsizes[size * 2 + 1]; -#ifdef USE_HOST_ROUNDING -#ifdef USE_LONG_DOUBLE - return lrintl(fp); -#else - return lrint(fp); -#endif -#else -#if defined(X86_MSVC_ASSEMBLY_FPU) - { - fptype tmp_fp; - __asm { - fld LDPTR fp - frndint - fstp LDPTR tmp_fp - } - return (tointtype)tmp_fp; - } -#else /* no X86_MSVC */ - { - tointtype result = (int)fp; - switch (regs.fpcr & 0x30) - { - case FPCR_ROUND_ZERO: - result = (int)fp_round_to_zero (fp); - break; - case FPCR_ROUND_MINF: - result = (int)fp_round_to_minus_infinity (fp); - break; - case FPCR_ROUND_NEAR: - result = fp_round_to_nearest (fp); - break; - case FPCR_ROUND_PINF: - result = (int)fp_round_to_plus_infinity (fp); - break; - } - return result; - } -#endif -#endif - } -} - -static bool fp_is_snan(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - return floatx80_is_signaling_nan(fpd->fpx) != 0; - } else { - return false; - } -} -static void fp_unset_snan(fpdata *fpd) -{ - fpd->fpx.low |= LIT64(0x4000000000000000); -} -static bool fp_is_nan (fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - return floatx80_is_nan(fpd->fpx) != 0; - } else { -#ifdef HAVE_ISNAN - return isnan(fpd->fp) != 0; -#else - return false; -#endif - } -} -static bool fp_is_infinity (fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - return floatx80_is_infinity(fpd->fpx) != 0; - } else { -#ifdef _MSC_VER - return !_finite (fpd->fp); -#elif defined(HAVE_ISINF) - return 
isinf(fpd->fp); -#else - return false; -#endif - } -} -static bool fp_is_zero(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - return floatx80_is_zero(fpd->fpx) != 0; - } else { - return fpd->fp == 0.0; - } -} -static bool fp_is_neg(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - return floatx80_is_negative(fpd->fpx) != 0; - } else { - return fpd->fp < 0.0; - } -} - -static bool fp_is_denormal(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - return floatx80_is_denormal(fpd->fpx) != 0; - } else { - return false; - } -} -static bool fp_is_unnormal(fpdata *fpd) -{ - if (currprefs.fpu_softfloat) { - return floatx80_is_unnormal(fpd->fpx) != 0; - } else { - return false; - } -} - -static void fpsr_set_exception(uae_u32 exception) -{ - regs.fpsr |= exception; -} -static void fpsr_check_exception(void) -{ - // Any exception status bit and matching exception enable bits set? - uae_u32 exception = (regs.fpsr >> 8) & (regs.fpcr >> 8); - - if (exception) { - int vector = 0; - int vtable[8] = { 49, 49, 50, 51, 53, 52, 54, 48 }; - int i; - for (i = 7; i >= 0; i--) { - if (exception & (1 << i)) { - vector = vtable[i]; - break; - } - } - // logging only so far - write_log (_T("FPU exception: FPSR: %08x, FPCR: %04x (vector: %d)!\n"), regs.fpsr, regs.fpcr, vector); - } -} -static void fpsr_set_result(fpdata *result) -{ - regs.fp_result = *result; - - // condition code byte - regs.fpsr &= 0x00fffff8; // clear cc - if (fp_is_nan (&regs.fp_result)) { - regs.fpsr |= FPSR_CC_NAN; - } else { - if (fp_is_zero(&regs.fp_result)) - regs.fpsr |= FPSR_CC_Z; - if (fp_is_infinity (&regs.fp_result)) - regs.fpsr |= FPSR_CC_I; - } - if (fp_is_neg(&regs.fp_result)) - regs.fpsr |= FPSR_CC_N; - - // check if result is signaling nan - if (fp_is_snan(&regs.fp_result)) - regs.fpsr |= FPSR_SNAN; -} -static void fpsr_clear_status(void) -{ - // clear exception status byte only - regs.fpsr &= 0x0fff00f8; - - // clear external status - clear_fp_status(); + /* 68040/68060 only variants. 6888x = F-line exception.
*/ + switch (v) + { + case 0x62: /* FSADD */ + case 0x66: /* FDADD */ + case 0x68: /* FSSUB */ + case 0x6c: /* FDSUB */ + case 0x5a: /* FSNEG */ + case 0x5e: /* FDNEG */ + case 0x58: /* FSABS */ + case 0x5c: /* FDABS */ + case 0x63: /* FSMUL */ + case 0x67: /* FDMUL */ + case 0x41: /* FSSQRT */ + case 0x45: /* FDSQRT */ + fpu_noinst (opcode, oldpc); + return true; + } + } + return false; } -static void fpsr_make_status(void) +static bool fault_if_60 (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, int type) { - // get external status - get_fp_status(&regs.fpsr); - - // update accrued exception byte - if (regs.fpsr & (FPSR_BSUN | FPSR_SNAN | FPSR_OPERR)) - regs.fpsr |= FPSR_AE_IOP; // IOP = BSUN || SNAN || OPERR - if (regs.fpsr & FPSR_OVFL) - regs.fpsr |= FPSR_AE_OVFL; // OVFL = OVFL - if ((regs.fpsr & FPSR_UNFL) && (regs.fpsr & FPSR_INEX2)) - regs.fpsr |= FPSR_AE_UNFL; // UNFL = UNFL && INEX2 - if (regs.fpsr & FPSR_DZ) - regs.fpsr |= FPSR_AE_DZ; // DZ = DZ - if (regs.fpsr & (FPSR_OVFL | FPSR_INEX2 | FPSR_INEX1)) - regs.fpsr |= FPSR_AE_INEX; // INEX = INEX1 || INEX2 || OVFL - - fpsr_check_exception(); + if (currprefs.cpu_model == 68060 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { + fpu_op_unimp (opcode, extra, ea, oldpc, type, NULL, -1, -1); + return true; + } + return false; } -static int fpsr_set_bsun(void) +static bool fault_if_4060 (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, int type, fpdata *src, uae_u32 *pack) { - regs.fpsr |= FPSR_BSUN; - regs.fpsr |= FPSR_AE_IOP; - - if (regs.fpcr & FPSR_BSUN) { - // logging only so far - write_log (_T("FPU exception: BSUN!
(FPSR: %08x, FPCR: %04x)\n"), regs.fpsr, regs.fpcr); - return 0; // return 1, once BSUN exception works - } - return 0; + if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { + if (pack) { + regs.exp_pack[0] = pack[0]; + regs.exp_pack[1] = pack[1]; + regs.exp_pack[2] = pack[2]; + } + fpu_op_unimp (opcode, extra, ea, oldpc, type, src, -1, -1); + return true; + } + return false; } -void fpsr_set_quotient(uae_u64 quot, uae_s8 sign) +static bool fault_if_no_fpu_u (uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc) { - regs.fpsr &= 0x0f00fff8; - regs.fpsr |= (quot << 16) & FPSR_QUOT_LSB; - regs.fpsr |= sign ? FPSR_QUOT_SIGN : 0; + if (fault_if_no_fpu (opcode, extra, ea, oldpc)) + return true; + if (currprefs.cpu_model == 68060 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) { + // 68060 FTRAP, FDBcc or FScc are not implemented. + fpu_op_unimp (opcode, extra, ea, oldpc, FPU_EXP_UNIMP_INS, NULL, -1, -1); + return true; + } + return false; } -uae_u32 fpp_get_fpsr (void) +static bool fault_if_no_6888x (uae_u16 opcode, uae_u16 extra, uaecptr oldpc) { - return regs.fpsr; + if (currprefs.cpu_model < 68040 && currprefs.fpu_model <= 0) { +#if EXCEPTION_FPP + write_log (_T("6888x no FPU: %04X-%04X PC=%08X\n"), opcode, extra, oldpc); +#endif + m68k_setpc (oldpc); + regs.fp_exception = true; + op_illg (opcode); + return true; + } + return false; } -static void fpp_set_fpsr (uae_u32 val) +static int get_fpu_version (void) { - regs.fpsr = val; + int v = 0; + + switch (currprefs.fpu_model) + { + case 68881: + case 68882: + v = 0x1f; + break; + case 68040: + if (currprefs.fpu_revision == 0x40) + v = 0x40; + else + v = 0x41; + break; + } + return v; } -static void fpp_set_fpcr (uae_u32 val) +static void fpu_null (void) { - set_fp_mode(val); - regs.fpcr = val & 0xffff; + regs.fpu_state = 0; + regs.fpu_exp_state = 0; + regs.fpcr = 0; + regs.fpsr = 0; + regs.fpiar = 0; + fpset(&regs.fp_result, 1); + fpclear (&regs.fp_result); + for (int i
= 0; i < 8; i++) + fpnan (&regs.fp[i]); } static uae_u32 get_ftag (uae_u32 w1, uae_u32 w2, uae_u32 w3, int size) @@ -1806,7 +976,7 @@ static void to_pack (fpdata *fpd, uae_u32 *wrd) #else sscanf (str, "%le", &d); #endif - from_native(d, fpd); + fpp_from_native(d, fpd); } static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor) @@ -1818,12 +988,12 @@ static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor) char str[100]; fptype fp; - if (fp_is_nan (src)) { + if (fpp_is_nan (src)) { // copied bit by bit, no conversion from_exten(src, &wrd[0], &wrd[1], &wrd[2]); return; } - if (fp_is_infinity (src)) { + if (fpp_is_infinity (src)) { // extended exponent and all 0 packed fraction from_exten(src, &wrd[0], &wrd[1], &wrd[2]); wrd[1] = wrd[2] = 0; @@ -1832,7 +1002,7 @@ static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor) wrd[0] = wrd[1] = wrd[2] = 0; - to_native(&fp, src); + fpp_to_native(&fp, src); #if USE_LONG_DOUBLE sprintf (str, "%#.17Le", fp); @@ -1959,10 +1129,8 @@ static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor) static bool fault_if_no_denormal_support_pre(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size) { if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented && currprefs.fpu_softfloat) { - bits64 Sig = extractFloatx80Frac(fpd->fpx); - bits32 Exp = extractFloatx80Exp(fpd->fpx); - if (Exp == 0 && Sig != 0) { - fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_PRE, fpd, -1, size); + if (fpp_is_unnormal(fpd) || fpp_is_denormal(fpd)) { + fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_PRE, fpd, -1, size); return true; } } @@ -1971,9 +1139,7 @@ static bool fault_if_no_denormal_support_pre(uae_u16 opcode, uae_u16 extra, uaec static bool fault_if_no_denormal_support_post(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size) { if (currprefs.fpu_softfloat && currprefs.cpu_model >= 68040 && currprefs.fpu_model &&
currprefs.fpu_no_unimplemented) { - bits64 Sig = extractFloatx80Frac(fpd->fpx); - bits32 Exp = extractFloatx80Exp(fpd->fpx); - if (Exp == 0 && Sig != 0) { + if (fpp_is_unnormal(fpd) || fpp_is_denormal(fpd)) { fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_POST, fpd, -1, size); return true; } @@ -2177,8 +1343,8 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o { int size, mode, reg; uae_u32 ad = 0; - static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; - static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; #if DEBUG_FPP if (!isinrom ()) @@ -2200,15 +1366,15 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o switch (size) { case 6: - m68k_dreg (regs, reg) = (uae_u32)(((to_int (value, 0) & 0xff) + m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 0) & 0xff) | (m68k_dreg (regs, reg) & ~0xff))); break; case 4: - m68k_dreg (regs, reg) = (uae_u32)(((to_int (value, 1) & 0xffff) + m68k_dreg (regs, reg) = (uae_u32)(((fpp_to_int (value, 1) & 0xffff) | (m68k_dreg (regs, reg) & ~0xffff))); break; case 0: - m68k_dreg (regs, reg) = (uae_u32)to_int (value, 2); + m68k_dreg (regs, reg) = (uae_u32)fpp_to_int (value, 2); break; case 1: m68k_dreg (regs, reg) = from_single (value); @@ -2275,7 +1441,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o case 0: if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) return 1; - x_cp_put_long(ad, (uae_u32)to_int(value, 2)); + x_cp_put_long(ad, (uae_u32)fpp_to_int(value, 2)); break; case 1: if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) @@ -2317,7 +1483,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o case 4: if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) return 1; - x_cp_put_word(ad, 
(uae_s16)to_int(value, 1)); + x_cp_put_word(ad, (uae_s16)fpp_to_int(value, 1)); break; case 5: { @@ -2333,7 +1499,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o case 6: if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2)) return 1; - x_cp_put_byte(ad, (uae_s8)to_int(value, 0)); + x_cp_put_byte(ad, (uae_s8)fpp_to_int(value, 0)); break; default: return 0; @@ -2395,9 +1561,9 @@ int fpp_cond (int condition) { int NotANumber, Z, N; - NotANumber = fp_is_nan(&regs.fp_result); - N = fp_is_neg(&regs.fp_result); - Z = fp_is_zero(&regs.fp_result); + NotANumber = fpp_is_nan(&regs.fp_result); + N = fpp_is_neg(&regs.fp_result); + Z = fpp_is_zero(&regs.fp_result); if ((condition & 0x10) && NotANumber) { if (fpsr_set_bsun()) @@ -3050,153 +2216,144 @@ static uaecptr fmovem2fpp (uaecptr ad, uae_u32 list, int incr, int regdir) return ad; } -static bool arithmetic_fp(fptype src, int reg, int extra) +static bool arithmetic(fpdata *src, int reg, int extra) { bool sgl = false; uae_u64 q = 0; - uae_s8 s = 0; + uae_u8 s = 0; + fpdata *dst = &regs.fp[reg]; + + // SNAN -> QNAN if SNAN interrupt is not enabled + if (fpp_is_snan(src) && !(regs.fpcr & 0x4000)) { + fpp_unset_snan(src); + } switch (extra & 0x7f) { case 0x00: /* FMOVE */ case 0x40: case 0x44: - regs.fp[reg].fp = src; + *dst = *src; break; case 0x01: /* FINT */ - regs.fp[reg].fp = fp_int(src); + fpp_int(src, dst); break; case 0x02: /* FSINH */ - regs.fp[reg].fp = sinh (src); + fpp_sinh(src, dst); break; case 0x03: /* FINTRZ */ - regs.fp[reg].fp = fp_intrz (src); + fpp_intrz(src, dst); break; case 0x04: /* FSQRT */ case 0x41: /* FSSQRT */ case 0x45: /* FDSQRT */ - regs.fp[reg].fp = sqrt (src); + fpp_sqrt(src, dst); break; case 0x06: /* FLOGNP1 */ - regs.fp[reg].fp = log (src + 1.0); + fpp_lognp1(src, dst); break; case 0x08: /* FETOXM1 */ - regs.fp[reg].fp = exp (src) - 1.0; + fpp_etoxm1(src, dst); break; case 0x09: /* FTANH */ - regs.fp[reg].fp = tanh (src); + fpp_tanh(src, dst); break; case
0x0a: /* FATAN */ - regs.fp[reg].fp = atan (src); + fpp_atan(src, dst); break; case 0x0c: /* FASIN */ - regs.fp[reg].fp = asin (src); + fpp_asin(src, dst); break; case 0x0d: /* FATANH */ - regs.fp[reg].fp = atanh (src); + fpp_atanh(src, dst); break; case 0x0e: /* FSIN */ - regs.fp[reg].fp = sin (src); + fpp_sin(src, dst); break; case 0x0f: /* FTAN */ - regs.fp[reg].fp = tan (src); + fpp_tan(src, dst); break; case 0x10: /* FETOX */ - regs.fp[reg].fp = exp (src); + fpp_etox(src, dst); break; case 0x11: /* FTWOTOX */ - regs.fp[reg].fp = pow (2.0, src); + fpp_twotox(src, dst); break; case 0x12: /* FTENTOX */ - regs.fp[reg].fp = pow (10.0, src); + fpp_tentox(src, dst); break; case 0x14: /* FLOGN */ - regs.fp[reg].fp = log (src); + fpp_logn(src, dst); break; case 0x15: /* FLOG10 */ - regs.fp[reg].fp = log10 (src); + fpp_log10(src, dst); break; case 0x16: /* FLOG2 */ - regs.fp[reg].fp = *fp_l2_e * log (src); + fpp_log2(src, dst); break; case 0x18: /* FABS */ case 0x58: /* FSABS */ case 0x5c: /* FDABS */ - regs.fp[reg].fp = src < 0 ? 
-src : src; + fpp_abs(src, dst); break; case 0x19: /* FCOSH */ - regs.fp[reg].fp = cosh (src); + fpp_cosh(src, dst); break; case 0x1a: /* FNEG */ case 0x5a: /* FSNEG */ case 0x5e: /* FDNEG */ - regs.fp[reg].fp = -src; + fpp_neg(src, dst); break; case 0x1c: /* FACOS */ - regs.fp[reg].fp = acos (src); + fpp_acos(src, dst); break; case 0x1d: /* FCOS */ - regs.fp[reg].fp = cos (src); + fpp_cos(src, dst); break; case 0x1e: /* FGETEXP */ - { - if (src == 0) { - regs.fp[reg].fp = 0; - } else { - int expon; - frexp (src, &expon); - regs.fp[reg].fp = (double) (expon - 1); - } - } + fpp_getexp(src, dst); break; case 0x1f: /* FGETMAN */ - { - if (src == 0) { - regs.fp[reg].fp = 0; - } else { - int expon; - regs.fp[reg].fp = frexp (src, &expon) * 2.0; - } - } + fpp_getman(src, dst); break; case 0x20: /* FDIV */ case 0x60: /* FSDIV */ case 0x64: /* FDDIV */ - regs.fp[reg].fp /= src; + fpp_div(dst, src); break; case 0x21: /* FMOD */ - regs.fp[reg].fp = fp_mod(regs.fp[reg].fp, src, &q, &s); + fpp_mod(dst, src, &q, &s); fpsr_set_quotient(q, s); break; case 0x22: /* FADD */ case 0x62: /* FSADD */ case 0x66: /* FDADD */ - regs.fp[reg].fp += src; + fpp_add(dst, src); break; case 0x23: /* FMUL */ case 0x63: /* FSMUL */ case 0x67: /* FDMUL */ - regs.fp[reg].fp *= src; + fpp_mul(dst, src); break; case 0x24: /* FSGLDIV */ - regs.fp[reg].fp /= src; + fpp_div(dst, src); sgl = true; break; case 0x25: /* FREM */ - regs.fp[reg].fp = fp_rem(regs.fp[reg].fp, src, &q, &s); + fpp_rem(dst, src, &q, &s); fpsr_set_quotient(q, s); break; case 0x26: /* FSCALE */ - regs.fp[reg].fp = ldexp(regs.fp[reg].fp, (int)src); + fpp_scale(dst, src); break; case 0x27: /* FSGLMUL */ - regs.fp[reg].fp *= src; + fpp_mul(dst, src); sgl = true; break; case 0x28: /* FSUB */ case 0x68: /* FSSUB */ case 0x6c: /* FDSUB */ - regs.fp[reg].fp -= src; + fpp_sub(dst, src); break; case 0x30: /* FSINCOS */ case 0x31: @@ -3206,25 +2363,23 @@ static bool arithmetic_fp(fptype src, int reg, int extra) case 0x35: case 0x36: case 0x37: 
- regs.fp[extra & 7].fp = cos (src); - regs.fp[reg].fp = sin (src); + fpp_cos(src, &regs.fp[extra & 7]); + fpp_sin(src, dst); if (((regs.fpcr >> 6) & 3) == 1) - fp_round32(&regs.fp[extra & 7]); + fpp_round32(&regs.fp[extra & 7]); else if (((regs.fpcr >> 6) & 3) == 2) - fp_round64(&regs.fp[extra & 7]); + fpp_round64(&regs.fp[extra & 7]); break; case 0x38: /* FCMP */ { - fpdata fpd = { 0 }; - fpd.fp = regs.fp[reg].fp - src; - fpsr_set_result(&fpd); + fpdata t = *dst; + fpp_sub(&t, src); + fpsr_set_result(&t); return true; } case 0x3a: /* FTST */ { - fpdata fpd = { 0 }; - fpd.fp = src; - fpsr_set_result(&fpd); + fpsr_set_result(src); return true; } default: @@ -3233,199 +2388,21 @@ static bool arithmetic_fp(fptype src, int reg, int extra) // must check instruction rounding overrides first if (sgl) { - fp_roundsgl(&regs.fp[reg]); + fpp_roundsgl(&regs.fp[reg]); } else if ((extra & 0x44) == 0x40) { - fp_round32(&regs.fp[reg]); + fpp_round32(&regs.fp[reg]); } else if ((extra & 0x44) == 0x44) { - fp_round64(&regs.fp[reg]); + fpp_round64(&regs.fp[reg]); } else if (((regs.fpcr >> 6) & 3) == 1) { - fp_round32(&regs.fp[reg]); + fpp_round32(&regs.fp[reg]); } else if (((regs.fpcr >> 6) & 3) == 2) { - fp_round64(&regs.fp[reg]); + fpp_round64(&regs.fp[reg]); } fpsr_set_result(&regs.fp[reg]); return true; } -static bool arithmetic_softfloat(fpdata *srcd, int reg, int extra) -{ - floatx80 fx = srcd->fpx; - floatx80 f = regs.fp[reg].fpx; - bool sgl = false; - uae_u64 q = 0; - uae_s8 s = 0; - - // SNAN -> QNAN if SNAN interrupt is not enabled - if (floatx80_is_signaling_nan(fx) && !(regs.fpcr & 0x4000)) { - fp_unset_snan(srcd); - } - - switch (extra & 0x7f) - { - case 0x00: /* FMOVE */ - case 0x40: - case 0x44: - regs.fp[reg].fpx = fx; - break; - case 0x01: /* FINT */ - regs.fp[reg].fpx = floatx80_round_to_int(fx); - break; - case 0x03: /* FINTRZ */ - regs.fp[reg].fpx = floatx80_round_to_int_toward_zero(fx); - break; - case 0x04: /* FSQRT */ - case 0x41: /* FSSQRT */ - case 0x45: /* FDSQRT */ - regs.fp[reg].fpx = floatx80_sqrt(fx);
- break; - case 0x18: /* FABS */ - case 0x58: /* FSABS */ - case 0x5c: /* FDABS */ - regs.fp[reg].fpx = floatx80_abs(fx); - break; - case 0x1a: /* FNEG */ - case 0x5a: /* FSNEG */ - case 0x5e: /* FDNEG */ - // same here.. - regs.fp[reg].fpx = floatx80_chs(fx); - break; - case 0x20: /* FDIV */ - case 0x60: /* FSDIV */ - case 0x64: /* FDDIV */ - regs.fp[reg].fpx = floatx80_div(f, fx); - break; - case 0x21: /* FMOD */ - regs.fp[reg].fpx = floatx80_mod(f, fx, &q, &s); - fpsr_set_quotient(q, s); - break; - case 0x22: /* FADD */ - case 0x62: /* FSADD */ - case 0x66: /* FDADD */ - regs.fp[reg].fpx = floatx80_add(f, fx); - break; - case 0x23: /* FMUL */ - case 0x63: /* FSMUL */ - case 0x67: /* FDMUL */ - regs.fp[reg].fpx = floatx80_mul(f, fx); - break; - case 0x24: /* FSGLDIV */ - regs.fp[reg].fpx = floatx80_div(f, fx); - sgl = true; - break; - case 0x25: /* FREM */ - regs.fp[reg].fpx = floatx80_rem(f, fx, &q, &s); - fpsr_set_quotient(q, s); - break; - case 0x27: /* FSGLMUL */ - regs.fp[reg].fpx = floatx80_mul(f, fx); - sgl = true; - break; - case 0x28: /* FSUB */ - case 0x68: /* FSSUB */ - case 0x6c: /* FDSUB */ - regs.fp[reg].fpx = floatx80_sub(f, fx); - break; - case 0x38: /* FCMP */ - { - fpdata fpd = { 0 }; - fpd.fpx = floatx80_sub(f, fx); - fpsr_set_result(&fpd); - return true; - } - case 0x3a: /* FTST */ - { - fpdata fpd = { 0 }; - fpd.fpx = f; - fpsr_set_result(&fpd); - return true; - } - case 0x1d: /* FCOS */ - floatx80_fcos(&f); - regs.fp[reg].fpx = f; - break; - case 0x0e: /* FSIN */ - floatx80_fsin(&f); - regs.fp[reg].fpx = f; - break; - case 0x0f: /* FTAN */ - floatx80_ftan(&f); - regs.fp[reg].fpx = f; - break; - case 0x30: /* FSINCOS */ - case 0x31: /* FSINCOS */ - case 0x32: /* FSINCOS */ - case 0x33: /* FSINCOS */ - case 0x34: /* FSINCOS */ - case 0x35: /* FSINCOS */ - case 0x36: /* FSINCOS */ - case 0x37: /* FSINCOS */ - floatx80_fsincos(f, &regs.fp[extra & 7].fpx, &regs.fp[reg].fpx); - if (((regs.fpcr >> 6) & 3) == 1) - fp_round32(&regs.fp[extra & 7]); - else if
(((regs.fpcr >> 6) & 3) == 2) - fp_round64(&regs.fp[extra & 7]); - break; - case 0x14: /* FLOGN */ - regs.fp[reg].fpx = floatx80_flogn(f); - break; - case 0x15: /* FLOG10 */ - regs.fp[reg].fpx = floatx80_flog10(f); - break; - case 0x16: /* FLOG2 */ - regs.fp[reg].fpx = floatx80_flog2(f); - break; - case 0x06: /* FLOGNP1 */ - regs.fp[reg].fpx = floatx80_flognp1(f); - break; - case 0x1e: /* FGETEXP */ - regs.fp[reg].fpx = floatx80_getexp(f); - break; - case 0x1f: /* FGETMAN */ - regs.fp[reg].fpx = floatx80_getman(f); - break; - - case 0x08: /* FETOXM1 */ - case 0x09: /* FTANH */ - case 0x0a: /* FATAN */ - case 0x0c: /* FASIN */ - case 0x0d: /* FATANH */ - case 0x10: /* FETOX */ - case 0x11: /* FTWOTOX */ - case 0x12: /* FTENTOX */ - case 0x19: /* FCOSH */ - case 0x1c: /* FACOS */ - { - // This is horribly ineffective.. - fptype fpa; - fpdata fpdx = { 0 }; - fpdx.fpx = fx; - to_native(&fpa, &fpdx); - // emulate instruction using normal fpu code - if (!arithmetic_fp(fpa, reg, extra)) - return false; - from_native(fpa, &regs.fp[reg]); - } - break; - } - - // must check instruction rounding overrides first - if (sgl) { - fp_roundsgl(&regs.fp[reg]); - } else if ((extra & 0x44) == 0x40) { - fp_round32(&regs.fp[reg]); - } else if ((extra & 0x44) == 0x44) { - fp_round64(&regs.fp[reg]); - } else if (((regs.fpcr >> 6) & 3) == 1) { - fp_round32(&regs.fp[reg]); - } else if (((regs.fpcr >> 6) & 3) == 2) { - fp_round64(&regs.fp[reg]); - } - - fpsr_set_result(&regs.fp[reg]); - return true; -} - static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) { int reg = -1; @@ -3479,10 +2456,8 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) if (extra & 0x0400) m68k_areg (regs, opcode & 7) = regs.fpiar; } else { - if (extra & 0x1000) { + if (extra & 0x1000) fpp_set_fpcr(m68k_areg (regs, opcode & 7)); - native_set_fpucw (regs.fpcr); - } if (extra & 0x0800) fpp_set_fpsr(m68k_areg (regs, opcode & 7)); if (extra & 0x0400) @@ -3507,10 +2482,8 @@ static void fpuop_arithmetic2 (uae_u32 opcode,
uae_u16 extra) ext[1] = x_cp_next_ilong (); if (extra & 0x0400) ext[2] = x_cp_next_ilong (); - if (extra & 0x1000) { + if (extra & 0x1000) fpp_set_fpcr(ext[0]); - native_set_fpucw (regs.fpcr); - } if (extra & 0x0800) fpp_set_fpsr(ext[1]); if (extra & 0x0400) @@ -3577,7 +2550,6 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) } if (extra & 0x1000) { fpp_set_fpcr(x_cp_get_long (ad)); - native_set_fpucw (regs.fpcr); ad += 4; } if (extra & 0x0800) { @@ -3679,10 +2651,7 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra) regs.fpiar = pc; fpsr_clear_status(); - if (currprefs.fpu_softfloat) - v = arithmetic_softfloat(&srcd, reg, extra); - else - v = arithmetic_fp(srcd.fp, reg, extra); + v = arithmetic(&srcd, reg, extra); if (!v) fpu_noinst (opcode, pc); return; @@ -3722,6 +2691,12 @@ void fpuop_arithmetic (uae_u32 opcode, uae_u16 extra) void fpu_reset (void) { + if (currprefs.fpu_softfloat) { + fp_init_softfloat(); + } else { + fp_init_native(); + } + #if defined(CPU_i386) || defined(CPU_x86_64) init_fpucw_x87(); #endif @@ -3729,21 +2704,8 @@ void fpu_reset (void) regs.fpcr = regs.fpsr = regs.fpiar = 0; regs.fpu_exp_state = 0; fpset (&regs.fp_result, 1); - native_set_fpucw (regs.fpcr); + fpsr_make_status(); fpux_restore (NULL); - - fxsizes[0] = int32_to_floatx80(-128); - fxsizes[1] = int32_to_floatx80(127); - fxsizes[2] = int32_to_floatx80(-32768); - fxsizes[3] = int32_to_floatx80(32767); - fxsizes[4] = int32_to_floatx80(-2147483648); - fxsizes[5] = int32_to_floatx80(2147483647); - fxzero = int32_to_floatx80(0); - fx_1e0 = int32_to_floatx80(1); - fx_1e1 = int32_to_floatx80(10); - fx_1e2 = int32_to_floatx80(100); - fx_1e4 = int32_to_floatx80(10000); - fx_1e8 = int32_to_floatx80(100000000); } uae_u8 *restore_fpu (uae_u8 *src) @@ -3762,9 +2724,9 @@ uae_u8 *restore_fpu (uae_u8 *src) to_exten (&regs.fp[i], w1, w2, w3); } regs.fpcr = restore_u32 (); - native_set_fpucw (regs.fpcr); regs.fpsr = restore_u32 (); regs.fpiar = restore_u32 (); +
fpsr_make_status(); if (flags & 0x80000000) { restore_u32 (); restore_u32 (); diff --git a/fpp_native.cpp b/fpp_native.cpp new file mode 100644 index 00000000..456bcbf0 --- /dev/null +++ b/fpp_native.cpp @@ -0,0 +1,884 @@ +/* +* UAE - The Un*x Amiga Emulator +* +* MC68881/68882/68040/68060 FPU emulation +* +* Copyright 1996 Herman ten Brugge +* Modified 2005 Peter Keunecke +* 68040+ exceptions and more by Toni Wilen +*/ + +#define __USE_ISOC9X /* We might be able to pick up a NaN */ + +#include +#include +#include + +#include "sysconfig.h" +#include "sysdeps.h" + +#ifdef _MSC_VER +#pragma fenv_access(on) +#endif + +#define USE_HOST_ROUNDING + +#include "options.h" +#include "memory.h" +#include "newcpu.h" +#include "fpp.h" +#include "uae/attributes.h" +#include "uae/vm.h" +#include "newcpu.h" + +static uae_u32 xhex_pi[] ={0x2168c235, 0xc90fdaa2, 0x4000}; +uae_u32 xhex_exp_1[] ={0xa2bb4a9a, 0xadf85458, 0x4000}; +static uae_u32 xhex_l2_e[] ={0x5c17f0bc, 0xb8aa3b29, 0x3fff}; +static uae_u32 xhex_ln_2[] ={0xd1cf79ac, 0xb17217f7, 0x3ffe}; +uae_u32 xhex_ln_10[] ={0xaaa8ac17, 0x935d8ddd, 0x4000}; +uae_u32 xhex_l10_2[] ={0xfbcff798, 0x9a209a84, 0x3ffd}; +uae_u32 xhex_l10_e[] ={0x37287195, 0xde5bd8a9, 0x3ffd}; +uae_u32 xhex_1e16[] ={0x04000000, 0x8e1bc9bf, 0x4034}; +uae_u32 xhex_1e32[] ={0x2b70b59e, 0x9dc5ada8, 0x4069}; +uae_u32 xhex_1e64[] ={0xffcfa6d5, 0xc2781f49, 0x40d3}; +uae_u32 xhex_1e128[] ={0x80e98ce0, 0x93ba47c9, 0x41a8}; +uae_u32 xhex_1e256[] ={0x9df9de8e, 0xaa7eebfb, 0x4351}; +uae_u32 xhex_1e512[] ={0xa60e91c7, 0xe319a0ae, 0x46a3}; +uae_u32 xhex_1e1024[]={0x81750c17, 0xc9767586, 0x4d48}; +uae_u32 xhex_1e2048[]={0xc53d5de5, 0x9e8b3b5d, 0x5a92}; +uae_u32 xhex_1e4096[]={0x8a20979b, 0xc4605202, 0x7525}; +static uae_u32 xhex_inf[] ={0x00000000, 0x00000000, 0x7fff}; +static uae_u32 xhex_nan[] ={0xffffffff, 0xffffffff, 0x7fff}; +static uae_u32 xhex_snan[] ={0xffffffff, 0xbfffffff, 0x7fff}; +#if USE_LONG_DOUBLE +static long double *fp_pi = (long double *)xhex_pi; +static 
long double *fp_exp_1 = (long double *)xhex_exp_1; +static long double *fp_l2_e = (long double *)xhex_l2_e; +static long double *fp_ln_2 = (long double *)xhex_ln_2; +static long double *fp_ln_10 = (long double *)xhex_ln_10; +static long double *fp_l10_2 = (long double *)xhex_l10_2; +static long double *fp_l10_e = (long double *)xhex_l10_e; +static long double *fp_1e16 = (long double *)xhex_1e16; +static long double *fp_1e32 = (long double *)xhex_1e32; +static long double *fp_1e64 = (long double *)xhex_1e64; +static long double *fp_1e128 = (long double *)xhex_1e128; +static long double *fp_1e256 = (long double *)xhex_1e256; +static long double *fp_1e512 = (long double *)xhex_1e512; +static long double *fp_1e1024 = (long double *)xhex_1e1024; +static long double *fp_1e2048 = (long double *)xhex_1e2048; +static long double *fp_1e4096 = (long double *)xhex_1e4096; +static long double *fp_inf = (long double *)xhex_inf; +static long double *fp_nan = (long double *)xhex_nan; +#else +static uae_u32 dhex_pi[] ={0x54442D18, 0x400921FB}; +static uae_u32 dhex_exp_1[] ={0x8B145769, 0x4005BF0A}; +static uae_u32 dhex_l2_e[] ={0x652B82FE, 0x3FF71547}; +static uae_u32 dhex_ln_2[] ={0xFEFA39EF, 0x3FE62E42}; +static uae_u32 dhex_ln_10[] ={0xBBB55516, 0x40026BB1}; +static uae_u32 dhex_l10_2[] ={0x509F79FF, 0x3FD34413}; +static uae_u32 dhex_l10_e[] ={0x1526E50E, 0x3FDBCB7B}; +static uae_u32 dhex_1e16[] ={0x37E08000, 0x4341C379}; +static uae_u32 dhex_1e32[] ={0xB5056E17, 0x4693B8B5}; +static uae_u32 dhex_1e64[] ={0xE93FF9F5, 0x4D384F03}; +static uae_u32 dhex_1e128[] ={0xF9301D32, 0x5A827748}; +static uae_u32 dhex_1e256[] ={0x7F73BF3C, 0x75154FDD}; +static uae_u32 dhex_inf[] ={0x00000000, 0x7ff00000}; +static uae_u32 dhex_nan[] ={0xffffffff, 0x7fffffff}; +static double *fp_pi = (double *)dhex_pi; +static double *fp_exp_1 = (double *)dhex_exp_1; +static double *fp_l2_e = (double *)dhex_l2_e; +static double *fp_ln_2 = (double *)dhex_ln_2; +static double *fp_ln_10 = (double *)dhex_ln_10; 
+static double *fp_l10_2 = (double *)dhex_l10_2; +static double *fp_l10_e = (double *)dhex_l10_e; +static double *fp_1e16 = (double *)dhex_1e16; +static double *fp_1e32 = (double *)dhex_1e32; +static double *fp_1e64 = (double *)dhex_1e64; +static double *fp_1e128 = (double *)dhex_1e128; +static double *fp_1e256 = (double *)dhex_1e256; +static double *fp_1e512 = (double *)dhex_inf; +static double *fp_1e1024 = (double *)dhex_inf; +static double *fp_1e2048 = (double *)dhex_inf; +static double *fp_1e4096 = (double *)dhex_inf; +static double *fp_inf = (double *)dhex_inf; +static double *fp_nan = (double *)dhex_nan; +#endif +static const double twoto32 = 4294967296.0; +double fp_1e8 = 1.0e8; +float fp_1e0 = 1, fp_1e1 = 10, fp_1e2 = 100, fp_1e4 = 10000; + +#define FPCR_ROUNDING_MODE 0x00000030 +#define FPCR_ROUND_NEAR 0x00000000 +#define FPCR_ROUND_ZERO 0x00000010 +#define FPCR_ROUND_MINF 0x00000020 +#define FPCR_ROUND_PINF 0x00000030 + +#define FPCR_ROUNDING_PRECISION 0x000000c0 +#define FPCR_PRECISION_SINGLE 0x00000040 +#define FPCR_PRECISION_DOUBLE 0x00000080 +#define FPCR_PRECISION_EXTENDED 0x00000000 + +#if defined(CPU_i386) || defined(CPU_x86_64) + +/* The main motivation for dynamically creating an x86(-64) function in + * memory is because MSVC (x64) does not allow you to use inline assembly, + * and the x86-64 versions of _control87/_controlfp functions only modifies + * SSE2 registers. 
*/ + +static uae_u16 x87_cw = 0; +static uae_u8 *x87_fldcw_code = NULL; +typedef void (uae_cdecl *x87_fldcw_function)(void); + +void init_fpucw_x87(void) +{ + if (x87_fldcw_code) { + return; + } + x87_fldcw_code = (uae_u8 *) uae_vm_alloc( + uae_vm_page_size(), UAE_VM_32BIT, UAE_VM_READ_WRITE_EXECUTE); + uae_u8 *c = x87_fldcw_code; + /* mov eax,0x0 */ + *(c++) = 0xb8; + *(c++) = 0x00; + *(c++) = 0x00; + *(c++) = 0x00; + *(c++) = 0x00; +#ifdef CPU_x86_64 + /* Address override prefix */ + *(c++) = 0x67; +#endif + /* fldcw WORD PTR [eax+addr] */ + *(c++) = 0xd9; + *(c++) = 0xa8; + *(c++) = (((uintptr_t) &x87_cw) ) & 0xff; + *(c++) = (((uintptr_t) &x87_cw) >> 8) & 0xff; + *(c++) = (((uintptr_t) &x87_cw) >> 16) & 0xff; + *(c++) = (((uintptr_t) &x87_cw) >> 24) & 0xff; + /* ret */ + *(c++) = 0xc3; + /* Write-protect the function */ + uae_vm_protect(x87_fldcw_code, uae_vm_page_size(), UAE_VM_READ_EXECUTE); +} + +static void set_fpucw_x87(uae_u32 m68k_cw) +{ +#ifdef _MSC_VER + static int ex = 0; + // RN, RZ, RM, RP + static const unsigned int fp87_round[4] = { _RC_NEAR, _RC_CHOP, _RC_DOWN, _RC_UP }; + // Extend X, Single S, Double D, Undefined + static const unsigned int fp87_prec[4] = { _PC_64, _PC_24, _PC_53, 0 }; + int round = (m68k_cw >> 4) & 3; +#ifdef WIN64 + // x64 only sets SSE2, must also call x87_fldcw_code() to set FPU rounding mode. 
+ _controlfp(ex | fp87_round[round], _MCW_RC); +#else + int prec = (m68k_cw >> 6) & 3; + // x86 sets both FPU and SSE2 rounding mode, don't need x87_fldcw_code() + _control87(ex | fp87_round[round] | fp87_prec[prec], _MCW_RC | _MCW_PC); + return; +#endif +#endif + static const uae_u16 x87_cw_tab[] = { + 0x137f, 0x1f7f, 0x177f, 0x1b7f, /* Extended */ + 0x107f, 0x1c7f, 0x147f, 0x187f, /* Single */ + 0x127f, 0x1e7f, 0x167f, 0x1a7f, /* Double */ + 0x137f, 0x1f7f, 0x177f, 0x1b7f /* undefined */ + }; + x87_cw = x87_cw_tab[(m68k_cw >> 4) & 0xf]; +#if defined(X86_MSVC_ASSEMBLY) && 0 + __asm { fldcw word ptr x87_cw } +#elif defined(__GNUC__) && 0 + __asm__("fldcw %0" : : "m" (*&x87_cw)); +#else + ((x87_fldcw_function) x87_fldcw_code)(); +#endif +} + +#endif /* defined(CPU_i386) || defined(CPU_x86_64) */ + +static void native_set_fpucw(uae_u32 m68k_cw) +{ +#if defined(CPU_i386) || defined(CPU_x86_64) + set_fpucw_x87(m68k_cw); +#endif +} + +/* Functions for setting host/library modes and getting status */ +static void fp_set_mode(uae_u32 mode_control) +{ + switch(mode_control & FPCR_ROUNDING_PRECISION) { + case FPCR_PRECISION_EXTENDED: // X + break; + case FPCR_PRECISION_SINGLE: // S + break; + case FPCR_PRECISION_DOUBLE: // D + default: // undefined + break; + } +#ifdef USE_HOST_ROUNDING + switch(mode_control & FPCR_ROUNDING_MODE) { + case FPCR_ROUND_NEAR: // to neareset + fesetround(FE_TONEAREST); + break; + case FPCR_ROUND_ZERO: // to zero + fesetround(FE_TOWARDZERO); + break; + case FPCR_ROUND_MINF: // to minus + fesetround(FE_DOWNWARD); + break; + case FPCR_ROUND_PINF: // to plus + fesetround(FE_UPWARD); + break; + } + native_set_fpucw(mode_control); +#endif +} + + +static void fp_get_status(uae_u32 *status) +{ + int exp_flags = fetestexcept(FE_ALL_EXCEPT); + if (exp_flags) { + if (exp_flags & FE_INEXACT) + *status |= 0x0200; + if (exp_flags & FE_DIVBYZERO) + *status |= 0x0400; + if (exp_flags & FE_UNDERFLOW) + *status |= 0x0800; + if (exp_flags & FE_OVERFLOW) + *status 
|= 0x1000; + if (exp_flags & FE_INVALID) + *status |= 0x2000; + } +} + +static void fp_clear_status(void) +{ + feclearexcept (FE_ALL_EXCEPT); +} + +static const TCHAR *fp_print(fpdata *fpd) +{ + static TCHAR fs[32]; +#if USE_LONG_DOUBLE + _stprintf(fs, _T("#%Le"), fpd->fp); +#else + _stprintf(fs, _T("#%e"), fpd->fp); +#endif + return fs; +} + +/* Functions for detecting float type */ +static bool fp_is_snan(fpdata *fpd) +{ + return 0; /* FIXME: how to detect SNAN */ +} +static bool fp_unset_snan(fpdata *fpd) +{ + /* FIXME: how to unset SNAN */ + return 0; +} +static bool fp_is_nan (fpdata *fpd) +{ + return isnan(fpd->fp) != 0; +} +static bool fp_is_infinity (fpdata *fpd) +{ + return isinf(fpd->fp) != 0; +} +static bool fp_is_zero(fpdata *fpd) +{ + return (fpd->fp == 0.0); +} +static bool fp_is_neg(fpdata *fpd) +{ + return signbit(fpd->fp) != 0; +} +static bool fp_is_denormal(fpdata *fpd) +{ + return (isnormal(fpd->fp) == 0); /* FIXME: how to differ denormal/unnormal? */ +} +static bool fp_is_unnormal(fpdata *fpd) +{ + return (isnormal(fpd->fp) == 0); /* FIXME: how to differ denormal/unnormal? */ +} + +/* Functions for converting between float formats */ +/* FIXME: how to preserve/fix denormals and unnormals? 
*/ + +static void fp_to_native(fptype *fp, fpdata *fpd) +{ + *fp = fpd->fp; +} +static void fp_from_native(fptype fp, fpdata *fpd) +{ + fpd->fp = fp; +} + +static void fp_to_single_xn(fpdata *fpd, uae_u32 wrd1) +{ + union { + float f; + uae_u32 u; + } val; + + val.u = wrd1; + fpd->fp = (fptype) val.f; +} +static void fp_to_single_x(fpdata *fpd, uae_u32 wrd1) +{ + union { + float f; + uae_u32 u; + } val; + + val.u = wrd1; + fpd->fp = (fptype) val.f; +} +static uae_u32 fp_from_single_x(fpdata *fpd) +{ + union { + float f; + uae_u32 u; + } val; + + val.f = (float) fpd->fp; + return val.u; +} + +static void fp_to_double_xn(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) +{ + union { + double d; + uae_u32 u[2]; + } val; + +#ifdef WORDS_BIGENDIAN + val.u[0] = wrd1; + val.u[1] = wrd2; +#else + val.u[1] = wrd1; + val.u[0] = wrd2; +#endif + fpd->fp = (fptype) val.d; +} +static void fp_to_double_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) +{ + union { + double d; + uae_u32 u[2]; + } val; + +#ifdef WORDS_BIGENDIAN + val.u[0] = wrd1; + val.u[1] = wrd2; +#else + val.u[1] = wrd1; + val.u[0] = wrd2; +#endif + fpd->fp = (fptype) val.d; +} +static void fp_from_double_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2) +{ + union { + double d; + uae_u32 u[2]; + } val; + + val.d = (double) fpd->fp; +#ifdef WORDS_BIGENDIAN + *wrd1 = val.u[0]; + *wrd2 = val.u[1]; +#else + *wrd1 = val.u[1]; + *wrd2 = val.u[0]; +#endif +} +#ifdef USE_LONG_DOUBLE +static void fp_to_exten_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +{ + union { + long double ld; + uae_u32 u[3]; + } val; + +#if WORDS_BIGENDIAN + val.u[0] = (wrd1 & 0xffff0000) | ((wrd2 & 0xffff0000) >> 16); + val.u[1] = (wrd2 & 0x0000ffff) | ((wrd3 & 0xffff0000) >> 16); + val.u[2] = (wrd3 & 0x0000ffff) << 16; +#else + val.u[0] = wrd3; + val.u[1] = wrd2; + val.u[2] = wrd1 >> 16; +#endif + fpd->fp = val.ld; +} +static void fp_from_exten_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) +{ + union { + long double ld; + uae_u32 u[3]; + } 
val; + + val.ld = fpd->fp; +#if WORDS_BIGENDIAN + *wrd1 = val.u[0] & 0xffff0000; + *wrd2 = ((val.u[0] & 0x0000ffff) << 16) | ((val.u[1] & 0xffff0000) >> 16); + *wrd3 = ((val.u[1] & 0x0000ffff) << 16) | ((val.u[2] & 0xffff0000) >> 16); +#else + *wrd3 = val.u[0]; + *wrd2 = val.u[1]; + *wrd1 = val.u[2] << 16; +#endif +} +#else // if !USE_LONG_DOUBLE +static void fp_to_exten_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +{ + double frac; + if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { + fpd->fp = (wrd1 & 0x80000000) ? -0.0 : +0.0; + return; + } + frac = ((double)wrd2 + ((double)wrd3 / twoto32)) / 2147483648.0; + if (wrd1 & 0x80000000) + frac = -frac; + fpd->fp = ldexp (frac, ((wrd1 >> 16) & 0x7fff) - 16383); +} +static void fp_from_exten_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) +{ + int expon; + double frac; + fptype v; + + v = fpd->fp; + if (v == 0.0) { + *wrd1 = signbit(v) ? 0x80000000 : 0; + *wrd2 = 0; + *wrd3 = 0; + return; + } + if (v < 0) { + *wrd1 = 0x80000000; + v = -v; + } else { + *wrd1 = 0; + } + frac = frexp (v, &expon); + frac += 0.5 / (twoto32 * twoto32); + if (frac >= 1.0) { + frac /= 2.0; + expon++; + } + *wrd1 |= (((expon + 16383 - 1) & 0x7fff) << 16); + *wrd2 = (uae_u32) (frac * twoto32); + *wrd3 = (uae_u32) ((frac * twoto32 - *wrd2) * twoto32); +} +#endif // !USE_LONG_DOUBLE + +static uae_s64 fp_to_int(fpdata *src, int size) +{ + static const fptype fxsizes[6] = + { + -128.0, 127.0, + -32768.0, 32767.0, + -2147483648.0, 2147483647.0 + }; + + fptype fp = src->fp; + if (fp < fxsizes[size * 2 + 0]) + fp = fxsizes[size * 2 + 0]; + if (fp > fxsizes[size * 2 + 1]) + fp = fxsizes[size * 2 + 1]; +#ifdef USE_HOST_ROUNDING +#ifdef USE_LONG_DOUBLE + return lrintl(fp); +#else + return lrint(fp); +#endif +#else + tointtype result = (int)fp; + switch (regs.fpcr & 0x30) + { + case FPCR_ROUND_ZERO: + result = (int)fp_round_to_zero (fp); + break; + case FPCR_ROUND_MINF: + result = (int)fp_round_to_minus_infinity (fp); + 
break; + case FPCR_ROUND_NEAR: + result = fp_round_to_nearest (fp); + break; + case FPCR_ROUND_PINF: + result = (int)fp_round_to_plus_infinity (fp); + break; + } + return result; +#endif +} +static void fp_from_int(fpdata *fpd, uae_s32 src) +{ + fpd->fp = (fptype) src; +} + + +/* Functions for rounding */ + +// round to float with extended precision exponent +static void fp_roundsgl(fpdata *fpd) +{ + int expon; + float mant; +#ifdef USE_LONG_DOUBLE + mant = (float)(frexpl(fpd->fp, &expon) * 2.0); + fpd->fp = ldexpl((fptype)mant, expon - 1); +#else + mant = (float)(frexp(fpd->fp, &expon) * 2.0); + fpd->fp = ldexp((fptype)mant, expon - 1); +#endif +} + +// round to double with extended precision exponent +static void fp_rounddbl(fpdata *fpd) +{ + int expon; + double mant; +#ifdef USE_LONG_DOUBLE + mant = (double)(frexpl(fpd->fp, &expon) * 2.0); + fpd->fp = ldexpl((fptype)mant, expon - 1); +#else + mant = (double)(frexp(fpd->fp, &expon) * 2.0); + fpd->fp = ldexp((fptype)mant, expon - 1); +#endif +} + +// round to float +static void fp_round32(fpdata *fpd) +{ + fpd->fp = (float) fpd->fp; +} + +// round to double +static void fp_round64(fpdata *fpd) +{ +#ifdef USE_LONG_DOUBLE + fpd->fp = (double) fpd->fp; +#endif +} + +/* Arithmetic functions */ + +#ifdef USE_LONG_DOUBLE + +STATIC_INLINE fptype fp_int(fpdata *a, fpdata *dst) +{ +#ifdef USE_HOST_ROUNDING + dst->fp = rintl(a->fp); return dst->fp; /* read the operand's value field (was a->dst) and return the stored result */ +#else + switch (regs.fpcr & FPCR_ROUNDING_MODE) + { + case FPCR_ROUND_NEAR: + return fp_round_to_nearest(a); + case FPCR_ROUND_ZERO: + return fp_round_to_zero(a); + case FPCR_ROUND_MINF: + return fp_round_to_minus_infinity(a); + case FPCR_ROUND_PINF: + return fp_round_to_plus_infinity(a); + default: /* never reached */ + return a; + } +#endif +} +STATIC_INLINE fptype fp_mod(fptype a, fptype b, uae_u64 *q, uae_s8 *s) +{ + fptype quot; +#ifdef USE_HOST_ROUNDING + quot = truncl(a / b); +#else + quot = fp_round_to_zero(a / b); +#endif + if (quot < 0.0) { + *s = 1; + quot = -quot; + } else { + *s =
0; + } + *q = (uae_u64)quot; + return fmodl(a, b); +} +STATIC_INLINE fptype fp_rem(fptype a, fptype b, uae_u64 *q, uae_s8 *s) +{ + fptype quot; +#ifdef USE_HOST_ROUNDING + quot = roundl(a / b); +#else + quot = fp_round_to_nearest(a / b); +#endif + if (quot < 0.0) { + *s = 1; + quot = -quot; + } else { + *s = 0; + } + *q = (uae_u64)quot; + return remainderl(a, b); +} + +#else // if !USE_LONG_DOUBLE + +static void fp_int(fpdata *fpd, fpdata *dst) +{ + fptype a = fpd->fp; +#ifdef USE_HOST_ROUNDING + dst->fp = rint(a); +#else + switch (regs.fpcr & FPCR_ROUNDING_MODE) /* each arm must break: without it every mode fell through to round-to-plus-infinity */ + { + case FPCR_ROUND_NEAR: + dst->fp = fp_round_to_nearest(a); break; + case FPCR_ROUND_ZERO: + dst->fp = fp_round_to_zero(a); break; + case FPCR_ROUND_MINF: + dst->fp = fp_round_to_minus_infinity(a); break; + case FPCR_ROUND_PINF: + dst->fp = fp_round_to_plus_infinity(a); break; + default: /* never reached */ + break; + } +#endif +} + +static void fp_getexp(fpdata *a, fpdata *dst) +{ + int expon; + frexp(a->fp, &expon); + dst->fp = (double) (expon - 1); +} +static void fp_getman(fpdata *a, fpdata *dst) +{ + int expon; + dst->fp = frexp(a->fp, &expon) * 2.0; +} +static void fp_div(fpdata *a, fpdata *b) +{ + a->fp = a->fp / b->fp; +} +static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) +{ + fptype quot; +#ifdef USE_HOST_ROUNDING + quot = trunc(a->fp / b->fp); +#else + quot = fp_round_to_zero(a->fp / b->fp); +#endif + if (quot < 0.0) { + *s = 1; + quot = -quot; + } else { + *s = 0; + } + *q = (uae_u64)quot; + a->fp = fmod(a->fp, b->fp); +} +static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) +{ + fptype quot; +#ifdef USE_HOST_ROUNDING + quot = round(a->fp / b->fp); +#else + quot = fp_round_to_nearest(a->fp / b->fp); +#endif + if (quot < 0.0) { + *s = 1; + quot = -quot; + } else { + *s = 0; + } + *q = (uae_u64)quot; + a->fp = remainder(a->fp, b->fp); +} + +static void fp_scale(fpdata *a, fpdata *b) +{ + a->fp = ldexp(a->fp, (int)b->fp); +} + +#endif // !USE_LONG_DOUBLE + +static void fp_sinh(fpdata *a, fpdata
*dst) +{ + dst->fp = sinh(a->fp); +} +static void fp_intrz(fpdata *fpd, fpdata *dst) +{ +#ifdef USE_HOST_ROUNDING + dst->fp = trunc(fpd->fp); +#else + dst->fp = fp_round_to_zero (fpd->fp); +#endif +} +static void fp_sqrt(fpdata *a, fpdata *dst) +{ + dst->fp = sqrt(a->fp); +} +static void fp_lognp1(fpdata *a, fpdata *dst) +{ + dst->fp = log(a->fp + 1.0); +} +static void fp_etoxm1(fpdata *a, fpdata *dst) +{ + dst->fp = exp(a->fp) - 1.0; +} +static void fp_tanh(fpdata *a, fpdata *dst) +{ + dst->fp = tanh(a->fp); +} +static void fp_atan(fpdata *a, fpdata *dst) +{ + dst->fp = atan(a->fp); +} +static void fp_atanh(fpdata *a, fpdata *dst) +{ + dst->fp = atanh(a->fp); +} +static void fp_sin(fpdata *a, fpdata *dst) +{ + dst->fp = sin(a->fp); +} +static void fp_asin(fpdata *a, fpdata *dst) +{ + dst->fp = asin(a->fp); +} +static void fp_tan(fpdata *a, fpdata *dst) +{ + dst->fp = tan(a->fp); +} +static void fp_etox(fpdata *a, fpdata *dst) +{ + dst->fp = exp(a->fp); +} +static void fp_twotox(fpdata *a, fpdata *dst) +{ + dst->fp = pow(2.0, a->fp); +} +static void fp_tentox(fpdata *a, fpdata *dst) +{ + dst->fp = pow(10.0, a->fp); +} +static void fp_logn(fpdata *a, fpdata *dst) +{ + dst->fp = log(a->fp); +} +static void fp_log10(fpdata *a, fpdata *dst) +{ + dst->fp = log10(a->fp); +} +static void fp_log2(fpdata *a, fpdata *dst) +{ + dst->fp = log2(a->fp); +} +static void fp_abs(fpdata *a, fpdata *dst) +{ + dst->fp = a->fp < 0.0 ? 
-a->fp : a->fp; +} +static void fp_cosh(fpdata *a, fpdata *dst) +{ + dst->fp = cosh(a->fp); +} +static void fp_neg(fpdata *a, fpdata *dst) +{ + dst->fp = -a->fp; +} +static void fp_acos(fpdata *a, fpdata *dst) +{ + dst->fp = acos(a->fp); +} +static void fp_cos(fpdata *a, fpdata *dst) +{ + dst->fp = cos(a->fp); +} +static void fp_sub(fpdata *a, fpdata *b) +{ + a->fp = a->fp - b->fp; +} +static void fp_add(fpdata *a, fpdata *b) +{ + a->fp = a->fp + b->fp; +} +static void fp_mul(fpdata *a, fpdata *b) +{ + a->fp = a->fp * b->fp; +} + +void fp_init_native(void) +{ + fpp_print = fp_print; + fpp_is_snan = fp_is_snan; + fpp_unset_snan = fp_unset_snan; + fpp_is_nan = fp_is_nan; + fpp_is_infinity = fp_is_infinity; + fpp_is_zero = fp_is_zero; + fpp_is_neg = fp_is_neg; + fpp_is_denormal = fp_is_denormal; + fpp_is_unnormal = fp_is_unnormal; + + fpp_get_status = fp_get_status; + fpp_clear_status = fp_clear_status; + fpp_set_mode = fp_set_mode; + + fpp_from_native = fp_from_native; + fpp_to_native = fp_to_native; + + fpp_to_int = fp_to_int; + fpp_from_int = fp_from_int; + + fpp_to_single_xn = fp_to_single_xn; + fpp_to_single_x = fp_to_single_x; + fpp_from_single_x = fp_from_single_x; + + fpp_to_double_xn = fp_to_double_xn; + fpp_to_double_x = fp_to_double_x; + fpp_from_double_x = fp_from_double_x; + + fpp_to_exten_x = fp_to_exten_x; + fpp_from_exten_x = fp_from_exten_x; + + fpp_roundsgl = fp_roundsgl; + fpp_rounddbl = fp_rounddbl; + fpp_round32 = fp_round32; + fpp_round64 = fp_round64; + + fpp_int = fp_int; + fpp_sinh = fp_sinh; + fpp_intrz = fp_intrz; + fpp_sqrt = fp_sqrt; + fpp_lognp1 = fp_lognp1; + fpp_etoxm1 = fp_etoxm1; + fpp_tanh = fp_tanh; + fpp_atan = fp_atan; + fpp_atanh = fp_atanh; + fpp_sin = fp_sin; + fpp_asin = fp_asin; + fpp_tan = fp_tan; + fpp_etox = fp_etox; + fpp_twotox = fp_twotox; + fpp_tentox = fp_tentox; + fpp_logn = fp_logn; + fpp_log10 = fp_log10; + fpp_log2 = fp_log2; + fpp_abs = fp_abs; + fpp_cosh = fp_cosh; + fpp_neg = fp_neg; + fpp_acos = fp_acos; + 
fpp_cos = fp_cos; + fpp_getexp = fp_getexp; + fpp_getman = fp_getman; + fpp_div = fp_div; + fpp_mod = fp_mod; + fpp_add = fp_add; + fpp_mul = fp_mul; + fpp_rem = fp_rem; + fpp_scale = fp_scale; + fpp_sub = fp_sub; +} diff --git a/fpp_softfloat.cpp b/fpp_softfloat.cpp new file mode 100644 index 00000000..62be8aaa --- /dev/null +++ b/fpp_softfloat.cpp @@ -0,0 +1,611 @@ +/* +* UAE - The Un*x Amiga Emulator +* +* MC68881/68882/68040/68060 FPU emulation +* Softfloat version +* +* Andreas Grabher and Toni Wilen +* +*/ + +#define __USE_ISOC9X /* We might be able to pick up a NaN */ + +#define SOFTFLOAT_FAST_INT64 + +#include +#include +#include + +#include "sysconfig.h" +#include "sysdeps.h" + +#include "options.h" +#include "memory.h" +#include "newcpu.h" +#include "fpp.h" +#include "newcpu.h" + +#include "softfloat/softfloat-macros.h" +#include "softfloat/softfloat-specialize.h" + +#define FPCR_ROUNDING_MODE 0x00000030 +#define FPCR_ROUND_NEAR 0x00000000 +#define FPCR_ROUND_ZERO 0x00000010 +#define FPCR_ROUND_MINF 0x00000020 +#define FPCR_ROUND_PINF 0x00000030 + +#define FPCR_ROUNDING_PRECISION 0x000000c0 +#define FPCR_PRECISION_SINGLE 0x00000040 +#define FPCR_PRECISION_DOUBLE 0x00000080 +#define FPCR_PRECISION_EXTENDED 0x00000000 + +static floatx80 fxsizes[6]; +static floatx80 fxzero; +static floatx80 fx_1e0, fx_1e1, fx_1e2, fx_1e4, fx_1e8; +static struct float_status fs; + +/* Functions for setting host/library modes and getting status */ +static void fp_set_mode(uae_u32 mode_control) +{ + set_floatx80_rounding_precision(80, &fs); + switch(mode_control & FPCR_ROUNDING_MODE) { + case FPCR_ROUND_NEAR: // to neareset + set_float_rounding_mode(float_round_nearest_even, &fs); + break; + case FPCR_ROUND_ZERO: // to zero + set_float_rounding_mode(float_round_to_zero, &fs); + break; + case FPCR_ROUND_MINF: // to minus + set_float_rounding_mode(float_round_down, &fs); + break; + case FPCR_ROUND_PINF: // to plus + set_float_rounding_mode(float_round_up, &fs); + break; + } + 
return; +} + +static void fp_get_status(uae_u32 *status) +{ + if (fs.float_exception_flags & float_flag_invalid) + *status |= 0x2000; + if (fs.float_exception_flags & float_flag_divbyzero) + *status |= 0x0400; + if (fs.float_exception_flags & float_flag_overflow) + *status |= 0x1000; + if (fs.float_exception_flags & float_flag_underflow) + *status |= 0x0800; + if (fs.float_exception_flags & float_flag_inexact) + *status |= 0x0200; +} +STATIC_INLINE void fp_clear_status(void) +{ + fs.float_exception_flags = 0; +} + + +static const TCHAR *fp_print(fpdata *fpd) +{ + static TCHAR fsout[32]; + flag n, u, d; + fptype result = 0.0; + int i; + floatx80 *fx = &fpd->fpx; + + n = floatx80_is_negative(*fx); + u = floatx80_is_unnormal(*fx); + d = floatx80_is_denormal(*fx); + + if (floatx80_is_zero(*fx)) { +#if USE_LONG_DOUBLE + _stprintf(fsout, _T("%c%#.17Le%s%s"), n?'-':'+', (fptype) 0.0, u ? _T("U") : _T(""), d ? _T("D") : _T("")); +#else + _stprintf(fsout, _T("%c%#.17e%s%s"), n?'-':'+', (fptype) 0.0, u ? _T("U") : _T(""), d ? _T("D") : _T("")); +#endif + } else if (floatx80_is_infinity(*fx)) { + _stprintf(fsout, _T("%c%s"), n?'-':'+', _T("inf")); + } else if (floatx80_is_signaling_nan(*fx, &fs)) { + _stprintf(fsout, _T("%c%s"), n?'-':'+', _T("snan")); + } else if (floatx80_is_any_nan(*fx)) { + _stprintf(fsout, _T("%c%s"), n?'-':'+', _T("nan")); + } else { + for (i = 63; i >= 0; i--) { + if (fx->low & (((uae_u64)1)<high&0x7FFF) - 0x3FFF); +#if USE_LONG_DOUBLE + _stprintf(fsout, _T("%c%#.17Le%s%s"), n?'-':'+', result, u ? _T("U") : _T(""), d ? _T("D") : _T("")); +#else + _stprintf(fsout, _T("%c%#.17e%s%s"), n?'-':'+', result, u ? _T("U") : _T(""), d ? 
_T("D") : _T("")); +#endif + } + return fsout; +} + +static void softfloat_set(fpdata *fpd, uae_u32 *f) +{ + fpd->fpx.high = (uae_u16)(f[0] >> 16); + fpd->fpx.low = ((uae_u64)f[1] << 32) | f[2]; +} + +static void softfloat_get(fpdata *fpd, uae_u32 *f) +{ + f[0] = (uae_u32)(fpd->fpx.high << 16); + f[1] = fpd->fpx.low >> 32; + f[2] = (uae_u32)fpd->fpx.low; +} + +/* Functions for detecting float type */ +static bool fp_is_snan(fpdata *fpd) +{ + return floatx80_is_signaling_nan(fpd->fpx, &fs) != 0; +} +static bool fp_unset_snan(fpdata *fpd) +{ + fpd->fpx.low |= LIT64(0x4000000000000000); + return 0; +} +static bool fp_is_nan (fpdata *fpd) +{ + return floatx80_is_any_nan(fpd->fpx) != 0; +} +static bool fp_is_infinity (fpdata *fpd) +{ + return floatx80_is_infinity(fpd->fpx) != 0; +} +static bool fp_is_zero(fpdata *fpd) +{ + return floatx80_is_zero(fpd->fpx) != 0; +} +static bool fp_is_neg(fpdata *fpd) +{ + return floatx80_is_negative(fpd->fpx) != 0; +} +static bool fp_is_denormal(fpdata *fpd) +{ + return floatx80_is_denormal(fpd->fpx) != 0; +} +static bool fp_is_unnormal(fpdata *fpd) +{ + return floatx80_is_unnormal(fpd->fpx) != 0; +} + +/* Functions for converting between float formats */ +static const long double twoto32 = 4294967296.0; + +static void to_native(fptype *fp, fpdata *fpd) +{ + int expon; + fptype frac; + + expon = fpd->fpx.high & 0x7fff; + + if (fp_is_zero(fpd)) { + *fp = fp_is_neg(fpd) ? -0.0 : +0.0; + return; + } + if (fp_is_nan(fpd)) { + *fp = sqrtl(-1); + return; + } + if (fp_is_infinity(fpd)) { + //*fp = fp_is_neg(fpd) ? 
logl(0.0) : (1.0/0.0); + return; + } + + frac = (fptype)fpd->fpx.low / (fptype)(twoto32 * 2147483648.0); + if (fp_is_neg(fpd)) + frac = -frac; + *fp = ldexpl (frac, expon - 16383); +} + +static void from_native(fptype fp, fpdata *fpd) +{ + int expon; + fptype frac; + + if (signbit(fp)) + fpd->fpx.high = 0x8000; + else + fpd->fpx.high = 0x0000; + + if (isnan(fp)) { + fpd->fpx.high |= 0x7fff; + fpd->fpx.low = LIT64(0xffffffffffffffff); + return; + } + if (isinf(fp)) { + fpd->fpx.high |= 0x7fff; + fpd->fpx.low = LIT64(0x0000000000000000); + return; + } + if (fp == 0.0) { + fpd->fpx.low = LIT64(0x0000000000000000); + return; + } + if (fp < 0.0) + fp = -fp; + + frac = frexpl (fp, &expon); + frac += 0.5 / (twoto32 * twoto32); + if (frac >= 1.0) { + frac /= 2.0; + expon++; + } + fpd->fpx.high |= (expon + 16383 - 1) & 0x7fff; + fpd->fpx.low = (uint64_t)(frac * (fptype)(twoto32 * twoto32)); + + while (!(fpd->fpx.low & LIT64( 0x8000000000000000))) { + if (fpd->fpx.high == 0) { + float_raise(float_flag_denormal, &fs); + break; + } + fpd->fpx.low <<= 1; + fpd->fpx.high--; + } +} + +static void to_single_xn(fpdata *fpd, uae_u32 wrd1) +{ + float32 f = wrd1; + fpd->fpx = float32_to_floatx80(f, &fs); // automatically fix denormals +} +static void to_single_x(fpdata *fpd, uae_u32 wrd1) +{ + float32 f = wrd1; + fpd->fpx = float32_to_floatx80_allowunnormal(f, &fs); +} +static uae_u32 from_single_x(fpdata *fpd) +{ + float32 f = floatx80_to_float32(fpd->fpx, &fs); + return f; +} + +static void to_double_xn(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) +{ + float64 f = ((float64)wrd1 << 32) | wrd2; + fpd->fpx = float64_to_floatx80(f, &fs); // automatically fix denormals +} +static void to_double_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2) +{ + float64 f = ((float64)wrd1 << 32) | wrd2; + fpd->fpx = float64_to_floatx80_allowunnormal(f, &fs); +} +static void from_double_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2) +{ + float64 f = floatx80_to_float64(fpd->fpx, &fs); + *wrd1 = f >> 32; + *wrd2 = 
(uae_u32)f; +} + +static void to_exten_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +{ + uae_u32 wrd[3] = { wrd1, wrd2, wrd3 }; + softfloat_set(fpd, wrd); +} +static void from_exten_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3) +{ + uae_u32 wrd[3]; + softfloat_get(fpd, wrd); + *wrd1 = wrd[0]; + *wrd2 = wrd[1]; + *wrd3 = wrd[2]; +} + +static uae_s64 to_int(fpdata *src, int size) +{ + if (floatx80_lt(src->fpx, fxsizes[size * 2 + 0], &fs)) { + return floatx80_to_int32(fxsizes[size * 2 + 0], &fs); + } + if (floatx80_le(fxsizes[size * 2 + 1], src->fpx, &fs)) { + return floatx80_to_int32(fxsizes[size * 2 + 1], &fs); + } + return floatx80_to_int32(src->fpx, &fs); +} +static void from_int(fpdata *fpd, uae_s32 src) +{ + fpd->fpx = int32_to_floatx80(src, &fs); +} + +/* Functions for rounding */ + +// round to float with extended precision exponent +static void fp_roundsgl(fpdata *fpd) +{ + fpd->fpx = floatx80_round32(fpd->fpx, &fs); +} + +// round to double with extended precision exponent +static void fp_rounddbl(fpdata *fpd) +{ + fpd->fpx = floatx80_round64(fpd->fpx, &fs); +} + +// round to float +static void fp_round32(fpdata *fpd) +{ + float32 f = floatx80_to_float32(fpd->fpx, &fs); + fpd->fpx = float32_to_floatx80(f, &fs); +} + +// round to double +static void fp_round64(fpdata *fpd) +{ + float64 f = floatx80_to_float64(fpd->fpx, &fs); + fpd->fpx = float64_to_floatx80(f, &fs); +} + +/* Arithmetic functions */ + +static void fp_int(fpdata *a, fpdata *dst) +{ + dst->fpx = floatx80_round_to_int(a->fpx, &fs); +} + +static void fp_intrz(fpdata *a, fpdata *dst) +{ + dst->fpx = floatx80_round_to_int_toward_zero(a->fpx, &fs); +} +static void fp_sqrt(fpdata *a, fpdata *dst) +{ + dst->fpx = floatx80_sqrt(a->fpx, &fs); +} +static void fp_lognp1(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = log(fpa + 1.0); /* use the value just converted by to_native(); the softfloat helpers here only fill in fpx, so a->fp is not the operand */ + from_native(fpa, dst); +} +static void fp_sin(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa =
sin(fpa); + from_native(fpa, dst); +} +static void fp_tan(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = tan(fpa); + from_native(fpa, dst); +} +static void fp_logn(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = log(fpa); + from_native(fpa, dst); +} +static void fp_log10(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = log10(fpa); + from_native(fpa, dst); +} +static void fp_log2(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = log2(fpa); + from_native(fpa, dst); +} +static void fp_abs(fpdata *a, fpdata *dst) +{ + dst->fpx = floatx80_abs(a->fpx); +} +static void fp_neg(fpdata *a, fpdata *dst) +{ + dst->fpx = floatx80_chs(a->fpx); +} +static void fp_cos(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = cos(fpa); + from_native(fpa, dst); +} +static void fp_getexp(fpdata *a, fpdata *dst) +{ + dst->fpx = floatx80_getexp(a->fpx, &fs); +} +static void fp_getman(fpdata *a, fpdata *dst) +{ + dst->fpx = floatx80_getman(a->fpx, &fs); +} +static void fp_div(fpdata *a, fpdata *b) +{ + a->fpx = floatx80_div(a->fpx, b->fpx, &fs); +} +static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) +{ + a->fpx = floatx80_mod(a->fpx, b->fpx, q, s, &fs); +} +static void fp_add(fpdata *a, fpdata *b) +{ + a->fpx = floatx80_add(a->fpx, b->fpx, &fs); +} +static void fp_mul(fpdata *a, fpdata *b) +{ + a->fpx = floatx80_mul(a->fpx, b->fpx, &fs); +} +static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s) +{ + a->fpx = floatx80_rem(a->fpx, b->fpx, q, s, &fs); +} +static void fp_scale(fpdata *a, fpdata *b) +{ + a->fpx = floatx80_scale(a->fpx, b->fpx, &fs); +} +static void fp_sub(fpdata *a, fpdata *b) +{ + a->fpx = floatx80_sub(a->fpx, b->fpx, &fs); +} + + +/* FIXME: create softfloat functions for following arithmetics */ + +static void fp_sinh(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = sinhl(fpa); + from_native(fpa, dst); +} +static void 
fp_etoxm1(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = expl(fpa) - 1.0; + from_native(fpa, dst); +} +static void fp_tanh(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = tanhl(fpa); + from_native(fpa, dst); +} +static void fp_atan(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = atanl(fpa); + from_native(fpa, dst); +} +static void fp_asin(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = asinl(fpa); + from_native(fpa, dst); +} +static void fp_atanh(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = atanhl(fpa); + from_native(fpa, dst); +} +static void fp_etox(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = expl(fpa); + from_native(fpa, dst); +} +static void fp_twotox(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = powl(2.0, fpa); + from_native(fpa, dst); +} +static void fp_tentox(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = powl(10.0, fpa); + from_native(fpa, dst); +} +static void fp_cosh(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = coshl(fpa); + from_native(fpa, dst); +} +static void fp_acos(fpdata *a, fpdata *dst) +{ + fptype fpa; + to_native(&fpa, a); + fpa = acosl(fpa); + from_native(fpa, dst); +} + +void fp_init_softfloat(void) +{ + float_status fsx = { 0 }; + set_floatx80_rounding_precision(80, &fsx); + set_float_rounding_mode(float_round_to_zero, &fsx); + fxsizes[0] = int32_to_floatx80(-128, &fsx); + fxsizes[1] = int32_to_floatx80(127, &fsx); + fxsizes[2] = int32_to_floatx80(-32768, &fsx); + fxsizes[3] = int32_to_floatx80(32767, &fsx); + fxsizes[4] = int32_to_floatx80(-2147483648, &fsx); + fxsizes[5] = int32_to_floatx80(2147483647, &fsx); + + fpp_print = fp_print; + fpp_is_snan = fp_is_snan; + fpp_unset_snan = fp_unset_snan; + fpp_is_nan = fp_is_nan; + fpp_is_infinity = fp_is_infinity; + fpp_is_zero = fp_is_zero; + fpp_is_neg = fp_is_neg; + 
fpp_is_denormal = fp_is_denormal; + fpp_is_unnormal = fp_is_unnormal; + + fpp_get_status = fp_get_status; + fpp_clear_status = fp_clear_status; + fpp_set_mode = fp_set_mode; + + fpp_from_native = from_native; + fpp_to_native = to_native; + + fpp_to_int = to_int; + fpp_from_int = from_int; + + fpp_to_single_xn = to_single_xn; + fpp_to_single_x = to_single_x; + fpp_from_single_x = from_single_x; + + fpp_to_double_xn = to_double_xn; + fpp_to_double_x = to_double_x; + fpp_from_double_x = from_double_x; + + fpp_to_exten_x = to_exten_x; + fpp_from_exten_x = from_exten_x; + + fpp_roundsgl = fp_roundsgl; + fpp_rounddbl = fp_rounddbl; + fpp_round32 = fp_round32; + fpp_round64 = fp_round64; + + fpp_int = fp_int; + fpp_sinh = fp_sinh; + fpp_intrz = fp_intrz; + fpp_sqrt = fp_sqrt; + fpp_lognp1 = fp_lognp1; + fpp_etoxm1 = fp_etoxm1; + fpp_tanh = fp_tanh; + fpp_atan = fp_atan; + fpp_atanh = fp_atanh; + fpp_sin = fp_sin; + fpp_asin = fp_asin; + fpp_tan = fp_tan; + fpp_etox = fp_etox; + fpp_twotox = fp_twotox; + fpp_tentox = fp_tentox; + fpp_logn = fp_logn; + fpp_log10 = fp_log10; + fpp_log2 = fp_log2; + fpp_abs = fp_abs; + fpp_cosh = fp_cosh; + fpp_neg = fp_neg; + fpp_acos = fp_acos; + fpp_cos = fp_cos; + fpp_getexp = fp_getexp; + fpp_getman = fp_getman; + fpp_div = fp_div; + fpp_mod = fp_mod; + fpp_add = fp_add; + fpp_mul = fp_mul; + fpp_rem = fp_rem; + fpp_scale = fp_scale; + fpp_sub = fp_sub; +} + diff --git a/include/fpp-ieee-be.h b/include/fpp-ieee-be.h deleted file mode 100644 index 29f52808..00000000 --- a/include/fpp-ieee-be.h +++ /dev/null @@ -1,72 +0,0 @@ - /* - * UAE - The Un*x Amiga Emulator - * - * MC68881 emulation - * Support functions for IEEE compatible host CPUs. - * These functions use a GCC extension (type punning through unions) and - * should only be compiled with compilers that support this. 
- * - * Copyright 1999 Sam Jordan - */ - -#ifndef UAE_FPP_IEEE_BE_H -#define UAE_FPP_IEEE_BE_H - -#include "uae/types.h" - - -STATIC_INLINE double to_single (uae_u32 value) -{ - union { - float f; - uae_u32 u; - } val; - - val.u = value; - return val.f; -} - -STATIC_INLINE uae_u32 from_single (double src) -{ - union { - float f; - uae_u32 u; - } val; - - val.f = src; - return val.u; -} - -STATIC_INLINE double to_double(uae_u32 wrd1, uae_u32 wrd2) -{ - union { - double d; - uae_u32 u[2]; - } val; - - val.u[0] = wrd1; - val.u[1] = wrd2; - return val.d; -} - -STATIC_INLINE void from_double(double src, uae_u32 * wrd1, uae_u32 * wrd2) -{ - union { - double d; - uae_u32 u[2]; - } val; - - val.d = src; - *wrd1 = val.u[0]; - *wrd2 = val.u[1]; -} - -#define HAVE_from_double -#define HAVE_to_double -#define HAVE_from_single -#define HAVE_to_single - -/* Get the rest of the conversion functions defined. */ -#include "fpp-unknown.h" - -#endif /* UAE_FPP_IEEE_BE_H */ diff --git a/include/fpp-unknown.h b/include/fpp-unknown.h deleted file mode 100644 index 05707a99..00000000 --- a/include/fpp-unknown.h +++ /dev/null @@ -1,146 +0,0 @@ - /* - * UAE - The Un*x Amiga Emulator - * - * MC68881 emulation - * - * Conversion routines for hosts with unknown floating point format. 
- * - * Copyright 1996 Herman ten Brugge - */ - -#ifndef UAE_FPP_UNKNOWN_H -#define UAE_FPP_UNKNOWN_H - -#include "uae/types.h" - -#ifndef HAVE_to_single -STATIC_INLINE double to_single (uae_u32 value) -{ - double frac; - - if ((value & 0x7fffffff) == 0) - return (0.0); - frac = (double) ((value & 0x7fffff) | 0x800000) / 8388608.0; - if (value & 0x80000000) - frac = -frac; - return (ldexp (frac, ((value >> 23) & 0xff) - 127)); -} -#endif - -#ifndef HAVE_from_single -STATIC_INLINE uae_u32 from_single (double src) -{ - int expon; - uae_u32 tmp; - double frac; - - if (src == 0.0) - return 0; - if (src < 0) { - tmp = 0x80000000; - src = -src; - } else { - tmp = 0; - } - frac = frexp (src, &expon); - frac += 0.5 / 16777216.0; - if (frac >= 1.0) { - frac /= 2.0; - expon++; - } - return (tmp | (((expon + 127 - 1) & 0xff) << 23) | - (((int) (frac * 16777216.0)) & 0x7fffff)); -} -#endif - -#ifndef HAVE_to_exten -STATIC_INLINE double to_exten(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) -{ - double frac; - - if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) - return 0.0; - frac = (double) wrd2 / 2147483648.0 + - (double) wrd3 / 9223372036854775808.0; - if (wrd1 & 0x80000000) - frac = -frac; - return ldexp (frac, ((wrd1 >> 16) & 0x7fff) - 16383); -} -#endif - -#ifndef HAVE_from_exten -STATIC_INLINE void from_exten(double src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3) -{ - int expon; - double frac; - - if (src == 0.0) { - *wrd1 = 0; - *wrd2 = 0; - *wrd3 = 0; - return; - } - if (src < 0) { - *wrd1 = 0x80000000; - src = -src; - } else { - *wrd1 = 0; - } - frac = frexp (src, &expon); - frac += 0.5 / 18446744073709551616.0; - if (frac >= 1.0) { - frac /= 2.0; - expon++; - } - *wrd1 |= (((expon + 16383 - 1) & 0x7fff) << 16); - *wrd2 = (uae_u32) (frac * 4294967296.0); - *wrd3 = (uae_u32) (frac * 18446744073709551616.0 - *wrd2 * 4294967296.0); -} -#endif - -#ifndef HAVE_to_double -STATIC_INLINE double to_double(uae_u32 wrd1, uae_u32 wrd2) -{ - double frac; - - if ((wrd1 & 
0x7fffffff) == 0 && wrd2 == 0) - return 0.0; - frac = (double) ((wrd1 & 0xfffff) | 0x100000) / 1048576.0 + - (double) wrd2 / 4503599627370496.0; - if (wrd1 & 0x80000000) - frac = -frac; - return ldexp (frac, ((wrd1 >> 20) & 0x7ff) - 1023); -} -#endif - -#ifndef HAVE_from_double -STATIC_INLINE void from_double(double src, uae_u32 * wrd1, uae_u32 * wrd2) -{ - int expon; - int tmp; - double frac; - - if (src == 0.0) { - *wrd1 = 0; - *wrd2 = 0; - return; - } - if (src < 0) { - *wrd1 = 0x80000000; - src = -src; - } else { - *wrd1 = 0; - } - frac = frexp (src, &expon); - frac += 0.5 / 9007199254740992.0; - if (frac >= 1.0) { - frac /= 2.0; - expon++; - } - tmp = (uae_u32) (frac * 2097152.0); - *wrd1 |= (((expon + 1023 - 1) & 0x7ff) << 20) | (tmp & 0xfffff); - *wrd2 = (uae_u32) (frac * 9007199254740992.0 - tmp * 4294967296.0); -} -#endif - -#endif /* UAE_FPP_UNKNOWN_H */ diff --git a/include/fpp.h b/include/fpp.h new file mode 100644 index 00000000..bdc5bf26 --- /dev/null +++ b/include/fpp.h @@ -0,0 +1,107 @@ + +extern void fp_init_native(void); +extern void fp_init_softfloat(void); + +#if defined(CPU_i386) || defined(CPU_x86_64) +extern void init_fpucw_x87(void); +#endif + +void to_single(fpdata *fpd, uae_u32 wrd1); +void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2); +void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3); + +typedef void (*FPP_ABQS)(fpdata*, fpdata*, uae_u64*, uae_u8*); +typedef void (*FPP_AB)(fpdata*, fpdata*); +typedef void (*FPP_A)(fpdata*); + +typedef bool (*FPP_IS)(fpdata*); +typedef void (*FPP_SET_MODE)(uae_u32); +typedef void (*FPP_GET_STATUS)(uae_u32*); +typedef void (*FPP_CLEAR_STATUS)(void); + +typedef void (*FPP_FROM_NATIVE)(fptype, fpdata*); +typedef void (*FPP_TO_NATIVE)(fptype*, fpdata*); + +typedef void (*FPP_FROM_INT)(fpdata*,uae_s32); +typedef uae_s64 (*FPP_TO_INT)(fpdata*, int); + +typedef void (*FPP_TO_SINGLE)(fpdata*, uae_u32); +typedef uae_u32 (*FPP_FROM_SINGLE)(fpdata*); + +typedef void (*FPP_TO_DOUBLE)(fpdata*, 
uae_u32, uae_u32); +typedef void (*FPP_FROM_DOUBLE)(fpdata*, uae_u32*, uae_u32*); + +typedef void (*FPP_TO_EXTEN)(fpdata*, uae_u32, uae_u32, uae_u32); +typedef void (*FPP_FROM_EXTEN)(fpdata*, uae_u32*, uae_u32*, uae_u32*); + +typedef const TCHAR* (*FPP_PRINT)(fpdata*); + +extern FPP_PRINT fpp_print; + +extern FPP_IS fpp_is_snan; +extern FPP_IS fpp_unset_snan; +extern FPP_IS fpp_is_nan; +extern FPP_IS fpp_is_infinity; +extern FPP_IS fpp_is_zero; +extern FPP_IS fpp_is_neg; +extern FPP_IS fpp_is_denormal; +extern FPP_IS fpp_is_unnormal; + +extern FPP_GET_STATUS fpp_get_status; +extern FPP_CLEAR_STATUS fpp_clear_status; +extern FPP_SET_MODE fpp_set_mode; + +extern FPP_FROM_NATIVE fpp_from_native; +extern FPP_TO_NATIVE fpp_to_native; + +extern FPP_TO_INT fpp_to_int; +extern FPP_FROM_INT fpp_from_int; + +extern FPP_TO_SINGLE fpp_to_single_xn; +extern FPP_TO_SINGLE fpp_to_single_x; +extern FPP_FROM_SINGLE fpp_from_single_x; + +extern FPP_TO_DOUBLE fpp_to_double_xn; +extern FPP_TO_DOUBLE fpp_to_double_x; +extern FPP_FROM_DOUBLE fpp_from_double_x; + +extern FPP_TO_EXTEN fpp_to_exten_x; +extern FPP_FROM_EXTEN fpp_from_exten_x; + +extern FPP_A fpp_roundsgl; +extern FPP_A fpp_rounddbl; +extern FPP_A fpp_round32; +extern FPP_A fpp_round64; + +extern FPP_AB fpp_int; +extern FPP_AB fpp_sinh; +extern FPP_AB fpp_intrz; +extern FPP_AB fpp_sqrt; +extern FPP_AB fpp_lognp1; +extern FPP_AB fpp_etoxm1; +extern FPP_AB fpp_tanh; +extern FPP_AB fpp_atan; +extern FPP_AB fpp_atanh; +extern FPP_AB fpp_sin; +extern FPP_AB fpp_asin; +extern FPP_AB fpp_tan; +extern FPP_AB fpp_etox; +extern FPP_AB fpp_twotox; +extern FPP_AB fpp_tentox; +extern FPP_AB fpp_logn; +extern FPP_AB fpp_log10; +extern FPP_AB fpp_log2; +extern FPP_AB fpp_abs; +extern FPP_AB fpp_cosh; +extern FPP_AB fpp_neg; +extern FPP_AB fpp_acos; +extern FPP_AB fpp_cos; +extern FPP_AB fpp_getexp; +extern FPP_AB fpp_getman; +extern FPP_AB fpp_div; +extern FPP_ABQS fpp_mod; +extern FPP_AB fpp_add; +extern FPP_AB fpp_mul; +extern FPP_ABQS 
fpp_rem; +extern FPP_AB fpp_scale; +extern FPP_AB fpp_sub; diff --git a/include/newcpu.h b/include/newcpu.h index 30ebb0dd..28985839 100644 --- a/include/newcpu.h +++ b/include/newcpu.h @@ -142,8 +142,8 @@ extern struct mmufixup mmufixup[2]; typedef struct { - fptype fp; floatx80 fpx; + fptype fp; } fpdata; struct regstruct @@ -562,7 +562,6 @@ extern int m68k_movec2 (int, uae_u32 *); extern bool m68k_divl (uae_u32, uae_u32, uae_u16); extern bool m68k_mull (uae_u32, uae_u32, uae_u16); extern void init_m68k (void); -extern void init_m68k_full (void); extern void m68k_go (int); extern void m68k_dumpstate (uaecptr *); extern void m68k_dumpstate (uaecptr, uaecptr *); @@ -570,7 +569,6 @@ extern void m68k_dumpcache (void); extern int getDivu68kCycles (uae_u32 dividend, uae_u16 divisor); extern int getDivs68kCycles (uae_s32 dividend, uae_s16 divisor); extern void divbyzero_special (bool issigned, uae_s32 dst); -extern void m68k_do_rte (void); extern void protect_roms (bool); extern void unprotect_maprom (void); extern bool is_hardreset(void); diff --git a/softfloat/README.txt b/softfloat/README.txt deleted file mode 100644 index 9500d25e..00000000 --- a/softfloat/README.txt +++ /dev/null @@ -1,78 +0,0 @@ -MAME note: this package is derived from the following original SoftFloat -package and has been "re-packaged" to work with MAME's conventions and -build system. The source files come from bits64/ and bits64/templates -in the original distribution as MAME requires a compiler with a 64-bit -integer type. - - -Package Overview for SoftFloat Release 2b - -John R. Hauser -2002 May 27 - - ----------------------------------------------------------------------------- -Overview - -SoftFloat is a software implementation of floating-point that conforms to -the IEC/IEEE Standard for Binary Floating-Point Arithmetic. SoftFloat is -distributed in the form of C source code. 
Compiling the SoftFloat sources -generates two things: - --- A SoftFloat object file (typically `softfloat.o') containing the complete - set of IEC/IEEE floating-point routines. - --- A `timesoftfloat' program for evaluating the speed of the SoftFloat - routines. (The SoftFloat module is linked into this program.) - -The SoftFloat package is documented in four text files: - - SoftFloat.txt Documentation for using the SoftFloat functions. - SoftFloat-source.txt Documentation for compiling SoftFloat. - SoftFloat-history.txt History of major changes to SoftFloat. - timesoftfloat.txt Documentation for using `timesoftfloat'. - -Other files in the package comprise the source code for SoftFloat. - -Please be aware that some work is involved in porting this software to other -targets. It is not just a matter of getting `make' to complete without -error messages. I would have written the code that way if I could, but -there are fundamental differences between systems that can't be hidden. -You should not attempt to compile SoftFloat without first reading both -`SoftFloat.txt' and `SoftFloat-source.txt'. - - ----------------------------------------------------------------------------- -Legal Notice - -SoftFloat was written by me, John R. Hauser. This work was made possible in -part by the International Computer Science Institute, located at Suite 600, -1947 Center Street, Berkeley, California 94704. Funding was partially -provided by the National Science Foundation under grant MIP-9311980. The -original version of this code was written as part of a project to build -a fixed-point vector processor in collaboration with the University of -California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT -TIMES RESULT IN INCORRECT BEHAVIOR. 
USE OF THIS SOFTWARE IS RESTRICTED TO -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL -LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO -FURTHERMORE EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER -SCIENCE INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, -COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE -SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, provided -that the minimal documentation requirements stated in the source code are -satisfied. - - ----------------------------------------------------------------------------- -Contact Information - -At the time of this writing, the most up-to-date information about -SoftFloat and the latest release can be found at the Web page `http:// -www.cs.berkeley.edu/~jhauser/arithmetic/SoftFloat.html'. - - diff --git a/softfloat/SOFTFLOAT-MACROS.H b/softfloat/SOFTFLOAT-MACROS.H index 4607d346..9cc6158c 100644 --- a/softfloat/SOFTFLOAT-MACROS.H +++ b/softfloat/SOFTFLOAT-MACROS.H @@ -1,8 +1,24 @@ - -/*============================================================================ - +/* + * QEMU float support macros + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. 
+ */ + +/* +=============================================================================== This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b. +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -11,24 +27,68 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 
Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +/*---------------------------------------------------------------------------- +| This macro tests for minimum version of the GNU C compiler. +*----------------------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define SOFTFLOAT_GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0 +#endif -=============================================================================*/ /*---------------------------------------------------------------------------- | Shifts `a' right by the number of bits given in `count'. If any nonzero @@ -39,9 +99,9 @@ these four paragraphs for those parts of this code that are retained. | The result is stored in the location pointed to by `zPtr'. *----------------------------------------------------------------------------*/ -INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) +static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr) { - bits32 z; + uint32_t z; if ( count == 0 ) { z = a; @@ -65,9 +125,9 @@ INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) | The result is stored in the location pointed to by `zPtr'. 
*----------------------------------------------------------------------------*/ -INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) +static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr) { - bits64 z; + uint64_t z; if ( count == 0 ) { z = a; @@ -91,20 +151,20 @@ INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) | 63 bits of the extra result are all zero if and only if _all_but_the_last_ | bits shifted off were all zero. This extra result is stored in the location | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. -| (This routine makes more sense if `a0' and `a1' are considered to form -| a fixed-point value with binary point between `a0' and `a1'. This fixed- -| point value is shifted right by the number of bits given in `count', and -| the integer part of the result is returned at the location pointed to by +| (This routine makes more sense if `a0' and `a1' are considered to form a +| fixed-point value with binary point between `a0' and `a1'. This fixed-point +| value is shifted right by the number of bits given in `count', and the +| integer part of the result is returned at the location pointed to by | `z0Ptr'. The fractional part of the result may be slightly corrupted as | described above, and is returned at the location pointed to by `z1Ptr'.) *----------------------------------------------------------------------------*/ -INLINE void +static inline void shift64ExtraRightJamming( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) { - bits64 z0, z1; - int8 negCount = ( - count ) & 63; + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; if ( count == 0 ) { z1 = a1; @@ -136,12 +196,12 @@ INLINE void | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
*----------------------------------------------------------------------------*/ -INLINE void +static inline void shift128Right( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) { - bits64 z0, z1; - int8 negCount = ( - count ) & 63; + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; if ( count == 0 ) { z1 = a1; @@ -152,7 +212,7 @@ INLINE void z0 = a0>>count; } else { - z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0; + z1 = (count < 128) ? (a0 >> (count & 63)) : 0; z0 = 0; } *z1Ptr = z1; @@ -171,12 +231,12 @@ INLINE void | the locations pointed to by `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/ -INLINE void +static inline void shift128RightJamming( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) { - bits64 z0, z1; - int8 negCount = ( - count ) & 63; + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; if ( count == 0 ) { z1 = a1; @@ -222,19 +282,19 @@ INLINE void | `z2Ptr'.) *----------------------------------------------------------------------------*/ -INLINE void +static inline void shift128ExtraRightJamming( - bits64 a0, - bits64 a1, - bits64 a2, - int16 count, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr + uint64_t a0, + uint64_t a1, + uint64_t a2, + int count, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr ) { - bits64 z0, z1, z2; - int8 negCount = ( - count ) & 63; + uint64_t z0, z1, z2; + int8_t negCount = ( - count ) & 63; if ( count == 0 ) { z2 = a2; @@ -280,9 +340,9 @@ INLINE void | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
*----------------------------------------------------------------------------*/ -INLINE void +static inline void shortShift128Left( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) { *z1Ptr = a1<>32; - bLow = (bits32)b; + bLow = b; bHigh = b>>32; - z1 = ( (bits64) aLow ) * bLow; - zMiddleA = ( (bits64) aLow ) * bHigh; - zMiddleB = ( (bits64) aHigh ) * bLow; - z0 = ( (bits64) aHigh ) * bHigh; + z1 = ( (uint64_t) aLow ) * bLow; + zMiddleA = ( (uint64_t) aLow ) * bHigh; + zMiddleB = ( (uint64_t) aHigh ) * bLow; + z0 = ( (uint64_t) aHigh ) * bHigh; zMiddleA += zMiddleB; - z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); + z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); zMiddleA <<= 32; z1 += zMiddleA; z0 += ( z1 < zMiddleA ); @@ -476,17 +536,17 @@ INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) | `z2Ptr'. *----------------------------------------------------------------------------*/ -INLINE void +static inline void mul128By64To192( - bits64 a0, - bits64 a1, - bits64 b, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr + uint64_t a0, + uint64_t a1, + uint64_t b, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr ) { - bits64 z0, z1, z2, more1; + uint64_t z0, z1, z2, more1; mul64To128( a1, b, &z1, &z2 ); mul64To128( a0, b, &z0, &more1 ); @@ -504,20 +564,20 @@ INLINE void | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
*----------------------------------------------------------------------------*/ -INLINE void +static inline void mul128To256( - bits64 a0, - bits64 a1, - bits64 b0, - bits64 b1, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr, - bits64 *z3Ptr + uint64_t a0, + uint64_t a1, + uint64_t b0, + uint64_t b1, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr, + uint64_t *z3Ptr ) { - bits64 z0, z1, z2, z3; - bits64 more1, more2; + uint64_t z0, z1, z2, z3; + uint64_t more1, more2; mul64To128( a1, b1, &z2, &z3 ); mul64To128( a1, b0, &z1, &more2 ); @@ -543,18 +603,18 @@ INLINE void | unsigned integer is returned. *----------------------------------------------------------------------------*/ -INLINE bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) +static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b ) { - bits64 b0, b1; - bits64 rem0, rem1, term0, term1; - bits64 z; + uint64_t b0, b1; + uint64_t rem0, rem1, term0, term1; + uint64_t z; if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); b0 = b>>32; z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; mul64To128( b, z, &term0, &term1 ); sub128( a0, a1, term0, term1, &rem0, &rem1 ); - while ( ( (sbits64) rem0 ) < 0 ) { + while ( ( (int64_t) rem0 ) < 0 ) { z -= LIT64( 0x100000000 ); b1 = b<<32; add128( rem0, rem1, b0, b1, &rem0, &rem1 ); @@ -575,32 +635,32 @@ INLINE bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) | value. 
*----------------------------------------------------------------------------*/ -INLINE bits32 estimateSqrt32( int16 aExp, bits32 a ) +static uint32_t estimateSqrt32(int aExp, uint32_t a) { - static const bits16 sqrtOddAdjustments[] = { + static const uint16_t sqrtOddAdjustments[] = { 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 }; - static const bits16 sqrtEvenAdjustments[] = { + static const uint16_t sqrtEvenAdjustments[] = { 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 }; - int8 index; - bits32 z; + int8_t index; + uint32_t z; index = ( a>>27 ) & 15; if ( aExp & 1 ) { - z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; z = ( ( a / z )<<14 ) + ( z<<15 ); a >>= 1; } else { - z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; z = a / z + z; z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); - if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); + if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); } - return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); + return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); } @@ -609,9 +669,16 @@ INLINE bits32 estimateSqrt32( int16 aExp, bits32 a ) | `a'. If `a' is zero, 32 is returned. 
*----------------------------------------------------------------------------*/ -static int8 countLeadingZeros32( bits32 a ) +static int8_t countLeadingZeros32( uint32_t a ) { - static const int8 countLeadingZerosHigh[] = { +#if SOFTFLOAT_GNUC_PREREQ(3, 4) + if (a) { + return __builtin_clz(a); + } else { + return 32; + } +#else + static const int8_t countLeadingZerosHigh[] = { 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -629,7 +696,7 @@ static int8 countLeadingZeros32( bits32 a ) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - int8 shiftCount; + int8_t shiftCount; shiftCount = 0; if ( a < 0x10000 ) { @@ -642,7 +709,7 @@ static int8 countLeadingZeros32( bits32 a ) } shiftCount += countLeadingZerosHigh[ a>>24 ]; return shiftCount; - +#endif } /*---------------------------------------------------------------------------- @@ -650,20 +717,27 @@ static int8 countLeadingZeros32( bits32 a ) | `a'. If `a' is zero, 64 is returned. *----------------------------------------------------------------------------*/ -static int8 countLeadingZeros64( bits64 a ) +static int8_t countLeadingZeros64( uint64_t a ) { - int8 shiftCount; +#if SOFTFLOAT_GNUC_PREREQ(3, 4) + if (a) { + return __builtin_clzll(a); + } else { + return 64; + } +#else + int8_t shiftCount; shiftCount = 0; - if ( a < ( (bits64) 1 )<<32 ) { + if ( a < ( (uint64_t) 1 )<<32 ) { shiftCount += 32; } else { a >>= 32; } - shiftCount += countLeadingZeros32((bits32)a ); + shiftCount += countLeadingZeros32( a ); return shiftCount; - +#endif } /*---------------------------------------------------------------------------- @@ -672,7 +746,7 @@ static int8 countLeadingZeros64( bits64 a ) | Otherwise, returns 0. 
*----------------------------------------------------------------------------*/ -INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 == b0 ) && ( a1 == b1 ); @@ -685,7 +759,7 @@ INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); @@ -698,7 +772,7 @@ INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | returns 0. *----------------------------------------------------------------------------*/ -INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); @@ -711,33 +785,9 @@ INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 != b0 ) || ( a1 != b1 ); } - -/*----------------------------------------------------------------------------- -| Changes the sign of the extended double-precision floating-point value 'a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -INLINE floatx80 floatx80_chs(floatx80 reg) -{ - reg.high ^= 0x8000; - return reg; -} - -/*----------------------------------------------------------------------------- -| Calculates the absolute value of the extended double-precision floating-point -| value `a'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -INLINE floatx80 floatx80_abs(floatx80 reg) -{ - reg.high &= 0x7FFF; - return reg; -} diff --git a/softfloat/fpu_constant.h b/softfloat/fpu_constant.h deleted file mode 100644 index fdd9719e..00000000 --- a/softfloat/fpu_constant.h +++ /dev/null @@ -1,80 +0,0 @@ -/*============================================================================ -This source file is an extension to the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) -floating point emulation. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. 
-=============================================================================*/ - -#ifndef _FPU_CONSTANTS_H_ -#define _FPU_CONSTANTS_H_ - -// Pentium CPU uses only 68-bit precision M_PI approximation -#define BETTER_THAN_PENTIUM - -/*============================================================================ - * Written for Bochs (x86 achitecture simulator) by - * Stanislav Shwartsman [sshwarts at sourceforge net] - * ==========================================================================*/ - -////////////////////////////// -// PI, PI/2, PI/4 constants -////////////////////////////// - -#define FLOATX80_PI_EXP (0x4000) - -// 128-bit PI fraction -#ifdef BETTER_THAN_PENTIUM -#define FLOAT_PI_HI (0xc90fdaa22168c234U) -#define FLOAT_PI_LO (0xc4c6628b80dc1cd1U) -#else -#define FLOAT_PI_HI (0xc90fdaa22168c234U) -#define FLOAT_PI_LO (0xC000000000000000U) -#endif - -#define FLOATX80_PI2_EXP (0x3FFF) -#define FLOATX80_PI4_EXP (0x3FFE) - -////////////////////////////// -// 3PI/4 constant -////////////////////////////// - -#define FLOATX80_3PI4_EXP (0x4000) - -// 128-bit 3PI/4 fraction -#ifdef BETTER_THAN_PENTIUM -#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U) -#define FLOAT_3PI4_LO (0x9394c9e8a0a5159cU) -#else -#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U) -#define FLOAT_3PI4_LO (0x9000000000000000U) -#endif - -////////////////////////////// -// 1/LN2 constant -////////////////////////////// - -#define FLOAT_LN2INV_EXP (0x3FFF) - -// 128-bit 1/LN2 fraction -#ifdef BETTER_THAN_PENTIUM -#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU) -#define FLOAT_LN2INV_LO (0xbe87fed0691d3e89U) -#else -#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU) -#define FLOAT_LN2INV_LO (0xC000000000000000U) -#endif - -#endif diff --git a/softfloat/fsincos.cpp b/softfloat/fsincos.cpp deleted file mode 100644 index 695936ed..00000000 --- a/softfloat/fsincos.cpp +++ /dev/null @@ -1,652 +0,0 @@ -/*============================================================================ -This source file is an extension to 
the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) -floating point emulation. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. 
-=============================================================================*/ - -/*============================================================================ - * Written for Bochs (x86 achitecture simulator) by - * Stanislav Shwartsman [sshwarts at sourceforge net] - * ==========================================================================*/ - -#define FLOATX128 - -#define USE_estimateDiv128To64 -#include "mamesf.h" -#include "softfloat.h" -//#include "softfloat-specialize" -#include "fpu_constant.h" - -static const floatx80 floatx80_one = { 0x3fff, 0x8000000000000000U }; -static const floatx80 floatx80_default_nan = { 0xffff, 0xffffffffffffffffU }; - -#define packFloat2x128m(zHi, zLo) {(zHi), (zLo)} -#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo)) - -#define EXP_BIAS 0x3FFF - -#if 0 /* Included in softfloat-specialize" */ -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -INLINE bits64 extractFloatx80Frac( floatx80 a ) -{ - return a.low; - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -INLINE int32 extractFloatx80Exp( floatx80 a ) -{ - return a.high & 0x7FFF; - -} -#endif - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the extended double-precision floating-point value -| `a'. 
-*----------------------------------------------------------------------------*/ - -INLINE flag extractFloatx80Sign( floatx80 a ) -{ - return a.high>>15; - -} - -/*---------------------------------------------------------------------------- -| Takes extended double-precision floating-point NaN `a' and returns the -| appropriate NaN result. If `a' is a signaling NaN, the invalid exception -| is raised. -*----------------------------------------------------------------------------*/ - -INLINE floatx80 propagateFloatx80NaNOneArg(floatx80 a) -{ - if (floatx80_is_signaling_nan(a)) - float_raise(float_flag_invalid); - - a.low |= 0xC000000000000000U; - - return a; -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal extended double-precision floating-point value -| represented by the denormalized significand `aSig'. The normalized exponent -| and significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. -*----------------------------------------------------------------------------*/ - -void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr) -{ - int shiftCount = countLeadingZeros64(aSig); - *zSigPtr = aSig< 0) { - q = argument_reduction_kernel(*aSig0, expDiff, aSig0, aSig1); - } - else { - if (FLOAT_PI_HI <= *aSig0) { - *aSig0 -= FLOAT_PI_HI; - q = 1; - } - } - - shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1); - if (! 
lt128(*aSig0, *aSig1, term0, term1)) - { - int lt = lt128(term0, term1, *aSig0, *aSig1); - int eq = eq128(*aSig0, *aSig1, term0, term1); - - if ((eq && (q & 1)) || lt) { - *zSign = !(*zSign); - ++q; - } - if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, *aSig0, *aSig1, aSig0, aSig1); - } - - return (int)(q & 3); -} - -#define SIN_ARR_SIZE 11 -#define COS_ARR_SIZE 11 - -static float128 sin_arr[SIN_ARR_SIZE] = -{ - PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ - PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */ - PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */ - PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */ - PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */ - PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */ - PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */ - PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */ - PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */ - PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */ - PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45) /* 21 */ -}; - -static float128 cos_arr[COS_ARR_SIZE] = -{ - PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */ - PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */ - PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */ - PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */ - PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */ - PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */ - PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */ - PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */ - PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */ - PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */ - PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb) /* 20 */ -}; - -extern float128 OddPoly (float128 x, float128 *arr, unsigned n); - -/* 0 
<= x <= pi/4 */ -INLINE float128 poly_sin(float128 x) -{ - // 3 5 7 9 11 13 15 - // x x x x x x x - // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- = - // 3! 5! 7! 9! 11! 13! 15! - // - // 2 4 6 8 10 12 14 - // x x x x x x x - // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] = - // 3! 5! 7! 9! 11! 13! 15! - // - // 3 3 - // -- 4k -- 4k+2 - // p(x) = > C * x > 0 q(x) = > C * x < 0 - // -- 2k -- 2k+1 - // k=0 k=0 - // - // 2 - // sin(x) ~ x * [ p(x) + x * q(x) ] - // - - return OddPoly(x, sin_arr, SIN_ARR_SIZE); -} - -extern float128 EvenPoly(float128 x, float128 *arr, unsigned n); - -/* 0 <= x <= pi/4 */ -INLINE float128 poly_cos(float128 x) -{ - // 2 4 6 8 10 12 14 - // x x x x x x x - // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ---- - // 2! 4! 6! 8! 10! 12! 14! - // - // 3 3 - // -- 4k -- 4k+2 - // p(x) = > C * x > 0 q(x) = > C * x < 0 - // -- 2k -- 2k+1 - // k=0 k=0 - // - // 2 - // cos(x) ~ [ p(x) + x * q(x) ] - // - - return EvenPoly(x, cos_arr, COS_ARR_SIZE); -} - -INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a) -{ - if (sin_a) *sin_a = a; - if (cos_a) *cos_a = a; -} - -INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a) -{ - if (sin_a) *sin_a = a; - if (cos_a) *cos_a = floatx80_one; -} - -static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient) -{ - if (quotient & 0x1) { - r = poly_cos(r); - neg = 0; - } else { - r = poly_sin(r); - } - - floatx80 result = float128_to_floatx80(r); - if (quotient & 0x2) - neg = ! 
neg; - - if (neg) - result = floatx80_chs(result); - - return result; -} - -// ================================================= -// SFFSINCOS Compute sin(x) and cos(x) -// ================================================= - -// -// Uses the following identities: -// ---------------------------------------------------------- -// -// sin(-x) = -sin(x) -// cos(-x) = cos(x) -// -// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) -// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) -// -// sin(x+ pi/2) = cos(x) -// sin(x+ pi) = -sin(x) -// sin(x+3pi/2) = -cos(x) -// sin(x+2pi) = sin(x) -// - -int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a) -{ - uint64_t aSig0, aSig1 = 0; - int32_t aExp, zExp, expDiff; - int aSign, zSign; - int q = 0; - - aSig0 = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - /* invalid argument */ - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig0<<1)) { - sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a)); - return 0; - } - - float_raise(float_flag_invalid); - sincos_invalid(sin_a, cos_a, floatx80_default_nan); - return 0; - } - - if (aExp == 0) { - if (aSig0 == 0) { - sincos_tiny_argument(sin_a, cos_a, a); - return 0; - } - -// float_raise(float_flag_denormal); - - /* handle pseudo denormals */ - if (! 
(aSig0 & 0x8000000000000000U)) - { - float_raise(float_flag_inexact); - if (sin_a) - float_raise(float_flag_underflow); - sincos_tiny_argument(sin_a, cos_a, a); - return 0; - } - - normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); - } - - zSign = aSign; - zExp = EXP_BIAS; - expDiff = aExp - zExp; - - /* argument is out-of-range */ - if (expDiff >= 63) - return -1; - - float_raise(float_flag_inexact); - - if (expDiff < -1) { // doesn't require reduction - if (expDiff <= -68) { - a = packFloatx80(aSign, aExp, aSig0); - sincos_tiny_argument(sin_a, cos_a, a); - return 0; - } - zExp = aExp; - } - else { - q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1); - } - - /* **************************** */ - /* argument reduction completed */ - /* **************************** */ - - /* using float128 for approximation */ - float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1); - - if (aSign) q = -q; - if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1); - if (sin_a) *sin_a = sincos_approximation(zSign, r, q); - - return 0; -} - -int floatx80_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a) -{ - return sf_fsincos(a, sin_a, cos_a); -} - -int floatx80_fsin(floatx80 *a) -{ - return sf_fsincos(*a, a, 0); -} - -int floatx80_fcos(floatx80 *a) -{ - return sf_fsincos(*a, 0, a); -} - -// ================================================= -// FPTAN Compute tan(x) -// ================================================= - -// -// Uses the following identities: -// -// 1. ---------------------------------------------------------- -// -// sin(-x) = -sin(x) -// cos(-x) = cos(x) -// -// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) -// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) -// -// sin(x+ pi/2) = cos(x) -// sin(x+ pi) = -sin(x) -// sin(x+3pi/2) = -cos(x) -// sin(x+2pi) = sin(x) -// -// 2. 
---------------------------------------------------------- -// -// sin(x) -// tan(x) = ------ -// cos(x) -// - -int floatx80_ftan(floatx80 *a) -{ - uint64_t aSig0, aSig1 = 0; - int32_t aExp, zExp, expDiff; - int aSign, zSign; - int q = 0; - - aSig0 = extractFloatx80Frac(*a); - aExp = extractFloatx80Exp(*a); - aSign = extractFloatx80Sign(*a); - - /* invalid argument */ - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig0<<1)) - { - *a = propagateFloatx80NaNOneArg(*a); - return 0; - } - - float_raise(float_flag_invalid); - *a = floatx80_default_nan; - return 0; - } - - if (aExp == 0) { - if (aSig0 == 0) return 0; -// float_raise(float_flag_denormal); - /* handle pseudo denormals */ - if (! (aSig0 & 0x8000000000000000U)) - { - float_raise(float_flag_inexact | float_flag_underflow); - return 0; - } - normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); - } - - zSign = aSign; - zExp = EXP_BIAS; - expDiff = aExp - zExp; - - /* argument is out-of-range */ - if (expDiff >= 63) - return -1; - - float_raise(float_flag_inexact); - - if (expDiff < -1) { // doesn't require reduction - if (expDiff <= -68) { - *a = packFloatx80(aSign, aExp, aSig0); - return 0; - } - zExp = aExp; - } - else { - q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1); - } - - /* **************************** */ - /* argument reduction completed */ - /* **************************** */ - - /* using float128 for approximation */ - float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1); - - float128 sin_r = poly_sin(r); - float128 cos_r = poly_cos(r); - - if (q & 0x1) { - r = float128_div(cos_r, sin_r); - zSign = ! zSign; - } else { - r = float128_div(sin_r, cos_r); - } - - *a = float128_to_floatx80(r); - if (zSign) - *a = floatx80_chs(*a); - - return 0; -} - -// 2 3 4 n -// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... 
+ (C * x) -// 0 1 2 3 4 n -// -// -- 2k -- 2k+1 -// p(x) = > C * x q(x) = > C * x -// -- 2k -- 2k+1 -// -// f(x) ~ [ p(x) + x * q(x) ] -// - -float128 EvalPoly(float128 x, float128 *arr, unsigned n) -{ - float128 x2 = float128_mul(x, x); - unsigned i; - - assert(n > 1); - - float128 r1 = arr[--n]; - i = n; - while(i >= 2) { - r1 = float128_mul(r1, x2); - i -= 2; - r1 = float128_add(r1, arr[i]); - } - if (i) r1 = float128_mul(r1, x); - - float128 r2 = arr[--n]; - i = n; - while(i >= 2) { - r2 = float128_mul(r2, x2); - i -= 2; - r2 = float128_add(r2, arr[i]); - } - if (i) r2 = float128_mul(r2, x); - - return float128_add(r1, r2); -} - -// 2 4 6 8 2n -// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) -// 0 1 2 3 4 n -// -// -- 4k -- 4k+2 -// p(x) = > C * x q(x) = > C * x -// -- 2k -- 2k+1 -// -// 2 -// f(x) ~ [ p(x) + x * q(x) ] -// - -float128 EvenPoly(float128 x, float128 *arr, unsigned n) -{ - return EvalPoly(float128_mul(x, x), arr, n); -} - -// 3 5 7 9 2n+1 -// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) -// 0 1 2 3 4 n -// 2 4 6 8 2n -// = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) -// 0 1 2 3 4 n -// -// -- 4k -- 4k+2 -// p(x) = > C * x q(x) = > C * x -// -- 2k -- 2k+1 -// -// 2 -// f(x) ~ x * [ p(x) + x * q(x) ] -// - -float128 OddPoly(float128 x, float128 *arr, unsigned n) -{ - return float128_mul(x, EvenPoly(x, arr, n)); -} - -/*---------------------------------------------------------------------------- -| Scales extended double-precision floating-point value in operand `a' by -| value `b'. The function truncates the value in the second operand 'b' to -| an integral value and adds that value to the exponent of the operand 'a'. -| The operation performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ); - -floatx80 floatx80_scale(floatx80 a, floatx80 b) -{ - sbits32 aExp, bExp; - bits64 aSig, bSig; - - // handle unsupported extended double-precision floating encodings -/* if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) - { - float_raise(float_flag_invalid); - return floatx80_default_nan; - }*/ - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - int aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - int bSign = extractFloatx80Sign(b); - - if (aExp == 0x7FFF) { - if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1))) - { - return propagateFloatx80NaN(a, b); - } - if ((bExp == 0x7FFF) && bSign) { - float_raise(float_flag_invalid); - return floatx80_default_nan; - } - if (bSig && (bExp == 0)) float_raise(float_flag_denormal); - return a; - } - if (bExp == 0x7FFF) { - if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b); - if ((aExp | aSig) == 0) { - if (! bSign) { - float_raise(float_flag_invalid); - return floatx80_default_nan; - } - return a; - } - if (aSig && (aExp == 0)) float_raise(float_flag_denormal); - if (bSign) return packFloatx80(aSign, 0, 0); - return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U); - } - if (aExp == 0) { - if (aSig == 0) return a; - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - } - if (bExp == 0) { - if (bSig == 0) return a; - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - - if (bExp > 0x400E) { - /* generate appropriate overflow/underflow */ - return roundAndPackFloatx80(80, aSign, - bSign ? 
-0x3FFF : 0x7FFF, aSig, 0); - } - if (bExp < 0x3FFF) return a; - - int shiftCount = 0x403E - bExp; - bSig >>= shiftCount; - sbits32 scale = bSig; - if (bSign) scale = -scale; /* -32768..32767 */ - return - roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0); -} diff --git a/softfloat/fyl2x.cpp b/softfloat/fyl2x.cpp deleted file mode 100644 index 28dddb17..00000000 --- a/softfloat/fyl2x.cpp +++ /dev/null @@ -1,494 +0,0 @@ -/*============================================================================ -This source file is an extension to the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) -floating point emulation. -float_raise(float_flag_invalid) -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. 
-=============================================================================*/ - -/*============================================================================ - * Written for Bochs (x86 achitecture simulator) by - * Stanislav Shwartsman [sshwarts at sourceforge net] - * Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012) - * ==========================================================================*/ - -#define FLOATX128 - -#define USE_estimateDiv128To64 -#include "mamesf.h" -#include "softfloat.h" -//#include "softfloat-specialize" -#include "fpu_constant.h" -/* -static const floatx80 floatx80_log10_2 = packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U); -static const floatx80 floatx80_ln_2 = packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU); -static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, 0x8000000000000000U); -static const floatx80 floatx80_default_nan = packFloatx80(0, 0xffff, 0xffffffffffffffffU); -*/ -static const floatx80 floatx80_log10_2 = { 0x3ffd, 0x9a209a84fbcff798U }; -static const floatx80 floatx80_ln_2 = { 0x3ffe, 0xb17217f7d1cf79acU }; -static const floatx80 floatx80_one = { 0x3fff, 0x8000000000000000U }; -static const floatx80 floatx80_default_nan = { 0xffff, 0xffffffffffffffffU }; - -#define packFloat_128(zHi, zLo) {(zHi), (zLo)} -#define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo)) - -#define EXP_BIAS 0x3FFF - -#if 0 /* Included in softfloat-specialize */ -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -INLINE bits64 extractFloatx80Frac( floatx80 a ) -{ - return a.low; - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the extended double-precision floating-point -| value `a'. 
-*----------------------------------------------------------------------------*/ - -INLINE int32 extractFloatx80Exp( floatx80 a ) -{ - return a.high & 0x7FFF; - -} -#endif - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the extended double-precision floating-point value -| `a'. -*----------------------------------------------------------------------------*/ - -INLINE flag extractFloatx80Sign( floatx80 a ) -{ - return a.high>>15; - -} - -#if 0 -/*---------------------------------------------------------------------------- -| Takes extended double-precision floating-point NaN `a' and returns the -| appropriate NaN result. If `a' is a signaling NaN, the invalid exception -| is raised. -*----------------------------------------------------------------------------*/ - -INLINE floatx80 propagateFloatx80NaNOneArg(floatx80 a) -{ - if (floatx80_is_signaling_nan(a)) - float_raise(float_flag_invalid); - - a.low |= 0xC000000000000000U; - - return a; -} -#endif - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal extended double-precision floating-point value -| represented by the denormalized significand `aSig'. The normalized exponent -| and significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. 
-*----------------------------------------------------------------------------*/ - -INLINE void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr) -{ - int shiftCount = countLeadingZeros64(aSig); - *zSigPtr = aSig< C * u q(u) = > C * u - // -- 2k -- 2k+1 - // k=0 k=0 - // - // 1+u 2 - // 1/2 ln --- ~ u * [ p(u) + u * q(u) ] - // 1-u - // -*/ - return OddPoly(x1, ln_arr, L2_ARR_SIZE); -} - -/* required sqrt(2)/2 < x < sqrt(2) */ -static float128 poly_l2(float128 x) -{ - /* using float128 for approximation */ - float128 x_p1 = float128_add(x, float128_one); - float128 x_m1 = float128_sub(x, float128_one); - x = float128_div(x_m1, x_p1); - x = poly_ln(x); - x = float128_mul(x, float128_ln2inv2); - return x; -} - -static float128 poly_l2p1(float128 x) -{ - /* using float128 for approximation */ - float128 x_p2 = float128_add(x, float128_two); - x = float128_div(x, x_p2); - x = poly_ln(x); - x = float128_mul(x, float128_ln2inv2); - return x; -} - -// ================================================= -// FYL2X Compute y * log (x) -// 2 -// ================================================= - -// -// Uses the following identities: -// -// 1. ---------------------------------------------------------- -// ln(x) -// log (x) = -------, ln (x*y) = ln(x) + ln(y) -// 2 ln(2) -// -// 2. ---------------------------------------------------------- -// 1+u x-1 -// ln (x) = ln -----, when u = ----- -// 1-u x+1 -// -// 3. ---------------------------------------------------------- -// 3 5 7 2n+1 -// 1+u u u u u -// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... 
] -// 1-u 3 5 7 2n+1 -// - -static floatx80 fyl2x(floatx80 a, floatx80 b) -{ - uint64_t aSig = extractFloatx80Frac(a); - int32_t aExp = extractFloatx80Exp(a); - int aSign = extractFloatx80Sign(a); - uint64_t bSig = extractFloatx80Frac(b); - int32_t bExp = extractFloatx80Exp(b); - int bSign = extractFloatx80Sign(b); - - int zSign = bSign ^ 1; - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1) - || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1))) - { - return propagateFloatx80NaN(a, b); - } - if (aSign) - { -invalid: - float_raise(float_flag_invalid); - return floatx80_default_nan; - } - else { - if (bExp == 0) { - if (bSig == 0) goto invalid; - float_raise(float_flag_denormal); - } - return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); - } - } - if (bExp == 0x7FFF) - { - if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b); - if (aSign && (uint64_t)(aExp | aSig)) goto invalid; - if (aSig && (aExp == 0)) - float_raise(float_flag_denormal); - if (aExp < 0x3FFF) { - return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); - } - if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid; - return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); - } - if (aExp == 0) { - if (aSig == 0) { - if ((bExp | bSig) == 0) goto invalid; - float_raise(float_flag_divbyzero); - return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); - } - if (aSign) goto invalid; - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - } - if (aSign) goto invalid; - if (bExp == 0) { - if (bSig == 0) { - if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0); - return packFloatx80(bSign, 0, 0); - } - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) - return packFloatx80(bSign, 0, 0); - - float_raise(float_flag_inexact); - - int ExpDiff = aExp - 0x3FFF; - aExp = 0; - if (aSig >= SQRT2_HALF_SIG) { - ExpDiff++; - aExp--; - } - - /* ******************************** */ - /* 
using float128 for approximation */ - /* ******************************** */ - - uint64_t zSig0, zSig1; - shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); - float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1); - x = poly_l2(x); - x = float128_add(x, int64_to_float128((int64_t) ExpDiff)); - return floatx80_mul(b, float128_to_floatx80(x)); -} - -// ================================================= -// FYL2XP1 Compute y * log (x + 1) -// 2 -// ================================================= - -// -// Uses the following identities: -// -// 1. ---------------------------------------------------------- -// ln(x) -// log (x) = ------- -// 2 ln(2) -// -// 2. ---------------------------------------------------------- -// 1+u x -// ln (x+1) = ln -----, when u = ----- -// 1-u x+2 -// -// 3. ---------------------------------------------------------- -// 3 5 7 2n+1 -// 1+u u u u u -// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] -// 1-u 3 5 7 2n+1 -// - -floatx80 fyl2xp1(floatx80 a, floatx80 b) -{ - int32_t aExp, bExp; - uint64_t aSig, bSig, zSig0, zSig1, zSig2; - int aSign, bSign; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - bSign = extractFloatx80Sign(b); - int zSign = aSign ^ bSign; - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1) - || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1))) - { - return propagateFloatx80NaN(a, b); - } - if (aSign) - { -invalid: - float_raise(float_flag_invalid); - return floatx80_default_nan; - } - else { - if (bExp == 0) { - if (bSig == 0) goto invalid; - float_raise(float_flag_denormal); - } - return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); - } - } - if (bExp == 0x7FFF) - { - if ((uint64_t) (bSig<<1)) - return propagateFloatx80NaN(a, b); - - if (aExp == 0) { - if (aSig == 0) goto invalid; - float_raise(float_flag_denormal); - } - - return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); - } - if 
(aExp == 0) { - if (aSig == 0) { - if (bSig && (bExp == 0)) float_raise(float_flag_denormal); - return packFloatx80(zSign, 0, 0); - } - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - } - if (bExp == 0) { - if (bSig == 0) return packFloatx80(zSign, 0, 0); - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - - float_raise(float_flag_inexact); - - if (aSign && aExp >= 0x3FFF) - return a; - - if (aExp >= 0x3FFC) // big argument - { - return fyl2x(floatx80_add(a, floatx80_one), b); - } - - // handle tiny argument - if (aExp < EXP_BIAS-70) - { - // first order approximation, return (a*b)/ln(2) - int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE; - - mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2); - if (0 < (int64_t) zSig0) { - shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); - --zExp; - } - - zExp = zExp + bExp - 0x3FFE; - mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2); - if (0 < (int64_t) zSig0) { - shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); - --zExp; - } - - return - roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1); - } - - /* ******************************** */ - /* using float128 for approximation */ - /* ******************************** */ - - shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); - float128 x = packFloat128(aSign, aExp, zSig0, zSig1); - x = poly_l2p1(x); - return floatx80_mul(b, float128_to_floatx80(x)); -} - -floatx80 floatx80_flognp1(floatx80 a) -{ - return fyl2xp1(a, floatx80_ln_2); -} - -floatx80 floatx80_flogn(floatx80 a) -{ - return fyl2x(a, floatx80_ln_2); -} - -floatx80 floatx80_flog2(floatx80 a) -{ - return fyl2x(a, floatx80_one); -} - -floatx80 floatx80_flog10(floatx80 a) -{ - return fyl2x(a, floatx80_log10_2); -} diff --git a/softfloat/mamesf.h b/softfloat/mamesf.h deleted file mode 100644 index 9e55a228..00000000 --- a/softfloat/mamesf.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef MAMESF_H -#define 
MAMESF_H - -/*---------------------------------------------------------------------------- -| One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined. -*----------------------------------------------------------------------------*/ -#ifdef LSB_FIRST -#define LITTLEENDIAN -#else -#define BIGENDIAN -#endif - -/*---------------------------------------------------------------------------- -| The macro `BITS64' can be defined to indicate that 64-bit integer types are -| supported by the compiler. -*----------------------------------------------------------------------------*/ -#define BITS64 - -/*---------------------------------------------------------------------------- -| Each of the following `typedef's defines the most convenient type that holds -| integers of at least as many bits as specified. For example, `uint8' should -| be the most convenient type that can hold unsigned integers of as many as -| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most -| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed -| to the same as `int'. -*----------------------------------------------------------------------------*/ -//#include "assert.h" -#include "sysdeps.h" - -typedef int8_t flag; -typedef uint8_t uint8; -typedef int8_t int8; -typedef uint16_t uint16; -typedef int16_t int16; -typedef uint32_t uint32; -typedef int32_t int32; -typedef uint64_t uint64; -typedef int64_t int64; - -/*---------------------------------------------------------------------------- -| Each of the following `typedef's defines a type that holds integers -| of _exactly_ the number of bits specified. For instance, for most -| implementation of C, `bits16' and `sbits16' should be `typedef'ed to -| `unsigned short int' and `signed short int' (or `short int'), respectively. 
-*----------------------------------------------------------------------------*/ -typedef uint8_t bits8; -typedef int8_t sbits8; -typedef uint16_t bits16; -typedef int16_t sbits16; -typedef uint32_t bits32; -typedef int32_t sbits32; -typedef uint64_t bits64; -typedef int64_t sbits64; - -/*---------------------------------------------------------------------------- -| The `LIT64' macro takes as its argument a textual integer literal and -| if necessary ``marks'' the literal as having a 64-bit integer type. -| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be -| appended with the letters `LL' standing for `long long', which is `gcc's -| name for the 64-bit integer type. Some compilers may allow `LIT64' to be -| defined as the identity macro: `#define LIT64( a ) a'. -*----------------------------------------------------------------------------*/ -#define LIT64( a ) a##ULL - -/*---------------------------------------------------------------------------- -| The macro `INLINE' can be used before functions that should be inlined. If -| a compiler does not support explicit inlining, this macro should be defined -| to be `static'. -*----------------------------------------------------------------------------*/ -#define INLINE static inline - -#endif //MAMESF_H \ No newline at end of file diff --git a/softfloat/milieu.h b/softfloat/milieu.h deleted file mode 100644 index 10687b75..00000000 --- a/softfloat/milieu.h +++ /dev/null @@ -1,42 +0,0 @@ - -/*============================================================================ - -This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. Funding was partially provided by the -National Science Foundation under grant MIP-9311980. 
The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -/*---------------------------------------------------------------------------- -| Include common integer types and flags. -*----------------------------------------------------------------------------*/ -#include "mamesf.h" - -/*---------------------------------------------------------------------------- -| Symbolic Boolean literals. 
-*----------------------------------------------------------------------------*/ -#define FALSE 0 -#define TRUE 1 diff --git a/softfloat/softfloat-specialize.h b/softfloat/softfloat-specialize.h index f0ec9ef2..962dff2c 100644 --- a/softfloat/softfloat-specialize.h +++ b/softfloat/softfloat-specialize.h @@ -1,8 +1,24 @@ - -/*============================================================================ - +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +/* +=============================================================================== This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b. +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -11,43 +27,187 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. 
Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +#if defined(TARGET_XTENSA) +/* Define for architectures which deviate from IEEE in not supporting + * signaling NaNs (so all NaNs are treated as quiet). + */ +#define NO_SIGNALING_NANS 1 +#endif -=============================================================================*/ +/*---------------------------------------------------------------------------- +| The pattern for a default generated half-precision NaN. 
+*----------------------------------------------------------------------------*/ +static float16 float16_default_nan(float_status *status) +{ +#if defined(TARGET_ARM) + return const_float16(0x7E00); +#else + if (status->snan_bit_is_one) { + return const_float16(0x7DFF); + } else { +#if defined(TARGET_MIPS) + return const_float16(0x7E00); +#else + return const_float16(0xFE00); +#endif + } +#endif +} /*---------------------------------------------------------------------------- -| Underflow tininess-detection mode, statically initialized to default value. -| (The declaration in `softfloat.h' must match the `int8' type here.) +| The pattern for a default generated single-precision NaN. *----------------------------------------------------------------------------*/ -int8 float_detect_tininess = float_tininess_after_rounding; +static float32 float32_default_nan(float_status *status) +{ +#if defined(TARGET_SPARC) + return const_float32(0x7FFFFFFF); +#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \ + defined(TARGET_XTENSA) || defined(TARGET_S390X) || defined(TARGET_TRICORE) + return const_float32(0x7FC00000); +#else + if (status->snan_bit_is_one) { + return const_float32(0x7FBFFFFF); + } else { +#if defined(TARGET_MIPS) + return const_float32(0x7FC00000); +#else + return const_float32(0xFFC00000); +#endif + } +#endif +} /*---------------------------------------------------------------------------- -| Raises the exceptions specified by `flags'. Floating-point traps can be -| defined here if desired. It is currently not possible for such a trap to -| substitute a result value. If traps are not implemented, this routine -| should be simply `float_exception_flags |= flags;'. +| The pattern for a default generated double-precision NaN. 
+*----------------------------------------------------------------------------*/ +static float64 float64_default_nan(float_status *status) +{ +#if defined(TARGET_SPARC) + return const_float64(LIT64(0x7FFFFFFFFFFFFFFF)); +#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \ + defined(TARGET_S390X) + return const_float64(LIT64(0x7FF8000000000000)); +#else + if (status->snan_bit_is_one) { + return const_float64(LIT64(0x7FF7FFFFFFFFFFFF)); + } else { +#if defined(TARGET_MIPS) + return const_float64(LIT64(0x7FF8000000000000)); +#else + return const_float64(LIT64(0xFFF8000000000000)); +#endif + } +#endif +} + +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. The +| `high' and `low' values hold the most- and least-significant bits, +| respectively. *----------------------------------------------------------------------------*/ +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ +static floatx80 floatx80_default_nan(float_status *status) +{ + floatx80 r; + + if (status->snan_bit_is_one) { + r.low = LIT64(0xBFFFFFFFFFFFFFFF); + r.high = 0x7FFF; + } else { + r.low = LIT64(0xC000000000000000); + r.high = 0xFFFF; + } + return r; +} -void float_raise( int8 flags ) +/*---------------------------------------------------------------------------- +| The pattern for a default generated quadruple-precision NaN. 
+*----------------------------------------------------------------------------*/ +static float128 float128_default_nan(float_status *status) { + float128 r; + + if (status->snan_bit_is_one) { + r.low = LIT64(0xFFFFFFFFFFFFFFFF); + r.high = LIT64(0x7FFF7FFFFFFFFFFF); + } else { + r.low = LIT64(0x0000000000000000); +#if defined(TARGET_S390X) + r.high = LIT64(0x7FFF800000000000); +#else + r.high = LIT64(0xFFFF800000000000); +#endif + } + return r; +} - float_exception_flags |= flags; +/*---------------------------------------------------------------------------- +| Raises the exceptions specified by `flags'. Floating-point traps can be +| defined here if desired. It is currently not possible for such a trap +| to substitute a result value. If traps are not implemented, this routine +| should be simply `float_exception_flags |= flags;'. +*----------------------------------------------------------------------------*/ +static inline void float_raise(uint8_t flags, float_status *status) +{ + status->float_exception_flags |= flags; } /*---------------------------------------------------------------------------- @@ -55,24 +215,133 @@ void float_raise( int8 flags ) *----------------------------------------------------------------------------*/ typedef struct { flag sign; - bits64 high, low; + uint64_t high, low; } commonNaNT; +#ifdef NO_SIGNALING_NANS +static int float16_is_quiet_nan(float16 a_, float_status *status) +{ + return float16_is_any_nan(a_); +} + +static int float16_is_signaling_nan(float16 a_, float_status *status) +{ + return 0; +} +#else /*---------------------------------------------------------------------------- -| The pattern for a default generated single-precision NaN. +| Returns 1 if the half-precision floating-point value `a' is a quiet +| NaN; otherwise returns 0. 
+*----------------------------------------------------------------------------*/
+
+static int float16_is_quiet_nan(float16 a_, float_status *status)
+{
+    uint16_t a = float16_val(a_);
+    if (status->snan_bit_is_one) {
+        return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
+    } else {
+        return ((a & ~0x8000) >= 0x7E00); /* exponent all ones and bit 9 set */
+    }
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the half-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+static int float16_is_signaling_nan(float16 a_, float_status *status)
+{
+    uint16_t a = float16_val(a_);
+    if (status->snan_bit_is_one) {
+        return ((a & ~0x8000) >= 0x7E00); /* exponent all ones and bit 9 set */
+    } else {
+        return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
+    }
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns a quiet NaN if the half-precision floating point value `a' is a
+| signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+static float16 float16_maybe_silence_nan(float16 a_, float_status *status)
+{
+    if (float16_is_signaling_nan(a_, status)) {
+        if (status->snan_bit_is_one) {
+            return float16_default_nan(status);
+        } else {
+            uint16_t a = float16_val(a_);
+            a |= (1 << 9); /* set bit 9, the bit the NaN predicates test */
+            return make_float16(a);
+        }
+    }
+    return a_;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the half-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
*----------------------------------------------------------------------------*/ -#define float32_default_nan 0xFFFFFFFF + +static commonNaNT float16ToCommonNaN(float16 a, float_status *status) +{ + commonNaNT z; + + if (float16_is_signaling_nan(a, status)) { + float_raise(float_flag_invalid, status); + } + z.sign = float16_val(a) >> 15; + z.low = 0; + z.high = ((uint64_t) float16_val(a)) << 54; + return z; +} /*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is a NaN; -| otherwise returns 0. +| Returns the result of converting the canonical NaN `a' to the half- +| precision floating-point format. *----------------------------------------------------------------------------*/ -flag float32_is_nan( float32 a ) +static float16 commonNaNToFloat16(commonNaNT a, float_status *status) { + uint16_t mantissa = a.high >> 54; + + if (status->default_nan_mode) { + return float16_default_nan(status); + } - return ( 0xFF000000 < (bits32) ( a<<1 ) ); + if (mantissa) { + return make_float16(((((uint16_t) a.sign) << 15) + | (0x1F << 10) | mantissa)); + } else { + return float16_default_nan(status); + } +} +#ifdef NO_SIGNALING_NANS +static int float32_is_quiet_nan(float32 a_, float_status *status) +{ + return float32_is_any_nan(a_); +} + +static int float32_is_signaling_nan(float32 a_, float_status *status) +{ + return 0; +} +#else +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is a quiet +| NaN; otherwise returns 0. 
+*----------------------------------------------------------------------------*/ + +static int float32_is_quiet_nan(float32 a_, float_status *status) +{ + uint32_t a = float32_val(a_); + if (status->snan_bit_is_one) { + return (((a >> 22) & 0x1FF) == 0x1FE) && (a & 0x003FFFFF); + } else { + return ((uint32_t)(a << 1) >= 0xFF800000); + } } /*---------------------------------------------------------------------------- @@ -80,11 +349,34 @@ flag float32_is_nan( float32 a ) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -flag float32_is_signaling_nan( float32 a ) +static int float32_is_signaling_nan(float32 a_, float_status *status) { + uint32_t a = float32_val(a_); + if (status->snan_bit_is_one) { + return ((uint32_t)(a << 1) >= 0xFF800000); + } else { + return (((a >> 22) & 0x1FF) == 0x1FE) && (a & 0x003FFFFF); + } +} +#endif - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); +/*---------------------------------------------------------------------------- +| Returns a quiet NaN if the single-precision floating point value `a' is a +| signaling NaN; otherwise returns `a'. +*----------------------------------------------------------------------------*/ +static float32 float32_maybe_silence_nan(float32 a_, float_status *status) +{ + if (float32_is_signaling_nan(a_, status)) { + if (status->snan_bit_is_one) { + return float32_default_nan(status); + } else { + uint32_t a = float32_val(a_); + a |= (1 << 22); + return make_float32(a); + } + } + return a_; } /*---------------------------------------------------------------------------- @@ -93,16 +385,17 @@ flag float32_is_signaling_nan( float32 a ) | exception is raised. 
*----------------------------------------------------------------------------*/ -static commonNaNT float32ToCommonNaN( float32 a ) +static commonNaNT float32ToCommonNaN(float32 a, float_status *status) { commonNaNT z; - if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a>>31; + if (float32_is_signaling_nan(a, status)) { + float_raise(float_flag_invalid, status); + } + z.sign = float32_val(a) >> 31; z.low = 0; - z.high = ( (bits64) a )<<41; + z.high = ((uint64_t)float32_val(a)) << 41; return z; - } /*---------------------------------------------------------------------------- @@ -110,12 +403,256 @@ static commonNaNT float32ToCommonNaN( float32 a ) | precision floating-point format. *----------------------------------------------------------------------------*/ -static float32 commonNaNToFloat32( commonNaNT a ) +static float32 commonNaNToFloat32(commonNaNT a, float_status *status) { + uint32_t mantissa = a.high >> 41; + + if (status->default_nan_mode) { + return float32_default_nan(status); + } - return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); + if (mantissa) { + return make_float32( + (((uint32_t)a.sign) << 31) | 0x7F800000 | (a.high >> 41)); + } else { + return float32_default_nan(status); + } +} + +/*---------------------------------------------------------------------------- +| Select which NaN to propagate for a two-input operation. +| IEEE754 doesn't specify all the details of this, so the +| algorithm is target-specific. +| The routine is passed various bits of information about the +| two NaNs and should return 0 to select NaN a and 1 for NaN b. +| Note that signalling NaNs are always squashed to quiet NaNs +| by the caller, by calling floatXX_maybe_silence_nan() before +| returning them. +| +| aIsLargerSignificand is only valid if both a and b are NaNs +| of some kind, and is true if a has the larger significand, +| or if both a and b have the same significand but a is +| positive but b is negative. 
It is only needed for the x87 +| tie-break rule. +*----------------------------------------------------------------------------*/ +#if defined(TARGET_ARM) +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* ARM mandated NaN propagation rules: take the first of: + * 1. A if it is signaling + * 2. B if it is signaling + * 3. A (quiet) + * 4. B (quiet) + * A signaling NaN is always quietened before returning it. + */ + if (aIsSNaN) { + return 0; + } else if (bIsSNaN) { + return 1; + } else if (aIsQNaN) { + return 0; + } else { + return 1; + } +} +#elif defined(TARGET_MIPS) +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* According to MIPS specifications, if one of the two operands is + * a sNaN, a new qNaN has to be generated. This is done in + * floatXX_maybe_silence_nan(). For qNaN inputs the specifications + * says: "When possible, this QNaN result is one of the operand QNaN + * values." In practice it seems that most implementations choose + * the first operand if both operands are qNaN. In short this gives + * the following rules: + * 1. A if it is signaling + * 2. B if it is signaling + * 3. A (quiet) + * 4. B (quiet) + * A signaling NaN is always silenced before returning it. + */ + if (aIsSNaN) { + return 0; + } else if (bIsSNaN) { + return 1; + } else if (aIsQNaN) { + return 0; + } else { + return 1; + } +} +#elif defined(TARGET_PPC) || defined(TARGET_XTENSA) +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* PowerPC propagation rules: + * 1. A if it sNaN or qNaN + * 2. B if it sNaN or qNaN + * A signaling NaN is always silenced before returning it. 
+ */ + if (aIsSNaN || aIsQNaN) { + return 0; + } else { + return 1; + } +} +#else +static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag aIsLargerSignificand) +{ + /* This implements x87 NaN propagation rules: + * SNaN + QNaN => return the QNaN + * two SNaNs => return the one with the larger significand, silenced + * two QNaNs => return the one with the larger significand + * SNaN and a non-NaN => return the SNaN, silenced + * QNaN and a non-NaN => return the QNaN + * + * If we get down to comparing significands and they are the same, + * return the NaN with the positive sign bit (if any). + */ + if (aIsSNaN) { + if (bIsSNaN) { + return aIsLargerSignificand ? 0 : 1; + } + return bIsQNaN ? 1 : 0; + } else if (aIsQNaN) { + if (bIsSNaN || !bIsQNaN) { + return 0; + } else { + return aIsLargerSignificand ? 0 : 1; + } + } else { + return 1; + } } +#endif + +/*---------------------------------------------------------------------------- +| Select which NaN to propagate for a three-input operation. +| For the moment we assume that no CPU needs the 'larger significand' +| information. +| Return values : 0 : a; 1 : b; 2 : c; 3 : default-NaN +*----------------------------------------------------------------------------*/ +#if defined(TARGET_ARM) +static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag cIsQNaN, flag cIsSNaN, flag infzero, + float_status *status) +{ + /* For ARM, the (inf,zero,qnan) case sets InvalidOp and returns + * the default NaN + */ + if (infzero && cIsQNaN) { + float_raise(float_flag_invalid, status); + return 3; + } + + /* This looks different from the ARM ARM pseudocode, because the ARM ARM + * puts the operands to a fused mac operation (a*b)+c in the order c,a,b. 
+ */ + if (cIsSNaN) { + return 2; + } else if (aIsSNaN) { + return 0; + } else if (bIsSNaN) { + return 1; + } else if (cIsQNaN) { + return 2; + } else if (aIsQNaN) { + return 0; + } else { + return 1; + } +} +#elif defined(TARGET_MIPS) +static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag cIsQNaN, flag cIsSNaN, flag infzero, + float_status *status) +{ + /* For MIPS, the (inf,zero,qnan) case sets InvalidOp and returns + * the default NaN + */ + if (infzero) { + float_raise(float_flag_invalid, status); + return 3; + } + + if (status->snan_bit_is_one) { + /* Prefer sNaN over qNaN, in the a, b, c order. */ + if (aIsSNaN) { + return 0; + } else if (bIsSNaN) { + return 1; + } else if (cIsSNaN) { + return 2; + } else if (aIsQNaN) { + return 0; + } else if (bIsQNaN) { + return 1; + } else { + return 2; + } + } else { + /* Prefer sNaN over qNaN, in the c, a, b order. */ + if (cIsSNaN) { + return 2; + } else if (aIsSNaN) { + return 0; + } else if (bIsSNaN) { + return 1; + } else if (cIsQNaN) { + return 2; + } else if (aIsQNaN) { + return 0; + } else { + return 1; + } + } +} +#elif defined(TARGET_PPC) +static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag cIsQNaN, flag cIsSNaN, flag infzero, + float_status *status) +{ + /* For PPC, the (inf,zero,qnan) case sets InvalidOp, but we prefer + * to return an input NaN if we have one (ie c) rather than generating + * a default NaN + */ + if (infzero) { + float_raise(float_flag_invalid, status); + return 2; + } + + /* If fRA is a NaN return it; otherwise if fRB is a NaN return it; + * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB + */ + if (aIsSNaN || aIsQNaN) { + return 0; + } else if (cIsSNaN || cIsQNaN) { + return 2; + } else { + return 1; + } +} +#else +/* A default implementation: prefer a to b to c. + * This is unlikely to actually match any real implementation. 
+ */ +static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN, + flag cIsQNaN, flag cIsSNaN, flag infzero, + float_status *status) +{ + if (aIsSNaN || aIsQNaN) { + return 0; + } else if (bIsSNaN || bIsQNaN) { + return 1; + } else { + return 2; + } +} +#endif /*---------------------------------------------------------------------------- | Takes two single-precision floating-point values `a' and `b', one of which @@ -123,41 +660,120 @@ static float32 commonNaNToFloat32( commonNaNT a ) | signaling NaN, the invalid exception is raised. *----------------------------------------------------------------------------*/ -static float32 propagateFloat32NaN( float32 a, float32 b ) +static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) { - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; + flag aIsLargerSignificand; + uint32_t av, bv; + + aIsQuietNaN = float32_is_quiet_nan(a, status); + aIsSignalingNaN = float32_is_signaling_nan(a, status); + bIsQuietNaN = float32_is_quiet_nan(b, status); + bIsSignalingNaN = float32_is_signaling_nan(b, status); + av = float32_val(a); + bv = float32_val(b); + + if (aIsSignalingNaN | bIsSignalingNaN) { + float_raise(float_flag_invalid, status); + } - aIsNaN = float32_is_nan( a ); - aIsSignalingNaN = float32_is_signaling_nan( a ); - bIsNaN = float32_is_nan( b ); - bIsSignalingNaN = float32_is_signaling_nan( b ); - a |= 0x00400000; - b |= 0x00400000; - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; + if (status->default_nan_mode) { + return float32_default_nan(status); } - else { - return b; + + if ((uint32_t)(av << 1) < (uint32_t)(bv << 1)) { + aIsLargerSignificand = 0; + } else if ((uint32_t)(bv << 1) < (uint32_t)(av << 1)) { + aIsLargerSignificand = 1; + } else { + aIsLargerSignificand = (av < bv) ? 
1 : 0; } + if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + aIsLargerSignificand)) { + return float32_maybe_silence_nan(b, status); + } else { + return float32_maybe_silence_nan(a, status); + } } /*---------------------------------------------------------------------------- -| The pattern for a default generated double-precision NaN. +| Takes three single-precision floating-point values `a', `b' and `c', one of +| which is a NaN, and returns the appropriate NaN result. If any of `a', +| `b' or `c' is a signaling NaN, the invalid exception is raised. +| The input infzero indicates whether a*b was 0*inf or inf*0 (in which case +| obviously c is a NaN, and whether to propagate c or some other NaN is +| implementation defined). *----------------------------------------------------------------------------*/ -#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF ) -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is a NaN; -| otherwise returns 0. 
-*----------------------------------------------------------------------------*/ +static float32 propagateFloat32MulAddNaN(float32 a, float32 b, + float32 c, flag infzero, + float_status *status) +{ + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + cIsQuietNaN, cIsSignalingNaN; + int which; + + aIsQuietNaN = float32_is_quiet_nan(a, status); + aIsSignalingNaN = float32_is_signaling_nan(a, status); + bIsQuietNaN = float32_is_quiet_nan(b, status); + bIsSignalingNaN = float32_is_signaling_nan(b, status); + cIsQuietNaN = float32_is_quiet_nan(c, status); + cIsSignalingNaN = float32_is_signaling_nan(c, status); + + if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) { + float_raise(float_flag_invalid, status); + } + + which = pickNaNMulAdd(aIsQuietNaN, aIsSignalingNaN, + bIsQuietNaN, bIsSignalingNaN, + cIsQuietNaN, cIsSignalingNaN, infzero, status); -flag float64_is_nan( float64 a ) + if (status->default_nan_mode) { + /* Note that this check is after pickNaNMulAdd so that function + * has an opportunity to set the Invalid flag. + */ + return float32_default_nan(status); + } + + switch (which) { + case 0: + return float32_maybe_silence_nan(a, status); + case 1: + return float32_maybe_silence_nan(b, status); + case 2: + return float32_maybe_silence_nan(c, status); + case 3: + default: + return float32_default_nan(status); + } +} + +#ifdef NO_SIGNALING_NANS +static int float64_is_quiet_nan(float64 a_, float_status *status) { + return float64_is_any_nan(a_); +} - return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) ); +int float64_is_signaling_nan(float64 a_, float_status *status) +{ + return 0; +} +#else +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is a quiet +| NaN; otherwise returns 0. 
+*----------------------------------------------------------------------------*/ +static int float64_is_quiet_nan(float64 a_, float_status *status) +{ + uint64_t a = float64_val(a_); + if (status->snan_bit_is_one) { + return (((a >> 51) & 0xFFF) == 0xFFE) + && (a & 0x0007FFFFFFFFFFFFULL); + } else { + return ((a << 1) >= 0xFFF0000000000000ULL); + } } /*---------------------------------------------------------------------------- @@ -165,13 +781,35 @@ flag float64_is_nan( float64 a ) | NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -flag float64_is_signaling_nan( float64 a ) +static int float64_is_signaling_nan(float64 a_, float_status *status) { + uint64_t a = float64_val(a_); + if (status->snan_bit_is_one) { + return ((a << 1) >= 0xFFF0000000000000ULL); + } else { + return (((a >> 51) & 0xFFF) == 0xFFE) + && (a & LIT64(0x0007FFFFFFFFFFFF)); + } +} +#endif - return - ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) - && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); +/*---------------------------------------------------------------------------- +| Returns a quiet NaN if the double-precision floating point value `a' is a +| signaling NaN; otherwise returns `a'. +*----------------------------------------------------------------------------*/ +static float64 float64_maybe_silence_nan(float64 a_, float_status *status) +{ + if (float64_is_signaling_nan(a_, status)) { + if (status->snan_bit_is_one) { + return float64_default_nan(status); + } else { + uint64_t a = float64_val(a_); + a |= LIT64(0x0008000000000000); + return make_float64(a); + } + } + return a_; } /*---------------------------------------------------------------------------- @@ -180,16 +818,17 @@ flag float64_is_signaling_nan( float64 a ) | exception is raised. 
*----------------------------------------------------------------------------*/ -static commonNaNT float64ToCommonNaN( float64 a ) +static commonNaNT float64ToCommonNaN(float64 a, float_status *status) { commonNaNT z; - if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a>>63; + if (float64_is_signaling_nan(a, status)) { + float_raise(float_flag_invalid, status); + } + z.sign = float64_val(a) >> 63; z.low = 0; - z.high = a<<12; + z.high = float64_val(a) << 12; return z; - } /*---------------------------------------------------------------------------- @@ -197,14 +836,22 @@ static commonNaNT float64ToCommonNaN( float64 a ) | precision floating-point format. *----------------------------------------------------------------------------*/ -static float64 commonNaNToFloat64( commonNaNT a ) +static float64 commonNaNToFloat64(commonNaNT a, float_status *status) { + uint64_t mantissa = a.high >> 12; - return - ( ( (bits64) a.sign )<<63 ) - | LIT64( 0x7FF8000000000000 ) - | ( a.high>>12 ); + if (status->default_nan_mode) { + return float64_default_nan(status); + } + if (mantissa) { + return make_float64( + (((uint64_t) a.sign) << 63) + | LIT64(0x7FF0000000000000) + | (a.high >> 12)); + } else { + return float64_default_nan(status); + } } /*---------------------------------------------------------------------------- @@ -213,165 +860,192 @@ static float64 commonNaNToFloat64( commonNaNT a ) | signaling NaN, the invalid exception is raised. 
*----------------------------------------------------------------------------*/ -static float64 propagateFloat64NaN( float64 a, float64 b ) +static float64 propagateFloat64NaN(float64 a, float64 b, float_status *status) { - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = float64_is_nan( a ); - aIsSignalingNaN = float64_is_signaling_nan( a ); - bIsNaN = float64_is_nan( b ); - bIsSignalingNaN = float64_is_signaling_nan( b ); - a |= LIT64( 0x0008000000000000 ); - b |= LIT64( 0x0008000000000000 ); - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; + flag aIsLargerSignificand; + uint64_t av, bv; + + aIsQuietNaN = float64_is_quiet_nan(a, status); + aIsSignalingNaN = float64_is_signaling_nan(a, status); + bIsQuietNaN = float64_is_quiet_nan(b, status); + bIsSignalingNaN = float64_is_signaling_nan(b, status); + av = float64_val(a); + bv = float64_val(b); + + if (aIsSignalingNaN | bIsSignalingNaN) { + float_raise(float_flag_invalid, status); } -} - -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| The pattern for a default generated extended double-precision NaN. The -| `high' and `low' values hold the most- and least-significant bits, -| respectively. -*----------------------------------------------------------------------------*/ -#define floatx80_default_nan_high 0xFFFF -#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is a -| NaN; otherwise returns 0. 
-*----------------------------------------------------------------------------*/ - -flag floatx80_is_nan( floatx80 a ) -{ + if (status->default_nan_mode) { + return float64_default_nan(status); + } - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); + if ((uint64_t)(av << 1) < (uint64_t)(bv << 1)) { + aIsLargerSignificand = 0; + } else if ((uint64_t)(bv << 1) < (uint64_t)(av << 1)) { + aIsLargerSignificand = 1; + } else { + aIsLargerSignificand = (av < bv) ? 1 : 0; + } + if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + aIsLargerSignificand)) { + return float64_maybe_silence_nan(b, status); + } else { + return float64_maybe_silence_nan(a, status); + } } /*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is a -| signaling NaN; otherwise returns 0. +| Takes three double-precision floating-point values `a', `b' and `c', one of +| which is a NaN, and returns the appropriate NaN result. If any of `a', +| `b' or `c' is a signaling NaN, the invalid exception is raised. +| The input infzero indicates whether a*b was 0*inf or inf*0 (in which case +| obviously c is a NaN, and whether to propagate c or some other NaN is +| implementation defined). 
*----------------------------------------------------------------------------*/ -flag floatx80_is_signaling_nan( floatx80 a ) +static float64 propagateFloat64MulAddNaN(float64 a, float64 b, + float64 c, flag infzero, + float_status *status) { - bits64 aLow; - - aLow = a.low & ~ LIT64( 0x4000000000000000 ); - return - ( ( a.high & 0x7FFF ) == 0x7FFF ) - && (bits64) ( aLow<<1 ) - && ( a.low == aLow ); - -} + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + cIsQuietNaN, cIsSignalingNaN; + int which; + + aIsQuietNaN = float64_is_quiet_nan(a, status); + aIsSignalingNaN = float64_is_signaling_nan(a, status); + bIsQuietNaN = float64_is_quiet_nan(b, status); + bIsSignalingNaN = float64_is_signaling_nan(b, status); + cIsQuietNaN = float64_is_quiet_nan(c, status); + cIsSignalingNaN = float64_is_signaling_nan(c, status); + + if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) { + float_raise(float_flag_invalid, status); + } -// 28-12-2016: Added for Previous: + which = pickNaNMulAdd(aIsQuietNaN, aIsSignalingNaN, + bIsQuietNaN, bIsSignalingNaN, + cIsQuietNaN, cIsSignalingNaN, infzero, status); -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | zero; otherwise returns 0. - *----------------------------------------------------------------------------*/ + if (status->default_nan_mode) { + /* Note that this check is after pickNaNMulAdd so that function + * has an opportunity to set the Invalid flag. 
+ */ + return float64_default_nan(status); + } -flag floatx80_is_zero( floatx80 a ) -{ - - return ( ( a.high & 0x7FFF ) == 0 ) && ( a.low == 0 ); - + switch (which) { + case 0: + return float64_maybe_silence_nan(a, status); + case 1: + return float64_maybe_silence_nan(b, status); + case 2: + return float64_maybe_silence_nan(c, status); + case 3: + default: + return float64_default_nan(status); + } } -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | infinity; otherwise returns 0. - *----------------------------------------------------------------------------*/ - -flag floatx80_is_infinity( floatx80 a ) +#ifdef NO_SIGNALING_NANS +static int floatx80_is_quiet_nan(floatx80 a_, float_status *status) { - - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && ( (bits64) ( a.low<<1 ) == 0 ); - + return floatx80_is_any_nan(a_); } -/*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | negative; otherwise returns 0. - *----------------------------------------------------------------------------*/ - -flag floatx80_is_negative( floatx80 a ) +static int floatx80_is_signaling_nan(floatx80 a_, float_status *status) { - - return ( ( a.high & 0x8000 ) == 0x8000 ); - + return 0; } - +#else /*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | denormal; otherwise returns 0. - *----------------------------------------------------------------------------*/ +| Returns 1 if the extended double-precision floating-point value `a' is a +| quiet NaN; otherwise returns 0. This slightly differs from the same +| function for other types as floatx80 has an explicit bit. 
+*----------------------------------------------------------------------------*/ -flag floatx80_is_denormal( floatx80 a ) +static int floatx80_is_quiet_nan(floatx80 a, float_status *status) { - - return - ( ( a.high & 0x7FFF ) == 0 ) - && ( (bits64) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ) - && (bits64) ( a.low<<1 ); - + if (status->snan_bit_is_one) { + uint64_t aLow; + + aLow = a.low & ~0x4000000000000000ULL; + return ((a.high & 0x7FFF) == 0x7FFF) + && (aLow << 1) + && (a.low == aLow); + } else { + return ((a.high & 0x7FFF) == 0x7FFF) + && (LIT64(0x8000000000000000) <= ((uint64_t)(a.low << 1))); + } } /*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | unnormal; otherwise returns 0. - *----------------------------------------------------------------------------*/ +| Returns 1 if the extended double-precision floating-point value `a' is a +| signaling NaN; otherwise returns 0. This slightly differs from the same +| function for other types as floatx80 has an explicit bit. +*----------------------------------------------------------------------------*/ -flag floatx80_is_unnormal( floatx80 a ) +static int floatx80_is_signaling_nan(floatx80 a, float_status *status) { - - return - ( ( a.high & 0x7FFF ) > 0 ) - && ( ( a.high & 0x7FFF ) < 0x7FFF) - && ( (bits64) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ); - + if (status->snan_bit_is_one) { + return ((a.high & 0x7FFF) == 0x7FFF) + && ((a.low << 1) >= 0x8000000000000000ULL); + } else { + uint64_t aLow; + + aLow = a.low & ~LIT64(0x4000000000000000); + return ((a.high & 0x7FFF) == 0x7FFF) + && (uint64_t)(aLow << 1) + && (a.low == aLow); + } } +#endif /*---------------------------------------------------------------------------- - | Returns 1 if the extended double-precision floating-point value `a' is - | normal; otherwise returns 0. 
- *----------------------------------------------------------------------------*/ +| Returns a quiet NaN if the extended double-precision floating point value +| `a' is a signaling NaN; otherwise returns `a'. +*----------------------------------------------------------------------------*/ -flag floatx80_is_normal( floatx80 a ) +static floatx80 floatx80_maybe_silence_nan(floatx80 a, float_status *status) { - - return - ( ( a.high & 0x7FFF ) < 0x7FFF ) - && ( (bits64) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x8000000000000000 ) ); - + if (floatx80_is_signaling_nan(a, status)) { + if (status->snan_bit_is_one) { + a = floatx80_default_nan(status); + } else { + a.low |= LIT64(0xC000000000000000); + return a; + } + } + return a; } -// End of addition for Previous - /*---------------------------------------------------------------------------- | Returns the result of converting the extended double-precision floating- | point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the | invalid exception is raised. *----------------------------------------------------------------------------*/ -static commonNaNT floatx80ToCommonNaN( floatx80 a ) +static commonNaNT floatx80ToCommonNaN(floatx80 a, float_status *status) { + floatx80 dflt; commonNaNT z; - if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a.high>>15; - z.low = 0; - z.high = a.low<<1; + if (floatx80_is_signaling_nan(a, status)) { + float_raise(float_flag_invalid, status); + } + if (a.low >> 63) { + z.sign = a.high >> 15; + z.low = 0; + z.high = a.low << 1; + } else { + dflt = floatx80_default_nan(status); + z.sign = dflt.high >> 15; + z.low = 0; + z.high = dflt.low << 1; + } return z; - } /*---------------------------------------------------------------------------- @@ -379,14 +1053,21 @@ static commonNaNT floatx80ToCommonNaN( floatx80 a ) | double-precision floating-point format. 
*----------------------------------------------------------------------------*/ -static floatx80 commonNaNToFloatx80( commonNaNT a ) +static floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) { floatx80 z; - z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); - z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; - return z; + if (status->default_nan_mode) { + return floatx80_default_nan(status); + } + if (a.high >> 1) { + z.low = LIT64(0x8000000000000000) | a.high >> 1; + z.high = (((uint16_t)a.sign) << 15) | 0x7FFF; + } else { + z = floatx80_default_nan(status); + } + return z; } /*---------------------------------------------------------------------------- @@ -395,101 +1076,101 @@ static floatx80 commonNaNToFloatx80( commonNaNT a ) | `b' is a signaling NaN, the invalid exception is raised. *----------------------------------------------------------------------------*/ -floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) +static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, + float_status *status) { - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; + flag aIsLargerSignificand; - aIsNaN = floatx80_is_nan( a ); - aIsSignalingNaN = floatx80_is_signaling_nan( a ); - bIsNaN = floatx80_is_nan( b ); - bIsSignalingNaN = floatx80_is_signaling_nan( b ); - a.low |= LIT64( 0xC000000000000000 ); - b.low |= LIT64( 0xC000000000000000 ); - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? 
b : a; - } - else { - return b; + aIsQuietNaN = floatx80_is_quiet_nan(a, status); + aIsSignalingNaN = floatx80_is_signaling_nan(a, status); + bIsQuietNaN = floatx80_is_quiet_nan(b, status); + bIsSignalingNaN = floatx80_is_signaling_nan(b, status); + + if (aIsSignalingNaN | bIsSignalingNaN) { + float_raise(float_flag_invalid, status); } -} + if (status->default_nan_mode) { + return floatx80_default_nan(status); + } -#define EXP_BIAS 0x3FFF + if (a.low < b.low) { + aIsLargerSignificand = 0; + } else if (b.low < a.low) { + aIsLargerSignificand = 1; + } else { + aIsLargerSignificand = (a.high < b.high) ? 1 : 0; + } -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ + if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + aIsLargerSignificand)) { + return floatx80_maybe_silence_nan(b, status); + } else { + return floatx80_maybe_silence_nan(a, status); + } +} -bits64 extractFloatx80Frac( floatx80 a ) +#ifdef NO_SIGNALING_NANS +int float128_is_quiet_nan(float128 a_, float_status *status) { - - return a.low; - + return float128_is_any_nan(a_); } -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -int32 extractFloatx80Exp( floatx80 a ) +int float128_is_signaling_nan(float128 a_, float_status *status) { - - return a.high & 0x7FFF; - + return 0; } - +#else /*---------------------------------------------------------------------------- -| Returns the sign bit of the extended double-precision floating-point value -| `a'. +| Returns 1 if the quadruple-precision floating-point value `a' is a quiet +| NaN; otherwise returns 0. 
*----------------------------------------------------------------------------*/ -INLINE flag extractFloatx80Sign( floatx80 a ) +static int float128_is_quiet_nan(float128 a, float_status *status) { - - return a.high>>15; - + if (status->snan_bit_is_one) { + return (((a.high >> 47) & 0xFFFF) == 0xFFFE) + && (a.low || (a.high & 0x00007FFFFFFFFFFFULL)); + } else { + return ((a.high << 1) >= 0xFFFF000000000000ULL) + && (a.low || (a.high & 0x0000FFFFFFFFFFFFULL)); + } } -#endif - -#ifdef FLOATX128 - -/*---------------------------------------------------------------------------- -| The pattern for a default generated quadruple-precision NaN. The `high' and -| `low' values hold the most- and least-significant bits, respectively. -*----------------------------------------------------------------------------*/ -#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF ) -#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) - /*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is a NaN; -| otherwise returns 0. +| Returns 1 if the quadruple-precision floating-point value `a' is a +| signaling NaN; otherwise returns 0. *----------------------------------------------------------------------------*/ -flag float128_is_nan( float128 a ) +static int float128_is_signaling_nan(float128 a, float_status *status) { - - return - ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) - && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); - + if (status->snan_bit_is_one) { + return ((a.high << 1) >= 0xFFFF000000000000ULL) + && (a.low || (a.high & 0x0000FFFFFFFFFFFFULL)); + } else { + return (((a.high >> 47) & 0xFFFF) == 0xFFFE) + && (a.low || (a.high & LIT64(0x00007FFFFFFFFFFF))); + } } +#endif /*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is a -| signaling NaN; otherwise returns 0. 
+| Returns a quiet NaN if the quadruple-precision floating point value `a' is +| a signaling NaN; otherwise returns `a'. *----------------------------------------------------------------------------*/ -flag float128_is_signaling_nan( float128 a ) +static float128 float128_maybe_silence_nan(float128 a, float_status *status) { - - return - ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) - && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); - + if (float128_is_signaling_nan(a, status)) { + if (status->snan_bit_is_one) { + a = float128_default_nan(status); + } else { + a.high |= LIT64(0x0000800000000000); + return a; + } + } + return a; } /*---------------------------------------------------------------------------- @@ -498,15 +1179,16 @@ flag float128_is_signaling_nan( float128 a ) | exception is raised. *----------------------------------------------------------------------------*/ -static commonNaNT float128ToCommonNaN( float128 a ) +static commonNaNT float128ToCommonNaN(float128 a, float_status *status) { commonNaNT z; - if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a.high>>63; - shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); + if (float128_is_signaling_nan(a, status)) { + float_raise(float_flag_invalid, status); + } + z.sign = a.high >> 63; + shortShift128Left(a.high, a.low, 16, &z.high, &z.low); return z; - } /*---------------------------------------------------------------------------- @@ -514,14 +1196,17 @@ static commonNaNT float128ToCommonNaN( float128 a ) | precision floating-point format. 
*----------------------------------------------------------------------------*/ -static float128 commonNaNToFloat128( commonNaNT a ) +static float128 commonNaNToFloat128(commonNaNT a, float_status *status) { float128 z; - shift128Right( a.high, a.low, 16, &z.high, &z.low ); - z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 ); - return z; + if (status->default_nan_mode) { + return float128_default_nan(status); + } + shift128Right(a.high, a.low, 16, &z.high, &z.low); + z.high |= (((uint64_t)a.sign) << 63) | LIT64(0x7FFF000000000000); + return z; } /*---------------------------------------------------------------------------- @@ -530,25 +1215,114 @@ static float128 commonNaNToFloat128( commonNaNT a ) | `b' is a signaling NaN, the invalid exception is raised. *----------------------------------------------------------------------------*/ -static float128 propagateFloat128NaN( float128 a, float128 b ) +static float128 propagateFloat128NaN(float128 a, float128 b, + float_status *status) { - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN; + flag aIsLargerSignificand; + + aIsQuietNaN = float128_is_quiet_nan(a, status); + aIsSignalingNaN = float128_is_signaling_nan(a, status); + bIsQuietNaN = float128_is_quiet_nan(b, status); + bIsSignalingNaN = float128_is_signaling_nan(b, status); - aIsNaN = float128_is_nan( a ); - aIsSignalingNaN = float128_is_signaling_nan( a ); - bIsNaN = float128_is_nan( b ); - bIsSignalingNaN = float128_is_signaling_nan( b ); - a.high |= LIT64( 0x0000800000000000 ); - b.high |= LIT64( 0x0000800000000000 ); - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? 
b : a; + if (aIsSignalingNaN | bIsSignalingNaN) { + float_raise(float_flag_invalid, status); } - else { - return b; + + if (status->default_nan_mode) { + return float128_default_nan(status); + } + + if (lt128(a.high << 1, a.low, b.high << 1, b.low)) { + aIsLargerSignificand = 0; + } else if (lt128(b.high << 1, b.low, a.high << 1, a.low)) { + aIsLargerSignificand = 1; + } else { + aIsLargerSignificand = (a.high < b.high) ? 1 : 0; } + if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN, + aIsLargerSignificand)) { + return float128_maybe_silence_nan(b, status); + } else { + return float128_maybe_silence_nan(a, status); + } } -#endif +// 28-12-2016: Added for Previous: + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | zero; otherwise returns 0. + *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_zero( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0 ) && ( a.low == 0 ); + +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | infinity; otherwise returns 0. + *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_infinity( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && ( (uint64_t) ( a.low<<1 ) == 0 ); + +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | negative; otherwise returns 0. 
+ *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_negative( floatx80 a ) +{ + + return ( ( a.high & 0x8000 ) == 0x8000 ); + +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | unnormal; otherwise returns 0. + *----------------------------------------------------------------------------*/ +static inline flag floatx80_is_unnormal( floatx80 a ) +{ + return + ( ( a.high & 0x7FFF ) > 0 ) + && ( ( a.high & 0x7FFF ) < 0x7FFF) + && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ); +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | denormal; otherwise returns 0. + *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_denormal( floatx80 a ) +{ + return + ( ( a.high & 0x7FFF ) == 0 ) + && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ) + && (uint64_t) ( a.low<<1 ); +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | normal; otherwise returns 0. 
+ *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_normal( floatx80 a ) +{ + return + ( ( a.high & 0x7FFF ) < 0x7FFF ) + && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x8000000000000000 ) ); +} +// End of addition for Previous diff --git a/softfloat/softfloat.cpp b/softfloat/softfloat.cpp index 9a26fa16..64c435d3 100644 --- a/softfloat/softfloat.cpp +++ b/softfloat/softfloat.cpp @@ -1,8 +1,32 @@ -/*============================================================================ - -This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. +#define SOFTFLOAT_68K + +#include +#include +#include "softfloat/softfloat.h" + + +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +/* +=============================================================================== +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -11,38 +35,66 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. 
Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -#include "milieu.h" -#include "softfloat.h" +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +/* We only need stdlib for abort() */ /*---------------------------------------------------------------------------- -| Floating-point rounding mode, extended double-precision rounding precision, -| and exception flags. 
+| Primitive arithmetic functions, including multi-word arithmetic, and +| division and square root approximations. (Can be specialized to target if +| desired.) *----------------------------------------------------------------------------*/ -int8 float_exception_flags = 0; -#ifdef FLOATX80 -int8 floatx80_rounding_precision = 80; -#endif - -int8 float_rounding_mode = float_round_nearest_even; +#include "softfloat-macros.h" /*---------------------------------------------------------------------------- | Functions and definitions to determine: (1) whether tininess for underflow @@ -54,6 +106,33 @@ int8 float_rounding_mode = float_round_nearest_even; *----------------------------------------------------------------------------*/ #include "softfloat-specialize.h" +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the half-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +static inline uint32_t extractFloat16Frac(float16 a) +{ + return float16_val(a) & 0x3ff; +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the half-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +static inline int extractFloat16Exp(float16 a) +{ + return (float16_val(a) >> 10) & 0x1f; +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the single-precision floating-point value `a'. 
+*----------------------------------------------------------------------------*/ + +static inline flag extractFloat16Sign(float16 a) +{ + return float16_val(a)>>15; +} + /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 | and 7, and returns the properly rounded 32-bit integer corresponding to the @@ -65,42 +144,45 @@ int8 float_rounding_mode = float_round_nearest_even; | positive or negative integer is returned. *----------------------------------------------------------------------------*/ -static int32 roundAndPackInt32( flag zSign, bits64 absZ ) -{ - int8 roundingMode; - flag roundNearestEven; - int8 roundIncrement, roundBits; - int32 z; - - roundingMode = float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } - } - roundBits = absZ & 0x7F; - absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - z = absZ; - if ( zSign ) z = - z; - z = (sbits32) z; - if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { - float_raise( float_flag_invalid ); - return zSign ? 
(sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( roundBits ) float_exception_flags |= float_flag_inexact; - return z; +static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status) +{ + int8_t roundingMode; + flag roundNearestEven; + int8_t roundIncrement, roundBits; + int32_t z; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); + } + roundBits = absZ & 0x7F; + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = absZ; + if ( zSign ) z = - z; + if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_raise(float_flag_invalid, status); + return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + return z; } @@ -116,55 +198,116 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ ) | returned. 
*----------------------------------------------------------------------------*/ -static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 ) -{ - int8 roundingMode; - flag roundNearestEven, increment; - int64 z; +static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, + float_status *status) +{ + int8_t roundingMode; + flag roundNearestEven, increment; + int64_t z; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t) absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); + } + if ( increment ) { + ++absZ0; + if ( absZ0 == 0 ) goto overflow; + absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + } + z = absZ0; + if ( zSign ) z = - z; + if ( z && ( ( z < 0 ) ^ zSign ) ) { + overflow: + float_raise(float_flag_invalid, status); + return + zSign ? (int64_t) LIT64( 0x8000000000000000 ) + : LIT64( 0x7FFFFFFFFFFFFFFF ); + } + if (absZ1) { + status->float_exception_flags |= float_flag_inexact; + } + return z; - roundingMode = float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (sbits64) absZ1 < 0 ); - if ( ! 
roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && absZ1; - } - else { - increment = ( roundingMode == float_round_up ) && absZ1; - } - } - } - if ( increment ) { - ++absZ0; - if ( absZ0 == 0 ) goto overflow; - absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); - } - z = absZ0; - if ( zSign ) z = - z; - z = (sbits64) z; - if ( z && ( ( z < 0 ) ^ zSign ) ) { - overflow: - float_raise( float_flag_invalid ); - return - zSign ? (sbits64) LIT64( 0x8000000000000000 ) - : LIT64( 0x7FFFFFFFFFFFFFFF ); - } - if ( absZ1 ) float_exception_flags |= float_flag_inexact; - return z; +} + +/*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit unsigned integer corresponding to the +| input. Ordinarily, the fixed-point input is simply rounded to an integer, +| with the inexact exception raised if the input cannot be represented exactly +| as an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the largest unsigned integer is returned. 
+*----------------------------------------------------------------------------*/ + +static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0, + uint64_t absZ1, float_status *status) +{ + int8_t roundingMode; + flag roundNearestEven, increment; + + roundingMode = status->float_rounding_mode; + roundNearestEven = (roundingMode == float_round_nearest_even); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); + } + if (increment) { + ++absZ0; + if (absZ0 == 0) { + float_raise(float_flag_invalid, status); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); + } + + if (zSign && absZ0) { + float_raise(float_flag_invalid, status); + return 0; + } + if (absZ1) { + status->float_exception_flags |= float_flag_inexact; + } + return absZ0; } /*---------------------------------------------------------------------------- | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE bits32 extractFloat32Frac( float32 a ) +static inline uint32_t extractFloat32Frac( float32 a ) { - return a & 0x007FFFFF; + + return float32_val(a) & 0x007FFFFF; } @@ -172,9 +315,10 @@ INLINE bits32 extractFloat32Frac( float32 a ) | Returns the exponent bits of the single-precision floating-point value `a'. 
*----------------------------------------------------------------------------*/ -INLINE int16 extractFloat32Exp( float32 a ) +static inline int extractFloat32Exp(float32 a) { - return ( a>>23 ) & 0xFF; + + return ( float32_val(a)>>23 ) & 0xFF; } @@ -182,10 +326,26 @@ INLINE int16 extractFloat32Exp( float32 a ) | Returns the sign bit of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE flag extractFloat32Sign( float32 a ) +static inline flag extractFloat32Sign( float32 a ) { - return a>>31; + return float32_val(a)>>31; + +} + +/*---------------------------------------------------------------------------- +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. +*----------------------------------------------------------------------------*/ +float32 float32_squash_input_denormal(float32 a, float_status *status) +{ + if (status->flush_inputs_to_zero) { + if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) { + float_raise(float_flag_input_denormal, status); + return make_float32(float32_val(a) & 0x80000000); + } + } + return a; } /*---------------------------------------------------------------------------- @@ -196,13 +356,13 @@ INLINE flag extractFloat32Sign( float32 a ) *----------------------------------------------------------------------------*/ static void - normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) + normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr) { - int8 shiftCount; + int8_t shiftCount; - shiftCount = countLeadingZeros32( aSig ) - 8; - *zSigPtr = aSig<>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - if ( zSig == 0 ) zExp = 0; - return packFloat32( zSign, zExp, zSig ); +static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, + float_status *status) +{ + int8_t roundingMode; + flag 
roundNearestEven; + int8_t roundIncrement, roundBits; + flag isTiny; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); + break; + } + roundBits = zSig & 0x7F; + if ( 0xFD <= (uint16_t) zExp ) { + if ( ( 0xFD < zExp ) + || ( ( zExp == 0xFD ) + && ( (int32_t) ( zSig + roundIncrement ) < 0 ) ) + ) { + float_raise(float_flag_overflow | float_flag_inexact, status); + return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 )); + } + if ( zExp < 0 ) { + if (status->flush_to_zero) { + float_raise(float_flag_output_denormal, status); + return packFloat32(zSign, 0, 0); + } + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); + shift32RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x7F; + if (isTiny && roundBits) { + float_raise(float_flag_underflow, status); + } + } + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + zSig = ( zSig + roundIncrement )>>7; + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat32( zSign, zExp, zSig ); } @@ -307,12 +482,14 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) *----------------------------------------------------------------------------*/ static float32 - normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) + normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, + float_status *status) { - int8 shiftCount; + int8_t shiftCount; - shiftCount = countLeadingZeros32( zSig ) - 1; - 
return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<>52 ) & 0x7FF; + + return ( float64_val(a)>>52 ) & 0x7FF; } @@ -340,12 +519,28 @@ INLINE int16 extractFloat64Exp( float64 a ) | Returns the sign bit of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE flag extractFloat64Sign( float64 a ) +static inline flag extractFloat64Sign( float64 a ) { - return a>>63; + + return float64_val(a)>>63; } +/*---------------------------------------------------------------------------- +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. +*----------------------------------------------------------------------------*/ +float64 float64_squash_input_denormal(float64 a, float_status *status) +{ + if (status->flush_inputs_to_zero) { + if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) { + float_raise(float_flag_input_denormal, status); + return make_float64(float64_val(a) & (1ULL << 63)); + } + } + return a; +} + /*---------------------------------------------------------------------------- | Normalizes the subnormal double-precision floating-point value represented | by the denormalized significand `aSig'. 
The normalized exponent and @@ -354,13 +549,13 @@ INLINE flag extractFloat64Sign( float64 a ) *----------------------------------------------------------------------------*/ static void - normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) + normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr) { - int8 shiftCount; + int8_t shiftCount; - shiftCount = countLeadingZeros64( aSig ) - 11; - *zSigPtr = aSig<>10; - zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); - if ( zSig == 0 ) zExp = 0; - return packFloat64( zSign, zExp, zSig ); +static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, + float_status *status) +{ + int8_t roundingMode; + flag roundNearestEven; + int roundIncrement, roundBits; + flag isTiny; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x200; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x3ff; + break; + case float_round_down: + roundIncrement = zSign ? 
0x3ff : 0; + break; + default: + abort(); + } + roundBits = zSig & 0x3FF; + if ( 0x7FD <= (uint16_t) zExp ) { + if ( ( 0x7FD < zExp ) + || ( ( zExp == 0x7FD ) + && ( (int64_t) ( zSig + roundIncrement ) < 0 ) ) + ) { + float_raise(float_flag_overflow | float_flag_inexact, status); + return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 )); + } + if ( zExp < 0 ) { + if (status->flush_to_zero) { + float_raise(float_flag_output_denormal, status); + return packFloat64(zSign, 0, 0); + } + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) + || ( zExp < -1 ) + || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); + shift64RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x3FF; + if (isTiny && roundBits) { + float_raise(float_flag_underflow, status); + } + } + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + zSig = ( zSig + roundIncrement )>>10; + zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat64( zSign, zExp, zSig ); } @@ -465,16 +674,52 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) *----------------------------------------------------------------------------*/ static float64 - normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) + normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, + float_status *status) +{ + int8_t shiftCount; + + shiftCount = countLeadingZeros64( zSig ) - 1; + return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<>15; + +} /*---------------------------------------------------------------------------- | Normalizes the subnormal extended double-precision floating-point value @@ -484,13 +729,32 @@ static float64 *----------------------------------------------------------------------------*/ static void - normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) + normalizeFloatx80Subnormal( uint64_t aSig, int32_t 
*zExpPtr, uint64_t *zSigPtr ) { - int8 shiftCount; + int8_t shiftCount; - shiftCount = countLeadingZeros64( aSig ); - *zSigPtr = aSig<float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 80 ) goto precision80; + if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } + else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } + else { + goto precision80; + } + zSig0 |= ( zSig1 != 0 ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : roundMask; + break; + case float_round_down: + roundIncrement = zSign ? roundMask : 0; + break; + default: + abort(); + } + roundBits = zSig0 & roundMask; +#ifdef SOFTFLOAT_68K + if ( 0x7FFE <= (uint32_t) zExp ) { +#else + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { +#endif + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + goto overflow; + } +#ifdef SOFTFLOAT_68K + if ( zExp < 0 ) { +#else if ( zExp <= 0 ) { - isTiny = - ( float_detect_tininess == float_tininess_before_rounding ) +#endif + if (status->flush_to_zero) { + float_raise(float_flag_output_denormal, status); + return packFloatx80(zSign, 0, 0); + } + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) +#ifdef SOFTFLOAT_68K + || ( zExp < -1 ) +#else || ( zExp < 0 ) +#endif || ( zSig0 <= zSig0 + roundIncrement ); +#ifdef SOFTFLOAT_68K + shift64RightJamming( zSig0, -zExp, &zSig0 ); +#else shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); - zExp = 0; - roundBits = zSig0 & roundMask; - if ( isTiny && roundBits ) float_raise( float_flag_underflow ); - if ( roundBits ) float_exception_flags |= float_flag_inexact; - zSig0 += roundIncrement; - if ( 
(sbits64) zSig0 < 0 ) zExp = 1; - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if ( roundBits ) float_exception_flags |= float_flag_inexact; - zSig0 += roundIncrement; - if ( zSig0 < roundIncrement ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - return packFloatx80( zSign, zExp, zSig0 ); - precision80: - increment = ( (sbits64) zSig1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } - } - } - if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) - && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) - && increment - ) - ) { - roundMask = 0; - overflow: - float_raise( float_flag_overflow | float_flag_inexact ); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! 
zSign && ( roundingMode == float_round_down ) ) - ) { - return packFloatx80( zSign, 0x7FFE, ~ roundMask ); - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } +#endif + zExp = 0; + roundBits = zSig0 & roundMask; + if (isTiny && roundBits) { + float_raise(float_flag_underflow, status); + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + zSig0 += roundIncrement; +#ifndef SOFTFLOAT_68K + if ( (int64_t) zSig0 < 0 ) zExp = 1; +#endif + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + precision80: + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); + } +#ifdef SOFTFLOAT_68K + if ( 0x7FFE <= (uint32_t) zExp ) { +#else + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { +#endif + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) + && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) + && increment + ) + ) { + roundMask = 0; + overflow: + float_raise(float_flag_overflow | float_flag_inexact, status); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! 
zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } +#ifdef SOFTFLOAT_68K + if ( zExp < 0 ) { +#else if ( zExp <= 0 ) { - isTiny = - ( float_detect_tininess == float_tininess_before_rounding ) +#endif + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) +#ifdef SOFTFLOAT_68K + || ( zExp < -1 ) +#else || ( zExp < 0 ) - || ! increment - || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); +#endif + || ! increment + || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); +#ifdef SOFTFLOAT_68K + shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 ); +#else shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); - zExp = 0; - if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); - if ( zSig1 ) float_exception_flags |= float_flag_inexact; - if ( roundNearestEven ) { - increment = ( (sbits64) zSig1 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } - } - if ( increment ) { - ++zSig0; - zSig0 &= - ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); - if ( (sbits64) zSig0 < 0 ) zExp = 1; - } - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if ( zSig1 ) float_exception_flags |= float_flag_inexact; - if ( increment ) { - ++zSig0; - if ( zSig0 == 0 ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - else { - zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); - } - } - else { - if ( zSig0 == 0 ) zExp = 0; - } - return packFloatx80( zSign, zExp, zSig0 ); +#endif + zExp = 0; + if (isTiny && zSig1) { + float_raise(float_flag_underflow, status); + } + if (zSig1) { + status->float_exception_flags |= float_flag_inexact; + } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case 
float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); + } + if ( increment ) { + ++zSig0; + zSig0 &= + ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); +#ifndef SOFTFLOAT_68K + if ( (int64_t) zSig0 < 0 ) zExp = 1; +#endif + } + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if (zSig1) { + status->float_exception_flags |= float_flag_inexact; + } + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + else { + zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + } + } + else { + if ( zSig0 == 0 ) zExp = 0; + } + return packFloatx80( zSign, zExp, zSig0 ); } @@ -688,38 +1014,35 @@ static void | normalized. *----------------------------------------------------------------------------*/ -static floatx80 - normalizeRoundAndPackFloatx80( - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 - ) +static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, + flag zSign, int32_t zExp, + uint64_t zSig0, uint64_t zSig1, + float_status *status) { - int8 shiftCount; + int8_t shiftCount; - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = countLeadingZeros64( zSig0 ); - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - zExp -= shiftCount; - return - roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ); + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + zExp -= shiftCount; + return roundAndPackFloatx80(roundingPrecision, zSign, zExp, + zSig0, zSig1, status); } -#endif - -#ifdef FLOATX128 - /*---------------------------------------------------------------------------- | Returns the least-significant 64 fraction bits of the 
quadruple-precision | floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE bits64 extractFloat128Frac1( float128 a ) +static inline uint64_t extractFloat128Frac1( float128 a ) { - return a.low; + + return a.low; } @@ -728,9 +1051,10 @@ INLINE bits64 extractFloat128Frac1( float128 a ) | floating-point value `a'. *----------------------------------------------------------------------------*/ -INLINE bits64 extractFloat128Frac0( float128 a ) +static inline uint64_t extractFloat128Frac0( float128 a ) { - return a.high & LIT64( 0x0000FFFFFFFFFFFF ); + + return a.high & LIT64( 0x0000FFFFFFFFFFFF ); } @@ -739,9 +1063,10 @@ INLINE bits64 extractFloat128Frac0( float128 a ) | `a'. *----------------------------------------------------------------------------*/ -INLINE int32 extractFloat128Exp( float128 a ) +static inline int32_t extractFloat128Exp( float128 a ) { - return ( a.high>>48 ) & 0x7FFF; + + return ( a.high>>48 ) & 0x7FFF; } @@ -749,9 +1074,10 @@ INLINE int32 extractFloat128Exp( float128 a ) | Returns the sign bit of the quadruple-precision floating-point value `a'. 
*----------------------------------------------------------------------------*/ -INLINE flag extractFloat128Sign( float128 a ) +static inline flag extractFloat128Sign( float128 a ) { - return a.high>>63; + + return a.high>>63; } @@ -766,160 +1092,337 @@ INLINE flag extractFloat128Sign( float128 a ) *----------------------------------------------------------------------------*/ static void - normalizeFloat128Subnormal( - bits64 aSig0, - bits64 aSig1, - int32 *zExpPtr, - bits64 *zSig0Ptr, - bits64 *zSig1Ptr - ) -{ - int8 shiftCount; - - if ( aSig0 == 0 ) { - shiftCount = countLeadingZeros64( aSig1 ) - 15; - if ( shiftCount < 0 ) { - *zSig0Ptr = aSig1>>( - shiftCount ); - *zSig1Ptr = aSig1<<( shiftCount & 63 ); - } - else { - *zSig0Ptr = aSig1<>( - shiftCount ); + *zSig1Ptr = aSig1<<( shiftCount & 63 ); + } + else { + *zSig0Ptr = aSig1<float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig2 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig2; + break; + case float_round_down: + increment = zSign && zSig2; + break; + default: + abort(); + } + if ( 0x7FFD <= (uint32_t) zExp ) { + if ( ( 0x7FFD < zExp ) + || ( ( zExp == 0x7FFD ) + && eq128( + LIT64( 0x0001FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ), + zSig0, + zSig1 + ) + && increment + ) + ) { + float_raise(float_flag_overflow | float_flag_inexact, status); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! 
zSign && ( roundingMode == float_round_down ) ) + ) { + return + packFloat128( + zSign, + 0x7FFE, + LIT64( 0x0000FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ) + ); + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( zExp < 0 ) { + if (status->flush_to_zero) { + float_raise(float_flag_output_denormal, status); + return packFloat128(zSign, 0, 0, 0); + } + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) + || ( zExp < -1 ) + || ! increment + || lt128( + zSig0, + zSig1, + LIT64( 0x0001FFFFFFFFFFFF ), + LIT64( 0xFFFFFFFFFFFFFFFF ) + ); + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); + zExp = 0; + if (isTiny && zSig2) { + float_raise(float_flag_underflow, status); + } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig2 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig2; + break; + case float_round_down: + increment = zSign && zSig2; + break; + default: + abort(); + } + } + } + if (zSig2) { + status->float_exception_flags |= float_flag_inexact; + } + if ( increment ) { + add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); + zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); + } + else { + if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; + } + return packFloat128( zSign, zExp, zSig0, zSig1 ); } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand formed by the concatenation of `zSig0' and `zSig1', and +| returns the proper quadruple-precision floating-point value corresponding +| to the abstract input. This routine is just like `roundAndPackFloat128' +| except that the input significand has fewer bits and does not have to be +| normalized. 
In all cases, `zExp' must be 1 less than the ``true'' floating- +| point exponent. +*----------------------------------------------------------------------------*/ + +static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, + uint64_t zSig0, uint64_t zSig1, + float_status *status) +{ + int8_t shiftCount; + uint64_t zSig2; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ) - 15; + if ( 0 <= shiftCount ) { + zSig2 = 0; + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + } + else { + shift128ExtraRightJamming( + zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); + } + zExp -= shiftCount; + return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 32-bit two's complement integer `a' +| to the single-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 int32_to_float32(int32_t a, float_status *status) +{ + flag zSign; + + if ( a == 0 ) return float32_zero; + if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); + zSign = ( a < 0 ); + return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 32-bit two's complement integer `a' +| to the double-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float64 int32_to_float64(int32_t a, float_status *status) +{ + flag zSign; + uint32_t absA; + int8_t shiftCount; + uint64_t zSig; + + if ( a == 0 ) return float64_zero; + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 21; + zSig = absA; + return packFloat64( zSign, 0x432 - shiftCount, zSig<= 0) { + return packFloat32(0, 0x95 - shiftcount, a << shiftcount); + } + /* Otherwise we need to do a round-and-pack. roundAndPackFloat32() + * expects the binary point between bits 30 and 29, hence the + 7. + */ + shiftcount += 7; + if (shiftcount < 0) { + shift64RightJamming(a, -shiftcount, &a); + } else { + a <<= shiftcount; + } + + return roundAndPackFloat32(0, 0x9c - shiftcount, a, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit unsigned integer `a' +| to the double-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 uint64_to_float64(uint64_t a, float_status *status) +{ + int exp = 0x43C; + int shiftcount; + if (a == 0) { + return float64_zero; + } + + shiftcount = countLeadingZeros64(a) - 1; + if (shiftcount < 0) { + shift64RightJamming(a, -shiftcount, &a); + } else { + a <<= shiftcount; + } + return roundAndPackFloat64(0, exp - shiftcount, a, status); } +#if 0 +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit unsigned integer `a' +| to the quadruple-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float128 uint64_to_float128(uint64_t a, float_status *status) +{ + if (a == 0) { + return float128_zero; + } + return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status); +} #endif /*---------------------------------------------------------------------------- @@ -1014,23 +1583,25 @@ float128 int64_to_float128( int64 a ) | largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -int32 float32_to_int32( float32 a ) +int32_t float32_to_int32(float32 a, float_status *status) { - flag aSign; - int16 aExp, shiftCount; - bits32 aSig; - bits64 aSig64; + flag aSign; + int aExp; + int shiftCount; + uint32_t aSig; + uint64_t aSig64; - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( ( aExp == 0xFF ) && aSig ) aSign = 0; - if ( aExp ) aSig |= 0x00800000; - shiftCount = 0xAF - aExp; - aSig64 = aSig; - aSig64 <<= 32; - if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); - return roundAndPackInt32( aSign, aSig64 ); + a = float32_squash_input_denormal(a, status); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( ( aExp == 0xFF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= 0x00800000; + shiftCount = 0xAF - aExp; + aSig64 = aSig; + aSig64 <<= 32; + if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); + return roundAndPackInt32(aSign, aSig64, status); } @@ -1044,35 +1615,89 @@ int32 float32_to_int32( float32 a ) | returned. 
*----------------------------------------------------------------------------*/ -int32 float32_to_int32_round_to_zero( float32 a ) +int32_t float32_to_int32_round_to_zero(float32 a, float_status *status) { - flag aSign; - int16 aExp, shiftCount; - bits32 aSig; - int32 z; + flag aSign; + int aExp; + int shiftCount; + uint32_t aSig; + int32_t z; + a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = aExp - 0x9E; - if ( 0 <= shiftCount ) { - if ( a != 0xCF000000 ) { - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; - } - return (sbits32) 0x80000000; - } - else if ( aExp <= 0x7E ) { - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig = ( aSig | 0x00800000 )<<8; - z = aSig>>( - shiftCount ); - if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - if ( aSign ) z = - z; - return z; + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; + if ( 0 <= shiftCount ) { + if ( float32_val(a) != 0xCF000000 ) { + float_raise(float_flag_invalid, status); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + } + return (int32_t) 0x80000000; + } + else if ( aExp <= 0x7E ) { + if (aExp | aSig) { + status->float_exception_flags |= float_flag_inexact; + } + return 0; + } + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) { + status->float_exception_flags |= float_flag_inexact; + } + if ( aSign ) z = - z; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 16-bit two's complement integer format. 
The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN, the largest positive integer is returned. Otherwise, if +| the conversion overflows, the largest integer with the same sign as `a' is +| returned. +*----------------------------------------------------------------------------*/ + +int16_t float32_to_int16_round_to_zero(float32 a, float_status *status) +{ + flag aSign; + int aExp; + int shiftCount; + uint32_t aSig; + int32_t z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x8E; + if ( 0 <= shiftCount ) { + if ( float32_val(a) != 0xC7000000 ) { + float_raise(float_flag_invalid, status); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return 0x7FFF; + } + } + return (int32_t) 0xffff8000; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) { + status->float_exception_flags |= float_flag_inexact; + } + return 0; + } + shiftCount -= 0x10; + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) { + status->float_exception_flags |= float_flag_inexact; + } + if ( aSign ) { + z = - z; + } + return z; } @@ -1086,30 +1711,99 @@ int32 float32_to_int32_round_to_zero( float32 a ) | largest integer with the same sign as `a' is returned. 
*----------------------------------------------------------------------------*/ -int64 float32_to_int64( float32 a ) +int64_t float32_to_int64(float32 a, float_status *status) { - flag aSign; - int16 aExp, shiftCount; - bits32 aSig; - bits64 aSig64, aSigExtra; + flag aSign; + int aExp; + int shiftCount; + uint32_t aSig; + uint64_t aSig64, aSigExtra; + a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = 0xBE - aExp; - if ( shiftCount < 0 ) { - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - if ( aExp ) aSig |= 0x00800000; - aSig64 = aSig; - aSig64 <<= 40; - shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); - return roundAndPackInt64( aSign, aSig64, aSigExtra ); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = 0xBE - aExp; + if ( shiftCount < 0 ) { + float_raise(float_flag_invalid, status); + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + if ( aExp ) aSig |= 0x00800000; + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); + return roundAndPackInt64(aSign, aSig64, aSigExtra, status); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| unsigned integer is returned. 
Otherwise, if the conversion overflows, the +| largest unsigned integer is returned. If the 'a' is negative, the result +| is rounded and zero is returned; values that do not round to zero will +| raise the inexact exception flag. +*----------------------------------------------------------------------------*/ + +uint64_t float32_to_uint64(float32 a, float_status *status) +{ + flag aSign; + int aExp; + int shiftCount; + uint32_t aSig; + uint64_t aSig64, aSigExtra; + a = float32_squash_input_denormal(a, status); + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + if ((aSign) && (aExp > 126)) { + float_raise(float_flag_invalid, status); + if (float32_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + shiftCount = 0xBE - aExp; + if (aExp) { + aSig |= 0x00800000; + } + if (shiftCount < 0) { + float_raise(float_flag_invalid, status); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra); + return roundAndPackUint64(aSign, aSig64, aSigExtra, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. If +| `a' is a NaN, the largest unsigned integer is returned. Otherwise, if the +| conversion overflows, the largest unsigned integer is returned. If the +| 'a' is negative, the result is rounded and zero is returned; values that do +| not round to zero will raise the inexact flag. 
+*----------------------------------------------------------------------------*/ +uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *status) +{ + signed char current_rounding_mode = status->float_rounding_mode; + set_float_rounding_mode(float_round_to_zero, status); + int64_t v = float32_to_uint64(a, status); + set_float_rounding_mode(current_rounding_mode, status); + return v; } /*---------------------------------------------------------------------------- @@ -1122,39 +1816,43 @@ int64 float32_to_int64( float32 a ) | returned. *----------------------------------------------------------------------------*/ -int64 float32_to_int64_round_to_zero( float32 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits32 aSig; - bits64 aSig64; - int64 z; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = aExp - 0xBE; - if ( 0 <= shiftCount ) { - if ( a != 0xDF000000 ) { - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - else if ( aExp <= 0x7E ) { - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig64 = aSig | 0x00800000; - aSig64 <<= 40; - z = aSig64>>( - shiftCount ); - if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - if ( aSign ) z = - z; - return z; +int64_t float32_to_int64_round_to_zero(float32 a, float_status *status) +{ + flag aSign; + int aExp; + int shiftCount; + uint32_t aSig; + uint64_t aSig64; + int64_t z; + a = float32_squash_input_denormal(a, status); + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0xBE; + if ( 0 <= shiftCount ) { + if ( float32_val(a) != 0xDF000000 ) { + float_raise(float_flag_invalid, status); + if ( ! 
aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + else if ( aExp <= 0x7E ) { + if (aExp | aSig) { + status->float_exception_flags |= float_flag_inexact; + } + return 0; + } + aSig64 = aSig | 0x00800000; + aSig64 <<= 40; + z = aSig64>>( - shiftCount ); + if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) { + status->float_exception_flags |= float_flag_inexact; + } + if ( aSign ) z = - z; + return z; } @@ -1165,30 +1863,31 @@ int64 float32_to_int64_round_to_zero( float32 a ) | Arithmetic. *----------------------------------------------------------------------------*/ -float64 float32_to_float64( float32 a ) +float64 float32_to_float64(float32 a, float_status *status) { - flag aSign; - int16 aExp; - bits32 aSig; + flag aSign; + int aExp; + uint32_t aSig; + a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); - return packFloat64( aSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if (aSig) { + return commonNaNToFloat64(float32ToCommonNaN(a, status), status); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 ); } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value | `a' 
to the extended double-precision floating-point format. The conversion @@ -1196,55 +1895,54 @@ float64 float32_to_float64( float32 a ) | Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 float32_to_floatx80( float32 a ) -{ - flag aSign; - int16 aExp; - bits32 aSig; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - aSig |= 0x00800000; - return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); - -} - -// 31-12-2016: Added for Previous -floatx80 float32_to_floatx80_allowunnormal( float32 a ) +floatx80 float32_to_floatx80(float32 a, float_status *status) { flag aSign; - int16 aExp; - bits32 aSig; - + int aExp; + uint32_t aSig; + + a = float32_squash_input_denormal(a, status); aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); + if (aSig) { + return commonNaNToFloatx80(float32ToCommonNaN(a, status), status); + } return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( aExp == 0 ) { if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - return packFloatx80( aSign, 0x3F81, ( (bits64) aSig )<<40 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); } aSig |= 0x00800000; - return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); - -} -// end of addition for Previous + return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 ); -#endif +} -#ifdef FLOATX128 +#ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous +floatx80 float32_to_floatx80_allowunnormal(float32 a , float_status *status) +{ + 
flag aSign; + int16_t aExp; + uint32_t aSig; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + if (aExp == 0xFF) { + if (aSig) return commonNaNToFloatx80(float32ToCommonNaN(a, status), status); + return packFloatx80(aSign, 0x7FFF, LIT64(0x8000000000000000)); + } + if (aExp == 0) { + if (aSig == 0) return packFloatx80(aSign, 0, 0); + return packFloatx80(aSign, 0x3F81, ((uint64_t) aSig) << 40); + } + aSig |= 0x00800000; + return packFloatx80(aSign, aExp + 0x3F80, ((uint64_t)aSig) << 40); + +} +#endif // end of addition for Previous /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value @@ -1253,30 +1951,31 @@ floatx80 float32_to_floatx80_allowunnormal( float32 a ) | Arithmetic. *----------------------------------------------------------------------------*/ -float128 float32_to_float128( float32 a ) +float128 float32_to_float128(float32 a, float_status *status) { - flag aSign; - int16 aExp; - bits32 aSig; + flag aSign; + int aExp; + uint32_t aSig; - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) ); - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); + a = float32_squash_input_denormal(a, status); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if (aSig) { + return commonNaNToFloat128(float32ToCommonNaN(a, status), status); + } + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat32Subnormal( aSig, 
&aExp, &aSig ); + --aExp; + } + return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 ); } -#endif - /*---------------------------------------------------------------------------- | Rounds the single-precision floating-point value `a' to an integer, and | returns the result as a single-precision floating-point value. The @@ -1284,126 +1983,163 @@ float128 float32_to_float128( float32 a ) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 float32_round_to_int( float32 a ) +float32 float32_round_to_int(float32 a, float_status *status) { - flag aSign; - int16 aExp; - bits32 lastBitMask, roundBitsMask; - int8 roundingMode; - float32 z; - - aExp = extractFloat32Exp( a ); - if ( 0x96 <= aExp ) { - if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { - return propagateFloat32NaN( a, a ); - } - return a; - } - if ( aExp <= 0x7E ) { - if ( (bits32) ( a<<1 ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; - aSign = extractFloat32Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { - return packFloat32( aSign, 0x7F, 0 ); - } - break; - case float_round_down: - return aSign ? 0xBF800000 : 0; - case float_round_up: - return aSign ? 
0x80000000 : 0x3F800000; - } - return packFloat32( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x96 - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { - z += roundBitsMask; - } - } - z &= ~ roundBitsMask; - if ( z != a ) float_exception_flags |= float_flag_inexact; - return z; - -} + flag aSign; + int aExp; + uint32_t lastBitMask, roundBitsMask; + uint32_t z; + a = float32_squash_input_denormal(a, status); -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the single-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ + aExp = extractFloat32Exp( a ); + if ( 0x96 <= aExp ) { + if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { + return propagateFloat32NaN(a, a, status); + } + return a; + } + if ( aExp <= 0x7E ) { + if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a; + status->float_exception_flags |= float_flag_inexact; + aSign = extractFloat32Sign( a ); + switch (status->float_rounding_mode) { + case float_round_nearest_even: + if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { + return packFloat32( aSign, 0x7F, 0 ); + } + break; + case float_round_ties_away: + if (aExp == 0x7E) { + return packFloat32(aSign, 0x7F, 0); + } + break; + case float_round_down: + return make_float32(aSign ? 0xBF800000 : 0); + case float_round_up: + return make_float32(aSign ? 
0x80000000 : 0x3F800000); + } + return packFloat32( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x96 - aExp; + roundBitsMask = lastBitMask - 1; + z = float32_val(a); + switch (status->float_rounding_mode) { + case float_round_nearest_even: + z += lastBitMask>>1; + if ((z & roundBitsMask) == 0) { + z &= ~lastBitMask; + } + break; + case float_round_ties_away: + z += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat32Sign(make_float32(z))) { + z += roundBitsMask; + } + break; + case float_round_down: + if (extractFloat32Sign(make_float32(z))) { + z += roundBitsMask; + } + break; + default: + abort(); + } + z &= ~ roundBitsMask; + if (z != float32_val(a)) { + status->float_exception_flags |= float_flag_inexact; + } + return make_float32(z); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the absolute values of the single-precision +| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +| before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ -static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) +static float32 addFloat32Sigs(float32 a, float32 b, flag zSign, + float_status *status) { - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; - int16 expDiff; + int aExp, bExp, zExp; + uint32_t aSig, bSig, zSig; + int expDiff; - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - expDiff = aExp - bExp; - aSig <<= 6; - bSig <<= 6; - if ( 0 < expDiff ) { - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= 0x20000000; - } - shift32RightJamming( bSig, expDiff, &bSig ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign, 0xFF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= 0x20000000; - } - shift32RightJamming( aSig, - expDiff, &aSig ); - zExp = bExp; - } - else { - if ( aExp == 0xFF ) { - if ( aSig | bSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); - zSig = 0x40000000 + aSig + bSig; - zExp = aExp; - goto roundAndPack; - } - aSig |= 0x20000000; - zSig = ( aSig + bSig )<<1; - --zExp; - if ( (sbits32) zSig < 0 ) { - zSig = aSig + bSig; - ++zExp; - } - roundAndPack: - return roundAndPackFloat32( zSign, zExp, zSig ); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 6; + bSig <<= 6; + if ( 0 < expDiff ) { + if ( aExp == 0xFF ) { + if (aSig) { + return propagateFloat32NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x20000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } 
+ else if ( expDiff < 0 ) { + if ( bExp == 0xFF ) { + if (bSig) { + return propagateFloat32NaN(a, b, status); + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x20000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0xFF ) { + if (aSig | bSig) { + return propagateFloat32NaN(a, b, status); + } + return a; + } + if ( aExp == 0 ) { + if (status->flush_to_zero) { + if (aSig | bSig) { + float_raise(float_flag_output_denormal, status); + } + return packFloat32(zSign, 0, 0); + } + return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); + } + zSig = 0x40000000 + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= 0x20000000; + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (int32_t) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat32(zSign, zExp, zSig, status); } @@ -1415,70 +2151,77 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) | Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; - int16 expDiff; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - expDiff = aExp - bExp; - aSig <<= 7; - bSig <<= 7; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0xFF ) { - if ( aSig | bSig ) return propagateFloat32NaN( a, b ); - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); - bExpBigger: - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign ^ 1, 0xFF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= 0x40000000; - } - shift32RightJamming( aSig, - expDiff, &aSig ); - bSig |= 0x40000000; - bBigger: - zSig = bSig - aSig; - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= 0x40000000; - } - shift32RightJamming( bSig, expDiff, &bSig ); - aSig |= 0x40000000; - aBigger: - zSig = aSig - bSig; - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); +static float32 subFloat32Sigs(float32 a, float32 b, flag zSign, + float_status *status) +{ + int aExp, bExp, zExp; + uint32_t aSig, bSig, zSig; + int expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) 
goto bExpBigger; + if ( aExp == 0xFF ) { + if (aSig | bSig) { + return propagateFloat32NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat32(status->float_rounding_mode == float_round_down, 0, 0); + bExpBigger: + if ( bExp == 0xFF ) { + if (bSig) { + return propagateFloat32NaN(a, b, status); + } + return packFloat32( zSign ^ 1, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x40000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0xFF ) { + if (aSig) { + return propagateFloat32NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x40000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status); } @@ -1488,18 +2231,20 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) | Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float32 float32_add( float32 a, float32 b ) +float32 float32_add(float32 a, float32 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign == bSign ) { - return addFloat32Sigs( a, b, aSign ); - } - else { - return subFloat32Sigs( a, b, aSign ); - } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return addFloat32Sigs(a, b, aSign, status); + } + else { + return subFloat32Sigs(a, b, aSign, status); + } } @@ -1509,18 +2254,20 @@ float32 float32_add( float32 a, float32 b ) | for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 float32_sub( float32 a, float32 b ) +float32 float32_sub(float32 a, float32 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign == bSign ) { - return subFloat32Sigs( a, b, aSign ); - } - else { - return addFloat32Sigs( a, b, aSign ); - } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return subFloat32Sigs(a, b, aSign, status); + } + else { + return addFloat32Sigs(a, b, aSign, status); + } } @@ -1530,57 +2277,62 @@ float32 float32_sub( float32 a, float32 b ) | for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float32 float32_mul( float32 a, float32 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits32 aSig, bSig; - bits64 zSig64; - bits32 zSig; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0xFF ) { - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { - return propagateFloat32NaN( a, b ); - } - if ( ( bExp | bSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x7F; - aSig = ( aSig | 0x00800000 )<<7; - bSig = ( bSig | 0x00800000 )<<8; - shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); - zSig = zSig64; - if ( 0 <= (sbits32) ( zSig<<1 ) ) { - zSig <<= 1; - --zExp; - } - return roundAndPackFloat32( zSign, zExp, zSig ); +float32 float32_mul(float32 a, float32 b, float_status *status) +{ + flag aSign, bSign, zSign; + int aExp, bExp, zExp; + uint32_t aSig, bSig; + uint64_t zSig64; + uint32_t zSig; + + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = 
aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN(a, b, status); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if (bSig) { + return propagateFloat32NaN(a, b, status); + } + if ( ( aExp | aSig ) == 0 ) { + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x7F; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 ); + zSig = zSig64; + if ( 0 <= (int32_t) ( zSig<<1 ) ) { + zSig <<= 1; + --zExp; + } + return roundAndPackFloat32(zSign, zExp, zSig, status); } @@ -1590,59 +2342,67 @@ float32 float32_mul( float32 a, float32 b ) | IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float32 float32_div( float32 a, float32 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - float_raise( float_flag_divbyzero ); - return packFloat32( zSign, 0xFF, 0 ); - } - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x7D; - aSig = ( aSig | 0x00800000 )<<7; - bSig = ( bSig | 0x00800000 )<<8; - if ( bSig <= ( aSig + aSig ) ) { - aSig >>= 1; - ++zExp; - } - zSig = ( ( (bits64) aSig )<<32 ) / bSig; - if ( ( zSig & 0x3F ) == 0 ) { - zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 ); - } - return roundAndPackFloat32( zSign, zExp, zSig ); +float32 float32_div(float32 a, float32 b, float_status *status) +{ + flag aSign, bSign, zSign; + int aExp, bExp, zExp; + uint32_t aSig, bSig, zSig; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); 
+ bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if (aSig) { + return propagateFloat32NaN(a, b, status); + } + if ( bExp == 0xFF ) { + if (bSig) { + return propagateFloat32NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if (bSig) { + return propagateFloat32NaN(a, b, status); + } + return packFloat32( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + float_raise(float_flag_divbyzero, status); + return packFloat32( zSign, 0xFF, 0 ); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x7D; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = ( ( (uint64_t) aSig )<<32 ) / bSig; + if ( ( zSig & 0x3F ) == 0 ) { + zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 ); + } + return roundAndPackFloat32(zSign, zExp, zSig, status); } @@ -1652,219 +2412,515 @@ float32 float32_div( float32 a, float32 b ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float32 float32_rem( float32 a, float32 b ) -{ - flag aSign, zSign; - int16 aExp, bExp, expDiff; - bits32 aSig, bSig; - bits32 q; - bits64 aSig64, bSig64, q64; - bits32 alternateASig; - sbits32 sigMean; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); -// bSign = extractFloat32Sign( b ); - if ( aExp == 0xFF ) { - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { - return propagateFloat32NaN( a, b ); - } - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig |= 0x00800000; - bSig |= 0x00800000; - if ( expDiff < 32 ) { - aSig <<= 8; - bSig <<= 8; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - if ( 0 < expDiff ) { - q = ( ( (bits64) aSig )<<32 ) / bSig; - q >>= 32 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - } - else { - if ( bSig <= aSig ) aSig -= bSig; - aSig64 = ( (bits64) aSig )<<40; - bSig64 = ( (bits64) bSig )<<40; - expDiff -= 64; - while ( 0 < expDiff ) { - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - aSig64 = - ( ( bSig * q64 )<<38 ); - expDiff -= 62; - } - expDiff += 64; - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? 
q64 - 2 : 0; - q = q64>>( 64 - expDiff ); - bSig <<= 6; - aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (sbits32) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (sbits32) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); +float32 float32_rem(float32 a, float32 b, float_status *status) +{ + flag aSign, zSign; + int aExp, bExp, expDiff; + uint32_t aSig, bSig; + uint32_t q; + uint64_t aSig64, bSig64, q64; + uint32_t alternateASig; + int32_t sigMean; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + if ( bExp == 0xFF ) { + if (bSig) { + return propagateFloat32NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig |= 0x00800000; + bSig |= 0x00800000; + if ( expDiff < 32 ) { + aSig <<= 8; + bSig <<= 8; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + if ( 0 < expDiff ) { + q = ( ( (uint64_t) aSig )<<32 ) / bSig; + q >>= 32 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + } 
+ else { + if ( bSig <= aSig ) aSig -= bSig; + aSig64 = ( (uint64_t) aSig )<<40; + bSig64 = ( (uint64_t) bSig )<<40; + expDiff -= 64; + while ( 0 < expDiff ) { + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + aSig64 = - ( ( bSig * q64 )<<38 ); + expDiff -= 62; + } + expDiff += 64; + q64 = estimateDiv128To64( aSig64, 0, bSig64 ); + q64 = ( 2 < q64 ) ? q64 - 2 : 0; + q = q64>>( 64 - expDiff ); + bSig <<= 6; + aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (int32_t) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (int32_t) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status); } /*---------------------------------------------------------------------------- -| Returns the square root of the single-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. +| Returns the result of multiplying the single-precision floating-point values +| `a' and `b' then adding 'c', with no intermediate rounding step after the +| multiplication. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic 754-2008. +| The flags argument allows the caller to select negation of the +| addend, the intermediate product, or the final result. (The difference +| between this and having the caller do a separate negation is that negating +| externally will flip the sign bit on NaNs.) 
*----------------------------------------------------------------------------*/ -float32 float32_sqrt( float32 a ) -{ - flag aSign; - int16 aExp, zExp; - bits32 aSig, zSig; - bits64 rem, term; +float32 float32_muladd(float32 a, float32 b, float32 c, int flags, + float_status *status) +{ + flag aSign, bSign, cSign, zSign; + int aExp, bExp, cExp, pExp, zExp, expDiff; + uint32_t aSig, bSig, cSig; + flag pInf, pZero, pSign; + uint64_t pSig64, cSig64, zSig64; + uint32_t pSig; + int shiftcount; + flag signflip, infzero; + + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + c = float32_squash_input_denormal(c, status); + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + bSig = extractFloat32Frac(b); + bExp = extractFloat32Exp(b); + bSign = extractFloat32Sign(b); + cSig = extractFloat32Frac(c); + cExp = extractFloat32Exp(c); + cSign = extractFloat32Sign(c); + + infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) || + (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0)); + + /* It is implementation-defined whether the cases of (0,inf,qnan) + * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN + * they return if they do), so we have to hand this information + * off to the target-specific pick-a-NaN routine. + */ + if (((aExp == 0xff) && aSig) || + ((bExp == 0xff) && bSig) || + ((cExp == 0xff) && cSig)) { + return propagateFloat32MulAddNaN(a, b, c, infzero, status); + } - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, 0 ); - if ( ! 
aSign ) return a; - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aSign ) { - if ( ( aExp | aSig ) == 0 ) return a; - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return 0; - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; - aSig = ( aSig | 0x00800000 )<<8; - zSig = estimateSqrt32( aExp, aSig ) + 2; - if ( ( zSig & 0x7F ) <= 5 ) { - if ( zSig < 2 ) { - zSig = 0x7FFFFFFF; - goto roundAndPack; - } - aSig >>= aExp & 1; - term = ( (bits64) zSig ) * zSig; - rem = ( ( (bits64) aSig )<<32 ) - term; - while ( (sbits64) rem < 0 ) { - --zSig; - rem += ( ( (bits64) zSig )<<1 ) | 1; - } - zSig |= ( rem != 0 ); - } - shift32RightJamming( zSig, 1, &zSig ); - roundAndPack: - return roundAndPackFloat32( 0, zExp, zSig ); + if (infzero) { + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } -} + if (flags & float_muladd_negate_c) { + cSign ^= 1; + } -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ + signflip = (flags & float_muladd_negate_result) ? 
1 : 0; -flag float32_eq( float32 a, float32 b ) -{ - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + /* Work out the sign and type of the product */ + pSign = aSign ^ bSign; + if (flags & float_muladd_negate_product) { + pSign ^= 1; + } + pInf = (aExp == 0xff) || (bExp == 0xff); + pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0); + + if (cExp == 0xff) { + if (pInf && (pSign ^ cSign)) { + /* addition of opposite-signed infinities => InvalidOperation */ + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + /* Otherwise generate an infinity of the same sign */ + return packFloat32(cSign ^ signflip, 0xff, 0); + } -} + if (pInf) { + return packFloat32(pSign ^ signflip, 0xff, 0); + } -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
-*----------------------------------------------------------------------------*/ + if (pZero) { + if (cExp == 0) { + if (cSig == 0) { + /* Adding two exact zeroes */ + if (pSign == cSign) { + zSign = pSign; + } else if (status->float_rounding_mode == float_round_down) { + zSign = 1; + } else { + zSign = 0; + } + return packFloat32(zSign ^ signflip, 0, 0); + } + /* Exact zero plus a denorm */ + if (status->flush_to_zero) { + float_raise(float_flag_output_denormal, status); + return packFloat32(cSign ^ signflip, 0, 0); + } + } + /* Zero plus something non-zero : just return the something */ + if (flags & float_muladd_halve_result) { + if (cExp == 0) { + normalizeFloat32Subnormal(cSig, &cExp, &cSig); + } + /* Subtract one to halve, and one again because roundAndPackFloat32 + * wants one less than the true exponent. + */ + cExp -= 2; + cSig = (cSig | 0x00800000) << 7; + return roundAndPackFloat32(cSign ^ signflip, cExp, cSig, status); + } + return packFloat32(cSign ^ signflip, cExp, cSig); + } -flag float32_le( float32 a, float32 b ) -{ - flag aSign, bSign; + if (aExp == 0) { + normalizeFloat32Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + normalizeFloat32Subnormal(bSig, &bExp, &bSig); + } - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); + /* Calculate the actual result a * b + c */ + + /* Multiply first; this is easy. */ + /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f + * because we want the true exponent, not the "one-less-than" + * flavour that roundAndPackFloat32() takes. 
+ */ + pExp = aExp + bExp - 0x7e; + aSig = (aSig | 0x00800000) << 7; + bSig = (bSig | 0x00800000) << 8; + pSig64 = (uint64_t)aSig * bSig; + if ((int64_t)(pSig64 << 1) >= 0) { + pSig64 <<= 1; + pExp--; + } -} + zSign = pSign ^ signflip; + + /* Now pSig64 is the significand of the multiply, with the explicit bit in + * position 62. + */ + if (cExp == 0) { + if (!cSig) { + /* Throw out the special case of c being an exact zero now */ + shift64RightJamming(pSig64, 32, &pSig64); + pSig = pSig64; + if (flags & float_muladd_halve_result) { + pExp--; + } + return roundAndPackFloat32(zSign, pExp - 1, + pSig, status); + } + normalizeFloat32Subnormal(cSig, &cExp, &cSig); + } + + cSig64 = (uint64_t)cSig << (62 - 23); + cSig64 |= LIT64(0x4000000000000000); + expDiff = pExp - cExp; + + if (pSign == cSign) { + /* Addition */ + if (expDiff > 0) { + /* scale c to match p */ + shift64RightJamming(cSig64, expDiff, &cSig64); + zExp = pExp; + } else if (expDiff < 0) { + /* scale p to match c */ + shift64RightJamming(pSig64, -expDiff, &pSig64); + zExp = cExp; + } else { + /* no scaling needed */ + zExp = cExp; + } + /* Add significands and make sure explicit bit ends up in posn 62 */ + zSig64 = pSig64 + cSig64; + if ((int64_t)zSig64 < 0) { + shift64RightJamming(zSig64, 1, &zSig64); + } else { + zExp--; + } + } else { + /* Subtraction */ + if (expDiff > 0) { + shift64RightJamming(cSig64, expDiff, &cSig64); + zSig64 = pSig64 - cSig64; + zExp = pExp; + } else if (expDiff < 0) { + shift64RightJamming(pSig64, -expDiff, &pSig64); + zSig64 = cSig64 - pSig64; + zExp = cExp; + zSign ^= 1; + } else { + zExp = pExp; + if (cSig64 < pSig64) { + zSig64 = pSig64 - cSig64; + } else if (pSig64 < cSig64) { + zSig64 = cSig64 - pSig64; + zSign ^= 1; + } else { + /* Exact zero */ + zSign = signflip; + if (status->float_rounding_mode == float_round_down) { + zSign ^= 1; + } + return packFloat32(zSign, 0, 0); + } + } + --zExp; + /* Normalize to put the explicit bit back into bit 62. 
*/ + shiftcount = countLeadingZeros64(zSig64) - 1; + zSig64 <<= shiftcount; + zExp -= shiftcount; + } + if (flags & float_muladd_halve_result) { + zExp--; + } + + shift64RightJamming(zSig64, 32, &zSig64); + return roundAndPackFloat32(zSign, zExp, zSig64, status); +} -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -flag float32_lt( float32 a, float32 b ) +/*---------------------------------------------------------------------------- +| Returns the square root of the single-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_sqrt(float32 a, float_status *status) { - flag aSign, bSign; + flag aSign; + int aExp, zExp; + uint32_t aSig, zSig; + uint64_t rem, term; + a = float32_squash_input_denormal(a, status); - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if (aSig) { + return propagateFloat32NaN(a, float32_zero, status); + } + if ( ! 
aSign ) return a; + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return float32_zero; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; + aSig = ( aSig | 0x00800000 )<<8; + zSig = estimateSqrt32( aExp, aSig ) + 2; + if ( ( zSig & 0x7F ) <= 5 ) { + if ( zSig < 2 ) { + zSig = 0x7FFFFFFF; + goto roundAndPack; + } + aSig >>= aExp & 1; + term = ( (uint64_t) zSig ) * zSig; + rem = ( ( (uint64_t) aSig )<<32 ) - term; + while ( (int64_t) rem < 0 ) { + --zSig; + rem += ( ( (uint64_t) zSig )<<1 ) | 1; + } + zSig |= ( rem != 0 ); + } + shift32RightJamming( zSig, 1, &zSig ); + roundAndPack: + return roundAndPackFloat32(0, zExp, zSig, status); + +} + +/*---------------------------------------------------------------------------- +| Returns the binary exponential of the single-precision floating-point value +| `a'. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +| +| Uses the following identities: +| +| 1. ------------------------------------------------------------------------- +| x x*ln(2) +| 2 = e +| +| 2. ------------------------------------------------------------------------- +| 2 3 4 5 n +| x x x x x x x +| e = 1 + --- + --- + --- + --- + --- + ... + --- + ... +| 1! 2! 3! 4! 5! n! 
+*----------------------------------------------------------------------------*/ + +static const float64 float32_exp2_coefficients[15] = +{ + const_float64( 0x3ff0000000000000ll ), /* 1 */ + const_float64( 0x3fe0000000000000ll ), /* 2 */ + const_float64( 0x3fc5555555555555ll ), /* 3 */ + const_float64( 0x3fa5555555555555ll ), /* 4 */ + const_float64( 0x3f81111111111111ll ), /* 5 */ + const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ + const_float64( 0x3f2a01a01a01a01all ), /* 7 */ + const_float64( 0x3efa01a01a01a01all ), /* 8 */ + const_float64( 0x3ec71de3a556c734ll ), /* 9 */ + const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ + const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ + const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ + const_float64( 0x3de6124613a86d09ll ), /* 13 */ + const_float64( 0x3da93974a8c07c9dll ), /* 14 */ + const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ +}; + +float32 float32_exp2(float32 a, float_status *status) +{ + flag aSign; + int aExp; + uint32_t aSig; + float64 r, x, xn; + int i; + a = float32_squash_input_denormal(a, status); + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + + if ( aExp == 0xFF) { + if (aSig) { + return propagateFloat32NaN(a, float32_zero, status); + } + return (aSign) ? 
float32_zero : a; + } + if (aExp == 0) { + if (aSig == 0) return float32_one; + } + + float_raise(float_flag_inexact, status); + + /* ******************************* */ + /* using float64 for approximation */ + /* ******************************* */ + x = float32_to_float64(a, status); + x = float64_mul(x, float64_ln2, status); + + xn = x; + r = float64_one; + for (i = 0 ; i < 15 ; i++) { + float64 f; + + f = float64_mul(xn, float32_exp2_coefficients[i], status); + r = float64_add(r, f, status); + + xn = float64_mul(xn, x, status); + } + + return float64_to_float32(r, status); +} + +/*---------------------------------------------------------------------------- +| Returns the binary log of the single-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ +float32 float32_log2(float32 a, float_status *status) +{ + flag aSign, zSign; + int aExp; + uint32_t aSig, zSig, i; + + a = float32_squash_input_denormal(a, status); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( aSign ) { + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + if ( aExp == 0xFF ) { + if (aSig) { + return propagateFloat32NaN(a, float32_zero, status); + } + return a; + } + + aExp -= 0x7F; + aSig |= 0x00800000; + zSign = aExp < 0; + zSig = aExp << 23; + + for (i = 1 << 22; i > 0; i >>= 1) { + aSig = ( (uint64_t)aSig * aSig ) >> 23; + if ( aSig & 0x01000000 ) { + aSig >>= 1; + zSig |= i; + } + } + + if ( zSign ) + zSig = -zSig; + return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status); } /*---------------------------------------------------------------------------- @@ -1874,16 +2930,125 @@ flag float32_lt( 
float32 a, float32 b ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float32_eq_signaling( float32 a, float32 b ) +int float32_eq(float32 a, float32 b, float_status *status) { - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + uint32_t av, bv; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + av = float32_val(a); + bv = float32_val(b); + return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is less than +| or equal to the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float32_le(float32 a, float32 b, float_status *status) +{ + flag aSign, bSign; + uint32_t av, bv; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + av = float32_val(a); + bv = float32_val(b); + if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is less than +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. The comparison is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float32_lt(float32 a, float32 b, float_status *status) +{ + flag aSign, bSign; + uint32_t av, bv; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + av = float32_val(a); + bv = float32_val(b); + if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. The invalid exception is raised if either +| operand is a NaN. The comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float32_unordered(float32 a, float32 b, float_status *status) +{ + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 1; + } + return 0; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. The comparison is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ +int float32_eq_quiet(float32 a, float32 b, float_status *status) +{ + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if (float32_is_signaling_nan(a, status) + || float32_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + return ( float32_val(a) == float32_val(b) ) || + ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 ); } /*---------------------------------------------------------------------------- @@ -1893,23 +3058,28 @@ flag float32_eq_signaling( float32 a, float32 b ) | IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float32_le_quiet( float32 a, float32 b ) +int float32_le_quiet(float32 a, float32 b, float_status *status) { - flag aSign, bSign; -// int16 aExp, bExp; + flag aSign, bSign; + uint32_t av, bv; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if (float32_is_signaling_nan(a, status) + || float32_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, 
status); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + av = float32_val(a); + bv = float32_val(b); + if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); } @@ -1920,23 +3090,53 @@ flag float32_le_quiet( float32 a, float32 b ) | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float32_lt_quiet( float32 a, float32 b ) +int float32_lt_quiet(float32 a, float32 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; + uint32_t av, bv; + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if (float32_is_signaling_nan(a, status) + || float32_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + av = float32_val(a); + bv = float32_val(b); + if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); + +} +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. 
The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float32_unordered_quiet(float32 a, float32 b, float_status *status) +{ + a = float32_squash_input_denormal(a, status); + b = float32_squash_input_denormal(b, status); + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if (float32_is_signaling_nan(a, status) + || float32_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 1; + } + return 0; } /*---------------------------------------------------------------------------- @@ -1949,20 +3149,22 @@ flag float32_lt_quiet( float32 a, float32 b ) | largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -int32 float64_to_int32( float64 a ) +int32_t float64_to_int32(float64 a, float_status *status) { - flag aSign; - int16 aExp, shiftCount; - bits64 aSig; + flag aSign; + int aExp; + int shiftCount; + uint64_t aSig; + a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; - if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x42C - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32( aSign, aSig ); + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x42C - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32(aSign, aSig, status); } @@ -1976,41 +3178,96 @@ int32 float64_to_int32( float64 a ) | returned. 
*----------------------------------------------------------------------------*/ -int32 float64_to_int32_round_to_zero( float64 a ) +int32_t float64_to_int32_round_to_zero(float64 a, float_status *status) { - flag aSign; - int16 aExp, shiftCount; - bits64 aSig, savedASig; - int32 z; + flag aSign; + int aExp; + int shiftCount; + uint64_t aSig, savedASig; + int32_t z; + a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( 0x41E < aExp ) { - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FF ) { - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x433 - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - z = (sbits32) z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise( float_flag_invalid ); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<float_exception_flags |= float_flag_inexact; + } + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x433 - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<float_exception_flags |= float_flag_inexact; + } + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 16-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN, the largest positive integer is returned. 
Otherwise, if +| the conversion overflows, the largest integer with the same sign as `a' is +| returned. +*----------------------------------------------------------------------------*/ + +int16_t float64_to_int16_round_to_zero(float64 a, float_status *status) +{ + flag aSign; + int aExp; + int shiftCount; + uint64_t aSig, savedASig; + int32_t z; + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( 0x40E < aExp ) { + if ( ( aExp == 0x7FF ) && aSig ) { + aSign = 0; + } + goto invalid; + } + else if ( aExp < 0x3FF ) { + if ( aExp || aSig ) { + status->float_exception_flags |= float_flag_inexact; + } + return 0; + } + aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x433 - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) { + z = - z; + } + if ( ( (int16_t)z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? (int32_t) 0xffff8000 : 0x7FFF; + } + if ( ( aSig<float_exception_flags |= float_flag_inexact; + } + return z; } /*---------------------------------------------------------------------------- @@ -2023,35 +3280,37 @@ int32 float64_to_int32_round_to_zero( float64 a ) | largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -int64 float64_to_int64( float64 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits64 aSig, aSigExtra; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x433 - aExp; - if ( shiftCount <= 0 ) { - if ( 0x43E < aExp ) { - float_raise( float_flag_invalid ); - if ( ! 
aSign - || ( ( aExp == 0x7FF ) - && ( aSig != LIT64( 0x0010000000000000 ) ) ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - aSigExtra = 0; - aSig <<= - shiftCount; - } - else { - shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); - } - return roundAndPackInt64( aSign, aSig, aSigExtra ); +int64_t float64_to_int64(float64 a, float_status *status) +{ + flag aSign; + int aExp; + int shiftCount; + uint64_t aSig, aSigExtra; + a = float64_squash_input_denormal(a, status); + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = 0x433 - aExp; + if ( shiftCount <= 0 ) { + if ( 0x43E < aExp ) { + float_raise(float_flag_invalid, status); + if ( ! aSign + || ( ( aExp == 0x7FF ) + && ( aSig != LIT64( 0x0010000000000000 ) ) ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + aSigExtra = 0; + aSig <<= - shiftCount; + } + else { + shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); + } + return roundAndPackInt64(aSign, aSig, aSigExtra, status); } @@ -2065,45 +3324,49 @@ int64 float64_to_int64( float64 a ) | returned. *----------------------------------------------------------------------------*/ -int64 float64_to_int64_round_to_zero( float64 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits64 aSig; - int64 z; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); - shiftCount = aExp - 0x433; - if ( 0 <= shiftCount ) { - if ( 0x43E <= aExp ) { - if ( a != LIT64( 0xC3E0000000000000 ) ) { - float_raise( float_flag_invalid ); - if ( ! 
aSign - || ( ( aExp == 0x7FF ) - && ( aSig != LIT64( 0x0010000000000000 ) ) ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - z = aSig<>( - shiftCount ); - if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - } - if ( aSign ) z = - z; - return z; +int64_t float64_to_int64_round_to_zero(float64 a, float_status *status) +{ + flag aSign; + int aExp; + int shiftCount; + uint64_t aSig; + int64_t z; + a = float64_squash_input_denormal(a, status); + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); + shiftCount = aExp - 0x433; + if ( 0 <= shiftCount ) { + if ( 0x43E <= aExp ) { + if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) { + float_raise(float_flag_invalid, status); + if ( ! aSign + || ( ( aExp == 0x7FF ) + && ( aSig != LIT64( 0x0010000000000000 ) ) ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + z = aSig<float_exception_flags |= float_flag_inexact; + } + return 0; + } + z = aSig>>( - shiftCount ); + if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { + status->float_exception_flags |= float_flag_inexact; + } + } + if ( aSign ) z = - z; + return z; } @@ -2114,75 +3377,368 @@ int64 float64_to_int64_round_to_zero( float64 a ) | Arithmetic. 
*----------------------------------------------------------------------------*/ -float32 float64_to_float32( float64 a ) +float32 float64_to_float32(float64 a, float_status *status) { - flag aSign; - int16 aExp; - bits64 aSig; - bits32 zSig; + flag aSign; + int aExp; + uint64_t aSig; + uint32_t zSig; + a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 22, &aSig ); - zSig = aSig; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x381; - } - return roundAndPackFloat32( aSign, aExp, zSig ); + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if (aSig) { + return commonNaNToFloat32(float64ToCommonNaN(a, status), status); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 22, &aSig ); + zSig = aSig; + if ( aExp || zSig ) { + zSig |= 0x40000000; + aExp -= 0x381; + } + return roundAndPackFloat32(aSign, aExp, zSig, status); } -#ifdef FLOATX80 /*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +| half-precision floating-point value, returning the result. After being +| shifted into the proper positions, the three fields are simply added +| together to form the result. This means that any integer portion of `zSig' +| will be added into the exponent. 
Since a properly normalized significand +| will have an integer portion equal to 1, the `zExp' input should be 1 less +| than the desired result exponent whenever `zSig' is a complete, normalized +| significand. *----------------------------------------------------------------------------*/ - -floatx80 float64_to_floatx80( float64 a ) +static float16 packFloat16(flag zSign, int zExp, uint16_t zSig) { - flag aSign; - int16 aExp; - bits64 aSig; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - return - packFloatx80( - aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); - + return make_float16( + (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig); } -// 31-12-2016: Added for Previous -floatx80 float64_to_floatx80_allowunnormal( float64 a ) -{ - flag aSign; - int16 aExp; - bits64 aSig; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper half-precision floating- +| point value corresponding to the abstract input. Ordinarily, the abstract +| value is simply rounded and packed into the half-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. 
However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal half- +| precision floating-point number. +| The `ieee' flag indicates whether to use IEEE standard half precision, or +| ARM-style "alternative representation", which omits the NaN and Inf +| encodings in order to raise the maximum representable exponent by one. +| The input significand `zSig' has its binary point between bits 22 +| and 23, which is 13 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| Note the slightly odd position of the binary point in zSig compared with the +| other roundAndPackFloat functions. This should probably be fixed if we +| need to implement more float16 routines than just conversion. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static float16 roundAndPackFloat16(flag zSign, int zExp, + uint32_t zSig, flag ieee, + float_status *status) +{ + int maxexp = ieee ? 29 : 30; + uint32_t mask; + uint32_t increment; + bool rounding_bumps_exp; + bool is_tiny = false; + + /* Calculate the mask of bits of the mantissa which are not + * representable in half-precision and will be lost. 
+ */ + if (zExp < 1) { + /* Will be denormal in halfprec */ + mask = 0x00ffffff; + if (zExp >= -11) { + mask >>= 11 + zExp; + } + } else { + /* Normal number in halfprec */ + mask = 0x00001fff; + } + + switch (status->float_rounding_mode) { + case float_round_nearest_even: + increment = (mask + 1) >> 1; + if ((zSig & mask) == increment) { + increment = zSig & (increment << 1); + } + break; + case float_round_ties_away: + increment = (mask + 1) >> 1; + break; + case float_round_up: + increment = zSign ? 0 : mask; + break; + case float_round_down: + increment = zSign ? mask : 0; + break; + default: /* round_to_zero */ + increment = 0; + break; + } + + rounding_bumps_exp = (zSig + increment >= 0x01000000); + + if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) { + if (ieee) { + float_raise(float_flag_overflow | float_flag_inexact, status); + return packFloat16(zSign, 0x1f, 0); + } else { + float_raise(float_flag_invalid, status); + return packFloat16(zSign, 0x1f, 0x3ff); + } + } + + if (zExp < 0) { + /* Note that flush-to-zero does not affect half-precision results */ + is_tiny = + (status->float_detect_tininess == float_tininess_before_rounding) + || (zExp < -1) + || (!rounding_bumps_exp); + } + if (zSig & mask) { + float_raise(float_flag_inexact, status); + if (is_tiny) { + float_raise(float_flag_underflow, status); + } + } + + zSig += increment; + if (rounding_bumps_exp) { + zSig >>= 1; + zExp++; + } + + if (zExp < -10) { + return packFloat16(zSign, 0, 0); + } + if (zExp < 0) { + zSig >>= -zExp; + zExp = 0; + } + return packFloat16(zSign, zExp, zSig >> 13); +} + +static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr, + uint32_t *zSigPtr) +{ + int8_t shiftCount = countLeadingZeros32(aSig) - 21; + *zSigPtr = aSig << shiftCount; + *zExpPtr = 1 - shiftCount; +} + +/* Half precision floats come in two formats: standard IEEE and "ARM" format. + The latter gains extra exponent range by omitting the NaN/Inf encodings. 
*/ + +float32 float16_to_float32(float16 a, flag ieee, float_status *status) +{ + flag aSign; + int aExp; + uint32_t aSig; + + aSign = extractFloat16Sign(a); + aExp = extractFloat16Exp(a); + aSig = extractFloat16Frac(a); + + if (aExp == 0x1f && ieee) { + if (aSig) { + return commonNaNToFloat32(float16ToCommonNaN(a, status), status); + } + return packFloat32(aSign, 0xff, 0); + } + if (aExp == 0) { + if (aSig == 0) { + return packFloat32(aSign, 0, 0); + } + + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + aExp--; + } + return packFloat32( aSign, aExp + 0x70, aSig << 13); +} + +float16 float32_to_float16(float32 a, flag ieee, float_status *status) +{ + flag aSign; + int aExp; + uint32_t aSig; + + a = float32_squash_input_denormal(a, status); + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if (aSig) { + /* Input is a NaN */ + if (!ieee) { + float_raise(float_flag_invalid, status); + return packFloat16(aSign, 0, 0); + } + return commonNaNToFloat16( + float32ToCommonNaN(a, status), status); + } + /* Infinity */ + if (!ieee) { + float_raise(float_flag_invalid, status); + return packFloat16(aSign, 0x1f, 0x3ff); + } + return packFloat16(aSign, 0x1f, 0); + } + if (aExp == 0 && aSig == 0) { + return packFloat16(aSign, 0, 0); + } + /* Decimal point between bits 22 and 23. Note that we add the 1 bit + * even if the input is denormal; however this is harmless because + * the largest possible single-precision denormal is still smaller + * than the smallest representable half-precision denormal, and so we + * will end up ignoring aSig and returning via the "always return zero" + * codepath. 
+ */ + aSig |= 0x00800000; + aExp -= 0x71; + + return roundAndPackFloat16(aSign, aExp, aSig, ieee, status); +} + +float64 float16_to_float64(float16 a, flag ieee, float_status *status) +{ + flag aSign; + int aExp; + uint32_t aSig; + + aSign = extractFloat16Sign(a); + aExp = extractFloat16Exp(a); + aSig = extractFloat16Frac(a); + + if (aExp == 0x1f && ieee) { + if (aSig) { + return commonNaNToFloat64( + float16ToCommonNaN(a, status), status); + } + return packFloat64(aSign, 0x7ff, 0); + } + if (aExp == 0) { + if (aSig == 0) { + return packFloat64(aSign, 0, 0); + } + + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + aExp--; + } + return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42); +} + +float16 float64_to_float16(float64 a, flag ieee, float_status *status) +{ + flag aSign; + int aExp; + uint64_t aSig; + uint32_t zSig; + + a = float64_squash_input_denormal(a, status); + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aExp == 0x7FF) { + if (aSig) { + /* Input is a NaN */ + if (!ieee) { + float_raise(float_flag_invalid, status); + return packFloat16(aSign, 0, 0); + } + return commonNaNToFloat16( + float64ToCommonNaN(a, status), status); + } + /* Infinity */ + if (!ieee) { + float_raise(float_flag_invalid, status); + return packFloat16(aSign, 0x1f, 0x3ff); + } + return packFloat16(aSign, 0x1f, 0); + } + shift64RightJamming(aSig, 29, &aSig); + zSig = aSig; + if (aExp == 0 && zSig == 0) { + return packFloat16(aSign, 0, 0); + } + /* Decimal point between bits 22 and 23. Note that we add the 1 bit + * even if the input is denormal; however this is harmless because + * the largest possible single-precision denormal is still smaller + * than the smallest representable half-precision denormal, and so we + * will end up ignoring aSig and returning via the "always return zero" + * codepath. 
+ */ + zSig |= 0x00800000; + aExp -= 0x3F1; + + return roundAndPackFloat16(aSign, aExp, zSig, ieee, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 float64_to_floatx80(float64 a, float_status *status) +{ + flag aSign; + int aExp; + uint64_t aSig; + + a = float64_squash_input_denormal(a, status); + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if (aSig) { + return commonNaNToFloatx80(float64ToCommonNaN(a, status), status); + } + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); + +} + +#ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous +floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status ) +{ + flag aSign; + int16_t aExp; + uint64_t aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a, status ), status ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( aExp == 0 ) { if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); @@ -2193,11 +3749,7 @@ floatx80 float64_to_floatx80_allowunnormal( float64 a ) aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); } -// end of addition for Previous - -#endif - -#ifdef FLOATX128 +#endif // end of addition for Previous 
/*---------------------------------------------------------------------------- | Returns the result of converting the double-precision floating-point value @@ -2206,31 +3758,32 @@ floatx80 float64_to_floatx80_allowunnormal( float64 a ) | Arithmetic. *----------------------------------------------------------------------------*/ -float128 float64_to_float128( float64 a ) +float128 float64_to_float128(float64 a, float_status *status) { - flag aSign; - int16 aExp; - bits64 aSig, zSig0, zSig1; + flag aSign; + int aExp; + uint64_t aSig, zSig0, zSig1; - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); + a = float64_squash_input_denormal(a, status); + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if (aSig) { + return commonNaNToFloat128(float64ToCommonNaN(a, status), status); + } + return packFloat128( aSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); + return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); } -#endif - /*---------------------------------------------------------------------------- | Rounds the double-precision floating-point value `a' to an integer, and | returns the result as a double-precision floating-point value. The @@ -2238,57 +3791,90 @@ float128 float64_to_float128( float64 a ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float64 float64_round_to_int( float64 a ) +float64 float64_round_to_int(float64 a, float_status *status) { - flag aSign; - int16 aExp; - bits64 lastBitMask, roundBitsMask; - int8 roundingMode; - float64 z; + flag aSign; + int aExp; + uint64_t lastBitMask, roundBitsMask; + uint64_t z; + a = float64_squash_input_denormal(a, status); - aExp = extractFloat64Exp( a ); - if ( 0x433 <= aExp ) { - if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { - return propagateFloat64NaN( a, a ); - } - return a; - } - if ( aExp < 0x3FF ) { - if ( (bits64) ( a<<1 ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; - aSign = extractFloat64Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { - return packFloat64( aSign, 0x3FF, 0 ); - } - break; - case float_round_down: - return aSign ? LIT64( 0xBFF0000000000000 ) : 0; - case float_round_up: - return - aSign ? 
LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); - } - return packFloat64( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x433 - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { - z += roundBitsMask; - } - } - z &= ~ roundBitsMask; - if ( z != a ) float_exception_flags |= float_flag_inexact; - return z; + aExp = extractFloat64Exp( a ); + if ( 0x433 <= aExp ) { + if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { + return propagateFloat64NaN(a, a, status); + } + return a; + } + if ( aExp < 0x3FF ) { + if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a; + status->float_exception_flags |= float_flag_inexact; + aSign = extractFloat64Sign( a ); + switch (status->float_rounding_mode) { + case float_round_nearest_even: + if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { + return packFloat64( aSign, 0x3FF, 0 ); + } + break; + case float_round_ties_away: + if (aExp == 0x3FE) { + return packFloat64(aSign, 0x3ff, 0); + } + break; + case float_round_down: + return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0); + case float_round_up: + return make_float64( + aSign ? 
LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 )); + } + return packFloat64( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x433 - aExp; + roundBitsMask = lastBitMask - 1; + z = float64_val(a); + switch (status->float_rounding_mode) { + case float_round_nearest_even: + z += lastBitMask >> 1; + if ((z & roundBitsMask) == 0) { + z &= ~lastBitMask; + } + break; + case float_round_ties_away: + z += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat64Sign(make_float64(z))) { + z += roundBitsMask; + } + break; + case float_round_down: + if (extractFloat64Sign(make_float64(z))) { + z += roundBitsMask; + } + break; + default: + abort(); + } + z &= ~ roundBitsMask; + if (z != float64_val(a)) { + status->float_exception_flags |= float_flag_inexact; + } + return make_float64(z); + +} +float64 float64_trunc_to_int(float64 a, float_status *status) +{ + int oldmode; + float64 res; + oldmode = status->float_rounding_mode; + status->float_rounding_mode = float_round_to_zero; + res = float64_round_to_int(a, status); + status->float_rounding_mode = oldmode; + return res; } /*---------------------------------------------------------------------------- @@ -2299,66 +3885,81 @@ float64 float64_round_to_int( float64 a ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +static float64 addFloat64Sigs(float64 a, float64 b, flag zSign, + float_status *status) { - int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; - int16 expDiff; + int aExp, bExp, zExp; + uint64_t aSig, bSig, zSig; + int expDiff; - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - expDiff = aExp - bExp; - aSig <<= 9; - bSig <<= 9; - if ( 0 < expDiff ) { - if ( aExp == 0x7FF ) { - if ( aSig ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= LIT64( 0x2000000000000000 ); - } - shift64RightJamming( bSig, expDiff, &bSig ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= LIT64( 0x2000000000000000 ); - } - shift64RightJamming( aSig, - expDiff, &aSig ); - zExp = bExp; - } - else { - if ( aExp == 0x7FF ) { - if ( aSig | bSig ) return propagateFloat64NaN( a, b ); - return a; - } - if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); - zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; - zExp = aExp; - goto roundAndPack; - } - aSig |= LIT64( 0x2000000000000000 ); - zSig = ( aSig + bSig )<<1; - --zExp; - if ( (sbits64) zSig < 0 ) { - zSig = aSig + bSig; - ++zExp; - } - roundAndPack: - return roundAndPackFloat64( zSign, zExp, zSig ); + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 9; + bSig <<= 9; + if ( 0 < expDiff ) { + if ( aExp == 0x7FF ) { + if (aSig) { + return propagateFloat64NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 
0x2000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FF ) { + if (bSig) { + return propagateFloat64NaN(a, b, status); + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x2000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0x7FF ) { + if (aSig | bSig) { + return propagateFloat64NaN(a, b, status); + } + return a; + } + if ( aExp == 0 ) { + if (status->flush_to_zero) { + if (aSig | bSig) { + float_raise(float_flag_output_denormal, status); + } + return packFloat64(zSign, 0, 0); + } + return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); + } + zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= LIT64( 0x2000000000000000 ); + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (int64_t) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat64(zSign, zExp, zSig, status); } @@ -2370,70 +3971,77 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) | Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; - int16 expDiff; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - expDiff = aExp - bExp; - aSig <<= 10; - bSig <<= 10; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FF ) { - if ( aSig | bSig ) return propagateFloat64NaN( a, b ); - float_raise( float_flag_invalid ); - return float64_default_nan; - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); - bExpBigger: - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign ^ 1, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= LIT64( 0x4000000000000000 ); - } - shift64RightJamming( aSig, - expDiff, &aSig ); - bSig |= LIT64( 0x4000000000000000 ); - bBigger: - zSig = bSig - aSig; - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FF ) { - if ( aSig ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= LIT64( 0x4000000000000000 ); - } - shift64RightJamming( bSig, expDiff, &bSig ); - aSig |= LIT64( 0x4000000000000000 ); - aBigger: - zSig = aSig - bSig; - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); +static float64 subFloat64Sigs(float64 a, float64 b, flag zSign, + float_status *status) +{ + int aExp, bExp, zExp; + uint64_t aSig, bSig, zSig; + int expDiff; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + aSig <<= 10; + 
bSig <<= 10; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FF ) { + if (aSig | bSig) { + return propagateFloat64NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat64(status->float_rounding_mode == float_round_down, 0, 0); + bExpBigger: + if ( bExp == 0x7FF ) { + if (bSig) { + return propagateFloat64NaN(a, b, status); + } + return packFloat64( zSign ^ 1, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( aSig, - expDiff, &aSig ); + bSig |= LIT64( 0x4000000000000000 ); + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FF ) { + if (aSig) { + return propagateFloat64NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= LIT64( 0x4000000000000000 ); + } + shift64RightJamming( bSig, expDiff, &bSig ); + aSig |= LIT64( 0x4000000000000000 ); + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status); } @@ -2443,18 +4051,20 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) | Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float64 float64_add( float64 a, float64 b ) +float64 float64_add(float64 a, float64 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign == bSign ) { - return addFloat64Sigs( a, b, aSign ); - } - else { - return subFloat64Sigs( a, b, aSign ); - } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return addFloat64Sigs(a, b, aSign, status); + } + else { + return subFloat64Sigs(a, b, aSign, status); + } } @@ -2464,18 +4074,20 @@ float64 float64_add( float64 a, float64 b ) | for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 float64_sub( float64 a, float64 b ) +float64 float64_sub(float64 a, float64 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign == bSign ) { - return subFloat64Sigs( a, b, aSign ); - } - else { - return addFloat64Sigs( a, b, aSign ); - } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return subFloat64Sigs(a, b, aSign, status); + } + else { + return addFloat64Sigs(a, b, aSign, status); + } } @@ -2485,55 +4097,60 @@ float64 float64_sub( float64 a, float64 b ) | for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float64 float64_mul( float64 a, float64 b ) +float64 float64_mul(float64 a, float64 b, float_status *status) { - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; + flag aSign, bSign, zSign; + int aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FF ) { - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { - return propagateFloat64NaN( a, b ); - } - if ( ( bExp | bSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float64_default_nan; - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float64_default_nan; - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FF; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - zSig0 |= ( zSig1 != 0 ); - if ( 0 <= (sbits64) ( zSig0<<1 ) ) { - zSig0 <<= 1; - --zExp; - } - return roundAndPackFloat64( zSign, zExp, zSig0 ); + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; 
+ if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN(a, b, status); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if (bSig) { + return propagateFloat64NaN(a, b, status); + } + if ( ( aExp | aSig ) == 0 ) { + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FF; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); + if ( 0 <= (int64_t) ( zSig0<<1 ) ) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat64(zSign, zExp, zSig0, status); } @@ -2543,67 +4160,75 @@ float64 float64_mul( float64 a, float64 b ) | the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float64 float64_div( float64 a, float64 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; - bits64 rem0, rem1; - bits64 term0, term1; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FF ) { - if ( aSig ) return propagateFloat64NaN( a, b ); - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - float_raise( float_flag_invalid ); - return float64_default_nan; - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float64_default_nan; - } - float_raise( float_flag_divbyzero ); - return packFloat64( zSign, 0x7FF, 0 ); - } - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FD; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - if ( bSig <= ( aSig + aSig ) ) { - aSig >>= 1; - ++zExp; - } - zSig = estimateDiv128To64( aSig, 0, bSig ); - if ( ( zSig & 0x1FF ) <= 2 ) { - mul64To128( bSig, zSig, &term0, &term1 ); - sub128( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig |= ( rem1 != 0 ); - } - return roundAndPackFloat64( zSign, zExp, zSig ); +float64 float64_div(float64 a, float64 b, float_status *status) +{ + flag aSign, bSign, zSign; + int aExp, bExp, zExp; + uint64_t aSig, bSig, zSig; + uint64_t rem0, rem1; + 
uint64_t term0, term1; + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if (aSig) { + return propagateFloat64NaN(a, b, status); + } + if ( bExp == 0x7FF ) { + if (bSig) { + return propagateFloat64NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + return packFloat64( zSign, 0x7FF, 0 ); + } + if ( bExp == 0x7FF ) { + if (bSig) { + return propagateFloat64NaN(a, b, status); + } + return packFloat64( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + float_raise(float_flag_divbyzero, status); + return packFloat64( zSign, 0x7FF, 0 ); + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FD; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv128To64( aSig, 0, bSig ); + if ( ( zSig & 0x1FF ) <= 2 ) { + mul64To128( bSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); + } + return roundAndPackFloat64(zSign, zExp, zSig, status); } @@ -2613,83 +4238,333 @@ float64 float64_div( float64 a, float64 b ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float64 float64_rem( float64 a, float64 b ) +float64 float64_rem(float64 a, float64 b, float_status *status) { - flag aSign, zSign; - int16 aExp, bExp, expDiff; - bits64 aSig, bSig; - bits64 q, alternateASig; - sbits64 sigMean; + flag aSign, zSign; + int aExp, bExp, expDiff; + uint64_t aSig, bSig; + uint64_t q, alternateASig; + int64_t sigMean; - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); -// bSign = extractFloat64Sign( b ); - if ( aExp == 0x7FF ) { - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { - return propagateFloat64NaN( a, b ); - } - float_raise( float_flag_invalid ); - return float64_default_nan; - } - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise( float_flag_invalid ); - return float64_default_nan; - } - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - aSig = - ( ( bSig>>2 ) * q ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? 
q - 2 : 0; - q >>= 64 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (sbits64) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (sbits64) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig = extractFloat64Frac( b ); + bExp = extractFloat64Exp( b ); + if ( aExp == 0x7FF ) { + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { + return propagateFloat64NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + if ( bExp == 0x7FF ) { + if (bSig) { + return propagateFloat64NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + normalizeFloat64Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig, 0, bSig ); + q = ( 2 < q ) ? 
q - 2 : 0; + q >>= 64 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (int64_t) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (int64_t) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the double-precision floating-point values +| `a' and `b' then adding 'c', with no intermediate rounding step after the +| multiplication. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic 754-2008. +| The flags argument allows the caller to select negation of the +| addend, the intermediate product, or the final result. (The difference +| between this and having the caller do a separate negation is that negating +| externally will flip the sign bit on NaNs.) 
+*----------------------------------------------------------------------------*/ + +float64 float64_muladd(float64 a, float64 b, float64 c, int flags, + float_status *status) +{ + flag aSign, bSign, cSign, zSign; + int aExp, bExp, cExp, pExp, zExp, expDiff; + uint64_t aSig, bSig, cSig; + flag pInf, pZero, pSign; + uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1; + int shiftcount; + flag signflip, infzero; + + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + c = float64_squash_input_denormal(c, status); + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + bSig = extractFloat64Frac(b); + bExp = extractFloat64Exp(b); + bSign = extractFloat64Sign(b); + cSig = extractFloat64Frac(c); + cExp = extractFloat64Exp(c); + cSign = extractFloat64Sign(c); + + infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) || + (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0)); + + /* It is implementation-defined whether the cases of (0,inf,qnan) + * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN + * they return if they do), so we have to hand this information + * off to the target-specific pick-a-NaN routine. + */ + if (((aExp == 0x7ff) && aSig) || + ((bExp == 0x7ff) && bSig) || + ((cExp == 0x7ff) && cSig)) { + return propagateFloat64MulAddNaN(a, b, c, infzero, status); + } + + if (infzero) { + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + + if (flags & float_muladd_negate_c) { + cSign ^= 1; + } + + signflip = (flags & float_muladd_negate_result) ? 
1 : 0; + + /* Work out the sign and type of the product */ + pSign = aSign ^ bSign; + if (flags & float_muladd_negate_product) { + pSign ^= 1; + } + pInf = (aExp == 0x7ff) || (bExp == 0x7ff); + pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0); + + if (cExp == 0x7ff) { + if (pInf && (pSign ^ cSign)) { + /* addition of opposite-signed infinities => InvalidOperation */ + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + /* Otherwise generate an infinity of the same sign */ + return packFloat64(cSign ^ signflip, 0x7ff, 0); + } + + if (pInf) { + return packFloat64(pSign ^ signflip, 0x7ff, 0); + } + if (pZero) { + if (cExp == 0) { + if (cSig == 0) { + /* Adding two exact zeroes */ + if (pSign == cSign) { + zSign = pSign; + } else if (status->float_rounding_mode == float_round_down) { + zSign = 1; + } else { + zSign = 0; + } + return packFloat64(zSign ^ signflip, 0, 0); + } + /* Exact zero plus a denorm */ + if (status->flush_to_zero) { + float_raise(float_flag_output_denormal, status); + return packFloat64(cSign ^ signflip, 0, 0); + } + } + /* Zero plus something non-zero : just return the something */ + if (flags & float_muladd_halve_result) { + if (cExp == 0) { + normalizeFloat64Subnormal(cSig, &cExp, &cSig); + } + /* Subtract one to halve, and one again because roundAndPackFloat64 + * wants one less than the true exponent. + */ + cExp -= 2; + cSig = (cSig | 0x0010000000000000ULL) << 10; + return roundAndPackFloat64(cSign ^ signflip, cExp, cSig, status); + } + return packFloat64(cSign ^ signflip, cExp, cSig); + } + + if (aExp == 0) { + normalizeFloat64Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + normalizeFloat64Subnormal(bSig, &bExp, &bSig); + } + + /* Calculate the actual result a * b + c */ + + /* Multiply first; this is easy. */ + /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff + * because we want the true exponent, not the "one-less-than" + * flavour that roundAndPackFloat64() takes. 
+ */ + pExp = aExp + bExp - 0x3fe; + aSig = (aSig | LIT64(0x0010000000000000))<<10; + bSig = (bSig | LIT64(0x0010000000000000))<<11; + mul64To128(aSig, bSig, &pSig0, &pSig1); + if ((int64_t)(pSig0 << 1) >= 0) { + shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1); + pExp--; + } + + zSign = pSign ^ signflip; + + /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit + * bit in position 126. + */ + if (cExp == 0) { + if (!cSig) { + /* Throw out the special case of c being an exact zero now */ + shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1); + if (flags & float_muladd_halve_result) { + pExp--; + } + return roundAndPackFloat64(zSign, pExp - 1, + pSig1, status); + } + normalizeFloat64Subnormal(cSig, &cExp, &cSig); + } + + /* Shift cSig and add the explicit bit so [cSig0:cSig1] is the + * significand of the addend, with the explicit bit in position 126. + */ + cSig0 = cSig << (126 - 64 - 52); + cSig1 = 0; + cSig0 |= LIT64(0x4000000000000000); + expDiff = pExp - cExp; + + if (pSign == cSign) { + /* Addition */ + if (expDiff > 0) { + /* scale c to match p */ + shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1); + zExp = pExp; + } else if (expDiff < 0) { + /* scale p to match c */ + shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1); + zExp = cExp; + } else { + /* no scaling needed */ + zExp = cExp; + } + /* Add significands and make sure explicit bit ends up in posn 126 */ + add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); + if ((int64_t)zSig0 < 0) { + shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1); + } else { + zExp--; + } + shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1); + if (flags & float_muladd_halve_result) { + zExp--; + } + return roundAndPackFloat64(zSign, zExp, zSig1, status); + } else { + /* Subtraction */ + if (expDiff > 0) { + shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1); + sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); + zExp = pExp; + } else if (expDiff < 0) { + 
shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1); + sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1); + zExp = cExp; + zSign ^= 1; + } else { + zExp = pExp; + if (lt128(cSig0, cSig1, pSig0, pSig1)) { + sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); + } else if (lt128(pSig0, pSig1, cSig0, cSig1)) { + sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1); + zSign ^= 1; + } else { + /* Exact zero */ + zSign = signflip; + if (status->float_rounding_mode == float_round_down) { + zSign ^= 1; + } + return packFloat64(zSign, 0, 0); + } + } + --zExp; + /* Do the equivalent of normalizeRoundAndPackFloat64() but + * starting with the significand in a pair of uint64_t. + */ + if (zSig0) { + shiftcount = countLeadingZeros64(zSig0) - 1; + shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1); + if (zSig1) { + zSig0 |= 1; + } + zExp -= shiftcount; + } else { + shiftcount = countLeadingZeros64(zSig1); + if (shiftcount == 0) { + zSig0 = (zSig1 >> 1) | (zSig1 & 1); + zExp -= 63; + } else { + shiftcount--; + zSig0 = zSig1 << shiftcount; + zExp -= (shiftcount + 64); + } + } + if (flags & float_muladd_halve_result) { + zExp--; + } + return roundAndPackFloat64(zSign, zExp, zSig0, status); + } } /*---------------------------------------------------------------------------- @@ -2698,135 +4573,232 @@ float64 float64_rem( float64 a, float64 b ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float64 float64_sqrt( float64 a ) +float64 float64_sqrt(float64 a, float_status *status) { - flag aSign; - int16 aExp, zExp; - bits64 aSig, zSig, doubleZSig; - bits64 rem0, rem1, term0, term1; -// float64 z; + flag aSign; + int aExp, zExp; + uint64_t aSig, zSig, doubleZSig; + uint64_t rem0, rem1, term0, term1; + a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return propagateFloat64NaN( a, a ); - if ( ! aSign ) return a; - float_raise( float_flag_invalid ); - return float64_default_nan; - } - if ( aSign ) { - if ( ( aExp | aSig ) == 0 ) return a; - float_raise( float_flag_invalid ); - return float64_default_nan; - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return 0; - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; - aSig |= LIT64( 0x0010000000000000 ); - zSig = estimateSqrt32( aExp, aSig>>21 ); - aSig <<= 9 - ( aExp & 1 ); - zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); - if ( ( zSig & 0x1FF ) <= 5 ) { - doubleZSig = zSig<<1; - mul64To128( zSig, zSig, &term0, &term1 ); - sub128( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig; - doubleZSig -= 2; - add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); - } - zSig |= ( ( rem0 | rem1 ) != 0 ); - } - return roundAndPackFloat64( 0, zExp, zSig ); + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if (aSig) { + return propagateFloat64NaN(a, a, status); + } + if ( ! 
aSign ) return a; + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return float64_zero; + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; + aSig |= LIT64( 0x0010000000000000 ); + zSig = estimateSqrt32( aExp, aSig>>21 ); + aSig <<= 9 - ( aExp & 1 ); + zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); + if ( ( zSig & 0x1FF ) <= 5 ) { + doubleZSig = zSig<<1; + mul64To128( zSig, zSig, &term0, &term1 ); + sub128( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig; + doubleZSig -= 2; + add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); + } + return roundAndPackFloat64(0, zExp, zSig, status); + +} + +/*---------------------------------------------------------------------------- +| Returns the binary log of the double-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ +float64 float64_log2(float64 a, float_status *status) +{ + flag aSign, zSign; + int aExp; + uint64_t aSig, aSig0, aSig1, zSig, i; + a = float64_squash_input_denormal(a, status); + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + if ( aSign ) { + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + if ( aExp == 0x7FF ) { + if (aSig) { + return propagateFloat64NaN(a, float64_zero, status); + } + return a; + } + + aExp -= 0x3FF; + aSig |= LIT64( 0x0010000000000000 ); + zSign = aExp < 0; + zSig = (uint64_t)aExp << 52; + for (i = 1LL << 51; i > 0; i >>= 1) { + mul64To128( aSig, aSig, &aSig0, &aSig1 ); + aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 ); + if ( aSig & LIT64( 0x0020000000000000 ) ) { + aSig >>= 1; + zSig |= i; + } + } + if ( zSign ) + zSig = -zSig; + return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status); } /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The comparison is performed +| corresponding value `b', and 0 otherwise. The invalid exception is raised +| if either operand is a NaN. Otherwise, the comparison is performed | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float64_eq( float64 a, float64 b ) +int float64_eq(float64 a, float64 b, float_status *status) { - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + uint64_t av, bv; + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + av = float64_val(a); + bv = float64_val(b); + return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); } /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| equal to the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float64_le( float64 a, float64 b ) +int float64_le(float64 a, float64 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; + uint64_t av, bv; + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + av = float64_val(a); + bv = float64_val(b); + if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); } /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. The comparison is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float64_lt( float64 a, float64 b ) +int float64_lt(float64 a, float64 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; + uint64_t av, bv; - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + av = float64_val(a); + bv = float64_val(b); + if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); + +} +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. The invalid exception is raised if either +| operand is a NaN. The comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float64_unordered(float64 a, float64 b, float_status *status) +{ + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + float_raise(float_flag_invalid, status); + return 1; + } + return 0; } /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The invalid exception is raised -| if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. The comparison is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float64_eq_signaling( float64 a, float64 b ) +int float64_eq_quiet(float64 a, float64 b, float_status *status) { - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + uint64_t av, bv; + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if (float64_is_signaling_nan(a, status) + || float64_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + av = float64_val(a); + bv = float64_val(b); + return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); } @@ -2837,23 +4809,28 @@ flag float64_eq_signaling( float64 a, float64 b ) | IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float64_le_quiet( float64 a, float64 b ) +int float64_le_quiet(float64 a, float64 b, float_status *status) { - flag aSign, bSign; -// int16 aExp, bExp; + flag aSign, bSign; + uint64_t av, bv; + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if (float64_is_signaling_nan(a, status) + || float64_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + av = float64_val(a); + bv = float64_val(b); + if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 ); + return ( av == bv ) || ( aSign ^ ( av < bv ) ); } @@ -2864,26 +4841,54 @@ flag float64_le_quiet( float64 a, float64 b ) | Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float64_lt_quiet( float64 a, float64 b ) +int float64_lt_quiet(float64 a, float64 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; + uint64_t av, bv; + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if (float64_is_signaling_nan(a, status) + || float64_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + av = float64_val(a); + bv = float64_val(b); + if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 ); + return ( av != bv ) && ( aSign ^ ( av < bv ) ); } -#ifdef FLOATX80 +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float64_unordered_quiet(float64 a, float64 b, float_status *status) +{ + a = float64_squash_input_denormal(a, status); + b = float64_squash_input_denormal(b, status); + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) + ) { + if (float64_is_signaling_nan(a, status) + || float64_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 1; + } + return 0; +} /*---------------------------------------------------------------------------- | Returns the result of converting the extended double-precision floating- @@ -2895,20 +4900,24 @@ flag float64_lt_quiet( float64 a, float64 b ) | overflows, the largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -int32 floatx80_to_int32( floatx80 a ) +int32_t floatx80_to_int32(floatx80 a, float_status *status) { - flag aSign; - int32 aExp, shiftCount; - bits64 aSig; + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig; - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32( aSign, aSig ); + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1 << 31; + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32(aSign, aSig, status); } @@ -2922,39 +4931,44 @@ int32 floatx80_to_int32( floatx80 
a ) | sign as `a' is returned. *----------------------------------------------------------------------------*/ -int32 floatx80_to_int32_round_to_zero( floatx80 a ) +int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) { - flag aSign; - int32 aExp, shiftCount; - bits64 aSig, savedASig; - int32 z; + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig, savedASig; + int32_t z; - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - shiftCount = 0x403E - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - z = (sbits32) z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise( float_flag_invalid ); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<<shiftCount ) != savedASig ) { - float_exception_flags |= float_flag_inexact; - } - return z; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1 << 31; + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if (aExp || aSig) { + status->float_exception_flags |= float_flag_inexact; + } + return 0; + } + shiftCount = 0x403E - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<<shiftCount ) != savedASig ) { + status->float_exception_flags |= float_flag_inexact; + } + return z; } @@ -2968,37 +4982,41 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a ) | overflows, the largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -int64 floatx80_to_int64( floatx80 a ) -{ - flag aSign; - int32 aExp, shiftCount; - bits64 aSig, aSigExtra; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = 0x403E - aExp; - if ( shiftCount <= 0 ) { - if ( shiftCount ) { - float_raise( float_flag_invalid ); - if ( !
aSign - || ( ( aExp == 0x7FFF ) - && ( aSig != LIT64( 0x8000000000000000 ) ) ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - aSigExtra = 0; - } - else { - shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); - } - return roundAndPackInt64( aSign, aSig, aSigExtra ); - -} +int64_t floatx80_to_int64(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig, aSigExtra; -/*---------------------------------------------------------------------------- + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1ULL << 63; + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + shiftCount = 0x403E - aExp; + if ( shiftCount <= 0 ) { + if ( shiftCount ) { + float_raise(float_flag_invalid, status); + if ( ! aSign + || ( ( aExp == 0x7FFF ) + && ( aSig != LIT64( 0x8000000000000000 ) ) ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + aSigExtra = 0; + } + else { + shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); + } + return roundAndPackInt64(aSign, aSig, aSigExtra, status); + +} + +/*---------------------------------------------------------------------------- | Returns the result of converting the extended double-precision floating- | point value `a' to the 64-bit two's complement integer format. The | conversion is performed according to the IEC/IEEE Standard for Binary @@ -3008,37 +5026,43 @@ int64 floatx80_to_int64( floatx80 a ) | sign as `a' is returned. 
*----------------------------------------------------------------------------*/ -int64 floatx80_to_int64_round_to_zero( floatx80 a ) -{ - flag aSign; - int32 aExp, shiftCount; - bits64 aSig; - int64 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = aExp - 0x403E; - if ( 0 <= shiftCount ) { - aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); - if ( ( a.high != 0xC03E ) || aSig ) { - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - else if ( aExp < 0x3FFF ) { - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - z = aSig>>( - shiftCount ); - if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - if ( aSign ) z = - z; - return z; +int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig; + int64_t z; + + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1ULL << 63; + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + shiftCount = aExp - 0x403E; + if ( 0 <= shiftCount ) { + aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); + if ( ( a.high != 0xC03E ) || aSig ) { + float_raise(float_flag_invalid, status); + if ( ! 
aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + else if ( aExp < 0x3FFF ) { + if (aExp | aSig) { + status->float_exception_flags |= float_flag_inexact; + } + return 0; + } + z = aSig>>( - shiftCount ); + if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { + status->float_exception_flags |= float_flag_inexact; + } + if ( aSign ) z = - z; + return z; } @@ -3049,24 +5073,28 @@ int64 floatx80_to_int64_round_to_zero( floatx80 a ) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 floatx80_to_float32( floatx80 a ) +float32 floatx80_to_float32(floatx80 a, float_status *status) { - flag aSign; - int32 aExp; - bits64 aSig; + flag aSign; + int32_t aExp; + uint64_t aSig; - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); - } - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 33, &aSig ); - if ( aExp || aSig ) aExp -= 0x3F81; - return roundAndPackFloat32( aSign, aExp, aSig ); + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) { + return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig, 33, &aSig ); + if ( aExp || aSig ) aExp -= 0x3F81; + return roundAndPackFloat32(aSign, aExp, aSig, status); } @@ -3077,29 +5105,31 @@ float32 floatx80_to_float32( floatx80 a ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float64 floatx80_to_float64( floatx80 a ) +float64 floatx80_to_float64(floatx80 a, float_status *status) { - flag aSign; - int32 aExp; - bits64 aSig, zSig; + flag aSign; + int32_t aExp; + uint64_t aSig, zSig; - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); - } - return packFloat64( aSign, 0x7FF, 0 ); - } - shift64RightJamming( aSig, 1, &zSig ); - if ( aExp || aSig ) aExp -= 0x3C01; - return roundAndPackFloat64( aSign, aExp, zSig ); + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) { + return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shift64RightJamming( aSig, 1, &zSig ); + if ( aExp || aSig ) aExp -= 0x3C01; + return roundAndPackFloat64(aSign, aExp, zSig, status); } -#ifdef FLOATX128 - /*---------------------------------------------------------------------------- | Returns the result of converting the extended double-precision floating- | point value `a' to the quadruple-precision floating-point format. The @@ -3107,67 +5137,62 @@ float64 floatx80_to_float64( floatx80 a ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float128 floatx80_to_float128( floatx80 a ) +float128 floatx80_to_float128(floatx80 a, float_status *status) { - flag aSign; - int16 aExp; - bits64 aSig, zSig0, zSig1; + flag aSign; + int aExp; + uint64_t aSig, zSig0, zSig1; - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); - } + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return float128_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { + return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status); + } +#ifdef SOFTFLOAT_68K + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } +#endif shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp, zSig0, zSig1 ); + return packFloat128( aSign, aExp, zSig0, zSig1 ); } -#endif - -// 30-01-2016: Added for Previous - -floatx80 floatx80_normalize( floatx80 a ) +#ifdef SOFTFLOAT_68K // 30-01-2016: Added for Previous +floatx80 floatx80_round32( floatx80 a, float_status *status ) { flag aSign; - int16 aExp; - bits64 aSig; + int16_t aExp; + uint64_t aSig; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); - if (aSig == 0) { - aExp = 0; - return packFloatx80( aSign, aExp, aSig ); - } - while ( (aSig & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ) { - if ( aExp == 0 ) { - float_raise( float_flag_denormal ); - break; - } - aSig = aSig << 1; - aExp--; - } - return packFloatx80( aSign, aExp, aSig ); - + return roundAndPackFloatx80(32, aSign, aExp, aSig, 0, status); + } -floatx80 
floatx80_round32( floatx80 a ) +floatx80 floatx80_round64( floatx80 a, float_status *status ) { flag aSign; - int16 aExp; - bits64 aSig; + int16_t aExp; + uint64_t aSig; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); - return roundAndPackFloatx80(32, aSign, aExp, aSig, 0); - + return roundAndPackFloatx80(64, aSign, aExp, aSig, 0, status); + } -// end of addition for Previous +#endif // end of addition for Previous /*---------------------------------------------------------------------------- | Rounds the extended double-precision floating-point value `a' to an integer, @@ -3176,93 +5201,126 @@ floatx80 floatx80_round32( floatx80 a ) | Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 floatx80_round_to_int( floatx80 a ) +floatx80 floatx80_round_to_int(floatx80 a, float_status *status) { - flag aSign; - int32 aExp; - bits64 lastBitMask, roundBitsMask; - int8 roundingMode; - floatx80 z; + flag aSign; + int32_t aExp; + uint64_t lastBitMask, roundBitsMask; + floatx80 z; - aExp = extractFloatx80Exp( a ); - if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaN( a, a ); - } - return a; - } - if ( aExp < 0x3FFF ) { - if ( ( aExp == 0 ) - && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { - return a; - } - float_exception_flags |= float_flag_inexact; - aSign = extractFloatx80Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) - ) { - return - packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - break; - case float_round_down: - return - aSign ? - packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) - : packFloatx80( 0, 0, 0 ); - case float_round_up: - return - aSign ? 
packFloatx80( 1, 0, 0 ) - : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - return packFloatx80( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x403E - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z.low += lastBitMask>>1; - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { - z.low += roundBitsMask; - } - } - z.low &= ~ roundBitsMask; - if ( z.low == 0 ) { - ++z.high; - z.low = LIT64( 0x8000000000000000 ); - } - if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; - return z; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { + return propagateFloatx80NaN(a, a, status); + } + return a; + } + if ( aExp < 0x3FFF ) { + if ( ( aExp == 0 ) + #ifdef SOFTFLOAT_68K + && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { +#else + && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { +#endif + return a; + } + status->float_exception_flags |= float_flag_inexact; + aSign = extractFloatx80Sign( a ); + switch (status->float_rounding_mode) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) + ) { + return + packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000)); + } + break; + case float_round_down: + return + aSign ? + packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) + : packFloatx80( 0, 0, 0 ); + case float_round_up: + return + aSign ? 
packFloatx80( 1, 0, 0 ) + : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + return packFloatx80( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + switch (status->float_rounding_mode) { + case float_round_nearest_even: + z.low += lastBitMask>>1; + if ((z.low & roundBitsMask) == 0) { + z.low &= ~lastBitMask; + } + break; + case float_round_ties_away: + z.low += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + case float_round_down: + if (extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + default: + abort(); + } + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); + } + if (z.low != a.low) { + status->float_exception_flags |= float_flag_inexact; + } + return z; } -// 09-01-2017: Added for Previous -floatx80 floatx80_round_to_int_toward_zero( floatx80 a ) +#ifdef SOFTFLOAT_68K // 09-01-2017: Added for Previous +floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status) { flag aSign; - int32 aExp; - bits64 lastBitMask, roundBitsMask; + int32_t aExp; + uint64_t lastBitMask, roundBitsMask; floatx80 z; aExp = extractFloatx80Exp( a ); if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaN( a, a ); + if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { + return propagateFloatx80NaN( a, a, status ); } return a; } if ( aExp < 0x3FFF ) { if ( ( aExp == 0 ) - && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { +#ifdef SOFTFLOAT_68K + && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { +#else + && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { +#endif return a; } - float_exception_flags |= float_flag_inexact; + status->float_exception_flags |= float_flag_inexact; aSign = extractFloatx80Sign( a ); 
return packFloatx80( aSign, 0, 0 ); } @@ -3275,11 +5333,11 @@ floatx80 floatx80_round_to_int_toward_zero( floatx80 a ) ++z.high; z.low = LIT64( 0x8000000000000000 ); } - if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; + if ( z.low != a.low ) status->float_exception_flags |= float_flag_inexact; return z; } -// End of addition for Previous +#endif // End of addition for Previous /*---------------------------------------------------------------------------- | Returns the result of adding the absolute values of the extended double- @@ -3289,62 +5347,81 @@ floatx80 floatx80_round_to_int_toward_zero( floatx80 a ) | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, + float_status *status) { - int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - int32 expDiff; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + int32_t expDiff; - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return a; - } - if ( bExp == 0 ) --expDiff; - shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - zExp = aExp; + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); +#ifdef SOFTFLOAT_68K + if ( aExp == 0 ) { + if ( aSig == 0 ) return b; + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) ++expDiff; - shift64ExtraRightJamming( aSig, 0, 
- expDiff, &aSig, &zSig1 ); - zExp = bExp; + if ( bExp == 0 ) { + if ( bSig == 0 ) return a; + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } - else { - if ( aExp == 0x7FFF ) { - if ( (bits64) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN( a, b ); - } - return a; - } - zSig1 = 0; - zSig0 = aSig + bSig; +#endif + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return a; + } +#ifndef SOFTFLOAT_68K + if ( bExp == 0 ) --expDiff; +#endif + shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } +#ifndef SOFTFLOAT_68K + if ( aExp == 0 ) ++expDiff; +#endif + shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN(a, b, status); + } + return a; + } + zSig1 = 0; + zSig0 = aSig + bSig; + #ifndef SOFTFLOAT_68K if ( aExp == 0 ) { normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); goto roundAndPack; } - zExp = aExp; - goto shiftRight1; - } - zSig0 = aSig + bSig; - if ( (sbits64) zSig0 < 0 ) goto roundAndPack; - shiftRight1: - shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= LIT64( 0x8000000000000000 ); - ++zExp; - roundAndPack: - return - roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); - +#endif + zExp = aExp; + goto shiftRight1; + } + zSig0 = aSig + bSig; + if ( (int64_t) zSig0 < 0 ) goto roundAndPack; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= LIT64( 0x8000000000000000 ); + ++zExp; + roundAndPack: + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, 
status); } /*---------------------------------------------------------------------------- @@ -3355,64 +5432,70 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) +static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, + float_status *status) { - int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - int32 expDiff; - floatx80 z; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + int32_t expDiff; - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( (bits64) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN( a, b ); - } - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + #ifndef SOFTFLOAT_68K if ( aExp == 0 ) { aExp = 1; bExp = 1; } - zSig1 = 0; - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); - bExpBigger: - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } +#endif + zSig1 = 
0; + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0); + bExpBigger: + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } +#ifndef SOFTFLOAT_68K if ( aExp == 0 ) ++expDiff; - shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - bBigger: - sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return a; - } +#endif + shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + bBigger: + sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return a; + } +#ifndef SOFTFLOAT_68K if ( bExp == 0 ) --expDiff; - shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - aBigger: - sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - return - normalizeRoundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); - +#endif + shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + aBigger: + sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); } /*---------------------------------------------------------------------------- @@ -3421,18 +5504,22 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) | Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -floatx80 floatx80_add( floatx80 a, floatx80 b ) +floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return addFloatx80Sigs( a, b, aSign ); - } - else { - return subFloatx80Sigs( a, b, aSign ); - } + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return addFloatx80Sigs(a, b, aSign, status); + } + else { + return subFloatx80Sigs(a, b, aSign, status); + } } @@ -3442,18 +5529,22 @@ floatx80 floatx80_add( floatx80 a, floatx80 b ) | IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 floatx80_sub( floatx80 a, floatx80 b ) +floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return subFloatx80Sigs( a, b, aSign ); - } - else { - return addFloatx80Sigs( a, b, aSign ); - } + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return subFloatx80Sigs(a, b, aSign, status); + } + else { + return addFloatx80Sigs(a, b, aSign, status); + } } @@ -3463,57 +5554,58 @@ floatx80 floatx80_sub( floatx80 a, floatx80 b ) | IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -floatx80 floatx80_mul( floatx80 a, floatx80 b ) -{ - flag aSign, bSign, zSign; - int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b ); - } - if ( ( bExp | bSig ) == 0 ) goto invalid; - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FFE; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (sbits64) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - return - roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); +floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a 
); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN(a, b, status); + } + if ( ( bExp | bSig ) == 0 ) goto invalid; + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (int64_t) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); } /*---------------------------------------------------------------------------- @@ -3522,184 +5614,191 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -floatx80 floatx80_div( floatx80 a, floatx80 b ) -{ - flag aSign, bSign, zSign; - int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - bits64 rem0, rem1, rem2, term0, term1, term2; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - goto invalid; - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return packFloatx80( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - float_raise( float_flag_divbyzero ); - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FFE; - rem1 = 0; - if ( bSig <= aSig ) { - shift128Right( aSig, 0, 1, &aSig, &rem1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig, rem1, bSig ); - mul64To128( bSig, zSig0, &term0, &term1 ); - sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig0; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (bits64) ( zSig1<<1 ) <= 8 ) { - mul64To128( bSig, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - 
while ( (sbits64) rem1 < 0 ) { - --zSig1; - add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); - } - zSig1 |= ( ( rem1 | rem2 ) != 0 ); - } - return - roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); - -} +floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + uint64_t rem0, rem1, rem2, term0, term1, term2; -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -#if 0 -floatx80 floatx80_rem( floatx80 a, floatx80 b ) -{ - flag aSign, zSign; - int32 aExp, bExp, expDiff; - bits64 aSig0, aSig1, bSig; - bits64 q, term0, term1, alternateASig0, alternateASig1; - floatx80 z; - - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); -// bSign = extractFloatx80Sign( b ); - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b ); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( (bits64) ( aSig0<<1 ) == 0 ) return a; - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - bSig |= LIT64( 0x8000000000000000 ); - zSign = aSign; - expDiff = aExp - bExp; - aSig1 
= 0; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); - expDiff = 0; - } - q = ( bSig <= aSig0 ); - if ( q ) aSig0 -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - mul64To128( bSig, q, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - q >>= 64 - expDiff; - mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++q; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - } - else { - term1 = 0; - term0 = bSig; - } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! 
zSign; - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1 ); + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + goto invalid; + } + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return packFloatx80( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + float_raise(float_flag_divbyzero, status); + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (int64_t) rem1 < 0 ) 
{ + --zSig1; + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); + } + zSig1 |= ( ( rem1 | rem2 ) != 0 ); + } + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); +} + +/*---------------------------------------------------------------------------- +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +#ifndef SOFTFLOAT_68K +floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, zSign; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig; + uint64_t q, term0, term1, alternateASig0, alternateASig1; + + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN(a, b, status); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + aSig1 = 0; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + shift128Right( aSig0, 0, 1, &aSig0, 
&aSig1 ); + expDiff = 0; + } + q = ( bSig <= aSig0 ); + if ( q ) aSig0 -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + mul64To128( bSig, q, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++q; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + } + } + else { + term1 = 0; + term0 = bSig; + } + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! 
zSign; + } + return + normalizeRoundAndPackFloatx80( + 80, zSign, bExp + expDiff, aSig0, aSig1, status); } -#endif -// 09-01-2017: Modified version for Previous -floatx80 floatx80_rem( floatx80 a, floatx80 b, bits64 *q, flag *s ) +#else // 09-01-2017: Modified version for Previous +floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) { flag aSign, bSign, zSign; - int32 aExp, bExp, expDiff; - bits64 aSig0, aSig1, bSig; - bits64 qTemp, term0, term1, alternateASig0, alternateASig1; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig; + uint64_t qTemp, term0, term1, alternateASig0, alternateASig1; floatx80 z; aSig0 = extractFloatx80Frac( a ); @@ -3711,20 +5810,20 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b, bits64 *q, flag *s ) *q = 0; *s = 0; if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b ); + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); } goto invalid; } if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); return a; } if ( bExp == 0 ) { if ( bSig == 0 ) { invalid: - float_raise( float_flag_invalid ); + float_raise( float_flag_invalid, status ); z.low = floatx80_default_nan_low; z.high = floatx80_default_nan_high; return z; @@ -3732,7 +5831,11 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b, bits64 *q, flag *s ) normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( (bits64) ( aSig0<<1 ) == 0 ) return a; +#ifdef SOFTFLOAT_68K + if ( aSig0 == 0 ) return a; +#else + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; +#endif normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } bSig |= LIT64( 0x8000000000000000 ); @@ -3789,23 +5892,24 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b, bits64 *q, flag *s ) 
} return normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1 ); + 80, zSign, bExp + expDiff, aSig0, aSig1, status ); } -// End of modification +#endif // End of modification + -// 08-01-2017: Added for Previous +#ifdef SOFTFLOAT_68K // 08-01-2017: Added for Previous /*---------------------------------------------------------------------------- | Returns the modulo remainder of the extended double-precision floating-point | value `a' with respect to the corresponding value `b'. *----------------------------------------------------------------------------*/ -floatx80 floatx80_mod( floatx80 a, floatx80 b, bits64 *q, flag *s ) +floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) { flag aSign, bSign, zSign; - int32 aExp, bExp, expDiff; - bits64 aSig0, aSig1, bSig; - bits64 qTemp, term0, term1; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig; + uint64_t qTemp, term0, term1; floatx80 z; aSig0 = extractFloatx80Frac( a ); @@ -3817,20 +5921,20 @@ floatx80 floatx80_mod( floatx80 a, floatx80 b, bits64 *q, flag *s ) *q = 0; *s = 0; if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b ); + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); } goto invalid; } if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); return a; } if ( bExp == 0 ) { if ( bSig == 0 ) { invalid: - float_raise( float_flag_invalid ); + float_raise( float_flag_invalid, status ); z.low = floatx80_default_nan_low; z.high = floatx80_default_nan_high; return z; @@ -3838,7 +5942,11 @@ floatx80 floatx80_mod( floatx80 a, floatx80 b, bits64 *q, flag *s ) normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( (bits64) ( aSig0<<1 ) == 0 ) 
return a; +#ifdef SOFTFLOAT_68K + if ( aSig0 == 0 ) return a; +#else + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; +#endif normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); } bSig |= LIT64( 0x8000000000000000 ); @@ -3877,10 +5985,11 @@ floatx80 floatx80_mod( floatx80 a, floatx80 b, bits64 *q, flag *s ) } return normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1 ); + 80, zSign, bExp + expDiff, aSig0, aSig1, status ); } -// end of addition for Previous +#endif // end of addition for Previous + /*---------------------------------------------------------------------------- | Returns the square root of the extended double-precision floating-point @@ -3888,89 +5997,91 @@ floatx80 floatx80_mod( floatx80 a, floatx80 b, bits64 *q, flag *s ) | for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 floatx80_sqrt( floatx80 a ) +floatx80 floatx80_sqrt(floatx80 a, float_status *status) { - flag aSign; - int32 aExp, zExp; - bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; - floatx80 z; - - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); - if ( ! 
aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 ) == 0 ) return a; - invalid: - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - if ( aExp == 0 ) { - if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; - zSig0 = estimateSqrt32( aExp, aSig0>>32 ); - shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= doubleZSig0; - return - roundAndPackFloatx80( - floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); + flag aSign; + int32_t aExp, zExp; + uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; + uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a 
); + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig0 << 1)) { + return propagateFloatx80NaN(a, a, status); + } + if ( ! aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 ) == 0 ) return a; + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + if ( aExp == 0 ) { + if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; + zSig0 = estimateSqrt32( aExp, aSig0>>32 ); + shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); + doubleZSig0 = zSig0<<1; + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + doubleZSig0 -= 2; + add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); + if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( doubleZSig0, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + shortShift128Left( 0, zSig1, 1, &term2, &term3 ); + term3 |= 1; + term2 |= doubleZSig0; + add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= doubleZSig0; + return roundAndPackFloatx80(status->floatx80_rounding_precision, + 0, zExp, zSig0, zSig1, status); } -// 07-01-2017: Added for Previous + +#ifdef SOFTFLOAT_68K // 07-01-2017: Added for Previous /*---------------------------------------------------------------------------- | Returns the mantissa of the extended double-precision floating-point | value `a'. 
*----------------------------------------------------------------------------*/ -floatx80 floatx80_getman( floatx80 a ) +floatx80 floatx80_getman( floatx80 a, float_status *status) { flag aSign; - int32 aExp; - bits64 aSig; + int32_t aExp; + uint64_t aSig; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a ); - float_raise( float_flag_invalid ); + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a, status ); + float_raise( float_flag_invalid, status ); a.low = floatx80_default_nan_low; a.high = floatx80_default_nan_high; return a; @@ -3981,7 +6092,7 @@ floatx80 floatx80_getman( floatx80 a ) normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } - return roundAndPackFloatx80(floatx80_rounding_precision, aSign, 0x3FFF, aSig, 0); + return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 0x3FFF, aSig, 0, status); } /*---------------------------------------------------------------------------- @@ -3989,148 +6100,240 @@ floatx80 floatx80_getman( floatx80 a ) | value `a' as an extended double-precision value. 
*----------------------------------------------------------------------------*/ -floatx80 floatx80_getexp( floatx80 a ) +floatx80 floatx80_getexp( floatx80 a, float_status *status) { flag aSign; - int32 aExp; - bits64 aSig; + int32_t aExp; + uint64_t aSig; aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a ); - float_raise( float_flag_invalid ); + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a, status ); + float_raise( float_flag_invalid, status ); a.low = floatx80_default_nan_low; a.high = floatx80_default_nan_high; return a; } - if (aExp == 0 && aSig == 0) return packFloatx80(aSign, 0, 0); + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } - return int32_to_floatx80(aExp - 0x3FFF); + return int32_to_floatx80(aExp - 0x3FFF, status); } -// End of addition for Previous /*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| equal to the corresponding value `b', and 0 otherwise. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. + | Scales extended double-precision floating-point value in operand `a' by + | value `b'. The function truncates the value in the second operand 'b' to + | an integral value and adds that value to the exponent of the operand 'a'. + | The operation performed according to the IEC/IEEE Standard for Binary + | Floating-Point Arithmetic. 
+ *----------------------------------------------------------------------------*/ + +floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, bSign; + int32_t aExp, bExp, shiftCount; + uint64_t aSig, bSig; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + bSign = extractFloatx80Sign(b); + + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) || + ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); + } + float_raise( float_flag_invalid, status ); + a.low = floatx80_default_nan_low; + a.high = floatx80_default_nan_high; + return a; + } + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0); + if ( bExp < 0x3FFF ) return a; + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + if ( bExp < 0x3FFF ) return a; + + if ( 0x400E < bExp ) { + aExp = bSign ? -0x4000 : 0x7FFF; + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); + } + + shiftCount = 0x403E - bExp; + bSig >>= shiftCount; + aExp = bSign ? ( aExp - bSig ) : ( aExp + bSig ); + + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status); + +} +#endif // End of addition for Previous + + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is equal +| to the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. Otherwise, the comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag floatx80_eq( floatx80 a, floatx80 b ) +int floatx80_eq(floatx80 a, floatx80 b, float_status *status) { - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); + + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) + || (extractFloatx80Exp(a) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(a) << 1)) + || (extractFloatx80Exp(b) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(b) << 1)) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); } /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is | less than or equal to the corresponding value `b', and 0 otherwise. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. +| invalid exception is raised if either operand is a NaN. The comparison is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. 
*----------------------------------------------------------------------------*/ -flag floatx80_le( floatx80 a, floatx80 b ) +int floatx80_le(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) + || (extractFloatx80Exp(a) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(a) << 1)) + || (extractFloatx80Exp(b) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(b) << 1)) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); } /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is -| less than the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| less than the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag floatx80_lt( floatx80 a, floatx80 b ) +int floatx80_lt(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) + || (extractFloatx80Exp(a) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(a) << 1)) + || (extractFloatx80Exp(b) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(b) << 1)) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); } /*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is equal -| to the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns 1 if the extended double-precision floating-point values `a' and `b' +| cannot be compared, and 0 otherwise. The invalid exception is raised if +| either operand is a NaN. 
The comparison is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ +int floatx80_unordered(floatx80 a, floatx80 b, float_status *status) +{ + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) + || (extractFloatx80Exp(a) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(a) << 1)) + || (extractFloatx80Exp(b) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(b) << 1)) + ) { + float_raise(float_flag_invalid, status); + return 1; + } + return 0; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is +| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +| cause an exception. The comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag floatx80_eq_signaling( floatx80 a, floatx80 b ) +int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status) { - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); + + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return 0; + } + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if (floatx80_is_signaling_nan(a, status) + || floatx80_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; 
+ } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); } @@ -4141,32 +6344,36 @@ flag floatx80_eq_signaling( floatx80 a, floatx80 b ) | to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag floatx80_le_quiet( floatx80 a, floatx80 b ) +int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return 0; + } + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if (floatx80_is_signaling_nan(a, status) + || floatx80_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? 
le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); } @@ -4177,36 +6384,64 @@ flag floatx80_le_quiet( floatx80 a, floatx80 b ) | IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag floatx80_lt_quiet( floatx80 a, floatx80 b ) +int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return 0; + } + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if (floatx80_is_signaling_nan(a, status) + || floatx80_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? 
lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); } -#endif +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point values `a' and `b' +| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception. +| The comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ +int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) +{ + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return 1; + } + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + ) { + if (floatx80_is_signaling_nan(a, status) + || floatx80_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 1; + } + return 0; +} #ifdef FLOATX128 @@ -4220,22 +6455,22 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b ) | largest integer with the same sign as `a' is returned. 
*----------------------------------------------------------------------------*/ -int32 float128_to_int32( float128 a ) +int32_t float128_to_int32(float128 a, float_status *status) { - flag aSign; - int32 aExp, shiftCount; - bits64 aSig0, aSig1; + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig0, aSig1; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; - if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); - aSig0 |= ( aSig1 != 0 ); - shiftCount = 0x4028 - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); - return roundAndPackInt32( aSign, aSig0 ); + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + aSig0 |= ( aSig1 != 0 ); + shiftCount = 0x4028 - aExp; + if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); + return roundAndPackInt32(aSign, aSig0, status); } @@ -4249,42 +6484,43 @@ int32 float128_to_int32( float128 a ) | returned. 
*----------------------------------------------------------------------------*/ -int32 float128_to_int32_round_to_zero( float128 a ) +int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) { - flag aSign; - int32 aExp, shiftCount; - bits64 aSig0, aSig1, savedASig; - int32 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - aSig0 |= ( aSig1 != 0 ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig0 |= LIT64( 0x0001000000000000 ); - shiftCount = 0x402F - aExp; - savedASig = aSig0; - aSig0 >>= shiftCount; - z = aSig0; - if ( aSign ) z = - z; - z = (sbits32) z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise( float_flag_invalid ); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig0<<shiftCount ) != savedASig ) { - float_exception_flags |= float_flag_inexact; - } - return z; + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig0, aSig1, savedASig; + int32_t z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + aSig0 |= ( aSig1 != 0 ); + if ( 0x401E < aExp ) { + if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; + goto invalid; + } + else if ( aExp < 0x3FFF ) { + if (aExp || aSig0) { + status->float_exception_flags |= float_flag_inexact; + } + return 0; + } + aSig0 |= LIT64( 0x0001000000000000 ); + shiftCount = 0x402F - aExp; + savedASig = aSig0; + aSig0 >>= shiftCount; + z = aSig0; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig0<<shiftCount ) != savedASig ) { + status->float_exception_flags |= float_flag_inexact; + } + return z; } @@ -4298,36 +6534,36 @@ int32 float128_to_int32_round_to_zero( float128 a ) | largest integer with the same sign as `a' is returned. 
*----------------------------------------------------------------------------*/ -int64 float128_to_int64( float128 a ) -{ - flag aSign; - int32 aExp, shiftCount; - bits64 aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); - shiftCount = 0x402F - aExp; - if ( shiftCount <= 0 ) { - if ( 0x403E < aExp ) { - float_raise( float_flag_invalid ); - if ( ! aSign - || ( ( aExp == 0x7FFF ) - && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) - ) - ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); - } - else { - shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); - } - return roundAndPackInt64( aSign, aSig0, aSig1 ); +int64_t float128_to_int64(float128 a, float_status *status) +{ + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + shiftCount = 0x402F - aExp; + if ( shiftCount <= 0 ) { + if ( 0x403E < aExp ) { + float_raise(float_flag_invalid, status); + if ( ! aSign + || ( ( aExp == 0x7FFF ) + && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) + ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); + } + else { + shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); + } + return roundAndPackInt64(aSign, aSig0, aSig1, status); } @@ -4341,54 +6577,56 @@ int64 float128_to_int64( float128 a ) | returned. 
*----------------------------------------------------------------------------*/ -int64 float128_to_int64_round_to_zero( float128 a ) -{ - flag aSign; - int32 aExp, shiftCount; - bits64 aSig0, aSig1; - int64 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); - shiftCount = aExp - 0x402F; - if ( 0 < shiftCount ) { - if ( 0x403E <= aExp ) { - aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); - if ( ( a.high == LIT64( 0xC03E000000000000 ) ) - && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { - if ( aSig1 ) float_exception_flags |= float_flag_inexact; - } - else { - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); - if ( (bits64) ( aSig1<<shiftCount ) ) { - float_exception_flags |= float_flag_inexact; - } - } - else { - if ( aExp < 0x3FFF ) { - if ( aExp | aSig0 | aSig1 ) { - float_exception_flags |= float_flag_inexact; - } - return 0; - } - z = aSig0>>( - shiftCount ); - if ( aSig1 - || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { - float_exception_flags |= float_flag_inexact; - } - } - if ( aSign ) z = - z; - return z; +int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) +{ + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig0, aSig1; + int64_t z; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); + shiftCount = aExp - 0x402F; + if ( 0 < shiftCount ) { + if ( 0x403E <= aExp ) { + aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); + if ( ( a.high == LIT64( 0xC03E000000000000 ) ) + && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { + if (aSig1) { + status->float_exception_flags |= float_flag_inexact; + } + } + else { + float_raise(float_flag_invalid, status); + if ( ! 
aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); + if ( (uint64_t) ( aSig1<<shiftCount ) ) { + status->float_exception_flags |= float_flag_inexact; + } + } + else { + if ( aExp < 0x3FFF ) { + if ( aExp | aSig0 | aSig1 ) { + status->float_exception_flags |= float_flag_inexact; + } + return 0; + } + z = aSig0>>( - shiftCount ); + if ( aSig1 + || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) { + status->float_exception_flags |= float_flag_inexact; + } + } + if ( aSign ) z = - z; + return z; } @@ -4399,31 +6637,31 @@ int64 float128_to_int64_round_to_zero( float128 a ) | Arithmetic. *----------------------------------------------------------------------------*/ -float32 float128_to_float32( float128 a ) +float32 float128_to_float32(float128 a, float_status *status) { - flag aSign; - int32 aExp; - bits64 aSig0, aSig1; - bits32 zSig; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat32( float128ToCommonNaN( a ) ); - } - return packFloat32( aSign, 0xFF, 0 ); - } - aSig0 |= ( aSig1 != 0 ); - shift64RightJamming( aSig0, 18, &aSig0 ); - zSig = aSig0; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x3F81; - } - return roundAndPackFloat32( aSign, aExp, zSig ); + flag aSign; + int32_t aExp; + uint64_t aSig0, aSig1; + uint32_t zSig; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat32(float128ToCommonNaN(a, status), status); + } + return packFloat32( aSign, 0xFF, 0 ); + } + aSig0 |= ( aSig1 != 0 ); + shift64RightJamming( aSig0, 18, &aSig0 ); + zSig = aSig0; + if ( aExp || zSig ) { + zSig |= 
0x40000000; + aExp -= 0x3F81; + } + return roundAndPackFloat32(aSign, aExp, zSig, status); } @@ -4434,34 +6672,32 @@ float32 float128_to_float32( float128 a ) | Arithmetic. *----------------------------------------------------------------------------*/ -float64 float128_to_float64( float128 a ) +float64 float128_to_float64(float128 a, float_status *status) { - flag aSign; - int32 aExp; - bits64 aSig0, aSig1; + flag aSign; + int32_t aExp; + uint64_t aSig0, aSig1; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat64( float128ToCommonNaN( a ) ); - } - return packFloat64( aSign, 0x7FF, 0 ); - } - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - aSig0 |= ( aSig1 != 0 ); - if ( aExp || aSig0 ) { - aSig0 |= LIT64( 0x4000000000000000 ); - aExp -= 0x3C01; - } - return roundAndPackFloat64( aSign, aExp, aSig0 ); + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat64(float128ToCommonNaN(a, status), status); + } + return packFloat64( aSign, 0x7FF, 0 ); + } + shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); + aSig0 |= ( aSig1 != 0 ); + if ( aExp || aSig0 ) { + aSig0 |= LIT64( 0x4000000000000000 ); + aExp -= 0x3C01; + } + return roundAndPackFloat64(aSign, aExp, aSig0, status); } -#ifdef FLOATX80 - /*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point | value `a' to the extended double-precision floating-point format. The @@ -4469,36 +6705,34 @@ float64 float128_to_float64( float128 a ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -floatx80 float128_to_floatx80( float128 a ) +floatx80 float128_to_floatx80(float128 a, float_status *status) { - flag aSign; - int32 aExp; - bits64 aSig0, aSig1; + flag aSign; + int32_t aExp; + uint64_t aSig0, aSig1; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloatx80( float128ToCommonNaN( a ) ); - } - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - else { - aSig0 |= LIT64( 0x0001000000000000 ); - } - shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); - return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloatx80(float128ToCommonNaN(a, status), status); + } + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + else { + aSig0 |= LIT64( 0x0001000000000000 ); + } + shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); + return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status); } -#endif - /*---------------------------------------------------------------------------- | Rounds the quadruple-precision floating-point value `a' to an integer, and | returns the result as a quadruple-precision floating-point value. The @@ -4506,99 +6740,134 @@ floatx80 float128_to_floatx80( float128 a ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float128 float128_round_to_int( float128 a ) +float128 float128_round_to_int(float128 a, float_status *status) { - flag aSign; - int32 aExp; - bits64 lastBitMask, roundBitsMask; - int8 roundingMode; - float128 z; - - aExp = extractFloat128Exp( a ); - if ( 0x402F <= aExp ) { - if ( 0x406F <= aExp ) { - if ( ( aExp == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) - ) { - return propagateFloat128NaN( a, a ); - } - return a; - } - lastBitMask = 1; - lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - if ( lastBitMask ) { - add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else { - if ( (sbits64) z.low < 0 ) { - ++z.high; - if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; - } - } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { - add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); - } - } - z.low &= ~ roundBitsMask; - } - else { - if ( aExp < 0x3FFF ) { - if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; - aSign = extractFloat128Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) - && ( extractFloat128Frac0( a ) - | extractFloat128Frac1( a ) ) - ) { - return packFloat128( aSign, 0x3FFF, 0, 0 ); - } - break; - case float_round_down: - return - aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) - : packFloat128( 0, 0, 0, 0 ); - case float_round_up: - return - aSign ? 
packFloat128( 1, 0, 0, 0 ) - : packFloat128( 0, 0x3FFF, 0, 0 ); - } - return packFloat128( aSign, 0, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x402F - aExp; - roundBitsMask = lastBitMask - 1; - z.low = 0; - z.high = a.high; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z.high += lastBitMask>>1; - if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { - z.high &= ~ lastBitMask; - } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { - z.high |= ( a.low != 0 ); - z.high += roundBitsMask; - } - } - z.high &= ~ roundBitsMask; - } - if ( ( z.low != a.low ) || ( z.high != a.high ) ) { - float_exception_flags |= float_flag_inexact; - } - return z; + flag aSign; + int32_t aExp; + uint64_t lastBitMask, roundBitsMask; + float128 z; + + aExp = extractFloat128Exp( a ); + if ( 0x402F <= aExp ) { + if ( 0x406F <= aExp ) { + if ( ( aExp == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) + ) { + return propagateFloat128NaN(a, a, status); + } + return a; + } + lastBitMask = 1; + lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; + roundBitsMask = lastBitMask - 1; + z = a; + switch (status->float_rounding_mode) { + case float_round_nearest_even: + if ( lastBitMask ) { + add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else { + if ( (int64_t) z.low < 0 ) { + ++z.high; + if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1; + } + } + break; + case float_round_ties_away: + if (lastBitMask) { + add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low); + } else { + if ((int64_t) z.low < 0) { + ++z.high; + } + } + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat128Sign(z)) { + add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); + } + break; + case float_round_down: + if (extractFloat128Sign(z)) { + 
add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); + } + break; + default: + abort(); + } + z.low &= ~ roundBitsMask; + } + else { + if ( aExp < 0x3FFF ) { + if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a; + status->float_exception_flags |= float_flag_inexact; + aSign = extractFloat128Sign( a ); + switch (status->float_rounding_mode) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) + && ( extractFloat128Frac0( a ) + | extractFloat128Frac1( a ) ) + ) { + return packFloat128( aSign, 0x3FFF, 0, 0 ); + } + break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloat128(aSign, 0x3FFF, 0, 0); + } + break; + case float_round_down: + return + aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) + : packFloat128( 0, 0, 0, 0 ); + case float_round_up: + return + aSign ? packFloat128( 1, 0, 0, 0 ) + : packFloat128( 0, 0x3FFF, 0, 0 ); + } + return packFloat128( aSign, 0, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x402F - aExp; + roundBitsMask = lastBitMask - 1; + z.low = 0; + z.high = a.high; + switch (status->float_rounding_mode) { + case float_round_nearest_even: + z.high += lastBitMask>>1; + if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { + z.high &= ~ lastBitMask; + } + break; + case float_round_ties_away: + z.high += lastBitMask>>1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat128Sign(z)) { + z.high |= ( a.low != 0 ); + z.high += roundBitsMask; + } + break; + case float_round_down: + if (extractFloat128Sign(z)) { + z.high |= (a.low != 0); + z.high += roundBitsMask; + } + break; + default: + abort(); + } + z.high &= ~ roundBitsMask; + } + if ( ( z.low != a.low ) || ( z.high != a.high ) ) { + status->float_exception_flags |= float_flag_inexact; + } + return z; } @@ -4610,73 +6879,86 @@ float128 float128_round_to_int( float128 a ) | Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -static float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) +static float128 addFloat128Sigs(float128 a, float128 b, flag zSign, + float_status *status) { - int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - int32 expDiff; + int32_t aExp, bExp, zExp; + uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + int32_t expDiff; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= LIT64( 0x0001000000000000 ); - } - shift128ExtraRightJamming( - bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= LIT64( 0x0001000000000000 ); - } - shift128ExtraRightJamming( - aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN( a, b ); - } - return a; - } - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); - zSig2 = 0; - zSig0 |= LIT64( 0x0002000000000000 ); - zExp = aExp; - goto shiftRight1; - } - aSig0 |= LIT64( 0x0001000000000000 ); - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - --zExp; - if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; - ++zExp; - shiftRight1: - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - 
roundAndPack: - return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if (aSig0 | aSig1) { + return propagateFloat128NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= LIT64( 0x0001000000000000 ); + } + shift128ExtraRightJamming( + bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if (bSig0 | bSig1) { + return propagateFloat128NaN(a, b, status); + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= LIT64( 0x0001000000000000 ); + } + shift128ExtraRightJamming( + aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat128NaN(a, b, status); + } + return a; + } + add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + if ( aExp == 0 ) { + if (status->flush_to_zero) { + if (zSig0 | zSig1) { + float_raise(float_flag_output_denormal, status); + } + return packFloat128(zSign, 0, 0, 0); + } + return packFloat128( zSign, 0, zSig0, zSig1 ); + } + zSig2 = 0; + zSig0 |= LIT64( 0x0002000000000000 ); + zExp = aExp; + goto shiftRight1; + } + aSig0 |= LIT64( 0x0001000000000000 ); + add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + --zExp; + if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; + ++zExp; + shiftRight1: + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + roundAndPack: + return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); } @@ -4688,79 +6970,83 @@ static float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) 
| Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) -{ - int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; - int32 expDiff; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN( a, b ); - } - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig0 < aSig0 ) goto aBigger; - if ( aSig0 < bSig0 ) goto bBigger; - if ( bSig1 < aSig1 ) goto aBigger; - if ( aSig1 < bSig1 ) goto bBigger; - return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); - bExpBigger: - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= LIT64( 0x4000000000000000 ); - } - shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - bSig0 |= LIT64( 0x4000000000000000 ); - bBigger: - sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= LIT64( 0x4000000000000000 ); - } - shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 
); - aSig0 |= LIT64( 0x4000000000000000 ); - aBigger: - sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); +static float128 subFloat128Sigs(float128 a, float128 b, flag zSign, + float_status *status) +{ + int32_t aExp, bExp, zExp; + uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; + int32_t expDiff; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + expDiff = aExp - bExp; + shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); + shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat128NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return float128_default_nan(status); + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig0 < aSig0 ) goto aBigger; + if ( aSig0 < bSig0 ) goto bBigger; + if ( bSig1 < aSig1 ) goto aBigger; + if ( aSig1 < bSig1 ) goto bBigger; + return packFloat128(status->float_rounding_mode == float_round_down, + 0, 0, 0); + bExpBigger: + if ( bExp == 0x7FFF ) { + if (bSig0 | bSig1) { + return propagateFloat128NaN(a, b, status); + } + return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= LIT64( 0x4000000000000000 ); + } + shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + bSig0 |= LIT64( 0x4000000000000000 ); + bBigger: + sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if (aSig0 | aSig1) { + return propagateFloat128NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + --expDiff; + 
} + else { + bSig0 |= LIT64( 0x4000000000000000 ); + } + shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); + aSig0 |= LIT64( 0x4000000000000000 ); + aBigger: + sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1, + status); } @@ -4770,18 +7056,18 @@ static float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) | for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_add( float128 a, float128 b ) +float128 float128_add(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return addFloat128Sigs( a, b, aSign ); - } - else { - return subFloat128Sigs( a, b, aSign ); - } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign == bSign ) { + return addFloat128Sigs(a, b, aSign, status); + } + else { + return subFloat128Sigs(a, b, aSign, status); + } } @@ -4791,18 +7077,18 @@ float128 float128_add( float128 a, float128 b ) | Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -float128 float128_sub( float128 a, float128 b ) +float128 float128_sub(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return subFloat128Sigs( a, b, aSign ); - } - else { - return addFloat128Sigs( a, b, aSign ); - } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign == bSign ) { + return subFloat128Sigs(a, b, aSign, status); + } + else { + return addFloat128Sigs(a, b, aSign, status); + } } @@ -4812,61 +7098,60 @@ float128 float128_sub( float128 a, float128 b ) | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_mul( float128 a, float128 b ) -{ - flag aSign, bSign, zSign; - int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN( a, b ); - } - if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 
0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - zExp = aExp + bExp - 0x4000; - aSig0 |= LIT64( 0x0001000000000000 ); - shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); - mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); - add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zSig2 |= ( zSig3 != 0 ); - if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - ++zExp; - } - return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); +float128 float128_mul(float128 a, float128 b, float_status *status) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN(a, b, status); + } + if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if (bSig0 | bSig1) { + return propagateFloat128NaN(a, b, status); + } + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return float128_default_nan(status); + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | 
bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + zExp = aExp + bExp - 0x4000; + aSig0 |= LIT64( 0x0001000000000000 ); + shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); + mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); + add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zSig2 |= ( zSig3 != 0 ); + if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + ++zExp; + } + return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); } @@ -4876,81 +7161,84 @@ float128 float128_mul( float128 a, float128 b ) | the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_div( float128 a, float128 b ) -{ - flag aSign, bSign, zSign; - int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - goto invalid; - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - return packFloat128( zSign, 0, 0, 0 ); - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - 
return z; - } - float_raise( float_flag_divbyzero ); - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = aExp - bExp + 0x3FFD; - shortShift128Left( - aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); - shortShift128Left( - bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); - if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { - shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); - mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); - sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( (sbits64) rem0 < 0 ) { - --zSig0; - add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); - } - zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); - if ( ( zSig1 & 0x3FFF ) <= 4 ) { - mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); - sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { - --zSig1; - add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); +float128 float128_div(float128 a, float128 b, float_status *status) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b 
); + bSign = extractFloat128Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if (aSig0 | aSig1) { + return propagateFloat128NaN(a, b, status); + } + if ( bExp == 0x7FFF ) { + if (bSig0 | bSig1) { + return propagateFloat128NaN(a, b, status); + } + goto invalid; + } + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + if ( bExp == 0x7FFF ) { + if (bSig0 | bSig1) { + return propagateFloat128NaN(a, b, status); + } + return packFloat128( zSign, 0, 0, 0 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return float128_default_nan(status); + } + float_raise(float_flag_divbyzero, status); + return packFloat128( zSign, 0x7FFF, 0, 0 ); + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = aExp - bExp + 0x3FFD; + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { + shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); + mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); + sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + } + zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); + if ( ( zSig1 & 0x3FFF ) <= 4 ) { + mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); + sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) 
!= 0 ); + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); } @@ -4960,295 +7248,315 @@ float128 float128_div( float128 a, float128 b ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_rem( float128 a, float128 b ) -{ - flag aSign, zSign; - int32 aExp, bExp, expDiff; - bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; - bits64 allZero, alternateASig0, alternateASig1, sigMean1; - sbits64 sigMean0; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); -// bSign = extractFloat128Sign( b ); - if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN( a, b ); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; - } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return a; - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - expDiff = aExp - bExp; - if ( expDiff < -1 ) return a; - shortShift128Left( - aSig0 | LIT64( 0x0001000000000000 ), - aSig1, - 15 - ( expDiff < 0 ), - &aSig0, - &aSig1 - ); - shortShift128Left( - bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); - q = le128( bSig0, bSig1, aSig0, aSig1 ); - if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - expDiff -= 64; 
- while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? q - 4 : 0; - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); - shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); - sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); - expDiff -= 61; - } - if ( -64 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? q - 4 : 0; - q >>= - expDiff; - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - expDiff += 52; - if ( expDiff < 0 ) { - shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - } - else { - shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); - } - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); - } - else { - shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - } - do { - alternateASig0 = aSig0; - alternateASig1 = aSig1; - ++q; - sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - } while ( 0 <= (sbits64) aSig0 ); - add128( - aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 ); - if ( ( sigMean0 < 0 ) - || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - } - zSign = ( (sbits64) aSig0 < 0 ); - if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); - return - normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the square root of the quadruple-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float128 float128_sqrt( float128 a ) -{ - flag aSign; - int32 aExp, zExp; - bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a ); - if ( ! aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; - invalid: - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; - aSig0 |= LIT64( 0x0001000000000000 ); - zSig0 = estimateSqrt32( aExp, aSig0>>17 ); - shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & 0x1FFF ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 
); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +float128 float128_rem(float128 a, float128 b, float_status *status) +{ + flag aSign, zSign; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; + uint64_t allZero, alternateASig0, alternateASig1, sigMean1; + int64_t sigMean0; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + bSig1 = extractFloat128Frac1( b ); + bSig0 = extractFloat128Frac0( b ); + bExp = extractFloat128Exp( b ); + if ( aExp == 0x7FFF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat128NaN(a, b, status); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if (bSig0 | bSig1) { + return propagateFloat128NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return float128_default_nan(status); + } + normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return a; + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + expDiff = aExp - bExp; + if ( expDiff < -1 ) return a; + shortShift128Left( + aSig0 | LIT64( 0x0001000000000000 ), + aSig1, + 15 - ( expDiff < 0 ), + &aSig0, + &aSig1 + ); + shortShift128Left( + bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); + q = le128( bSig0, bSig1, aSig0, aSig1 ); + if ( q ) 
sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); + shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); + sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); + expDiff -= 61; + } + if ( -64 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + q >>= - expDiff; + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + expDiff += 52; + if ( expDiff < 0 ) { + shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + } + else { + shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); + } + mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); + sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); + } + else { + shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); + shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); + } + do { + alternateASig0 = aSig0; + alternateASig1 = aSig1; + ++q; + sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + } while ( 0 <= (int64_t) aSig0 ); + add128( + aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 ); + if ( ( sigMean0 < 0 ) + || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + } + zSign = ( (int64_t) aSig0 < 0 ); + if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); + return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1, + status); +} + +/*---------------------------------------------------------------------------- +| Returns the square root of the quadruple-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float128_eq( float128 a, float128 b ) +float128 float128_sqrt(float128 a, float_status *status) { - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if ( float128_is_signaling_nan( a ) - || float128_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); + flag aSign; + int32_t aExp, zExp; + uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; + uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if (aSig0 | aSig1) { + return propagateFloat128NaN(a, a, status); + } + if ( ! 
aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; + invalid: + float_raise(float_flag_invalid, status); + return float128_default_nan(status); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); + normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; + aSig0 |= LIT64( 0x0001000000000000 ); + zSig0 = estimateSqrt32( aExp, aSig0>>17 ); + shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); + doubleZSig0 = zSig0<<1; + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + doubleZSig0 -= 2; + add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); + if ( ( zSig1 & 0x1FFF ) <= 5 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( doubleZSig0, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + shortShift128Left( 0, zSig1, 1, &term2, &term3 ); + term3 |= 1; + term2 |= doubleZSig0; + add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status); } /*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. The invalid exception is +| raised if either operand is a NaN. Otherwise, the comparison is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float128_le( float128 a, float128 b ) +int float128_eq(float128 a, float128 b, float_status *status) { - flag aSign, bSign; - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); } /*---------------------------------------------------------------------------- | Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed +| or equal to the corresponding value `b', and 0 otherwise. The invalid +| exception is raised if either operand is a NaN. The comparison is performed | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float128_lt( float128 a, float128 b ) +int float128_le(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); } /*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to +| Returns 1 if the quadruple-precision floating-point value `a' is less than | the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| raised if either operand is a NaN. The comparison is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float128_eq_signaling( float128 a, float128 b ) +int float128_lt(float128 a, float128 b, float_status *status) { - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); + flag aSign, bSign; + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise(float_flag_invalid, status); + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. The invalid exception is raised if either +| operand is a NaN. The comparison is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float128_unordered(float128 a, float128 b, float_status *status) +{ + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + float_raise(float_flag_invalid, status); + return 1; + } + return 0; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point value `a' is equal to +| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +| exception. The comparison is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +int float128_eq_quiet(float128 a, float128 b, float_status *status) +{ + + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if (float128_is_signaling_nan(a, status) + || float128_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + return + ( a.low == b.low ) + && ( ( a.high == b.high ) + || ( ( a.low == 0 ) + && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) ) + ); } @@ -5259,32 +7567,32 @@ flag float128_eq_signaling( float128 a, float128 b ) | IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float128_le_quiet( float128 a, float128 b ) +int float128_le_quiet(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if ( float128_is_signaling_nan( a ) - || float128_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if (float128_is_signaling_nan(a, status) + || float128_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + == 0 ); + } + return + aSign ? le128( b.high, b.low, a.high, a.low ) + : le128( a.high, a.low, b.high, b.low ); } @@ -5295,32 +7603,716 @@ flag float128_le_quiet( float128 a, float128 b ) | Standard for Binary Floating-Point Arithmetic. 
*----------------------------------------------------------------------------*/ -flag float128_lt_quiet( float128 a, float128 b ) +int float128_lt_quiet(float128 a, float128 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if ( float128_is_signaling_nan( a ) - || float128_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if (float128_is_signaling_nan(a, status) + || float128_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 0; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + return + aSign + && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + != 0 ); + } + return + aSign ? lt128( b.high, b.low, a.high, a.low ) + : lt128( a.high, a.low, b.high, b.low ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot +| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int float128_unordered_quiet(float128 a, float128 b, float_status *status) +{ + if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) + && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) + || ( ( extractFloat128Exp( b ) == 0x7FFF ) + && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + ) { + if (float128_is_signaling_nan(a, status) + || float128_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return 1; + } + return 0; +} + +/* misc functions */ +float32 uint32_to_float32(uint32_t a, float_status *status) +{ + return int64_to_float32(a, status); +} + +float64 uint32_to_float64(uint32_t a, float_status *status) +{ + return int64_to_float64(a, status); +} + +uint32_t float32_to_uint32(float32 a, float_status *status) +{ + int64_t v; + uint32_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int64(a, status); + if (v < 0) { + res = 0; + } else if (v > 0xffffffff) { + res = 0xffffffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *status) +{ + int64_t v; + uint32_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int64_round_to_zero(a, status); + if (v < 0) { + res = 0; + } else if (v > 0xffffffff) { + res = 0xffffffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +int16_t float32_to_int16(float32 a, float_status *status) +{ + int32_t v; + int16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int32(a, status); + if (v < -0x8000) { + res = -0x8000; + } else if (v > 0x7fff) { + res = 0x7fff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + 
float_raise(float_flag_invalid, status); + return res; +} + +uint16_t float32_to_uint16(float32 a, float_status *status) +{ + int32_t v; + uint16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int32(a, status); + if (v < 0) { + res = 0; + } else if (v > 0xffff) { + res = 0xffff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *status) +{ + int64_t v; + uint16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int64_round_to_zero(a, status); + if (v < 0) { + res = 0; + } else if (v > 0xffff) { + res = 0xffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +uint32_t float64_to_uint32(float64 a, float_status *status) +{ + uint64_t v; + uint32_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_uint64(a, status); + if (v > 0xffffffff) { + res = 0xffffffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *status) +{ + uint64_t v; + uint32_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_uint64_round_to_zero(a, status); + if (v > 0xffffffff) { + res = 0xffffffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +int16_t float64_to_int16(float64 a, float_status *status) +{ + int64_t v; + int16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int32(a, status); + if (v < -0x8000) { + res = -0x8000; + } else if (v > 0x7fff) { + res = 0x7fff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, 
status); + float_raise(float_flag_invalid, status); + return res; +} + +uint16_t float64_to_uint16(float64 a, float_status *status) +{ + int64_t v; + uint16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int32(a, status); + if (v < 0) { + res = 0; + } else if (v > 0xffff) { + res = 0xffff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *status) +{ + int64_t v; + uint16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int64_round_to_zero(a, status); + if (v < 0) { + res = 0; + } else if (v > 0xffff) { + res = 0xffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. If the conversion overflows, the +| largest unsigned integer is returned. If 'a' is negative, the value is +| rounded and zero is returned; negative values that do not round to zero +| will raise the inexact exception. 
+*----------------------------------------------------------------------------*/ + +uint64_t float64_to_uint64(float64 a, float_status *status) +{ + flag aSign; + int aExp; + int shiftCount; + uint64_t aSig, aSigExtra; + a = float64_squash_input_denormal(a, status); + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aSign && (aExp > 1022)) { + float_raise(float_flag_invalid, status); + if (float64_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + if (aExp) { + aSig |= LIT64(0x0010000000000000); + } + shiftCount = 0x433 - aExp; + if (shiftCount <= 0) { + if (0x43E < aExp) { + float_raise(float_flag_invalid, status); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + aSigExtra = 0; + aSig <<= -shiftCount; + } else { + shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra); + } + return roundAndPackUint64(aSign, aSig, aSigExtra, status); +} + +uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status) +{ + signed char current_rounding_mode = status->float_rounding_mode; + set_float_rounding_mode(float_round_to_zero, status); + int64_t v = float64_to_uint64(a, status); + set_float_rounding_mode(current_rounding_mode, status); + return v; +} + +#define COMPARE(s, nan_exp) \ +static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\ + int is_quiet, float_status *status) \ +{ \ + flag aSign, bSign; \ + uint ## s ## _t av, bv; \ + a = float ## s ## _squash_input_denormal(a, status); \ + b = float ## s ## _squash_input_denormal(b, status); \ + \ + if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \ + extractFloat ## s ## Frac( a ) ) || \ + ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \ + extractFloat ## s ## Frac( b ) )) { \ + if (!is_quiet || \ + float ## s ## _is_signaling_nan(a, status) || \ + float ## s ## _is_signaling_nan(b, status)) { \ + float_raise(float_flag_invalid, status); \ + } \ + return float_relation_unordered; \ + } \ + 
aSign = extractFloat ## s ## Sign( a ); \ + bSign = extractFloat ## s ## Sign( b ); \ + av = float ## s ## _val(a); \ + bv = float ## s ## _val(b); \ + if ( aSign != bSign ) { \ + if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \ + /* zero case */ \ + return float_relation_equal; \ + } else { \ + return 1 - (2 * aSign); \ + } \ + } else { \ + if (av == bv) { \ + return float_relation_equal; \ + } else { \ + return 1 - 2 * (aSign ^ ( av < bv )); \ + } \ + } \ +} \ + \ +int float ## s ## _compare(float ## s a, float ## s b, float_status *status) \ +{ \ + return float ## s ## _compare_internal(a, b, 0, status); \ +} \ + \ +int float ## s ## _compare_quiet(float ## s a, float ## s b, \ + float_status *status) \ +{ \ + return float ## s ## _compare_internal(a, b, 1, status); \ +} + +COMPARE(32, 0xff) +COMPARE(64, 0x7ff) + +static inline int floatx80_compare_internal(floatx80 a, floatx80 b, + int is_quiet, float_status *status) +{ + flag aSign, bSign; + + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return float_relation_unordered; + } + if (( ( extractFloatx80Exp( a ) == 0x7fff ) && + ( extractFloatx80Frac( a )<<1 ) ) || + ( ( extractFloatx80Exp( b ) == 0x7fff ) && + ( extractFloatx80Frac( b )<<1 ) )) { + if (!is_quiet || + floatx80_is_signaling_nan(a, status) || + floatx80_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return float_relation_unordered; + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign != bSign ) { + + if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) && + ( ( a.low | b.low ) == 0 ) ) { + /* zero case */ + return float_relation_equal; + } else { + return 1 - (2 * aSign); + } + } else { + if (a.low == b.low && a.high == b.high) { + return float_relation_equal; + } else { + return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); + } + } +} + +int floatx80_compare(floatx80 a, floatx80 b, 
float_status *status) +{ + return floatx80_compare_internal(a, b, 0, status); +} + +int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status) +{ + return floatx80_compare_internal(a, b, 1, status); +} + +static inline int float128_compare_internal(float128 a, float128 b, + int is_quiet, float_status *status) +{ + flag aSign, bSign; + + if (( ( extractFloat128Exp( a ) == 0x7fff ) && + ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || + ( ( extractFloat128Exp( b ) == 0x7fff ) && + ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) { + if (!is_quiet || + float128_is_signaling_nan(a, status) || + float128_is_signaling_nan(b, status)) { + float_raise(float_flag_invalid, status); + } + return float_relation_unordered; + } + aSign = extractFloat128Sign( a ); + bSign = extractFloat128Sign( b ); + if ( aSign != bSign ) { + if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) { + /* zero case */ + return float_relation_equal; + } else { + return 1 - (2 * aSign); + } + } else { + if (a.low == b.low && a.high == b.high) { + return float_relation_equal; + } else { + return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); + } + } +} + +int float128_compare(float128 a, float128 b, float_status *status) +{ + return float128_compare_internal(a, b, 0, status); +} + +int float128_compare_quiet(float128 a, float128 b, float_status *status) +{ + return float128_compare_internal(a, b, 1, status); +} + +/* min() and max() functions. These can't be implemented as + * 'compare and pick one input' because that would mishandle + * NaNs and +0 vs -0. + * + * minnum() and maxnum() functions. These are similar to the min() + * and max() functions but if one of the arguments is a QNaN and + * the other is numerical then the numerical argument is returned. + * minnum() and maxnum correspond to the IEEE 754-2008 minNum() + * and maxNum() operations. 
min() and max() are the typical min/max + * semantics provided by many CPUs which predate that specification. + * + * minnummag() and maxnummag() functions correspond to minNumMag() + * and maxNumMag() from the IEEE-754 2008. + */ +#define MINMAX(s) \ +static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \ + int ismin, int isieee, \ + int ismag, \ + float_status *status) \ +{ \ + flag aSign, bSign; \ + uint ## s ## _t av, bv, aav, abv; \ + a = float ## s ## _squash_input_denormal(a, status); \ + b = float ## s ## _squash_input_denormal(b, status); \ + if (float ## s ## _is_any_nan(a) || \ + float ## s ## _is_any_nan(b)) { \ + if (isieee) { \ + if (float ## s ## _is_quiet_nan(a, status) && \ + !float ## s ##_is_any_nan(b)) { \ + return b; \ + } else if (float ## s ## _is_quiet_nan(b, status) && \ + !float ## s ## _is_any_nan(a)) { \ + return a; \ + } \ + } \ + return propagateFloat ## s ## NaN(a, b, status); \ + } \ + aSign = extractFloat ## s ## Sign(a); \ + bSign = extractFloat ## s ## Sign(b); \ + av = float ## s ## _val(a); \ + bv = float ## s ## _val(b); \ + if (ismag) { \ + aav = float ## s ## _abs(av); \ + abv = float ## s ## _abs(bv); \ + if (aav != abv) { \ + if (ismin) { \ + return (aav < abv) ? a : b; \ + } else { \ + return (aav < abv) ? b : a; \ + } \ + } \ + } \ + if (aSign != bSign) { \ + if (ismin) { \ + return aSign ? a : b; \ + } else { \ + return aSign ? b : a; \ + } \ + } else { \ + if (ismin) { \ + return (aSign ^ (av < bv)) ? a : b; \ + } else { \ + return (aSign ^ (av < bv)) ? 
b : a; \ + } \ + } \ +} \ + \ +float ## s float ## s ## _min(float ## s a, float ## s b, \ + float_status *status) \ +{ \ + return float ## s ## _minmax(a, b, 1, 0, 0, status); \ +} \ + \ +float ## s float ## s ## _max(float ## s a, float ## s b, \ + float_status *status) \ +{ \ + return float ## s ## _minmax(a, b, 0, 0, 0, status); \ +} \ + \ +float ## s float ## s ## _minnum(float ## s a, float ## s b, \ + float_status *status) \ +{ \ + return float ## s ## _minmax(a, b, 1, 1, 0, status); \ +} \ + \ +float ## s float ## s ## _maxnum(float ## s a, float ## s b, \ + float_status *status) \ +{ \ + return float ## s ## _minmax(a, b, 0, 1, 0, status); \ +} \ + \ +float ## s float ## s ## _minnummag(float ## s a, float ## s b, \ + float_status *status) \ +{ \ + return float ## s ## _minmax(a, b, 1, 1, 1, status); \ +} \ + \ +float ## s float ## s ## _maxnummag(float ## s a, float ## s b, \ + float_status *status) \ +{ \ + return float ## s ## _minmax(a, b, 0, 1, 1, status); \ +} + +MINMAX(32) +MINMAX(64) + + +/* Multiply A by 2 raised to the power N. 
*/ +float32 float32_scalbn(float32 a, int n, float_status *status) +{ + flag aSign; + int16_t aExp; + uint32_t aSig; + + a = float32_squash_input_denormal(a, status); + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + + if ( aExp == 0xFF ) { + if ( aSig ) { + return propagateFloat32NaN(a, a, status); + } + return a; + } + if (aExp != 0) { + aSig |= 0x00800000; + } else if (aSig == 0) { + return a; + } else { + aExp++; + } + + if (n > 0x200) { + n = 0x200; + } else if (n < -0x200) { + n = -0x200; + } + + aExp += n - 1; + aSig <<= 7; + return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status); +} + +float64 float64_scalbn(float64 a, int n, float_status *status) +{ + flag aSign; + int16_t aExp; + uint64_t aSig; + + a = float64_squash_input_denormal(a, status); + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + + if ( aExp == 0x7FF ) { + if ( aSig ) { + return propagateFloat64NaN(a, a, status); + } + return a; + } + if (aExp != 0) { + aSig |= LIT64( 0x0010000000000000 ); + } else if (aSig == 0) { + return a; + } else { + aExp++; + } + + if (n > 0x1000) { + n = 0x1000; + } else if (n < -0x1000) { + n = -0x1000; + } + + aExp += n - 1; + aSig <<= 10; + return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status); +} + +floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF ) { + if ( aSig<<1 ) { + return propagateFloatx80NaN(a, a, status); + } + return a; + } + + if (aExp == 0) { + if (aSig == 0) { + return a; + } + aExp++; + } + + if (n > 0x10000) { + n = 0x10000; + } else if (n < -0x10000) { + n = -0x10000; + } + + aExp += n; + return 
normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, + aSign, aExp, aSig, 0, status); +} + +float128 float128_scalbn(float128 a, int n, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig0, aSig1; + + aSig1 = extractFloat128Frac1( a ); + aSig0 = extractFloat128Frac0( a ); + aExp = extractFloat128Exp( a ); + aSign = extractFloat128Sign( a ); + if ( aExp == 0x7FFF ) { + if ( aSig0 | aSig1 ) { + return propagateFloat128NaN(a, a, status); + } + return a; + } + if (aExp != 0) { + aSig0 |= LIT64( 0x0001000000000000 ); + } else if (aSig0 == 0 && aSig1 == 0) { + return a; + } else { + aExp++; + } + + if (n > 0x10000) { + n = 0x10000; + } else if (n < -0x10000) { + n = -0x10000; + } + + aExp += n - 1; + return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1 + , status); } diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index 317d4f22..e65beeeb 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -1,8 +1,26 @@ - -/*============================================================================ - -This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. +#define SOFTFLOAT_68K + +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. 
+ */ + +/* +=============================================================================== +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -11,479 +29,756 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 
Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ #ifndef SOFTFLOAT_H #define SOFTFLOAT_H + +#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH) +#include <sunmath.h> +#endif + + +/* This 'flag' type must be able to hold at least 0 and 1. It should + * probably be replaced with 'bool' but the uses would need to be audited + * to check that they weren't accidentally relying on it being a larger type. + */ +typedef uint8_t flag; + +#define LIT64( a ) a##LL + /*---------------------------------------------------------------------------- -| The macro `FLOATX80' must be defined to enable the extended double-precision -| floating-point format `floatx80'. If this macro is not defined, the -| `floatx80' type will not be defined, and none of the functions that either -| input or output the `floatx80' type will be defined. The same applies to -| the `FLOATX128' macro and the quadruple-precision format `float128'. +| Software IEC/IEEE floating-point ordering relations *----------------------------------------------------------------------------*/ -#define FLOATX80 -#define FLOATX128 +enum { + float_relation_less = -1, + float_relation_equal = 0, + float_relation_greater = 1, + float_relation_unordered = 2 +}; /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point types. 
*----------------------------------------------------------------------------*/ - -#include "mamesf.h" - -typedef bits32 float32; -typedef bits64 float64; -#ifdef FLOATX80 +/* Use structures for soft-float types. This prevents accidentally mixing + them with native int/float types. A sufficiently clever compiler and + sane ABI should be able to see through these structs. However + x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */ +//#define USE_SOFTFLOAT_STRUCT_TYPES +#ifdef USE_SOFTFLOAT_STRUCT_TYPES typedef struct { - bits16 high; - bits64 low; -} floatx80; + uint16_t v; +} float16; +#define float16_val(x) (((float16)(x)).v) +#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; }) +#define const_float16(x) { x } +typedef struct { + uint32_t v; +} float32; +/* The cast ensures an error if the wrong type is passed. */ +#define float32_val(x) (((float32)(x)).v) +#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; }) +#define const_float32(x) { x } +typedef struct { + uint64_t v; +} float64; +#define float64_val(x) (((float64)(x)).v) +#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; }) +#define const_float64(x) { x } +#else +typedef uint16_t float16; +typedef uint32_t float32; +typedef uint64_t float64; +#define float16_val(x) (x) +#define float32_val(x) (x) +#define float64_val(x) (x) +#define make_float16(x) (x) +#define make_float32(x) (x) +#define make_float64(x) (x) +#define const_float16(x) (x) +#define const_float32(x) (x) +#define const_float64(x) (x) #endif -#ifdef FLOATX128 typedef struct { - bits64 high, low; -} float128; + uint64_t low; + uint16_t high; +} floatx80; +#define make_floatx80(exp, mant) ((floatx80) { mant, exp }) +#define make_floatx80_init(exp, mant) { .low = mant, .high = exp } +typedef struct { +#ifdef HOST_WORDS_BIGENDIAN + uint64_t high, low; +#else + uint64_t low, high; #endif - 
-/*---------------------------------------------------------------------------- -| Primitive arithmetic functions, including multi-word arithmetic, and -| division and square root approximations. (Can be specialized to target if -| desired.) -*----------------------------------------------------------------------------*/ -#include "softfloat-macros.h" +} float128; +#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ }) +#define make_float128_init(high_, low_) { .high = high_, .low = low_ } /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point underflow tininess-detection mode. *----------------------------------------------------------------------------*/ -extern int8 float_detect_tininess; enum { - float_tininess_after_rounding = 0, - float_tininess_before_rounding = 1 + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 }; /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point rounding mode. *----------------------------------------------------------------------------*/ -extern int8 float_rounding_mode; enum { - float_round_nearest_even = 0, - float_round_to_zero = 1, - float_round_down = 2, - float_round_up = 3 + float_round_nearest_even = 0, + float_round_down = 1, + float_round_up = 2, + float_round_to_zero = 3, + float_round_ties_away = 4, }; /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point exception flags. 
*----------------------------------------------------------------------------*/ -extern int8 float_exception_flags; enum { - float_flag_invalid = 0x01, float_flag_denormal = 0x02, float_flag_divbyzero = 0x04, float_flag_overflow = 0x08, - float_flag_underflow = 0x10, float_flag_inexact = 0x20 + float_flag_invalid = 1, + float_flag_denormal = 2, + float_flag_divbyzero = 4, + float_flag_overflow = 8, + float_flag_underflow = 16, + float_flag_inexact = 32, + float_flag_input_denormal = 64, + float_flag_output_denormal = 128 }; +typedef struct float_status { + signed char float_detect_tininess; + signed char float_rounding_mode; + uint8_t float_exception_flags; + signed char floatx80_rounding_precision; + /* should denormalised results go to zero and set the inexact flag? */ + flag flush_to_zero; + /* should denormalised inputs go to zero and set the input_denormal flag? */ + flag flush_inputs_to_zero; + flag default_nan_mode; + flag snan_bit_is_one; +} float_status; + +static inline void set_float_detect_tininess(int val, float_status *status) +{ + status->float_detect_tininess = val; +} +static inline void set_float_rounding_mode(int val, float_status *status) +{ + status->float_rounding_mode = val; +} +static inline void set_float_exception_flags(int val, float_status *status) +{ + status->float_exception_flags = val; +} +static inline void set_floatx80_rounding_precision(int val, + float_status *status) +{ + status->floatx80_rounding_precision = val; +} +static inline void set_flush_to_zero(flag val, float_status *status) +{ + status->flush_to_zero = val; +} +static inline void set_flush_inputs_to_zero(flag val, float_status *status) +{ + status->flush_inputs_to_zero = val; +} +static inline void set_default_nan_mode(flag val, float_status *status) +{ + status->default_nan_mode = val; +} +static inline void set_snan_bit_is_one(flag val, float_status *status) +{ + status->snan_bit_is_one = val; +} +static inline int get_float_detect_tininess(float_status *status) +{ 
+ return status->float_detect_tininess; +} +static inline int get_float_rounding_mode(float_status *status) +{ + return status->float_rounding_mode; +} +static inline int get_float_exception_flags(float_status *status) +{ + return status->float_exception_flags; +} +static inline int get_floatx80_rounding_precision(float_status *status) +{ + return status->floatx80_rounding_precision; +} +static inline flag get_flush_to_zero(float_status *status) +{ + return status->flush_to_zero; +} +static inline flag get_flush_inputs_to_zero(float_status *status) +{ + return status->flush_inputs_to_zero; +} +static inline flag get_default_nan_mode(float_status *status) +{ + return status->default_nan_mode; +} + /*---------------------------------------------------------------------------- | Routine to raise any or all of the software IEC/IEEE floating-point | exception flags. *----------------------------------------------------------------------------*/ -void float_raise( int8 ); +void float_raise(uint8_t flags, float_status *status); /*---------------------------------------------------------------------------- -| Software IEC/IEEE integer-to-floating-point conversion routines. +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. 
*----------------------------------------------------------------------------*/ -float32 int32_to_float32( int32 ); -float64 int32_to_float64( int32 ); -#ifdef FLOATX80 -floatx80 int32_to_floatx80( int32 ); -#endif -#ifdef FLOATX128 -float128 int32_to_float128( int32 ); -#endif -float32 int64_to_float32( int64 ); -float64 int64_to_float64( int64 ); -#ifdef FLOATX80 -floatx80 int64_to_floatx80( int64 ); -#endif -#ifdef FLOATX128 -float128 int64_to_float128( int64 ); -#endif +float32 float32_squash_input_denormal(float32 a, float_status *status); +float64 float64_squash_input_denormal(float64 a, float_status *status); /*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision conversion routines. +| Options to indicate which negations to perform in float*_muladd() +| Using these differs from negating an input or output before calling +| the muladd function in that this means that a NaN doesn't have its +| sign bit inverted before it is propagated. +| We also support halving the result before rounding, as a special +| case to support the ARM fused-sqrt-step instruction FRSQRTS. *----------------------------------------------------------------------------*/ -int32 float32_to_int32( float32 ); -int32 float32_to_int32_round_to_zero( float32 ); -int64 float32_to_int64( float32 ); -int64 float32_to_int64_round_to_zero( float32 ); -float64 float32_to_float64( float32 ); -#ifdef FLOATX80 -floatx80 float32_to_floatx80( float32 ); -floatx80 float32_to_floatx80_allowunnormal( float32 ); -#endif -#ifdef FLOATX128 -float128 float32_to_float128( float32 ); -#endif +enum { + float_muladd_negate_c = 1, + float_muladd_negate_product = 2, + float_muladd_negate_result = 4, + float_muladd_halve_result = 8, +}; /*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision operations. +| Software IEC/IEEE integer-to-floating-point conversion routines. 
*----------------------------------------------------------------------------*/ -float32 float32_round_to_int( float32 ); -float32 float32_add( float32, float32 ); -float32 float32_sub( float32, float32 ); -float32 float32_mul( float32, float32 ); -float32 float32_div( float32, float32 ); -float32 float32_rem( float32, float32 ); -float32 float32_sqrt( float32 ); -flag float32_eq( float32, float32 ); -flag float32_le( float32, float32 ); -flag float32_lt( float32, float32 ); -flag float32_eq_signaling( float32, float32 ); -flag float32_le_quiet( float32, float32 ); -flag float32_lt_quiet( float32, float32 ); -flag float32_is_signaling_nan( float32 ); +float32 int32_to_float32(int32_t, float_status *status); +float64 int32_to_float64(int32_t, float_status *status); +float32 uint32_to_float32(uint32_t, float_status *status); +float64 uint32_to_float64(uint32_t, float_status *status); +floatx80 int32_to_floatx80(int32_t, float_status *status); +float128 int32_to_float128(int32_t, float_status *status); +float32 int64_to_float32(int64_t, float_status *status); +float64 int64_to_float64(int64_t, float_status *status); +floatx80 int64_to_floatx80(int64_t, float_status *status); +float128 int64_to_float128(int64_t, float_status *status); +float32 uint64_to_float32(uint64_t, float_status *status); +float64 uint64_to_float64(uint64_t, float_status *status); +float128 uint64_to_float128(uint64_t, float_status *status); + +/* We provide the int16 versions for symmetry of API with float-to-int */ +static inline float32 int16_to_float32(int16_t v, float_status *status) +{ + return int32_to_float32(v, status); +} + +static inline float32 uint16_to_float32(uint16_t v, float_status *status) +{ + return uint32_to_float32(v, status); +} + +static inline float64 int16_to_float64(int16_t v, float_status *status) +{ + return int32_to_float64(v, status); +} + +static inline float64 uint16_to_float64(uint16_t v, float_status *status) +{ + return uint32_to_float64(v, status); +} 
/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision conversion routines. +| Software half-precision conversion routines. *----------------------------------------------------------------------------*/ -int32 float64_to_int32( float64 ); -int32 float64_to_int32_round_to_zero( float64 ); -int64 float64_to_int64( float64 ); -int64 float64_to_int64_round_to_zero( float64 ); -float32 float64_to_float32( float64 ); -#ifdef FLOATX80 -floatx80 float64_to_floatx80( float64 ); -floatx80 float64_to_floatx80_allowunnormal( float64 ); -#endif -#ifdef FLOATX128 -float128 float64_to_float128( float64 ); -#endif +float16 float32_to_float16(float32, flag, float_status *status); +float32 float16_to_float32(float16, flag, float_status *status); +float16 float64_to_float16(float64 a, flag ieee, float_status *status); +float64 float16_to_float64(float16 a, flag ieee, float_status *status); /*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision operations. +| Software half-precision operations. 
*----------------------------------------------------------------------------*/ -float64 float64_round_to_int( float64 ); -float64 float64_add( float64, float64 ); -float64 float64_sub( float64, float64 ); -float64 float64_mul( float64, float64 ); -float64 float64_div( float64, float64 ); -float64 float64_rem( float64, float64 ); -float64 float64_sqrt( float64 ); -flag float64_eq( float64, float64 ); -flag float64_le( float64, float64 ); -flag float64_lt( float64, float64 ); -flag float64_eq_signaling( float64, float64 ); -flag float64_le_quiet( float64, float64 ); -flag float64_lt_quiet( float64, float64 ); -flag float64_is_signaling_nan( float64 ); - -#ifdef FLOATX80 +int float16_is_quiet_nan(float16, float_status *status); +int float16_is_signaling_nan(float16, float_status *status); +float16 float16_maybe_silence_nan(float16, float_status *status); + +static inline int float16_is_any_nan(float16 a) +{ + return ((float16_val(a) & ~0x8000) > 0x7c00); +} /*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision conversion routines. +| The pattern for a default generated half-precision NaN. 
*----------------------------------------------------------------------------*/ -int32 floatx80_to_int32( floatx80 ); -int32 floatx80_to_int32_round_to_zero( floatx80 ); -int64 floatx80_to_int64( floatx80 ); -int64 floatx80_to_int64_round_to_zero( floatx80 ); -float32 floatx80_to_float32( floatx80 ); -float64 floatx80_to_float64( floatx80 ); -#ifdef FLOATX128 -float128 floatx80_to_float128( floatx80 ); -#endif -floatx80 floatx80_scale(floatx80 a, floatx80 b); -bits64 extractFloatx80Frac( floatx80 a ); -int32 extractFloatx80Exp( floatx80 a ); +float16 float16_default_nan(float_status *status); /*---------------------------------------------------------------------------- -| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an -| extended double-precision floating-point value, returning the result. +| Software IEC/IEEE single-precision conversion routines. *----------------------------------------------------------------------------*/ +int16_t float32_to_int16(float32, float_status *status); +uint16_t float32_to_uint16(float32, float_status *status); +int16_t float32_to_int16_round_to_zero(float32, float_status *status); +uint16_t float32_to_uint16_round_to_zero(float32, float_status *status); +int32_t float32_to_int32(float32, float_status *status); +int32_t float32_to_int32_round_to_zero(float32, float_status *status); +uint32_t float32_to_uint32(float32, float_status *status); +uint32_t float32_to_uint32_round_to_zero(float32, float_status *status); +int64_t float32_to_int64(float32, float_status *status); +uint64_t float32_to_uint64(float32, float_status *status); +uint64_t float32_to_uint64_round_to_zero(float32, float_status *status); +int64_t float32_to_int64_round_to_zero(float32, float_status *status); +float64 float32_to_float64(float32, float_status *status); +floatx80 float32_to_floatx80(float32, float_status *status); +float128 float32_to_float128(float32, float_status *status); + +floatx80 float32_to_floatx80_allowunnormal(float32, 
float_status *status); -INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision operations. +*----------------------------------------------------------------------------*/ +float32 float32_round_to_int(float32, float_status *status); +float32 float32_add(float32, float32, float_status *status); +float32 float32_sub(float32, float32, float_status *status); +float32 float32_mul(float32, float32, float_status *status); +float32 float32_div(float32, float32, float_status *status); +float32 float32_rem(float32, float32, float_status *status); +float32 float32_muladd(float32, float32, float32, int, float_status *status); +float32 float32_sqrt(float32, float_status *status); +float32 float32_exp2(float32, float_status *status); +float32 float32_log2(float32, float_status *status); +int float32_eq(float32, float32, float_status *status); +int float32_le(float32, float32, float_status *status); +int float32_lt(float32, float32, float_status *status); +int float32_unordered(float32, float32, float_status *status); +int float32_eq_quiet(float32, float32, float_status *status); +int float32_le_quiet(float32, float32, float_status *status); +int float32_lt_quiet(float32, float32, float_status *status); +int float32_unordered_quiet(float32, float32, float_status *status); +int float32_compare(float32, float32, float_status *status); +int float32_compare_quiet(float32, float32, float_status *status); +float32 float32_min(float32, float32, float_status *status); +float32 float32_max(float32, float32, float_status *status); +float32 float32_minnum(float32, float32, float_status *status); +float32 float32_maxnum(float32, float32, float_status *status); +float32 float32_minnummag(float32, float32, float_status *status); +float32 float32_maxnummag(float32, float32, float_status *status); +int float32_is_quiet_nan(float32, float_status *status); +int 
float32_is_signaling_nan(float32, float_status *status); +float32 float32_maybe_silence_nan(float32, float_status *status); +float32 float32_scalbn(float32, int, float_status *status); + +static inline float32 float32_abs(float32 a) { - floatx80 z; + /* Note that abs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float32(float32_val(a) & 0x7fffffff); +} - z.low = zSig; - z.high = ( ( (bits16) zSign )<<15 ) + zExp; - return z; +static inline float32 float32_chs(float32 a) +{ + /* Note that chs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float32(float32_val(a) ^ 0x80000000); +} + +static inline int float32_is_infinity(float32 a) +{ + return (float32_val(a) & 0x7fffffff) == 0x7f800000; +} + +static inline int float32_is_neg(float32 a) +{ + return float32_val(a) >> 31; +} + +static inline int float32_is_zero(float32 a) +{ + return (float32_val(a) & 0x7fffffff) == 0; +} +static inline int float32_is_any_nan(float32 a) +{ + return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL); } +static inline int float32_is_zero_or_denormal(float32 a) +{ + return (float32_val(a) & 0x7f800000) == 0; +} + +static inline float32 float32_set_sign(float32 a, int sign) +{ + return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31)); +} + +#define float32_zero make_float32(0) +#define float32_one make_float32(0x3f800000) +#define float32_ln2 make_float32(0x3f317218) +#define float32_pi make_float32(0x40490fdb) +#define float32_half make_float32(0x3f000000) +#define float32_infinity make_float32(0x7f800000) + + /*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision rounding precision. Valid -| values are 32, 64, and 80. +| The pattern for a default generated single-precision NaN. 
*----------------------------------------------------------------------------*/ -extern int8 floatx80_rounding_precision; +float32 float32_default_nan(float_status *status); /*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision operations. +| Software IEC/IEEE double-precision conversion routines. *----------------------------------------------------------------------------*/ -floatx80 floatx80_round_to_int( floatx80 ); -floatx80 floatx80_round_to_int_toward_zero( floatx80 ); -floatx80 floatx80_round32( floatx80 ); -floatx80 floatx80_normalize( floatx80 ); -floatx80 floatx80_add( floatx80, floatx80 ); -floatx80 floatx80_sub( floatx80, floatx80 ); -floatx80 floatx80_mul( floatx80, floatx80 ); -floatx80 floatx80_div( floatx80, floatx80 ); -//floatx80 floatx80_rem( floatx80, floatx80 ); -floatx80 floatx80_sqrt( floatx80 ); - -flag floatx80_eq( floatx80, floatx80 ); -flag floatx80_le( floatx80, floatx80 ); -flag floatx80_lt( floatx80, floatx80 ); -flag floatx80_eq_signaling( floatx80, floatx80 ); -flag floatx80_le_quiet( floatx80, floatx80 ); -flag floatx80_lt_quiet( floatx80, floatx80 ); - -flag floatx80_is_signaling_nan( floatx80 ); -flag floatx80_is_nan( floatx80 ); -flag floatx80_is_zero( floatx80 ); -flag floatx80_is_infinity( floatx80 ); -flag floatx80_is_negative( floatx80 ); -flag floatx80_is_denormal( floatx80 ); -flag floatx80_is_unnormal( floatx80 ); -flag floatx80_is_normal( floatx80 ); - -int floatx80_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a); -int floatx80_fsin(floatx80 *a); -int floatx80_fcos(floatx80 *a); -int floatx80_ftan(floatx80 *a); - -floatx80 floatx80_flognp1(floatx80 a); -floatx80 floatx80_flogn(floatx80 a); -floatx80 floatx80_flog2(floatx80 a); -floatx80 floatx80_flog10(floatx80 a); - -floatx80 floatx80_getman( floatx80 a ); -floatx80 floatx80_getexp( floatx80 a ); -floatx80 floatx80_rem( floatx80 a, floatx80 b, bits64 *q, flag *s ); -floatx80 floatx80_mod( 
floatx80 a, floatx80 b, bits64 *q, flag *s ); - -// roundAndPackFloatx80 used to be in softfloat-round-pack, is now in softfloat.c -floatx80 roundAndPackFloatx80(int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1); +int16_t float64_to_int16(float64, float_status *status); +uint16_t float64_to_uint16(float64, float_status *status); +int16_t float64_to_int16_round_to_zero(float64, float_status *status); +uint16_t float64_to_uint16_round_to_zero(float64, float_status *status); +int32_t float64_to_int32(float64, float_status *status); +int32_t float64_to_int32_round_to_zero(float64, float_status *status); +uint32_t float64_to_uint32(float64, float_status *status); +uint32_t float64_to_uint32_round_to_zero(float64, float_status *status); +int64_t float64_to_int64(float64, float_status *status); +int64_t float64_to_int64_round_to_zero(float64, float_status *status); +uint64_t float64_to_uint64(float64 a, float_status *status); +uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status); +float32 float64_to_float32(float64, float_status *status); +floatx80 float64_to_floatx80(float64, float_status *status); +float128 float64_to_float128(float64, float_status *status); + +floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status ); -#endif +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision operations. 
+*----------------------------------------------------------------------------*/ +float64 float64_round_to_int(float64, float_status *status); +float64 float64_trunc_to_int(float64, float_status *status); +float64 float64_add(float64, float64, float_status *status); +float64 float64_sub(float64, float64, float_status *status); +float64 float64_mul(float64, float64, float_status *status); +float64 float64_div(float64, float64, float_status *status); +float64 float64_rem(float64, float64, float_status *status); +float64 float64_muladd(float64, float64, float64, int, float_status *status); +float64 float64_sqrt(float64, float_status *status); +float64 float64_log2(float64, float_status *status); +int float64_eq(float64, float64, float_status *status); +int float64_le(float64, float64, float_status *status); +int float64_lt(float64, float64, float_status *status); +int float64_unordered(float64, float64, float_status *status); +int float64_eq_quiet(float64, float64, float_status *status); +int float64_le_quiet(float64, float64, float_status *status); +int float64_lt_quiet(float64, float64, float_status *status); +int float64_unordered_quiet(float64, float64, float_status *status); +int float64_compare(float64, float64, float_status *status); +int float64_compare_quiet(float64, float64, float_status *status); +float64 float64_min(float64, float64, float_status *status); +float64 float64_max(float64, float64, float_status *status); +float64 float64_minnum(float64, float64, float_status *status); +float64 float64_maxnum(float64, float64, float_status *status); +float64 float64_minnummag(float64, float64, float_status *status); +float64 float64_maxnummag(float64, float64, float_status *status); +int float64_is_quiet_nan(float64 a, float_status *status); +int float64_is_signaling_nan(float64, float_status *status); +float64 float64_maybe_silence_nan(float64, float_status *status); +float64 float64_scalbn(float64, int, float_status *status); + +static inline float64 
float64_abs(float64 a) +{ + /* Note that abs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float64(float64_val(a) & 0x7fffffffffffffffLL); +} + +static inline float64 float64_chs(float64 a) +{ + /* Note that chs does *not* handle NaN specially, nor does + * it flush denormal inputs to zero. + */ + return make_float64(float64_val(a) ^ 0x8000000000000000LL); +} + +static inline int float64_is_infinity(float64 a) +{ + return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL; +} -#ifdef FLOATX128 +static inline int float64_is_neg(float64 a) +{ + return float64_val(a) >> 63; +} + +static inline int float64_is_zero(float64 a) +{ + return (float64_val(a) & 0x7fffffffffffffffLL) == 0; +} + +static inline int float64_is_any_nan(float64 a) +{ + return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL); +} + +static inline int float64_is_zero_or_denormal(float64 a) +{ + return (float64_val(a) & 0x7ff0000000000000LL) == 0; +} + +static inline float64 float64_set_sign(float64 a, int sign) +{ + return make_float64((float64_val(a) & 0x7fffffffffffffffULL) + | ((int64_t)sign << 63)); +} + +#define float64_zero make_float64(0) +#define float64_one make_float64(0x3ff0000000000000LL) +#define float64_ln2 make_float64(0x3fe62e42fefa39efLL) +#define float64_pi make_float64(0x400921fb54442d18LL) +#define float64_half make_float64(0x3fe0000000000000LL) +#define float64_infinity make_float64(0x7ff0000000000000LL) /*---------------------------------------------------------------------------- -| Software IEC/IEEE quadruple-precision conversion routines. +| The pattern for a default generated double-precision NaN. 
*----------------------------------------------------------------------------*/ -int32 float128_to_int32( float128 ); -int32 float128_to_int32_round_to_zero( float128 ); -int64 float128_to_int64( float128 ); -int64 float128_to_int64_round_to_zero( float128 ); -float32 float128_to_float32( float128 ); -float64 float128_to_float64( float128 ); -#ifdef FLOATX80 -floatx80 float128_to_floatx80( float128 ); -#endif +float64 float64_default_nan(float_status *status); /*---------------------------------------------------------------------------- -| Software IEC/IEEE quadruple-precision operations. +| Software IEC/IEEE extended double-precision conversion routines. *----------------------------------------------------------------------------*/ -float128 float128_round_to_int( float128 ); -float128 float128_add( float128, float128 ); -float128 float128_sub( float128, float128 ); -float128 float128_mul( float128, float128 ); -float128 float128_div( float128, float128 ); -float128 float128_rem( float128, float128 ); -float128 float128_sqrt( float128 ); -flag float128_eq( float128, float128 ); -flag float128_le( float128, float128 ); -flag float128_lt( float128, float128 ); -flag float128_eq_signaling( float128, float128 ); -flag float128_le_quiet( float128, float128 ); -flag float128_lt_quiet( float128, float128 ); -flag float128_is_signaling_nan( float128 ); +int32_t floatx80_to_int32(floatx80, float_status *status); +int32_t floatx80_to_int32_round_to_zero(floatx80, float_status *status); +int64_t floatx80_to_int64(floatx80, float_status *status); +int64_t floatx80_to_int64_round_to_zero(floatx80, float_status *status); +float32 floatx80_to_float32(floatx80, float_status *status); +float64 floatx80_to_float64(floatx80, float_status *status); +float128 floatx80_to_float128(floatx80, float_status *status); + +floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status); +floatx80 floatx80_round32( floatx80, float_status *status); +floatx80 floatx80_round64( 
floatx80, float_status *status); + +int floatx80_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, float_status *status); +int floatx80_fsin(floatx80 *a, float_status *status); +int floatx80_fcos(floatx80 *a, float_status *status); +int floatx80_ftan(floatx80 *a, float_status *status); + +floatx80 floatx80_flognp1(floatx80 a, float_status *status); +floatx80 floatx80_flogn(floatx80 a, float_status *status);; +floatx80 floatx80_flog2(floatx80 a, float_status *status); +floatx80 floatx80_flog10(floatx80 a, float_status *status); + +floatx80 floatx80_getman( floatx80 a, float_status *status); +floatx80 floatx80_getexp( floatx80 a, float_status *status); +floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); +floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); +floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status); /*---------------------------------------------------------------------------- -| Packs the sign `zSign', the exponent `zExp', and the significand formed -| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision -| floating-point value, returning the result. After being shifted into the -| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply -| added together to form the most significant 32 bits of the result. This -| means that any integer portion of `zSig0' will be added into the exponent. -| Since a properly normalized significand will have an integer portion equal -| to 1, the `zExp' input should be 1 less than the desired result exponent -| whenever `zSig0' and `zSig1' concatenated form a complete, normalized -| significand. +| Software IEC/IEEE extended double-precision operations. 
*----------------------------------------------------------------------------*/ +floatx80 floatx80_round_to_int(floatx80, float_status *status); +floatx80 floatx80_add(floatx80, floatx80, float_status *status); +floatx80 floatx80_sub(floatx80, floatx80, float_status *status); +floatx80 floatx80_mul(floatx80, floatx80, float_status *status); +floatx80 floatx80_div(floatx80, floatx80, float_status *status); +//floatx80 floatx80_rem(floatx80, floatx80, float_status *status); +floatx80 floatx80_sqrt(floatx80, float_status *status); +int floatx80_eq(floatx80, floatx80, float_status *status); +int floatx80_le(floatx80, floatx80, float_status *status); +int floatx80_lt(floatx80, floatx80, float_status *status); +int floatx80_unordered(floatx80, floatx80, float_status *status); +int floatx80_eq_quiet(floatx80, floatx80, float_status *status); +int floatx80_le_quiet(floatx80, floatx80, float_status *status); +int floatx80_lt_quiet(floatx80, floatx80, float_status *status); +int floatx80_unordered_quiet(floatx80, floatx80, float_status *status); +int floatx80_compare(floatx80, floatx80, float_status *status); +int floatx80_compare_quiet(floatx80, floatx80, float_status *status); +int floatx80_is_quiet_nan(floatx80, float_status *status); +int floatx80_is_signaling_nan(floatx80, float_status *status); +floatx80 floatx80_maybe_silence_nan(floatx80, float_status *status); +floatx80 floatx80_scalbn(floatx80, int, float_status *status); + +//flag floatx80_is_unnormal( floatx80 a ); +//flag floatx80_is_denormal( floatx80 a ); + +static inline floatx80 floatx80_abs(floatx80 a) +{ + a.high &= 0x7fff; + return a; +} -INLINE float128 - packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) +static inline floatx80 floatx80_chs(floatx80 a) { - float128 z; + a.high ^= 0x8000; + return a; +} - z.low = zSig1; - z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; - return z; +static inline int floatx80_is_zero_or_denormal(floatx80 a) +{ + return (a.high & 
0x7fff) == 0; +} +static inline int floatx80_is_any_nan(floatx80 a) +{ + return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); } /*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and extended significand formed by the concatenation of `zSig0', `zSig1', -| and `zSig2', and returns the proper quadruple-precision floating-point value -| corresponding to the abstract input. Ordinarily, the abstract value is -| simply rounded and packed into the quadruple-precision format, with the -| inexact exception raised if the abstract input cannot be represented -| exactly. However, if the abstract value is too large, the overflow and -| inexact exceptions are raised and an infinity or maximal finite value is -| returned. If the abstract value is too small, the input value is rounded to -| a subnormal number, and the underflow and inexact exceptions are raised if -| the abstract input cannot be represented exactly as a subnormal quadruple- -| precision floating-point number. -| The input significand must be normalized or smaller. If the input -| significand is not normalized, `zExp' must be 0; in that case, the result -| returned is a subnormal number, and it must not require rounding. In the -| usual case that the input significand is normalized, `zExp' must be 1 less -| than the ``true'' floating-point exponent. The handling of underflow and -| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Return whether the given value is an invalid floatx80 encoding. +| Invalid floatx80 encodings arise when the integer bit is not set, but +| the exponent is not zero. The only times the integer bit is permitted to +| be zero is in subnormal numbers and the value zero. +| This includes what the Intel software developer's manual calls pseudo-NaNs, +| pseudo-infinities and un-normal numbers. 
It does not include +| pseudo-denormals, which must still be correctly handled as inputs even +| if they are never generated as outputs. *----------------------------------------------------------------------------*/ +static inline bool floatx80_invalid_encoding(floatx80 a) +{ + return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0; +} -INLINE float128 - roundAndPackFloat128( - flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 ) -{ - int8 roundingMode; - flag roundNearestEven, increment, isTiny; - - roundingMode = float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (sbits64) zSig2 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } - } - } - if ( 0x7FFD <= (bits32) zExp ) { - if ( ( 0x7FFD < zExp ) - || ( ( zExp == 0x7FFD ) - && eq128( - LIT64( 0x0001FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ), - zSig0, - zSig1 - ) - && increment - ) - ) { - float_raise( float_flag_overflow | float_flag_inexact ); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! zSign && ( roundingMode == float_round_down ) ) - ) { - return - packFloat128( - zSign, - 0x7FFE, - LIT64( 0x0000FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ) - ); - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( zExp < 0 ) { - isTiny = - ( float_detect_tininess == float_tininess_before_rounding ) - || ( zExp < -1 ) - || ! 
increment - || lt128( - zSig0, - zSig1, - LIT64( 0x0001FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ) - ); - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); - zExp = 0; - if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); - if ( roundNearestEven ) { - increment = ( (sbits64) zSig2 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } - } - } - } - if ( zSig2 ) float_exception_flags |= float_flag_inexact; - if ( increment ) { - add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); - } - else { - if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; - } - return packFloat128( zSign, zExp, zSig0, zSig1 ); +#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) +#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) +#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) +#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL) +#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL) +#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL) -} +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ +floatx80 floatx80_default_nan(float_status *status); /*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand formed by the concatenation of `zSig0' and `zSig1', and -| returns the proper quadruple-precision floating-point value corresponding -| to the abstract input. This routine is just like `roundAndPackFloat128' -| except that the input significand has fewer bits and does not have to be -| normalized. 
In all cases, `zExp' must be 1 less than the ``true'' floating- -| point exponent. +| Software IEC/IEEE quadruple-precision conversion routines. *----------------------------------------------------------------------------*/ +int32_t float128_to_int32(float128, float_status *status); +int32_t float128_to_int32_round_to_zero(float128, float_status *status); +int64_t float128_to_int64(float128, float_status *status); +int64_t float128_to_int64_round_to_zero(float128, float_status *status); +float32 float128_to_float32(float128, float_status *status); +float64 float128_to_float64(float128, float_status *status); +floatx80 float128_to_floatx80(float128, float_status *status); -INLINE float128 - normalizeRoundAndPackFloat128( - flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) -{ - int8 shiftCount; - bits64 zSig2; - - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = countLeadingZeros64( zSig0 ) - 15; - if ( 0 <= shiftCount ) { - zSig2 = 0; - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - } - else { - shift128ExtraRightJamming( - zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); - } - zExp -= shiftCount; - return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); +/*---------------------------------------------------------------------------- +| Software IEC/IEEE quadruple-precision operations. 
+*----------------------------------------------------------------------------*/ +float128 float128_round_to_int(float128, float_status *status); +float128 float128_add(float128, float128, float_status *status); +float128 float128_sub(float128, float128, float_status *status); +float128 float128_mul(float128, float128, float_status *status); +float128 float128_div(float128, float128, float_status *status); +float128 float128_rem(float128, float128, float_status *status); +float128 float128_sqrt(float128, float_status *status); +int float128_eq(float128, float128, float_status *status); +int float128_le(float128, float128, float_status *status); +int float128_lt(float128, float128, float_status *status); +int float128_unordered(float128, float128, float_status *status); +int float128_eq_quiet(float128, float128, float_status *status); +int float128_le_quiet(float128, float128, float_status *status); +int float128_lt_quiet(float128, float128, float_status *status); +int float128_unordered_quiet(float128, float128, float_status *status); +int float128_compare(float128, float128, float_status *status); +int float128_compare_quiet(float128, float128, float_status *status); +int float128_is_quiet_nan(float128, float_status *status); +int float128_is_signaling_nan(float128, float_status *status); +float128 float128_maybe_silence_nan(float128, float_status *status); +float128 float128_scalbn(float128, int, float_status *status); + +static inline float128 float128_abs(float128 a) +{ + a.high &= 0x7fffffffffffffffLL; + return a; +} +static inline float128 float128_chs(float128 a) +{ + a.high ^= 0x8000000000000000LL; + return a; } -#endif -#endif //SOFTFLOAT_H \ No newline at end of file +static inline int float128_is_infinity(float128 a) +{ + return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0; +} + +static inline int float128_is_neg(float128 a) +{ + return a.high >> 63; +} + +static inline int float128_is_zero(float128 a) +{ + return (a.high & 
0x7fffffffffffffffLL) == 0 && a.low == 0; +} + +static inline int float128_is_zero_or_denormal(float128 a) +{ + return (a.high & 0x7fff000000000000LL) == 0; +} + +static inline int float128_is_any_nan(float128 a) +{ + return ((a.high >> 48) & 0x7fff) == 0x7fff && + ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0)); +} + +#define float128_zero make_float128(0, 0) + +/*---------------------------------------------------------------------------- +| The pattern for a default generated quadruple-precision NaN. +*----------------------------------------------------------------------------*/ +float128 float128_default_nan(float_status *status); + +#endif /* SOFTFLOAT_H */ -- 2.47.3