FPU softfloat update.

author Toni Wilen <twilen@winuae.net>

Tue, 10 Jan 2017 18:27:12 +0000 (20:27 +0200)

committer Toni Wilen <twilen@winuae.net>

Tue, 10 Jan 2017 18:27:12 +0000 (20:27 +0200)
author Toni Wilen <twilen@winuae.net>
Tue, 10 Jan 2017 18:27:12 +0000 (20:27 +0200)
committer Toni Wilen <twilen@winuae.net>
Tue, 10 Jan 2017 18:27:12 +0000 (20:27 +0200)
diff --git a/fpp.cpp b/fpp.cpp

index 1ef695c514e1d39716d98dcf98bb3eb77e8f4340..9d8e93d1ada0ef7cb58789cd54f96a6b77fd1ad1 100644 (file)
--- a/fpp.cpp
+++ b/fpp.cpp
@@ -35,9 +35,7 @@
  #include "cpummu030.h"
  #include "debug.h"
  
-#ifdef WITH_SOFTFLOAT
-#include "softfloatx80.h"
-#endif
+#include "softfloat/softfloat.h"
  
  #define DEBUG_FPP 0
  #define EXCEPTION_FPP 1
@@ -119,6 +117,7 @@ static double *fp_1e4096 = (double *)dhex_inf;
  static double *fp_inf    = (double *)dhex_inf;
  static double *fp_nan    = (double *)dhex_nan;
  #endif
+static const double twoto32 = 4294967296.0;
  double fp_1e8 = 1.0e8;
  float  fp_1e0 = 1, fp_1e1 = 10, fp_1e2 = 100, fp_1e4 = 10000;
  static bool fpu_mmu_fixup;
@@ -128,12 +127,9 @@ static bool fpu_mmu_fixup;
  #define FFLAG_NAN   0x0400
  
  
-#ifdef WITH_SOFTFLOAT
  static floatx80 fxsizes[6] = { 0 };
  static floatx80 fxzero;
  static floatx80 fx_1e0, fx_1e1, fx_1e2, fx_1e4, fx_1e8;
-struct float_status_t fxstatus;
-#endif
  static const fptype fsizes[] = { -128.0, 127.0, -32768.0, 32767.0, -2147483648.0, 2147483647.0 };
  
  #define FP_INEXACT (1 << 9)
@@ -146,181 +142,409 @@ static const fptype fsizes[] = { -128.0, 127.0, -32768.0, 32767.0, -2147483648.0
  
  STATIC_INLINE void MAKE_FPSR (fptype *fp)
  {
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat)
+       if (currprefs.fpu_softfloat) {
                 return;
-#endif
-       int status = fetestexcept (FE_ALL_EXCEPT);
-       if (status) {
-               if (status & FE_INEXACT)
-                       regs.fp_result_status |= FP_INEXACT;
-               if (status & FE_DIVBYZERO)
-                       regs.fp_result_status |= FP_DIVBYZERO;
-               if (status & FE_UNDERFLOW)
-                       regs.fp_result_status |= FP_UNDERFLOW;
-               if (status & FE_OVERFLOW)
-                       regs.fp_result_status |= FP_OVERFLOW;
-               if (status & FE_INVALID)
-                       regs.fp_result_status |= FP_OPERAND;
-       }
-       regs.fp_result.fp = *fp;
-}
-
-#ifdef WITH_SOFTFLOAT
+       } else {
+               int status = fetestexcept (FE_ALL_EXCEPT);
+               if (status) {
+                       if (status & FE_INEXACT)
+                               regs.fp_result_status |= FP_INEXACT;
+                       if (status & FE_DIVBYZERO)
+                               regs.fp_result_status |= FP_DIVBYZERO;
+                       if (status & FE_UNDERFLOW)
+                               regs.fp_result_status |= FP_UNDERFLOW;
+                       if (status & FE_OVERFLOW)
+                               regs.fp_result_status |= FP_OVERFLOW;
+                       if (status & FE_INVALID)
+                               regs.fp_result_status |= FP_OPERAND;
+               }
+               regs.fp_result.fp = *fp;
+       }
+}
+
  STATIC_INLINE void MAKE_FPSR_SOFTFLOAT(floatx80 fx)
  {
-       if (fxstatus.float_exception_flags & float_flag_invalid)
+       if (float_exception_flags & float_flag_invalid)
                 regs.fp_result_status |= FP_OPERAND;
-       if (fxstatus.float_exception_flags & float_flag_divbyzero)
+       if (float_exception_flags & float_flag_divbyzero)
                 regs.fp_result_status |= FP_DIVBYZERO;
-       if (fxstatus.float_exception_flags & float_flag_overflow)
+       if (float_exception_flags & float_flag_overflow)
                 regs.fp_result_status |= FP_OVERFLOW;
-       if (fxstatus.float_exception_flags & float_flag_underflow)
+       if (float_exception_flags & float_flag_underflow)
                 regs.fp_result_status |= FP_UNDERFLOW;
-       if (fxstatus.float_exception_flags & float_flag_inexact)
+       if (float_exception_flags & float_flag_inexact)
                 regs.fp_result_status |= FP_INEXACT;
         regs.fp_result.fpx = fx;
  }
-#endif
  
-STATIC_INLINE void CLEAR_STATUS (void)
+static void CLEAR_STATUS (void)
  {
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat)
+       if (currprefs.fpu_softfloat) {
+               float_exception_flags = 0;
                 return;
-#endif
+       }
         feclearexcept (FE_ALL_EXCEPT);
  }
  
-#ifdef WITH_SOFTFLOAT
-static void softfloat_set(floatx80 *fx, uae_u32 *f)
+static void fp_roundsgl(fpdata *fpd)
  {
-       fx->exp = (uae_u16)f[2];
-       fx->fraction = ((uae_u64)f[1] << 32) | f[0];
+       if (currprefs.fpu_softfloat) {
+               fpd->fpx = floatx80_round32(fpd->fpx);
+       } else {
+               int expon;
+               float mant;
+#ifdef USE_LONG_DOUBLE
+               mant = (float)(frexpl(fpd->fp, &expon) * 2.0);
+               fpd->fp = ldexpl((fptype)mant, expon - 1);
+#else
+               mant = (float)(frexp(fpd->fp, &expon) * 2.0);
+               fpd->fp = ldexp((fptype)mant, expon - 1);
+#endif
+       }
  }
-static void softfloat_get(floatx80 *fx, uae_u32 *f)
+static void fp_round32(fpdata *fpd)
  {
-       f[2] = fx->exp;
-       f[1] = fx->fraction >> 32;
-       f[0] = (uae_u32)fx->fraction;
+       if (currprefs.fpu_softfloat) {
+               float32 f = floatx80_to_float32(fpd->fpx);
+               fpd->fpx = float32_to_floatx80(f);
+       } else {
+               fpd->fp = (float)fpd->fp;
+       }
  }
+static void fp_round64(fpdata *fpd)
+{
+       if (currprefs.fpu_softfloat) {
+               float64 f = floatx80_to_float64(fpd->fpx);
+               fpd->fpx = float64_to_floatx80(f);
+       } else {
+#ifdef USE_LONG_DOUBLE
+               fpd->fp = (double)fpd->fp;
+#else
+               ;
  #endif
+       }
+}
  
-static void fpnan (fpdata *fpd)
+static void to_native(fptype *fp, fpdata *fpd)
  {
-       fpd->fp = *fp_nan;
-#ifdef WITH_SOFTFLOAT
-       softfloat_set(&fpd->fpx, xhex_nan);
-#endif
+       if (currprefs.fpu_softfloat) {
+               int expon;
+               fptype frac;
+               floatx80 *fpx = &fpd->fpx;
+    
+               expon = fpx->high & 0x7fff;
+    
+               if (floatx80_is_zero(*fpx)) {
+                       *fp = floatx80_is_negative(*fpx) ? -0.0 : +0.0;
+                       return;
+               }
+               if (floatx80_is_nan(*fpx)) {
+                       *fp = sqrtl(-1);
+                       return;
+               }
+               if (floatx80_is_infinity(*fpx)) {
+                       *fp = floatx80_is_negative(*fpx) ? logl(0.0) : (1.0/0.0);
+                       return;
+               }
+    
+               frac = (long double)fpx->low / (long double)(twoto32 * 2147483648.0);
+               if (floatx80_is_negative(*fpx))
+                       frac = -frac;
+               *fp = ldexpl (frac, expon - 16383);
+       } else {
+               *fp = fpd->fp;
+       }
  }
  
-static void fpclear (fpdata *fpd)
+static void from_native(fptype fp, fpdata *fpd)
  {
-       fpd->fp = 0;
-#ifdef WITH_SOFTFLOAT
-       fpd->fpx = int32_to_floatx80(0);
-#endif
+       if (currprefs.fpu_softfloat) {
+               int expon;
+               fptype frac;
+               floatx80 *fpx = &fpd->fpx;
+    
+               if (signbit(fp))
+                       fpx->high = 0x8000;
+               else
+                       fpx->high = 0x0000;
+    
+               if (isnan(fp)) {
+                       fpx->high |= 0x7fff;
+                       fpx->low = LIT64(0xffffffffffffffff);
+                       return;
+               }
+               if (isinf(fp)) {
+                       fpx->high |= 0x7fff;
+                       fpx->low = LIT64(0x0000000000000000);
+                       return;
+               }
+               if (fp == 0.0) {
+                       fpx->low = LIT64(0x0000000000000000);
+                       return;
+               }
+               if (fp < 0.0)
+                       fp = -fp;
+    
+               frac = frexpl (fp, &expon);
+               frac += 0.5 / (twoto32 * twoto32);
+               if (frac >= 1.0) {
+                       frac /= 2.0;
+                       expon++;
+               }
+               fpx->high |= (expon + 16383 - 1) & 0x7fff;
+               fpx->low = (bits64)(frac * (long double)(twoto32 * twoto32));
+    
+               while (!(fpx->low & LIT64( 0x8000000000000000))) {
+                       if (fpx->high == 0) {
+                               float_raise( float_flag_denormal );
+                               break;
+                       }
+                       fpx->low <<= 1;
+                       fpx->high--;
+               }
+       } else {
+               fpd->fp = fp;
+       }
  }
-static void fpset (fpdata *fpd, uae_s32 val)
+
+static void softfloat_set(floatx80 *fx, uae_u32 *f)
  {
-       fpd->fp = (fptype)val;
-#ifdef WITH_SOFTFLOAT
-       fpd->fpx = int32_to_floatx80(val);
-#endif
+       fx->high = (uae_u16)f[2];
+       fx->low = ((uae_u64)f[1] << 32) | f[0];
+}
+static void softfloat_get(floatx80 *fx, uae_u32 *f)
+{
+       f[2] = fx->high;
+       f[1] = fx->low >> 32;
+       f[0] = (uae_u32)fx->low;
  }
  
-void to_single(fpdata *fpd, uae_u32 value)
+static void to_single_xn(fpdata *fpd, uae_u32 wrd1)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
-               float32 f = value;
-               fpd->fpx = float32_to_floatx80(f, fxstatus);
-       } else
-#endif
-               fpd->fp = to_single_x(value);
+           float32 f = wrd1;
+           fpd->fpx = float32_to_floatx80(f); // automatically fix denormals
+       } else {
+               union {
+                       float f;
+                       uae_u32 u;
+               } val;
+               val.u = wrd1;
+               fpd->fp = (fptype) val.f;
+       }
  }
-static uae_u32 from_single(fpdata *fpd)
+static void to_single_x(fpdata *fpd, uae_u32 wrd1)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
-               float32 f = floatx80_to_float32(fpd->fpx, fxstatus);
+           float32 f = wrd1;
+           fpd->fpx = float32_to_floatx80_allowunnormal(f);
+       } else {
+               union {
+                       float f;
+                       uae_u32 u;
+               } val;
+               val.u = wrd1;
+               fpd->fp = (fptype) val.f;
+       }
+}
+static uae_u32 from_single_x(fpdata *fpd)
+{
+       if (currprefs.fpu_softfloat) {
+               float32 f = floatx80_to_float32(fpd->fpx);
                 return f;
-       } else
+       } else {
+               union {
+                       float f;
+                       uae_u32 u;
+               } val;
+               val.f = (float)fpd->fp;
+               return val.u;
+       }
+}
+
+static void to_double_xn(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2)
+{
+       if (currprefs.fpu_softfloat) {
+               float64 f = ((float64)wrd1 << 32) | wrd2;
+               fpd->fpx = float64_to_floatx80(f); // automatically fix denormals
+       } else {
+               union {
+                       double d;
+                       uae_u32 u[2];
+               } val;
+#ifdef WORDS_BIGENDIAN
+               val.u[0] = wrd1;
+               val.u[1] = wrd2;
+#else
+               val.u[1] = wrd1;
+               val.u[0] = wrd2;
  #endif
-               return from_single_x(fpd->fp);
+               fpd->fp = (fptype) val.d;
+       }
  }
-void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2)
+static void to_double_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2)
  {
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat) {
+       if (currprefs.fpu_softfloat) {
                 float64 f = ((float64)wrd1 << 32) | wrd2;
-               fpd->fpx = float64_to_floatx80(f, fxstatus);
-       } else
+               fpd->fpx = float64_to_floatx80_allowunnormal(f);
+       } else {
+               union {
+                       double d;
+                       uae_u32 u[2];
+               } val;
+#ifdef WORDS_BIGENDIAN
+               val.u[0] = wrd1;
+               val.u[1] = wrd2;
+#else
+               val.u[1] = wrd1;
+               val.u[0] = wrd2;
  #endif
-               fpd->fp = to_double_x(wrd1, wrd2);
+               fpd->fp = (fptype) val.d;
+       }
  }
-static void from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2)
+static void from_double_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
-               float64 f = floatx80_to_float64(fpd->fpx, fxstatus);
+               float64 f = floatx80_to_float64(fpd->fpx);
                 *wrd1 = f >> 32;
                 *wrd2 = (uae_u32)f;
-               return;
-       } else
+       } else {
+               union {
+                       double d;
+                       uae_u32 u[2];
+               } val;
+               val.d = (double) fpd->fp;
+#ifdef WORDS_BIGENDIAN
+               *wrd1 = val.u[0];
+               *wrd2 = val.u[1];
+#else
+               *wrd1 = val.u[1];
+               *wrd2 = val.u[0];
  #endif
-               return from_double_x(fpd->fp, wrd1, wrd2);
+       }
  }
-void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
+#ifdef USE_LONG_DOUBLE
+static void to_exten_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
-               fpd->fpx.exp = wrd1 >> 16;
-               fpd->fpx.fraction = ((uae_u64)wrd2 << 32) | wrd3;
-#if 0
-               if ((currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) || currprefs.fpu_no_unimplemented) {
-                       // automatically fix denormals if 6888x or no implemented emulation
-                       Bit64u Sig = extractFloatx80Frac(fpd->fpx);
-                       Bit32s Exp = extractFloatx80Exp(fpd->fpx);
-                       if (Exp == 0 && Sig != 0)
-                               normalizeFloatx80Subnormal(Sig, &Exp, &Sig);
-               }
-#endif
-       } else
+               uae_u32 wrd[3] = { wrd1, wrd2, wrd3 };
+               softfloat_set(&fpd->fpx, wrd);
+       } else {
+               union {
+                       fptype ld;
+                       uae_u32 u[3];
+               } val;
+#if WORDS_BIGENDIAN
+               val.u[0] = (wrd1 & 0xffff0000) | ((wrd2 & 0xffff0000) >> 16);
+               val.u[1] = (wrd2 & 0x0000ffff) | ((wrd3 & 0xffff0000) >> 16);
+               val.u[2] = (wrd3 & 0x0000ffff) << 16;
+#else
+               val.u[0] = wrd3;
+               val.u[1] = wrd2;
+               val.u[2] = wrd1 >> 16;
  #endif
-               to_exten_x(&fpd->fp, wrd1, wrd2, wrd3);
+               fpd->fp = val.ld;
+       }
  }
-static void from_exten(fpdata *fpd, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3)
+static void from_exten_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
-               *wrd1 = fpd->fpx.exp << 16;
-               *wrd2 = fpd->fpx.fraction >> 32;
-               *wrd3 = (uae_u32)fpd->fpx.fraction;
-       } else
+               uae_u32 wrd[3];
+               softfloat_get(&fpd->fpx, wrd);
+               *wrd1 = wrd[0];
+               *wrd2 = wrd[1];
+               *wrd3 = wrd[2];
+       } else {
+               union {
+                       fptype ld;
+                       uae_u32 u[3];
+               } val;
+               val.ld = *fp;
+#if WORDS_BIGENDIAN
+               *wrd1 = val.u[0] & 0xffff0000;
+               *wrd2 = ((val.u[0] & 0x0000ffff) << 16) | ((val.u[1] & 0xffff0000) >> 16);
+               *wrd3 = ((val.u[1] & 0x0000ffff) << 16) | ((val.u[2] & 0xffff0000) >> 16);
+#else
+               *wrd3 = val.u[0];
+               *wrd2 = val.u[1];
+               *wrd1 = val.u[2] << 16;
  #endif
-               from_exten_x(fpd->fp, wrd1, wrd2, wrd3);
+       }
  }
+#else // if !USE_LONG_DOUBLE
+static void to_exten_x(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
+{
+       if (currprefs.fpu_softfloat) {
+               uae_u32 wrd[3] = { wrd1, wrd2, wrd3 };
+               softfloat_set(&fpd->fpx, wrd);
+       } else {
+               fptype frac;
+               if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) {
+                       fpd->fp = (wrd1 & 0x80000000) ? -0.0 : +0.0;
+                       return;
+               }
+               frac = ((fptype)wrd2 + ((fptype)wrd3 / twoto32)) / 2147483648.0;
+               if (wrd1 & 0x80000000)
+                       frac = -frac;
+               fpd->fp = ldexp (frac, ((wrd1 >> 16) & 0x7fff) - 16383);
+       }
+}
+static void from_exten_x(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3)
+{
+       if (currprefs.fpu_softfloat) {
+               uae_u32 wrd[3];
+               softfloat_get(&fpd->fpx, wrd);
+               *wrd1 = wrd[0];
+               *wrd2 = wrd[1];
+               *wrd3 = wrd[2];
+       } else {
+               int expon;
+               fptype frac;
+               fptype v;
+
+               v = fpd->fp;
+               if (v == 0.0) {
+                       *wrd1 = signbit(v) ? 0x80000000 : 0;
+                       *wrd2 = 0;
+                       *wrd3 = 0;
+                       return;
+               }
+               if (v < 0) {
+                       *wrd1 = 0x80000000;
+                       v = -v;
+               } else {
+                       *wrd1 = 0;
+               }
+               frac = frexp (v, &expon);
+               frac += 0.5 / (twoto32 * twoto32);
+               if (frac >= 1.0) {
+                       frac /= 2.0;
+                       expon++;
+               }
+               *wrd1 |= (((expon + 16383 - 1) & 0x7fff) << 16);
+               *wrd2 = (uae_u32) (frac * twoto32);
+               *wrd3 = (uae_u32) ((frac * twoto32 - *wrd2) * twoto32);
+       }
+}
+#endif // !USE_LONG_DOUBLE
  
-
-#if 0
-static void normalize(uae_u32 *pwrd1, uae_u32 *pwrd2, uae_u32 *pwrd3)
+static void normalize_exten(uae_u32 *pwrd1, uae_u32 *pwrd2, uae_u32 *pwrd3)
  {
         uae_u32 wrd1 = *pwrd1;
         uae_u32 wrd2 = *pwrd2;
         uae_u32 wrd3 = *pwrd3;
-       int exp = (wrd1 >> 16) & 0x7fff;
+       uae_u16 exp = (wrd1 >> 16) & 0x7fff;
         // Normalize if unnormal.
         if (exp != 0 && exp != 0x7fff && !(wrd2 & 0x80000000)) {
                 while (!(wrd2 & 0x80000000) && (wrd2 || wrd3)) {
+                       if (exp == 0)
+                               break; // Result is denormal
                         wrd2 <<= 1;
                         if (wrd3 & 0x80000000)
                                 wrd2 |= 1;
                         wrd3 <<= 1;
                         exp--;
                 }
-               if (exp < 0)
-                       exp = 0;
                 if (!wrd2 && !wrd3)
                         exp = 0;
                 *pwrd1 = (wrd1 & 0x80000000) | (exp << 16);
@@ -328,7 +552,189 @@ static void normalize(uae_u32 *pwrd1, uae_u32 *pwrd2, uae_u32 *pwrd3)
                 *pwrd3 = wrd3;
         }
  }
+
+static void from_int(fpdata *fpd, uae_s32 src)
+{
+       if (currprefs.fpu_softfloat) {
+           fpd->fpx = int32_to_floatx80(src);
+       } else {
+               fpd->fp = (fptype)src;
+       }
+}
+
+static void fpclear (fpdata *fpd)
+{
+    from_int(fpd, 0);
+}
+static void fpset (fpdata *fpd, uae_s32 val)
+{
+    from_int(fpd, val);
+}
+
+void to_single(fpdata *fpd, uae_u32 wrd1)
+{
+    // automatically fix denormals if 6888x
+    if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882)
+        to_single_xn(fpd, wrd1);
+    else
+        to_single_x(fpd, wrd1);
+}
+static uae_u32 from_single(fpdata *fpd)
+{
+    return from_single_x(fpd);
+}
+void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2)
+{
+    // automatically fix denormals if 6888x
+    if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882)
+        to_double_xn(fpd, wrd1, wrd2);
+    else
+        to_double_x(fpd, wrd1, wrd2);
+}
+static void from_double(fpdata *fpd, uae_u32 *wrd1, uae_u32 *wrd2)
+{
+    from_double_x(fpd, wrd1, wrd2);
+}
+
+void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
+{
+    // automatically fix unnormals if 6888x
+       if (currprefs.fpu_model == 68881 || currprefs.fpu_model == 68882) {
+               normalize_exten(&wrd1, &wrd2, &wrd3);
+       }
+       to_exten_x(fpd, wrd1, wrd2, wrd3);
+}
+static void to_exten_fmovem(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
+{
+       to_exten_x(fpd, wrd1, wrd2, wrd3);
+}
+static void from_exten(fpdata *fpd, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3)
+{
+       from_exten_x(fpd, wrd1, wrd2, wrd3);
+}
+
+/* Floating Point Control Register (FPCR)
+ *
+ * Exception Enable Byte
+ * x--- ---- ---- ----  bit 15: BSUN (branch/set on unordered)
+ * -x-- ---- ---- ----  bit 14: SNAN (signaling not a number)
+ * --x- ---- ---- ----  bit 13: OPERR (operand error)
+ * ---x ---- ---- ----  bit 12: OVFL (overflow)
+ * ---- x--- ---- ----  bit 11: UNFL (underflow)
+ * ---- -x-- ---- ----  bit 10: DZ (divide by zero)
+ * ---- --x- ---- ----  bit 9: INEX 2 (inexact operation)
+ * ---- ---x ---- ----  bit 8: INEX 1 (inexact decimal input)
+ *
+ * Mode Control Byte
+ * ---- ---- xx-- ----  bits 7 and 6: PREC (rounding precision)
+ * ---- ---- --xx ----  bits 5 and 4: RND (rounding mode)
+ * ---- ---- ---- xxxx  bits 3 to 0: all 0
+ */
+
+#define FPCR_PREC   0x00C0
+#define FPCR_RND    0x0030
+
+/* Floating Point Status Register (FPSR)
+ *
+ * Condition Code Byte
+ * xxxx ---- ---- ---- ---- ---- ---- ----  bits 31 to 28: all 0
+ * ---- x--- ---- ---- ---- ---- ---- ----  bit 27: N (negative)
+ * ---- -x-- ---- ---- ---- ---- ---- ----  bit 26: Z (zero)
+ * ---- --x- ---- ---- ---- ---- ---- ----  bit 25: I (infinity)
+ * ---- ---x ---- ---- ---- ---- ---- ----  bit 24: NAN (not a number or unordered)
+ *
+ * Quotient Byte (set and reset only by FMOD and FREM)
+ * ---- ---- x--- ---- ---- ---- ---- ----  bit 23: sign of quotient
+ * ---- ---- -xxx xxxx ---- ---- ---- ----  bits 22 to 16: 7 least significant bits of quotient
+ *
+ * Exception Status Byte
+ * ---- ---- ---- ---- x--- ---- ---- ----  bit 15: BSUN (branch/set on unordered)
+ * ---- ---- ---- ---- -x-- ---- ---- ----  bit 14: SNAN (signaling not a number)
+ * ---- ---- ---- ---- --x- ---- ---- ----  bit 13: OPERR (operand error)
+ * ---- ---- ---- ---- ---x ---- ---- ----  bit 12: OVFL (overflow)
+ * ---- ---- ---- ---- ---- x--- ---- ----  bit 11: UNFL (underflow)
+ * ---- ---- ---- ---- ---- -x-- ---- ----  bit 10: DZ (divide by zero)
+ * ---- ---- ---- ---- ---- --x- ---- ----  bit 9: INEX 2 (inexact operation)
+ * ---- ---- ---- ---- ---- ---x ---- ----  bit 8: INEX 1 (inexact decimal input)
+ *
+ * Accrued Exception Byte
+ * ---- ---- ---- ---- ---- ---- x--- ----  bit 7: IOP (invalid operation)
+ * ---- ---- ---- ---- ---- ---- -x-- ----  bit 6: OVFL (overflow)
+ * ---- ---- ---- ---- ---- ---- --x- ----  bit 5: UNFL (underflow)
+ * ---- ---- ---- ---- ---- ---- ---x ----  bit 4: DZ (divide by zero)
+ * ---- ---- ---- ---- ---- ---- ---- x---  bit 3: INEX (inexact)
+ * ---- ---- ---- ---- ---- ---- ---- -xxx  bits 2 to 0: all 0
+ */
+
+#define FPSR_ZEROBITS   0xF0000007
+
+#define FPSR_CC_N       0x08000000
+#define FPSR_CC_Z       0x04000000
+#define FPSR_CC_I       0x02000000
+#define FPSR_CC_NAN     0x01000000
+
+#define FPSR_QUOT_SIGN  0x00800000
+#define FPSR_QUOT_LSB   0x007F0000
+
+#define FPSR_BSUN       0x00008000
+#define FPSR_SNAN       0x00004000
+#define FPSR_OPERR      0x00002000
+#define FPSR_OVFL       0x00001000
+#define FPSR_UNFL       0x00000800
+#define FPSR_DZ         0x00000400
+#define FPSR_INEX2      0x00000200
+#define FPSR_INEX1      0x00000100
+
+#define FPSR_AE_IOP     0x00000080
+#define FPSR_AE_OVFL    0x00000040
+#define FPSR_AE_UNFL    0x00000020
+#define FPSR_AE_DZ      0x00000010
+#define FPSR_AE_INEX    0x00000008
+
+static void fpnan (fpdata *fpd)
+{
+    to_exten(fpd, xhex_nan[0], xhex_nan[1], xhex_nan[2]);
+}
+
+const TCHAR *fp_print(fpdata *fpd)
+{
+       static TCHAR fs[32];
+       if (currprefs.fpu_softfloat) {
+               bool n, u, d;
+               fptype result = 0.0;
+               int i;
+               floatx80 *fx = &fpd->fpx;
+
+               n = floatx80_is_negative(*fx);
+               u = floatx80_is_unnormal(*fx);
+               d = floatx80_is_denormal(*fx);
+    
+               if (floatx80_is_zero(*fx)) {
+                       _stprintf(fs, _T("%c%#.17Le%s%s"), n?'-':'+', (fptype) 0.0, u ? _T("U") : _T(""), d ? _T("D") : _T(""));
+               } else if (floatx80_is_infinity(*fx)) {
+                       _stprintf(fs, _T("%c%s"), n?'-':'+', _T("inf"));
+               } else if (floatx80_is_signaling_nan(*fx)) {
+                       _stprintf(fs, _T("%c%s"), n?'-':'+', _T("snan"));
+               } else if (floatx80_is_nan(*fx)) {
+                       _stprintf(fs, _T("%c%s"), n?'-':'+', _T("nan"));
+               } else {
+                       for (i = 63; i >= 0; i--) {
+                               if (fx->low & (((uae_u64)1)<<i)) {
+                                       result += (fptype) 1.0 / (((uae_u64)1)<<(63-i));
+                               }
+                       }
+                       result *= powl(2.0, (fx->high&0x7FFF) - 0x3FFF);
+                       _stprintf(fs, _T("%c%#.17Le%s%s"), n?'-':'+', result, u ? _T("U") : _T(""), d ? _T("D") : _T(""));
+               }
+       } else {
+#if USE_LONG_DOUBLE
+               _stprintf(fs, _T("#%Le"), fpd->fp);
+#else
+               _stprintf(fs, _T("#%e"), fpd->fp);
  #endif
+       }
+       return fs;
+}
  
  static bool fpu_get_constant_fp(fpdata *fp, int cr)
  {
@@ -408,7 +814,6 @@ static bool fpu_get_constant_fp(fpdata *fp, int cr)
         return true;
  }
  
-#ifdef WITH_SOFTFLOAT
  static bool fpu_get_constant_softfloat(fpdata *fp, int cr)
  {
         uae_u32 *f = NULL;
@@ -491,52 +896,36 @@ static bool fpu_get_constant_softfloat(fpdata *fp, int cr)
                 fp->fpx = fx;
         return true;
  }
-#endif
  
  bool fpu_get_constant(fpdata *fp, int cr)
  {
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat)
+       if (currprefs.fpu_softfloat) {
                 return fpu_get_constant_softfloat(fp, cr);
-#endif
-       return fpu_get_constant_fp(fp, cr);
+       } else {
+               return fpu_get_constant_fp(fp, cr);
+       }
  }
  
-#ifdef WITH_SOFTFLOAT
-
-static inline void set_fpucw_softfloat(uae_u32 m68k_cw)
+static void set_fpucw_softfloat(uae_u32 m68k_cw)
  {
-       switch((m68k_cw >> 6) & 3) {
-       case 0: // X
-       default: // undefined
-               fxstatus.float_rounding_precision = 80;
-               break;
-       case 1: // S
-               fxstatus.float_rounding_precision = 32;
-               break;
-       case 2: // D
-               fxstatus.float_rounding_precision = 64;
-               break;
-       }
+       floatx80_rounding_precision = 80;
         switch((m68k_cw >> 4) & 3) {
-       case 0: // to neareset
-               fxstatus.float_rounding_precision = float_round_nearest_even;
+       case 0: // to nearest
+               float_rounding_mode = float_round_nearest_even;
                 break;
         case 1: // to zero
-               fxstatus.float_rounding_mode = float_round_to_zero;
+               float_rounding_mode = float_round_to_zero;
                 break;
         case 2: // to minus
-               fxstatus.float_rounding_mode = float_round_down;
+               float_rounding_mode = float_round_down;
                 break;
         case 3: // to plus
-               fxstatus.float_rounding_mode = float_round_up;
+               float_rounding_mode = float_round_up;
                 break;
         }
         return;
  }
  
-#endif /* WITH_SOFTFLOAT */
-
  #if defined(CPU_i386) || defined(CPU_x86_64)
  
  /* The main motivation for dynamically creating an x86(-64) function in
@@ -618,11 +1007,9 @@ static inline void set_fpucw_x87(uae_u32 m68k_cw)
  
  static void native_set_fpucw(uae_u32 m68k_cw)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
                 set_fpucw_softfloat(m68k_cw);
         }
-#endif
  #if defined(CPU_i386) || defined(CPU_x86_64)
         set_fpucw_x87(m68k_cw);
  #endif
@@ -989,24 +1376,21 @@ static void fpu_null (void)
  #define fp_round_to_zero(x)    ((x) >= 0.0 ? floor(x) : ceil(x))
  #define fp_round_to_nearest(x) ((x) >= 0.0 ? (int)((x) + 0.5) : (int)((x) - 0.5))
  
-static tointtype toint(fpdata *src, int size)
+static tointtype to_int(fpdata *src, int size)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
-               if (floatx80_compare(src->fpx, fxsizes[size * 2 + 0], fxstatus) == float_relation_greater)
-                       return floatx80_to_int32(fxsizes[size * 2 + 0], fxstatus);
-               if (floatx80_compare(src->fpx, fxsizes[size * 2 + 1], fxstatus) == float_relation_less)
-                       return floatx80_to_int32(fxsizes[size * 2 + 1], fxstatus);
-               return floatx80_to_int32(src->fpx, fxstatus);
-       } else
-#endif
-       {
+               if (floatx80_lt(src->fpx, fxsizes[size * 2 + 0]))
+                       return floatx80_to_int32(fxsizes[size * 2 + 0]);
+               if (floatx80_le(fxsizes[size * 2 + 1], src->fpx))
+                       return floatx80_to_int32(fxsizes[size * 2 + 1]);
+               return floatx80_to_int32(src->fpx);
+       } else {
                 fptype fp = src->fp;
                 if (fp < fsizes[size * 2 + 0])
                         fp = fsizes[size * 2 + 0];
                 if (fp > fsizes[size * 2 + 1])
                         fp = fsizes[size * 2 + 1];
-       #if defined(X86_MSVC_ASSEMBLY_FPU)
+#if defined(X86_MSVC_ASSEMBLY_FPU)
                 {
                         fptype tmp_fp;
                         __asm {
@@ -1016,7 +1400,7 @@ static tointtype toint(fpdata *src, int size)
                         }
                         return (tointtype)tmp_fp;
                 }
-       #else /* no X86_MSVC */
+#else /* no X86_MSVC */
                 {
                         int result = (int)fp;
                         switch (regs.fpcr & 0x30)
@@ -1036,61 +1420,80 @@ static tointtype toint(fpdata *src, int size)
                         }
                         return result;
                 }
-       #endif
+#endif
         }
  }
  
  static bool fp_is_snan(fpdata *fpd)
  {
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat)
+       if (currprefs.fpu_softfloat) {
                 return floatx80_is_signaling_nan(fpd->fpx) != 0;
-#endif
-       return false;
+       } else {
+               return false;
+       }
+}
+static void fp_unset_snan(fpdata *fpd)
+{
+    fpd->fpx.low |= LIT64(0x4000000000000000);
  }
  static bool fp_is_nan (fpdata *fpd)
  {
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat)
+       if (currprefs.fpu_softfloat) {
                 return floatx80_is_nan(fpd->fpx) != 0;
-#endif
+       } else {
  #ifdef HAVE_ISNAN
-       return isnan(fpd->fp) != 0;
+               return isnan(fpd->fp) != 0;
  #else
-       return false;
+               return false;
  #endif
+       }
  }
  static bool fp_is_infinity (fpdata *fpd)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
-               float_class_t fc = floatx80_class(fpd->fpx);
-               return fc == float_negative_inf || fc == float_positive_inf;
-       }
-#endif
+               return floatx80_is_infinity(fpd->fpx) != 0;
+       } else {
  #ifdef _MSC_VER
-       return !_finite (fpd->fp);
+               return !_finite (fpd->fp);
  #elif defined(HAVE_ISINF)
-       return isinf(fpd->fp);
+               return isinf(fpd->fp);
  #else
-       return false;
+               return false;
  #endif
+       }
  }
  static bool fp_is_zero(fpdata *fpd)
  {
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat)
-               return floatx80_compare_quiet(fpd->fpx, fxzero, fxstatus) == float_relation_equal;
-#endif
-       return fpd->fp == 0.0;
+       if (currprefs.fpu_softfloat) {
+               return floatx80_is_zero(fpd->fpx) != 0;
+       } else {
+               return fpd->fp == 0.0;
+       }
  }
  static bool fp_is_neg(fpdata *fpd)
  {
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat)
-               return extractFloatx80Sign(fpd->fpx) != 0;
-#endif
-       return fpd->fp < 0.0;
+       if (currprefs.fpu_softfloat) {
+               return floatx80_is_negative(fpd->fpx) != 0;
+       } else {
+               return fpd->fp < 0.0;
+       }
+}
+
+static bool fp_is_denormal(fpdata *fpd)
+{
+       if (currprefs.fpu_softfloat) {
+               return floatx80_is_denormal(fpd->fpx) != 0;
+       } else {
+               return false;
+       }
+}
+static bool fp_is_unnormal(fpdata *fpd)
+{
+       if (currprefs.fpu_softfloat) {
+           return floatx80_is_unnormal(fpd->fpx) != 0;
+       } else {
+               return false;
+       }
  }
  
  uae_u32 fpp_get_fpsr (void)
@@ -1188,6 +1591,19 @@ static void to_pack (fpdata *fpd, uae_u32 *wrd)
         char *cp;
         char str[100];
  
+    if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) {
+        // infinity has extended exponent and all 0 packed fraction
+        // nans are copies bit by bit
+        to_exten_fmovem(fpd, wrd[0], wrd[1], wrd[2]);
+        return;
+    }
+    if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) {
+        // exponent is not cared about, if mantissa is zero
+        wrd[0] &= 0x80000000;
+        to_exten_fmovem(fpd, wrd[0], wrd[1], wrd[2]);
+        return;
+    }
+
         cp = str;
         if (wrd[0] & 0x80000000)
                 *cp++ = '-';
@@ -1221,14 +1637,7 @@ static void to_pack (fpdata *fpd, uae_u32 *wrd)
  #else
         sscanf (str, "%le", &d);
  #endif
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat) {
-               uae_u32 wrd[3];
-               from_exten_x(d, &wrd[0], &wrd[1], &wrd[2]);
-               softfloat_set(&fpd->fpx, wrd);
-       } else
-#endif
-               fpd->fp = d;
+       from_native(d, fpd);
  }
  
  static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor)
@@ -1240,23 +1649,21 @@ static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor)
         char str[100];
         fptype fp;
  
-       wrd[0] = wrd[1] = wrd[2] = 0;
+   if (fp_is_nan (src)) {
+        // copied bit by bit, no conversion
+        from_exten(src, &wrd[0], &wrd[1], &wrd[2]);
+        return;
+    }
+    if (fp_is_infinity (src)) {
+        // extended exponent and all 0 packed fraction
+        from_exten(src, &wrd[0], &wrd[1], &wrd[2]);
+        wrd[1] = wrd[2] = 0;
+        return;
+    }
  
-       if (fp_is_nan (src) || fp_is_infinity (src)) {
-               wrd[0] |= (1 << 30) | (1 << 29) | (1 << 30); // YY=1
-               wrd[0] |= 0xfff << 16; // Exponent=FFF
-               // TODO: mantissa should be set if NAN
-               return;
-       }
+       wrd[0] = wrd[1] = wrd[2] = 0;
  
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat) {
-               uae_u32 out[3];
-               softfloat_get(&src->fpx, out);
-               to_exten_x(&fp, out[0], out[1], out[2]);
-       } else
-#endif
-               fp = src->fp;
+       to_native(&fp, src);
  
  #if USE_LONG_DOUBLE
         sprintf (str, "%#.17Le", fp);
@@ -1382,30 +1789,26 @@ static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor)
  // 68040/060 does not support denormals
  static bool fault_if_no_denormal_support_pre(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented && currprefs.fpu_softfloat) {
-               Bit64u Sig = extractFloatx80Frac(fpd->fpx);
-               Bit32s Exp = extractFloatx80Exp(fpd->fpx);
+               bits64 Sig = extractFloatx80Frac(fpd->fpx);
+               bits32 Exp = extractFloatx80Exp(fpd->fpx);
                 if (Exp == 0 && Sig != 0) {
                         fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_PRE, fpd, -1, size);
                         return true;
                 }
         }
-#endif
         return false;
  }
  static bool fault_if_no_denormal_support_post(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *fpd, int size)
  {
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat && currprefs.cpu_model >= 68040 && currprefs.fpu_model && currprefs.fpu_no_unimplemented) {
-               Bit64u Sig = extractFloatx80Frac(fpd->fpx);
-               Bit32s Exp = extractFloatx80Exp(fpd->fpx);
+               bits64 Sig = extractFloatx80Frac(fpd->fpx);
+               bits32 Exp = extractFloatx80Exp(fpd->fpx);
                 if (Exp == 0 && Sig != 0) {
                         fpu_op_unimp(opcode, extra, ea, oldpc, FPU_EXP_UNIMP_DATATYPE_POST, fpd, -1, size);
                         return true;
                 }
         }
-#endif
         return false;
  }
  
@@ -1628,15 +2031,15 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
                         switch (size)
                         {
                                 case 6:
-                                       m68k_dreg (regs, reg) = (uae_u32)(((toint (value, 0) & 0xff)
+                                       m68k_dreg (regs, reg) = (uae_u32)(((to_int (value, 0) & 0xff)
                                                 | (m68k_dreg (regs, reg) & ~0xff)));
                                         break;
                                 case 4:
-                                       m68k_dreg (regs, reg) = (uae_u32)(((toint (value, 1) & 0xffff)
+                                       m68k_dreg (regs, reg) = (uae_u32)(((to_int (value, 1) & 0xffff)
                                                 | (m68k_dreg (regs, reg) & ~0xffff)));
                                         break;
                                 case 0:
-                                       m68k_dreg (regs, reg) = (uae_u32)toint (value, 2);
+                                       m68k_dreg (regs, reg) = (uae_u32)to_int (value, 2);
                                         break;
                                 case 1:
                                         m68k_dreg (regs, reg) = from_single (value);
@@ -1703,7 +2106,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
                 case 0:
                         if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2))
                                 return 1;
-                       x_cp_put_long(ad, (uae_u32)toint(value, 2));
+                       x_cp_put_long(ad, (uae_u32)to_int(value, 2));
                         break;
                 case 1:
                         if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2))
@@ -1745,7 +2148,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
                 case 4:
                         if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2))
                                 return 1;
-                       x_cp_put_word(ad, (uae_s16)toint(value, 1));
+                       x_cp_put_word(ad, (uae_s16)to_int(value, 1));
                         break;
                 case 5:
                         {
@@ -1761,7 +2164,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o
                 case 6:
                         if (fault_if_no_denormal_support_post(opcode, extra, ad, oldpc, value, 2))
                                 return 1;
-                       x_cp_put_byte(ad, (uae_s8)toint(value, 0));
+                       x_cp_put_byte(ad, (uae_s8)to_int(value, 0));
                         break;
                 default:
                         return 0;
@@ -2468,18 +2871,6 @@ static uaecptr fmovem2fpp (uaecptr ad, uae_u32 list, int incr, int regdir)
         return ad;
  }
  
-// round to float
-static void fround (int reg)
-{
-#ifdef WITH_SOFTFLOAT
-       if (currprefs.fpu_softfloat) {
-               float32 f = floatx80_to_float32(regs.fp[reg].fpx, fxstatus);
-               regs.fp[reg].fpx = float32_to_floatx80(f, fxstatus);
-       } else
-#endif
-               regs.fp[reg].fp = (float)regs.fp[reg].fp;
-}
-
  static bool arithmetic_fp(fptype src, int reg, int extra)
  {
         bool sgl = false;
@@ -2676,6 +3067,10 @@ static bool arithmetic_fp(fptype src, int reg, int extra)
                 case 0x37:
                         regs.fp[extra & 7].fp = cos (src);
                         regs.fp[reg].fp = sin (src);
+            if (((regs.fpcr >> 6) & 3) == 1)
+                               fp_round32(&regs.fp[extra & 7]);
+            else if (((regs.fpcr >> 6) & 3) == 2)
+                               fp_round64(&regs.fp[extra & 7]);
                         break;
                 case 0x38: /* FCMP */
                         {
@@ -2691,25 +3086,29 @@ static bool arithmetic_fp(fptype src, int reg, int extra)
                 default:
                         return false;
         }
-       // round to float?
-       if (sgl || (extra & 0x44) == 0x40 || ((regs.fpcr >> 6) & 3) == 1)
-               fround (reg);
+
+       if (sgl) {
+        fp_roundsgl(&regs.fp[reg]);
+       } else if ((extra & 0x44) == 0x40 || ((regs.fpcr >> 6) & 3) == 1) {
+        fp_round32(&regs.fp[reg]);
+       } else if ((extra & 0x44) == 0x44 || ((regs.fpcr >> 6) & 3) == 2) {
+        fp_round64(&regs.fp[reg]);
+       }
+
         MAKE_FPSR (&regs.fp[reg].fp);
         return true;
  }
  
-#ifdef WITH_SOFTFLOAT
-static bool arithmetic_softfloat(floatx80 *srcd, int reg, int extra)
+static bool arithmetic_softfloat(fpdata *srcd, int reg, int extra)
  {
-       floatx80 fx = *srcd;
+       floatx80 fx = srcd->fpx;
         floatx80 f = regs.fp[reg].fpx;
-       int float_rounding_mode;
+       int8 float_rounding_mode_temp;
         bool sgl = false;
-       Bit64u q;
  
         // SNAN -> QNAN if SNAN interrupt is not enabled
         if (floatx80_is_signaling_nan(fx) && !(regs.fpcr & 0x4000)) {
-               fx.fraction |= 0x40000000;
+        fp_unset_snan(srcd);
         }
  
         switch (extra & 0x7f)
@@ -2720,18 +3119,18 @@ static bool arithmetic_softfloat(floatx80 *srcd, int reg, int extra)
                         regs.fp[reg].fpx = fx;  
                         break;
                 case 0x01: /* FINT */
-                       regs.fp[reg].fpx = floatx80_round_to_int(fx, fxstatus);
+                       regs.fp[reg].fpx = floatx80_round_to_int(fx);
                         break;
                 case 0x03: /* FINTRZ */
-                       float_rounding_mode = fxstatus.float_rounding_mode;
-                       fxstatus.float_rounding_mode = float_round_to_zero;
-                       regs.fp[reg].fpx = floatx80_round_to_int(fx, fxstatus);
-                       float_rounding_mode = fxstatus.float_rounding_mode;
+                       float_rounding_mode_temp = float_rounding_mode;
+                       float_rounding_mode = float_round_to_zero;
+                       regs.fp[reg].fpx = floatx80_round_to_int(fx);
+                       float_rounding_mode = float_rounding_mode_temp;
                         break;
                 case 0x04: /* FSQRT */
                 case 0x41: /* FSSQRT */
                 case 0x45: /* FDSQRT */
-                       regs.fp[reg].fpx = floatx80_sqrt(fx, fxstatus);
+                       regs.fp[reg].fpx = floatx80_sqrt(fx);
                         break;
                 case 0x18: /* FABS */
                 case 0x58: /* FSABS */
@@ -2747,54 +3146,54 @@ static bool arithmetic_softfloat(floatx80 *srcd, int reg, int extra)
                 case 0x20: /* FDIV */
                 case 0x60: /* FSDIV */
                 case 0x64: /* FDDIV */
-                       regs.fp[reg].fpx = floatx80_div(f, fx, fxstatus);
+                       regs.fp[reg].fpx = floatx80_div(f, fx);
                         break;
                 case 0x22: /* FADD */
                 case 0x62: /* FSADD */
                 case 0x66: /* FDADD */
-                       regs.fp[reg].fpx = floatx80_add(f, fx, fxstatus);
+                       regs.fp[reg].fpx = floatx80_add(f, fx);
                         break;
                 case 0x23: /* FMUL */
                 case 0x63: /* FSMUL */
                 case 0x67: /* FDMUL */
-                       regs.fp[reg].fpx = floatx80_mul(f, fx, fxstatus);
+                       regs.fp[reg].fpx = floatx80_mul(f, fx);
                         break;
                 case 0x24: /* FSGLDIV */
-                       regs.fp[reg].fpx = floatx80_div(f, fx, fxstatus);
+                       regs.fp[reg].fpx = floatx80_div(f, fx);
                         sgl = true;
                         break;
                 case 0x25: /* FREM */
-                       floatx80_ieee754_remainder(f, fx, regs.fp[reg].fpx, q, fxstatus);
+                       floatx80_rem(f, fx);
                         break;
                 case 0x27: /* FSGLMUL */
-                       regs.fp[reg].fpx = floatx80_mul(f, fx, fxstatus);
+                       regs.fp[reg].fpx = floatx80_mul(f, fx);
                         sgl = true;
                         break;
                 case 0x28: /* FSUB */
                 case 0x68: /* FSSUB */
                 case 0x6c: /* FDSUB */
-                       regs.fp[reg].fpx = floatx80_sub(f, fx, fxstatus);
+                       regs.fp[reg].fpx = floatx80_sub(f, fx);
                         break;
                 case 0x38: /* FCMP */
-                       f = floatx80_sub(f, fx, fxstatus);
+                       f = floatx80_sub(f, fx);
                         regs.fpsr = 0;
                         MAKE_FPSR_SOFTFLOAT(f);
                         return true;
                 case 0x3a: /* FTST */
                         regs.fpsr = 0;
-                       MAKE_FPSR_SOFTFLOAT(f);
+                       MAKE_FPSR_SOFTFLOAT(fx);
                         return true;
  
                 case 0x1d: /* FCOS */
-                       fcos(f, fxstatus);
+                       floatx80_fcos(&f);
                         regs.fp[reg].fpx = f;
                         break;
                 case 0x0e: /* FSIN */
-                       fsin(f, fxstatus);
+                       floatx80_fsin(&f);
                         regs.fp[reg].fpx = f;
                         break;
                 case 0x0f: /* FTAN */
-                       ftan(f, fxstatus);
+                       floatx80_ftan(&f);
                         regs.fp[reg].fpx = f;
                         break;
                 case 0x30: /* FSINCOS */
@@ -2805,11 +3204,31 @@ static bool arithmetic_softfloat(floatx80 *srcd, int reg, int extra)
                 case 0x35: /* FSINCOS */
                 case 0x36: /* FSINCOS */
                 case 0x37: /* FSINCOS */
-                       fsincos(f, &regs.fp[extra & 7].fpx, &regs.fp[reg].fpx, fxstatus);
+                       floatx80_fsincos(f, &regs.fp[extra & 7].fpx, &regs.fp[reg].fpx);
+            if (((regs.fpcr >> 6) & 3) == 1)
+                               fp_round32(&regs.fp[extra & 7]);
+            else if (((regs.fpcr >> 6) & 3) == 2)
+                               fp_round64(&regs.fp[extra & 7]);
+                       break;
+               case 0x14: /* FLOGN */
+                       regs.fp[reg].fpx = floatx80_flogn(f);
+                       break;
+               case 0x15: /* FLOG10 */
+                       regs.fp[reg].fpx = floatx80_flog10(f);
+                       break;
+               case 0x16: /* FLOG2 */
+                       regs.fp[reg].fpx = floatx80_flog2(f);
                         break;
-
-                       // some of following are supported by softfloat, later..
                 case 0x06: /* FLOGNP1 */
+                       regs.fp[reg].fpx = floatx80_flognp1(f);
+                       break;
+               case 0x1e: /* FGETEXP */
+                       regs.fp[reg].fpx = floatx80_getexp(f);
+                       break;
+               case 0x1f: /* FGETMAN */
+                       regs.fp[reg].fpx = floatx80_getman(f);
+                       break;
+
                 case 0x08: /* FETOXM1 */
                 case 0x09: /* FTANH */
                 case 0x0a: /* FATAN */
@@ -2818,35 +3237,33 @@ static bool arithmetic_softfloat(floatx80 *srcd, int reg, int extra)
                 case 0x10: /* FETOX */
                 case 0x11: /* FTWOTOX */
                 case 0x12: /* FTENTOX */
-               case 0x14: /* FLOGN */
-               case 0x15: /* FLOG10 */
-               case 0x16: /* FLOG2 */
                 case 0x19: /* FCOSH */
                 case 0x1c: /* FACOS */
-               case 0x1e: /* FGETEXP */
-               case 0x1f: /* FGETMAN */
                         {
                                 // This is horribly ineffective..
-                               fptype fp;
-                               uae_u32 out[3];
-                               // convert softfloat to raw words
-                               softfloat_get(&fx, out);
-                               // convert to double/long double
-                               to_exten_x(&fp, out[0], out[1], out[2]);
+                               fptype fpa;
+                               fpdata fpdx = { 0 };
+                               fpdx.fpx = fx;
+                               to_native(&fpa, &fpdx);
                                 // emulate instruction using normal fpu code
-                               if (!arithmetic_fp(fp, reg, extra))
+                               if (!arithmetic_fp(fpa, reg, extra))
                                         return false;
-                               // convert back to raw
-                               from_exten_x(regs.fp[reg].fp, &out[0], &out[1], &out[2]);
-                               // convert to softfloat internal format
-                               softfloat_set(&regs.fp[reg].fpx, out);
+                               from_native(fpa, &regs.fp[reg]);
                                 MAKE_FPSR_SOFTFLOAT(regs.fp[reg].fpx);
                         }
                         break;
         }
+
+    if (sgl) {
+        fp_roundsgl(&regs.fp[reg]);
+       } else if ((extra & 0x44) == 0x40 || ((regs.fpcr >> 6) & 3) == 1) {
+        fp_round32(&regs.fp[reg]);
+       } else if ((extra & 0x44) == 0x44 || ((regs.fpcr >> 6) & 3) == 2) {
+        fp_round64(&regs.fp[reg]);
+       }
+
         return true;
  }
-#endif
  
  static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
  {
@@ -3102,11 +3519,9 @@ static void fpuop_arithmetic2 (uae_u32 opcode, uae_u16 extra)
                         regs.fpiar =  pc;
  
                         CLEAR_STATUS ();
-#ifdef WITH_SOFTFLOAT
                         if (currprefs.fpu_softfloat)
-                               v = arithmetic_softfloat(&srcd.fpx, reg, extra);
+                               v = arithmetic_softfloat(&srcd, reg, extra);
                         else
-#endif
                                 v = arithmetic_fp(srcd.fp, reg, extra);
                         if (!v)
                                 fpu_noinst (opcode, pc);
@@ -3126,7 +3541,6 @@ void fpuop_arithmetic (uae_u32 opcode, uae_u16 extra)
         if (fpu_mmu_fixup) {
                 mmufixup[0].reg = -1;
         }
-#ifdef WITH_SOFTFLOAT
         if (currprefs.fpu_softfloat) {
                 // Any exception status bit and matching exception enable bits set?
                 if ((regs.fpcr >> 8) & (regs.fpsr >> 8)) {
@@ -3144,7 +3558,6 @@ void fpuop_arithmetic (uae_u32 opcode, uae_u16 extra)
                         write_log (_T("FPU exception: %08x %d!\n"), regs.fpsr, vector);
                 }
         }
-#endif
  }
  
  void fpu_reset (void)
@@ -3159,7 +3572,6 @@ void fpu_reset (void)
         native_set_fpucw (regs.fpcr);
         fpux_restore (NULL);
  
-#ifdef WITH_SOFTFLOAT
         fxsizes[0] = int32_to_floatx80(-128);
         fxsizes[1] = int32_to_floatx80(127);
         fxsizes[2] = int32_to_floatx80(-32768);
@@ -3172,7 +3584,6 @@ void fpu_reset (void)
         fx_1e2 = int32_to_floatx80(100);
         fx_1e4 = int32_to_floatx80(10000);
         fx_1e8 = int32_to_floatx80(100000000);
-#endif
  }
  
  uae_u8 *restore_fpu (uae_u8 *src)
diff --git a/include/fpp-softfloat.h b/include/fpp-softfloat.h

new file mode 100644 (file)

index 0000000..3bec3df
--- /dev/null
+++ b/include/fpp-softfloat.h
@@ -0,0 +1,541 @@
+/*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * MC68881 emulation
+  *
+  * Conversion routines for hosts knowing floating point format.
+  *
+  * Copyright 1996 Herman ten Brugge
+  * Modified 2005 Peter Keunecke
+  */
+
+#ifndef FPP_H
+#define FPP_H
+
+#define __USE_ISOC9X  /* We might be able to pick up a NaN */
+
+#include <math.h>
+#include <float.h>
+
+#include <softfloat.h>
+
+
+#define        FPCR_ROUNDING_MODE      0x00000030
+#define        FPCR_ROUND_NEAR         0x00000000
+#define        FPCR_ROUND_ZERO         0x00000010
+#define        FPCR_ROUND_MINF         0x00000020
+#define        FPCR_ROUND_PINF         0x00000030
+
+#define        FPCR_ROUNDING_PRECISION 0x000000c0
+#define        FPCR_PRECISION_SINGLE   0x00000040
+#define        FPCR_PRECISION_DOUBLE   0x00000080
+#define FPCR_PRECISION_EXTENDED        0x00000000
+
+extern uae_u32 fpp_get_fpsr (void);
+extern void to_single(fptype *fp, uae_u32 wrd1);
+extern void to_double(fptype *fp, uae_u32 wrd1, uae_u32 wrd2);
+extern void to_exten(fptype *fp, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3);
+extern void normalize_exten (uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3);
+
+
+/* Functions for setting host/library modes and getting status */
+STATIC_INLINE void set_fp_mode(uae_u32 mode_control)
+{
+    switch(mode_control & FPCR_ROUNDING_PRECISION) {
+        case FPCR_PRECISION_SINGLE:   // S
+            //floatx80_rounding_precision = 32;
+            break;
+        case FPCR_PRECISION_DOUBLE:   // D
+            //floatx80_rounding_precision = 64;
+            break;
+        case FPCR_PRECISION_EXTENDED: // X
+        default:                      // undefined
+            //floatx80_rounding_precision = 80;
+            break;
+    }
+    floatx80_rounding_precision = 80;
+    
+    switch(mode_control & FPCR_ROUNDING_MODE) {
+        case FPCR_ROUND_NEAR: // to neareset
+            float_rounding_mode = float_round_nearest_even;
+            break;
+        case FPCR_ROUND_ZERO: // to zero
+            float_rounding_mode = float_round_to_zero;
+            break;
+        case FPCR_ROUND_MINF: // to minus
+            float_rounding_mode = float_round_down;
+            break;
+        case FPCR_ROUND_PINF: // to plus
+            float_rounding_mode = float_round_up;
+            break;
+    }
+    return;
+}
+STATIC_INLINE void get_fp_status(uae_u32 *status)
+{
+    if (float_exception_flags & float_flag_invalid)
+        *status |= 0x2000;
+    if (float_exception_flags & float_flag_divbyzero)
+        *status |= 0x0400;
+    if (float_exception_flags & float_flag_overflow)
+        *status |= 0x1000;
+    if (float_exception_flags & float_flag_underflow)
+        *status |= 0x0800;
+    if (float_exception_flags & float_flag_inexact)
+        *status |= 0x0200;
+}
+STATIC_INLINE void clear_fp_status(void)
+{
+    float_exception_flags = 0;
+}
+
+/* Helper functions */
+STATIC_INLINE const char *fp_print(fptype *fx)
+{
+    static char fs[32];
+    bool n, u, d;
+    long double result = 0.0;
+    int i;
+    
+    n = floatx80_is_negative(*fx);
+    u = floatx80_is_unnormal(*fx);
+    d = floatx80_is_denormal(*fx);
+    
+    if (floatx80_is_zero(*fx)) {
+        sprintf(fs, "%c%#.17Le%s%s", n?'-':'+', (long double) 0.0, u?"U":"", d?"D":"");
+    } else if (floatx80_is_infinity(*fx)) {
+        sprintf(fs, "%c%s", n?'-':'+', "inf");
+    } else if (floatx80_is_signaling_nan(*fx)) {
+        sprintf(fs, "%c%s", n?'-':'+', "snan");
+    } else if (floatx80_is_nan(*fx)) {
+        sprintf(fs, "%c%s", n?'-':'+', "nan");
+    } else {
+        for (i = 63; i >= 0; i--) {
+            if (fx->low & (((uae_u64)1)<<i)) {
+                result += (long double) 1.0 / (((uae_u64)1)<<(63-i));
+            }
+        }
+        result *= powl(2.0, (fx->high&0x7FFF) - 0x3FFF);
+        sprintf(fs, "%c%#.17Le%s%s", n?'-':'+', result, u?"U":"", d?"D":"");
+    }
+    
+    return fs;
+}
+
+STATIC_INLINE void softfloat_set(fptype *fx, uae_u32 *f)
+{
+    fx->high = (uae_u16)(f[0] >> 16);
+    fx->low = ((uae_u64)f[1] << 32) | f[2];
+}
+
+STATIC_INLINE void softfloat_get(fptype *fx, uae_u32 *f)
+{
+    f[0] = (uae_u32)(fx->high << 16);
+    f[1] = fx->low >> 32;
+    f[2] = (uae_u32)fx->low;
+}
+
+/* Functions for detecting float type */
+STATIC_INLINE bool fp_is_snan(fptype *fp)
+{
+    return floatx80_is_signaling_nan(*fp) != 0;
+}
+STATIC_INLINE void fp_unset_snan(fptype *fp)
+{
+    fp->low |= LIT64(0x4000000000000000);
+}
+STATIC_INLINE bool fp_is_nan (fptype *fp)
+{
+    return floatx80_is_nan(*fp) != 0;
+}
+STATIC_INLINE bool fp_is_infinity (fptype *fp)
+{
+    return floatx80_is_infinity(*fp) != 0;
+}
+STATIC_INLINE bool fp_is_zero(fptype *fp)
+{
+    return floatx80_is_zero(*fp) != 0;
+}
+STATIC_INLINE bool fp_is_neg(fptype *fp)
+{
+    return floatx80_is_negative(*fp) != 0;
+}
+STATIC_INLINE bool fp_is_denormal(fptype *fp)
+{
+    return floatx80_is_denormal(*fp) != 0;
+}
+STATIC_INLINE bool fp_is_unnormal(fptype *fp)
+{
+    return floatx80_is_unnormal(*fp) != 0;
+}
+
+/* Functions for converting between float formats */
+static const long double twoto32 = 4294967296.0;
+
+STATIC_INLINE void to_native(long double *fp, fptype fpx)
+{
+    int expon;
+    long double frac;
+    
+    expon = fpx.high & 0x7fff;
+    
+    if (floatx80_is_zero(fpx)) {
+        *fp = floatx80_is_negative(fpx) ? -0.0 : +0.0;
+        return;
+    }
+    if (floatx80_is_nan(fpx)) {
+        *fp = sqrtl(-1);
+        return;
+    }
+    if (floatx80_is_infinity(fpx)) {
+        *fp = floatx80_is_negative(fpx) ? logl(0.0) : (1.0/0.0);
+        return;
+    }
+    
+    frac = (long double)fpx.low / (long double)(twoto32 * 2147483648.0);
+    if (floatx80_is_negative(fpx))
+        frac = -frac;
+    *fp = ldexpl (frac, expon - 16383);
+}
+
+STATIC_INLINE void from_native(long double fp, fptype *fpx)
+{
+    int expon;
+    long double frac;
+    
+    if (signbit(fp))
+        fpx->high = 0x8000;
+    else
+        fpx->high = 0x0000;
+    
+    if (isnan(fp)) {
+        fpx->high |= 0x7fff;
+        fpx->low = LIT64(0xffffffffffffffff);
+        return;
+    }
+    if (isinf(fp)) {
+        fpx->high |= 0x7fff;
+        fpx->low = LIT64(0x0000000000000000);
+        return;
+    }
+    if (fp == 0.0) {
+        fpx->low = LIT64(0x0000000000000000);
+        return;
+    }
+    if (fp < 0.0)
+        fp = -fp;
+    
+    frac = frexpl (fp, &expon);
+    frac += 0.5 / (twoto32 * twoto32);
+    if (frac >= 1.0) {
+        frac /= 2.0;
+        expon++;
+    }
+    fpx->high |= (expon + 16383 - 1) & 0x7fff;
+    fpx->low = (bits64)(frac * (long double)(twoto32 * twoto32));
+    
+    while (!(fpx->low & LIT64( 0x8000000000000000))) {
+        if (fpx->high == 0) {
+            float_raise( float_flag_denormal );
+            break;
+        }
+        fpx->low <<= 1;
+        fpx->high--;
+    }
+}
+
+STATIC_INLINE void to_single_xn(fptype *fp, uae_u32 wrd1)
+{
+    float32 f = wrd1;
+    *fp = float32_to_floatx80(f); // automatically fix denormals
+}
+STATIC_INLINE void to_single_x(fptype *fp, uae_u32 wrd1)
+{
+    float32 f = wrd1;
+    *fp = float32_to_floatx80_allowunnormal(f);
+}
+STATIC_INLINE uae_u32 from_single_x(fptype *fp)
+{
+    float32 f = floatx80_to_float32(*fp);
+    return f;
+}
+
+STATIC_INLINE void to_double_xn(fptype *fp, uae_u32 wrd1, uae_u32 wrd2)
+{
+    float64 f = ((float64)wrd1 << 32) | wrd2;
+    *fp = float64_to_floatx80(f); // automatically fix denormals
+}
+STATIC_INLINE void to_double_x(fptype *fp, uae_u32 wrd1, uae_u32 wrd2)
+{
+    float64 f = ((float64)wrd1 << 32) | wrd2;
+    *fp = float64_to_floatx80_allowunnormal(f);
+}
+STATIC_INLINE void from_double_x(fptype *fp, uae_u32 *wrd1, uae_u32 *wrd2)
+{
+    float64 f = floatx80_to_float64(*fp);
+    *wrd1 = f >> 32;
+    *wrd2 = (uae_u32)f;
+}
+
+STATIC_INLINE void to_exten_x(fptype *fp, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
+{
+    uae_u32 wrd[3] = { wrd1, wrd2, wrd3 };
+    softfloat_set(fp, wrd);
+}
+STATIC_INLINE void from_exten_x(fptype *fp, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3)
+{
+    uae_u32 wrd[3];
+    softfloat_get(fp, wrd);
+    *wrd1 = wrd[0];
+    *wrd2 = wrd[1];
+    *wrd3 = wrd[2];
+}
+
+STATIC_INLINE uae_s64 to_int(fptype *src, int size)
+{
+    static fptype fxsizes[6];
+    static bool setup = false;
+    if (!setup) {
+        fxsizes[0] = int32_to_floatx80(-128);
+        fxsizes[1] = int32_to_floatx80(127);
+        fxsizes[2] = int32_to_floatx80(-32768);
+        fxsizes[3] = int32_to_floatx80(32767);
+        fxsizes[4] = int32_to_floatx80(-2147483648);
+        fxsizes[5] = int32_to_floatx80(2147483647);
+        setup = true;
+    }
+    
+    if (floatx80_lt(*src, fxsizes[size * 2 + 0])) {
+        return floatx80_to_int32(fxsizes[size * 2 + 0]);
+    }
+    if (floatx80_le(fxsizes[size * 2 + 1], *src)) {
+        return floatx80_to_int32(fxsizes[size * 2 + 1]);
+    }
+    return floatx80_to_int32(*src);
+}
+STATIC_INLINE fptype from_int(uae_s32 src)
+{
+    return int32_to_floatx80(src);
+}
+
+/* Functions for rounding */
+
+// round to float with extended precision exponent
+STATIC_INLINE void fp_roundsgl(fptype *fp)
+{
+    *fp = floatx80_round32(*fp);
+}
+
+// round to float
+STATIC_INLINE void fp_round32(fptype *fp)
+{
+    float32 f = floatx80_to_float32(*fp);
+    *fp = float32_to_floatx80(f);
+}
+
+// round to double
+STATIC_INLINE void fp_round64(fptype *fp)
+{
+    float64 f = floatx80_to_float64(*fp);
+    *fp = float64_to_floatx80(f);
+}
+
+/* Arithmetic functions */
+
+STATIC_INLINE fptype fp_int(fptype a)
+{
+    return floatx80_round_to_int(a);
+}
+STATIC_INLINE fptype fp_intrz(fptype a)
+{
+    int8 save_rm;
+    fptype b;
+    save_rm = float_rounding_mode;
+    float_rounding_mode = float_round_to_zero;
+    b = floatx80_round_to_int(a);
+    float_rounding_mode = save_rm;
+    return b;
+}
+STATIC_INLINE fptype fp_sqrt(fptype a)
+{
+    return floatx80_sqrt(a);
+}
+STATIC_INLINE fptype fp_lognp1(fptype a)
+{
+    return floatx80_flognp1(a);
+}
+STATIC_INLINE fptype fp_sin(fptype a)
+{
+    floatx80_fsin(&a);
+    return a;
+}
+STATIC_INLINE fptype fp_tan(fptype a)
+{
+    floatx80_ftan(&a);
+    return a;
+}
+STATIC_INLINE fptype fp_logn(fptype a)
+{
+    return floatx80_flogn(a);
+}
+STATIC_INLINE fptype fp_log10(fptype a)
+{
+    return floatx80_flog10(a);
+}
+STATIC_INLINE fptype fp_log2(fptype a)
+{
+    return floatx80_flog2(a);
+}
+STATIC_INLINE fptype fp_abs(fptype a)
+{
+    return floatx80_abs(a);
+}
+STATIC_INLINE fptype fp_neg(fptype a)
+{
+    return floatx80_chs(a);
+}
+STATIC_INLINE fptype fp_cos(fptype a)
+{
+    floatx80_fcos(&a);
+    return a;
+}
+STATIC_INLINE fptype fp_getexp(fptype a)
+{
+    return floatx80_getexp(a);
+}
+STATIC_INLINE fptype fp_getman(fptype a)
+{
+    return floatx80_getman(a);
+}
+STATIC_INLINE fptype fp_div(fptype a, fptype b)
+{
+    return floatx80_div(a, b);
+}
+STATIC_INLINE fptype fp_add(fptype a, fptype b)
+{
+    return floatx80_add(a, b);
+}
+STATIC_INLINE fptype fp_mul(fptype a, fptype b)
+{
+    return floatx80_mul(a, b);
+}
+STATIC_INLINE fptype fp_rem(fptype a, fptype b)
+{
+    return floatx80_rem(a, b);
+}
+STATIC_INLINE fptype fp_scale(fptype a, fptype b)
+{
+    return floatx80_scale(a, b);
+}
+STATIC_INLINE fptype fp_sub(fptype a, fptype b)
+{
+    return floatx80_sub(a, b);
+}
+
+/* FIXME: create softfloat functions for following arithmetics */
+#define fp_round_to_minus_infinity(x) floor(x)
+#define fp_round_to_plus_infinity(x) ceil(x)
+#define fp_round_to_zero(x)    ((x) >= 0.0 ? floor(x) : ceil(x))
+#define fp_round_to_nearest(x) ((x) >= 0.0 ? (int)((x) + 0.5) : (int)((x) - 0.5))
+
+STATIC_INLINE fptype fp_sinh(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = sinhl(fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_etoxm1(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = expl(fpa) - 1.0;
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_tanh(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = tanhl(fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_atan(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = atanl(fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_asin(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = asinl(fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_atanh(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = atanhl(fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_etox(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = expl(fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_twotox(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = powl(2.0, fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_tentox(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = powl(10.0, fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_cosh(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = coshl(fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_acos(fptype a)
+{
+    long double fpa;
+    to_native(&fpa, a);
+    fpa = acosl(fpa);
+    from_native(fpa, &a);
+    return a;
+}
+STATIC_INLINE fptype fp_mod(fptype a, fptype b)
+{
+    long double fpa, fpb;
+    long double quot;
+    to_native(&fpa, a);
+    to_native(&fpb, a);
+    quot = fp_round_to_zero(fpa / fpb);
+    fpa -= quot * fpb;
+    from_native(fpa, &a);
+    return a;
+}
+
+#endif
diff --git a/od-win32/md-fpp.h b/od-win32/md-fpp.h

index 45058c0e0eeef54e56b4396019dce5f9d1d3a225..6404123394025664edb2798e23e2320bcf0ea729 100644 (file)
--- a/od-win32/md-fpp.h
+++ b/od-win32/md-fpp.h
@@ -25,7 +25,9 @@
  extern void to_single(fpdata *fpd, uae_u32 value);
  extern void to_double(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2);
  extern void to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3);
+extern const TCHAR *fp_print(fpdata *fpd);
  
+#if 0
  STATIC_INLINE void exten_zeronormalize(uae_u32 *pwrd1, uae_u32 *pwrd2, uae_u32 *pwrd3)
  {
         uae_u32 wrd1 = *pwrd1;
@@ -198,7 +200,6 @@ STATIC_INLINE void from_double_x(double src, uae_u32 * wrd1, uae_u32 * wrd2)
  }
  #endif
  
-static const double twoto32 = 4294967296.0;
  #ifndef HAVE_to_exten
  #define HAVE_to_exten
  STATIC_INLINE void to_exten_x(fptype *fp, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
@@ -248,3 +249,4 @@ STATIC_INLINE void from_exten_x(fptype fp, uae_u32 * wrd1, uae_u32 * wrd2, uae_u
         *wrd3 = (uae_u32) ((frac * twoto32 - *wrd2) * twoto32);
  }
  #endif
+#endif
diff --git a/od-win32/sysconfig.h b/od-win32/sysconfig.h

index f887120bd1544d7a834a6b1b65217cb859e8abef..baba2e5762169562f59ee2f81a391e46315109d8 100644 (file)
--- a/od-win32/sysconfig.h
+++ b/od-win32/sysconfig.h
@@ -52,7 +52,6 @@
  #define UAESERIAL /* uaeserial.device emulation */
  #define FPUEMU /* FPU emulation */
  #define FPU_UAE
-#define WITH_SOFTFLOAT
  #define MMUEMU /* Aranym 68040 MMU */
  #define FULLMMU /* Aranym 68040 MMU */
  #define CPUEMU_0 /* generic 680x0 emulation */
diff --git a/softfloat/README.txt b/softfloat/README.txt

new file mode 100644 (file)

index 0000000..9500d25
--- /dev/null
+++ b/softfloat/README.txt
@@ -0,0 +1,78 @@
+MAME note: this package is derived from the following original SoftFloat 
+package and has been "re-packaged" to work with MAME's conventions and
+build system.  The source files come from bits64/ and bits64/templates
+in the original distribution as MAME requires a compiler with a 64-bit
+integer type.
+
+
+Package Overview for SoftFloat Release 2b
+
+John R. Hauser
+2002 May 27
+
+
+----------------------------------------------------------------------------
+Overview
+
+SoftFloat is a software implementation of floating-point that conforms to
+the IEC/IEEE Standard for Binary Floating-Point Arithmetic.  SoftFloat is
+distributed in the form of C source code.  Compiling the SoftFloat sources
+generates two things:
+
+-- A SoftFloat object file (typically `softfloat.o') containing the complete
+   set of IEC/IEEE floating-point routines.
+
+-- A `timesoftfloat' program for evaluating the speed of the SoftFloat
+   routines.  (The SoftFloat module is linked into this program.)
+
+The SoftFloat package is documented in four text files:
+
+   SoftFloat.txt          Documentation for using the SoftFloat functions.
+   SoftFloat-source.txt   Documentation for compiling SoftFloat.
+   SoftFloat-history.txt  History of major changes to SoftFloat.
+   timesoftfloat.txt      Documentation for using `timesoftfloat'.
+
+Other files in the package comprise the source code for SoftFloat.
+
+Please be aware that some work is involved in porting this software to other
+targets.  It is not just a matter of getting `make' to complete without
+error messages.  I would have written the code that way if I could, but
+there are fundamental differences between systems that can't be hidden.
+You should not attempt to compile SoftFloat without first reading both
+`SoftFloat.txt' and `SoftFloat-source.txt'.
+
+
+----------------------------------------------------------------------------
+Legal Notice
+
+SoftFloat was written by me, John R. Hauser.  This work was made possible in
+part by the International Computer Science Institute, located at Suite 600,
+1947 Center Street, Berkeley, California 94704.  Funding was partially
+provided by the National Science Foundation under grant MIP-9311980.  The
+original version of this code was written as part of a project to build
+a fixed-point vector processor in collaboration with the University of
+California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL
+LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO
+FURTHERMORE EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER
+SCIENCE INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES,
+COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE
+SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, provided
+that the minimal documentation requirements stated in the source code are
+satisfied.
+
+
+----------------------------------------------------------------------------
+Contact Information
+
+At the time of this writing, the most up-to-date information about
+SoftFloat and the latest release can be found at the Web page `http://
+www.cs.berkeley.edu/~jhauser/arithmetic/SoftFloat.html'.
+
+
diff --git a/softfloat/SOFTFLOAT-MACROS.H b/softfloat/SOFTFLOAT-MACROS.H

new file mode 100644 (file)

index 0000000..4607d34
--- /dev/null
+++ b/softfloat/SOFTFLOAT-MACROS.H
@@ -0,0 +1,743 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    bits32 z;
+
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 32 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 z;
+
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 64 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'.  The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+| bits shifted off form a second 64-bit result as follows:  The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero.  This extra result is stored in the location
+| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1 != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0.  The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1.  The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero.  The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else if ( count < 128 ) {
+            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if ( count < 64 ) {
+            z2 = a1<<negCount;
+            z1 = ( a0<<negCount ) | ( a1>>count );
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 64 ) {
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;
+                if ( count < 128 ) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>( count & 63 );
+                }
+                else {
+                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= ( a2 != 0 );
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' must be less than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1<<count;
+    *z0Ptr =
+        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+| by the number of bits given in `count'.  Any bits shifted off are lost.
+| The value of `count' must be less than 64.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount;
+
+    z2 = a2<<count;
+    z1 = a1<<count;
+    z0 = a0<<count;
+    if ( 0 < count ) {
+        negCount = ( ( - count ) & 63 );
+        z1 |= a2>>negCount;
+        z0 |= a1>>negCount;
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+| any carry out is lost.  The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z1;
+
+    z1 = a1 + b1;
+    *z1Ptr = z1;
+    *z0Ptr = a0 + b0 + ( z1 < a1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+| modulo 2^192, so any carry out is lost.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    uint8 carry0, carry1;
+
+    z2 = a2 + b2;
+    carry1 = ( z2 < a2 );
+    z1 = a1 + b1;
+    carry0 = ( z1 < a1 );
+    z0 = a0 + b0;
+    z1 += carry1;
+    z0 += ( z1 < carry1 );
+    z0 += carry0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - ( a1 < b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    uint8 borrow0, borrow1;
+
+    z2 = a2 - b2;
+    borrow1 = ( a2 < b2 );
+    z1 = a1 - b1;
+    borrow0 = ( a1 < b1 );
+    z0 = a0 - b0;
+    z0 -= ( z1 < borrow1 );
+    z1 -= borrow1;
+    z0 -= borrow0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits32 aHigh, aLow, bHigh, bLow;
+    bits64 z0, zMiddleA, zMiddleB, z1;
+
+    aLow = (bits32)a;
+    aHigh = a>>32;
+    bLow = (bits32)b;
+    bHigh = b>>32;
+    z1 = ( (bits64) aLow ) * bLow;
+    zMiddleA = ( (bits64) aLow ) * bHigh;
+    zMiddleB = ( (bits64) aHigh ) * bLow;
+    z0 = ( (bits64) aHigh ) * bHigh;
+    zMiddleA += zMiddleB;
+    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+    zMiddleA <<= 32;
+    z1 += zMiddleA;
+    z0 += ( z1 < zMiddleA );
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2, more1;
+
+    mul64To128( a1, b, &z1, &z2 );
+    mul64To128( a0, b, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product.  The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    bits64 z0, z1, z2, z3;
+    bits64 more1, more2;
+
+    mul64To128( a1, b1, &z2, &z3 );
+    mul64To128( a1, b0, &z1, &more2 );
+    add128( z1, more2, 0, z2, &z1, &z2 );
+    mul64To128( a0, b0, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    mul64To128( a0, b1, &more1, &more2 );
+    add128( more1, more2, 0, z2, &more1, &z2 );
+    add128( z0, z1, 0, more1, &z0, &z1 );
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+    b0 = b>>32;
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 );
+    while ( ( (sbits64) rem0 ) < 0 ) {
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+    }
+    rem0 = ( rem0<<32 ) | ( rem1>>32 );
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+INLINE bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    static const bits16 sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    index = ( a>>27 ) & 15;
+    if ( aExp & 1 ) {
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
+        z = ( ( a / z )<<14 ) + ( z<<15 );
+        a >>= 1;
+    }
+    else {
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
+        z = a / z + z;
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros32( bits32 a )
+{
+    static const int8 countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < 0x10000 ) {
+        shiftCount += 16;
+        a <<= 16;
+    }
+    if ( a < 0x1000000 ) {
+        shiftCount += 8;
+        a <<= 8;
+    }
+    shiftCount += countLeadingZerosHigh[ a>>24 ];
+    return shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros64( bits64 a )
+{
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < ( (bits64) 1 )<<32 ) {
+        shiftCount += 32;
+    }
+    else {
+        a >>= 32;
+    }
+    shiftCount += countLeadingZeros32((bits32)a );
+    return shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 == b0 ) && ( a1 == b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 != b0 ) || ( a1 != b1 );
+
+}
+
+/*-----------------------------------------------------------------------------
+| Changes the sign of the extended double-precision floating-point value 'a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 floatx80_chs(floatx80 reg)
+{
+    reg.high ^= 0x8000;
+    return reg;
+}
+
+/*-----------------------------------------------------------------------------
+| Calculates the absolute value of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 floatx80_abs(floatx80 reg)
+{
+    reg.high &= 0x7FFF;
+    return reg;
+}
diff --git a/softfloat/fpu_constant.h b/softfloat/fpu_constant.h

new file mode 100644 (file)

index 0000000..fdd9719
--- /dev/null
+++ b/softfloat/fpu_constant.h
@@ -0,0 +1,80 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef _FPU_CONSTANTS_H_
+#define _FPU_CONSTANTS_H_
+
+// Pentium CPU uses only 68-bit precision M_PI approximation
+#define BETTER_THAN_PENTIUM
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+//////////////////////////////
+// PI, PI/2, PI/4 constants
+//////////////////////////////
+
+#define FLOATX80_PI_EXP  (0x4000)
+
+// 128-bit PI fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_PI_HI (0xc90fdaa22168c234U)
+#define FLOAT_PI_LO (0xc4c6628b80dc1cd1U)
+#else
+#define FLOAT_PI_HI (0xc90fdaa22168c234U)
+#define FLOAT_PI_LO (0xC000000000000000U)
+#endif
+
+#define FLOATX80_PI2_EXP  (0x3FFF)
+#define FLOATX80_PI4_EXP  (0x3FFE)
+
+//////////////////////////////
+// 3PI/4 constant
+//////////////////////////////
+
+#define FLOATX80_3PI4_EXP (0x4000)
+
+// 128-bit 3PI/4 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U)
+#define FLOAT_3PI4_LO (0x9394c9e8a0a5159cU)
+#else
+#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U)
+#define FLOAT_3PI4_LO (0x9000000000000000U)
+#endif
+
+//////////////////////////////
+// 1/LN2 constant
+//////////////////////////////
+
+#define FLOAT_LN2INV_EXP  (0x3FFF)
+
+// 128-bit 1/LN2 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU)
+#define FLOAT_LN2INV_LO (0xbe87fed0691d3e89U)
+#else
+#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU)
+#define FLOAT_LN2INV_LO (0xC000000000000000U)
+#endif
+
+#endif
diff --git a/softfloat/fsincos.cpp b/softfloat/fsincos.cpp

new file mode 100644 (file)

index 0000000..695936e
--- /dev/null
+++ b/softfloat/fsincos.cpp
@@ -0,0 +1,652 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOATX128
+
+#define USE_estimateDiv128To64
+#include "mamesf.h"
+#include "softfloat.h"
+//#include "softfloat-specialize"
+#include "fpu_constant.h"
+
+static const floatx80 floatx80_one = { 0x3fff, 0x8000000000000000U };
+static const floatx80 floatx80_default_nan = { 0xffff, 0xffffffffffffffffU };
+
+#define packFloat2x128m(zHi, zLo) {(zHi), (zLo)}
+#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo))
+
+#define EXP_BIAS 0x3FFF
+
+#if 0 /* Included in softfloat-specialize" */
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+       return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+       return a.high & 0x7FFF;
+
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+       return a.high>>15;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes extended double-precision floating-point  NaN  `a' and returns the
+| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
+| is raised.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 propagateFloatx80NaNOneArg(floatx80 a)
+{
+       if (floatx80_is_signaling_nan(a))
+               float_raise(float_flag_invalid);
+
+       a.low |= 0xC000000000000000U;
+
+       return a;
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
+{
+       int shiftCount = countLeadingZeros64(aSig);
+       *zSigPtr = aSig<<shiftCount;
+       *zExpPtr = 1 - shiftCount;
+}
+
+/* reduce trigonometric function argument using 128-bit precision
+   M_PI approximation */
+static uint64_t argument_reduction_kernel(uint64_t aSig0, int Exp, uint64_t *zSig0, uint64_t *zSig1)
+{
+       uint64_t term0, term1, term2;
+       uint64_t aSig1 = 0;
+
+       shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
+       uint64_t q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
+       mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
+       sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
+       while ((int64_t)(*zSig1) < 0) {
+               --q;
+               add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
+       }
+       *zSig1 = term2;
+       return q;
+}
+
+static int reduce_trig_arg(int expDiff, int *zSign, uint64_t *aSig0, uint64_t *aSig1)
+{
+       uint64_t term0, term1, q = 0;
+
+       if (expDiff < 0) {
+               shift128Right(*aSig0, 0, 1, aSig0, aSig1);
+               expDiff = 0;
+       }
+       if (expDiff > 0) {
+               q = argument_reduction_kernel(*aSig0, expDiff, aSig0, aSig1);
+       }
+       else {
+               if (FLOAT_PI_HI <= *aSig0) {
+                       *aSig0 -= FLOAT_PI_HI;
+                       q = 1;
+               }
+       }
+
+       shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
+       if (! lt128(*aSig0, *aSig1, term0, term1))
+       {
+               int lt = lt128(term0, term1, *aSig0, *aSig1);
+               int eq = eq128(*aSig0, *aSig1, term0, term1);
+
+               if ((eq && (q & 1)) || lt) {
+                       *zSign = !(*zSign);
+                       ++q;
+               }
+               if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, *aSig0, *aSig1, aSig0, aSig1);
+       }
+
+       return (int)(q & 3);
+}
+
+#define SIN_ARR_SIZE 11
+#define COS_ARR_SIZE 11
+
+static float128 sin_arr[SIN_ARR_SIZE] =
+{
+       PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+       PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
+       PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
+       PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
+       PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
+       PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
+       PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
+       PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
+       PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
+       PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
+       PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45)  /* 21 */
+};
+
+static float128 cos_arr[COS_ARR_SIZE] =
+{
+       PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
+       PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
+       PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
+       PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
+       PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
+       PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
+       PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
+       PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
+       PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
+       PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
+       PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb)  /* 20 */
+};
+
+extern float128 OddPoly (float128 x, float128 *arr, unsigned n);
+
+/* 0 <= x <= pi/4 */
+INLINE float128 poly_sin(float128 x)
+{
+       //                 3     5     7     9     11     13     15
+       //                x     x     x     x     x      x      x
+       // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
+       //                3!    5!    7!    9!    11!    13!    15!
+       //
+       //                 2     4     6     8     10     12     14
+       //                x     x     x     x     x      x      x
+       //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
+       //                3!    5!    7!    9!    11!    13!    15!
+       //
+       //           3                          3
+       //          --       4k                --        4k+2
+       //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+       //          --  2k                     --  2k+1
+       //          k=0                        k=0
+       //
+       //                          2
+       //   sin(x) ~ x * [ p(x) + x * q(x) ]
+       //
+
+       return OddPoly(x, sin_arr, SIN_ARR_SIZE);
+}
+
+extern float128 EvenPoly(float128 x, float128 *arr, unsigned n);
+
+/* 0 <= x <= pi/4 */
+INLINE float128 poly_cos(float128 x)
+{
+       //                 2     4     6     8     10     12     14
+       //                x     x     x     x     x      x      x
+       // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
+       //                2!    4!    6!    8!    10!    12!    14!
+       //
+       //           3                          3
+       //          --       4k                --        4k+2
+       //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+       //          --  2k                     --  2k+1
+       //          k=0                        k=0
+       //
+       //                      2
+       //   cos(x) ~ [ p(x) + x * q(x) ]
+       //
+
+       return EvenPoly(x, cos_arr, COS_ARR_SIZE);
+}
+
+INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+       if (sin_a) *sin_a = a;
+       if (cos_a) *cos_a = a;
+}
+
+INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+       if (sin_a) *sin_a = a;
+       if (cos_a) *cos_a = floatx80_one;
+}
+
+static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient)
+{
+       if (quotient & 0x1) {
+               r = poly_cos(r);
+               neg = 0;
+       } else  {
+               r = poly_sin(r);
+       }
+
+       floatx80 result = float128_to_floatx80(r);
+       if (quotient & 0x2)
+               neg = ! neg;
+
+       if (neg)
+               result = floatx80_chs(result);
+
+       return result;
+}
+
+// =================================================
+// SFFSINCOS               Compute sin(x) and cos(x)
+// =================================================
+
+//
+// Uses the following identities:
+// ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+
+int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
+{
+       uint64_t aSig0, aSig1 = 0;
+       int32_t aExp, zExp, expDiff;
+       int aSign, zSign;
+       int q = 0;
+
+       aSig0 = extractFloatx80Frac(a);
+       aExp = extractFloatx80Exp(a);
+       aSign = extractFloatx80Sign(a);
+
+       /* invalid argument */
+       if (aExp == 0x7FFF) {
+               if ((uint64_t) (aSig0<<1)) {
+                       sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a));
+                       return 0;
+               }
+
+               float_raise(float_flag_invalid);
+               sincos_invalid(sin_a, cos_a, floatx80_default_nan);
+               return 0;
+       }
+
+       if (aExp == 0) {
+               if (aSig0 == 0) {
+                       sincos_tiny_argument(sin_a, cos_a, a);
+                       return 0;
+               }
+
+//        float_raise(float_flag_denormal);
+
+               /* handle pseudo denormals */
+               if (! (aSig0 & 0x8000000000000000U))
+               {
+                       float_raise(float_flag_inexact);
+                       if (sin_a)
+                               float_raise(float_flag_underflow);
+                       sincos_tiny_argument(sin_a, cos_a, a);
+                       return 0;
+               }
+
+               normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+       }
+
+       zSign = aSign;
+       zExp = EXP_BIAS;
+       expDiff = aExp - zExp;
+
+       /* argument is out-of-range */
+       if (expDiff >= 63)
+               return -1;
+
+       float_raise(float_flag_inexact);
+
+       if (expDiff < -1) {    // doesn't require reduction
+               if (expDiff <= -68) {
+                       a = packFloatx80(aSign, aExp, aSig0);
+                       sincos_tiny_argument(sin_a, cos_a, a);
+                       return 0;
+               }
+               zExp = aExp;
+       }
+       else {
+               q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);
+       }
+
+       /* **************************** */
+       /* argument reduction completed */
+       /* **************************** */
+
+       /* using float128 for approximation */
+       float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
+
+       if (aSign) q = -q;
+       if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1);
+       if (sin_a) *sin_a = sincos_approximation(zSign, r,   q);
+
+       return 0;
+}
+
+int floatx80_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
+{
+    return sf_fsincos(a, sin_a, cos_a);
+}
+
+int floatx80_fsin(floatx80 *a)
+{
+       return sf_fsincos(*a, a, 0);
+}
+
+int floatx80_fcos(floatx80 *a)
+{
+       return sf_fsincos(*a, 0, a);
+}
+
+// =================================================
+// FPTAN                 Compute tan(x)
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+// 2. ----------------------------------------------------------
+//
+//           sin(x)
+//  tan(x) = ------
+//           cos(x)
+//
+
+int floatx80_ftan(floatx80 *a)
+{
+       uint64_t aSig0, aSig1 = 0;
+       int32_t aExp, zExp, expDiff;
+       int aSign, zSign;
+       int q = 0;
+
+       aSig0 = extractFloatx80Frac(*a);
+       aExp = extractFloatx80Exp(*a);
+       aSign = extractFloatx80Sign(*a);
+
+       /* invalid argument */
+       if (aExp == 0x7FFF) {
+               if ((uint64_t) (aSig0<<1))
+               {
+                       *a = propagateFloatx80NaNOneArg(*a);
+                       return 0;
+               }
+
+               float_raise(float_flag_invalid);
+               *a = floatx80_default_nan;
+               return 0;
+       }
+
+       if (aExp == 0) {
+               if (aSig0 == 0) return 0;
+//        float_raise(float_flag_denormal);
+               /* handle pseudo denormals */
+               if (! (aSig0 & 0x8000000000000000U))
+               {
+                       float_raise(float_flag_inexact | float_flag_underflow);
+                       return 0;
+               }
+               normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+       }
+
+       zSign = aSign;
+       zExp = EXP_BIAS;
+       expDiff = aExp - zExp;
+
+       /* argument is out-of-range */
+       if (expDiff >= 63)
+               return -1;
+
+       float_raise(float_flag_inexact);
+
+       if (expDiff < -1) {    // doesn't require reduction
+               if (expDiff <= -68) {
+                       *a = packFloatx80(aSign, aExp, aSig0);
+                       return 0;
+               }
+               zExp = aExp;
+       }
+       else {
+               q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);
+       }
+
+       /* **************************** */
+       /* argument reduction completed */
+       /* **************************** */
+
+       /* using float128 for approximation */
+       float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
+
+       float128 sin_r = poly_sin(r);
+       float128 cos_r = poly_cos(r);
+
+       if (q & 0x1) {
+               r = float128_div(cos_r, sin_r);
+               zSign = ! zSign;
+       } else {
+               r = float128_div(sin_r, cos_r);
+       }
+
+       *a = float128_to_floatx80(r);
+       if (zSign)
+               *a = floatx80_chs(*a);
+
+       return 0;
+}
+
+//                            2         3         4               n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       2k                --        2k+1
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+
+float128 EvalPoly(float128 x, float128 *arr, unsigned n)
+{
+       float128 x2 = float128_mul(x, x);
+       unsigned i;
+
+       assert(n > 1);
+
+       float128 r1 = arr[--n];
+       i = n;
+       while(i >= 2) {
+               r1 = float128_mul(r1, x2);
+               i -= 2;
+               r1 = float128_add(r1, arr[i]);
+       }
+       if (i) r1 = float128_mul(r1, x);
+
+       float128 r2 = arr[--n];
+       i = n;
+       while(i >= 2) {
+               r2 = float128_mul(r2, x2);
+               i -= 2;
+               r2 = float128_add(r2, arr[i]);
+       }
+       if (i) r2 = float128_mul(r2, x);
+
+       return float128_add(r1, r2);
+}
+
+//                  2         4         6         8               2n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//                    2
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+
+float128 EvenPoly(float128 x, float128 *arr, unsigned n)
+{
+               return EvalPoly(float128_mul(x, x), arr, n);
+}
+
+//                        3         5         7         9               2n+1
+// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//          0         1         2         3         4               n
+//                        2         4         6         8               2n
+//      = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//               0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//                        2
+//   f(x) ~ x * [ p(x) + x * q(x) ]
+//
+
+float128 OddPoly(float128 x, float128 *arr, unsigned n)
+{
+               return float128_mul(x, EvenPoly(x, arr, n));
+}
+
+/*----------------------------------------------------------------------------
+| Scales extended double-precision floating-point value in operand `a' by
+| value `b'. The function truncates the value in the second operand 'b' to
+| an integral value and adds that value to the exponent of the operand 'a'.
+| The operation performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b );
+
+floatx80 floatx80_scale(floatx80 a, floatx80 b)
+{
+       sbits32 aExp, bExp;
+       bits64 aSig, bSig;
+
+       // handle unsupported extended double-precision floating encodings
+/*    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(float_flag_invalid);
+        return floatx80_default_nan;
+    }*/
+
+       aSig = extractFloatx80Frac(a);
+       aExp = extractFloatx80Exp(a);
+       int aSign = extractFloatx80Sign(a);
+       bSig = extractFloatx80Frac(b);
+       bExp = extractFloatx80Exp(b);
+       int bSign = extractFloatx80Sign(b);
+
+       if (aExp == 0x7FFF) {
+               if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1)))
+               {
+                       return propagateFloatx80NaN(a, b);
+               }
+               if ((bExp == 0x7FFF) && bSign) {
+                       float_raise(float_flag_invalid);
+                       return floatx80_default_nan;
+               }
+               if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
+               return a;
+       }
+       if (bExp == 0x7FFF) {
+               if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b);
+               if ((aExp | aSig) == 0) {
+                       if (! bSign) {
+                               float_raise(float_flag_invalid);
+                               return floatx80_default_nan;
+                       }
+                       return a;
+               }
+               if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
+               if (bSign) return packFloatx80(aSign, 0, 0);
+               return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U);
+       }
+       if (aExp == 0) {
+               if (aSig == 0) return a;
+               float_raise(float_flag_denormal);
+               normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+       }
+       if (bExp == 0) {
+               if (bSig == 0) return a;
+               float_raise(float_flag_denormal);
+               normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+       }
+
+       if (bExp > 0x400E) {
+               /* generate appropriate overflow/underflow */
+               return roundAndPackFloatx80(80, aSign,
+                                                       bSign ? -0x3FFF : 0x7FFF, aSig, 0);
+       }
+       if (bExp < 0x3FFF) return a;
+
+       int shiftCount = 0x403E - bExp;
+       bSig >>= shiftCount;
+       sbits32 scale = bSig;
+       if (bSign) scale = -scale; /* -32768..32767 */
+       return
+               roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0);
+}
diff --git a/softfloat/fyl2x.cpp b/softfloat/fyl2x.cpp

new file mode 100644 (file)

index 0000000..28dddb1
--- /dev/null
+++ b/softfloat/fyl2x.cpp
@@ -0,0 +1,494 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+float_raise(float_flag_invalid)
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012)
+ * ==========================================================================*/
+
+#define FLOATX128
+
+#define USE_estimateDiv128To64
+#include "mamesf.h"
+#include "softfloat.h"
+//#include "softfloat-specialize"
+#include "fpu_constant.h"
+/*
+static const floatx80 floatx80_log10_2 = packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U);
+static const floatx80 floatx80_ln_2 = packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU);
+static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, 0x8000000000000000U);
+static const floatx80 floatx80_default_nan = packFloatx80(0, 0xffff, 0xffffffffffffffffU);
+*/
+static const floatx80 floatx80_log10_2 = { 0x3ffd, 0x9a209a84fbcff798U };
+static const floatx80 floatx80_ln_2 = { 0x3ffe, 0xb17217f7d1cf79acU };
+static const floatx80 floatx80_one = { 0x3fff, 0x8000000000000000U };
+static const floatx80 floatx80_default_nan = { 0xffff, 0xffffffffffffffffU };
+
+#define packFloat_128(zHi, zLo) {(zHi), (zLo)}
+#define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo))
+
+#define EXP_BIAS 0x3FFF
+
+#if 0 /* Included in softfloat-specialize */
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+       return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+       return a.high & 0x7FFF;
+
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+       return a.high>>15;
+
+}
+
+#if 0
+/*----------------------------------------------------------------------------
+| Takes extended double-precision floating-point  NaN  `a' and returns the
+| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
+| is raised.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 propagateFloatx80NaNOneArg(floatx80 a)
+{
+       if (floatx80_is_signaling_nan(a))
+               float_raise(float_flag_invalid);
+
+       a.low |= 0xC000000000000000U;
+
+       return a;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+INLINE void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
+{
+       int shiftCount = countLeadingZeros64(aSig);
+       *zSigPtr = aSig<<shiftCount;
+       *zExpPtr = 1 - shiftCount;
+}
+
+#if 0 /* Included in softfoat-specialize */
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_nan(floatx80 a)
+{
+       return ((a.high & 0x7FFF) == 0x7FFF) && (int64_t) (a.low<<1);
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b)
+{
+       int aIsNaN = floatx80_is_nan(a);
+       int aIsSignalingNaN = floatx80_is_signaling_nan(a);
+       int bIsNaN = floatx80_is_nan(b);
+       int bIsSignalingNaN = floatx80_is_signaling_nan(b);
+       a.low |= 0xC000000000000000U;
+       b.low |= 0xC000000000000000U;
+       if (aIsSignalingNaN | bIsSignalingNaN) float_raise(float_flag_invalid);
+       if (aIsSignalingNaN) {
+               if (bIsSignalingNaN) goto returnLargerSignificand;
+               return bIsNaN ? b : a;
+       }
+       else if (aIsNaN) {
+               if (bIsSignalingNaN | ! bIsNaN) return a;
+       returnLargerSignificand:
+               if (a.low < b.low) return b;
+               if (b.low < a.low) return a;
+               return (a.high < b.high) ? a : b;
+       }
+       else {
+               return b;
+       }
+}
+
+static const float128 float128_one =
+       packFloat_128(0x3fff000000000000U, 0x0000000000000000U);
+static const float128 float128_two =
+       packFloat_128(0x4000000000000000U, 0x0000000000000000U);
+
+static const float128 float128_ln2inv2 =
+       packFloat_128(0x400071547652b82fU, 0xe1777d0ffda0d23aU);
+
+#define SQRT2_HALF_SIG  0xb504f333f9de6484U
+
+extern float128 OddPoly(float128 x, float128 *arr, unsigned n);
+
+#define L2_ARR_SIZE 9
+
+static float128 ln_arr[L2_ARR_SIZE] =
+{
+       PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+       PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /*  3 */
+       PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
+       PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /*  7 */
+       PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
+       PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
+       PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
+       PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
+       PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2)  /* 17 */
+};
+
+static float128 poly_ln(float128 x1)
+{
+/*
+    //
+    //                     3     5     7     9     11     13     15
+    //        1+u         u     u     u     u     u      u      u
+    // 1/2 ln ---  ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
+    //        1-u         3     5     7     9     11     13     15
+    //
+    //                     2     4     6     8     10     12     14
+    //                    u     u     u     u     u      u      u
+    //       = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
+    //                    3     5     7     9     11     13     15
+    //
+    //           3                          3
+    //          --       4k                --        4k+2
+    //   p(u) = >  C  * u           q(u) = >  C   * u
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //          1+u                 2
+    //   1/2 ln --- ~ u * [ p(u) + u * q(u) ]
+    //          1-u
+    //
+*/
+       return OddPoly(x1, ln_arr, L2_ARR_SIZE);
+}
+
+/* required sqrt(2)/2 < x < sqrt(2) */
+static float128 poly_l2(float128 x)
+{
+       /* using float128 for approximation */
+       float128 x_p1 = float128_add(x, float128_one);
+       float128 x_m1 = float128_sub(x, float128_one);
+       x = float128_div(x_m1, x_p1);
+       x = poly_ln(x);
+       x = float128_mul(x, float128_ln2inv2);
+       return x;
+}
+
+static float128 poly_l2p1(float128 x)
+{
+       /* using float128 for approximation */
+       float128 x_p2 = float128_add(x, float128_two);
+       x = float128_div(x, x_p2);
+       x = poly_ln(x);
+       x = float128_mul(x, float128_ln2inv2);
+       return x;
+}
+
+// =================================================
+// FYL2X                   Compute y * log (x)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------,  ln (x*y) = ln(x) + ln(y)
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                1+u             x-1
+//   ln (x) = ln -----, when u = -----
+//                1-u             x+1
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+static floatx80 fyl2x(floatx80 a, floatx80 b)
+{
+       uint64_t aSig = extractFloatx80Frac(a);
+       int32_t aExp = extractFloatx80Exp(a);
+       int aSign = extractFloatx80Sign(a);
+       uint64_t bSig = extractFloatx80Frac(b);
+       int32_t bExp = extractFloatx80Exp(b);
+       int bSign = extractFloatx80Sign(b);
+
+       int zSign = bSign ^ 1;
+
+       if (aExp == 0x7FFF) {
+               if ((uint64_t) (aSig<<1)
+                               || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
+               {
+                       return propagateFloatx80NaN(a, b);
+               }
+               if (aSign)
+               {
+invalid:
+                       float_raise(float_flag_invalid);
+                       return floatx80_default_nan;
+               }
+               else {
+                       if (bExp == 0) {
+                               if (bSig == 0) goto invalid;
+                               float_raise(float_flag_denormal);
+                       }
+                       return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+               }
+       }
+       if (bExp == 0x7FFF)
+       {
+               if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b);
+               if (aSign && (uint64_t)(aExp | aSig)) goto invalid;
+               if (aSig && (aExp == 0))
+                       float_raise(float_flag_denormal);
+               if (aExp < 0x3FFF) {
+                       return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+               }
+               if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid;
+               return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+       }
+       if (aExp == 0) {
+               if (aSig == 0) {
+                       if ((bExp | bSig) == 0) goto invalid;
+                       float_raise(float_flag_divbyzero);
+                       return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+               }
+               if (aSign) goto invalid;
+               float_raise(float_flag_denormal);
+               normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+       }
+       if (aSign) goto invalid;
+       if (bExp == 0) {
+               if (bSig == 0) {
+                       if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
+                       return packFloatx80(bSign, 0, 0);
+               }
+               float_raise(float_flag_denormal);
+               normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+       }
+       if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0))
+               return packFloatx80(bSign, 0, 0);
+
+       float_raise(float_flag_inexact);
+
+       int ExpDiff = aExp - 0x3FFF;
+       aExp = 0;
+       if (aSig >= SQRT2_HALF_SIG) {
+               ExpDiff++;
+               aExp--;
+       }
+
+       /* ******************************** */
+       /* using float128 for approximation */
+       /* ******************************** */
+
+       uint64_t zSig0, zSig1;
+       shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
+       float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
+       x = poly_l2(x);
+       x = float128_add(x, int64_to_float128((int64_t) ExpDiff));
+       return floatx80_mul(b, float128_to_floatx80(x));
+}
+
+// =================================================
+// FYL2XP1                 Compute y * log (x + 1)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                  1+u              x
+//   ln (x+1) = ln -----, when u = -----
+//                  1-u             x+2
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+floatx80 fyl2xp1(floatx80 a, floatx80 b)
+{
+       int32_t aExp, bExp;
+       uint64_t aSig, bSig, zSig0, zSig1, zSig2;
+       int aSign, bSign;
+
+       aSig = extractFloatx80Frac(a);
+       aExp = extractFloatx80Exp(a);
+       aSign = extractFloatx80Sign(a);
+       bSig = extractFloatx80Frac(b);
+       bExp = extractFloatx80Exp(b);
+       bSign = extractFloatx80Sign(b);
+       int zSign = aSign ^ bSign;
+
+       if (aExp == 0x7FFF) {
+               if ((uint64_t) (aSig<<1)
+                               || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
+               {
+                       return propagateFloatx80NaN(a, b);
+               }
+               if (aSign)
+               {
+invalid:
+                       float_raise(float_flag_invalid);
+                       return floatx80_default_nan;
+               }
+                       else {
+                       if (bExp == 0) {
+                               if (bSig == 0) goto invalid;
+                               float_raise(float_flag_denormal);
+                       }
+                       return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+               }
+       }
+       if (bExp == 0x7FFF)
+       {
+               if ((uint64_t) (bSig<<1))
+                       return propagateFloatx80NaN(a, b);
+
+               if (aExp == 0) {
+                       if (aSig == 0) goto invalid;
+                       float_raise(float_flag_denormal);
+               }
+
+               return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+       }
+       if (aExp == 0) {
+               if (aSig == 0) {
+                       if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
+                       return packFloatx80(zSign, 0, 0);
+               }
+               float_raise(float_flag_denormal);
+               normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+       }
+       if (bExp == 0) {
+               if (bSig == 0) return packFloatx80(zSign, 0, 0);
+               float_raise(float_flag_denormal);
+               normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+       }
+
+       float_raise(float_flag_inexact);
+
+       if (aSign && aExp >= 0x3FFF)
+               return a;
+
+       if (aExp >= 0x3FFC) // big argument
+       {
+               return fyl2x(floatx80_add(a, floatx80_one), b);
+       }
+
+       // handle tiny argument
+       if (aExp < EXP_BIAS-70)
+       {
+               // first order approximation, return (a*b)/ln(2)
+               int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
+
+       mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
+               if (0 < (int64_t) zSig0) {
+                       shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+                       --zExp;
+               }
+
+               zExp = zExp + bExp - 0x3FFE;
+       mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
+               if (0 < (int64_t) zSig0) {
+                       shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+                       --zExp;
+               }
+
+               return
+                       roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1);
+       }
+
+       /* ******************************** */
+       /* using float128 for approximation */
+       /* ******************************** */
+
+       shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
+       float128 x = packFloat128(aSign, aExp, zSig0, zSig1);
+       x = poly_l2p1(x);
+       return floatx80_mul(b, float128_to_floatx80(x));
+}
+
+floatx80 floatx80_flognp1(floatx80 a)
+{
+       return fyl2xp1(a, floatx80_ln_2);
+}
+
+floatx80 floatx80_flogn(floatx80 a)
+{
+       return fyl2x(a, floatx80_ln_2);
+}
+
+floatx80 floatx80_flog2(floatx80 a)
+{
+       return fyl2x(a, floatx80_one);
+}
+
+floatx80 floatx80_flog10(floatx80 a)
+{
+       return fyl2x(a, floatx80_log10_2);
+}
diff --git a/softfloat/mamesf.h b/softfloat/mamesf.h

new file mode 100644 (file)

index 0000000..9e55a22
--- /dev/null
+++ b/softfloat/mamesf.h
@@ -0,0 +1,72 @@
+#ifndef MAMESF_H
+#define MAMESF_H
+
+/*----------------------------------------------------------------------------
+| One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined.
+*----------------------------------------------------------------------------*/
+#ifdef LSB_FIRST
+#define LITTLEENDIAN
+#else
+#define BIGENDIAN
+#endif
+
+/*----------------------------------------------------------------------------
+| The macro `BITS64' can be defined to indicate that 64-bit integer types are
+| supported by the compiler.
+*----------------------------------------------------------------------------*/
+#define BITS64
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified.  For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits.  The `flag' type must be able to hold either a 0 or 1.  For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+//#include "assert.h"
+#include "sysdeps.h"
+
+typedef int8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+typedef uint16_t uint16;
+typedef int16_t int16;
+typedef uint32_t uint32;
+typedef int32_t int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines a type that holds integers
+| of _exactly_ the number of bits specified.  For instance, for most
+| implementation of C, `bits16' and `sbits16' should be `typedef'ed to
+| `unsigned short int' and `signed short int' (or `short int'), respectively.
+*----------------------------------------------------------------------------*/
+typedef uint8_t bits8;
+typedef int8_t sbits8;
+typedef uint16_t bits16;
+typedef int16_t sbits16;
+typedef uint32_t bits32;
+typedef int32_t sbits32;
+typedef uint64_t bits64;
+typedef int64_t sbits64;
+
+/*----------------------------------------------------------------------------
+| The `LIT64' macro takes as its argument a textual integer literal and
+| if necessary ``marks'' the literal as having a 64-bit integer type.
+| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be
+| appended with the letters `LL' standing for `long long', which is `gcc's
+| name for the 64-bit integer type.  Some compilers may allow `LIT64' to be
+| defined as the identity macro:  `#define LIT64( a ) a'.
+*----------------------------------------------------------------------------*/
+#define LIT64( a ) a##ULL
+
+/*----------------------------------------------------------------------------
+| The macro `INLINE' can be used before functions that should be inlined.  If
+| a compiler does not support explicit inlining, this macro should be defined
+| to be `static'.
+*----------------------------------------------------------------------------*/
+#define INLINE static inline
+
+#endif //MAMESF_H
+\ No newline at end of file
diff --git a/softfloat/milieu.h b/softfloat/milieu.h

new file mode 100644 (file)

index 0000000..10687b7
--- /dev/null
+++ b/softfloat/milieu.h
@@ -0,0 +1,42 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Include common integer types and flags.
+*----------------------------------------------------------------------------*/
+#include "mamesf.h"
+
+/*----------------------------------------------------------------------------
+| Symbolic Boolean literals.
+*----------------------------------------------------------------------------*/
+#define FALSE 0
+#define TRUE 1
diff --git a/softfloat/softfloat-specialize.h b/softfloat/softfloat-specialize.h

new file mode 100644 (file)

index 0000000..f0ec9ef
--- /dev/null
+++ b/softfloat/softfloat-specialize.h
@@ -0,0 +1,554 @@
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Underflow tininess-detection mode, statically initialized to default value.
+| (The declaration in `softfloat.h' must match the `int8' type here.)
+*----------------------------------------------------------------------------*/
+int8 float_detect_tininess = float_tininess_after_rounding;
+
+/*----------------------------------------------------------------------------
+| Raises the exceptions specified by `flags'.  Floating-point traps can be
+| defined here if desired.  It is currently not possible for such a trap to
+| substitute a result value.  If traps are not implemented, this routine
+| should be simply `float_exception_flags |= flags;'.
+*----------------------------------------------------------------------------*/
+
+void float_raise( int8 flags )
+{
+
+    float_exception_flags |= flags;
+
+}
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*/
+typedef struct {
+    flag sign;
+    bits64 high, low;
+} commonNaNT;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float32_default_nan 0xFFFFFFFF
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float32_is_nan( float32 a )
+{
+
+    return ( 0xFF000000 < (bits32) ( a<<1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float32_is_signaling_nan( float32 a )
+{
+
+    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float32ToCommonNaN( float32 a )
+{
+    commonNaNT z;
+
+    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>31;
+    z.low = 0;
+    z.high = ( (bits64) a )<<41;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+
+    return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float32_is_nan( a );
+    aIsSignalingNaN = float32_is_signaling_nan( a );
+    bIsNaN = float32_is_nan( b );
+    bIsSignalingNaN = float32_is_signaling_nan( b );
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float64_is_nan( float64 a )
+{
+
+    return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float64_is_signaling_nan( float64 a )
+{
+
+    return
+           ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+        && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT z;
+
+    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>63;
+    z.low = 0;
+    z.high = a<<12;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+
+    return
+          ( ( (bits64) a.sign )<<63 )
+        | LIT64( 0x7FF8000000000000 )
+        | ( a.high>>12 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64NaN( float64 a, float64 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float64_is_nan( a );
+    aIsSignalingNaN = float64_is_signaling_nan( a );
+    bIsNaN = float64_is_nan( b );
+    bIsSignalingNaN = float64_is_signaling_nan( b );
+    a |= LIT64( 0x0008000000000000 );
+    b |= LIT64( 0x0008000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.  The
+| `high' and `low' values hold the most- and least-significant bits,
+| respectively.
+*----------------------------------------------------------------------------*/
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_nan( floatx80 a )
+{
+
+    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    bits64 aLow;
+
+    aLow = a.low & ~ LIT64( 0x4000000000000000 );
+    return
+           ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (bits64) ( aLow<<1 )
+        && ( a.low == aLow );
+
+}
+
+// 28-12-2016: Added for Previous:
+
+/*----------------------------------------------------------------------------
+ | Returns 1 if the extended double-precision floating-point value `a' is
+ | zero; otherwise returns 0.
+ *----------------------------------------------------------------------------*/
+
+flag floatx80_is_zero( floatx80 a )
+{
+    
+    return ( ( a.high & 0x7FFF ) == 0 ) && ( a.low == 0 );
+    
+}
+
+/*----------------------------------------------------------------------------
+ | Returns 1 if the extended double-precision floating-point value `a' is
+ | infinity; otherwise returns 0.
+ *----------------------------------------------------------------------------*/
+
+flag floatx80_is_infinity( floatx80 a )
+{
+    
+    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && ( (bits64) ( a.low<<1 ) == 0 );
+    
+}
+
+/*----------------------------------------------------------------------------
+ | Returns 1 if the extended double-precision floating-point value `a' is
+ | negative; otherwise returns 0.
+ *----------------------------------------------------------------------------*/
+
+flag floatx80_is_negative( floatx80 a )
+{
+    
+    return ( ( a.high & 0x8000 ) == 0x8000 );
+    
+}
+
+/*----------------------------------------------------------------------------
+ | Returns 1 if the extended double-precision floating-point value `a' is
+ | denormal; otherwise returns 0.
+ *----------------------------------------------------------------------------*/
+
+flag floatx80_is_denormal( floatx80 a )
+{
+    
+    return
+           ( ( a.high & 0x7FFF ) == 0 )
+        && ( (bits64) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) )
+        && (bits64) ( a.low<<1 );
+    
+}
+
+/*----------------------------------------------------------------------------
+ | Returns 1 if the extended double-precision floating-point value `a' is
+ | unnormal; otherwise returns 0.
+ *----------------------------------------------------------------------------*/
+
+flag floatx80_is_unnormal( floatx80 a )
+{
+    
+    return
+           ( ( a.high & 0x7FFF ) > 0 )
+        && ( ( a.high & 0x7FFF ) < 0x7FFF)
+        && ( (bits64) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) );
+    
+}
+
+/*----------------------------------------------------------------------------
+ | Returns 1 if the extended double-precision floating-point value `a' is
+ | normal; otherwise returns 0.
+ *----------------------------------------------------------------------------*/
+
+flag floatx80_is_normal( floatx80 a )
+{
+    
+    return
+           ( ( a.high & 0x7FFF ) < 0x7FFF )
+        && ( (bits64) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x8000000000000000 ) );
+    
+}
+
+// End of addition for Previous
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
+| invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT z;
+
+    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>15;
+    z.low = 0;
+    z.high = a.low<<1;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the extended
+| double-precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 z;
+
+    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = floatx80_is_nan( a );
+    aIsSignalingNaN = floatx80_is_signaling_nan( a );
+    bIsNaN = floatx80_is_nan( b );
+    bIsSignalingNaN = floatx80_is_signaling_nan( b );
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#define EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+bits64 extractFloatx80Frac( floatx80 a )
+{
+
+    return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+int32 extractFloatx80Exp( floatx80 a )
+{
+
+    return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+
+    return a.high>>15;
+
+}
+
+#endif
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.  The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
+#define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float128_is_nan( float128 a )
+{
+
+    return
+           ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+        && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+flag float128_is_signaling_nan( float128 a )
+{
+
+    return
+           ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+        && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float128ToCommonNaN( float128 a )
+{
+    commonNaNT z;
+
+    if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>63;
+    shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+    float128 z;
+
+    shift128Right( a.high, a.low, 16, &z.high, &z.low );
+    z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
+    return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float128 propagateFloat128NaN( float128 a, float128 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float128_is_nan( a );
+    aIsSignalingNaN = float128_is_signaling_nan( a );
+    bIsNaN = float128_is_nan( b );
+    bIsSignalingNaN = float128_is_signaling_nan( b );
+    a.high |= LIT64( 0x0000800000000000 );
+    b.high |= LIT64( 0x0000800000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+
diff --git a/softfloat/softfloat.cpp b/softfloat/softfloat.cpp

new file mode 100644 (file)

index 0000000..acc821a
--- /dev/null
+++ b/softfloat/softfloat.cpp
@@ -0,0 +1,5099 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#include "milieu.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Floating-point rounding mode, extended double-precision rounding precision,
+| and exception flags.
+*----------------------------------------------------------------------------*/
+int8 float_exception_flags = 0;
+#ifdef FLOATX80
+int8 floatx80_rounding_precision = 80;
+#endif
+
+int8 float_rounding_mode = float_round_nearest_even;
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the largest
+| positive or negative integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+{
+       int8 roundingMode;
+       flag roundNearestEven;
+       int8 roundIncrement, roundBits;
+       int32 z;
+
+       roundingMode = float_rounding_mode;
+       roundNearestEven = ( roundingMode == float_round_nearest_even );
+       roundIncrement = 0x40;
+       if ( ! roundNearestEven ) {
+               if ( roundingMode == float_round_to_zero ) {
+                       roundIncrement = 0;
+               }
+               else {
+                       roundIncrement = 0x7F;
+                       if ( zSign ) {
+                               if ( roundingMode == float_round_up ) roundIncrement = 0;
+                       }
+                       else {
+                               if ( roundingMode == float_round_down ) roundIncrement = 0;
+                       }
+               }
+       }
+       roundBits = absZ & 0x7F;
+       absZ = ( absZ + roundIncrement )>>7;
+       absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+       z = absZ;
+       if ( zSign ) z = - z;
+    z = (sbits32) z;
+       if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
+               float_raise( float_flag_invalid );
+               return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+       }
+       if ( roundBits ) float_exception_flags |= float_flag_inexact;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest positive or negative integer is
+| returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
+{
+       int8 roundingMode;
+       flag roundNearestEven, increment;
+       int64 z;
+
+       roundingMode = float_rounding_mode;
+       roundNearestEven = ( roundingMode == float_round_nearest_even );
+       increment = ( (sbits64) absZ1 < 0 );
+       if ( ! roundNearestEven ) {
+               if ( roundingMode == float_round_to_zero ) {
+                       increment = 0;
+               }
+               else {
+                       if ( zSign ) {
+                               increment = ( roundingMode == float_round_down ) && absZ1;
+                       }
+                       else {
+                               increment = ( roundingMode == float_round_up ) && absZ1;
+                       }
+               }
+       }
+       if ( increment ) {
+               ++absZ0;
+               if ( absZ0 == 0 ) goto overflow;
+               absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
+       }
+       z = absZ0;
+       if ( zSign ) z = - z;
+    z = (sbits64) z;
+       if ( z && ( ( z < 0 ) ^ zSign ) ) {
+       overflow:
+               float_raise( float_flag_invalid );
+               return
+                               zSign ? (sbits64) LIT64( 0x8000000000000000 )
+                       : LIT64( 0x7FFFFFFFFFFFFFFF );
+       }
+       if ( absZ1 ) float_exception_flags |= float_flag_inexact;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits32 extractFloat32Frac( float32 a )
+{
+       return a & 0x007FFFFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int16 extractFloat32Exp( float32 a )
+{
+       return ( a>>23 ) & 0xFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat32Sign( float32 a )
+{
+       return a>>31;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+       normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+       int8 shiftCount;
+
+       shiftCount = countLeadingZeros32( aSig ) - 8;
+       *zSigPtr = aSig<<shiftCount;
+       *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| single-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+       return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+       int8 roundingMode;
+       flag roundNearestEven;
+       int8 roundIncrement, roundBits;
+       flag isTiny;
+
+       roundingMode = float_rounding_mode;
+       roundNearestEven = ( roundingMode == float_round_nearest_even );
+       roundIncrement = 0x40;
+       if ( ! roundNearestEven ) {
+               if ( roundingMode == float_round_to_zero ) {
+                       roundIncrement = 0;
+               }
+               else {
+                       roundIncrement = 0x7F;
+                       if ( zSign ) {
+                               if ( roundingMode == float_round_up ) roundIncrement = 0;
+                       }
+                       else {
+                               if ( roundingMode == float_round_down ) roundIncrement = 0;
+                       }
+               }
+       }
+       roundBits = zSig & 0x7F;
+       if ( 0xFD <= (bits16) zExp ) {
+               if (    ( 0xFD < zExp )
+                               || (    ( zExp == 0xFD )
+                                       && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+                       ) {
+                       float_raise( float_flag_overflow | float_flag_inexact );
+                       return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
+               }
+               if ( zExp < 0 ) {
+                       isTiny =
+                                       ( float_detect_tininess == float_tininess_before_rounding )
+                               || ( zExp < -1 )
+                               || ( zSig + roundIncrement < 0x80000000 );
+                       shift32RightJamming( zSig, - zExp, &zSig );
+                       zExp = 0;
+                       roundBits = zSig & 0x7F;
+                       if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+               }
+       }
+       if ( roundBits ) float_exception_flags |= float_flag_inexact;
+       zSig = ( zSig + roundIncrement )>>7;
+       zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+       if ( zSig == 0 ) zExp = 0;
+       return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float32
+       normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+       int8 shiftCount;
+
+       shiftCount = countLeadingZeros32( zSig ) - 1;
+       return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat64Frac( float64 a )
+{
+       return a & LIT64( 0x000FFFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int16 extractFloat64Exp( float64 a )
+{
+       return ( a>>52 ) & 0x7FF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat64Sign( float64 a )
+{
+       return a>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+       normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+       int8 shiftCount;
+
+       shiftCount = countLeadingZeros64( aSig ) - 11;
+       *zSigPtr = aSig<<shiftCount;
+       *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| double-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+       return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+       int8 roundingMode;
+       flag roundNearestEven;
+       int16 roundIncrement, roundBits;
+       flag isTiny;
+
+       roundingMode = float_rounding_mode;
+       roundNearestEven = ( roundingMode == float_round_nearest_even );
+       roundIncrement = 0x200;
+       if ( ! roundNearestEven ) {
+               if ( roundingMode == float_round_to_zero ) {
+                       roundIncrement = 0;
+               }
+               else {
+                       roundIncrement = 0x3FF;
+                       if ( zSign ) {
+                               if ( roundingMode == float_round_up ) roundIncrement = 0;
+                       }
+                       else {
+                               if ( roundingMode == float_round_down ) roundIncrement = 0;
+                       }
+               }
+       }
+       roundBits = zSig & 0x3FF;
+       if ( 0x7FD <= (bits16) zExp ) {
+               if (    ( 0x7FD < zExp )
+                               || (    ( zExp == 0x7FD )
+                                       && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+                       ) {
+                       float_raise( float_flag_overflow | float_flag_inexact );
+                       return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
+               }
+               if ( zExp < 0 ) {
+                       isTiny =
+                                       ( float_detect_tininess == float_tininess_before_rounding )
+                               || ( zExp < -1 )
+                               || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+                       shift64RightJamming( zSig, - zExp, &zSig );
+                       zExp = 0;
+                       roundBits = zSig & 0x3FF;
+                       if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+               }
+       }
+       if ( roundBits ) float_exception_flags |= float_flag_inexact;
+       zSig = ( zSig + roundIncrement )>>10;
+       zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+       if ( zSig == 0 ) zExp = 0;
+       return packFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float64
+       normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+       int8 shiftCount;
+
+       shiftCount = countLeadingZeros64( zSig ) - 1;
+       return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+       normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+       int8 shiftCount;
+
+       shiftCount = countLeadingZeros64( aSig );
+       *zSigPtr = aSig<<shiftCount;
+       *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+// roundAndPackFloatx80 is now also used in fyl2x.c
+
+/* static */ floatx80
+       roundAndPackFloatx80(
+               int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+       )
+{
+       int8 roundingMode;
+       flag roundNearestEven, increment, isTiny;
+       int64 roundIncrement, roundMask, roundBits;
+
+       roundingMode = float_rounding_mode;
+       roundNearestEven = ( roundingMode == float_round_nearest_even );
+       if ( roundingPrecision == 80 ) goto precision80;
+       if ( roundingPrecision == 64 ) {
+               roundIncrement = LIT64( 0x0000000000000400 );
+               roundMask = LIT64( 0x00000000000007FF );
+       }
+       else if ( roundingPrecision == 32 ) {
+               roundIncrement = LIT64( 0x0000008000000000 );
+               roundMask = LIT64( 0x000000FFFFFFFFFF );
+       }
+       else {
+               goto precision80;
+       }
+       zSig0 |= ( zSig1 != 0 );
+       if ( ! roundNearestEven ) {
+               if ( roundingMode == float_round_to_zero ) {
+                       roundIncrement = 0;
+               }
+               else {
+                       roundIncrement = roundMask;
+                       if ( zSign ) {
+                               if ( roundingMode == float_round_up ) roundIncrement = 0;
+                       }
+                       else {
+                               if ( roundingMode == float_round_down ) roundIncrement = 0;
+                       }
+               }
+       }
+       roundBits = zSig0 & roundMask;
+       if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+               if (    ( 0x7FFE < zExp )
+                               || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+                       ) {
+                       goto overflow;
+               }
+               if ( zExp <= 0 ) {
+                       isTiny =
+                                       ( float_detect_tininess == float_tininess_before_rounding )
+                               || ( zExp < 0 )
+                               || ( zSig0 <= zSig0 + roundIncrement );
+                       shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+                       zExp = 0;
+                       roundBits = zSig0 & roundMask;
+                       if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+                       if ( roundBits ) float_exception_flags |= float_flag_inexact;
+                       zSig0 += roundIncrement;
+                       if ( (sbits64) zSig0 < 0 ) zExp = 1;
+                       roundIncrement = roundMask + 1;
+                       if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+                               roundMask |= roundIncrement;
+                       }
+                       zSig0 &= ~ roundMask;
+                       return packFloatx80( zSign, zExp, zSig0 );
+               }
+       }
+       if ( roundBits ) float_exception_flags |= float_flag_inexact;
+       zSig0 += roundIncrement;
+       if ( zSig0 < roundIncrement ) {
+               ++zExp;
+               zSig0 = LIT64( 0x8000000000000000 );
+       }
+       roundIncrement = roundMask + 1;
+       if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+               roundMask |= roundIncrement;
+       }
+       zSig0 &= ~ roundMask;
+       if ( zSig0 == 0 ) zExp = 0;
+       return packFloatx80( zSign, zExp, zSig0 );
+       precision80:
+       increment = ( (sbits64) zSig1 < 0 );
+       if ( ! roundNearestEven ) {
+               if ( roundingMode == float_round_to_zero ) {
+                       increment = 0;
+               }
+               else {
+                       if ( zSign ) {
+                               increment = ( roundingMode == float_round_down ) && zSig1;
+                       }
+                       else {
+                               increment = ( roundingMode == float_round_up ) && zSig1;
+                       }
+               }
+       }
+       if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+               if (    ( 0x7FFE < zExp )
+                               || (    ( zExp == 0x7FFE )
+                                       && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+                                       && increment
+                               )
+                       ) {
+                       roundMask = 0;
+       overflow:
+                       float_raise( float_flag_overflow | float_flag_inexact );
+                       if (    ( roundingMode == float_round_to_zero )
+                                       || ( zSign && ( roundingMode == float_round_up ) )
+                                       || ( ! zSign && ( roundingMode == float_round_down ) )
+                               ) {
+                               return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+                       }
+                       return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+               }
+               if ( zExp <= 0 ) {
+                       isTiny =
+                                       ( float_detect_tininess == float_tininess_before_rounding )
+                               || ( zExp < 0 )
+                               || ! increment
+                               || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+                       shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+                       zExp = 0;
+                       if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
+                       if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+                       if ( roundNearestEven ) {
+                               increment = ( (sbits64) zSig1 < 0 );
+                       }
+                       else {
+                               if ( zSign ) {
+                                       increment = ( roundingMode == float_round_down ) && zSig1;
+                               }
+                               else {
+                                       increment = ( roundingMode == float_round_up ) && zSig1;
+                               }
+                       }
+                       if ( increment ) {
+                               ++zSig0;
+                               zSig0 &=
+                                       ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+                               if ( (sbits64) zSig0 < 0 ) zExp = 1;
+                       }
+                       return packFloatx80( zSign, zExp, zSig0 );
+               }
+       }
+       if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+       if ( increment ) {
+               ++zSig0;
+               if ( zSig0 == 0 ) {
+                       ++zExp;
+                       zSig0 = LIT64( 0x8000000000000000 );
+               }
+               else {
+                       zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+               }
+       }
+       else {
+               if ( zSig0 == 0 ) zExp = 0;
+       }
+       return packFloatx80( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+       normalizeRoundAndPackFloatx80(
+               int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+       )
+{
+       int8 shiftCount;
+
+       if ( zSig0 == 0 ) {
+               zSig0 = zSig1;
+               zSig1 = 0;
+               zExp -= 64;
+       }
+       shiftCount = countLeadingZeros64( zSig0 );
+       shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+       zExp -= shiftCount;
+       return
+               roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| Returns the least-significant 64 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat128Frac1( float128 a )
+{
+       return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the most-significant 48 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat128Frac0( float128 a )
+{
+       return a.high & LIT64( 0x0000FFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the quadruple-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloat128Exp( float128 a )
+{
+       return ( a.high>>48 ) & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the quadruple-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat128Sign( float128 a )
+{
+       return a.high>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static void
+       normalizeFloat128Subnormal(
+               bits64 aSig0,
+               bits64 aSig1,
+               int32 *zExpPtr,
+               bits64 *zSig0Ptr,
+               bits64 *zSig1Ptr
+       )
+{
+       int8 shiftCount;
+
+       if ( aSig0 == 0 ) {
+               shiftCount = countLeadingZeros64( aSig1 ) - 15;
+               if ( shiftCount < 0 ) {
+                       *zSig0Ptr = aSig1>>( - shiftCount );
+                       *zSig1Ptr = aSig1<<( shiftCount & 63 );
+               }
+               else {
+                       *zSig0Ptr = aSig1<<shiftCount;
+                       *zSig1Ptr = 0;
+               }
+               *zExpPtr = - shiftCount - 63;
+       }
+       else {
+               shiftCount = countLeadingZeros64( aSig0 ) - 15;
+               shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+               *zExpPtr = 1 - shiftCount;
+       }
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int32_to_float32( int32 a )
+{
+       flag zSign;
+
+       if ( a == 0 ) return 0;
+       if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+       zSign = ( a < 0 );
+       return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64( int32 a )
+{
+       flag zSign;
+       uint32 absA;
+       int8 shiftCount;
+       bits64 zSig;
+
+       if ( a == 0 ) return 0;
+       zSign = ( a < 0 );
+       absA = zSign ? - a : a;
+       shiftCount = countLeadingZeros32( absA ) + 21;
+       zSig = absA;
+       return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80( int32 a )
+{
+       flag zSign;
+       uint32 absA;
+       int8 shiftCount;
+       bits64 zSig;
+
+       if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+       zSign = ( a < 0 );
+       absA = zSign ? - a : a;
+       shiftCount = countLeadingZeros32( absA ) + 32;
+       zSig = absA;
+       return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int32_to_float128( int32 a )
+{
+       flag zSign;
+       uint32 absA;
+       int8 shiftCount;
+       bits64 zSig0;
+
+       if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+       zSign = ( a < 0 );
+       absA = zSign ? - a : a;
+       shiftCount = countLeadingZeros32( absA ) + 17;
+       zSig0 = absA;
+       return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int64_to_float32( int64 a )
+{
+       flag zSign;
+       uint64 absA;
+       int8 shiftCount;
+//    bits32 zSig;
+
+       if ( a == 0 ) return 0;
+       zSign = ( a < 0 );
+       absA = zSign ? - a : a;
+       shiftCount = countLeadingZeros64( absA ) - 40;
+       if ( 0 <= shiftCount ) {
+               return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
+       }
+       else {
+               shiftCount += 7;
+               if ( shiftCount < 0 ) {
+                       shift64RightJamming( absA, - shiftCount, &absA );
+               }
+               else {
+                       absA <<= shiftCount;
+               }
+               return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int64_to_float64( int64 a )
+{
+       flag zSign;
+
+       if ( a == 0 ) return 0;
+       if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
+               return packFloat64( 1, 0x43E, 0 );
+       }
+       zSign = ( a < 0 );
+       return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80( int64 a )
+{
+       flag zSign;
+       uint64 absA;
+       int8 shiftCount;
+
+       if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+       zSign = ( a < 0 );
+       absA = zSign ? - a : a;
+       shiftCount = countLeadingZeros64( absA );
+       return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128( int64 a )
+{
+       flag zSign;
+       uint64 absA;
+       int8 shiftCount;
+       int32 zExp;
+       bits64 zSig0, zSig1;
+
+       if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+       zSign = ( a < 0 );
+       absA = zSign ? - a : a;
+       shiftCount = countLeadingZeros64( absA ) + 49;
+       zExp = 0x406E - shiftCount;
+       if ( 64 <= shiftCount ) {
+               zSig1 = 0;
+               zSig0 = absA;
+               shiftCount -= 64;
+       }
+       else {
+               zSig1 = absA;
+               zSig0 = 0;
+       }
+       shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+       return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32( float32 a )
+{
+       flag aSign;
+       int16 aExp, shiftCount;
+       bits32 aSig;
+       bits64 aSig64;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
+       if ( aExp ) aSig |= 0x00800000;
+       shiftCount = 0xAF - aExp;
+       aSig64 = aSig;
+       aSig64 <<= 32;
+       if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
+       return roundAndPackInt32( aSign, aSig64 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32_round_to_zero( float32 a )
+{
+       flag aSign;
+       int16 aExp, shiftCount;
+       bits32 aSig;
+       int32 z;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       shiftCount = aExp - 0x9E;
+       if ( 0 <= shiftCount ) {
+               if ( a != 0xCF000000 ) {
+                       float_raise( float_flag_invalid );
+                       if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
+               }
+               return (sbits32) 0x80000000;
+       }
+       else if ( aExp <= 0x7E ) {
+               if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+               return 0;
+       }
+       aSig = ( aSig | 0x00800000 )<<8;
+       z = aSig>>( - shiftCount );
+       if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+               float_exception_flags |= float_flag_inexact;
+       }
+       if ( aSign ) z = - z;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64( float32 a )
+{
+       flag aSign;
+       int16 aExp, shiftCount;
+       bits32 aSig;
+       bits64 aSig64, aSigExtra;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       shiftCount = 0xBE - aExp;
+       if ( shiftCount < 0 ) {
+               float_raise( float_flag_invalid );
+               if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+                       return LIT64( 0x7FFFFFFFFFFFFFFF );
+               }
+               return (sbits64) LIT64( 0x8000000000000000 );
+       }
+       if ( aExp ) aSig |= 0x00800000;
+       aSig64 = aSig;
+       aSig64 <<= 40;
+       shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
+       return roundAndPackInt64( aSign, aSig64, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64_round_to_zero( float32 a )
+{
+       flag aSign;
+       int16 aExp, shiftCount;
+       bits32 aSig;
+       bits64 aSig64;
+       int64 z;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       shiftCount = aExp - 0xBE;
+       if ( 0 <= shiftCount ) {
+               if ( a != 0xDF000000 ) {
+                       float_raise( float_flag_invalid );
+                       if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+                               return LIT64( 0x7FFFFFFFFFFFFFFF );
+                       }
+               }
+               return (sbits64) LIT64( 0x8000000000000000 );
+       }
+       else if ( aExp <= 0x7E ) {
+               if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+               return 0;
+       }
+       aSig64 = aSig | 0x00800000;
+       aSig64 <<= 40;
+       z = aSig64>>( - shiftCount );
+       if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
+               float_exception_flags |= float_flag_inexact;
+       }
+       if ( aSign ) z = - z;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float32_to_float64( float32 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits32 aSig;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       if ( aExp == 0xFF ) {
+               if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
+               return packFloat64( aSign, 0x7FF, 0 );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
+               normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+               --aExp;
+       }
+       return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80( float32 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits32 aSig;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       if ( aExp == 0xFF ) {
+               if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+               return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+               normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+       }
+       aSig |= 0x00800000;
+       return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+
+}
+
+// 31-12-2016: Added for Previous
+floatx80 float32_to_floatx80_allowunnormal( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+    
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        return packFloatx80( aSign, 0x3F81, ( (bits64) aSig )<<40 );
+    }
+    aSig |= 0x00800000;
+    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+    
+}
+// end of addition for Previous
+
+#endif
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float32_to_float128( float32 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits32 aSig;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       if ( aExp == 0xFF ) {
+               if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
+               return packFloat128( aSign, 0x7FFF, 0, 0 );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+               normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+               --aExp;
+       }
+       return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the single-precision floating-point value `a' to an integer, and
+| returns the result as a single-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_round_to_int( float32 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits32 lastBitMask, roundBitsMask;
+       int8 roundingMode;
+       float32 z;
+
+       aExp = extractFloat32Exp( a );
+       if ( 0x96 <= aExp ) {
+               if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+                       return propagateFloat32NaN( a, a );
+               }
+               return a;
+       }
+       if ( aExp <= 0x7E ) {
+               if ( (bits32) ( a<<1 ) == 0 ) return a;
+               float_exception_flags |= float_flag_inexact;
+               aSign = extractFloat32Sign( a );
+               switch ( float_rounding_mode ) {
+                       case float_round_nearest_even:
+                       if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+                               return packFloat32( aSign, 0x7F, 0 );
+                       }
+                       break;
+                       case float_round_down:
+                       return aSign ? 0xBF800000 : 0;
+                       case float_round_up:
+                       return aSign ? 0x80000000 : 0x3F800000;
+               }
+               return packFloat32( aSign, 0, 0 );
+       }
+       lastBitMask = 1;
+       lastBitMask <<= 0x96 - aExp;
+       roundBitsMask = lastBitMask - 1;
+       z = a;
+       roundingMode = float_rounding_mode;
+       if ( roundingMode == float_round_nearest_even ) {
+               z += lastBitMask>>1;
+               if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+       }
+       else if ( roundingMode != float_round_to_zero ) {
+               if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+                       z += roundBitsMask;
+               }
+       }
+       z &= ~ roundBitsMask;
+       if ( z != a ) float_exception_flags |= float_flag_inexact;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the single-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+       int16 aExp, bExp, zExp;
+       bits32 aSig, bSig, zSig;
+       int16 expDiff;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       bSig = extractFloat32Frac( b );
+       bExp = extractFloat32Exp( b );
+       expDiff = aExp - bExp;
+       aSig <<= 6;
+       bSig <<= 6;
+       if ( 0 < expDiff ) {
+               if ( aExp == 0xFF ) {
+                       if ( aSig ) return propagateFloat32NaN( a, b );
+                       return a;
+               }
+               if ( bExp == 0 ) {
+                       --expDiff;
+               }
+               else {
+                       bSig |= 0x20000000;
+               }
+               shift32RightJamming( bSig, expDiff, &bSig );
+               zExp = aExp;
+       }
+       else if ( expDiff < 0 ) {
+               if ( bExp == 0xFF ) {
+                       if ( bSig ) return propagateFloat32NaN( a, b );
+                       return packFloat32( zSign, 0xFF, 0 );
+               }
+               if ( aExp == 0 ) {
+                       ++expDiff;
+               }
+               else {
+                       aSig |= 0x20000000;
+               }
+               shift32RightJamming( aSig, - expDiff, &aSig );
+               zExp = bExp;
+       }
+       else {
+               if ( aExp == 0xFF ) {
+                       if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+                       return a;
+               }
+               if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+               zSig = 0x40000000 + aSig + bSig;
+               zExp = aExp;
+               goto roundAndPack;
+       }
+       aSig |= 0x20000000;
+       zSig = ( aSig + bSig )<<1;
+       --zExp;
+       if ( (sbits32) zSig < 0 ) {
+               zSig = aSig + bSig;
+               ++zExp;
+       }
+       roundAndPack:
+       return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the single-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+       int16 aExp, bExp, zExp;
+       bits32 aSig, bSig, zSig;
+       int16 expDiff;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       bSig = extractFloat32Frac( b );
+       bExp = extractFloat32Exp( b );
+       expDiff = aExp - bExp;
+       aSig <<= 7;
+       bSig <<= 7;
+       if ( 0 < expDiff ) goto aExpBigger;
+       if ( expDiff < 0 ) goto bExpBigger;
+       if ( aExp == 0xFF ) {
+               if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+               float_raise( float_flag_invalid );
+               return float32_default_nan;
+       }
+       if ( aExp == 0 ) {
+               aExp = 1;
+               bExp = 1;
+       }
+       if ( bSig < aSig ) goto aBigger;
+       if ( aSig < bSig ) goto bBigger;
+       return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
+       bExpBigger:
+       if ( bExp == 0xFF ) {
+               if ( bSig ) return propagateFloat32NaN( a, b );
+               return packFloat32( zSign ^ 1, 0xFF, 0 );
+       }
+       if ( aExp == 0 ) {
+               ++expDiff;
+       }
+       else {
+               aSig |= 0x40000000;
+       }
+       shift32RightJamming( aSig, - expDiff, &aSig );
+       bSig |= 0x40000000;
+       bBigger:
+       zSig = bSig - aSig;
+       zExp = bExp;
+       zSign ^= 1;
+       goto normalizeRoundAndPack;
+       aExpBigger:
+       if ( aExp == 0xFF ) {
+               if ( aSig ) return propagateFloat32NaN( a, b );
+               return a;
+       }
+       if ( bExp == 0 ) {
+               --expDiff;
+       }
+       else {
+               bSig |= 0x40000000;
+       }
+       shift32RightJamming( bSig, expDiff, &bSig );
+       aSig |= 0x40000000;
+       aBigger:
+       zSig = aSig - bSig;
+       zExp = aExp;
+       normalizeRoundAndPack:
+       --zExp;
+       return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the single-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_add( float32 a, float32 b )
+{
+       flag aSign, bSign;
+
+       aSign = extractFloat32Sign( a );
+       bSign = extractFloat32Sign( b );
+       if ( aSign == bSign ) {
+               return addFloat32Sigs( a, b, aSign );
+       }
+       else {
+               return subFloat32Sigs( a, b, aSign );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sub( float32 a, float32 b )
+{
+       flag aSign, bSign;
+
+       aSign = extractFloat32Sign( a );
+       bSign = extractFloat32Sign( b );
+       if ( aSign == bSign ) {
+               return subFloat32Sigs( a, b, aSign );
+       }
+       else {
+               return addFloat32Sigs( a, b, aSign );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_mul( float32 a, float32 b )
+{
+       flag aSign, bSign, zSign;
+       int16 aExp, bExp, zExp;
+       bits32 aSig, bSig;
+       bits64 zSig64;
+       bits32 zSig;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       bSig = extractFloat32Frac( b );
+       bExp = extractFloat32Exp( b );
+       bSign = extractFloat32Sign( b );
+       zSign = aSign ^ bSign;
+       if ( aExp == 0xFF ) {
+               if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+                       return propagateFloat32NaN( a, b );
+               }
+               if ( ( bExp | bSig ) == 0 ) {
+                       float_raise( float_flag_invalid );
+                       return float32_default_nan;
+               }
+               return packFloat32( zSign, 0xFF, 0 );
+       }
+       if ( bExp == 0xFF ) {
+               if ( bSig ) return propagateFloat32NaN( a, b );
+               if ( ( aExp | aSig ) == 0 ) {
+                       float_raise( float_flag_invalid );
+                       return float32_default_nan;
+               }
+               return packFloat32( zSign, 0xFF, 0 );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+               normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
+               normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+       }
+       zExp = aExp + bExp - 0x7F;
+       aSig = ( aSig | 0x00800000 )<<7;
+       bSig = ( bSig | 0x00800000 )<<8;
+       shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
+       zSig = zSig64;
+       if ( 0 <= (sbits32) ( zSig<<1 ) ) {
+               zSig <<= 1;
+               --zExp;
+       }
+       return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the single-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_div( float32 a, float32 b )
+{
+       flag aSign, bSign, zSign;
+       int16 aExp, bExp, zExp;
+       bits32 aSig, bSig, zSig;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       bSig = extractFloat32Frac( b );
+       bExp = extractFloat32Exp( b );
+       bSign = extractFloat32Sign( b );
+       zSign = aSign ^ bSign;
+       if ( aExp == 0xFF ) {
+               if ( aSig ) return propagateFloat32NaN( a, b );
+               if ( bExp == 0xFF ) {
+                       if ( bSig ) return propagateFloat32NaN( a, b );
+                       float_raise( float_flag_invalid );
+                       return float32_default_nan;
+               }
+               return packFloat32( zSign, 0xFF, 0 );
+       }
+       if ( bExp == 0xFF ) {
+               if ( bSig ) return propagateFloat32NaN( a, b );
+               return packFloat32( zSign, 0, 0 );
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) {
+                       if ( ( aExp | aSig ) == 0 ) {
+                               float_raise( float_flag_invalid );
+                               return float32_default_nan;
+                       }
+                       float_raise( float_flag_divbyzero );
+                       return packFloat32( zSign, 0xFF, 0 );
+               }
+               normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+               normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+       }
+       zExp = aExp - bExp + 0x7D;
+       aSig = ( aSig | 0x00800000 )<<7;
+       bSig = ( bSig | 0x00800000 )<<8;
+       if ( bSig <= ( aSig + aSig ) ) {
+               aSig >>= 1;
+               ++zExp;
+       }
+       zSig = ( ( (bits64) aSig )<<32 ) / bSig;
+       if ( ( zSig & 0x3F ) == 0 ) {
+               zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
+       }
+       return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the single-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_rem( float32 a, float32 b )
+{
+       flag aSign, zSign;
+       int16 aExp, bExp, expDiff;
+       bits32 aSig, bSig;
+       bits32 q;
+       bits64 aSig64, bSig64, q64;
+       bits32 alternateASig;
+       sbits32 sigMean;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       bSig = extractFloat32Frac( b );
+       bExp = extractFloat32Exp( b );
+//    bSign = extractFloat32Sign( b );
+       if ( aExp == 0xFF ) {
+               if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+                       return propagateFloat32NaN( a, b );
+               }
+               float_raise( float_flag_invalid );
+               return float32_default_nan;
+       }
+       if ( bExp == 0xFF ) {
+               if ( bSig ) return propagateFloat32NaN( a, b );
+               return a;
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) {
+                       float_raise( float_flag_invalid );
+                       return float32_default_nan;
+               }
+               normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return a;
+               normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+       }
+       expDiff = aExp - bExp;
+       aSig |= 0x00800000;
+       bSig |= 0x00800000;
+       if ( expDiff < 32 ) {
+               aSig <<= 8;
+               bSig <<= 8;
+               if ( expDiff < 0 ) {
+                       if ( expDiff < -1 ) return a;
+                       aSig >>= 1;
+               }
+               q = ( bSig <= aSig );
+               if ( q ) aSig -= bSig;
+               if ( 0 < expDiff ) {
+                       q = ( ( (bits64) aSig )<<32 ) / bSig;
+                       q >>= 32 - expDiff;
+                       bSig >>= 2;
+                       aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+               }
+               else {
+                       aSig >>= 2;
+                       bSig >>= 2;
+               }
+       }
+       else {
+               if ( bSig <= aSig ) aSig -= bSig;
+               aSig64 = ( (bits64) aSig )<<40;
+               bSig64 = ( (bits64) bSig )<<40;
+               expDiff -= 64;
+               while ( 0 < expDiff ) {
+                       q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+                       q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+                       aSig64 = - ( ( bSig * q64 )<<38 );
+                       expDiff -= 62;
+               }
+               expDiff += 64;
+               q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+               q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+               q = q64>>( 64 - expDiff );
+               bSig <<= 6;
+               aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+       }
+       do {
+               alternateASig = aSig;
+               ++q;
+               aSig -= bSig;
+       } while ( 0 <= (sbits32) aSig );
+       sigMean = aSig + alternateASig;
+       if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+               aSig = alternateASig;
+       }
+       zSign = ( (sbits32) aSig < 0 );
+       if ( zSign ) aSig = - aSig;
+       return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sqrt( float32 a )
+{
+       flag aSign;
+       int16 aExp, zExp;
+       bits32 aSig, zSig;
+       bits64 rem, term;
+
+       aSig = extractFloat32Frac( a );
+       aExp = extractFloat32Exp( a );
+       aSign = extractFloat32Sign( a );
+       if ( aExp == 0xFF ) {
+               if ( aSig ) return propagateFloat32NaN( a, 0 );
+               if ( ! aSign ) return a;
+               float_raise( float_flag_invalid );
+               return float32_default_nan;
+       }
+       if ( aSign ) {
+               if ( ( aExp | aSig ) == 0 ) return a;
+               float_raise( float_flag_invalid );
+               return float32_default_nan;
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return 0;
+               normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+       }
+       zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
+       aSig = ( aSig | 0x00800000 )<<8;
+       zSig = estimateSqrt32( aExp, aSig ) + 2;
+       if ( ( zSig & 0x7F ) <= 5 ) {
+               if ( zSig < 2 ) {
+                       zSig = 0x7FFFFFFF;
+                       goto roundAndPack;
+               }
+               aSig >>= aExp & 1;
+               term = ( (bits64) zSig ) * zSig;
+               rem = ( ( (bits64) aSig )<<32 ) - term;
+               while ( (sbits64) rem < 0 ) {
+                       --zSig;
+                       rem += ( ( (bits64) zSig )<<1 ) | 1;
+               }
+               zSig |= ( rem != 0 );
+       }
+       shift32RightJamming( zSig, 1, &zSig );
+       roundAndPack:
+       return roundAndPackFloat32( 0, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_eq( float32 a, float32 b )
+{
+       if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+                       || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+               ) {
+               if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_le( float32 a, float32 b )
+{
+       flag aSign, bSign;
+
+       if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+                       || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       aSign = extractFloat32Sign( a );
+       bSign = extractFloat32Sign( b );
+       if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+       return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_lt( float32 a, float32 b )
+{
+       flag aSign, bSign;
+
+       if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+                       || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       aSign = extractFloat32Sign( a );
+       bSign = extractFloat32Sign( b );
+       if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+       return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_eq_signaling( float32 a, float32 b )
+{
+       if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+                       || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_le_quiet( float32 a, float32 b )
+{
+       flag aSign, bSign;
+//    int16 aExp, bExp;
+
+       if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+                       || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+               ) {
+               if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       aSign = extractFloat32Sign( a );
+       bSign = extractFloat32Sign( b );
+       if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+       return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float32_lt_quiet( float32 a, float32 b )
+{
+       flag aSign, bSign;
+
+       if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+                       || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+               ) {
+               if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       aSign = extractFloat32Sign( a );
+       bSign = extractFloat32Sign( b );
+       if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+       return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32( float64 a )
+{
+       flag aSign;
+       int16 aExp, shiftCount;
+       bits64 aSig;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+       if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+       shiftCount = 0x42C - aExp;
+       if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+       return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32_round_to_zero( float64 a )
+{
+       flag aSign;
+       int16 aExp, shiftCount;
+       bits64 aSig, savedASig;
+       int32 z;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       if ( 0x41E < aExp ) {
+               if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+               goto invalid;
+       }
+       else if ( aExp < 0x3FF ) {
+               if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+               return 0;
+       }
+       aSig |= LIT64( 0x0010000000000000 );
+       shiftCount = 0x433 - aExp;
+       savedASig = aSig;
+       aSig >>= shiftCount;
+       z = aSig;
+       if ( aSign ) z = - z;
+    z = (sbits32) z;
+       if ( ( z < 0 ) ^ aSign ) {
+       invalid:
+               float_raise( float_flag_invalid );
+               return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+       }
+       if ( ( aSig<<shiftCount ) != savedASig ) {
+               float_exception_flags |= float_flag_inexact;
+       }
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64( float64 a )
+{
+       flag aSign;
+       int16 aExp, shiftCount;
+       bits64 aSig, aSigExtra;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+       shiftCount = 0x433 - aExp;
+       if ( shiftCount <= 0 ) {
+               if ( 0x43E < aExp ) {
+                       float_raise( float_flag_invalid );
+                       if (    ! aSign
+                                       || (    ( aExp == 0x7FF )
+                                               && ( aSig != LIT64( 0x0010000000000000 ) ) )
+                               ) {
+                               return LIT64( 0x7FFFFFFFFFFFFFFF );
+                       }
+                       return (sbits64) LIT64( 0x8000000000000000 );
+               }
+               aSigExtra = 0;
+               aSig <<= - shiftCount;
+       }
+       else {
+               shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+       }
+       return roundAndPackInt64( aSign, aSig, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64_round_to_zero( float64 a )
+{
+       flag aSign;
+       int16 aExp, shiftCount;
+       bits64 aSig;
+       int64 z;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+       shiftCount = aExp - 0x433;
+       if ( 0 <= shiftCount ) {
+               if ( 0x43E <= aExp ) {
+                       if ( a != LIT64( 0xC3E0000000000000 ) ) {
+                               float_raise( float_flag_invalid );
+                               if (    ! aSign
+                                               || (    ( aExp == 0x7FF )
+                                                       && ( aSig != LIT64( 0x0010000000000000 ) ) )
+                                       ) {
+                                       return LIT64( 0x7FFFFFFFFFFFFFFF );
+                               }
+                       }
+                       return (sbits64) LIT64( 0x8000000000000000 );
+               }
+               z = aSig<<shiftCount;
+       }
+       else {
+               if ( aExp < 0x3FE ) {
+                       if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+                       return 0;
+               }
+               z = aSig>>( - shiftCount );
+               if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+                       float_exception_flags |= float_flag_inexact;
+               }
+       }
+       if ( aSign ) z = - z;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the single-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float64_to_float32( float64 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits64 aSig;
+       bits32 zSig;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       if ( aExp == 0x7FF ) {
+               if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
+               return packFloat32( aSign, 0xFF, 0 );
+       }
+       shift64RightJamming( aSig, 22, &aSig );
+       zSig = aSig;
+       if ( aExp || zSig ) {
+               zSig |= 0x40000000;
+               aExp -= 0x381;
+       }
+       return roundAndPackFloat32( aSign, aExp, zSig );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80( float64 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits64 aSig;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       if ( aExp == 0x7FF ) {
+               if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
+               return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+               normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+       }
+       return
+               packFloatx80(
+                       aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+
+}
+
+// 31-12-2016: Added for Previous
+floatx80 float64_to_floatx80_allowunnormal( float64 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+    
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        return packFloatx80( aSign, 0x3C01, aSig<<11);
+    }
+    return
+    packFloatx80(
+                 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+    
+}
+// end of addition for Previous
+
+#endif
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the quadruple-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float64_to_float128( float64 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits64 aSig, zSig0, zSig1;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       if ( aExp == 0x7FF ) {
+               if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
+               return packFloat128( aSign, 0x7FFF, 0, 0 );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+               normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+               --aExp;
+       }
+       shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
+       return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integer, and
+| returns the result as a double-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_round_to_int( float64 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits64 lastBitMask, roundBitsMask;
+       int8 roundingMode;
+       float64 z;
+
+       aExp = extractFloat64Exp( a );
+       if ( 0x433 <= aExp ) {
+               if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
+                       return propagateFloat64NaN( a, a );
+               }
+               return a;
+       }
+       if ( aExp < 0x3FF ) {
+               if ( (bits64) ( a<<1 ) == 0 ) return a;
+               float_exception_flags |= float_flag_inexact;
+               aSign = extractFloat64Sign( a );
+               switch ( float_rounding_mode ) {
+                       case float_round_nearest_even:
+                       if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
+                               return packFloat64( aSign, 0x3FF, 0 );
+                       }
+                       break;
+                       case float_round_down:
+                       return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
+                       case float_round_up:
+                       return
+                       aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
+               }
+               return packFloat64( aSign, 0, 0 );
+       }
+       lastBitMask = 1;
+       lastBitMask <<= 0x433 - aExp;
+       roundBitsMask = lastBitMask - 1;
+       z = a;
+       roundingMode = float_rounding_mode;
+       if ( roundingMode == float_round_nearest_even ) {
+               z += lastBitMask>>1;
+               if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+       }
+       else if ( roundingMode != float_round_to_zero ) {
+               if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+                       z += roundBitsMask;
+               }
+       }
+       z &= ~ roundBitsMask;
+       if ( z != a ) float_exception_flags |= float_flag_inexact;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+       int16 aExp, bExp, zExp;
+       bits64 aSig, bSig, zSig;
+       int16 expDiff;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       bSig = extractFloat64Frac( b );
+       bExp = extractFloat64Exp( b );
+       expDiff = aExp - bExp;
+       aSig <<= 9;
+       bSig <<= 9;
+       if ( 0 < expDiff ) {
+               if ( aExp == 0x7FF ) {
+                       if ( aSig ) return propagateFloat64NaN( a, b );
+                       return a;
+               }
+               if ( bExp == 0 ) {
+                       --expDiff;
+               }
+               else {
+                       bSig |= LIT64( 0x2000000000000000 );
+               }
+               shift64RightJamming( bSig, expDiff, &bSig );
+               zExp = aExp;
+       }
+       else if ( expDiff < 0 ) {
+               if ( bExp == 0x7FF ) {
+                       if ( bSig ) return propagateFloat64NaN( a, b );
+                       return packFloat64( zSign, 0x7FF, 0 );
+               }
+               if ( aExp == 0 ) {
+                       ++expDiff;
+               }
+               else {
+                       aSig |= LIT64( 0x2000000000000000 );
+               }
+               shift64RightJamming( aSig, - expDiff, &aSig );
+               zExp = bExp;
+       }
+       else {
+               if ( aExp == 0x7FF ) {
+                       if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+                       return a;
+               }
+               if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+               zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
+               zExp = aExp;
+               goto roundAndPack;
+       }
+       aSig |= LIT64( 0x2000000000000000 );
+       zSig = ( aSig + bSig )<<1;
+       --zExp;
+       if ( (sbits64) zSig < 0 ) {
+               zSig = aSig + bSig;
+               ++zExp;
+       }
+       roundAndPack:
+       return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+       int16 aExp, bExp, zExp;
+       bits64 aSig, bSig, zSig;
+       int16 expDiff;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       bSig = extractFloat64Frac( b );
+       bExp = extractFloat64Exp( b );
+       expDiff = aExp - bExp;
+       aSig <<= 10;
+       bSig <<= 10;
+       if ( 0 < expDiff ) goto aExpBigger;
+       if ( expDiff < 0 ) goto bExpBigger;
+       if ( aExp == 0x7FF ) {
+               if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+               float_raise( float_flag_invalid );
+               return float64_default_nan;
+       }
+       if ( aExp == 0 ) {
+               aExp = 1;
+               bExp = 1;
+       }
+       if ( bSig < aSig ) goto aBigger;
+       if ( aSig < bSig ) goto bBigger;
+       return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
+       bExpBigger:
+       if ( bExp == 0x7FF ) {
+               if ( bSig ) return propagateFloat64NaN( a, b );
+               return packFloat64( zSign ^ 1, 0x7FF, 0 );
+       }
+       if ( aExp == 0 ) {
+               ++expDiff;
+       }
+       else {
+               aSig |= LIT64( 0x4000000000000000 );
+       }
+       shift64RightJamming( aSig, - expDiff, &aSig );
+       bSig |= LIT64( 0x4000000000000000 );
+       bBigger:
+       zSig = bSig - aSig;
+       zExp = bExp;
+       zSign ^= 1;
+       goto normalizeRoundAndPack;
+       aExpBigger:
+       if ( aExp == 0x7FF ) {
+               if ( aSig ) return propagateFloat64NaN( a, b );
+               return a;
+       }
+       if ( bExp == 0 ) {
+               --expDiff;
+       }
+       else {
+               bSig |= LIT64( 0x4000000000000000 );
+       }
+       shift64RightJamming( bSig, expDiff, &bSig );
+       aSig |= LIT64( 0x4000000000000000 );
+       aBigger:
+       zSig = aSig - bSig;
+       zExp = aExp;
+       normalizeRoundAndPack:
+       --zExp;
+       return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the double-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_add( float64 a, float64 b )
+{
+       flag aSign, bSign;
+
+       aSign = extractFloat64Sign( a );
+       bSign = extractFloat64Sign( b );
+       if ( aSign == bSign ) {
+               return addFloat64Sigs( a, b, aSign );
+       }
+       else {
+               return subFloat64Sigs( a, b, aSign );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sub( float64 a, float64 b )
+{
+       flag aSign, bSign;
+
+       aSign = extractFloat64Sign( a );
+       bSign = extractFloat64Sign( b );
+       if ( aSign == bSign ) {
+               return subFloat64Sigs( a, b, aSign );
+       }
+       else {
+               return addFloat64Sigs( a, b, aSign );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_mul( float64 a, float64 b )
+{
+       flag aSign, bSign, zSign;
+       int16 aExp, bExp, zExp;
+       bits64 aSig, bSig, zSig0, zSig1;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       bSig = extractFloat64Frac( b );
+       bExp = extractFloat64Exp( b );
+       bSign = extractFloat64Sign( b );
+       zSign = aSign ^ bSign;
+       if ( aExp == 0x7FF ) {
+               if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+                       return propagateFloat64NaN( a, b );
+               }
+               if ( ( bExp | bSig ) == 0 ) {
+                       float_raise( float_flag_invalid );
+                       return float64_default_nan;
+               }
+               return packFloat64( zSign, 0x7FF, 0 );
+       }
+       if ( bExp == 0x7FF ) {
+               if ( bSig ) return propagateFloat64NaN( a, b );
+               if ( ( aExp | aSig ) == 0 ) {
+                       float_raise( float_flag_invalid );
+                       return float64_default_nan;
+               }
+               return packFloat64( zSign, 0x7FF, 0 );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+               normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
+               normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+       }
+       zExp = aExp + bExp - 0x3FF;
+       aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+       bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+       mul64To128( aSig, bSig, &zSig0, &zSig1 );
+       zSig0 |= ( zSig1 != 0 );
+       if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
+               zSig0 <<= 1;
+               --zExp;
+       }
+       return roundAndPackFloat64( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the double-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_div( float64 a, float64 b )
+{
+       flag aSign, bSign, zSign;
+       int16 aExp, bExp, zExp;
+       bits64 aSig, bSig, zSig;
+       bits64 rem0, rem1;
+       bits64 term0, term1;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       bSig = extractFloat64Frac( b );
+       bExp = extractFloat64Exp( b );
+       bSign = extractFloat64Sign( b );
+       zSign = aSign ^ bSign;
+       if ( aExp == 0x7FF ) {
+               if ( aSig ) return propagateFloat64NaN( a, b );
+               if ( bExp == 0x7FF ) {
+                       if ( bSig ) return propagateFloat64NaN( a, b );
+                       float_raise( float_flag_invalid );
+                       return float64_default_nan;
+               }
+               return packFloat64( zSign, 0x7FF, 0 );
+       }
+       if ( bExp == 0x7FF ) {
+               if ( bSig ) return propagateFloat64NaN( a, b );
+               return packFloat64( zSign, 0, 0 );
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) {
+                       if ( ( aExp | aSig ) == 0 ) {
+                               float_raise( float_flag_invalid );
+                               return float64_default_nan;
+                       }
+                       float_raise( float_flag_divbyzero );
+                       return packFloat64( zSign, 0x7FF, 0 );
+               }
+               normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+               normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+       }
+       zExp = aExp - bExp + 0x3FD;
+       aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+       bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+       if ( bSig <= ( aSig + aSig ) ) {
+               aSig >>= 1;
+               ++zExp;
+       }
+       zSig = estimateDiv128To64( aSig, 0, bSig );
+       if ( ( zSig & 0x1FF ) <= 2 ) {
+               mul64To128( bSig, zSig, &term0, &term1 );
+               sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+               while ( (sbits64) rem0 < 0 ) {
+                       --zSig;
+                       add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+               }
+               zSig |= ( rem1 != 0 );
+       }
+       return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the double-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_rem( float64 a, float64 b )
+{
+       flag aSign, zSign;
+       int16 aExp, bExp, expDiff;
+       bits64 aSig, bSig;
+       bits64 q, alternateASig;
+       sbits64 sigMean;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       bSig = extractFloat64Frac( b );
+       bExp = extractFloat64Exp( b );
+//    bSign = extractFloat64Sign( b );
+       if ( aExp == 0x7FF ) {
+               if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+                       return propagateFloat64NaN( a, b );
+               }
+               float_raise( float_flag_invalid );
+               return float64_default_nan;
+       }
+       if ( bExp == 0x7FF ) {
+               if ( bSig ) return propagateFloat64NaN( a, b );
+               return a;
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) {
+                       float_raise( float_flag_invalid );
+                       return float64_default_nan;
+               }
+               normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return a;
+               normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+       }
+       expDiff = aExp - bExp;
+       aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+       bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+       if ( expDiff < 0 ) {
+               if ( expDiff < -1 ) return a;
+               aSig >>= 1;
+       }
+       q = ( bSig <= aSig );
+       if ( q ) aSig -= bSig;
+       expDiff -= 64;
+       while ( 0 < expDiff ) {
+               q = estimateDiv128To64( aSig, 0, bSig );
+               q = ( 2 < q ) ? q - 2 : 0;
+               aSig = - ( ( bSig>>2 ) * q );
+               expDiff -= 62;
+       }
+       expDiff += 64;
+       if ( 0 < expDiff ) {
+               q = estimateDiv128To64( aSig, 0, bSig );
+               q = ( 2 < q ) ? q - 2 : 0;
+               q >>= 64 - expDiff;
+               bSig >>= 2;
+               aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+       }
+       else {
+               aSig >>= 2;
+               bSig >>= 2;
+       }
+       do {
+               alternateASig = aSig;
+               ++q;
+               aSig -= bSig;
+       } while ( 0 <= (sbits64) aSig );
+       sigMean = aSig + alternateASig;
+       if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+               aSig = alternateASig;
+       }
+       zSign = ( (sbits64) aSig < 0 );
+       if ( zSign ) aSig = - aSig;
+       return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sqrt( float64 a )
+{
+       flag aSign;
+       int16 aExp, zExp;
+       bits64 aSig, zSig, doubleZSig;
+       bits64 rem0, rem1, term0, term1;
+//    float64 z;
+
+       aSig = extractFloat64Frac( a );
+       aExp = extractFloat64Exp( a );
+       aSign = extractFloat64Sign( a );
+       if ( aExp == 0x7FF ) {
+               if ( aSig ) return propagateFloat64NaN( a, a );
+               if ( ! aSign ) return a;
+               float_raise( float_flag_invalid );
+               return float64_default_nan;
+       }
+       if ( aSign ) {
+               if ( ( aExp | aSig ) == 0 ) return a;
+               float_raise( float_flag_invalid );
+               return float64_default_nan;
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return 0;
+               normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+       }
+       zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
+       aSig |= LIT64( 0x0010000000000000 );
+       zSig = estimateSqrt32( aExp, aSig>>21 );
+       aSig <<= 9 - ( aExp & 1 );
+       zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
+       if ( ( zSig & 0x1FF ) <= 5 ) {
+               doubleZSig = zSig<<1;
+               mul64To128( zSig, zSig, &term0, &term1 );
+               sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+               while ( (sbits64) rem0 < 0 ) {
+                       --zSig;
+                       doubleZSig -= 2;
+                       add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
+               }
+               zSig |= ( ( rem0 | rem1 ) != 0 );
+       }
+       return roundAndPackFloat64( 0, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_eq( float64 a, float64 b )
+{
+       if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+                       || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+               ) {
+               if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_le( float64 a, float64 b )
+{
+       flag aSign, bSign;
+
+       if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+                       || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       aSign = extractFloat64Sign( a );
+       bSign = extractFloat64Sign( b );
+       if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+       return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_lt( float64 a, float64 b )
+{
+       flag aSign, bSign;
+
+       if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+                       || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       aSign = extractFloat64Sign( a );
+       bSign = extractFloat64Sign( b );
+       if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+       return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise.  The invalid exception is raised
+| if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_eq_signaling( float64 a, float64 b )
+{
+       if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+                       || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_le_quiet( float64 a, float64 b )
+{
+       flag aSign, bSign;
+//    int16 aExp, bExp;
+
+       if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+                       || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+               ) {
+               if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       aSign = extractFloat64Sign( a );
+       bSign = extractFloat64Sign( b );
+       if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+       return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float64_lt_quiet( float64 a, float64 b )
+{
+       flag aSign, bSign;
+
+       if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+                       || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+               ) {
+               if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       aSign = extractFloat64Sign( a );
+       bSign = extractFloat64Sign( b );
+       if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+       return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN, the
+| largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32( floatx80 a )
+{
+       flag aSign;
+       int32 aExp, shiftCount;
+       bits64 aSig;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+       shiftCount = 0x4037 - aExp;
+       if ( shiftCount <= 0 ) shiftCount = 1;
+       shift64RightJamming( aSig, shiftCount, &aSig );
+       return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
+{
+       flag aSign;
+       int32 aExp, shiftCount;
+       bits64 aSig, savedASig;
+       int32 z;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       if ( 0x401E < aExp ) {
+               if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+               goto invalid;
+       }
+       else if ( aExp < 0x3FFF ) {
+               if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+               return 0;
+       }
+       shiftCount = 0x403E - aExp;
+       savedASig = aSig;
+       aSig >>= shiftCount;
+       z = aSig;
+       if ( aSign ) z = - z;
+    z = (sbits32) z;
+       if ( ( z < 0 ) ^ aSign ) {
+       invalid:
+               float_raise( float_flag_invalid );
+               return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+       }
+       if ( ( aSig<<shiftCount ) != savedASig ) {
+               float_exception_flags |= float_flag_inexact;
+       }
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN,
+| the largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64( floatx80 a )
+{
+       flag aSign;
+       int32 aExp, shiftCount;
+       bits64 aSig, aSigExtra;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       shiftCount = 0x403E - aExp;
+       if ( shiftCount <= 0 ) {
+               if ( shiftCount ) {
+                       float_raise( float_flag_invalid );
+                       if (    ! aSign
+                                       || (    ( aExp == 0x7FFF )
+                                               && ( aSig != LIT64( 0x8000000000000000 ) ) )
+                               ) {
+                               return LIT64( 0x7FFFFFFFFFFFFFFF );
+                       }
+                       return (sbits64) LIT64( 0x8000000000000000 );
+               }
+               aSigExtra = 0;
+       }
+       else {
+               shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+       }
+       return roundAndPackInt64( aSign, aSig, aSigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64_round_to_zero( floatx80 a )
+{
+       flag aSign;
+       int32 aExp, shiftCount;
+       bits64 aSig;
+       int64 z;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       shiftCount = aExp - 0x403E;
+       if ( 0 <= shiftCount ) {
+               aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
+               if ( ( a.high != 0xC03E ) || aSig ) {
+                       float_raise( float_flag_invalid );
+                       if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
+                               return LIT64( 0x7FFFFFFFFFFFFFFF );
+                       }
+               }
+               return (sbits64) LIT64( 0x8000000000000000 );
+       }
+       else if ( aExp < 0x3FFF ) {
+               if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+               return 0;
+       }
+       z = aSig>>( - shiftCount );
+       if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
+               float_exception_flags |= float_flag_inexact;
+       }
+       if ( aSign ) z = - z;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32( floatx80 a )
+{
+       flag aSign;
+       int32 aExp;
+       bits64 aSig;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       if ( aExp == 0x7FFF ) {
+               if ( (bits64) ( aSig<<1 ) ) {
+                       return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
+               }
+               return packFloat32( aSign, 0xFF, 0 );
+       }
+       shift64RightJamming( aSig, 33, &aSig );
+       if ( aExp || aSig ) aExp -= 0x3F81;
+       return roundAndPackFloat32( aSign, aExp, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64( floatx80 a )
+{
+       flag aSign;
+       int32 aExp;
+       bits64 aSig, zSig;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       if ( aExp == 0x7FFF ) {
+               if ( (bits64) ( aSig<<1 ) ) {
+                       return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
+               }
+               return packFloat64( aSign, 0x7FF, 0 );
+       }
+       shift64RightJamming( aSig, 1, &zSig );
+       if ( aExp || aSig ) aExp -= 0x3C01;
+       return roundAndPackFloat64( aSign, aExp, zSig );
+
+}
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128( floatx80 a )
+{
+       flag aSign;
+       int16 aExp;
+       bits64 aSig, zSig0, zSig1;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
+               return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
+       }
+       shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+       return packFloat128( aSign, aExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+// 30-01-2016: Added for Previous
+
+floatx80 floatx80_normalize( floatx80 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+    
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    
+    if (aSig == 0) {
+        aExp = 0;
+        return packFloatx80( aSign, aExp, aSig );
+    }
+    while ( (aSig & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ) {
+        if ( aExp == 0 ) {
+            float_raise( float_flag_denormal );
+            break;
+        }
+        aSig = aSig << 1;
+        aExp--;
+    }
+    return packFloatx80( aSign, aExp, aSig );
+    
+}
+
+floatx80 floatx80_round32( floatx80 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+    
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    
+    return roundAndPackFloatx80(32, aSign, aExp, aSig, 0);
+
+}
+// end of addition for Previous
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended quadruple-precision floating-point
+| value.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int( floatx80 a )
+{
+       flag aSign;
+       int32 aExp;
+       bits64 lastBitMask, roundBitsMask;
+       int8 roundingMode;
+       floatx80 z;
+
+       aExp = extractFloatx80Exp( a );
+       if ( 0x403E <= aExp ) {
+               if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+                       return propagateFloatx80NaN( a, a );
+               }
+               return a;
+       }
+       if ( aExp < 0x3FFF ) {
+               if (    ( aExp == 0 )
+                               && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+                       return a;
+               }
+               float_exception_flags |= float_flag_inexact;
+               aSign = extractFloatx80Sign( a );
+               switch ( float_rounding_mode ) {
+                       case float_round_nearest_even:
+                       if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+                               ) {
+                               return
+                                       packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+                       }
+                       break;
+                       case float_round_down:
+                       return
+                                       aSign ?
+                                               packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+                               : packFloatx80( 0, 0, 0 );
+                       case float_round_up:
+                       return
+                                       aSign ? packFloatx80( 1, 0, 0 )
+                               : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+               }
+               return packFloatx80( aSign, 0, 0 );
+       }
+       lastBitMask = 1;
+       lastBitMask <<= 0x403E - aExp;
+       roundBitsMask = lastBitMask - 1;
+       z = a;
+       roundingMode = float_rounding_mode;
+       if ( roundingMode == float_round_nearest_even ) {
+               z.low += lastBitMask>>1;
+               if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+       }
+       else if ( roundingMode != float_round_to_zero ) {
+               if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+                       z.low += roundBitsMask;
+               }
+       }
+       z.low &= ~ roundBitsMask;
+       if ( z.low == 0 ) {
+               ++z.high;
+               z.low = LIT64( 0x8000000000000000 );
+       }
+       if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
+| negated before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+       int32 aExp, bExp, zExp;
+       bits64 aSig, bSig, zSig0, zSig1;
+       int32 expDiff;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       bSig = extractFloatx80Frac( b );
+       bExp = extractFloatx80Exp( b );
+       expDiff = aExp - bExp;
+       if ( 0 < expDiff ) {
+               if ( aExp == 0x7FFF ) {
+                       if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+                       return a;
+               }
+               if ( bExp == 0 ) --expDiff;
+               shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+               zExp = aExp;
+       }
+       else if ( expDiff < 0 ) {
+               if ( bExp == 0x7FFF ) {
+                       if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+                       return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+               }
+               if ( aExp == 0 ) ++expDiff;
+               shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+               zExp = bExp;
+       }
+       else {
+               if ( aExp == 0x7FFF ) {
+                       if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+                               return propagateFloatx80NaN( a, b );
+                       }
+                       return a;
+               }
+               zSig1 = 0;
+               zSig0 = aSig + bSig;
+               if ( aExp == 0 ) {
+                       normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+                       goto roundAndPack;
+               }
+               zExp = aExp;
+               goto shiftRight1;
+       }
+       zSig0 = aSig + bSig;
+       if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+       shiftRight1:
+       shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+       zSig0 |= LIT64( 0x8000000000000000 );
+       ++zExp;
+       roundAndPack:
+       return
+               roundAndPackFloatx80(
+                       floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+       int32 aExp, bExp, zExp;
+       bits64 aSig, bSig, zSig0, zSig1;
+       int32 expDiff;
+       floatx80 z;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       bSig = extractFloatx80Frac( b );
+       bExp = extractFloatx80Exp( b );
+       expDiff = aExp - bExp;
+       if ( 0 < expDiff ) goto aExpBigger;
+       if ( expDiff < 0 ) goto bExpBigger;
+       if ( aExp == 0x7FFF ) {
+               if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+                       return propagateFloatx80NaN( a, b );
+               }
+               float_raise( float_flag_invalid );
+               z.low = floatx80_default_nan_low;
+               z.high = floatx80_default_nan_high;
+               return z;
+       }
+       if ( aExp == 0 ) {
+               aExp = 1;
+               bExp = 1;
+       }
+       zSig1 = 0;
+       if ( bSig < aSig ) goto aBigger;
+       if ( aSig < bSig ) goto bBigger;
+       return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+       bExpBigger:
+       if ( bExp == 0x7FFF ) {
+               if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+               return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+       }
+       if ( aExp == 0 ) ++expDiff;
+       shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+       bBigger:
+       sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+       zExp = bExp;
+       zSign ^= 1;
+       goto normalizeRoundAndPack;
+       aExpBigger:
+       if ( aExp == 0x7FFF ) {
+               if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+               return a;
+       }
+       if ( bExp == 0 ) --expDiff;
+       shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+       aBigger:
+       sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+       zExp = aExp;
+       normalizeRoundAndPack:
+       return
+               normalizeRoundAndPackFloatx80(
+                       floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add( floatx80 a, floatx80 b )
+{
+       flag aSign, bSign;
+
+       aSign = extractFloatx80Sign( a );
+       bSign = extractFloatx80Sign( b );
+       if ( aSign == bSign ) {
+               return addFloatx80Sigs( a, b, aSign );
+       }
+       else {
+               return subFloatx80Sigs( a, b, aSign );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub( floatx80 a, floatx80 b )
+{
+       flag aSign, bSign;
+
+       aSign = extractFloatx80Sign( a );
+       bSign = extractFloatx80Sign( b );
+       if ( aSign == bSign ) {
+               return subFloatx80Sigs( a, b, aSign );
+       }
+       else {
+               return addFloatx80Sigs( a, b, aSign );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+       flag aSign, bSign, zSign;
+       int32 aExp, bExp, zExp;
+       bits64 aSig, bSig, zSig0, zSig1;
+       floatx80 z;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       bSig = extractFloatx80Frac( b );
+       bExp = extractFloatx80Exp( b );
+       bSign = extractFloatx80Sign( b );
+       zSign = aSign ^ bSign;
+       if ( aExp == 0x7FFF ) {
+               if (    (bits64) ( aSig<<1 )
+                               || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+                       return propagateFloatx80NaN( a, b );
+               }
+               if ( ( bExp | bSig ) == 0 ) goto invalid;
+               return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+       }
+       if ( bExp == 0x7FFF ) {
+               if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+               if ( ( aExp | aSig ) == 0 ) {
+       invalid:
+                       float_raise( float_flag_invalid );
+                       z.low = floatx80_default_nan_low;
+                       z.high = floatx80_default_nan_high;
+                       return z;
+               }
+               return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+               normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+               normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+       }
+       zExp = aExp + bExp - 0x3FFE;
+       mul64To128( aSig, bSig, &zSig0, &zSig1 );
+       if ( 0 < (sbits64) zSig0 ) {
+               shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+               --zExp;
+       }
+       return
+               roundAndPackFloatx80(
+                       floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+       flag aSign, bSign, zSign;
+       int32 aExp, bExp, zExp;
+       bits64 aSig, bSig, zSig0, zSig1;
+       bits64 rem0, rem1, rem2, term0, term1, term2;
+       floatx80 z;
+
+       aSig = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       bSig = extractFloatx80Frac( b );
+       bExp = extractFloatx80Exp( b );
+       bSign = extractFloatx80Sign( b );
+       zSign = aSign ^ bSign;
+       if ( aExp == 0x7FFF ) {
+               if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+               if ( bExp == 0x7FFF ) {
+                       if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+                       goto invalid;
+               }
+               return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+       }
+       if ( bExp == 0x7FFF ) {
+               if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+               return packFloatx80( zSign, 0, 0 );
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) {
+                       if ( ( aExp | aSig ) == 0 ) {
+       invalid:
+                               float_raise( float_flag_invalid );
+                               z.low = floatx80_default_nan_low;
+                               z.high = floatx80_default_nan_high;
+                               return z;
+                       }
+                       float_raise( float_flag_divbyzero );
+                       return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+               }
+               normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+       }
+       if ( aExp == 0 ) {
+               if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+               normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+       }
+       zExp = aExp - bExp + 0x3FFE;
+       rem1 = 0;
+       if ( bSig <= aSig ) {
+               shift128Right( aSig, 0, 1, &aSig, &rem1 );
+               ++zExp;
+       }
+       zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+       mul64To128( bSig, zSig0, &term0, &term1 );
+       sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+       while ( (sbits64) rem0 < 0 ) {
+               --zSig0;
+               add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+       }
+       zSig1 = estimateDiv128To64( rem1, 0, bSig );
+       if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+               mul64To128( bSig, zSig1, &term1, &term2 );
+               sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+               while ( (sbits64) rem1 < 0 ) {
+                       --zSig1;
+                       add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+               }
+               zSig1 |= ( ( rem1 | rem2 ) != 0 );
+       }
+       return
+               roundAndPackFloatx80(
+                       floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+       flag aSign, zSign;
+       int32 aExp, bExp, expDiff;
+       bits64 aSig0, aSig1, bSig;
+       bits64 q, term0, term1, alternateASig0, alternateASig1;
+       floatx80 z;
+
+       aSig0 = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       bSig = extractFloatx80Frac( b );
+       bExp = extractFloatx80Exp( b );
+//    bSign = extractFloatx80Sign( b );
+       if ( aExp == 0x7FFF ) {
+               if (    (bits64) ( aSig0<<1 )
+                               || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+                       return propagateFloatx80NaN( a, b );
+               }
+               goto invalid;
+       }
+       if ( bExp == 0x7FFF ) {
+               if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+               return a;
+       }
+       if ( bExp == 0 ) {
+               if ( bSig == 0 ) {
+       invalid:
+                       float_raise( float_flag_invalid );
+                       z.low = floatx80_default_nan_low;
+                       z.high = floatx80_default_nan_high;
+                       return z;
+               }
+               normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+       }
+       if ( aExp == 0 ) {
+               if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+               normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+       }
+       bSig |= LIT64( 0x8000000000000000 );
+       zSign = aSign;
+       expDiff = aExp - bExp;
+       aSig1 = 0;
+       if ( expDiff < 0 ) {
+               if ( expDiff < -1 ) return a;
+               shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+               expDiff = 0;
+       }
+       q = ( bSig <= aSig0 );
+       if ( q ) aSig0 -= bSig;
+       expDiff -= 64;
+       while ( 0 < expDiff ) {
+               q = estimateDiv128To64( aSig0, aSig1, bSig );
+               q = ( 2 < q ) ? q - 2 : 0;
+               mul64To128( bSig, q, &term0, &term1 );
+               sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+               shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+               expDiff -= 62;
+       }
+       expDiff += 64;
+       if ( 0 < expDiff ) {
+               q = estimateDiv128To64( aSig0, aSig1, bSig );
+               q = ( 2 < q ) ? q - 2 : 0;
+               q >>= 64 - expDiff;
+               mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+               sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+               shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+               while ( le128( term0, term1, aSig0, aSig1 ) ) {
+                       ++q;
+                       sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+               }
+       }
+       else {
+               term1 = 0;
+               term0 = bSig;
+       }
+       sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+       if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+                       || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+                               && ( q & 1 ) )
+               ) {
+               aSig0 = alternateASig0;
+               aSig1 = alternateASig1;
+               zSign = ! zSign;
+       }
+       return
+               normalizeRoundAndPackFloatx80(
+                       80, zSign, bExp + expDiff, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt( floatx80 a )
+{
+       flag aSign;
+       int32 aExp, zExp;
+       bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+       bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+       floatx80 z;
+
+       aSig0 = extractFloatx80Frac( a );
+       aExp = extractFloatx80Exp( a );
+       aSign = extractFloatx80Sign( a );
+       if ( aExp == 0x7FFF ) {
+               if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+               if ( ! aSign ) return a;
+               goto invalid;
+       }
+       if ( aSign ) {
+               if ( ( aExp | aSig0 ) == 0 ) return a;
+       invalid:
+               float_raise( float_flag_invalid );
+               z.low = floatx80_default_nan_low;
+               z.high = floatx80_default_nan_high;
+               return z;
+       }
+       if ( aExp == 0 ) {
+               if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+               normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+       }
+       zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+       zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+       shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
+       zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+       doubleZSig0 = zSig0<<1;
+       mul64To128( zSig0, zSig0, &term0, &term1 );
+       sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+       while ( (sbits64) rem0 < 0 ) {
+               --zSig0;
+               doubleZSig0 -= 2;
+               add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+       }
+       zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+       if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+               if ( zSig1 == 0 ) zSig1 = 1;
+               mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+               sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+               mul64To128( zSig1, zSig1, &term2, &term3 );
+               sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+               while ( (sbits64) rem1 < 0 ) {
+                       --zSig1;
+                       shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+                       term3 |= 1;
+                       term2 |= doubleZSig0;
+                       add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+               }
+               zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+       }
+       shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+       zSig0 |= doubleZSig0;
+       return
+               roundAndPackFloatx80(
+                       floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+
+}
+
+// 07-01-2017: Added for Previous
+/*----------------------------------------------------------------------------
+ | Returns the mantissa of the extended double-precision floating-point
+ | value `a'.
+ *----------------------------------------------------------------------------*/
+
+floatx80 floatx80_getman( floatx80 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig;
+    
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a );
+        float_raise( float_flag_invalid );
+        a.low = floatx80_default_nan_low;
+        a.high = floatx80_default_nan_high;
+        return a;
+    }
+    
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    
+    return roundAndPackFloatx80(floatx80_rounding_precision, aSign, 0x3FFF, aSig, 0);
+}
+
+/*----------------------------------------------------------------------------
+ | Returns the exponent of the extended double-precision floating-point
+ | value `a' as an extended double-precision value.
+ *----------------------------------------------------------------------------*/
+
+floatx80 floatx80_getexp( floatx80 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig;
+    
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, a );
+        float_raise( float_flag_invalid );
+        a.low = floatx80_default_nan_low;
+        a.high = floatx80_default_nan_high;
+        return a;
+    }
+    
+    if (aExp == 0 && aSig == 0) return packFloatx80(aSign, 0, 0);
+    
+    return int32_to_floatx80(aExp - 0x3FFF);
+}
+// End of addition for Previous
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| equal to the corresponding value `b', and 0 otherwise.  The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_eq( floatx80 a, floatx80 b )
+{
+       if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+                       || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+               ) {
+               if (    floatx80_is_signaling_nan( a )
+                               || floatx80_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       return
+                       ( a.low == b.low )
+               && (    ( a.high == b.high )
+                               || (    ( a.low == 0 )
+                                       && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+                       );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than or equal to the corresponding value `b', and 0 otherwise.  The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_le( floatx80 a, floatx80 b )
+{
+       flag aSign, bSign;
+
+       if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+                       || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       aSign = extractFloatx80Sign( a );
+       bSign = extractFloatx80Sign( b );
+       if ( aSign != bSign ) {
+               return
+                               aSign
+                       || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                                       == 0 );
+       }
+       return
+                       aSign ? le128( b.high, b.low, a.high, a.low )
+               : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_lt( floatx80 a, floatx80 b )
+{
+       flag aSign, bSign;
+
+       if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+                       || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       aSign = extractFloatx80Sign( a );
+       bSign = extractFloatx80Sign( b );
+       if ( aSign != bSign ) {
+               return
+                               aSign
+                       && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                                       != 0 );
+       }
+       return
+                       aSign ? lt128( b.high, b.low, a.high, a.low )
+               : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is equal
+| to the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_eq_signaling( floatx80 a, floatx80 b )
+{
+       if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+                       || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       return
+                       ( a.low == b.low )
+               && (    ( a.high == b.high )
+                               || (    ( a.low == 0 )
+                                       && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+                       );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
+| do not cause an exception.  Otherwise, the comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_le_quiet( floatx80 a, floatx80 b )
+{
+       flag aSign, bSign;
+
+       if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+                       || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+               ) {
+               if (    floatx80_is_signaling_nan( a )
+                               || floatx80_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       aSign = extractFloatx80Sign( a );
+       bSign = extractFloatx80Sign( b );
+       if ( aSign != bSign ) {
+               return
+                               aSign
+                       || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                                       == 0 );
+       }
+       return
+                       aSign ? le128( b.high, b.low, a.high, a.low )
+               : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
+| an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_lt_quiet( floatx80 a, floatx80 b )
+{
+       flag aSign, bSign;
+
+       if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+                       || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+                               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+               ) {
+               if (    floatx80_is_signaling_nan( a )
+                               || floatx80_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       aSign = extractFloatx80Sign( a );
+       bSign = extractFloatx80Sign( b );
+       if ( aSign != bSign ) {
+               return
+                               aSign
+                       && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                                       != 0 );
+       }
+       return
+                       aSign ? lt128( b.high, b.low, a.high, a.low )
+               : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32( float128 a )
+{
+       flag aSign;
+       int32 aExp, shiftCount;
+       bits64 aSig0, aSig1;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
+       if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+       aSig0 |= ( aSig1 != 0 );
+       shiftCount = 0x4028 - aExp;
+       if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
+       return roundAndPackInt32( aSign, aSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.  If
+| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32_round_to_zero( float128 a )
+{
+       flag aSign;
+       int32 aExp, shiftCount;
+       bits64 aSig0, aSig1, savedASig;
+       int32 z;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       aSig0 |= ( aSig1 != 0 );
+       if ( 0x401E < aExp ) {
+               if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
+               goto invalid;
+       }
+       else if ( aExp < 0x3FFF ) {
+               if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
+               return 0;
+       }
+       aSig0 |= LIT64( 0x0001000000000000 );
+       shiftCount = 0x402F - aExp;
+       savedASig = aSig0;
+       aSig0 >>= shiftCount;
+       z = aSig0;
+       if ( aSign ) z = - z;
+    z = (sbits32) z;
+       if ( ( z < 0 ) ^ aSign ) {
+       invalid:
+               float_raise( float_flag_invalid );
+               return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+       }
+       if ( ( aSig0<<shiftCount ) != savedASig ) {
+               float_exception_flags |= float_flag_inexact;
+       }
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| positive integer is returned.  Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64( float128 a )
+{
+       flag aSign;
+       int32 aExp, shiftCount;
+       bits64 aSig0, aSig1;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+       shiftCount = 0x402F - aExp;
+       if ( shiftCount <= 0 ) {
+               if ( 0x403E < aExp ) {
+                       float_raise( float_flag_invalid );
+                       if (    ! aSign
+                                       || (    ( aExp == 0x7FFF )
+                                               && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
+                                       )
+                               ) {
+                               return LIT64( 0x7FFFFFFFFFFFFFFF );
+                       }
+                       return (sbits64) LIT64( 0x8000000000000000 );
+               }
+               shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
+       }
+       else {
+               shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
+       }
+       return roundAndPackInt64( aSign, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64_round_to_zero( float128 a )
+{
+       flag aSign;
+       int32 aExp, shiftCount;
+       bits64 aSig0, aSig1;
+       int64 z;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+       shiftCount = aExp - 0x402F;
+       if ( 0 < shiftCount ) {
+               if ( 0x403E <= aExp ) {
+                       aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
+                       if (    ( a.high == LIT64( 0xC03E000000000000 ) )
+                                       && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
+                               if ( aSig1 ) float_exception_flags |= float_flag_inexact;
+                       }
+                       else {
+                               float_raise( float_flag_invalid );
+                               if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
+                                       return LIT64( 0x7FFFFFFFFFFFFFFF );
+                               }
+                       }
+                       return (sbits64) LIT64( 0x8000000000000000 );
+               }
+               z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
+               if ( (bits64) ( aSig1<<shiftCount ) ) {
+                       float_exception_flags |= float_flag_inexact;
+               }
+       }
+       else {
+               if ( aExp < 0x3FFF ) {
+                       if ( aExp | aSig0 | aSig1 ) {
+                               float_exception_flags |= float_flag_inexact;
+                       }
+                       return 0;
+               }
+               z = aSig0>>( - shiftCount );
+               if (    aSig1
+                               || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
+                       float_exception_flags |= float_flag_inexact;
+               }
+       }
+       if ( aSign ) z = - z;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the single-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float128_to_float32( float128 a )
+{
+       flag aSign;
+       int32 aExp;
+       bits64 aSig0, aSig1;
+       bits32 zSig;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       if ( aExp == 0x7FFF ) {
+               if ( aSig0 | aSig1 ) {
+                       return commonNaNToFloat32( float128ToCommonNaN( a ) );
+               }
+               return packFloat32( aSign, 0xFF, 0 );
+       }
+       aSig0 |= ( aSig1 != 0 );
+       shift64RightJamming( aSig0, 18, &aSig0 );
+       zSig = aSig0;
+       if ( aExp || zSig ) {
+               zSig |= 0x40000000;
+               aExp -= 0x3F81;
+       }
+       return roundAndPackFloat32( aSign, aExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float128_to_float64( float128 a )
+{
+       flag aSign;
+       int32 aExp;
+       bits64 aSig0, aSig1;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       if ( aExp == 0x7FFF ) {
+               if ( aSig0 | aSig1 ) {
+                       return commonNaNToFloat64( float128ToCommonNaN( a ) );
+               }
+               return packFloat64( aSign, 0x7FF, 0 );
+       }
+       shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+       aSig0 |= ( aSig1 != 0 );
+       if ( aExp || aSig0 ) {
+               aSig0 |= LIT64( 0x4000000000000000 );
+               aExp -= 0x3C01;
+       }
+       return roundAndPackFloat64( aSign, aExp, aSig0 );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80( float128 a )
+{
+       flag aSign;
+       int32 aExp;
+       bits64 aSig0, aSig1;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       if ( aExp == 0x7FFF ) {
+               if ( aSig0 | aSig1 ) {
+                       return commonNaNToFloatx80( float128ToCommonNaN( a ) );
+               }
+               return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+       }
+       if ( aExp == 0 ) {
+               if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
+               normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+       }
+       else {
+               aSig0 |= LIT64( 0x0001000000000000 );
+       }
+       shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
+       return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
+| returns the result as a quadruple-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_round_to_int( float128 a )
+{
+       flag aSign;
+       int32 aExp;
+       bits64 lastBitMask, roundBitsMask;
+       int8 roundingMode;
+       float128 z;
+
+       aExp = extractFloat128Exp( a );
+       if ( 0x402F <= aExp ) {
+               if ( 0x406F <= aExp ) {
+                       if (    ( aExp == 0x7FFF )
+                                       && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+                               ) {
+                               return propagateFloat128NaN( a, a );
+                       }
+                       return a;
+               }
+               lastBitMask = 1;
+               lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
+               roundBitsMask = lastBitMask - 1;
+               z = a;
+               roundingMode = float_rounding_mode;
+               if ( roundingMode == float_round_nearest_even ) {
+                       if ( lastBitMask ) {
+                               add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+                               if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+                       }
+                       else {
+                               if ( (sbits64) z.low < 0 ) {
+                                       ++z.high;
+                                       if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
+                               }
+                       }
+               }
+               else if ( roundingMode != float_round_to_zero ) {
+                       if (   extractFloat128Sign( z )
+                                       ^ ( roundingMode == float_round_up ) ) {
+                               add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+                       }
+               }
+               z.low &= ~ roundBitsMask;
+       }
+       else {
+               if ( aExp < 0x3FFF ) {
+                       if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
+                       float_exception_flags |= float_flag_inexact;
+                       aSign = extractFloat128Sign( a );
+                       switch ( float_rounding_mode ) {
+                               case float_round_nearest_even:
+                               if (    ( aExp == 0x3FFE )
+                                               && (   extractFloat128Frac0( a )
+                                                       | extractFloat128Frac1( a ) )
+                                       ) {
+                                       return packFloat128( aSign, 0x3FFF, 0, 0 );
+                               }
+                               break;
+                               case float_round_down:
+                               return
+                                               aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
+                                       : packFloat128( 0, 0, 0, 0 );
+                               case float_round_up:
+                               return
+                                               aSign ? packFloat128( 1, 0, 0, 0 )
+                                       : packFloat128( 0, 0x3FFF, 0, 0 );
+                       }
+                       return packFloat128( aSign, 0, 0, 0 );
+               }
+               lastBitMask = 1;
+               lastBitMask <<= 0x402F - aExp;
+               roundBitsMask = lastBitMask - 1;
+               z.low = 0;
+               z.high = a.high;
+               roundingMode = float_rounding_mode;
+               if ( roundingMode == float_round_nearest_even ) {
+                       z.high += lastBitMask>>1;
+                       if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
+                               z.high &= ~ lastBitMask;
+                       }
+               }
+               else if ( roundingMode != float_round_to_zero ) {
+                       if (   extractFloat128Sign( z )
+                                       ^ ( roundingMode == float_round_up ) ) {
+                               z.high |= ( a.low != 0 );
+                               z.high += roundBitsMask;
+                       }
+               }
+               z.high &= ~ roundBitsMask;
+       }
+       if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
+               float_exception_flags |= float_flag_inexact;
+       }
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+       int32 aExp, bExp, zExp;
+       bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+       int32 expDiff;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       bSig1 = extractFloat128Frac1( b );
+       bSig0 = extractFloat128Frac0( b );
+       bExp = extractFloat128Exp( b );
+       expDiff = aExp - bExp;
+       if ( 0 < expDiff ) {
+               if ( aExp == 0x7FFF ) {
+                       if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+                       return a;
+               }
+               if ( bExp == 0 ) {
+                       --expDiff;
+               }
+               else {
+                       bSig0 |= LIT64( 0x0001000000000000 );
+               }
+               shift128ExtraRightJamming(
+                       bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+               zExp = aExp;
+       }
+       else if ( expDiff < 0 ) {
+               if ( bExp == 0x7FFF ) {
+                       if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+                       return packFloat128( zSign, 0x7FFF, 0, 0 );
+               }
+               if ( aExp == 0 ) {
+                       ++expDiff;
+               }
+               else {
+                       aSig0 |= LIT64( 0x0001000000000000 );
+               }
+               shift128ExtraRightJamming(
+                       aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+               zExp = bExp;
+       }
+       else {
+               if ( aExp == 0x7FFF ) {
+                       if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+                               return propagateFloat128NaN( a, b );
+                       }
+                       return a;
+               }
+               add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+               if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
+               zSig2 = 0;
+               zSig0 |= LIT64( 0x0002000000000000 );
+               zExp = aExp;
+               goto shiftRight1;
+       }
+       aSig0 |= LIT64( 0x0001000000000000 );
+       add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+       --zExp;
+       if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+       ++zExp;
+       shiftRight1:
+       shift128ExtraRightJamming(
+               zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+       roundAndPack:
+       return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
+{
+       int32 aExp, bExp, zExp;
+       bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+       int32 expDiff;
+       float128 z;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       bSig1 = extractFloat128Frac1( b );
+       bSig0 = extractFloat128Frac0( b );
+       bExp = extractFloat128Exp( b );
+       expDiff = aExp - bExp;
+       shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+       shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+       if ( 0 < expDiff ) goto aExpBigger;
+       if ( expDiff < 0 ) goto bExpBigger;
+       if ( aExp == 0x7FFF ) {
+               if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+                       return propagateFloat128NaN( a, b );
+               }
+               float_raise( float_flag_invalid );
+               z.low = float128_default_nan_low;
+               z.high = float128_default_nan_high;
+               return z;
+       }
+       if ( aExp == 0 ) {
+               aExp = 1;
+               bExp = 1;
+       }
+       if ( bSig0 < aSig0 ) goto aBigger;
+       if ( aSig0 < bSig0 ) goto bBigger;
+       if ( bSig1 < aSig1 ) goto aBigger;
+       if ( aSig1 < bSig1 ) goto bBigger;
+       return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
+       bExpBigger:
+       if ( bExp == 0x7FFF ) {
+               if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+               return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
+       }
+       if ( aExp == 0 ) {
+               ++expDiff;
+       }
+       else {
+               aSig0 |= LIT64( 0x4000000000000000 );
+       }
+       shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+       bSig0 |= LIT64( 0x4000000000000000 );
+       bBigger:
+       sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+       zExp = bExp;
+       zSign ^= 1;
+       goto normalizeRoundAndPack;
+       aExpBigger:
+       if ( aExp == 0x7FFF ) {
+               if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+               return a;
+       }
+       if ( bExp == 0 ) {
+               --expDiff;
+       }
+       else {
+               bSig0 |= LIT64( 0x4000000000000000 );
+       }
+       shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+       aSig0 |= LIT64( 0x4000000000000000 );
+       aBigger:
+       sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+       zExp = aExp;
+       normalizeRoundAndPack:
+       --zExp;
+       return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add( float128 a, float128 b )
+{
+       flag aSign, bSign;
+
+       aSign = extractFloat128Sign( a );
+       bSign = extractFloat128Sign( b );
+       if ( aSign == bSign ) {
+               return addFloat128Sigs( a, b, aSign );
+       }
+       else {
+               return subFloat128Sigs( a, b, aSign );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub( float128 a, float128 b )
+{
+       flag aSign, bSign;
+
+       aSign = extractFloat128Sign( a );
+       bSign = extractFloat128Sign( b );
+       if ( aSign == bSign ) {
+               return subFloat128Sigs( a, b, aSign );
+       }
+       else {
+               return addFloat128Sigs( a, b, aSign );
+       }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul( float128 a, float128 b )
+{
+       flag aSign, bSign, zSign;
+       int32 aExp, bExp, zExp;
+       bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+       float128 z;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       bSig1 = extractFloat128Frac1( b );
+       bSig0 = extractFloat128Frac0( b );
+       bExp = extractFloat128Exp( b );
+       bSign = extractFloat128Sign( b );
+       zSign = aSign ^ bSign;
+       if ( aExp == 0x7FFF ) {
+               if (    ( aSig0 | aSig1 )
+                               || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+                       return propagateFloat128NaN( a, b );
+               }
+               if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
+               return packFloat128( zSign, 0x7FFF, 0, 0 );
+       }
+       if ( bExp == 0x7FFF ) {
+               if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+               if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+       invalid:
+                       float_raise( float_flag_invalid );
+                       z.low = float128_default_nan_low;
+                       z.high = float128_default_nan_high;
+                       return z;
+               }
+               return packFloat128( zSign, 0x7FFF, 0, 0 );
+       }
+       if ( aExp == 0 ) {
+               if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+               normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+       }
+       if ( bExp == 0 ) {
+               if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+               normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+       }
+       zExp = aExp + bExp - 0x4000;
+       aSig0 |= LIT64( 0x0001000000000000 );
+       shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
+       mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+       add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
+       zSig2 |= ( zSig3 != 0 );
+       if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
+               shift128ExtraRightJamming(
+                       zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+               ++zExp;
+       }
+       return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div( float128 a, float128 b )
+{
+       flag aSign, bSign, zSign;
+       int32 aExp, bExp, zExp;
+       bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+       bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+       float128 z;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       bSig1 = extractFloat128Frac1( b );
+       bSig0 = extractFloat128Frac0( b );
+       bExp = extractFloat128Exp( b );
+       bSign = extractFloat128Sign( b );
+       zSign = aSign ^ bSign;
+       if ( aExp == 0x7FFF ) {
+               if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+               if ( bExp == 0x7FFF ) {
+                       if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+                       goto invalid;
+               }
+               return packFloat128( zSign, 0x7FFF, 0, 0 );
+       }
+       if ( bExp == 0x7FFF ) {
+               if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+               return packFloat128( zSign, 0, 0, 0 );
+       }
+       if ( bExp == 0 ) {
+               if ( ( bSig0 | bSig1 ) == 0 ) {
+                       if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+       invalid:
+                               float_raise( float_flag_invalid );
+                               z.low = float128_default_nan_low;
+                               z.high = float128_default_nan_high;
+                               return z;
+                       }
+                       float_raise( float_flag_divbyzero );
+                       return packFloat128( zSign, 0x7FFF, 0, 0 );
+               }
+               normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+       }
+       if ( aExp == 0 ) {
+               if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+               normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+       }
+       zExp = aExp - bExp + 0x3FFD;
+       shortShift128Left(
+               aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+       shortShift128Left(
+               bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+       if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
+               shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+               ++zExp;
+       }
+       zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
+       mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+       sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+       while ( (sbits64) rem0 < 0 ) {
+               --zSig0;
+               add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+       }
+       zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
+       if ( ( zSig1 & 0x3FFF ) <= 4 ) {
+               mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+               sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+               while ( (sbits64) rem1 < 0 ) {
+                       --zSig1;
+                       add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+               }
+               zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+       }
+       shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+       return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the quadruple-precision floating-point value `a'
+| with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_rem( float128 a, float128 b )
+{
+       flag aSign, zSign;
+       int32 aExp, bExp, expDiff;
+       bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+       bits64 allZero, alternateASig0, alternateASig1, sigMean1;
+       sbits64 sigMean0;
+       float128 z;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       bSig1 = extractFloat128Frac1( b );
+       bSig0 = extractFloat128Frac0( b );
+       bExp = extractFloat128Exp( b );
+//    bSign = extractFloat128Sign( b );
+       if ( aExp == 0x7FFF ) {
+               if (    ( aSig0 | aSig1 )
+                               || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+                       return propagateFloat128NaN( a, b );
+               }
+               goto invalid;
+       }
+       if ( bExp == 0x7FFF ) {
+               if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+               return a;
+       }
+       if ( bExp == 0 ) {
+               if ( ( bSig0 | bSig1 ) == 0 ) {
+       invalid:
+                       float_raise( float_flag_invalid );
+                       z.low = float128_default_nan_low;
+                       z.high = float128_default_nan_high;
+                       return z;
+               }
+               normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+       }
+       if ( aExp == 0 ) {
+               if ( ( aSig0 | aSig1 ) == 0 ) return a;
+               normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+       }
+       expDiff = aExp - bExp;
+       if ( expDiff < -1 ) return a;
+       shortShift128Left(
+               aSig0 | LIT64( 0x0001000000000000 ),
+               aSig1,
+               15 - ( expDiff < 0 ),
+               &aSig0,
+               &aSig1
+       );
+       shortShift128Left(
+               bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+       q = le128( bSig0, bSig1, aSig0, aSig1 );
+       if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+       expDiff -= 64;
+       while ( 0 < expDiff ) {
+               q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+               q = ( 4 < q ) ? q - 4 : 0;
+               mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+               shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+               shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+               sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+               expDiff -= 61;
+       }
+       if ( -64 < expDiff ) {
+               q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+               q = ( 4 < q ) ? q - 4 : 0;
+               q >>= - expDiff;
+               shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+               expDiff += 52;
+               if ( expDiff < 0 ) {
+                       shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+               }
+               else {
+                       shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+               }
+               mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+               sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+       }
+       else {
+               shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+               shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+       }
+       do {
+               alternateASig0 = aSig0;
+               alternateASig1 = aSig1;
+               ++q;
+               sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+       } while ( 0 <= (sbits64) aSig0 );
+       add128(
+               aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
+       if (    ( sigMean0 < 0 )
+                       || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
+               aSig0 = alternateASig0;
+               aSig1 = alternateASig1;
+       }
+       zSign = ( (sbits64) aSig0 < 0 );
+       if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
+       return
+               normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the quadruple-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sqrt( float128 a )
+{
+       flag aSign;
+       int32 aExp, zExp;
+       bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+       bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+       float128 z;
+
+       aSig1 = extractFloat128Frac1( a );
+       aSig0 = extractFloat128Frac0( a );
+       aExp = extractFloat128Exp( a );
+       aSign = extractFloat128Sign( a );
+       if ( aExp == 0x7FFF ) {
+               if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
+               if ( ! aSign ) return a;
+               goto invalid;
+       }
+       if ( aSign ) {
+               if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
+       invalid:
+               float_raise( float_flag_invalid );
+               z.low = float128_default_nan_low;
+               z.high = float128_default_nan_high;
+               return z;
+       }
+       if ( aExp == 0 ) {
+               if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
+               normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+       }
+       zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
+       aSig0 |= LIT64( 0x0001000000000000 );
+       zSig0 = estimateSqrt32( aExp, aSig0>>17 );
+       shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
+       zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+       doubleZSig0 = zSig0<<1;
+       mul64To128( zSig0, zSig0, &term0, &term1 );
+       sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+       while ( (sbits64) rem0 < 0 ) {
+               --zSig0;
+               doubleZSig0 -= 2;
+               add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+       }
+       zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+       if ( ( zSig1 & 0x1FFF ) <= 5 ) {
+               if ( zSig1 == 0 ) zSig1 = 1;
+               mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+               sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+               mul64To128( zSig1, zSig1, &term2, &term3 );
+               sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+               while ( (sbits64) rem1 < 0 ) {
+                       --zSig1;
+                       shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+                       term3 |= 1;
+                       term2 |= doubleZSig0;
+                       add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+               }
+               zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+       }
+       shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
+       return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_eq( float128 a, float128 b )
+{
+       if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+                               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+                       || (    ( extractFloat128Exp( b ) == 0x7FFF )
+                               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+               ) {
+               if (    float128_is_signaling_nan( a )
+                               || float128_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       return
+                       ( a.low == b.low )
+               && (    ( a.high == b.high )
+                               || (    ( a.low == 0 )
+                                       && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+                       );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_le( float128 a, float128 b )
+{
+       flag aSign, bSign;
+
+       if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+                               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+                       || (    ( extractFloat128Exp( b ) == 0x7FFF )
+                               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       aSign = extractFloat128Sign( a );
+       bSign = extractFloat128Sign( b );
+       if ( aSign != bSign ) {
+               return
+                               aSign
+                       || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                                       == 0 );
+       }
+       return
+                       aSign ? le128( b.high, b.low, a.high, a.low )
+               : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_lt( float128 a, float128 b )
+{
+       flag aSign, bSign;
+
+       if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+                               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+                       || (    ( extractFloat128Exp( b ) == 0x7FFF )
+                               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       aSign = extractFloat128Sign( a );
+       bSign = extractFloat128Sign( b );
+       if ( aSign != bSign ) {
+               return
+                               aSign
+                       && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                                       != 0 );
+       }
+       return
+                       aSign ? lt128( b.high, b.low, a.high, a.low )
+               : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN.  Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_eq_signaling( float128 a, float128 b )
+{
+       if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+                               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+                       || (    ( extractFloat128Exp( b ) == 0x7FFF )
+                               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+               ) {
+               float_raise( float_flag_invalid );
+               return 0;
+       }
+       return
+                       ( a.low == b.low )
+               && (    ( a.high == b.high )
+                               || (    ( a.low == 0 )
+                                       && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
+                       );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+| cause an exception.  Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_le_quiet( float128 a, float128 b )
+{
+       flag aSign, bSign;
+
+       if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+                               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+                       || (    ( extractFloat128Exp( b ) == 0x7FFF )
+                               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+               ) {
+               if (    float128_is_signaling_nan( a )
+                               || float128_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       aSign = extractFloat128Sign( a );
+       bSign = extractFloat128Sign( b );
+       if ( aSign != bSign ) {
+               return
+                               aSign
+                       || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                                       == 0 );
+       }
+       return
+                       aSign ? le128( b.high, b.low, a.high, a.low )
+               : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+| exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_lt_quiet( float128 a, float128 b )
+{
+       flag aSign, bSign;
+
+       if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
+                               && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+                       || (    ( extractFloat128Exp( b ) == 0x7FFF )
+                               && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+               ) {
+               if (    float128_is_signaling_nan( a )
+                               || float128_is_signaling_nan( b ) ) {
+                       float_raise( float_flag_invalid );
+               }
+               return 0;
+       }
+       aSign = extractFloat128Sign( a );
+       bSign = extractFloat128Sign( b );
+       if ( aSign != bSign ) {
+               return
+                               aSign
+                       && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                                       != 0 );
+       }
+       return
+                       aSign ? lt128( b.high, b.low, a.high, a.low )
+               : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h

new file mode 100644 (file)

index 0000000..cb21f46
--- /dev/null
+++ b/softfloat/softfloat.h
@@ -0,0 +1,485 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+/*----------------------------------------------------------------------------
+| The macro `FLOATX80' must be defined to enable the extended double-precision
+| floating-point format `floatx80'.  If this macro is not defined, the
+| `floatx80' type will not be defined, and none of the functions that either
+| input or output the `floatx80' type will be defined.  The same applies to
+| the `FLOATX128' macro and the quadruple-precision format `float128'.
+*----------------------------------------------------------------------------*/
+#define FLOATX80
+#define FLOATX128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+
+#include "mamesf.h"
+
+typedef bits32 float32;
+typedef bits64 float64;
+#ifdef FLOATX80
+typedef struct {
+       bits16 high;
+       bits64 low;
+} floatx80;
+#endif
+#ifdef FLOATX128
+typedef struct {
+       bits64 high, low;
+} float128;
+#endif
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations.  (Can be specialized to target if
+| desired.)
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+extern int8 float_detect_tininess;
+enum {
+       float_tininess_after_rounding  = 0,
+       float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+extern int8 float_rounding_mode;
+enum {
+       float_round_nearest_even = 0,
+       float_round_to_zero      = 1,
+       float_round_down         = 2,
+       float_round_up           = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+extern int8 float_exception_flags;
+enum {
+       float_flag_invalid = 0x01, float_flag_denormal = 0x02, float_flag_divbyzero = 0x04, float_flag_overflow = 0x08,
+       float_flag_underflow = 0x10, float_flag_inexact = 0x20
+};
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int32 );
+float64 int32_to_float64( int32 );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( int32 );
+#endif
+#ifdef FLOATX128
+float128 int32_to_float128( int32 );
+#endif
+float32 int64_to_float32( int64 );
+float64 int64_to_float64( int64 );
+#ifdef FLOATX80
+floatx80 int64_to_floatx80( int64 );
+#endif
+#ifdef FLOATX128
+float128 int64_to_float128( int64 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float32_to_int32( float32 );
+int32 float32_to_int32_round_to_zero( float32 );
+int64 float32_to_int64( float32 );
+int64 float32_to_int64_round_to_zero( float32 );
+float64 float32_to_float64( float32 );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 );
+floatx80 float32_to_floatx80_allowunnormal( float32 );
+#endif
+#ifdef FLOATX128
+float128 float32_to_float128( float32 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 );
+float32 float32_add( float32, float32 );
+float32 float32_sub( float32, float32 );
+float32 float32_mul( float32, float32 );
+float32 float32_div( float32, float32 );
+float32 float32_rem( float32, float32 );
+float32 float32_sqrt( float32 );
+flag float32_eq( float32, float32 );
+flag float32_le( float32, float32 );
+flag float32_lt( float32, float32 );
+flag float32_eq_signaling( float32, float32 );
+flag float32_le_quiet( float32, float32 );
+flag float32_lt_quiet( float32, float32 );
+flag float32_is_signaling_nan( float32 );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float64_to_int32( float64 );
+int32 float64_to_int32_round_to_zero( float64 );
+int64 float64_to_int64( float64 );
+int64 float64_to_int64_round_to_zero( float64 );
+float32 float64_to_float32( float64 );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 );
+floatx80 float64_to_floatx80_allowunnormal( float64 );
+#endif
+#ifdef FLOATX128
+float128 float64_to_float128( float64 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 );
+float64 float64_add( float64, float64 );
+float64 float64_sub( float64, float64 );
+float64 float64_mul( float64, float64 );
+float64 float64_div( float64, float64 );
+float64 float64_rem( float64, float64 );
+float64 float64_sqrt( float64 );
+flag float64_eq( float64, float64 );
+flag float64_le( float64, float64 );
+flag float64_lt( float64, float64 );
+flag float64_eq_signaling( float64, float64 );
+flag float64_le_quiet( float64, float64 );
+flag float64_lt_quiet( float64, float64 );
+flag float64_is_signaling_nan( float64 );
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 floatx80_to_int32( floatx80 );
+int32 floatx80_to_int32_round_to_zero( floatx80 );
+int64 floatx80_to_int64( floatx80 );
+int64 floatx80_to_int64_round_to_zero( floatx80 );
+float32 floatx80_to_float32( floatx80 );
+float64 floatx80_to_float64( floatx80 );
+#ifdef FLOATX128
+float128 floatx80_to_float128( floatx80 );
+#endif
+floatx80 floatx80_scale(floatx80 a, floatx80 b);
+bits64 extractFloatx80Frac( floatx80 a );
+int32 extractFloatx80Exp( floatx80 a );
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+| extended double-precision floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+       floatx80 z;
+
+       z.low = zSig;
+       z.high = ( ( (bits16) zSign )<<15 ) + zExp;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision rounding precision.  Valid
+| values are 32, 64, and 80.
+*----------------------------------------------------------------------------*/
+extern int8 floatx80_rounding_precision;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 );
+floatx80 floatx80_round32( floatx80 );
+floatx80 floatx80_normalize( floatx80 );
+floatx80 floatx80_add( floatx80, floatx80 );
+floatx80 floatx80_sub( floatx80, floatx80 );
+floatx80 floatx80_mul( floatx80, floatx80 );
+floatx80 floatx80_div( floatx80, floatx80 );
+floatx80 floatx80_rem( floatx80, floatx80 );
+floatx80 floatx80_sqrt( floatx80 );
+flag floatx80_eq( floatx80, floatx80 );
+flag floatx80_le( floatx80, floatx80 );
+flag floatx80_lt( floatx80, floatx80 );
+flag floatx80_eq_signaling( floatx80, floatx80 );
+flag floatx80_le_quiet( floatx80, floatx80 );
+flag floatx80_lt_quiet( floatx80, floatx80 );
+
+flag floatx80_is_signaling_nan( floatx80 );
+flag floatx80_is_nan( floatx80 );
+flag floatx80_is_zero( floatx80 );
+flag floatx80_is_infinity( floatx80 );
+flag floatx80_is_negative( floatx80 );
+flag floatx80_is_denormal( floatx80 );
+flag floatx80_is_unnormal( floatx80 );
+flag floatx80_is_normal( floatx80 );
+
+int floatx80_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a);
+int floatx80_fsin(floatx80 *a);
+int floatx80_fcos(floatx80 *a);
+int floatx80_ftan(floatx80 *a);
+
+floatx80 floatx80_flognp1(floatx80 a);
+floatx80 floatx80_flogn(floatx80 a);
+floatx80 floatx80_flog2(floatx80 a);
+floatx80 floatx80_flog10(floatx80 a);
+
+floatx80 floatx80_getman( floatx80 a );
+floatx80 floatx80_getexp( floatx80 a );
+
+// roundAndPackFloatx80 used to be in softfloat-round-pack, is now in softfloat.c
+floatx80 roundAndPackFloatx80(int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1);
+
+#endif
+
+#ifdef FLOATX128
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float128_to_int32( float128 );
+int32 float128_to_int32_round_to_zero( float128 );
+int64 float128_to_int64( float128 );
+int64 float128_to_int64_round_to_zero( float128 );
+float32 float128_to_float32( float128 );
+float64 float128_to_float64( float128 );
+#ifdef FLOATX80
+floatx80 float128_to_floatx80( float128 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 );
+float128 float128_add( float128, float128 );
+float128 float128_sub( float128, float128 );
+float128 float128_mul( float128, float128 );
+float128 float128_div( float128, float128 );
+float128 float128_rem( float128, float128 );
+float128 float128_sqrt( float128 );
+flag float128_eq( float128, float128 );
+flag float128_le( float128, float128 );
+flag float128_lt( float128, float128 );
+flag float128_eq_signaling( float128, float128 );
+flag float128_le_quiet( float128, float128 );
+flag float128_lt_quiet( float128, float128 );
+flag float128_is_signaling_nan( float128 );
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result.  After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 32 bits of the result.  This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float128
+       packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+       float128 z;
+
+       z.low = zSig1;
+       z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
+       return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+INLINE float128
+       roundAndPackFloat128(
+               flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
+{
+       int8 roundingMode;
+       flag roundNearestEven, increment, isTiny;
+
+       roundingMode = float_rounding_mode;
+       roundNearestEven = ( roundingMode == float_round_nearest_even );
+       increment = ( (sbits64) zSig2 < 0 );
+       if ( ! roundNearestEven ) {
+               if ( roundingMode == float_round_to_zero ) {
+                       increment = 0;
+               }
+               else {
+                       if ( zSign ) {
+                               increment = ( roundingMode == float_round_down ) && zSig2;
+                       }
+                       else {
+                               increment = ( roundingMode == float_round_up ) && zSig2;
+                       }
+               }
+       }
+       if ( 0x7FFD <= (bits32) zExp ) {
+               if (    ( 0x7FFD < zExp )
+                               || (    ( zExp == 0x7FFD )
+                                       && eq128(
+                                                       LIT64( 0x0001FFFFFFFFFFFF ),
+                                                       LIT64( 0xFFFFFFFFFFFFFFFF ),
+                                                       zSig0,
+                                                       zSig1
+                                               )
+                                       && increment
+                               )
+                       ) {
+                       float_raise( float_flag_overflow | float_flag_inexact );
+                       if (    ( roundingMode == float_round_to_zero )
+                                       || ( zSign && ( roundingMode == float_round_up ) )
+                                       || ( ! zSign && ( roundingMode == float_round_down ) )
+                               ) {
+                               return
+                                       packFloat128(
+                                               zSign,
+                                               0x7FFE,
+                                               LIT64( 0x0000FFFFFFFFFFFF ),
+                                               LIT64( 0xFFFFFFFFFFFFFFFF )
+                                       );
+                       }
+                       return packFloat128( zSign, 0x7FFF, 0, 0 );
+               }
+               if ( zExp < 0 ) {
+                       isTiny =
+                                       ( float_detect_tininess == float_tininess_before_rounding )
+                               || ( zExp < -1 )
+                               || ! increment
+                               || lt128(
+                                               zSig0,
+                                               zSig1,
+                                               LIT64( 0x0001FFFFFFFFFFFF ),
+                                               LIT64( 0xFFFFFFFFFFFFFFFF )
+                                       );
+                       shift128ExtraRightJamming(
+                               zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+                       zExp = 0;
+                       if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
+                       if ( roundNearestEven ) {
+                               increment = ( (sbits64) zSig2 < 0 );
+                       }
+                       else {
+                               if ( zSign ) {
+                                       increment = ( roundingMode == float_round_down ) && zSig2;
+                               }
+                               else {
+                                       increment = ( roundingMode == float_round_up ) && zSig2;
+                               }
+                       }
+               }
+       }
+       if ( zSig2 ) float_exception_flags |= float_flag_inexact;
+       if ( increment ) {
+               add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+               zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
+       }
+       else {
+               if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+       }
+       return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+INLINE float128
+       normalizeRoundAndPackFloat128(
+               flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+       int8 shiftCount;
+       bits64 zSig2;
+
+       if ( zSig0 == 0 ) {
+               zSig0 = zSig1;
+               zSig1 = 0;
+               zExp -= 64;
+       }
+       shiftCount = countLeadingZeros64( zSig0 ) - 15;
+       if ( 0 <= shiftCount ) {
+               zSig2 = 0;
+               shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+       }
+       else {
+               shift128ExtraRightJamming(
+                       zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
+       }
+       zExp -= shiftCount;
+       return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+#endif
+
+#endif //SOFTFLOAT_H
+\ No newline at end of file
author	Toni Wilen <twilen@winuae.net>
	Tue, 10 Jan 2017 18:27:12 +0000 (20:27 +0200)
committer	Toni Wilen <twilen@winuae.net>
	Tue, 10 Jan 2017 18:27:12 +0000 (20:27 +0200)
fpp.cpp		patch \| blob \| history
include/fpp-softfloat.h	[new file with mode: 0644]	patch \| blob
od-win32/md-fpp.h		patch \| blob \| history
od-win32/sysconfig.h		patch \| blob \| history
softfloat/README.txt	[new file with mode: 0644]	patch \| blob
softfloat/SOFTFLOAT-MACROS.H	[new file with mode: 0644]	patch \| blob
softfloat/fpu_constant.h	[new file with mode: 0644]	patch \| blob
softfloat/fsincos.cpp	[new file with mode: 0644]	patch \| blob
softfloat/fyl2x.cpp	[new file with mode: 0644]	patch \| blob
softfloat/mamesf.h	[new file with mode: 0644]	patch \| blob
softfloat/milieu.h	[new file with mode: 0644]	patch \| blob
softfloat/softfloat-specialize.h	[new file with mode: 0644]	patch \| blob
softfloat/softfloat.cpp	[new file with mode: 0644]	patch \| blob
softfloat/softfloat.h	[new file with mode: 0644]	patch \| blob