From 3bd9645d99dd0899f52cf95a99b5f5442b6b4791 Mon Sep 17 00:00:00 2001 From: Toni Wilen Date: Sat, 18 Mar 2017 20:20:34 +0200 Subject: [PATCH] FPU packed decimal update. --- fpp.cpp | 261 +-------------- fpp_native.cpp | 211 ++++++++++++ fpp_softfloat.cpp | 127 +------- include/fpp.h | 22 +- softfloat/softfloat_decimal.cpp | 552 +++++++++++++++++--------------- 5 files changed, 547 insertions(+), 626 deletions(-) diff --git a/fpp.cpp b/fpp.cpp index 7e38ccfb..8444eedb 100644 --- a/fpp.cpp +++ b/fpp.cpp @@ -54,6 +54,9 @@ FPP_TO_NATIVE fpp_to_native; FPP_TO_INT fpp_to_int; FPP_FROM_INT fpp_from_int; +FPP_PACK fpp_to_pack; +FPP_PACK fpp_from_pack; + FPP_TO_SINGLE fpp_to_single; FPP_FROM_SINGLE fpp_from_single; FPP_TO_DOUBLE fpp_to_double; @@ -546,9 +549,6 @@ static uae_u32 fpsr_make_status(void) { uae_u32 exception; - if (!currprefs.fpu_exceptions) - return 0; - // get external status fpp_get_status(®s.fpsr); @@ -564,6 +564,9 @@ static uae_u32 fpsr_make_status(void) if (regs.fpsr & (FPSR_OVFL | FPSR_INEX2 | FPSR_INEX1)) regs.fpsr |= FPSR_AE_INEX; // INEX = INEX1 || INEX2 || OVFL + if (!currprefs.fpu_exceptions) + return 0; + // return exceptions that interrupt calculation exception = regs.fpsr & regs.fpcr & (FPSR_SNAN | FPSR_OPERR | FPSR_DZ); if (currprefs.cpu_model >= 68040 && currprefs.fpu_model) @@ -1106,248 +1109,6 @@ static void fpu_null (void) fpnan (®s.fp[i]); } -/* single : S 8*E 23*F */ -/* double : S 11*E 52*F */ -/* extended : S 15*E 64*F */ -/* E = 0 & F = 0 -> 0 */ -/* E = MAX & F = 0 -> Infin */ -/* E = MAX & F # 0 -> NotANumber */ -/* E = biased by 127 (single) ,1023 (double) ,16383 (extended) */ - -#if 1 - -void to_pack_softfloat (fpdata *fp, uae_u32 *wrd); - -static void to_pack (fpdata *fpd, uae_u32 *wrd) -{ - to_pack_softfloat(fpd, wrd); -} - - -#else - -static void to_pack (fpdata *fpd, uae_u32 *wrd) -{ - fptype d; - char *cp; - char str[100]; - - if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) { - // infinity has extended exponent and all 0 packed fraction - // nans are copies bit by bit - fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); - return; - } - if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) { - // exponent is not cared about, if mantissa is zero - wrd[0] &= 0x80000000; - fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); - return; - } - - cp = str; - if (wrd[0] & 0x80000000) - *cp++ = '-'; - *cp++ = (wrd[0] & 0xf) + '0'; - *cp++ = '.'; - *cp++ = ((wrd[1] >> 28) & 0xf) + '0'; - *cp++ = ((wrd[1] >> 24) & 0xf) + '0'; - *cp++ = ((wrd[1] >> 20) & 0xf) + '0'; - *cp++ = ((wrd[1] >> 16) & 0xf) + '0'; - *cp++ = ((wrd[1] >> 12) & 0xf) + '0'; - *cp++ = ((wrd[1] >> 8) & 0xf) + '0'; - *cp++ = ((wrd[1] >> 4) & 0xf) + '0'; - *cp++ = ((wrd[1] >> 0) & 0xf) + '0'; - *cp++ = ((wrd[2] >> 28) & 0xf) + '0'; - *cp++ = ((wrd[2] >> 24) & 0xf) + '0'; - *cp++ = ((wrd[2] >> 20) & 0xf) + '0'; - *cp++ = ((wrd[2] >> 16) & 0xf) + '0'; - *cp++ = ((wrd[2] >> 12) & 0xf) + '0'; - *cp++ = ((wrd[2] >> 8) & 0xf) + '0'; - *cp++ = ((wrd[2] >> 4) & 0xf) + '0'; - *cp++ = ((wrd[2] >> 0) & 0xf) + '0'; - *cp++ = 'E'; - if (wrd[0] & 0x40000000) - *cp++ = '-'; - *cp++ = ((wrd[0] >> 24) & 0xf) + '0'; - *cp++ = ((wrd[0] >> 20) & 0xf) + '0'; - *cp++ = ((wrd[0] >> 16) & 0xf) + '0'; - *cp = 0; -#if USE_LONG_DOUBLE - sscanf (str, "%Le", &d); -#else - sscanf (str, "%le", &d); -#endif - fpp_from_native(d, fpd); -} - -#endif - -#if 1 - -void from_pack_softfloat (fpdata *fp, uae_u32 *wrd, int kfactor); - -static void from_pack (fpdata *fpd, uae_u32 *wrd, int kfactor) -{ - from_pack_softfloat(fpd, wrd, kfactor); -} - -#else - -static void from_pack (fpdata *src, uae_u32 *wrd, int kfactor) -{ - int i, j, t; - int exp; - int ndigits; - char *cp, *strp; - char str[100]; - fptype fp; - - if (fpp_is_nan (src)) { - // copy bit by bit, handle signaling nan - fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); - return; - } - if (fpp_is_infinity (src)) { - // extended exponent and all 0 packed fraction - fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); - wrd[1] = wrd[2] = 0; - return; - } - - wrd[0] = wrd[1] = wrd[2] = 0; - - fpp_to_native(&fp, src); - -#if USE_LONG_DOUBLE - sprintf (str, "%#.17Le", fp); -#else - sprintf (str, "%#.17e", fp); -#endif - - // get exponent - cp = str; - while (*cp != 'e') { - if (*cp == 0) - return; - cp++; - } - cp++; - if (*cp == '+') - cp++; - exp = atoi (cp); - - // remove trailing zeros - cp = str; - while (*cp != 'e') { - cp++; - } - cp[0] = 0; - cp--; - while (cp > str && *cp == '0') { - *cp = 0; - cp--; - } - - cp = str; - // get sign - if (*cp == '-') { - cp++; - wrd[0] = 0x80000000; - } else if (*cp == '+') { - cp++; - } - strp = cp; - - if (kfactor <= 0) { - ndigits = abs (exp) + (-kfactor) + 1; - } else { - if (kfactor > 17) { - kfactor = 17; - fpsr_set_exception(FPSR_OPERR); - } - ndigits = kfactor; - } - - if (ndigits < 0) - ndigits = 0; - if (ndigits > 16) - ndigits = 16; - - // remove decimal point - strp[1] = strp[0]; - strp++; - // add trailing zeros - i = strlen (strp); - cp = strp + i; - while (i < ndigits) { - *cp++ = '0'; - i++; - } - i = ndigits + 1; - while (i < 17) { - strp[i] = 0; - i++; - } - *cp = 0; - i = ndigits - 1; - // need to round? - if (i >= 0 && strp[i + 1] >= '5') { - while (i >= 0) { - strp[i]++; - if (strp[i] <= '9') - break; - if (i == 0) { - strp[i] = '1'; - exp++; - } else { - strp[i] = '0'; - } - i--; - } - } - strp[ndigits] = 0; - - // store first digit of mantissa - cp = strp; - wrd[0] |= *cp++ - '0'; - - // store rest of mantissa - for (j = 1; j < 3; j++) { - for (i = 0; i < 8; i++) { - wrd[j] <<= 4; - if (*cp >= '0' && *cp <= '9') - wrd[j] |= *cp++ - '0'; - } - } - - // exponent - if (exp < 0) { - wrd[0] |= 0x40000000; - exp = -exp; - } - if (exp > 9999) // ?? - exp = 9999; - if (exp > 999) { - int d = exp / 1000; - wrd[0] |= d << 12; - exp -= d * 1000; - fpsr_set_exception(FPSR_OPERR); - } - i = 100; - t = 0; - while (i >= 1) { - int d = exp / i; - t <<= 4; - t |= d; - exp -= d * i; - i /= 10; - } - wrd[0] |= t << 16; -} - -#endif - // 68040/060 does not support denormals static bool normalize_or_fault_if_no_denormal_support(uae_u16 opcode, uae_u16 extra, uaecptr ea, uaecptr oldpc, fpdata *src) { @@ -1566,7 +1327,7 @@ static int get_fp_value (uae_u32 opcode, uae_u16 extra, fpdata *src, uaecptr old wrd[2] = (doext ? exts[2] : x_cp_get_long (ad)); if (fault_if_no_packed_support (opcode, extra, adold, oldpc, NULL, wrd)) return 1; - to_pack (src, wrd); + fpp_to_pack (src, wrd, 0); fpp_normalize(src); return 1; } @@ -1748,7 +1509,7 @@ static int put_fp_value (fpdata *value, uae_u32 opcode, uae_u16 extra, uaecptr o if (kfactor & 64) kfactor |= ~63; fpp_normalize(value); - from_pack (value, wrd, kfactor); + fpp_from_pack(value, wrd, kfactor); x_cp_put_long (ad, wrd[0]); ad += 4; x_cp_put_long (ad, wrd[1]); @@ -3147,10 +2908,10 @@ void fpu_modechange(void) for (int i = 0; i < 8; i++) { fpp_from_exten_fmovem(®s.fp[i], &temp_ext[i][0], &temp_ext[i][1], &temp_ext[i][2]); } - if (currprefs.fpu_softfloat && !changed_prefs.fpu_softfloat) { - fp_init_native(); - } else if (!currprefs.fpu_softfloat && changed_prefs.fpu_softfloat) { + if (currprefs.fpu_softfloat) { fp_init_softfloat(); + } else { + fp_init_native(); } for (int i = 0; i < 8; i++) { fpp_to_exten_fmovem(®s.fp[i], temp_ext[i][0], temp_ext[i][1], temp_ext[i][2]); diff --git a/fpp_native.cpp b/fpp_native.cpp index 32b4f2b4..703a06c8 100644 --- a/fpp_native.cpp +++ b/fpp_native.cpp @@ -1031,6 +1031,214 @@ static uae_u32 fp_get_internal_grs(void) return 0; } +static void fp_from_pack (fpdata *src, uae_u32 *wrd, int kfactor) +{ + int i, j, t; + int exp; + int ndigits; + char *cp, *strp; + char str[100]; + fptype fp; + + if (fpp_is_nan (src)) { + // copy bit by bit, handle signaling nan + fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); + return; + } + if (fpp_is_infinity (src)) { + // extended exponent and all 0 packed fraction + fpp_from_exten(src, &wrd[0], &wrd[1], &wrd[2]); + wrd[1] = wrd[2] = 0; + return; + } + + wrd[0] = wrd[1] = wrd[2] = 0; + + fpp_to_native(&fp, src); + +#if USE_LONG_DOUBLE + sprintf (str, "%#.17Le", fp); +#else + sprintf (str, "%#.17e", fp); +#endif + + // get exponent + cp = str; + while (*cp != 'e') { + if (*cp == 0) + return; + cp++; + } + cp++; + if (*cp == '+') + cp++; + exp = atoi (cp); + + // remove trailing zeros + cp = str; + while (*cp != 'e') { + cp++; + } + cp[0] = 0; + cp--; + while (cp > str && *cp == '0') { + *cp = 0; + cp--; + } + + cp = str; + // get sign + if (*cp == '-') { + cp++; + wrd[0] = 0x80000000; + } else if (*cp == '+') { + cp++; + } + strp = cp; + + if (kfactor <= 0) { + ndigits = abs (exp) + (-kfactor) + 1; + } else { + if (kfactor > 17) { + kfactor = 17; + fpsr_set_exception(FPSR_OPERR); + } + ndigits = kfactor; + } + + if (ndigits < 0) + ndigits = 0; + if (ndigits > 16) + ndigits = 16; + + // remove decimal point + strp[1] = strp[0]; + strp++; + // add trailing zeros + i = strlen (strp); + cp = strp + i; + while (i < ndigits) { + *cp++ = '0'; + i++; + } + i = ndigits + 1; + while (i < 17) { + strp[i] = 0; + i++; + } + *cp = 0; + i = ndigits - 1; + // need to round? + if (i >= 0 && strp[i + 1] >= '5') { + while (i >= 0) { + strp[i]++; + if (strp[i] <= '9') + break; + if (i == 0) { + strp[i] = '1'; + exp++; + } else { + strp[i] = '0'; + } + i--; + } + } + strp[ndigits] = 0; + + // store first digit of mantissa + cp = strp; + wrd[0] |= *cp++ - '0'; + + // store rest of mantissa + for (j = 1; j < 3; j++) { + for (i = 0; i < 8; i++) { + wrd[j] <<= 4; + if (*cp >= '0' && *cp <= '9') + wrd[j] |= *cp++ - '0'; + } + } + + // exponent + if (exp < 0) { + wrd[0] |= 0x40000000; + exp = -exp; + } + if (exp > 9999) // ?? + exp = 9999; + if (exp > 999) { + int d = exp / 1000; + wrd[0] |= d << 12; + exp -= d * 1000; + fpsr_set_exception(FPSR_OPERR); + } + i = 100; + t = 0; + while (i >= 1) { + int d = exp / i; + t <<= 4; + t |= d; + exp -= d * i; + i /= 10; + } + wrd[0] |= t << 16; +} + +static void fp_to_pack (fpdata *fpd, uae_u32 *wrd, int dummy) +{ + fptype d; + char *cp; + char str[100]; + + if (((wrd[0] >> 16) & 0x7fff) == 0x7fff) { + // infinity has extended exponent and all 0 packed fraction + // nans are copies bit by bit + fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); + return; + } + if (!(wrd[0] & 0xf) && !wrd[1] && !wrd[2]) { + // exponent is not cared about, if mantissa is zero + wrd[0] &= 0x80000000; + fpp_to_exten(fpd, wrd[0], wrd[1], wrd[2]); + return; + } + + cp = str; + if (wrd[0] & 0x80000000) + *cp++ = '-'; + *cp++ = (wrd[0] & 0xf) + '0'; + *cp++ = '.'; + *cp++ = ((wrd[1] >> 28) & 0xf) + '0'; + *cp++ = ((wrd[1] >> 24) & 0xf) + '0'; + *cp++ = ((wrd[1] >> 20) & 0xf) + '0'; + *cp++ = ((wrd[1] >> 16) & 0xf) + '0'; + *cp++ = ((wrd[1] >> 12) & 0xf) + '0'; + *cp++ = ((wrd[1] >> 8) & 0xf) + '0'; + *cp++ = ((wrd[1] >> 4) & 0xf) + '0'; + *cp++ = ((wrd[1] >> 0) & 0xf) + '0'; + *cp++ = ((wrd[2] >> 28) & 0xf) + '0'; + *cp++ = ((wrd[2] >> 24) & 0xf) + '0'; + *cp++ = ((wrd[2] >> 20) & 0xf) + '0'; + *cp++ = ((wrd[2] >> 16) & 0xf) + '0'; + *cp++ = ((wrd[2] >> 12) & 0xf) + '0'; + *cp++ = ((wrd[2] >> 8) & 0xf) + '0'; + *cp++ = ((wrd[2] >> 4) & 0xf) + '0'; + *cp++ = ((wrd[2] >> 0) & 0xf) + '0'; + *cp++ = 'E'; + if (wrd[0] & 0x40000000) + *cp++ = '-'; + *cp++ = ((wrd[0] >> 24) & 0xf) + '0'; + *cp++ = ((wrd[0] >> 20) & 0xf) + '0'; + *cp++ = ((wrd[0] >> 16) & 0xf) + '0'; + *cp = 0; +#if USE_LONG_DOUBLE + sscanf (str, "%Le", &d); +#else + sscanf (str, "%le", &d); +#endif + fpp_from_native(d, fpd); +} + + void fp_init_native(void) { set_floatx80_rounding_precision(80, &fs); @@ -1056,6 +1264,9 @@ void fp_init_native(void) fpp_to_int = fp_to_int; fpp_from_int = fp_from_int; + fpp_to_pack = fp_to_pack; + fpp_from_pack = fp_from_pack; + fpp_to_single = fp_to_single; fpp_from_single = fp_from_single; fpp_to_double = fp_to_double; diff --git a/fpp_softfloat.cpp b/fpp_softfloat.cpp index 3e9adf4a..04b8141a 100644 --- a/fpp_softfloat.cpp +++ b/fpp_softfloat.cpp @@ -756,7 +756,7 @@ static void fp_normalize(fpdata *a) a->fpx = floatx80_normalize(a->fpx); } -void to_pack_softfloat (fpdata *fp, uae_u32 *wrd) +static void fp_to_pack(fpdata *fp, uae_u32 *wrd, int dummy) { uae_s32 exp = 0; uae_s64 mant = 0; @@ -776,8 +776,6 @@ void to_pack_softfloat (fpdata *fp, uae_u32 *wrd) int i; - int zerocount; - uae_s64 multiplier; uae_u32 pack_exp = (wrd[0] >> 16) & 0xFFF; // packed exponent uae_u32 pack_int = wrd[0] & 0xF; // packed integer part @@ -814,127 +812,19 @@ void to_pack_softfloat (fpdata *fp, uae_u32 *wrd) mant += (pack_frac >> (60 - i * 4)) & 0xF; } -#if 1 - - if (pack_sm) { - mant = -mant; - } - - - zerocount = 0; - multiplier = 10; - if (exp >= 27) { - if (pack_se) { - for (i = 0; i < 16; i++) { - if ((pack_frac >> (i * 4)) & 0xF) { - break; - } - zerocount++; - } - - - exp -= zerocount; - - while (zerocount) { - if (zerocount & 1) { - mant /= multiplier; - } - multiplier *= multiplier; - zerocount >>= 1; - } - } else { - if (pack_int == 0) { - zerocount++; - for (i = 0; i < 16; i++) { - if ((pack_frac >> (60 - i * 4)) & 0xF) { - break; - } - zerocount++; - } - } - - - exp -= zerocount; - - while (zerocount) { - if (zerocount & 1) { - mant *= multiplier; - } - multiplier *= multiplier; - zerocount >>= 1; - } - } - } - - - /* TODO: calculate rounding mode */ - floatx80 z = int64_to_floatx80(mant); - floatx80 m = int32_to_floatx80(10); - floatx80 a = int32_to_floatx80(1); - - - while (exp) { - if (exp & 1) { - a = floatx80_mul(a, m, &fs); - } - m = floatx80_mul(m, m, &fs); - exp >>= 1; - } - - - if (pack_se) { - z = floatx80_div(z, a, &fs); - } else { - z = floatx80_mul(z, a, &fs); - } - -#if 0 - write_log(_T("z = %s\n"), fp_printx80(&z, 0)); - write_log(_T("m = %s\n"), fp_printx80(&m, 0)); - write_log(_T("a = %s\n"), fp_printx80(&a, 0)); - - - write_log(_T("zerocount = %i\n"), zerocount); - write_log(_T("multiplier = %llu\n"), multiplier); -#endif - - fp->fpx = z; - - /* TODO: set inex1 and restore rounding mode */ - // if mul/div caused inex2 --> set inex1 - -#else - floatx80 f; f.high = exp & 0x3FFF; f.high |= pack_se ? 0x4000 : 0; f.high |= pack_sm ? 0x8000 : 0; f.low = mant; - fp->fpx = floatdecimal_to_floatx80(f, &fsp); - -#endif - - if (!currprefs.fpu_softfloat) { - to_native(&fp->fp, fp); - } - + fp->fpx = floatdecimal_to_floatx80(f, &fs); } -void from_pack_softfloat (fpdata *fp, uae_u32 *wrd, int kfactor) +static void fp_from_pack(fpdata *fp, uae_u32 *wrd, int kfactor) { - floatx80 a; - fpdata fpd; - - if (!currprefs.fpu_softfloat) { - from_native(fp->fp, &fpd); - } else { - fpd.fpx = fp->fpx; - } - a = fpd.fpx; - - floatx80 f = floatx80_to_floatdecimal(a, &kfactor, &fs); + floatx80 f = floatx80_to_floatdecimal(fp->fpx, &kfactor, &fs); uae_u32 pack_exp = 0; // packed exponent uae_u32 pack_exp4 = 0; @@ -950,9 +840,9 @@ void from_pack_softfloat (fpdata *fp, uae_u32 *wrd, int kfactor) uae_u64 digit; if ((f.high & 0x7FFF) == 0x7FFF) { - wrd[0] = (uae_u32)(a.high << 16); - wrd[1] = a.low >> 32; - wrd[2] = (uae_u32)a.low; + wrd[0] = (uae_u32)(f.high << 16); + wrd[1] = f.low >> 32; + wrd[2] = (uae_u32)f.low; } else { exponent = f.high & 0x3FFF; significand = f.low; @@ -1025,6 +915,9 @@ void fp_init_softfloat(void) fpp_to_int = to_int; fpp_from_int = from_int; + fpp_to_pack = fp_to_pack; + fpp_from_pack = fp_from_pack; + fpp_to_single = to_single; fpp_from_single = from_single; fpp_to_double = to_double; diff --git a/include/fpp.h b/include/fpp.h index 6b8a9f4e..cfc226d3 100644 --- a/include/fpp.h +++ b/include/fpp.h @@ -1,8 +1,11 @@ -extern void fp_init_native(void); -extern void fp_init_softfloat(void); -extern void fpsr_set_exception(uae_u32 exception); -extern void fpu_modechange(void); +/* single : S 8*E 23*F */ +/* double : S 11*E 52*F */ +/* extended : S 15*E 64*F */ +/* E = 0 & F = 0 -> 0 */ +/* E = MAX & F = 0 -> Infin */ +/* E = MAX & F # 0 -> NotANumber */ +/* E = biased by 127 (single) ,1023 (double) ,16383 (extended) */ #define FPSR_BSUN 0x00008000 #define FPSR_SNAN 0x00004000 @@ -13,6 +16,11 @@ extern void fpu_modechange(void); #define FPSR_INEX2 0x00000200 #define FPSR_INEX1 0x00000100 +extern void fp_init_native(void); +extern void fp_init_softfloat(void); +extern void fpsr_set_exception(uae_u32 exception); +extern void fpu_modechange(void); + #if defined(CPU_i386) || defined(CPU_x86_64) extern void init_fpucw_x87(void); #endif @@ -42,8 +50,7 @@ typedef void (*FPP_FROM_DOUBLE)(fpdata*, uae_u32*, uae_u32*); typedef void (*FPP_TO_EXTEN)(fpdata*, uae_u32, uae_u32, uae_u32); typedef void (*FPP_FROM_EXTEN)(fpdata*, uae_u32*, uae_u32*, uae_u32*); -typedef void (*FPP_PACK)(uae_u32*, uae_u32*, uae_u32*); -typedef void (*FPP_PACKG)(uae_u32*, uae_u32*, uae_u32*, uae_u32*); +typedef void (*FPP_PACK)(fpdata*, uae_u32*, int); typedef const TCHAR* (*FPP_PRINT)(fpdata*,int); typedef uae_u32 (*FPP_GET32)(void); @@ -69,6 +76,9 @@ extern FPP_TO_NATIVE fpp_to_native; extern FPP_TO_INT fpp_to_int; extern FPP_FROM_INT fpp_from_int; +extern FPP_PACK fpp_to_pack; +extern FPP_PACK fpp_from_pack; + extern FPP_TO_SINGLE fpp_to_single; extern FPP_FROM_SINGLE fpp_from_single; extern FPP_TO_DOUBLE fpp_to_double; diff --git a/softfloat/softfloat_decimal.cpp b/softfloat/softfloat_decimal.cpp index 531c08bf..9e1d3977 100644 --- a/softfloat/softfloat_decimal.cpp +++ b/softfloat/softfloat_decimal.cpp @@ -10,7 +10,7 @@ Arithmetic Package, Release 2a. #include "sysconfig.h" #include "sysdeps.h" -#define DECIMAL_LOG 1 +#define DECIMAL_LOG 0 #if DECIMAL_LOG #define decimal_log write_log @@ -26,203 +26,250 @@ Arithmetic Package, Release 2a. | Methods for converting decimal floats to binary extended precision floats. *----------------------------------------------------------------------------*/ +void round128to64(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status) +{ + flag increment; + int32_t zExp; + uint64_t zSig0, zSig1; + + zExp = *aExp; + zSig0 = *aSig0; + zSig1 = *aSig1; + + increment = ( (int64_t) zSig1 < 0 ); + if (status->float_rounding_mode != float_round_nearest_even) { + if (status->float_rounding_mode == float_round_to_zero) { + increment = 0; + } else { + if (aSign) { + increment = (status->float_rounding_mode == float_round_down) && zSig1; + } else { + increment = (status->float_rounding_mode == float_round_up) && zSig1; + } + } + } + + if (increment) { + ++zSig0; + if (zSig0 == 0) { + ++zExp; + zSig0 = LIT64(0x8000000000000000); + } else { + zSig0 &= ~ (((int64_t) (zSig1<<1) == 0) & (status->float_rounding_mode == float_round_nearest_even)); + } + } else { + if ( zSig0 == 0 ) zExp = 0; + } + + *aExp = zExp; + *aSig0 = zSig0; + *aSig1 = 0; +} + +void mul128by128round(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1, float_status *status) +{ + int32_t zExp; + uint64_t zSig0, zSig1, zSig2, zSig3; + + zExp = *aExp; + zSig0 = *aSig0; + zSig1 = *aSig1; + + round128to64(aSign, &bExp, &bSig0, &bSig1, status); + + zExp += bExp - 0x3FFE; + mul128To256(zSig0, zSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3); + zSig1 |= (zSig2 | zSig3) != 0; + if ( 0 < (int64_t) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + *aExp = zExp; + *aSig0 = zSig0; + *aSig1 = zSig1; + + round128to64(aSign, aExp, aSig0, aSig1, status); +} + void mul128by128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1) { - int32_t zExp; - uint64_t zSig0, zSig1, zSig2, zSig3; - - zExp = *aExp; - zSig0 = *aSig0; - zSig1 = *aSig1; - - zExp += bExp - 0x3FFE; - mul128To256(zSig0, zSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3); - zSig1 |= (zSig2 | zSig3) != 0; - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - *aExp = zExp; - *aSig0 = zSig0; - *aSig1 = zSig1; + int32_t zExp; + uint64_t zSig0, zSig1, zSig2, zSig3; + + zExp = *aExp; + zSig0 = *aSig0; + zSig1 = *aSig1; + + zExp += bExp - 0x3FFE; + mul128To256(zSig0, zSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3); + zSig1 |= (zSig2 | zSig3) != 0; + if ( 0 < (int64_t) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + *aExp = zExp; + *aSig0 = zSig0; + *aSig1 = zSig1; } -void div128by128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1) +void div128by128(int32_t *paExp, uint64_t *paSig0, uint64_t *paSig1, int32_t bExp, uint64_t bSig0, uint64_t bSig1) { - int32_t zExp; - uint64_t zSig0, zSig1; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - zExp = *aExp; - zSig0 = *aSig0; - zSig1 = *aSig1; - - zExp -= bExp - 0x3FFE; - rem1 = 0; - if ( le128( bSig0, bSig1, zSig0, zSig1 ) ) { - shift128Right( zSig0, zSig1, 1, &zSig0, &zSig1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( zSig0, zSig1, bSig0 ); - mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); - sub192( zSig0, zSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); - } - zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); - if ( ( zSig1 & 0x3FFF ) <= 4 ) { - mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); - sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - - *aExp = zExp; - *aSig0 = zSig0; - *aSig1 = zSig1; + int32_t zExp, aExp; + uint64_t zSig0, zSig1, aSig0, aSig1; + uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; + + aExp = *paExp; + aSig0 = *paSig0; + aSig1 = *paSig1; + + zExp = aExp - bExp + 0x3FFE; + if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { + shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); + mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); + sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + } + zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); + if ( ( zSig1 & 0x3FFF ) <= 4 ) { + mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); + sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + + *paExp = zExp; + *paSig0 = zSig0; + *paSig1 = zSig1; } -void tentoint128(int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t scale) +void tentoint128(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, int32_t scale, float_status *status) { - int32_t mExp; - uint64_t mSig0, mSig1; - - *aExp = 0x3FFF; - *aSig0 = LIT64(0x8000000000000000); - *aSig1 = 0; - - mExp = 0x4002; - mSig0 = LIT64(0xA000000000000000); - mSig1 = 0; - - while (scale) { - if (scale & 1) { - mul128by128(aExp, aSig0, aSig1, mExp, mSig0, mSig1); - } - mul128by128(&mExp, &mSig0, &mSig1, mExp, mSig0, mSig1); - scale >>= 1; - } + int32_t mExp; + uint64_t mSig0, mSig1; + + *aExp = 0x3FFF; + *aSig0 = LIT64(0x8000000000000000); + *aSig1 = 0; + + mExp = 0x4002; + mSig0 = LIT64(0xA000000000000000); + mSig1 = 0; + + while (scale) { + if (scale & 1) { + mul128by128round(aSign, aExp, aSig0, aSig1, mExp, mSig0, mSig1, status); + } + mul128by128(&mExp, &mSig0, &mSig1, mExp, mSig0, mSig1); + scale >>= 1; + } } -void roundtoint128(flag aSign, int32_t *aExp, uint64_t *aSig0, uint64_t *aSig1, float_status *status ) +int64_t tentointdec(int32_t scale) { - int32_t zExp; - uint64_t zSig, lastBitMask, roundBitsMask; - - zExp = *aExp; - zSig = *aSig0; - - if (0x403E <= zExp) return; - - if (*aSig1) { - zSig |= 1; - *aSig1 = 0; - } - - if (zExp < 0x3FFF) { - if (zExp == 0 && zSig == 0) return; - - float_raise(float_flag_inexact, status); - - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ((zExp == 0x3FFE) && (uint64_t)(zSig<<1)) { - *aExp = 0x3FFF; - *aSig0 = LIT64(0x8000000000000000); - return; - } - break; - case float_round_down: - if (aSign) { - *aExp = 0x3FFF; - *aSig0 = LIT64(0x8000000000000000); - return; - } - break; - case float_round_up: - if (!aSign) { - *aExp = 0x3FFF; - *aSig0 = LIT64(0x8000000000000000); - return; - } - break; - } - *aExp = 0; - *aSig0 = 0; - return; - } - lastBitMask = 1; - lastBitMask <<= 0x403E - zExp; - roundBitsMask = lastBitMask - 1; - - if ( status->float_rounding_mode == float_round_nearest_even ) { - zSig += lastBitMask>>1; - if ((zSig & roundBitsMask) == 0 ) zSig &= ~ lastBitMask; - } else if (status->float_rounding_mode != float_round_to_zero) { - if (aSign ^ (status->float_rounding_mode == float_round_up)) { - zSig += roundBitsMask; - } - } - zSig &= ~ roundBitsMask; - if (zSig == 0) { - ++zExp; - zSig = LIT64( 0x8000000000000000 ); - } - if ( zSig != *aSig0 ) float_raise(float_flag_inexact, status); - - *aExp = zExp; - *aSig0 = zSig; + uint64_t decM, decX; + + decX = 1; + decM = 10; + + while (scale) { + if (scale & 1) { + decX *= decM; + } + decM *= decM; + scale >>= 1; + } + + return decX; +} + + +int64_t float128toint64(flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status) +{ + int8_t roundingMode; + flag roundNearestEven, increment; + int64_t z; + + shift128RightJamming(zSig0, zSig1, 0x403E - zExp, &zSig0, &zSig1); + + roundingMode = status->float_rounding_mode; + roundNearestEven = (roundingMode == float_round_nearest_even); + increment = ((int64_t)zSig1 < 0); + if (!roundNearestEven) { + if (roundingMode == float_round_to_zero) { + increment = 0; + } else { + if (zSign) { + increment = (roundingMode == float_round_down ) && zSig1; + } else { + increment = (roundingMode == float_round_up ) && zSig1; + } + } + } + if (increment) { + ++zSig0; + zSig0 &= ~ (((uint64_t)(zSig1<<1) == 0) & roundNearestEven); + } + z = zSig0; + if (zSig1) float_raise(float_flag_inexact, status); + return z; } int32_t getDecimalExponent(int32_t aExp, uint64_t aSig) { - flag zSign; - int32_t zExp, shiftCount; - uint64_t zSig0, zSig1; - - if (aSig == 0 || aExp == 0x3FFF) { - return 0; - } - - aSig ^= LIT64(0x8000000000000000); - aExp -= 0x3FFF; - zSign = (aExp < 0); - aExp = zSign ? -aExp : aExp; - shiftCount = 31 - countLeadingZeros32(aExp); - zExp = 0x3FFF + shiftCount; - - if (shiftCount < 0) { - shortShift128Left(aSig, 0, -shiftCount, &zSig0, &zSig1); - } else { - shift128Right(aSig, 0, shiftCount, &zSig0, &zSig1); - aSig = (uint64_t)aExp << (63 - shiftCount); - if (zSign) { - sub128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1); - } else { - add128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1); - } - } - - shiftCount = countLeadingZeros64(zSig0); - shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1); - zExp -= shiftCount; - mul128by128(&zExp, &zSig0, &zSig1, 0x3FFD, LIT64(0x9A209A84FBCFF798), LIT64(0x8F8959AC0B7C9178)); - - shiftCount = 0x403E - zExp; - shift128RightJamming(zSig0, zSig1, shiftCount, &zSig0, &zSig1); - - if ((int64_t)zSig1 < 0) { - ++zSig0; - zSig0 &= ~(((int64_t)(zSig1<<1) == 0) & 1); - } - - zExp = zSign ? -zSig0 : zSig0; - - return zExp; + flag zSign; + int32_t zExp, shiftCount; + uint64_t zSig0, zSig1; + + if (aSig == 0 || aExp == 0x3FFF) { + return 0; + } + if (aExp < 0) { + return -4932; + } + + aSig ^= LIT64(0x8000000000000000); + aExp -= 0x3FFF; + zSign = (aExp < 0); + aExp = zSign ? -aExp : aExp; + shiftCount = 31 - countLeadingZeros32(aExp); + zExp = 0x3FFF + shiftCount; + + if (shiftCount < 0) { + shortShift128Left(aSig, 0, -shiftCount, &zSig0, &zSig1); + } else { + shift128Right(aSig, 0, shiftCount, &zSig0, &zSig1); + aSig = (uint64_t)aExp << (63 - shiftCount); + if (zSign) { + sub128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1); + } else { + add128(aSig, 0, zSig0, zSig1, &zSig0, &zSig1); + } + } + + shiftCount = countLeadingZeros64(zSig0); + shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1); + zExp -= shiftCount; + mul128by128(&zExp, &zSig0, &zSig1, 0x3FFD, LIT64(0x9A209A84FBCFF798), LIT64(0x8F8959AC0B7C9178)); + + shiftCount = 0x403E - zExp; + shift128RightJamming(zSig0, zSig1, shiftCount, &zSig0, &zSig1); + + if ((int64_t)zSig1 < 0) { + ++zSig0; + zSig0 &= ~(((int64_t)(zSig1<<1) == 0) & 1); + } + + zExp = zSign ? -zSig0 : zSig0; + + return zExp; } /*---------------------------------------------------------------------------- @@ -231,62 +278,62 @@ int32_t getDecimalExponent(int32_t aExp, uint64_t aSig) floatx80 floatdecimal_to_floatx80(floatx80 a, float_status *status) { - flag decSign, zSign, decExpSign, increment; - int32_t decExp, zExp, xExp, shiftCount; - uint64_t decSig, zSig0, zSig1, xSig0, xSig1; - - decSign = extractFloatx80Sign(a); - decExp = extractFloatx80Exp(a); - decSig = extractFloatx80Frac(a); - - if (decExp == 0x7FFF) return a; - - if (decExp == 0 && decSig == 0) return a; - - decExpSign = (decExp >> 14) & 1; - decExp &= 0x3FFF; - - shiftCount = countLeadingZeros64( decSig ); - zExp = 0x403E - shiftCount; - zSig0 = decSig << shiftCount; - zSig1 = 0; - zSign = decSign; - - tentoint128(&xExp, &xSig0, &xSig1, decExp); - - if (decExpSign) { - div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); - } else { - mul128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); - } - - increment = ( (int64_t) zSig1 < 0 ); - if (status->float_rounding_mode != float_round_nearest_even) { - if (status->float_rounding_mode == float_round_to_zero) { - increment = 0; - } else { - if (zSign) { - increment = (status->float_rounding_mode == float_round_down) && zSig1; - } else { - increment = (status->float_rounding_mode == float_round_up) && zSig1; - } - } - } - if (zSig1) float_raise(float_flag_decimal, status); - - if (increment) { - ++zSig0; - if (zSig0 == 0) { - ++zExp; - zSig0 = LIT64(0x8000000000000000); - } else { - zSig0 &= ~ (((uint64_t) (zSig1<<1) == 0) & (status->float_rounding_mode == float_round_nearest_even)); - } - } else { - if ( zSig0 == 0 ) zExp = 0; - } - return packFloatx80( zSign, zExp, zSig0 ); - + flag decSign, zSign, decExpSign, increment; + int32_t decExp, zExp, xExp, shiftCount; + uint64_t decSig, zSig0, zSig1, xSig0, xSig1; + + decSign = extractFloatx80Sign(a); + decExp = extractFloatx80Exp(a); + decSig = extractFloatx80Frac(a); + + if (decExp == 0x7FFF) return a; + + if (decExp == 0 && decSig == 0) return a; + + decExpSign = (decExp >> 14) & 1; + decExp &= 0x3FFF; + + shiftCount = countLeadingZeros64( decSig ); + zExp = 0x403E - shiftCount; + zSig0 = decSig << shiftCount; + zSig1 = 0; + zSign = decSign; + + tentoint128(zSign, &xExp, &xSig0, &xSig1, decExp, status); + + if (decExpSign) { + div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); + } else { + mul128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); + } + + increment = ( (int64_t) zSig1 < 0 ); + if (status->float_rounding_mode != float_round_nearest_even) { + if (status->float_rounding_mode == float_round_to_zero) { + increment = 0; + } else { + if (zSign) { + increment = (status->float_rounding_mode == float_round_down) && zSig1; + } else { + increment = (status->float_rounding_mode == float_round_up) && zSig1; + } + } + } + if (zSig1) float_raise(float_flag_decimal, status); + + if (increment) { + ++zSig0; + if (zSig0 == 0) { + ++zExp; + zSig0 = LIT64(0x8000000000000000); + } else { + zSig0 &= ~ (((uint64_t) (zSig1<<1) == 0) & (status->float_rounding_mode == float_round_nearest_even)); + } + } else { + if ( zSig0 == 0 ) zExp = 0; + } + return packFloatx80( zSign, zExp, zSig0 ); + } /*---------------------------------------------------------------------------- @@ -297,7 +344,7 @@ floatx80 floatx80_to_floatdecimal(floatx80 a, int32_t *k, float_status *status) { flag aSign, decSign; int32_t aExp, decExp, zExp, xExp; - uint64_t aSig, decSig, zSig0, zSig1, xSig0, xSig1; + uint64_t aSig, decSig, decX, zSig0, zSig1, xSig0, xSig1; flag ictr, lambda; int32_t kfactor, ilog, iscale, len; @@ -338,10 +385,10 @@ try_again: if (len < 1) { len = 1; } - if (kfactor > ilog) { - ilog = kfactor; - decimal_log(_T("ILOG is kfactor = %i\n"), ilog); - } + if (kfactor > ilog) { + ilog = kfactor; + decimal_log(_T("ILOG is kfactor = %i\n"), ilog); + } } decimal_log(_T("LEN = %i\n"),len); @@ -356,7 +403,7 @@ try_again: decimal_log(_T("ISCALE = %i, LAMBDA = %i\n"),iscale, lambda); - tentoint128(&xExp, &xSig0, &xSig1, iscale); + tentoint128(0, &xExp, &xSig0, &xSig1, iscale, status); zExp = aExp; zSig0 = aSig; @@ -368,25 +415,25 @@ try_again: div128by128(&zExp, &zSig0, &zSig1, xExp, xSig0, xSig1); } - decimal_log(_T("BEFORE: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"),zExp,zSig0,zSig1); + decimal_log(_T("BEFORE: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"),zExp,zSig0,zSig1); - roundtoint128(aSign, &zExp, &zSig0, &zSig1, status); + decSig = float128toint64(aSign, zExp, zSig0, zSig1, status); - decimal_log(_T("AFTER: zExp = %04x, zSig0 = %16llx, zSig1 = %16llx\n"),zExp,zSig0,zSig1); + decimal_log(_T("AFTER: decSig = %llu\n"),decSig); if (ictr == 0) { - tentoint128(&xExp, &xSig0, &xSig1, len - 1); + decX = tentointdec(len - 1); - if (zExp < xExp || ((zExp == xExp) && lt128(zSig0, zSig1, xSig0, xSig1))) { // z < x + if (decSig < decX) { // z < x ilog -= 1; ictr = 1; goto try_again; } - mul128by128(&xExp, &xSig0, &xSig1, 0x4002, LIT64(0xA000000000000000), 0); + decX *= 10; - if (zExp > xExp || ((zExp == xExp) && lt128(xSig0, xSig1, zSig0, zSig1))) { // z > x + if (decSig > decX) { // z > x ilog += 1; ictr = 1; goto try_again; @@ -394,7 +441,6 @@ try_again: } decSign = aSign; - decSig = zSig0 >> (0x403E - zExp); decExp = (ilog < 0) ? -ilog : ilog; if (decExp > 999) { float_raise(float_flag_invalid, status); -- 2.47.3