From b0e5d33e892f9d04bfd864271bfe5ed12b22ca71 Mon Sep 17 00:00:00 2001
From: Toni Wilen
Date: Sat, 5 May 2018 18:49:46 +0300
Subject: [PATCH] Host 80-bit FPU updates.

---
Review notes (ignored by git-am, not part of the commit message):

 * xfp_setnormal(): "round nearest" has to clear the x87 rounding-control
   field (control word bits 10-11).  The mask was written as 10 << 3, which
   clears bits 4 and 6 instead (bit 4 is the underflow exception mask).
 * fpux64_80.asm: the asin/acos/atanh/sinh/cosh/tanh labels carried the
   32-bit '_' prefix and did not match their 'global xfp_*' declarations.
 * xfp_rem/xfp_rem1 (both asm files): loadfp2 leaves the second argument in
   st0, so fprem/fprem1 computed "b rem a".  Added the same fxch that
   xfp_scale already uses.
 * xfp_atanh (both asm files): 'fsub st0,st2' read a stack slot that is never
   loaded (only two values are pushed).  The operand is now reloaded and
   1 - x is formed with fsubp.
 * NOTE(review): fp_mod()/fp_rem() no longer write *q and *s, and
   fprem/fprem1 are issued once without testing C2 for a partial remainder.
   Confirm the callers tolerate this.
 * NOTE(review): this file was re-expanded from a whitespace-collapsed copy;
   blank context lines were placed using the hunk line counts and should be
   checked against the tree.  The index blob ids are those of the original
   patch.

 od-win32/fpp_native_msvc_80bit.cpp | 314 +++++++++++++++--------------
 od-win32/fpux64_80.asm             | 300 +++++++++++++++++++++++++++
 od-win32/fpux86_80.asm             | 298 +++++++++++++++++++++++++++
 3 files changed, 766 insertions(+), 146 deletions(-)

diff --git a/od-win32/fpp_native_msvc_80bit.cpp b/od-win32/fpp_native_msvc_80bit.cpp
index c32d7a94..91976cd3 100644
--- a/od-win32/fpp_native_msvc_80bit.cpp
+++ b/od-win32/fpp_native_msvc_80bit.cpp
@@ -41,6 +41,26 @@ extern "C"
 	extern void _cdecl xfp_cos(void*, void*);
 	extern void _cdecl xfp_tan(void*, void*);
 	extern void _cdecl xfp_atan(void*, void*);
+	extern void _cdecl xfp_asin(void*, void*);
+	extern void _cdecl xfp_acos(void*, void*);
+	extern void _cdecl xfp_atanh(void*, void*);
+	extern void _cdecl xfp_sinh(void*, void*);
+	extern void _cdecl xfp_cosh(void*, void*);
+	extern void _cdecl xfp_tanh(void*, void*);
+
+	extern void _cdecl xfp_rem(void*, void*);
+	extern void _cdecl xfp_rem1(void*, void*);
+	extern void _cdecl xfp_getexp(void*, void*);
+	extern void _cdecl xfp_getman(void*, void*);
+	extern void _cdecl xfp_scale(void*, void*);
+	extern void _cdecl xfp_twotox(void*, void*);
+	extern void _cdecl xfp_etox(void*, void*);
+	extern void _cdecl xfp_etoxm1(void*, void*);
+	extern void _cdecl xfp_tentox(void*, void*);
+	extern void _cdecl xfp_log2(void*, void*);
+	extern void _cdecl xfp_log10(void*, void*);
+	extern void _cdecl xfp_logn(void*, void*);
+	extern void _cdecl xfp_lognp1(void*, void*);
 
 	extern void _cdecl xfp_to_single(void*, uae_u32*);
 	extern void _cdecl xfp_from_single(void*, uae_u32*);
@@ -202,6 +222,78 @@ static void fp_set_mode_native(uae_u32 mode_control)
 	fpu_mode_control = mode_control;
 }
 
+static void fp_get_status(uae_u32 *status)
+{
+	uae_u16 st = xfp_get_status();
+
+	if (st & (1 << 5)) // P
+		*status |= FPSR_INEX2;
+	if (st & (1 << 4)) // U
+		*status |= FPSR_UNFL;
+	if (st & (1 << 3)) // O
+		*status |= FPSR_OVFL;
+	if (st & (1 << 2)) // Z
+		*status |= FPSR_DZ;
+	*status |= fp_status;
+}
+
+static void fp_clear_status(void)
+{
+	xfp_clear_status();
+	fp_status = 0;
+}
+
+static void toxnative(fpdata *fpd, fptype *fp)
+{
+	xfp_x_to_double(&fpd->rfp, fp);
+}
+static void fromxnative(fptype *fp, fpdata *fpd)
+{
+	xfp_x_from_double(&fpd->rfp, fp);
+	fp_clear_status();
+}
+
+static void xfp_to_softfloat(fpdata *fpd)
+{
+	fpd->fpx.high = fpd->rfp.e;
+	fpd->fpx.low = fpd->rfp.m;
+}
+static void xfp_from_softfloat(fpdata *fpd)
+{
+	fpd->rfp.e = fpd->fpx.high;
+	fpd->rfp.m = fpd->fpx.low;
+}
+
+/* Functions for rounding */
+
+// round to float with extended precision exponent
+static void fp_round32(fpdata *fpd)
+{
+	xfp_to_softfloat(fpd);
+	fpd->fpx = floatx80_round32(fpd->fpx, &fs);
+	xfp_from_softfloat(fpd);
+}
+
+// round to double with extended precision exponent
+static void fp_round64(fpdata *fpd)
+{
+	xfp_to_softfloat(fpd);
+	fpd->fpx = floatx80_round64(fpd->fpx, &fs);
+	xfp_from_softfloat(fpd);
+}
+
+// round to float
+static void fp_round_single(fpdata *fpd)
+{
+	xfp_round_single(&fpd->rfp, &fpd->rfp);
+}
+
+// round to double
+static void fp_round_double(fpdata *fpd)
+{
+	xfp_round_double(&fpd->rfp, &fpd->rfp);
+}
+
 static bool xfp_changed;
 static bool native_changed;
 
@@ -237,6 +329,37 @@ static void xfp_setprec(int prec)
 	}
 }
 
+static void xfp_resetnormal(fpdata *fp)
+{
+	if (xfp_changed) {
+		xfp_fldcw(&fpx_mode);
+		set_floatx80_rounding_precision(xfp_swprec, &fs);
+		xfp_changed = false;
+	}
+	xfp_clear_status();
+	if (!currprefs.fpu_strict)
+		return;
+	if (fs.floatx80_rounding_precision == 32)
+		fp_round_single(fp);
+	else if (fs.floatx80_rounding_precision == 64)
+		fp_round_double(fp);
+}
+
+static void xfp_setnormal(void)
+{
+	uae_u16 v = fpx_mode;
+	v |= 3 << 8; // extended
+	v &= ~(3 << 10); // round nearest (clear RC field, bits 10-11)
+	if (v != fpx_mode) {
+		xfp_fldcw(&v);
+		xfp_swprec = fs.floatx80_rounding_precision;
+		set_floatx80_rounding_precision(80, &fs);
+		xfp_changed = true;
+	} else {
+		xfp_changed = false;
+	}
+}
+
 // Must use default precision/rounding mode when calling C-library math functions.
 static void fp_normal_prec(void)
 {
@@ -261,48 +384,6 @@ static uae_u32 fp_get_support_flags(void)
 	return FPU_FEATURE_EXCEPTIONS;
 }
 
-static void fp_get_status(uae_u32 *status)
-{
-	uae_u16 st = xfp_get_status();
-
-	if (st & (1 << 5)) // P
-		*status |= FPSR_INEX2;
-	if (st & (1 << 4)) // U
-		*status |= FPSR_UNFL;
-	if (st & (1 << 3)) // O
-		*status |= FPSR_OVFL;
-	if (st & (1 << 2)) // Z
-		*status |= FPSR_DZ;
-	*status |= fp_status;
-}
-
-static void fp_clear_status(void)
-{
-	xfp_clear_status();
-	fp_status = 0;
-}
-
-static void toxnative(fpdata *fpd, fptype *fp)
-{
-	xfp_x_to_double(&fpd->rfp, fp);
-}
-static void fromxnative(fptype *fp, fpdata *fpd)
-{
-	xfp_x_from_double(&fpd->rfp, fp);
-	fp_clear_status();
-}
-
-static void xfp_to_softfloat(fpdata *fpd)
-{
-	fpd->fpx.high = fpd->rfp.e;
-	fpd->fpx.low = fpd->rfp.m;
-}
-static void xfp_from_softfloat(fpdata *fpd)
-{
-	fpd->rfp.e = fpd->fpx.high;
-	fpd->rfp.m = fpd->fpx.low;
-}
-
 /* Functions for detecting float type */
 static bool fp_is_init(fpdata *fpd)
 {
@@ -422,36 +503,6 @@ static void fp_from_int(fpdata *fpd, uae_s32 src)
 	xfp_from_int(&fpd->rfp, &src);
 }
 
-/* Functions for rounding */
-
-// round to float with extended precision exponent
-static void fp_round32(fpdata *fpd)
-{
-	xfp_to_softfloat(fpd);
-	fpd->fpx = floatx80_round32(fpd->fpx, &fs);
-	xfp_from_softfloat(fpd);
-}
-
-// round to double with extended precision exponent
-static void fp_round64(fpdata *fpd)
-{
-	xfp_to_softfloat(fpd);
-	fpd->fpx = floatx80_round64(fpd->fpx, &fs);
-	xfp_from_softfloat(fpd);
-}
-
-// round to float
-static void fp_round_single(fpdata *fpd)
-{
-	xfp_round_single(&fpd->rfp, &fpd->rfp);
-}
-
-// round to double
-static void fp_round_double(fpdata *fpd)
-{
-	xfp_round_double(&fpd->rfp, &fpd->rfp);
-}
-
 static const TCHAR *fp_print(fpdata *fpd, int mode)
 {
 	static TCHAR fsout[32];
@@ -485,22 +536,6 @@ static const TCHAR *fp_print(fpdata *fpd, int mode)
 	return fsout;
 }
 
-static void fp_round_prec(fpdata *fpd, int prec)
-{
-	if (prec == 64) {
-		fp_round_double(fpd);
-	} else if (prec == 32) {
-		fp_round_single(fpd);
-	}
-}
-
-static void fp_round(fpdata *fpd)
-{
-	if (!currprefs.fpu_strict)
-		return;
-	fp_round_prec(fpd, fpu_prec);
-}
-
 /* Arithmetic functions */
 
 static void fp_move(fpdata *a, fpdata *b, int prec)
@@ -517,15 +552,11 @@ static void fp_int(fpdata *a, fpdata *b)
 
 static void fp_getexp(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_getexp(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_getexp(&a->rfp, &b->rfp);
 }
 static void fp_getman(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_getman(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_getman(&a->rfp, &b->rfp);
 }
 static void fp_div(fpdata *a, fpdata *b, int prec)
 {
@@ -535,57 +566,48 @@ static void fp_div(fpdata *a, fpdata *b, int prec)
 }
 static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s)
 {
-	xfp_to_softfloat(a);
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_mod(a->fpx, b->fpx, q, s, &fs);
-	xfp_from_softfloat(a);
+	xfp_rem(&a->rfp, &b->rfp);
 }
 
 static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s)
 {
-	xfp_to_softfloat(a);
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_rem(a->fpx, b->fpx, q, s, &fs);
-	xfp_from_softfloat(a);
+	xfp_rem1(&a->rfp, &b->rfp);
 }
 
 static void fp_scale(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(a);
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_scale(a->fpx, b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_scale(&a->rfp, &b->rfp);
 }
 
 static void fp_sinh(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_sinh(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_sinh(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_lognp1(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_lognp1(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_lognp1(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_etoxm1(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_etoxm1(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_etoxm1(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_tanh(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_tanh(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_tanh(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_asin(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_asin(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_asin(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_atanh(fpdata *a, fpdata *b)
 {
@@ -595,51 +617,51 @@ static void fp_atanh(fpdata *a, fpdata *b)
 }
 static void fp_etox(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_etox(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_etox(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_twotox(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_twotox(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_twotox(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_tentox(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_tentox(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_tentox(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_logn(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_logn(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_logn(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_log10(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_log10(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_log10(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_log2(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_log2(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_log2(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_cosh(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_cosh(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_cosh(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_acos(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_acos(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_acos(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 
 static void fp_intrz(fpdata *a, fpdata *b)
@@ -714,10 +736,10 @@ static void fp_sglmul(fpdata *a, fpdata *b)
 	b->rfp.m &= 0xFFFFFF0000000000;
 	xfp_mul(&a->rfp, &b->rfp);
 	fpdata fpx = *a;
+	xfp_resetprec();
 	fp_round32(a);
 	if (fpx.rfp.m != a->rfp.m)
 		fp_status |= FPSR_INEX2;
-	xfp_resetprec();
 }
 static void fp_sgldiv(fpdata *a, fpdata *b)
 {
diff --git a/od-win32/fpux64_80.asm b/od-win32/fpux64_80.asm
index 7ab820f0..f2209341 100644
--- a/od-win32/fpux64_80.asm
+++ b/od-win32/fpux64_80.asm
@@ -1,4 +1,6 @@
 
+; 64-bit assembly functions for native 80-bit FPU emulation
+
 global xfp_int
 global xfp_mov
 global xfp_fldcw
@@ -21,10 +23,30 @@ global xfp_add
 global xfp_sub
 global xfp_sqrt
+global xfp_rem
+global xfp_rem1
+global xfp_getexp
+global xfp_getman
+global xfp_scale
+global xfp_twotox
+global xfp_etox
+global xfp_etoxm1
+global xfp_tentox
+global xfp_log2
+global xfp_log10
+global xfp_logn
+global xfp_lognp1
+
 global xfp_sin
 global xfp_cos
 global xfp_tan
 global xfp_atan
+global xfp_asin
+global xfp_acos
+global xfp_atanh
+global xfp_sinh
+global xfp_cosh
+global xfp_tanh
 
 global xfp_get_status
 global xfp_clear_status
@@ -37,6 +59,10 @@ bits 64
 	fld tword[rdx]
 %endmacro
 
+%macro reloadfp1 0
+	fld tword[rdx]
+%endmacro
+
 %macro loadfp2 0
 	fld tword[rcx]
 	fld tword[rdx]
@@ -181,11 +207,285 @@ xfp_sin:
 xfp_tan:
 	loadfp1
 	fptan
+	fstp st0
 	storefp
 	ret
 
 xfp_atan:
 	loadfp1
+	fld1
+	fpatan
+	storefp
+	ret
+
+
+xfp_rem:
+	loadfp2
+	fxch	; st0 = first operand, st1 = second (same swap as xfp_scale)
+	fprem
+	fstp st1
+	storefp
+	ret
+
+xfp_rem1:
+	loadfp2
+	fxch	; st0 = first operand, st1 = second (same swap as xfp_scale)
+	fprem1
+	fstp st1
+	storefp
+	ret
+
+xfp_getexp:
+	loadfp1
+	fxtract
+	fstp st0
+	storefp
+	ret
+
+xfp_getman:
+	loadfp1
+	fxtract
+	fstp st1
+	storefp
+	ret
+
+xfp_scale:
+	loadfp2
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_twotox:
+	loadfp1
+	frndint
+	reloadfp1
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_etox:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_etoxm1:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	fsub dword[one]
+	storefp
+	ret
+
+xfp_tentox:
+	loadfp1
+	fldl2t
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_log2:
+	loadfp1
+	fld1
+	fxch
+	fyl2x
+	storefp
+	ret
+
+xfp_log10:
+	loadfp1
+	fldlg2
+	fxch
+	fyl2x
+	storefp
+	ret
+
+xfp_logn:
+	loadfp1
+	fldln2
+	fxch
+	fyl2x
+	storefp
+	ret
+
+xfp_lognp1:
+	loadfp1
+	fldln2
+	fxch
+	fyl2xp1
+	storefp
+	ret
+
+
+xfp_asin:
+	loadfp1
+	fmul st0,st0
+	fld1
+	fsubrp
+	fsqrt
+	reloadfp1
+	fxch
+	fpatan
+	storefp
+	ret
+
+xfp_acos:
+	loadfp1
+	fmul st0,st0
+	fld1
+	fsubrp
+	fsqrt
+	reloadfp1
+	fxch
 	fpatan
+	fld tword[pihalf]
+	fsubrp
+	storefp
+	ret
+
+xfp_atanh:
+	loadfp1
+	fld1
+	fadd st1,st0
+	reloadfp1	; st0 = x, st1 = 1, st2 = 1 + x
+	fsubp	; st0 = 1 - x, st1 = 1 + x
+	fdivp
+	fldln2
+	fxch
+	fyl2x
+	fld1
+	fchs
+	fxch
+	fscale
+	fstp st1
 	storefp
 	ret
+
+xfp_sinh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	fsubrp
+	fld1
+	fchs
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_cosh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	faddp
+	fld1
+	fchs
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_tanh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fst st1
+	fadd st0,st2
+	fxch st2
+	fsubp
+	fdivrp
+	storefp
+	ret
+
+align 4
+
+one:
+	dd 1.0
+pihalf:
+	dd 0x2168c234,0xc90fdaa2,0x00003fff
diff --git a/od-win32/fpux86_80.asm b/od-win32/fpux86_80.asm
index 1e0a0062..c88a5703 100644
--- a/od-win32/fpux86_80.asm
+++ b/od-win32/fpux86_80.asm
@@ -1,4 +1,6 @@
 
+; 32-bit assembly functions for native 80-bit FPU emulation
+
 global _xfp_int
 global _xfp_mov
 global _xfp_fldcw
@@ -21,10 +23,30 @@ global _xfp_add
 global _xfp_sub
 global _xfp_sqrt
+global _xfp_rem
+global _xfp_rem1
+global _xfp_getexp
+global _xfp_getman
+global _xfp_scale
+global _xfp_twotox
+global _xfp_etox
+global _xfp_etoxm1
+global _xfp_tentox
+global _xfp_log2
+global _xfp_log10
+global _xfp_logn
+global _xfp_lognp1
+
 global _xfp_sin
 global _xfp_cos
 global _xfp_tan
 global _xfp_atan
+global _xfp_asin
+global _xfp_acos
+global _xfp_atanh
+global _xfp_sinh
+global _xfp_cosh
+global _xfp_tanh
 
 global _xfp_get_status
 global _xfp_clear_status
@@ -37,6 +59,10 @@ section .text
 	fld tword[ecx]
 %endmacro
 
+%macro reloadfp1 0
+	fld tword[ecx]
+%endmacro
+
 %macro loadfp2 0
 	mov eax,[esp+4]
 	mov ecx,[esp+8]
@@ -203,11 +229,283 @@ _xfp_sin:
 _xfp_tan:
 	loadfp1
 	fptan
+	fstp st0
 	storefp
 	ret
 
 _xfp_atan:
 	loadfp1
+	fld1
+	fpatan
+	storefp
+	ret
+
+_xfp_rem:
+	loadfp2
+	fxch	; st0 = first operand, st1 = second (same swap as _xfp_scale)
+	fprem
+	fstp st1
+	storefp
+	ret
+
+_xfp_rem1:
+	loadfp2
+	fxch	; st0 = first operand, st1 = second (same swap as _xfp_scale)
+	fprem1
+	fstp st1
+	storefp
+	ret
+
+_xfp_getexp:
+	loadfp1
+	fxtract
+	fstp st0
+	storefp
+	ret
+
+_xfp_getman:
+	loadfp1
+	fxtract
+	fstp st1
+	storefp
+	ret
+
+_xfp_scale:
+	loadfp2
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_twotox:
+	loadfp1
+	frndint
+	reloadfp1
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_etox:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_etoxm1:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	fsub dword[one]
+	storefp
+	ret
+
+_xfp_tentox:
+	loadfp1
+	fldl2t
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_log2:
+	loadfp1
+	fld1
+	fxch
+	fyl2x
+	storefp
+	ret
+
+_xfp_log10:
+	loadfp1
+	fldlg2
+	fxch
+	fyl2x
+	storefp
+	ret
+
+_xfp_logn:
+	loadfp1
+	fldln2
+	fxch
+	fyl2x
+	storefp
+	ret
+
+_xfp_lognp1:
+	loadfp1
+	fldln2
+	fxch
+	fyl2xp1
+	storefp
+	ret
+
+_xfp_asin:
+	loadfp1
+	fmul st0,st0
+	fld1
+	fsubrp
+	fsqrt
+	reloadfp1
+	fxch
 	fpatan
 	storefp
 	ret
+
+_xfp_acos:
+	loadfp1
+	fmul st0,st0
+	fld1
+	fsubrp
+	fsqrt
+	reloadfp1
+	fxch
+	fpatan
+	fld tword[pihalf]
+	fsubrp
+	storefp
+	ret
+
+_xfp_atanh:
+	loadfp1
+	fld1
+	fadd st1,st0
+	reloadfp1	; st0 = x, st1 = 1, st2 = 1 + x
+	fsubp	; st0 = 1 - x, st1 = 1 + x
+	fdivp
+	fldln2
+	fxch
+	fyl2x
+	fld1
+	fchs
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_sinh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	fsubrp
+	fld1
+	fchs
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_cosh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	faddp
+	fld1
+	fchs
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_tanh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fst st1
+	fadd st0,st2
+	fxch st2
+	fsubp
+	fdivrp
+	storefp
+	ret
+
+align 4
+
+one:
+	dd 1.0
+pihalf:
+	dd 0x2168c234,0xc90fdaa2,0x00003fff
-- 
2.47.3