From b0e5d33e892f9d04bfd864271bfe5ed12b22ca71 Mon Sep 17 00:00:00 2001
From: Toni Wilen <twilen@winuae.net>
Date: Sat, 5 May 2018 18:49:46 +0300
Subject: [PATCH] Host 80-bit FPU updates.

---
 od-win32/fpp_native_msvc_80bit.cpp | 314 +++++++++++++++--------------
 od-win32/fpux64_80.asm             | 297 +++++++++++++++++++++++++++
 od-win32/fpux86_80.asm             | 295 +++++++++++++++++++++++++++
 3 files changed, 760 insertions(+), 146 deletions(-)

diff --git a/od-win32/fpp_native_msvc_80bit.cpp b/od-win32/fpp_native_msvc_80bit.cpp
index c32d7a94..91976cd3 100644
--- a/od-win32/fpp_native_msvc_80bit.cpp
+++ b/od-win32/fpp_native_msvc_80bit.cpp
@@ -41,6 +41,26 @@ extern "C"
 	extern void _cdecl xfp_cos(void*, void*);
 	extern void _cdecl xfp_tan(void*, void*);
 	extern void _cdecl xfp_atan(void*, void*);
+	extern void _cdecl xfp_asin(void*, void*);
+	extern void _cdecl xfp_acos(void*, void*);
+	extern void _cdecl xfp_atanh(void*, void*);
+	extern void _cdecl xfp_sinh(void*, void*);
+	extern void _cdecl xfp_cosh(void*, void*);
+	extern void _cdecl xfp_tanh(void*, void*);
+
+	extern void _cdecl xfp_rem(void*, void*);
+	extern void _cdecl xfp_rem1(void*, void*);
+	extern void _cdecl xfp_getexp(void*, void*);
+	extern void _cdecl xfp_getman(void*, void*);
+	extern void _cdecl xfp_scale(void*, void*);
+	extern void _cdecl xfp_twotox(void*, void*);
+	extern void _cdecl xfp_etox(void*, void*);
+	extern void _cdecl xfp_etoxm1(void*, void*);
+	extern void _cdecl xfp_tentox(void*, void*);
+	extern void _cdecl xfp_log2(void*, void*);
+	extern void _cdecl xfp_log10(void*, void*);
+	extern void _cdecl xfp_logn(void*, void*);
+	extern void _cdecl xfp_lognp1(void*, void*);
 
 	extern void _cdecl xfp_to_single(void*, uae_u32*);
 	extern void _cdecl xfp_from_single(void*, uae_u32*);
@@ -202,6 +222,78 @@ static void fp_set_mode_native(uae_u32 mode_control)
 	fpu_mode_control = mode_control;
 }
 
+static void fp_get_status(uae_u32 *status)
+{
+	uae_u16 st = xfp_get_status();
+
+	if (st & (1 << 5)) // P
+		*status |= FPSR_INEX2;
+	if (st & (1 << 4)) // U
+		*status |= FPSR_UNFL;
+	if (st & (1 << 3)) // O
+		*status |= FPSR_OVFL;
+	if (st & (1 << 2)) // Z
+		*status |= FPSR_DZ;
+	*status |= fp_status;
+}
+
+static void fp_clear_status(void)
+{
+	xfp_clear_status();
+	fp_status = 0;
+}
+
+static void toxnative(fpdata *fpd, fptype *fp)
+{
+	xfp_x_to_double(&fpd->rfp, fp);
+}
+static void fromxnative(fptype *fp, fpdata *fpd)
+{
+	xfp_x_from_double(&fpd->rfp, fp);
+	fp_clear_status();
+}
+
+static void xfp_to_softfloat(fpdata *fpd)
+{
+	fpd->fpx.high = fpd->rfp.e;
+	fpd->fpx.low = fpd->rfp.m;
+}
+static void xfp_from_softfloat(fpdata *fpd)
+{
+	fpd->rfp.e = fpd->fpx.high;
+	fpd->rfp.m = fpd->fpx.low;
+}
+
+/* Functions for rounding */
+
+// round to float with extended precision exponent
+static void fp_round32(fpdata *fpd)
+{
+	xfp_to_softfloat(fpd);
+	fpd->fpx = floatx80_round32(fpd->fpx, &fs);
+	xfp_from_softfloat(fpd);
+}
+
+// round to double with extended precision exponent
+static void fp_round64(fpdata *fpd)
+{
+	xfp_to_softfloat(fpd);
+	fpd->fpx = floatx80_round64(fpd->fpx, &fs);
+	xfp_from_softfloat(fpd);
+}
+
+// round to float
+static void fp_round_single(fpdata *fpd)
+{
+	xfp_round_single(&fpd->rfp, &fpd->rfp);
+}
+
+// round to double
+static void fp_round_double(fpdata *fpd)
+{
+	xfp_round_double(&fpd->rfp, &fpd->rfp);
+}
+
 
 static bool xfp_changed;
 static bool native_changed;
@@ -237,6 +329,37 @@ static void xfp_setprec(int prec)
 	}
 }
 
+static void xfp_resetnormal(fpdata *fp) // undo xfp_setnormal(); in strict mode also round *fp to the emulated precision
+{
+	if (xfp_changed) {
+		xfp_fldcw(&fpx_mode); // reload fpx_mode into the host FPU (presumably its control word)
+		set_floatx80_rounding_precision(xfp_swprec, &fs); // put back the softfloat precision saved in xfp_swprec
+		xfp_changed = false;
+	}
+	xfp_clear_status(); // NOTE(review): presumably discards host status flags raised by the preceding operation - confirm this is intended
+	if (!currprefs.fpu_strict)
+		return;
+	if (fs.floatx80_rounding_precision == 32)
+		fp_round_single(fp);
+	else if (fs.floatx80_rounding_precision == 64)
+		fp_round_double(fp);
+}
+
+static void xfp_setnormal(void) // switch host FPU + softfloat state to extended precision / round-to-nearest if not already there
+{
+	uae_u16 v = fpx_mode;
+	v |= 3 << 8; // x87 control word PC field (bits 8-9) = 11b: extended precision
+	v &= ~(3 << 10); // x87 control word RC field (bits 10-11) = 00b: round to nearest
+	if (v != fpx_mode) {
+		xfp_fldcw(&v); // load the temporary mode; fpx_mode itself is left untouched
+		xfp_swprec = fs.floatx80_rounding_precision; // remembered so xfp_resetnormal() can restore it
+		set_floatx80_rounding_precision(80, &fs);
+		xfp_changed = true;
+	} else {
+		xfp_changed = false;
+	}
+}
+
 // Must use default precision/rounding mode when calling C-library math functions.
 static void fp_normal_prec(void)
 {
@@ -261,48 +384,6 @@ static uae_u32 fp_get_support_flags(void)
 	return FPU_FEATURE_EXCEPTIONS;
 }
 
-static void fp_get_status(uae_u32 *status)
-{
-	uae_u16 st = xfp_get_status();
-
-	if (st & (1 << 5)) // P
-		*status |= FPSR_INEX2;
-	if (st & (1 << 4)) // U
-		*status |= FPSR_UNFL;
-	if (st & (1 << 3)) // O
-		*status |= FPSR_OVFL;
-	if (st & (1 << 2)) // Z
-		*status |= FPSR_DZ;
-	*status |= fp_status;
-}
-
-static void fp_clear_status(void)
-{
-	xfp_clear_status();
-	fp_status = 0;
-}
-
-static void toxnative(fpdata *fpd, fptype *fp)
-{
-	xfp_x_to_double(&fpd->rfp, fp);
-}
-static void fromxnative(fptype *fp, fpdata *fpd)
-{
-	xfp_x_from_double(&fpd->rfp, fp);
-	fp_clear_status();
-}
-
-static void xfp_to_softfloat(fpdata *fpd)
-{
-	fpd->fpx.high = fpd->rfp.e;
-	fpd->fpx.low = fpd->rfp.m;
-}
-static void xfp_from_softfloat(fpdata *fpd)
-{
-	fpd->rfp.e = fpd->fpx.high;
-	fpd->rfp.m = fpd->fpx.low;
-}
-
 /* Functions for detecting float type */
 static bool fp_is_init(fpdata *fpd)
 {
@@ -422,36 +503,6 @@ static void fp_from_int(fpdata *fpd, uae_s32 src)
 	xfp_from_int(&fpd->rfp, &src);
 }
 
-/* Functions for rounding */
-
-// round to float with extended precision exponent
-static void fp_round32(fpdata *fpd)
-{
-	xfp_to_softfloat(fpd);
-	fpd->fpx = floatx80_round32(fpd->fpx, &fs);
-	xfp_from_softfloat(fpd);
-}
-
-// round to double with extended precision exponent
-static void fp_round64(fpdata *fpd)
-{
-	xfp_to_softfloat(fpd);
-	fpd->fpx = floatx80_round64(fpd->fpx, &fs);
-	xfp_from_softfloat(fpd);
-}
-
-// round to float
-static void fp_round_single(fpdata *fpd)
-{
-	xfp_round_single(&fpd->rfp, &fpd->rfp);
-}
-
-// round to double
-static void fp_round_double(fpdata *fpd)
-{
-	xfp_round_double(&fpd->rfp, &fpd->rfp);
-}
-
 static const TCHAR *fp_print(fpdata *fpd, int mode)
 {
 	static TCHAR fsout[32];
@@ -485,22 +536,6 @@ static const TCHAR *fp_print(fpdata *fpd, int mode)
 	return fsout;
 }
 
-static void fp_round_prec(fpdata *fpd, int prec)
-{
-	if (prec == 64) {
-		fp_round_double(fpd);
-	} else if (prec == 32) {
-		fp_round_single(fpd);
-	}
-}
-
-static void fp_round(fpdata *fpd)
-{
-	if (!currprefs.fpu_strict)
-		return;
-	fp_round_prec(fpd, fpu_prec);
-}
-
 /* Arithmetic functions */
 
 static void fp_move(fpdata *a, fpdata *b, int prec)
@@ -517,15 +552,11 @@ static void fp_int(fpdata *a, fpdata *b)
 
 static void fp_getexp(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_getexp(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_getexp(&a->rfp, &b->rfp);
 }
 static void fp_getman(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_getman(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_getman(&a->rfp, &b->rfp);
 }
 static void fp_div(fpdata *a, fpdata *b, int prec)
 {
@@ -535,57 +566,48 @@ static void fp_div(fpdata *a, fpdata *b, int prec)
 }
 static void fp_mod(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s)
 {
-	xfp_to_softfloat(a);
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_mod(a->fpx, b->fpx, q, s, &fs);
-	xfp_from_softfloat(a);
+	xfp_rem(&a->rfp, &b->rfp); // NOTE(review): q and s are no longer written on this path (the removed floatx80_mod call received them) - confirm callers do not rely on them
 }
 
 static void fp_rem(fpdata *a, fpdata *b, uae_u64 *q, uae_u8 *s)
 {
-	xfp_to_softfloat(a);
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_rem(a->fpx, b->fpx, q, s, &fs);
-	xfp_from_softfloat(a);
+	xfp_rem1(&a->rfp, &b->rfp); // NOTE(review): q and s are no longer written on this path (the removed floatx80_rem call received them) - confirm callers do not rely on them
 }
 
 static void fp_scale(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(a);
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_scale(a->fpx, b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_scale(&a->rfp, &b->rfp);
 }
 
 static void fp_sinh(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_sinh(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_sinh(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_lognp1(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_lognp1(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_lognp1(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_etoxm1(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_etoxm1(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_etoxm1(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_tanh(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_tanh(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_tanh(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_asin(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_asin(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_asin(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_atanh(fpdata *a, fpdata *b)
 {
@@ -595,51 +617,51 @@ static void fp_atanh(fpdata *a, fpdata *b)
 }
 static void fp_etox(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_etox(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_etox(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_twotox(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_twotox(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_twotox(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_tentox(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_tentox(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_tentox(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_logn(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_logn(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_logn(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_log10(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_log10(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_log10(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_log2(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_log2(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_log2(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_cosh(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_cosh(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_cosh(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 static void fp_acos(fpdata *a, fpdata *b)
 {
-	xfp_to_softfloat(b);
-	a->fpx = floatx80_acos(b->fpx, &fs);
-	xfp_from_softfloat(a);
+	xfp_setnormal();
+	xfp_acos(&a->rfp, &b->rfp);
+	xfp_resetnormal(a);
 }
 
 static void fp_intrz(fpdata *a, fpdata *b)
@@ -714,10 +736,10 @@ static void fp_sglmul(fpdata *a, fpdata *b)
 	b->rfp.m &= 0xFFFFFF0000000000;
 	xfp_mul(&a->rfp, &b->rfp);
 	fpdata fpx = *a;
+	xfp_resetprec();
 	fp_round32(a);
 	if (fpx.rfp.m != a->rfp.m)
 		fp_status |= FPSR_INEX2;
-	xfp_resetprec();
 }
 static void fp_sgldiv(fpdata *a, fpdata *b)
 {
diff --git a/od-win32/fpux64_80.asm b/od-win32/fpux64_80.asm
index 7ab820f0..f2209341 100644
--- a/od-win32/fpux64_80.asm
+++ b/od-win32/fpux64_80.asm
@@ -1,4 +1,6 @@
 
+; 64-bit assembly functions for native 80-bit FPU emulation
+
 global xfp_int
 global xfp_mov
 global xfp_fldcw
@@ -21,10 +23,30 @@ global xfp_add
 global xfp_sub
 global xfp_sqrt
 
+global xfp_rem
+global xfp_rem1
+global xfp_getexp
+global xfp_getman
+global xfp_scale
+global xfp_twotox
+global xfp_etox
+global xfp_etoxm1
+global xfp_tentox
+global xfp_log2
+global xfp_log10
+global xfp_logn
+global xfp_lognp1
+
 global xfp_sin
 global xfp_cos
 global xfp_tan
 global xfp_atan
+global xfp_asin
+global xfp_acos
+global xfp_atanh
+global xfp_sinh
+global xfp_cosh
+global xfp_tanh
 
 global xfp_get_status
 global xfp_clear_status
@@ -37,6 +59,10 @@ bits 64
 	fld tword[rdx]
 %endmacro
 
+%macro reloadfp1 0
+	fld tword[rdx]
+%endmacro
+
 %macro loadfp2 0
 	fld tword[rcx]
 	fld tword[rdx]
@@ -181,11 +207,282 @@ xfp_sin:
 xfp_tan:
 	loadfp1
 	fptan
+	fstp st0
 	storefp
 	ret
 
 xfp_atan:
 	loadfp1
+	fld1
+	fpatan
+	storefp
+	ret
+
+
+xfp_rem:
+	loadfp2
+	fprem		; st0 = st0 rem st1 (truncating). NOTE(review): loadfp2 pushes [rcx] then [rdx], so st0 is the 2nd operand here; xfp_scale does fxch first - confirm operand order. fprem may also stop at a partial remainder (C2 set) and need repeating.
+	fstp st1	; overwrite the other operand with the result and pop
+	storefp
+	ret
+
+xfp_rem1:
+	loadfp2
+	fprem1		; st0 = IEEE remainder of st0 by st1. NOTE(review): loadfp2 pushes [rcx] then [rdx], so st0 is the 2nd operand here; xfp_scale does fxch first - confirm operand order. fprem1 may also stop at a partial remainder (C2 set) and need repeating.
+	fstp st1	
+	storefp
+	ret
+
+xfp_getexp:
+	loadfp1
+	fxtract
+	fstp st0
+	storefp
+	ret
+
+xfp_getman:
+	loadfp1
+	fxtract
+	fstp st1
+	storefp
+	ret
+
+xfp_scale:
+	loadfp2
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_twotox:
+	loadfp1
+	frndint
+	reloadfp1
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_etox:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_etoxm1:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	fsub dword[one]
+	storefp
+	ret
+
+xfp_tentox:
+	loadfp1
+	fldl2t
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+xfp_log2:
+	loadfp1
+	fld1
+	fxch
+	fyl2x
+	storefp
+	ret
+
+xfp_log10:
+	loadfp1
+	fldlg2
+	fxch
+	fyl2x
+	storefp
+	ret
+
+xfp_logn:
+	loadfp1
+	fldln2
+	fxch
+	fyl2x
+	storefp
+	ret
+
+xfp_lognp1:
+	loadfp1
+	fldln2
+	fxch
+	fyl2xp1
+	storefp
+	ret
+
+
+xfp_asin:	; asin(x) = atan(x / sqrt(1 - x*x)); label matches "global xfp_asin" (no underscore prefix in this file)
+	loadfp1
+	fmul st0,st0	; st0 = x*x
+	fld1		; st0 = 1, st1 = x*x
+	fsubrp		; st0 = 1 - x*x
+	fsqrt		; st0 = sqrt(1 - x*x)
+	reloadfp1
+	fxch		; st0 = sqrt(1 - x*x), st1 = x
+	fpatan		; st0 = atan(st1 / st0)
+	storefp
+	ret
+
+xfp_acos:	; acos(x) = pi/2 - asin(x); label matches "global xfp_acos" (no underscore prefix in this file)
+	loadfp1
+	fmul st0,st0	; st0 = x*x
+	fld1		; st0 = 1, st1 = x*x
+	fsubrp		; st0 = 1 - x*x
+	fsqrt		; st0 = sqrt(1 - x*x)
+	reloadfp1
+	fxch		; st0 = sqrt(1 - x*x), st1 = x
 	fpatan
+	fld tword[pihalf]	; st0 = pi/2, st1 = atan result
+	fsubrp		; st0 = pi/2 - atan result
+	storefp
+	ret
+
+xfp_atanh:	; intended: atanh(x) = ln((1 + x) / (1 - x)) / 2; label matches "global xfp_atanh" (no underscore prefix in this file)
+	loadfp1
+	fld1		; st0 = 1, st1 = x
+	fadd st1,st0	; st1 = x + 1
+	fsub st0,st2	; NOTE(review): only two values appear to have been pushed so far, so st2 looks empty; intended st0 = 1 - x? confirm before using this routine
+	fdivp		; st0 = old st1 / old st0
+	fldln2
+	fxch		; st0 = quotient, st1 = ln(2)
+	fyl2x		; st0 = ln(2) * log2(quotient) = ln(quotient)
+	fld1
+	fchs
+	fxch		; st0 = ln(quotient), st1 = -1
+	fscale		; st0 = st0 * 2^-1
+	fstp st1	; drop the -1
 	storefp
 	ret
+
+xfp_sinh:	; sinh(x) = (e^x - e^-x) / 2; label matches "global xfp_sinh" (no underscore prefix in this file)
+	loadfp1
+	fldl2e
+	fmul st0,st1	; st0 = y = x * log2(e)
+	fst st1		; st0 = st1 = y
+	fchs		; st0 = -y
+	fld st0		; st0 = st1 = -y, st2 = y
+	frndint		; st0 = rnd(-y)
+	fxch		; st0 = -y, st1 = rnd(-y)
+	fsub st0,st1	; st0 = -y - rnd(-y)
+	f2xm1
+	fadd dword[one]	; st0 = 2^(-y - rnd(-y))
+	fscale		; st0 = 2^-y = e^-x
+	fxch st2	; st0 = y, st2 = e^-x
+	fst st1		; st1 = y
+	frndint		; st0 = rnd(y)
+	fxch		; st0 = y, st1 = rnd(y)
+	fsub st0,st1	; st0 = y - rnd(y)
+	f2xm1
+	fadd dword[one]	; st0 = 2^(y - rnd(y))
+	fscale		; st0 = 2^y = e^x
+	fstp st1	; drop rnd(y): st0 = e^x, st1 = e^-x
+	fsubrp		; st0 = e^x - e^-x
+	fld1
+	fchs
+	fxch		; st0 = difference, st1 = -1
+	fscale		; st0 = difference * 2^-1
+	fstp st1	; drop the -1
+	storefp
+	ret
+
+xfp_cosh:	; cosh(x) = (e^x + e^-x) / 2; label matches "global xfp_cosh" (no underscore prefix in this file)
+	loadfp1
+	fldl2e
+	fmul st0,st1	; st0 = y = x * log2(e)
+	fst st1		; st0 = st1 = y
+	fchs		; st0 = -y
+	fld st0		; st0 = st1 = -y, st2 = y
+	frndint		; st0 = rnd(-y)
+	fxch		; st0 = -y, st1 = rnd(-y)
+	fsub st0,st1	; st0 = -y - rnd(-y)
+	f2xm1
+	fadd dword[one]	; st0 = 2^(-y - rnd(-y))
+	fscale		; st0 = 2^-y = e^-x
+	fxch st2	; st0 = y, st2 = e^-x
+	fst st1		; st1 = y
+	frndint		; st0 = rnd(y)
+	fxch		; st0 = y, st1 = rnd(y)
+	fsub st0,st1	; st0 = y - rnd(y)
+	f2xm1
+	fadd dword[one]	; st0 = 2^(y - rnd(y))
+	fscale		; st0 = 2^y = e^x
+	fstp st1	; drop rnd(y): st0 = e^x, st1 = e^-x
+	faddp		; st0 = e^x + e^-x
+	fld1
+	fchs
+	fxch		; st0 = sum, st1 = -1
+	fscale		; st0 = sum * 2^-1
+	fstp st1	; drop the -1
+	storefp
+	ret
+
+xfp_tanh:	; tanh(x) = (e^x - e^-x) / (e^x + e^-x); label matches "global xfp_tanh" (no underscore prefix in this file)
+	loadfp1
+	fldl2e
+	fmul st0,st1	; st0 = y = x * log2(e)
+	fst st1		; st0 = st1 = y
+	fchs		; st0 = -y
+	fld st0		; st0 = st1 = -y, st2 = y
+	frndint		; st0 = rnd(-y)
+	fxch		; st0 = -y, st1 = rnd(-y)
+	fsub st0,st1	; st0 = -y - rnd(-y)
+	f2xm1
+	fadd dword[one]	; st0 = 2^(-y - rnd(-y))
+	fscale		; st0 = 2^-y = e^-x
+	fxch st2	; st0 = y, st2 = e^-x
+	fst st1		; st1 = y
+	frndint		; st0 = rnd(y)
+	fxch		; st0 = y, st1 = rnd(y)
+	fsub st0,st1	; st0 = y - rnd(y)
+	f2xm1
+	fadd dword[one]	; st0 = 2^(y - rnd(y))
+	fscale		; st0 = 2^y = e^x
+	fst st1		; st0 = st1 = e^x, st2 = e^-x
+	fadd st0,st2	; st0 = e^x + e^-x
+	fxch st2	; st0 = e^-x, st1 = e^x, st2 = sum
+	fsubp		; st0 = e^x - e^-x, st1 = sum
+	fdivrp		; st0 = difference / sum
+	storefp
+	ret
+
+align 4
+
+one:
+	dd 1.0
+pihalf:
+	dd 0x2168c234,0xc90fdaa2,0x00003fff
diff --git a/od-win32/fpux86_80.asm b/od-win32/fpux86_80.asm
index 1e0a0062..c88a5703 100644
--- a/od-win32/fpux86_80.asm
+++ b/od-win32/fpux86_80.asm
@@ -1,4 +1,6 @@
 
+; 32-bit assembly functions for native 80-bit FPU emulation
+
 global _xfp_int
 global _xfp_mov
 global _xfp_fldcw
@@ -21,10 +23,30 @@ global _xfp_add
 global _xfp_sub
 global _xfp_sqrt
 
+global _xfp_rem
+global _xfp_rem1
+global _xfp_getexp
+global _xfp_getman
+global _xfp_scale
+global _xfp_twotox
+global _xfp_etox
+global _xfp_etoxm1
+global _xfp_tentox
+global _xfp_log2
+global _xfp_log10
+global _xfp_logn
+global _xfp_lognp1
+
 global _xfp_sin
 global _xfp_cos
 global _xfp_tan
 global _xfp_atan
+global _xfp_asin
+global _xfp_acos
+global _xfp_atanh
+global _xfp_sinh
+global _xfp_cosh
+global _xfp_tanh
 
 global _xfp_get_status
 global _xfp_clear_status
@@ -37,6 +59,10 @@ section .text
 	fld tword[ecx]
 %endmacro
 
+%macro reloadfp1 0
+	fld tword[ecx]
+%endmacro
+
 %macro loadfp2 0
 	mov eax,[esp+4]
 	mov ecx,[esp+8]
@@ -203,11 +229,280 @@ _xfp_sin:
 _xfp_tan:
 	loadfp1
 	fptan
+	fstp st0
 	storefp
 	ret
 
 _xfp_atan:
 	loadfp1
+	fld1
+	fpatan
+	storefp
+	ret
+
+_xfp_rem:
+	loadfp2
+	fprem
+	fstp st1
+	storefp
+	ret
+
+_xfp_rem1:
+	loadfp2
+	fprem1
+	fstp st1	
+	storefp
+	ret
+
+_xfp_getexp:
+	loadfp1
+	fxtract
+	fstp st0
+	storefp
+	ret
+
+_xfp_getman:
+	loadfp1
+	fxtract
+	fstp st1
+	storefp
+	ret
+
+_xfp_scale:
+	loadfp2
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_twotox:
+	loadfp1
+	frndint
+	reloadfp1
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_etox:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_etoxm1:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	fsub dword[one]
+	storefp
+	ret
+
+_xfp_tentox:
+	loadfp1
+	fldl2t
+	fmul st0,st1
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_log2:
+	loadfp1
+	fld1
+	fxch
+	fyl2x
+	storefp
+	ret
+
+_xfp_log10:
+	loadfp1
+	fldlg2
+	fxch
+	fyl2x
+	storefp
+	ret
+
+_xfp_logn:
+	loadfp1
+	fldln2
+	fxch
+	fyl2x
+	storefp
+	ret
+
+_xfp_lognp1:
+	loadfp1
+	fldln2
+	fxch
+	fyl2xp1
+	storefp
+	ret
+
+_xfp_asin:
+	loadfp1
+	fmul st0,st0
+	fld1
+	fsubrp
+	fsqrt
+	reloadfp1
+	fxch
 	fpatan
 	storefp
 	ret
+
+_xfp_acos:
+	loadfp1
+	fmul st0,st0
+	fld1
+	fsubrp
+	fsqrt
+	reloadfp1
+	fxch
+	fpatan
+	fld tword[pihalf]
+	fsubrp
+	storefp
+	ret
+
+_xfp_atanh:	; intended: atanh(x) = ln((1 + x) / (1 - x)) / 2
+	loadfp1
+	fld1		; st0 = 1, st1 = x
+	fadd st1,st0	; st1 = x + 1
+	fsub st0,st2	; NOTE(review): only two values appear to have been pushed so far, so st2 looks empty; intended st0 = 1 - x? confirm before using this routine
+	fdivp		; st0 = old st1 / old st0
+	fldln2
+	fxch		; st0 = quotient, st1 = ln(2)
+	fyl2x		; st0 = ln(2) * log2(quotient) = ln(quotient)
+	fld1
+	fchs
+	fxch		; st0 = ln(quotient), st1 = -1
+	fscale		; st0 = st0 * 2^-1
+	fstp st1	; drop the -1
+	storefp
+	ret
+
+_xfp_sinh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	fsubrp
+	fld1
+	fchs
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_cosh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fstp st1
+	faddp
+	fld1
+	fchs
+	fxch
+	fscale
+	fstp st1
+	storefp
+	ret
+
+_xfp_tanh:
+	loadfp1
+	fldl2e
+	fmul st0,st1
+	fst st1
+	fchs
+	fld st0
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fxch st2
+	fst st1
+	frndint
+	fxch
+	fsub st0,st1
+	f2xm1
+	fadd dword[one]
+	fscale
+	fst st1
+	fadd st0,st2
+	fxch st2
+	fsubp
+	fdivrp
+	storefp
+	ret
+
+align 4
+
+one:
+	dd 1.0
+pihalf:
+	dd 0x2168c234,0xc90fdaa2,0x00003fff
-- 
2.47.3