From: Toni Wilen Date: Thu, 26 May 2005 17:09:11 +0000 (+0300) Subject: imported winuaesrc1000b9.zip X-Git-Tag: 2100~312 X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=d442a2c881ac95bf99024cef99c0d433f43cbafe;p=francis%2Fwinuae.git imported winuaesrc1000b9.zip --- diff --git a/catweasel.c b/catweasel.c index 6fc18133..7d9d919d 100755 --- a/catweasel.c +++ b/catweasel.c @@ -84,7 +84,10 @@ uae_u32 catweasel_do_bget (uaecptr addr) if (addr >= 0x100) return 0; buf1[0] = (uae_u8)addr; - DeviceIoControl (handle, CW_PEEKREG_FULL, buf1, 1, buf2, 1, &did_read, 0); + if (handle != INVALID_HANDLE_VALUE) + DeviceIoControl (handle, CW_PEEKREG_FULL, buf1, 1, buf2, 1, &did_read, 0); + else + buf2[0] = ioport_read (cwc.iobase + addr); //write_log ("G %02.2X %02.2X %d\n", buf1[0], buf2[0], did_read); return buf2[0]; } @@ -98,7 +101,10 @@ void catweasel_do_bput (uaecptr addr, uae_u32 b) return; buf[0] = (uae_u8)addr; buf[1] = b; - DeviceIoControl (handle, CW_POKEREG_FULL, buf, 2, 0, 0, &did_read, 0); + if (handle != INVALID_HANDLE_VALUE) + DeviceIoControl (handle, CW_POKEREG_FULL, buf, 2, 0, 0, &did_read, 0); + else + ioport_write (cwc.iobase + addr, b); //write_log ("P %02.2X %02.2X %d\n", (uae_u8)addr, (uae_u8)b, did_read); } @@ -114,42 +120,52 @@ int catweasel_init (void) if (!currprefs.catweasel) return 0; - for (i = 0; i < 4; i++) { - if (currprefs.catweasel > 0) - i = currprefs.catweasel; - sprintf (name, "\\\\.\\CAT%d_F0", i); - handle = CreateFile (name, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, - OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - if (handle != INVALID_HANDLE_VALUE || currprefs.catweasel > 0) - break; - } - if (handle == INVALID_HANDLE_VALUE) { - write_log ("No Catweasel detected\n"); - goto fail; - } - if (!DeviceIoControl (handle, CW_GET_VERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) { - write_log ("CW_GET_VERSION failed %d\n", GetLastError()); - goto fail; - } - write_log ("CW driver version string '%s'\n", 
buffer); - if (!DeviceIoControl (handle, CW_GET_HWVERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) { - write_log ("CW_GET_HWVERSION failed %d\n", GetLastError()); - goto fail; - } - write_log ("CW: v=%d 14=%d 28=%d 56=%d joy=%d dpm=%d sid=%d kb=%d sidfifo=%d\n", + + if (currprefs.catweasel >= 100) { + cwc.type = currprefs.catweasel >= 0x400 ? 3 : 1; + cwc.iobase = currprefs.catweasel; + if (!ioport_init()) + goto fail; + strcpy(name, "[DIRECT]"); + } else { + for (i = 0; i < 4; i++) { + if (currprefs.catweasel > 0) + i = currprefs.catweasel; + sprintf (name, "\\\\.\\CAT%d_F0", i); + handle = CreateFile (name, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); + if (handle != INVALID_HANDLE_VALUE || currprefs.catweasel > 0) + break; + } + if (handle == INVALID_HANDLE_VALUE) { + write_log ("No Catweasel detected\n"); + goto fail; + } + if (!DeviceIoControl (handle, CW_GET_VERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) { + write_log ("CW_GET_VERSION failed %d\n", GetLastError()); + goto fail; + } + write_log ("CW driver version string '%s'\n", buffer); + if (!DeviceIoControl (handle, CW_GET_HWVERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) { + write_log ("CW_GET_HWVERSION failed %d\n", GetLastError()); + goto fail; + } + write_log ("CW: v=%d 14=%d 28=%d 56=%d joy=%d dpm=%d sid=%d kb=%d sidfifo=%d\n", buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5], buffer[6], buffer[7], ((uae_u32*)(buffer + 8))[0]); - if (!DeviceIoControl (handle, CW_LOCK_EXCLUSIVE, 0, 0, buffer, sizeof (buffer), &len, 0)) { - write_log ("CW_LOCK_EXCLUSIVE failed %d\n", GetLastError()); - goto fail; + if (!DeviceIoControl (handle, CW_LOCK_EXCLUSIVE, 0, 0, buffer, sizeof (buffer), &len, 0)) { + write_log ("CW_LOCK_EXCLUSIVE failed %d\n", GetLastError()); + goto fail; + } + model = *((uae_u32*)(buffer + 4)); + base = *((uae_u32*)(buffer + 0)); + cwc.type = model == 0 ? 1 : model == 2 ? 
4 : 3; + cwc.iobase = base; } - model = *((uae_u32*)(buffer + 4)); - base = *((uae_u32*)(buffer + 0)); - cwc.type = model == 0 ? 1 : model == 2 ? 4 : 3; - cwc.iobase = base; write_log ("Catweasel MK%d @%p (%s) detected and enabled\n", cwc.type, cwc.iobase, name); - catweasel_do_bput (3, 0x41); /* enable MK3-mode */ + if (cwc.type == CATWEASEL_TYPE_MK4) + catweasel_do_bput (3, 0x41); /* enable MK3-mode */ catweasel_init_controller (&cwc); return 1; fail: @@ -163,6 +179,7 @@ void catweasel_free (void) if (handle != INVALID_HANDLE_VALUE) CloseHandle (handle); handle = INVALID_HANDLE_VALUE; + ioport_free(); cwc.type = 0; } diff --git a/cfgfile.c b/cfgfile.c index 2a24fae4..86d48fc7 100755 --- a/cfgfile.c +++ b/cfgfile.c @@ -112,7 +112,7 @@ static struct cfg_lines opttable[] = {"floppy3", "Diskfile for drive 3" }, {"hardfile", "access,sectors, surfaces, reserved, blocksize, path format" }, {"filesystem", "access,'Amiga volume-name':'host directory path' - where 'access' can be 'read-only' or 'read-write'" }, - {"catweasel_io","Catweasel board io base address" } + {"catweasel", "Catweasel board io base address" } }; static const char *guimode1[] = { "no", "yes", "nowait", 0 }; @@ -412,7 +412,10 @@ static void save_options (struct zfile *f, struct uae_prefs *p, int type) cfgfile_write (f, "blitter_cycle_exact=%s\n", p->blitter_cycle_exact ? "true" : "false"); cfgfile_write (f, "log_illegal_mem=%s\n", p->illegal_mem ? "true" : "false"); - cfgfile_write (f, "catweasel=%d\n", p->catweasel); + if (p->catweasel >= 100) + cfgfile_write (f, "catweasel=0x%x\n", p->catweasel); + else + cfgfile_write (f, "catweasel=%d\n", p->catweasel); cfgfile_write (f, "kbd_lang=%s\n", (p->keyboard_lang == KBD_LANG_DE ? "de" : p->keyboard_lang == KBD_LANG_DK ? 
"dk" diff --git a/compemu_fpp.c b/compemu_fpp.c index db9564f4..9ff0945f 100755 --- a/compemu_fpp.c +++ b/compemu_fpp.c @@ -486,7 +486,6 @@ STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad) } } abort(); - return -1; } void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra) @@ -866,9 +865,9 @@ void comp_frestore_opp (uae_u32 opcode) m68k_areg (regs, opcode & 7) = ad; } -static fptype const_e=2.718281828; /* Got some more digits? */ -static fptype const_log10_e=0.4342944819; -static fptype const_loge_10=2.302585093; +static fptype const_e=2.718281828459045235360; +static fptype const_log10_e=0.434294481903251827651; +static fptype const_loge_10=2.302585092994045684018; static fptype power10[]={1e0,1e1,1e2,1e4,1e8,1e16,1e32,1e64,1e128,1e256 #if USE_LONG_DOUBLE , 1e512, 1e1024, 1e2048, 1e4096 @@ -1192,16 +1191,24 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) MAKE_FPSR (src); break; case 0x01: /* FINT */ - FAIL(1); - return; dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + frndint_rr(reg,src); + MAKE_FPSR (reg); + break; case 0x02: /* FSINH */ - FAIL(1); - return; - dont_care_fflags(); - regs.fp[reg] = sinh (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsinh_rr(reg,src); + MAKE_FPSR (reg); break; case 0x03: /* FINTRZ */ #if USE_X86_FPUCW @@ -1243,50 +1250,64 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) MAKE_FPSR (reg); break; case 0x06: /* FLOGNP1 */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = log (src + 1.0); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + flogNP1_rr(reg,src); + MAKE_FPSR (reg); break; case 0x08: /* FETOXM1 */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = exp (src) - 1.0; - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + 
FAIL(1); /* Illegal instruction */ + return; + } + fetoxM1_rr(reg,src); + MAKE_FPSR (reg); break; case 0x09: /* FTANH */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = tanh (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + ftanh_rr(reg,src); + MAKE_FPSR (reg); break; case 0x0a: /* FATAN */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = atan (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fatan_rr(reg,src); + MAKE_FPSR (reg); break; case 0x0c: /* FASIN */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = asin (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fasin_rr(reg,src); + MAKE_FPSR (reg); break; case 0x0d: /* FATANH */ - FAIL(1); - return; dont_care_fflags(); -#if 1 /* The BeBox doesn't have atanh, and it isn't in the HPUX libm either */ - regs.fp[reg] = log ((1 + src) / (1 - src)) / 2; -#else - regs.fp[reg] = atanh (src); -#endif - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fatanh_rr(reg,src); + MAKE_FPSR (reg); break; case 0x0e: /* FSIN */ dont_care_fflags(); @@ -1299,11 +1320,14 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) MAKE_FPSR (reg); break; case 0x0f: /* FTAN */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = tan (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + ftan_rr(reg,src); + MAKE_FPSR (reg); break; case 0x10: /* FETOX */ dont_care_fflags(); @@ -1326,26 +1350,37 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) MAKE_FPSR (reg); break; case 0x12: /* FTENTOX */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = pow (10.0, src); - MAKE_FPSR 
(regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + ftentox_rr(reg,src); + MAKE_FPSR (reg); break; case 0x14: /* FLOGN */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = log (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + flogN_rr(reg,src); + MAKE_FPSR (reg); break; + case 0x15: /* FLOG10 */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = log10 (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + flog10_rr(reg,src); + MAKE_FPSR (reg); break; + case 0x16: /* FLOG2 */ dont_care_fflags(); src=get_fp_value (opcode, extra); @@ -1369,11 +1404,14 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) MAKE_FPSR (reg); break; case 0x19: /* FCOSH */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = cosh (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fcosh_rr(reg,src); + MAKE_FPSR (reg); break; case 0x1a: /* FNEG */ case 0x5a: @@ -1388,11 +1426,14 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) MAKE_FPSR (reg); break; case 0x1c: /* FACOS */ - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = acos (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + facos_rr(reg,src); + MAKE_FPSR (reg); break; case 0x1d: /* FCOS */ dont_care_fflags(); @@ -1493,10 +1534,13 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) break; case 0x26: /* FSCALE */ dont_care_fflags(); - FAIL(1); - return; - regs.fp[reg] *= exp (log (2.0) * src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fscale_rr(reg,src); + MAKE_FPSR (reg); break; case 0x27: /* FSGLMUL 
*/ dont_care_fflags(); @@ -1528,12 +1572,15 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) case 0x35: case 0x36: case 0x37: - FAIL(1); - return; dont_care_fflags(); - regs.fp[reg] = sin (src); - regs.fp[extra & 7] = cos (src); - MAKE_FPSR (regs.fp[reg]); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsincos_rr(reg,extra & 7,src); + MAKE_FPSR (extra & 7); + MAKE_FPSR (reg); break; case 0x38: /* FCMP */ src=get_fp_value (opcode, extra); diff --git a/compemu_fpp_old.c b/compemu_fpp_old.c new file mode 100755 index 00000000..db9564f4 --- /dev/null +++ b/compemu_fpp_old.c @@ -0,0 +1,1564 @@ +/* + * UAE - The Un*x Amiga Emulator + * + * MC68881 emulation + * + * Copyright 1996 Herman ten Brugge + * Adapted for JIT compilation (c) Bernd Meyer, 2000 + */ + +#include <math.h> + +#include "sysconfig.h" +#include "sysdeps.h" + +#include "config.h" +#include "options.h" +#include "memory.h" +#include "custom.h" +#include "newcpu.h" +#include "ersatz.h" +#include "md-fpp.h" +#include "compemu.h" + +#define MAKE_FPSR(r) do { fmov_rr(FP_RESULT,r); } while (0) + +#define delay //nop() ;nop() +#define delay2 //nop() ;nop() + +uae_s32 temp_fp[3]; /* To convert between FP/integer */ + +/* return register number, or -1 for failure */ +STATIC_INLINE int get_fp_value (uae_u32 opcode, uae_u16 extra) +{ + uaecptr tmppc; + uae_u16 tmp; + int size; + int mode; + int reg; + double* src; + uae_u32 ad = 0; + static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + + if ((extra & 0x4000) == 0) { + return (extra >> 10) & 7; + } + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + switch (mode) { + case 0: + switch (size) { + case 6: + sign_extend_8_rr(S1,reg); + mov_l_mr((uae_u32)temp_fp,S1); + delay2; + fmovi_rm(FS1,(uae_u32)temp_fp); + return FS1; + case 4: + sign_extend_16_rr(S1,reg); + mov_l_mr((uae_u32)temp_fp,S1); + delay2; + fmovi_rm(FS1,(uae_u32)temp_fp); + 
return FS1; + case 0: + mov_l_mr((uae_u32)temp_fp,reg); + delay2; + fmovi_rm(FS1,(uae_u32)temp_fp); + return FS1; + case 1: + mov_l_mr((uae_u32)temp_fp,reg); + delay2; + fmovs_rm(FS1,(uae_u32)temp_fp); + return FS1; + default: + return -1; + } + return -1; /* Should be unreachable */ + case 1: + return -1; /* Genuine invalid instruction */ + default: + break; + } + /* OK, we *will* have to load something from an address. Let's make + sure we know how to handle that, or quit early --- i.e. *before* + we do any postincrement/predecrement that we may regret */ + + switch (size) { + case 3: + return -1; + case 0: + case 1: + case 2: + case 4: + case 5: + case 6: + break; + default: + return -1; + } + + switch (mode) { + case 2: + ad=S1; /* We will change it, anyway ;-) */ + mov_l_rr(ad,reg+8); + break; + case 3: + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size])); + break; + case 4: + ad=S1; + + lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size])); + mov_l_rr(ad,reg+8); + break; + case 5: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(ad,ad,off); + break; + } + case 6: + { + uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + calc_disp_ea_020(reg+8,dp,ad,S2); + break; + } + case 7: + switch (reg) { + case 0: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 1: + { + uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 2: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,address+PC16off); + break; + } + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + 
m68k_pc_offset; + ad=S1; + if (size == 6) + address++; + mov_l_ri(ad,address); + m68k_pc_offset+=sz2[size]; + break; + } + default: + return -1; + } + } + + switch (size) { + case 0: + readlong(ad,S2,S3); + mov_l_mr((uae_u32)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uae_u32)temp_fp); + break; + case 1: + readlong(ad,S2,S3); + mov_l_mr((uae_u32)temp_fp,S2); + delay2; + fmovs_rm(FS1,(uae_u32)temp_fp); + break; + case 2: + readword(ad,S2,S3); + mov_w_mr(((uae_u32)temp_fp)+8,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uae_u32)(temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uae_u32)(temp_fp),S2); + delay2; + fmov_ext_rm(FS1,(uae_u32)(temp_fp)); + break; + case 3: + return -1; /* Some silly "packed" stuff */ + case 4: + readword(ad,S2,S3); + sign_extend_16_rr(S2,S2); + mov_l_mr((uae_u32)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uae_u32)temp_fp); + break; + case 5: + readlong(ad,S2,S3); + mov_l_mr(((uae_u32)temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uae_u32)(temp_fp),S2); + delay2; + fmov_rm(FS1,(uae_u32)(temp_fp)); + break; + case 6: + readbyte(ad,S2,S3); + sign_extend_8_rr(S2,S2); + mov_l_mr((uae_u32)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uae_u32)temp_fp); + break; + default: + return -1; + } + return FS1; +} + +/* return of -1 means failure, >=0 means OK */ +STATIC_INLINE int put_fp_value (int val, uae_u32 opcode, uae_u16 extra) +{ + uae_u16 tmp; + uaecptr tmppc; + int size; + int mode; + int reg; + uae_u32 ad; + static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + + if ((extra & 0x4000) == 0) { + fmov_rr((extra>>10)&7,val); + return 0; + } + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + ad = -1; + switch (mode) { + case 0: + switch (size) { + case 6: + fmovi_mr((uae_u32)temp_fp,val); + delay; + mov_b_rm(reg,(uae_u32)temp_fp); + return 0; + case 4: + fmovi_mr((uae_u32)temp_fp,val); + delay; + mov_w_rm(reg,(uae_u32)temp_fp); + return 0; 
+ case 0: + fmovi_mr((uae_u32)temp_fp,val); + delay; + mov_l_rm(reg,(uae_u32)temp_fp); + return 0; + case 1: + fmovs_mr((uae_u32)temp_fp,val); + delay; + mov_l_rm(reg,(uae_u32)temp_fp); + return 0; + default: + return -1; + } + case 1: + return -1; /* genuine invalid instruction */ + default: break; + } + + /* Let's make sure we get out *before* doing something silly if + we can't handle the size */ + switch (size) { + case 0: + case 4: + case 5: + case 6: + case 2: + case 1: + break; + case 3: + default: + return -1; + } + + switch (mode) { + case 2: + ad=S1; + mov_l_rr(ad,reg+8); + break; + case 3: + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size])); + break; + case 4: + ad=S1; + lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size])); + mov_l_rr(ad,reg+8); + break; + case 5: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_rr(ad,reg+8); + add_l_ri(ad,off); + break; + } + case 6: + { + uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + calc_disp_ea_020(reg+8,dp,ad,S2); + break; + } + case 7: + switch (reg) { + case 0: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 1: + { + uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 2: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,address+PC16off); + break; + } + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + ad=S1; + mov_l_ri(ad,address); + m68k_pc_offset+=sz2[size]; + break; + } + default: + return -1; + } + } + switch (size) { + case 0: + fmovi_mr((uae_u32)temp_fp,val); + delay; + 
mov_l_rm(S2,(uae_u32)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 1: + fmovs_mr((uae_u32)temp_fp,val); + delay; + mov_l_rm(S2,(uae_u32)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 2: + fmov_ext_mr((uae_u32)temp_fp,val); + delay; + mov_w_rm(S2,(uae_u32)temp_fp+8); + writeword_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uae_u32)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uae_u32)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 3: return -1; /* Packed */ + + case 4: + fmovi_mr((uae_u32)temp_fp,val); + delay; + mov_l_rm(S2,(uae_u32)temp_fp); + writeword_clobber(ad,S2,S3); + break; + case 5: + fmov_mr((uae_u32)temp_fp,val); + delay; + mov_l_rm(S2,(uae_u32)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uae_u32)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 6: + fmovi_mr((uae_u32)temp_fp,val); + delay; + mov_l_rm(S2,(uae_u32)temp_fp); + writebyte(ad,S2,S3); + break; + default: + return -1; + } + return 0; +} + +/* return -1 for failure, or register number for success */ +STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad) +{ + uae_u16 tmp; + uaecptr tmppc; + int mode; + int reg; + uae_s32 off; + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + switch (mode) { + case 0: + case 1: + return -1; + case 2: + case 3: + case 4: + mov_l_rr(S1,8+reg); + return S1; + *ad = m68k_areg (regs, reg); + break; + case 5: + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + + mov_l_rr(S1,8+reg); + add_l_ri(S1,off); + return S1; + case 6: + return -1; + break; + case 7: + switch (reg) { + case 0: + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + mov_l_ri(S1,off); + return S1; + case 1: + off=comp_get_ilong((m68k_pc_offset+=4)-4); + mov_l_ri(S1,off); + return S1; + case 2: + return -1; + *ad = m68k_getpc (); + *ad += (uae_s32) (uae_s16) next_iword (); + break; + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + *ad = 
get_disp_ea_020 (tmppc, tmp); + break; + default: + return -1; + } + } + abort(); + return -1; +} + +void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra) +{ + FAIL(1); + return; + + if (!currprefs.compfpu) { + FAIL(1); + return; + } +} + +void comp_fscc_opp (uae_u32 opcode, uae_u16 extra) +{ + uae_u32 ad; + int cc; + int reg; + + if (!currprefs.compfpu) { + FAIL(1); + return; + } + +#if DEBUG_FPP + printf ("fscc_opp at %08lx\n", m68k_getpc ()); + fflush (stdout); +#endif + + + if (extra&0x20) { /* only cc from 00 to 1f are defined */ + FAIL(1); + return; + } + if ((opcode & 0x38) != 0) { /* We can only do to integer register */ + FAIL(1); + return; + } + + fflags_into_flags(S2); + reg=(opcode&7); + + mov_l_ri(S1,255); + mov_l_ri(S4,0); + switch(extra&0x0f) { /* according to fpp.c, the 0x10 bit is ignored + */ + case 0: break; /* set never */ + case 1: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,4); + cmov_l_rr(S4,S2,10); break; + case 2: cmov_l_rr(S4,S1,7); break; + case 3: cmov_l_rr(S4,S1,3); break; + case 4: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,2); + cmov_l_rr(S4,S2,10); break; + case 5: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,6); + cmov_l_rr(S4,S2,10); break; + case 6: cmov_l_rr(S4,S1,5); break; + case 7: cmov_l_rr(S4,S1,11); break; + case 8: cmov_l_rr(S4,S1,10); break; + case 9: cmov_l_rr(S4,S1,4); break; + case 10: cmov_l_rr(S4,S1,10); cmov_l_rr(S4,S1,7); break; + case 11: cmov_l_rr(S4,S1,4); cmov_l_rr(S4,S1,3); break; + case 12: cmov_l_rr(S4,S1,2); break; + case 13: cmov_l_rr(S4,S1,6); break; + case 14: cmov_l_rr(S4,S1,5); cmov_l_rr(S4,S1,10); break; + case 15: mov_l_rr(S4,S1); break; + } + + if ((opcode & 0x38) == 0) { + mov_b_rr(reg,S4); + } else { + abort(); + if (get_fp_ad (opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } else + put_byte (ad, cc ? 
0xff : 0x00); + } +} + +void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc) +{ + int cc; + + FAIL(1); + return; +} + +extern unsigned long foink3, oink; + +void comp_fbcc_opp (uae_u32 opcode) +{ + uae_u32 start_68k_offset=m68k_pc_offset; + uae_u32 off; + uae_u32 v1; + uae_u32 v2; + uae_u32 nh; + int cc; + + if (!currprefs.compfpu) { + FAIL(1); + return; + } + + if (opcode&0x20) { /* only cc from 00 to 1f are defined */ + FAIL(1); + return; + } + if ((opcode&0x40)==0) { + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + } + else { + off=comp_get_ilong((m68k_pc_offset+=4)-4); + } + mov_l_ri(S1,(uae_u32) + (comp_pc_p+off-(m68k_pc_offset-start_68k_offset))); + mov_l_ri(PC_P,(uae_u32)comp_pc_p); + + /* Now they are both constant. Might as well fold in m68k_pc_offset */ + add_l_ri(S1,m68k_pc_offset); + add_l_ri(PC_P,m68k_pc_offset); + m68k_pc_offset=0; + + /* according to fpp.c, the 0x10 bit is ignored + (it handles exception handling, which we don't + do, anyway ;-) */ + cc=opcode&0x0f; + v1=get_const(PC_P); + v2=get_const(S1); + fflags_into_flags(S2); + + // mov_l_mi((uae_u32)&foink3,cc); + switch(cc) { + case 0: break; /* jump never */ + case 1: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,4); + cmov_l_rr(PC_P,S2,10); break; + case 2: register_branch(v1,v2,7); break; + case 3: register_branch(v1,v2,3); break; + case 4: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,2); + cmov_l_rr(PC_P,S2,10); break; + case 5: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,6); + cmov_l_rr(PC_P,S2,10); break; + case 6: register_branch(v1,v2,5); break; + case 7: register_branch(v1,v2,11); break; + case 8: register_branch(v1,v2,10); break; + case 9: register_branch(v1,v2,4); break; + case 10: + cmov_l_rr(PC_P,S1,10); + cmov_l_rr(PC_P,S1,7); break; + case 11: + cmov_l_rr(PC_P,S1,4); + cmov_l_rr(PC_P,S1,3); break; + case 12: register_branch(v1,v2,2); break; + case 13: register_branch(v1,v2,6); break; + case 14: + cmov_l_rr(PC_P,S1,5); + cmov_l_rr(PC_P,S1,10); break; + case 15: 
mov_l_rr(PC_P,S1); break; + } +} + + /* Floating point conditions + The "NotANumber" part could be problematic; Howver, when NaN is + encountered, the ftst instruction sets bot N and Z to 1 on the x87, + so quite often things just fall into place. This is probably not + accurate wrt the 68k FPU, but it is *as* accurate as this was before. + However, some more thought should go into fixing this stuff up so + it accurately emulates the 68k FPU. +>=== 4) { + /* 4 byte 68040 IDLE frame. */ + if (incr < 0) { + ad -= 4; + put_long (ad, 0x41000000); + } else { + put_long (ad, 0x41000000); + ad += 4; + } + } else { + if (incr < 0) { + ad -= 4; + put_long (ad, 0x70000000); + for (i = 0; i < 5; i++) { + ad -= 4; + put_long (ad, 0x00000000); + } + ad -= 4; + put_long (ad, 0x1f180000); + } else { + put_long (ad, 0x1f180000); + ad += 4; + for (i = 0; i < 5; i++) { + put_long (ad, 0x00000000); + ad += 4; + } + put_long (ad, 0x70000000); + ad += 4; + } + } + if ((opcode & 0x38) == 0x18) + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) + m68k_areg (regs, opcode & 7) = ad; +} + +void comp_frestore_opp (uae_u32 opcode) +{ + uae_u32 ad; + uae_u32 d; + int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + + FAIL(1); + return; + + if (!currprefs.compfpu) { + FAIL(1); + return; + } + +#if DEBUG_FPP + printf ("frestore_opp at %08lx\n", m68k_getpc ()); + fflush (stdout); +#endif + if (get_fp_ad (opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + if (currprefs.cpu_level >= 4) { + /* 68040 */ + if (incr < 0) { + /* @@@ This may be wrong. */ + ad -= 4; + d = get_long (ad); + if ((d & 0xff000000) != 0) { /* Not a NULL frame? */ + if ((d & 0x00ff0000) == 0) { /* IDLE */ + } else if ((d & 0x00ff0000) == 0x00300000) { /* UNIMP */ + ad -= 44; + } else if ((d & 0x00ff0000) == 0x00600000) { /* BUSY */ + ad -= 92; + } + } + } else { + d = get_long (ad); + ad += 4; + if ((d & 0xff000000) != 0) { /* Not a NULL frame? 
*/ + if ((d & 0x00ff0000) == 0) { /* IDLE */ + } else if ((d & 0x00ff0000) == 0x00300000) { /* UNIMP */ + ad += 44; + } else if ((d & 0x00ff0000) == 0x00600000) { /* BUSY */ + ad += 92; + } + } + } + } else { + if (incr < 0) { + ad -= 4; + d = get_long (ad); + if ((d & 0xff000000) != 0) { + if ((d & 0x00ff0000) == 0x00180000) + ad -= 6 * 4; + else if ((d & 0x00ff0000) == 0x00380000) + ad -= 14 * 4; + else if ((d & 0x00ff0000) == 0x00b40000) + ad -= 45 * 4; + } + } else { + d = get_long (ad); + ad += 4; + if ((d & 0xff000000) != 0) { + if ((d & 0x00ff0000) == 0x00180000) + ad += 6 * 4; + else if ((d & 0x00ff0000) == 0x00380000) + ad += 14 * 4; + else if ((d & 0x00ff0000) == 0x00b40000) + ad += 45 * 4; + } + } + } + if ((opcode & 0x38) == 0x18) + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) + m68k_areg (regs, opcode & 7) = ad; +} + +static fptype const_e=2.718281828; /* Got some more digits? */ +static fptype const_log10_e=0.4342944819; +static fptype const_loge_10=2.302585093; +static fptype power10[]={1e0,1e1,1e2,1e4,1e8,1e16,1e32,1e64,1e128,1e256 +#if USE_LONG_DOUBLE +, 1e512, 1e1024, 1e2048, 1e4096 +#endif +}; + +/* 128 words, indexed through the low byte of the 68k fpu control word */ +static uae_u16 x86_fpucw[]={ + 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p0r0 */ + 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p0r1 */ + 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p0r2 */ + 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, /* p0r3 */ + + 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, /* p1r0 */ + 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, /* p1r1 */ + 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, /* p1r2 */ + 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, /* p1r3 */ + + 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, /* p2r0 */ + 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 
0x1e7f, 0x1e7f, 0x1e7f, /* p2r1 */ + 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, /* p2r2 */ + 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, /* p2r3 */ + + 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p3r0 */ + 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p3r1 */ + 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p3r2 */ + 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f /* p3r3 */ +}; + +void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) +{ + int reg; + int src; + + if (!currprefs.compfpu) { + FAIL(1); + return; + } + switch ((extra >> 13) & 0x7) { + case 3: /* 2nd most common */ + if (put_fp_value ((extra >> 7)&7 , opcode, extra) < 0) { + FAIL(1); + return; + } + return; + case 6: + case 7: + { + uae_u32 ad, list = 0; + int incr = 0; + if (extra & 0x2000) { + int ad; + + /* FMOVEM FPP->memory */ + switch ((extra >> 11) & 3) { /* Get out early if failure */ + case 0: + case 2: + break; + case 1: + case 3: + default: + FAIL(1); return; + } + ad=get_fp_ad (opcode, &ad); + if (ad<0) { + FAIL(1); +#if 0 + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); +#endif + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort(); + } + while (list) { + uae_u32 wrd1, wrd2, wrd3; + if (incr < 0) { /* Predecrement */ + fmov_ext_mr((uae_u32)temp_fp,fpp_movem_index2[list]); + delay; + sub_l_ri(ad,4); + mov_l_rm(S2,(uae_u32)temp_fp); + writelong_clobber(ad,S2,S3); + sub_l_ri(ad,4); + mov_l_rm(S2,(uae_u32)temp_fp+4); + writelong_clobber(ad,S2,S3); + sub_l_ri(ad,4); + mov_w_rm(S2,(uae_u32)temp_fp+8); + writeword_clobber(ad,S2,S3); + } else { /* postinc */ + fmov_ext_mr((uae_u32)temp_fp,fpp_movem_index2[list]); + delay; + mov_w_rm(S2,(uae_u32)temp_fp+8); + 
writeword_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uae_u32)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uae_u32)temp_fp); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + } + list = fpp_movem_next[list]; + } + if ((opcode & 0x38) == 0x18) + mov_l_rr((opcode & 7)+8,ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr((opcode & 7)+8,ad); + } else { + /* FMOVEM memory->FPP */ + + int ad; + switch ((extra >> 11) & 3) { /* Get out early if failure */ + case 0: + case 2: + break; + case 1: + case 3: + default: + FAIL(1); return; + } + ad=get_fp_ad (opcode, &ad); + if (ad<0) { + FAIL(1); +#if 0 + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); +#endif + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort(); + } + + while (list) { + uae_u32 wrd1, wrd2, wrd3; + if (incr < 0) { + sub_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uae_u32)(temp_fp),S2); + sub_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uae_u32)(temp_fp)+4,S2); + sub_l_ri(ad,4); + readword(ad,S2,S3); + mov_w_mr(((uae_u32)temp_fp)+8,S2); + delay2; + fmov_ext_rm(fpp_movem_index2[list],(uae_u32)(temp_fp)); + } else { + readword(ad,S2,S3); + mov_w_mr(((uae_u32)temp_fp)+8,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uae_u32)(temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uae_u32)(temp_fp),S2); + add_l_ri(ad,4); + delay2; + fmov_ext_rm(fpp_movem_index1[list],(uae_u32)(temp_fp)); + } + list = fpp_movem_next[list]; + } + if ((opcode & 0x38) == 0x18) + mov_l_rr((opcode & 7)+8,ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr((opcode & 7)+8,ad); + } + } + return; + + case 4: + case 5: /* rare */ + if ((opcode & 0x30) == 0) { + if (extra & 0x2000) { + if (extra & 0x1000) { + mov_l_rm(opcode & 15,(uae_u32)&regs.fpcr); return; + } + if (extra & 
0x0800) { + FAIL(1); + return; + } + if (extra & 0x0400) { + mov_l_rm(opcode & 15,(uae_u32)&regs.fpiar); return; + } + } else { + if (extra & 0x1000) { + mov_l_mr((uae_u32)&regs.fpcr,opcode & 15); +#if USE_X86_FPUCW + mov_l_rr(S1,opcode & 15); + and_l_ri(S1,0x000000f0); + fldcw_m_indexed(S1,(uae_u32)x86_fpucw); +#endif + return; + } + if (extra & 0x0800) { + FAIL(1); + return; + // set_fpsr(m68k_dreg (regs, opcode & 15)); + } + if (extra & 0x0400) { + mov_l_mr((uae_u32)&regs.fpiar,opcode & 15); return; + } + } + } else if ((opcode & 0x3f) == 0x3c) { + if ((extra & 0x2000) == 0) { + if (extra & 0x1000) { + uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4); + mov_l_mi((uae_u32)&regs.fpcr,val); +#if USE_X86_FPUCW + mov_l_ri(S1,val&0x000000f0); + fldcw_m_indexed(S1,(uae_u32)x86_fpucw); +#endif + return; + } + if (extra & 0x0800) { + FAIL(1); + return; + } + if (extra & 0x0400) { + uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4); + mov_l_mi((uae_u32)&regs.fpiar,val); + return; + } + } + FAIL(1); + return; + } else if (extra & 0x2000) { + FAIL(1); + return; + } else { + FAIL(1); + return; + } + FAIL(1); + return; + + case 0: + case 2: /* Extremely common */ + reg = (extra >> 7) & 7; + if ((extra & 0xfc00) == 0x5c00) { + switch (extra & 0x7f) { + case 0x00: + fmov_pi(reg); + break; + case 0x0b: + fmov_log10_2(reg); + break; + case 0x0c: + fmov_rm(reg,(uae_u32)&const_e); + break; + case 0x0d: + fmov_log2_e(reg); + break; + case 0x0e: + fmov_rm(reg,(uae_u32)&const_log10_e); + break; + case 0x0f: + fmov_0(reg); + break; + case 0x30: + fmov_loge_2(reg); + break; + case 0x31: + fmov_rm(reg,(uae_u32)&const_loge_10); + break; + case 0x32: + fmov_1(reg); + break; + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: + fmov_rm(reg,(uae_u32)(power10+(extra & 0x7f)-0x32)); + break; + default: + /* This is not valid, so we fail */ + FAIL(1); + return; + } + return; + } + + switch (extra & 0x7f) { + case 0x00: /* FMOVE */ + case 0x40: 
/* Explicit rounding. This is just a quick fix. Same + * for all other cases that have three choices */ + case 0x44: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(reg,src); + MAKE_FPSR (src); + break; + case 0x01: /* FINT */ + FAIL(1); + return; + dont_care_fflags(); + case 0x02: /* FSINH */ + FAIL(1); + return; + + dont_care_fflags(); + regs.fp[reg] = sinh (src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x03: /* FINTRZ */ +#if USE_X86_FPUCW + /* If we have control over the CW, we can do this */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + mov_l_ri(S1,16); /* Switch to "round to zero" mode */ + fldcw_m_indexed(S1,(uae_u32)x86_fpucw); + + frndint_rr(reg,src); + + /* restore control word */ + mov_l_rm(S1,(uae_u32)&regs.fpcr); + and_l_ri(S1,0x000000f0); + fldcw_m_indexed(S1,(uae_u32)x86_fpucw); + + MAKE_FPSR (reg); + break; +#endif + FAIL(1); + return; + regs.fp[reg] = (int) src; + MAKE_FPSR (regs.fp[reg]); + break; + case 0x04: /* FSQRT */ + case 0x41: + case 0x45: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsqrt_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x06: /* FLOGNP1 */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = log (src + 1.0); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x08: /* FETOXM1 */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = exp (src) - 1.0; + MAKE_FPSR (regs.fp[reg]); + break; + case 0x09: /* FTANH */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = tanh (src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x0a: /* FATAN */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = atan (src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x0c: /* FASIN */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = asin (src); + MAKE_FPSR 
(regs.fp[reg]); + break; + case 0x0d: /* FATANH */ + FAIL(1); + return; + dont_care_fflags(); +#if 1 /* The BeBox doesn't have atanh, and it isn't in the HPUX libm either */ + regs.fp[reg] = log ((1 + src) / (1 - src)) / 2; +#else + regs.fp[reg] = atanh (src); +#endif + MAKE_FPSR (regs.fp[reg]); + break; + case 0x0e: /* FSIN */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsin_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x0f: /* FTAN */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = tan (src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x10: /* FETOX */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fetox_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x11: /* FTWOTOX */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + ftwotox_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x12: /* FTENTOX */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = pow (10.0, src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x14: /* FLOGN */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = log (src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x15: /* FLOG10 */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = log10 (src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x16: /* FLOG2 */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + flog2_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x18: /* FABS */ + case 0x58: + case 0x5c: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fabs_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x19: /* FCOSH */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = cosh (src); + MAKE_FPSR (regs.fp[reg]); 
+ break; + case 0x1a: /* FNEG */ + case 0x5a: + case 0x5e: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fneg_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x1c: /* FACOS */ + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = acos (src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x1d: /* FCOS */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fcos_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x1e: /* FGETEXP */ + FAIL(1); + return; + dont_care_fflags(); + { + int expon; + frexp (src, &expon); + regs.fp[reg] = (double) (expon - 1); + MAKE_FPSR (regs.fp[reg]); + } + break; + case 0x1f: /* FGETMAN */ + FAIL(1); + return; + dont_care_fflags(); + { + int expon; + regs.fp[reg] = frexp (src, &expon) * 2.0; + MAKE_FPSR (regs.fp[reg]); + } + break; + case 0x20: /* FDIV */ + case 0x60: + case 0x64: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fdiv_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x21: /* FMOD */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + frem_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x22: /* FADD */ + case 0x62: + case 0x66: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fadd_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x23: /* FMUL */ + case 0x63: + case 0x67: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmul_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x24: /* FSGLDIV */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fdiv_rr(reg,src); + MAKE_FPSR (reg); + break; + 
case 0x25: /* FREM */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + frem1_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x26: /* FSCALE */ + dont_care_fflags(); + FAIL(1); + return; + regs.fp[reg] *= exp (log (2.0) * src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x27: /* FSGLMUL */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmul_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x28: /* FSUB */ + case 0x68: + case 0x6c: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsub_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x30: /* FSINCOS */ + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + FAIL(1); + return; + dont_care_fflags(); + regs.fp[reg] = sin (src); + regs.fp[extra & 7] = cos (src); + MAKE_FPSR (regs.fp[reg]); + break; + case 0x38: /* FCMP */ + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(FP_RESULT,reg); + fsub_rr(FP_RESULT,src); /* Right way? 
*/ + break; + case 0x3a: /* FTST */ + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(FP_RESULT,src); + break; + default: + FAIL(1); + return; + break; + } + return; + } + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); +} diff --git a/compemu_raw_x86.c b/compemu_raw_x86.c index 3670a3cb..a64895ba 100755 --- a/compemu_raw_x86.c +++ b/compemu_raw_x86.c @@ -2546,30 +2546,6 @@ static __inline__ void make_tos(int r) live.spos[q]=p; } -static __inline__ void make_tos2(int r, int r2) -{ - int q; - - make_tos(r2); /* Put the reg that's supposed to end up in position2 - on top */ - - if (live.spos[r]<0) { /* Register not yet on stack */ - make_tos(r); /* This will extend the stack */ - return; - } - /* Register is on stack */ - emit_byte(0xd9); - emit_byte(0xc9); /* Move r2 into position 2 */ - - q=live.onstack[live.tos-1]; - live.onstack[live.tos]=q; - live.spos[q]=live.tos; - live.onstack[live.tos-1]=r2; - live.spos[r2]=live.tos-1; - - make_tos(r); /* And r into 1 */ -} - static __inline__ int stackpos(int r) { if (live.spos[r]<0) @@ -2581,14 +2557,21 @@ static __inline__ int stackpos(int r) return live.tos-live.spos[r]; } +/* IMO, calling usereg(r) makes no sense, if the register r should supply our function with + an argument, because I would expect all arguments to be on the stack already, won't they? + Thus, usereg(s) is always useless and also for every FRW d it's too late here now. 
PeterK +*/ static __inline__ void usereg(int r) { - if (live.spos[r]<0) + + if (live.spos[r]<0) { + // write_log ("usereg wants to push reg %d onto the x87 stack calling make_tos\n", r); make_tos(r); + } } -/* This is called with one FP value in a reg *above* tos, which it will - pop off the stack if necessary */ +/* This is called with one FP value in a reg *above* tos, + which it will pop off the stack if necessary */ static __inline__ void tos_make(int r) { if (live.spos[r]<0) { @@ -2598,8 +2581,8 @@ static __inline__ void tos_make(int r) return; } emit_byte(0xdd); - emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg, - and pop it*/ + emit_byte(0xd8+(live.tos+1)-live.spos[r]); + /* store top of stack in reg and pop it*/ } @@ -2761,7 +2744,6 @@ LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) { int ds; - usereg(s); ds=stackpos(s); if (ds==0 && live.spos[d]>=0) { /* source is on top of stack, and we already have the dest */ @@ -2791,18 +2773,17 @@ LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) int ds; if (d!=s) { - usereg(s); ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xc0+ds); /* fld x */ emit_byte(0xd9); - emit_byte(0xfa); /* take square root */ - tos_make(d); /* store to destination */ + emit_byte(0xfa); /* fsqrt sqrt(x) */ + tos_make(d); /* store to destination */ } else { make_tos(d); emit_byte(0xd9); - emit_byte(0xfa); /* take square root */ + emit_byte(0xfa); /* fsqrt y=sqrt(x) */ } } LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) @@ -2812,18 +2793,17 @@ LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) int ds; if (d!=s) { - usereg(s); ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xc0+ds); /* fld x */ emit_byte(0xd9); - emit_byte(0xe1); /* take fabs */ - tos_make(d); /* store to destination */ + emit_byte(0xe1); /* fabs abs(x) */ + tos_make(d); /* store to destination */ } else { make_tos(d); emit_byte(0xd9); - emit_byte(0xe1); /* take fabs */ + 
emit_byte(0xe1); /* fabs y=abs(x) */ } } LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) @@ -2833,92 +2813,159 @@ LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) int ds; if (d!=s) { - usereg(s); ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xc0+ds); /* fld x */ emit_byte(0xd9); - emit_byte(0xfc); /* take frndint */ - tos_make(d); /* store to destination */ + emit_byte(0xfc); /* frndint int(x) */ + tos_make(d); /* store to destination */ } else { make_tos(d); emit_byte(0xd9); - emit_byte(0xfc); /* take frndint */ + emit_byte(0xfc); /* frndint y=int(x) */ } } LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) -LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) +LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) { int ds; if (d!=s) { - usereg(s); ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xc0+ds); /* fld x */ emit_byte(0xd9); - emit_byte(0xff); /* take cos */ - tos_make(d); /* store to destination */ + emit_byte(0xfe); /* fsin sin(x) */ + tos_make(d); /* store to destination */ } else { make_tos(d); emit_byte(0xd9); - emit_byte(0xff); /* take cos */ + emit_byte(0xfe); /* fsin y=sin(x) */ } } -LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) +LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) -LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) +LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) { int ds; if (d!=s) { - usereg(s); ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xc0+ds); /* fld x */ emit_byte(0xd9); - emit_byte(0xfe); /* take sin */ - tos_make(d); /* store to destination */ + emit_byte(0xff); /* fcos cos(x) */ + tos_make(d); /* store to destination */ } else { make_tos(d); emit_byte(0xd9); - emit_byte(0xfe); /* take sin */ + emit_byte(0xff); /* fcos y=cos(x) */ } } -LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) +LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) -double one=1; -LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) 
+LOWFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xf2); /* fptan tan(x)=y/1.0 */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp pop 1.0 */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xf2); /* fptan tan(x)=y/1.0 */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp pop 1.0 */ + } +} +LENDFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s)) { int ds; - usereg(s); ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xfb); /* fsincos sin(x) push cos(x) */ + if ((live.spos[c]<0)&&(live.spos[d]<0)) { + live.tos++; + live.spos[d]=live.tos; + live.onstack[live.tos]=d; /* sin(x) comes first */ + live.tos++; + live.spos[c]=live.tos; + live.onstack[live.tos]=c; + return; /* occupy both regs directly */ + } + if (live.spos[c]<0) { + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap cos(x) with sin(x) */ + emit_byte(0xdd); /* store sin(x) to d & pop */ + emit_byte(0xd8+(live.tos+2)-live.spos[d]); + live.tos++; /* occupy a reg for cos(x) here */ + live.spos[c]=live.tos; + live.onstack[live.tos]=c; + } + else { + emit_byte(0xdd); /* store cos(x) to c & pop */ + emit_byte(0xd8+(live.tos+2)-live.spos[c]); + tos_make(d); /* store sin(x) to destination */ + } +} +LENDFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s)) + +float one=1; + +LOWFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s)) +{ + int ds; + + make_tos(s); /* tos=x */ + ds=stackpos(d); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld y */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale y*(2^x) */ + tos_make(d); /* store y=y*(2^x) */ +} +LENDFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) +{ + int ds; + ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0); /* 
duplicate top of stack. Now up to 8 high */ + emit_byte(0xc0+ds); /* fld x */ emit_byte(0xd9); - emit_byte(0xfc); /* rndint */ + emit_byte(0xfc); /* frndint int(x) */ emit_byte(0xd9); - emit_byte(0xc9); /* swap top two elements */ + emit_byte(0xc1+ds); /* fld x again */ emit_byte(0xd8); - emit_byte(0xe1); /* subtract rounded from original */ + emit_byte(0xe1); /* fsub frac(x) = x - int(x) */ emit_byte(0xd9); - emit_byte(0xf0); /* f2xm1 */ - emit_byte(0xdc); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); emit_byte(0x05); - emit_long((uae_u32)&one); /* Add '1' without using extra stack space */ + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ emit_byte(0xd9); - emit_byte(0xfd); /* and scale it */ + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x) */ emit_byte(0xdd); - emit_byte(0xd9); /* take he rounded value off */ - tos_make(d); /* store to destination */ + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=2^x */ } LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) @@ -2926,61 +2973,530 @@ LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) { int ds; - usereg(s); ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xc0+ds); /* fld x */ emit_byte(0xd9); - emit_byte(0xea); /* fldl2e */ - emit_byte(0xde); - emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */ + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy up */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); + emit_byte(0x05); + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + 
emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=e^x */ +} +LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s)) +{ + int ds; + ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */ + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy up */ emit_byte(0xd9); - emit_byte(0xfc); /* rndint */ + emit_byte(0xfc); /* frndint int(x*log2(e)) */ emit_byte(0xd9); - emit_byte(0xc9); /* swap top two elements */ + emit_byte(0xc9); /* fxch swap top two elements */ emit_byte(0xd8); - emit_byte(0xe1); /* subtract rounded from original */ + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ emit_byte(0xd9); - emit_byte(0xf0); /* f2xm1 */ - emit_byte(0xdc); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((2^frac(x))-1)*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=(e^x)-1 */ +} +LENDFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xe9); /* fldl2t log2(10) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(10) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy up */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(10)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(10) - int(x*log2(10)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); emit_byte(0x05); - emit_long((uae_u32)&one); /* Add '1' without using extra stack space */ + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ emit_byte(0xd9); - emit_byte(0xfd); /* and scale 
it */ + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(10)) */ emit_byte(0xdd); - emit_byte(0xd9); /* take he rounded value off */ - tos_make(d); /* store to destination */ + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=10^x */ } -LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) +LENDFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s)) { int ds; - usereg(s); ds=stackpos(s); emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xc0+ds); /* fld x */ emit_byte(0xd9); - emit_byte(0xe8); /* push '1' */ + emit_byte(0xe8); /* fld1 1 */ emit_byte(0xd9); - emit_byte(0xc9); /* swap top two */ + emit_byte(0xc9); /* fxch swap 1 with x */ emit_byte(0xd9); - emit_byte(0xf1); /* take 1*log2(x) */ - tos_make(d); /* store to destination */ + emit_byte(0xf1); /* fyl2x 1*log2(x) */ + tos_make(d); /* store y=log2(x) */ } LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s)) +LOWFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xed); /* fldln2 logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with x */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x logN(2)*log2(x) */ + tos_make(d); /* store y=logN(x) */ +} +LENDFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xed); /* fldln2 logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with x */ + emit_byte(0xd9); + emit_byte(0xf9); /* fyl2xp1 logN(2)*log2(x+1) */ + tos_make(d); /* store y=logN(x+1) */ +} +LENDFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xec); /* fldlg2 log10(2) */ + 
emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap log10(2) with x */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x log10(2)*log2(x) */ + tos_make(d); /* store y=log10(x) */ +} +LENDFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xc1+ds); /* fld x */ + emit_byte(0xd8); + emit_byte(0xc8); /* fmul x*x */ + emit_byte(0xd8); + emit_byte(0xe9); /* fsubr 1 - (x^2) */ + emit_byte(0xd9); + emit_byte(0xfa); /* fsqrt sqrt(1-(x^2)) */ + emit_byte(0xd8); + emit_byte(0xfa+ds); /* fdivr x / sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap with 1.0 */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan atan(x)/1 & pop */ + tos_make(d); /* store y=asin(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xc1+ds); /* fld x */ + emit_byte(0xd8); + emit_byte(0xc8); /* fmul x*x */ + emit_byte(0xd8); + emit_byte(0xe9); /* fsubr 1 - (x^2) */ + emit_byte(0xd9); + emit_byte(0xfa); /* fsqrt sqrt(1-(x^2)) */ + emit_byte(0xd8); + emit_byte(0xf2+ds); /* fdiv sqrt(1-(x^2)) / x */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap with 1.0 */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan atan(x)/1 & pop */ + tos_make(d); /* store y=acos(x) */ +} +LENDFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan atan(x)/1 */ + tos_make(d); /* store y=atan(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); 
/* fld x */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xdc); + emit_byte(0xc1); /* fadd 1 + x */ + emit_byte(0xd8); + emit_byte(0xe2+ds); /* fsub 1 - x */ + emit_byte(0xde); + emit_byte(0xf9); /* fdivp (1+x)/(1-x) */ + emit_byte(0xd9); + emit_byte(0xed); /* fldln2 logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with (1+x)/(1-x) */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x logN(2)*log2((1+x)/(1-x)) pop */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale logN((1+x)/(1-x)) * 2^(-1) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=atanh(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); + emit_byte(0x05); + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale 
(2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) in tr */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); + emit_byte(0x05); + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy e^x & pop */ + if (tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* add +12 to esp */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + emit_byte(0xde); + emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */ + } + else { + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp (e^x)-(e^-x) */ + } + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((e^x)-(e^-x))/2 */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=sinh(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + 
emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); + emit_byte(0x05); + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) in tr */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); + emit_byte(0x05); + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy e^x & pop */ + if (tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* add +12 to esp */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + } + emit_byte(0xde); + emit_byte(0xc1); /* faddp (e^x)+(e^-x) */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((e^x)+(e^-x))/2 */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=cosh(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s)) + 
+LOWFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); + emit_byte(0x05); + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd8); + emit_byte(0x05); + emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy e^x */ + emit_byte(0xd8); + emit_byte(0xc2); /* fadd (e^x)+(e^-x) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with e^-x */ + emit_byte(0xde); + emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */ + if 
(tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* add +12 to esp */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + emit_byte(0xde); + emit_byte(0xf9); /* fdivp ((e^x)-(e^-x))/((e^x)+(e^-x)) */ + } + else { + emit_byte(0xde); + emit_byte(0xf1); /* fdivrp ((e^x)-(e^-x))/((e^x)+(e^-x)) */ + } + tos_make(d); /* store y=tanh(x) */ +} +LENDFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) { int ds; if (d!=s) { - usereg(s); ds=stackpos(s); emit_byte(0xd9); emit_byte(0xc0+ds); /* duplicate source */ @@ -3000,9 +3516,6 @@ LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) { int ds; - usereg(s); - usereg(d); - if (live.spos[s]==live.tos) { /* Source is on top of stack */ ds=stackpos(d); @@ -3022,9 +3535,6 @@ LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) { int ds; - - usereg(s); - usereg(d); if (live.spos[s]==live.tos) { /* Source is on top of stack */ @@ -3045,9 +3555,6 @@ LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s)) { int ds; - - usereg(s); - usereg(d); make_tos(d); ds=stackpos(s); @@ -3060,9 +3567,6 @@ LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s)) LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) { int ds; - - usereg(s); - usereg(d); if (live.spos[s]==live.tos) { /* Source is on top of stack */ @@ -3083,9 +3587,6 @@ LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) { int ds; - - usereg(s); - usereg(d); if (live.spos[s]==live.tos) { /* Source is on top of stack */ @@ -3106,19 +3607,14 @@ LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s)) { int ds; - - usereg(s); - usereg(d); - make_tos2(d,s); - ds=stackpos(s); - - if (ds!=1) { - printf("Failed horribly in raw_frem_rr! 
ds is %d\n",ds); - abort(); - } + make_tos(s); /* tos=x */ + ds=stackpos(d); emit_byte(0xd9); - emit_byte(0xf8); /* take rem from dest by source */ + emit_byte(0xc0+ds); /* fld y */ + emit_byte(0xd9); + emit_byte(0xf8); /* fprem rem(y/x) */ + tos_make(d); /* store y=rem(y/x) */ } LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s)) @@ -3126,18 +3622,13 @@ LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) { int ds; - usereg(s); - usereg(d); - - make_tos2(d,s); - ds=stackpos(s); - - if (ds!=1) { - printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds); - abort(); - } + make_tos(s); /* tos=x */ + ds=stackpos(d); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld y */ emit_byte(0xd9); - emit_byte(0xf5); /* take rem1 from dest by source */ + emit_byte(0xf5); /* fprem rem1(y/x) */ + tos_make(d); /* store y=rem1(y/x) */ } LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) diff --git a/compemu_raw_x86_old.c b/compemu_raw_x86_old.c new file mode 100755 index 00000000..3670a3cb --- /dev/null +++ b/compemu_raw_x86_old.c @@ -0,0 +1,3179 @@ +/* This should eventually end up in machdep/, but for now, x86 is the + only target, and it's easier this way... */ + +/************************************************************************* + * Some basic information about the the target CPU * + *************************************************************************/ + +#define EAX 0 +#define ECX 1 +#define EDX 2 +#define EBX 3 + +/* The register in which subroutines return an integer return value */ +#define REG_RESULT 0 + +/* The registers subroutines take their first and second argument in */ +#define REG_PAR1 0 +#define REG_PAR2 2 + +/* Three registers that are not used for any of the above */ +#define REG_NOPAR1 6 +#define REG_NOPAR2 5 +#define REG_NOPAR3 3 + +#define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */ +#define REG_PC_TMP 1 /* Another register that is not the above */ + +#define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount. 
+ -1 if any reg will do */ +#define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */ +#define MUL_NREG2 2 /* %edx will hold the high 32 bits */ + +uae_s8 always_used[]={4,-1}; +uae_s8 can_byte[]={0,1,2,3,-1}; +uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; + +uae_u8 call_saved[]={0,0,0,0,1,0,0,0}; + +/* This *should* be the same as call_saved. But: + - We might not really know which registers are saved, and which aren't, + so we need to preserve some, but don't want to rely on everyone else + also saving those registers + - Special registers (such like the stack pointer) should not be "preserved" + by pushing, even though they are "saved" across function calls +*/ +uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1}; + +/* Whether classes of instructions do or don't clobber the native flags */ +#define CLOBBER_MOV +#define CLOBBER_LEA +#define CLOBBER_CMOV +#define CLOBBER_POP +#define CLOBBER_PUSH +#define CLOBBER_SUB clobber_flags() +#define CLOBBER_SBB clobber_flags() +#define CLOBBER_CMP clobber_flags() +#define CLOBBER_ADD clobber_flags() +#define CLOBBER_ADC clobber_flags() +#define CLOBBER_AND clobber_flags() +#define CLOBBER_OR clobber_flags() +#define CLOBBER_XOR clobber_flags() + +#define CLOBBER_ROL clobber_flags() +#define CLOBBER_ROR clobber_flags() +#define CLOBBER_SHLL clobber_flags() +#define CLOBBER_SHRL clobber_flags() +#define CLOBBER_SHRA clobber_flags() +#define CLOBBER_TEST clobber_flags() +#define CLOBBER_CL16 +#define CLOBBER_CL8 +#define CLOBBER_SE16 +#define CLOBBER_SE8 +#define CLOBBER_ZE16 +#define CLOBBER_ZE8 +#define CLOBBER_SW16 clobber_flags() +#define CLOBBER_SW32 +#define CLOBBER_SETCC +#define CLOBBER_MUL clobber_flags() +#define CLOBBER_BT clobber_flags() +#define CLOBBER_BSF clobber_flags() + +/************************************************************************* + * Actual encoding of the instructions on the target CPU * + *************************************************************************/ + +static int have_cmov=0; 
/* We need to generate different code if + we don't have cmov */ + +#include "compemu_optimizer_x86.c" + +static uae_u16 swap16(uae_u16 x) +{ + return ((x&0xff00)>>8)|((x&0x00ff)<<8); +} + +static uae_u32 swap32(uae_u32 x) +{ + return ((x&0xff00)<<8)|((x&0x00ff)<<24)|((x&0xff0000)>>8)|((x&0xff000000)>>24); +} + +static __inline__ int isbyte(uae_s32 x) +{ + return (x>=-128 && x<=127); +} + +LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) +{ + emit_byte(0x50+r); +} +LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) +{ + emit_byte(0x58+r); +} +LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xe0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) +{ + emit_byte(0x0f); + emit_byte(0xa3); + emit_byte(0xc0+8*b+r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xf8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) +{ + emit_byte(0x0f); + emit_byte(0xbb); + emit_byte(0xc0+8*b+r); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) + + +LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xf0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) +{ + emit_byte(0x0f); + emit_byte(0xb3); + emit_byte(0xc0+8*b+r); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xe8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) +{ + emit_byte(0x0f); + emit_byte(0xab); + emit_byte(0xc0+8*b+r); +} 
+LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) +{ + emit_byte(0x66); + if (isbyte(i)) { + emit_byte(0x83); + emit_byte(0xe8+d); + emit_byte(i); + } + else { + emit_byte(0x81); + emit_byte(0xe8+d); + emit_word(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) + + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) +{ + emit_byte(0xc7); + emit_byte(0x05); + emit_long(d); + emit_long(s); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0xc7); + emit_byte(0x05); + emit_long(d); + emit_word(s); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) +{ + emit_byte(0xc6); + emit_byte(0x05); + emit_long(d); + emit_byte(s); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) +{ + emit_byte(0xc0); + emit_byte(0x05); + emit_long(d); + emit_byte(i); +} +LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) +{ + emit_byte(0xc0); + emit_byte(0xc0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + 
emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) +{ + emit_byte(0xc0); + emit_byte(0xc8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xc8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0xc1); + emit_byte(0xc8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 
r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xf8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xf8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xf8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0xc1); + emit_byte(0xe0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xe0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) +{ + emit_byte(0xc0); + emit_byte(0xe0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0xc1); + emit_byte(0xe8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xe8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) +{ + emit_byte(0xc0); + emit_byte(0xe8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0xc1); + emit_byte(0xf8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xf8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) +{ + emit_byte(0xc0); + emit_byte(0xf8+r); + emit_byte(i); 
+} +LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) +{ + emit_byte(0x9e); +} +LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) + +LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) +{ + emit_byte(0x0f); + emit_byte(0xa2); +} +LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) + +LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) +{ + emit_byte(0x9f); +} +LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) + +LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) +{ + emit_byte(0x0f); + emit_byte(0x90+cc); + emit_byte(0xc0+d); +} +LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) + +LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) +{ + emit_byte(0x0f); + emit_byte(0x90+cc); + emit_byte(0x05); + emit_long(d); +} +LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) +{ + if (have_cmov) { + emit_byte(0x0f); + emit_byte(0x40+cc); + emit_byte(0xc0+8*d+s); + } + else { /* replacement using branch and mov */ + int uncc=(cc^1); + emit_byte(0x70+uncc); + emit_byte(2); /* skip next 2 bytes if not cc=true */ + emit_byte(0x89); + emit_byte(0xc0+8*s+d); + } +} +LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) + +LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) +{ + emit_byte(0x0f); + emit_byte(0xbc); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) +{ + emit_byte(0x0f); + emit_byte(0xbf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) +{ + emit_byte(0x0f); + emit_byte(0xbe); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) +{ + emit_byte(0x0f); + emit_byte(0xb7); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) +{ + 
emit_byte(0x0f); + emit_byte(0xb6); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) +{ + emit_byte(0x0f); + emit_byte(0xaf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) + abort(); + emit_byte(0xf7); + emit_byte(0xea); +} +LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + printf("Bad register in MUL: d=%d, s=%d\n",d,s); + abort(); + } + emit_byte(0xf7); + emit_byte(0xe2); +} +LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) +{ + abort(); /* %^$&%^$%#^ x86! */ + emit_byte(0x0f); + emit_byte(0xaf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) +{ + emit_byte(0x88); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) +{ + int isebp=(baser==5)?0x40:0; + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + + emit_byte(0x8b); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + 
isebp=(baser==5)?0x40:0; + + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x8a); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + + isebp=(baser==5)?0x40:0; + + emit_byte(0x89); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + 
emit_byte(0x88); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x89); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x88); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x8b); + emit_byte(0x84+8*d); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) 
+
+LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) /* emits: mov d16, [baser + index*factor + base] */
+{
+    int fi; /* SIB scale field, i.e. log2(factor) */
+
+    switch(factor) {
+    case 1: fi=0; break;
+    case 2: fi=1; break;
+    case 4: fi=2; break;
+    case 8: fi=3; break;
+    default: abort(); /* a SIB byte can only scale by 1, 2, 4 or 8 */
+    }
+
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0x8b); /* MOV r, r/m */
+    emit_byte(0x84+8*d); /* ModRM: mod=10 (disp32), reg=d, rm=100 (SIB follows) */
+    emit_byte(baser+8*index+0x40*fi); /* SIB: scale=fi, index=index, base=baser */
+    emit_long(base); /* disp32 */
+}
+LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) /* emits: mov d8, [baser + index*factor + base] */
+{
+    int fi; /* SIB scale field, i.e. log2(factor) */
+
+    switch(factor) {
+    case 1: fi=0; break;
+    case 2: fi=1; break;
+    case 4: fi=2; break;
+    case 8: fi=3; break;
+    default: abort(); /* a SIB byte can only scale by 1, 2, 4 or 8 */
+    }
+
+    emit_byte(0x8a); /* MOV r8, r/m8 */
+    emit_byte(0x84+8*d); /* ModRM: mod=10 (disp32), reg=d, rm=100 (SIB follows) */
+    emit_byte(baser+8*index+0x40*fi); /* SIB: scale=fi, index=index, base=baser */
+    emit_long(base); /* disp32 */
+}
+LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) /* emits: mov d32, [index*factor + base] (no base register) */
+{
+    int fi; /* SIB scale field, i.e. log2(factor) */
+    switch(factor) {
+    case 1: fi=0; break;
+    case 2: fi=1; break;
+    case 4: fi=2; break;
+    case 8: fi=3; break;
+    default:
+        fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
+        abort();
+    }
+    emit_byte(0x8b); /* MOV r32, r/m32 */
+    emit_byte(0x04+8*d); /* ModRM: mod=00, reg=d, rm=100 (SIB follows) */
+    emit_byte(0x05+8*index+64*fi); /* SIB: base=101 with mod=00 means "disp32, no base" */
+    emit_long(base); /* disp32 */
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) /* emits: if (cond) d32 = [index*factor + base] */
+{
+    int fi; /* SIB scale field, i.e. log2(factor) */
+    switch(factor) {
+    case 1: fi=0; break;
+    case 2: fi=1; break;
+    case 4: fi=2; break;
+    case 8: fi=3; break;
+    default:
+        fprintf(stderr,"Bad factor %d in cmov_l_rm_indexed!\n",factor); /* message now names this emitter, not raw_mov_l_rm_indexed */
+        abort();
+    }
+    if (have_cmov) {
+        emit_byte(0x0f); /* two-byte opcode escape */
+        emit_byte(0x40+cond); /* CMOVcc r32, r/m32 */
+        emit_byte(0x04+8*d); /* ModRM: mod=00, reg=d, rm=100 (SIB follows) */
+        emit_byte(0x05+8*index+64*fi); /* SIB: scaled index, disp32, no base */
+        emit_long(base); /* disp32 */
+    }
+    else { /* replacement using branch and mov */
+        int uncc=(cond^1); /* flipping bit 0 of an x86 condition code negates it */
+        emit_byte(0x70+uncc); /* Jcc rel8 on the negated condition */
+        emit_byte(7); /* skip next 7 bytes if not cc=true */
+        emit_byte(0x8b); /* MOV r32, r/m32 (1 opcode + ModRM + SIB + disp32 = 7 bytes) */
+        emit_byte(0x04+8*d);
+        emit_byte(0x05+8*index+64*fi);
+        emit_long(base);
+    }
+}
+LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
+
+LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) /* emits: if (cond) d32 = [mem] */
+{
+    if (have_cmov) {
+        emit_byte(0x0f); /* two-byte opcode escape */
+        emit_byte(0x40+cond); /* CMOVcc r32, r/m32 */
+        emit_byte(0x05+8*d); /* ModRM: mod=00, reg=d, rm=101 (absolute disp32) */
+        emit_long(mem); /* disp32 */
+    }
+    else { /* replacement using branch and mov */
+        int uncc=(cond^1); /* flipping bit 0 of an x86 condition code negates it */
+        emit_byte(0x70+uncc); /* Jcc rel8 on the negated condition */
+        emit_byte(6); /* skip next 6 bytes if not cc=true */
+        emit_byte(0x8b); /* MOV r32, r/m32 (1 opcode + ModRM + disp32 = 6 bytes) */
+        emit_byte(0x05+8*d);
+        emit_long(mem);
+    }
+}
+LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
+
+LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) /* emits: mov d32, [s + offset], offset encoded as disp8 */
+{
+    emit_byte(0x8b); /* MOV r32, r/m32 */
+    emit_byte(0x40+8*d+s); /* ModRM: mod=01 (disp8), reg=d, rm=s */
+    emit_byte(offset); /* disp8: only the low byte of offset is emitted */
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) /* emits: mov d16, [s + offset], offset encoded as disp8 */
+{
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0x8b); /* MOV r, r/m */
+    emit_byte(0x40+8*d+s); /* ModRM: mod=01 (disp8), reg=d, rm=s */
+    emit_byte(offset); /* disp8 */
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) /* emits: mov d8, [s + offset], offset encoded as disp8 */
+{
+    emit_byte(0x8a); /* MOV r8, r/m8 */
+    emit_byte(0x40+8*d+s); /* ModRM: mod=01 (disp8), reg=d, rm=s */
+    emit_byte(offset); /* disp8 */
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) /* emits: mov d32, [s + offset], offset encoded as disp32 */
+{
+    emit_byte(0x8b); /* MOV r32, r/m32 */
+    emit_byte(0x80+8*d+s); /* ModRM: mod=10 (disp32), reg=d, rm=s */
+    emit_long(offset); /* disp32 */
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) /* emits: mov d16, [s + offset], offset encoded as disp32 */
+{
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0x8b); /* MOV r, r/m */
+    emit_byte(0x80+8*d+s); /* ModRM: mod=10 (disp32), reg=d, rm=s */
+    emit_long(offset); /* disp32 */
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) /* emits: mov d8, [s + offset], offset encoded as disp32 */
+{
+    emit_byte(0x8a); /* MOV r8, r/m8 */
+    emit_byte(0x80+8*d+s); /* ModRM: mod=10 (disp32), reg=d, rm=s */
+    emit_long(offset); /* disp32 */
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) /* emits: mov dword [d + offset], i (offset as disp8) */
+{
+    emit_byte(0xc7); /* MOV r/m32, imm32 */
+    emit_byte(0x40+d); /* ModRM: mod=01 (disp8), /0, rm=d */
+    emit_byte(offset); /* disp8 */
+    emit_long(i); /* imm32 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) /* emits: mov word [d + offset], i (offset as disp8) */
+{
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0xc7); /* MOV r/m, imm */
+    emit_byte(0x40+d); /* ModRM: mod=01 (disp8), /0, rm=d */
+    emit_byte(offset); /* disp8 */
+    emit_word(i); /* imm16 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) /* emits: mov byte [d + offset], i (offset as disp8) */
+{
+    emit_byte(0xc6); /* MOV r/m8, imm8 */
+    emit_byte(0x40+d); /* ModRM: mod=01 (disp8), /0, rm=d */
+    emit_byte(offset); /* disp8 */
+    emit_byte(i); /* imm8 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) /* emits: mov [d + offset], s32 (offset as disp8) */
+{
+    emit_byte(0x89); /* MOV r/m32, r32 */
+    emit_byte(0x40+8*s+d); /* ModRM: mod=01 (disp8), reg=s, rm=d */
+    emit_byte(offset); /* disp8 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) /* emits: mov [d + offset], s16 (offset as disp8) */
+{
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0x89); /* MOV r/m, r */
+    emit_byte(0x40+8*s+d); /* ModRM: mod=01 (disp8), reg=s, rm=d */
+    emit_byte(offset); /* disp8 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) /* emits: mov [d + offset], s8 (offset as disp8) */
+{
+    emit_byte(0x88); /* MOV r/m8, r8 */
+    emit_byte(0x40+8*s+d); /* ModRM: mod=01 (disp8), reg=s, rm=d */
+    emit_byte(offset); /* disp8 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) /* emits: lea d, [s + offset] (offset as disp32) */
+{
+    emit_byte(0x8d); /* LEA r32, m */
+    emit_byte(0x80+8*d+s); /* ModRM: mod=10 (disp32), reg=d, rm=s */
+    emit_long(offset); /* disp32 */
+}
+LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) /* emits: lea d, [s + index*factor + offset] */
+{
+    int fi; /* SIB scale field, i.e. log2(factor) */
+
+    switch(factor) {
+    case 1: fi=0; break;
+    case 2: fi=1; break;
+    case 4: fi=2; break;
+    case 8: fi=3; break;
+    default: abort(); /* a SIB byte can only scale by 1, 2, 4 or 8 */
+    }
+
+    emit_byte(0x8d); /* LEA r32, m */
+    emit_byte(0x84+8*d); /* ModRM: mod=10 (disp32), reg=d, rm=100 (SIB follows) */
+    emit_byte(0x40*fi+8*index+s); /* SIB: scale=fi, index=index, base=s */
+    emit_long(offset); /* disp32 */
+}
+LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+
+LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) /* emits: lea d, [s + index*factor] */
+{
+    int isebp=(s==5)?0x40:0; /* base 5 (EBP) has no mod=00 form in a SIB byte, so switch to mod=01 */
+    int fi; /* SIB scale field, i.e. log2(factor) */
+
+    switch(factor) {
+    case 1: fi=0; break;
+    case 2: fi=1; break;
+    case 4: fi=2; break;
+    case 8: fi=3; break;
+    default: abort(); /* a SIB byte can only scale by 1, 2, 4 or 8 */
+    }
+
+    emit_byte(0x8d); /* LEA r32, m */
+    emit_byte(0x04+8*d+isebp); /* ModRM: mod=00 (or 01 for EBP), reg=d, rm=100 (SIB follows) */
+    emit_byte(0x40*fi+8*index+s); /* SIB: scale=fi, index=index, base=s */
+    if (isebp)
+        emit_byte(0); /* zero disp8 required by the mod=01 form */
+}
+LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) /* emits: mov [d + offset], s32 (offset as disp32) */
+{
+    emit_byte(0x89); /* MOV r/m32, r32 */
+    emit_byte(0x80+8*s+d); /* ModRM: mod=10 (disp32), reg=s, rm=d */
+    emit_long(offset); /* disp32 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) /* emits: mov [d + offset], s16 (offset as disp32) */
+{
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0x89); /* MOV r/m, r */
+    emit_byte(0x80+8*s+d); /* ModRM: mod=10 (disp32), reg=s, rm=d */
+    emit_long(offset); /* disp32 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) /* emits: mov [d + offset], s8 (offset as disp32) */
+{
+    emit_byte(0x88); /* MOV r/m8, r8 */
+    emit_byte(0x80+8*s+d); /* ModRM: mod=10 (disp32), reg=s, rm=d */
+    emit_long(offset); /* disp32 */
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) /* emits: bswap r32 */
+{
+    emit_byte(0x0f); /* two-byte opcode escape */
+    emit_byte(0xc8+r); /* BSWAP r32, register encoded in the opcode */
+}
+LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+
+LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) /* emits: rol r16, 8 -- swaps the two bytes of the 16-bit register */
+{
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0xc1); /* rotate/shift group, r/m with imm8 count */
+    emit_byte(0xc0+r); /* ModRM: mod=11, /0 (ROL), rm=r */
+    emit_byte(0x08); /* rotate count 8 */
+}
+LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) /* emits: mov d32, s32 */
+{
+    emit_byte(0x89); /* MOV r/m32, r32 */
+    emit_byte(0xc0+8*s+d); /* ModRM: mod=11 (register), reg=s, rm=d */
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) /* emits: mov [d], s32 (d is an absolute address) */
+{
+    emit_byte(0x89); /* MOV r/m32, r32 */
+    emit_byte(0x05+8*s); /* ModRM: mod=00, reg=s, rm=101 (absolute disp32) */
+    emit_long(d); /* disp32 */
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) /* emits: mov d32, [s] (s is an absolute address) */
+{
+    emit_byte(0x8b); /* MOV r32, r/m32 */
+    emit_byte(0x05+8*d); /* ModRM: mod=00, reg=d, rm=101 (absolute disp32) */
+    emit_long(s); /* disp32 */
+}
+LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) /* emits: mov [d], s16 (d is an absolute address) */
+{
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0x89); /* MOV r/m, r */
+    emit_byte(0x05+8*s); /* ModRM: mod=00, reg=s, rm=101 (absolute disp32) */
+    emit_long(d); /* disp32 */
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) /* emits: mov d16, [s] (s is an absolute address) */
+{
+    emit_byte(0x66); /* operand-size prefix: 16-bit operand */
+    emit_byte(0x8b); /* MOV r, r/m */
+    emit_byte(0x05+8*d); /* ModRM: mod=00, reg=d, rm=101 (absolute disp32) */
+    emit_long(s); /* disp32 */
+}
+LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) +{ + emit_byte(0x88); + emit_byte(0x05+8*s); + emit_long(d); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) + +LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) +{ + emit_byte(0x8a); + emit_byte(0x05+8*d); + emit_long(s); +} +LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) +{ + emit_byte(0xb8+d); + emit_long(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0xb8+d); + emit_word(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) +{ + emit_byte(0xb0+d); + emit_byte(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) + +LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) +{ + emit_byte(0x81); + emit_byte(0x15); + emit_long(d); + emit_long(s); +} +LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) +{ + emit_byte(0x81); + emit_byte(0x05); + emit_long(d); + emit_long(s); +} +LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0x81); + emit_byte(0x05); + emit_long(d); + emit_word(s); +} +LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) +{ + emit_byte(0x80); + emit_byte(0x05); + emit_long(d); + emit_byte(s); +} +LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) +{ + emit_byte(0xf7); + emit_byte(0xc0+d); + emit_long(i); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) +{ + emit_byte(0x85); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x85); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 
d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) +{ + emit_byte(0x84); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) +{ + emit_byte(0x81); + emit_byte(0xe0+d); + emit_long(i); +} +LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) +{ + emit_byte(0x66); + emit_byte(0x81); + emit_byte(0xe0+d); + emit_word(i); +} +LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) +{ + emit_byte(0x21); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x21); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) +{ + emit_byte(0x20); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) +{ + emit_byte(0x81); + emit_byte(0xc8+d); + emit_long(i); +} +LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) +{ + emit_byte(0x09); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x09); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) +{ + emit_byte(0x08); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) +{ + emit_byte(0x11); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x11); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) +{ + emit_byte(0x10); + emit_byte(0xc0+8*s+d); +} 
+LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+/* Emit ADD d,s on 32-bit registers (opcode 01 /r). */
+LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+{
+ emit_byte(0x01);
+ emit_byte(0xc0+8*s+d); /* ModRM: mod=11, reg=s, rm=d */
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+/* Emit ADD d,s on 16-bit registers (0x66 prefix + opcode 01 /r). */
+LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x01);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+/* Emit ADD d,s on 8-bit registers (opcode 00 /r). */
+LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+{
+ emit_byte(0x00);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+/* Emit SUB d,imm: the short 83 /5 imm8 encoding when isbyte(i) holds, otherwise 81 /5 imm32. */
+LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+{
+ if (isbyte(i)) {
+ emit_byte(0x83);
+ emit_byte(0xe8+d);
+ emit_byte(i);
+ }
+ else {
+ emit_byte(0x81);
+ emit_byte(0xe8+d);
+ emit_long(i);
+ }
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+/* Emit SUB d,imm8 on an 8-bit register (opcode 80 /5). */
+LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+{
+ emit_byte(0x80);
+ emit_byte(0xe8+d);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+/* Emit ADD d,imm: the short 83 /0 imm8 encoding when isbyte(i) holds, otherwise 81 /0 imm32. */
+LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+{
+ if (isbyte(i)) {
+ emit_byte(0x83);
+ emit_byte(0xc0+d);
+ emit_byte(i);
+ }
+ else {
+ emit_byte(0x81);
+ emit_byte(0xc0+d);
+ emit_long(i);
+ }
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+/* Emit 16-bit ADD d,imm (0x66 prefix): 83 /0 imm8 when isbyte(i) holds, otherwise 81 /0 imm16. */
+LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+{
+ if (isbyte(i)) {
+ emit_byte(0x66);
+ emit_byte(0x83);
+ emit_byte(0xc0+d);
+ emit_byte(i);
+ }
+ else {
+ emit_byte(0x66);
+ emit_byte(0x81);
+ emit_byte(0xc0+d);
+ emit_word(i);
+ }
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+/* Emit ADD d,imm8 on an 8-bit register (opcode 80 /0). */
+LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+{
+ emit_byte(0x80);
+ emit_byte(0xc0+d);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+/* Emit SBB d,s on 32-bit registers (opcode 19 /r). */
+LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+{
+ emit_byte(0x19);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+/* Emit SBB d,s on 16-bit registers (0x66 prefix + opcode 19 /r). */
+LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x19);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+/* Emit SBB d,s on 8-bit registers (opcode 18 /r). */
+LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d,
R1 s))
+{
+ emit_byte(0x18);
+ emit_byte(0xc0+8*s+d); /* ModRM: mod=11, reg=s, rm=d */
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+/* Emit SUB d,s on 32-bit registers (opcode 29 /r). */
+LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+{
+ emit_byte(0x29);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+/* Emit SUB d,s on 16-bit registers (0x66 prefix + opcode 29 /r). */
+LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x29);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+/* Emit SUB d,s on 8-bit registers (opcode 28 /r). */
+LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+{
+ emit_byte(0x28);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+/* Emit CMP d,s on 32-bit registers (opcode 39 /r). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+{
+ emit_byte(0x39);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+/* Emit CMP r,imm32 (opcode 81 /7 with register operand r). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+{
+ emit_byte(0x81);
+ emit_byte(0xf8+r); /* ModRM: mod=11, /7 (CMP), rm=r */
+ emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+/* Emit CMP d,s on 16-bit registers (0x66 prefix + opcode 39 /r). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x39);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+/* Emit CMP d,imm8 on an 8-bit register (opcode 80 /7). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+{
+ emit_byte(0x80);
+ emit_byte(0xf8+d);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+/* Emit CMP d,s on 8-bit registers (opcode 38 /r). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+{
+ emit_byte(0x38);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+/* Emit CMP dword [offset+index*factor],d (opcode 39 /r with a SIB byte); aborts unless factor is 1, 2, 4 or 8. */
+LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+{
+ int fi;
+ /* Translate the scale factor into the 2-bit SIB scale field. */
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+ emit_byte(0x39);
+ emit_byte(0x04+8*d); /* ModRM: mod=00, reg=d, rm=100 (SIB follows) */
+ emit_byte(5+8*index+0x40*fi); /* SIB: scale=fi, index=index, base=101 (disp32, no base register) */
+ emit_long(offset);
+}
+LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+/* Emit XOR d,s on 32-bit registers (opcode 31 /r). */
+LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+{
+ emit_byte(0x31);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+/* Emit XOR d,s on 16-bit registers (0x66 prefix + opcode 31 /r). */
+LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+
emit_byte(0x31);
+ emit_byte(0xc0+8*s+d); /* ModRM: mod=11, reg=s, rm=d */
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+/* Emit XOR d,s on 8-bit registers (opcode 30 /r). */
+LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+{
+ emit_byte(0x30);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+/* Emit SUB dword [d],imm32 (opcode 81 /5, absolute address d, immediate s). */
+LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+{
+ emit_byte(0x81);
+ emit_byte(0x2d); /* ModRM: mod=00, /5 (SUB), rm=101 (disp32 only) */
+ emit_long(d);
+ emit_long(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+/* Emit CMP dword [d],imm32 (opcode 81 /7, absolute address d, immediate s). */
+LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+{
+ emit_byte(0x81);
+ emit_byte(0x3d); /* ModRM: mod=00, /7 (CMP), rm=101 (disp32 only) */
+ emit_long(d);
+ emit_long(s);
+}
+LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+/* Emit XCHG between 32-bit registers r1 and r2 (opcode 87 /r). */
+LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+{
+ emit_byte(0x87);
+ emit_byte(0xc0+8*r1+r2);
+}
+LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+/* Emit PUSHFD (opcode 9C). */
+LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
+{
+ emit_byte(0x9c);
+}
+LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
+/* Emit POPFD (opcode 9D). */
+LOWFUNC(WRITE,READ,0,raw_popfl,(void))
+{
+ emit_byte(0x9d);
+}
+LENDFUNC(WRITE,READ,0,raw_popfl,(void))
+
+/*************************************************************************
+ * Unoptimizable stuff --- jump *
+ *************************************************************************/
+/* Emit CALL r (opcode FF /2). Like every emitter in this section it calls lopt_emit_all() first. */
+static __inline__ void raw_call_r(R4 r)
+{
+ lopt_emit_all();
+ emit_byte(0xff);
+ emit_byte(0xd0+r);
+}
+/* Emit JMP r (opcode FF /4). */
+static __inline__ void raw_jmp_r(R4 r)
+{
+ lopt_emit_all();
+ emit_byte(0xff);
+ emit_byte(0xe0+r);
+}
+/* Emit JMP dword [base+r*m] (opcode FF /4 with a SIB byte); aborts unless m is 1, 2, 4 or 8. */
+static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
+{
+ int mu;
+ switch(m) {
+ case 1: mu=0; break;
+ case 2: mu=1; break;
+ case 4: mu=2; break;
+ case 8: mu=3; break;
+ default: abort();
+ }
+ lopt_emit_all();
+ emit_byte(0xff);
+ emit_byte(0x24); /* ModRM: mod=00, /4 (JMP), rm=100 (SIB follows) */
+ emit_byte(0x05+8*r+0x40*mu); /* SIB: scale=mu, index=r, base=101 (disp32, no base register) */
+ emit_long(base);
+}
+/* Emit JMP dword [base] (opcode FF 25, absolute disp32 operand). */
+static __inline__ void raw_jmp_m(uae_u32 base)
+{
+ lopt_emit_all();
+ emit_byte(0xff);
+ emit_byte(0x25);
+ emit_long(base);
+}
+
+/* Emit CALL rel32 (opcode E8) to absolute address t. */
+static __inline__ void raw_call(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0xe8);
+ emit_long(t-(uae_u32)target-4); /* displacement is relative to the end of the 4-byte field being emitted */
+}
+
+static
__inline__ void raw_jmp(uae_u32 t) /* Emit JMP rel32 (opcode E9) to absolute address t. */
+{
+ lopt_emit_all();
+ emit_byte(0xe9);
+ emit_long(t-(uae_u32)target-4); /* displacement is relative to the end of the 4-byte field being emitted */
+}
+/* Emit JL rel32 (opcode 0F 8C) to absolute address t. */
+static __inline__ void raw_jl(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x8c);
+ emit_long(t-(uae_u32)target-4);
+}
+/* Emit JZ rel32 (opcode 0F 84) to absolute address t. */
+static __inline__ void raw_jz(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x84);
+ emit_long(t-(uae_u32)target-4);
+}
+/* Emit JNZ rel32 (opcode 0F 85) to absolute address t. */
+static __inline__ void raw_jnz(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x85);
+ emit_long(t-(uae_u32)target-4);
+}
+/* Emit only the JNZ rel32 opcode bytes (0F 85); no displacement is written here. */
+static __inline__ void raw_jnz_l_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x85);
+}
+/* Emit only the Jcc rel32 opcode bytes (0F 80+cc) for condition code cc; no displacement is written here. */
+static __inline__ void raw_jcc_l_oponly(int cc)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x80+cc);
+}
+/* Emit only the JNZ rel8 opcode (75); no displacement is written here. */
+static __inline__ void raw_jnz_b_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0x75);
+}
+/* Emit only the JZ rel8 opcode (74); no displacement is written here. */
+static __inline__ void raw_jz_b_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0x74);
+}
+/* Emit only the JMP rel32 opcode (E9); no displacement is written here. */
+static __inline__ void raw_jmp_l_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0xe9);
+}
+/* Emit only the JMP rel8 opcode (EB); no displacement is written here. */
+static __inline__ void raw_jmp_b_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0xeb);
+}
+/* Emit RET (opcode C3). */
+static __inline__ void raw_ret(void)
+{
+ lopt_emit_all();
+ emit_byte(0xc3);
+}
+/* Emit NOP (opcode 90). */
+static __inline__ void raw_nop(void)
+{
+ lopt_emit_all();
+ emit_byte(0x90);
+}
+
+
+/*************************************************************************
+ * Flag handling, to and fro UAE flag register *
+ *************************************************************************/
+
+
+#define FLAG_NREG1 0 /* Set to -1 if any register will do */
+/* Save the native flags into FLAGTMP's memory slot: LAHF, then a setcc to the slot and a byte store of register r+4 to slot+1. */
+static __inline__ void raw_flags_to_reg(int r)
+{
+ raw_lahf(0); /* Most flags in AH */
+ //raw_setcc(r,0); /* V flag in AL */
+ raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
+
+#if 1 /* Let's avoid those nasty partial register stalls */
+ //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
+ raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
+ //live.state[FLAGTMP].status=CLEAN;
+
live.state[FLAGTMP].status=INMEM;
+ live.state[FLAGTMP].realreg=-1;
+ /* We just "evicted" FLAGTMP. */
+ if (live.nat[r].nholds!=1) {
+ /* Huh? */
+ abort();
+ }
+ live.nat[r].nholds=0;
+#endif
+}
+/* raw_reg_to_flags: rebuild the native flags from register r using CMP r,-127 followed by SAHF. */
+#define FLAG_NREG2 0 /* Set to -1 if any register will do */
+static __inline__ void raw_reg_to_flags(int r)
+{
+ raw_cmp_b_ri(r,-127); /* set V */
+ raw_sahf(0);
+}
+
+/* Apparently, there are enough instructions between flag store and
+ flag reload to avoid the partial memory stall */
+static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
+{
+#if 1
+ raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
+#else
+ raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
+ raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
+#endif
+}
+
+/* FLAGX is byte sized, and we *do* write it at that size */
+static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
+{
+ if (live.nat[target].canbyte) /* use the narrowest load the target register supports */
+ raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
+ else if (live.nat[target].canword)
+ raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
+ else
+ raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
+}
+/* raw_flags_set_zero: set the native Z flag iff register r is zero, keeping the other saved flag bits; uses f and t as scratch and overwrites r. */
+#define NATIVE_FLAG_Z 0x40
+#define NATIVE_CC_EQ 4
+static __inline__ void raw_flags_set_zero(int f, int r, int t)
+{
+ // FIXME: this is really suboptimal
+ raw_pushfl();
+ raw_pop_l_r(f); /* f = current flags */
+ raw_and_l_ri(f,~NATIVE_FLAG_Z); /* clear Z in the saved copy */
+ raw_test_l_rr(r,r); /* Z now reflects r==0 */
+ raw_mov_l_ri(r,0);
+ raw_mov_l_ri(t,NATIVE_FLAG_Z);
+ raw_cmov_l_rr(r,t,NATIVE_CC_EQ); /* r = NATIVE_FLAG_Z if the test found zero, else 0 */
+ raw_or_l(f,r);
+ raw_push_l_r(f);
+ raw_popfl(); /* reload flags from the patched copy */
+}
+/* Add off to register number 4 (ESP in x86 register encoding). */
+static __inline__ void raw_inc_sp(int off)
+{
+ raw_add_l_ri(4,off);
+}
+
+/*************************************************************************
+ * Handling mistaken direct memory access *
+ *************************************************************************/
+
+
+#ifdef NATMEM_OFFSET
+#ifdef _WIN32 // %%% BRIAN KING WAS HERE %%%
+#include /* NOTE(review): header name missing in this copy -- apparently stripped as an HTML tag; restore from upstream */
+#else
+#include /* NOTE(review): header name missing in this copy -- restore from upstream */
+#endif
+#include /* NOTE(review): header name missing in this copy -- restore from upstream */
+
+#define SIG_READ 1
+#define SIG_WRITE 2
+/* in_handler is set to 1 by the fault handlers' trampoline path and cleared by the code they generate; veccode is where that trampoline is emitted. */
+static int in_handler=0;
+static uae_u8 *veccode;
+
+#ifdef
_WIN32
+int EvalException ( LPEXCEPTION_POINTERS blah, int n_except ) /* Win32 exception filter: tries to emulate a faulting MOV issued by JIT-compiled code; returns EXCEPTION_CONTINUE_EXECUTION when handled, EXCEPTION_CONTINUE_SEARCH otherwise. */
+{
+ PEXCEPTION_RECORD pExceptRecord = NULL;
+ PCONTEXT pContext = NULL;
+
+ uae_u8* i = NULL; /* address of the faulting instruction */
+ uae_u32 addr = 0; /* faulting data address */
+ int r=-1; /* register field of the decoded instruction, -1 = not decoded */
+ int size=4; /* operand size in bytes */
+ int dir=-1; /* SIG_READ or SIG_WRITE */
+ int len=0; /* length of the decoded instruction in bytes */
+ int j;
+ /* Only access violations are of interest, and only while canbang is set. */
+ if( n_except != STATUS_ACCESS_VIOLATION || !canbang)
+ return EXCEPTION_CONTINUE_SEARCH;
+
+ pExceptRecord = blah->ExceptionRecord;
+ pContext = blah->ContextRecord;
+
+ if( pContext )
+ {
+ i = (uae_u8 *)(pContext->Eip);
+ }
+ if( pExceptRecord )
+ {
+ addr = (uae_u32)(pExceptRecord->ExceptionInformation[1]);
+ }
+#ifdef JIT_DEBUG
+ write_log("JIT: fault address is 0x%x at 0x%x\n",addr,i);
+#endif
+ if (!canbang || !currprefs.cachesize)
+ {
+#ifdef JIT_DEBUG
+ write_log("JIT: Not happy! Canbang or cachesize is 0 in SIGSEGV handler!\n");
+#endif
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+
+ if (in_handler)
+ write_log("JIT: Argh --- Am already in a handler. Shouldn't happen!\n");
+ /* Decode only if the fault happened inside the translation cache. */
+ if (canbang && i>=compiled_code && i<=current_compile_p) {
+ if (*i==0x66) { /* operand-size prefix: 16-bit access */
+ i++;
+ size=2;
+ len++;
+ }
+ /* Recognised opcodes: 8A/88 = byte MOV load/store, 8B/89 = word/long MOV load/store. */
+ switch(i[0]) {
+ case 0x8a:
+ if ((i[1]&0xc0)==0x80) { /* mod=10: opcode + ModRM + disp32 = 6 bytes */
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+ case 0x88:
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+ case 0x8b:
+ switch(i[1]&0xc0) { /* instruction length depends on the ModRM mod field */
+ case 0x80:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=6;
+ break;
+ case 0x40:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=3;
+ break;
+ case 0x00:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=2;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x89:
+ switch(i[1]&0xc0) {
+ case 0x80:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=6;
+ break;
+ case 0x40:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=3;
+ break;
+ case 0x00:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=2;
+ break;
+ }
+ break;
+ }
+ }
+
+ if (r!=-1) {
+ void* pr=NULL; /* points at the saved copy of the register operand inside *pContext */
+#ifdef JIT_DEBUG
+ write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
+#endif
+ /* Map the x86 register number onto the CONTEXT record; for byte accesses 4..7 select AH/CH/DH/BH. */
+ switch(r) {
+ case 0: pr=&(pContext->Eax);
break;
+ case 1: pr=&(pContext->Ecx); break;
+ case 2: pr=&(pContext->Edx); break;
+ case 3: pr=&(pContext->Ebx); break;
+ case 4: pr=(size>1)?NULL:(((uae_u8*)&(pContext->Eax))+1); break; /* register 4 is only supported as AH; the wider form (ESP) is left NULL */
+ case 5: pr=(size>1)?
+ (void*)(&(pContext->Ebp)):
+ (void*)(((uae_u8*)&(pContext->Ecx))+1); break;
+ case 6: pr=(size>1)?
+ (void*)(&(pContext->Esi)):
+ (void*)(((uae_u8*)&(pContext->Edx))+1); break;
+ case 7: pr=(size>1)?
+ (void*)(&(pContext->Edi)):
+ (void*)(((uae_u8*)&(pContext->Ebx))+1); break;
+ default: abort();
+ }
+ if (pr) {
+ blockinfo* bi;
+ /* Old-style handling: perform the access here via get_/put_ and skip the faulting instruction. */
+ if (currprefs.comp_oldsegv) {
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+#ifdef JIT_DEBUG
+ write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+#endif
+ }
+ if (dir==SIG_READ) {
+ switch(size) {
+ case 1: *((uae_u8*)pr)=get_byte(addr); break;
+ case 2: *((uae_u16*)pr)=swap16(get_word(addr)); break;
+ case 4: *((uae_u32*)pr)=swap32(get_long(addr)); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,swap16(*((uae_u16*)pr))); break;
+ case 4: put_long(addr,swap32(*((uae_u32*)pr))); break;
+ default: abort();
+ }
+ }
+#ifdef JIT_DEBUG
+ write_log("Handled one access!\n");
+#endif
+ fflush(stdout);
+ segvcount++;
+ pContext->Eip+=len; /* resume after the emulated instruction */
+ }
+ else {
+ void* tmp=target; /* saved emit pointer, restored at the end of this branch */
+ int i; /* NOTE: shadows the outer instruction pointer i inside this block */
+ uae_u8 vecbuf[5]; /* the 5 original bytes overwritten by the jump below */
+
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+#ifdef JIT_DEBUG
+ write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+#endif
+ }
+ /* Overwrite the faulting instruction with JMP rel32 to veccode. */
+ target=(uae_u8*)pContext->Eip;
+ for (i=0;i<5;i++)
+ vecbuf[i]=target[i];
+ emit_byte(0xe9);
+ emit_long((uae_u32)veccode-(uae_u32)target-4);
+#ifdef JIT_DEBUG
+
+ write_log("Create jump to %p\n",veccode);
+ write_log("Handled one access!\n");
+#endif
+ segvcount++;
+ /* From here on, code is emitted into the veccode trampoline. */
+ target=veccode;
+
+ if (dir==SIG_READ) {
+ switch(size) {
+ case 1: raw_mov_b_ri(r,get_byte(addr)); break;
+ case 2:
raw_mov_w_ri(r,swap16(get_word(addr))); break;
+ case 4: raw_mov_l_ri(r,swap32(get_long(addr))); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,swap16(*((uae_u16*)pr))); break;
+ case 4: put_long(addr,swap32(*((uae_u32*)pr))); break;
+ default: abort();
+ }
+ }
+ for (i=0;i<5;i++) /* trampoline restores the 5 patched bytes at the fault site */
+ raw_mov_b_mi(pContext->Eip+i,vecbuf[i]);
+ raw_mov_l_mi((uae_u32)&in_handler,0); /* trampoline clears in_handler when it runs */
+ emit_byte(0xe9); /* JMP rel32 back to the instruction after the faulting one */
+ emit_long(pContext->Eip+len-(uae_u32)target-4);
+ in_handler=1;
+ target=tmp;
+ }
+ bi=active; /* find the block whose code range contains the faulting instruction and invalidate it */
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+#ifdef JIT_DEBUG
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+#endif
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ bi=bi->next;
+ }
+ /* Not found in the active list. Might be a rom routine that
+ is in the dormant list */
+ bi=dormant;
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+#ifdef JIT_DEBUG
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+#endif
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ bi=bi->next;
+ }
+#ifdef JIT_DEBUG
+ write_log("Huh? 
Could not find trigger!\n");
+#endif
+ return EXCEPTION_CONTINUE_EXECUTION; /* access was emulated even though no owning block was found */
+ }
+ }
+ write_log("JIT: Can't handle access!\n");
+ if( i )
+ {
+ for (j=0;j<10;j++) { /* dump the first 10 bytes at the fault site for diagnosis */
+ write_log("JIT: instruction byte %2d is 0x%02x\n",j,i[j]);
+ }
+ }
+#if 0
+ write_log("Please send the above info (starting at \"fault address\") to\n"
+ "bmeyer@csse.monash.edu.au\n"
+ "This shouldn't happen ;-)\n");
+#endif
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+#else
+static void vec(int x, struct sigcontext sc) /* Non-Win32 SIGSEGV handler: same MOV decoding as the _WIN32 handler, operating on the sigcontext passed by value. */
+{
+ uae_u8* i=(uae_u8*)sc.eip; /* address of the faulting instruction */
+ uae_u32 addr=sc.cr2; /* faulting data address */
+ int r=-1; /* register field of the decoded instruction, -1 = not decoded */
+ int size=4; /* operand size in bytes */
+ int dir=-1; /* SIG_READ or SIG_WRITE */
+ int len=0; /* length of the decoded instruction in bytes */
+ int j;
+
+ write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
+ if (!canbang)
+ write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
+ if (in_handler)
+ write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
+ /* Decode only if the fault happened inside the translation cache. */
+ if (canbang && i>=compiled_code && i<=current_compile_p) {
+ if (*i==0x66) { /* operand-size prefix: 16-bit access */
+ i++;
+ size=2;
+ len++;
+ }
+ /* Recognised opcodes: 8A/88 = byte MOV load/store, 8B/89 = word/long MOV load/store. */
+ switch(i[0]) {
+ case 0x8a:
+ if ((i[1]&0xc0)==0x80) { /* mod=10: opcode + ModRM + disp32 = 6 bytes */
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+ case 0x88:
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+
+ case 0x8b:
+ switch(i[1]&0xc0) { /* instruction length depends on the ModRM mod field */
+ case 0x80:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=6;
+ break;
+ case 0x40:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=3;
+ break;
+ case 0x00:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=2;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case 0x89:
+ switch(i[1]&0xc0) {
+ case 0x80:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=6;
+ break;
+ case 0x40:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=3;
+ break;
+ case 0x00:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=2;
+ break;
+ }
+ break;
+ }
+ }
+
+ if (r!=-1) {
+ void* pr=NULL; /* points at the saved copy of the register operand inside sc */
+ write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
+ /* Map the x86 register number onto sc; for byte accesses 4..7 select AH/CH/DH/BH. */
+ switch(r) {
+ case 0: pr=&(sc.eax); break;
+ case 1: pr=&(sc.ecx); break;
+ case 2: pr=&(sc.edx); break;
+ case 3: pr=&(sc.ebx); break;
+ case 4: 
pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
+ case 5: pr=(size>1)?
+ (void*)(&(sc.ebp)):
+ (void*)(((uae_u8*)&(sc.ecx))+1); break;
+ case 6: pr=(size>1)?
+ (void*)(&(sc.esi)):
+ (void*)(((uae_u8*)&(sc.edx))+1); break;
+ case 7: pr=(size>1)?
+ (void*)(&(sc.edi)):
+ (void*)(((uae_u8*)&(sc.ebx))+1); break;
+ default: abort();
+ }
+ if (pr) {
+ blockinfo* bi;
+ /* Old-style handling: perform the access here via get_/put_ and skip the faulting instruction. */
+ if (currprefs.comp_oldsegv) {
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+ write_log("Suspicious address in %x SEGV handler.\n",addr);
+ }
+ if (dir==SIG_READ) { /* NOTE(review): the _WIN32 handler wraps word/long values in swap16/swap32; this path does not -- confirm which is intended */
+ switch(size) {
+ case 1: *((uae_u8*)pr)=get_byte(addr); break;
+ case 2: *((uae_u16*)pr)=get_word(addr); break;
+ case 4: *((uae_u32*)pr)=get_long(addr); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,*((uae_u16*)pr)); break;
+ case 4: put_long(addr,*((uae_u32*)pr)); break;
+ default: abort();
+ }
+ }
+ write_log("Handled one access!\n");
+ fflush(stdout);
+ segvcount++;
+ sc.eip+=len; /* advance the saved instruction pointer past the emulated instruction */
+ }
+ else {
+ void* tmp=target; /* saved emit pointer, restored at the end of this branch */
+ int i; /* NOTE: shadows the outer instruction pointer i inside this block */
+ uae_u8 vecbuf[5]; /* the 5 original bytes overwritten by the jump below */
+
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+ write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+ }
+ /* Overwrite the faulting instruction with JMP rel32 to veccode. */
+ target=(uae_u8*)sc.eip;
+ for (i=0;i<5;i++)
+ vecbuf[i]=target[i];
+ emit_byte(0xe9);
+ emit_long((uae_u32)veccode-(uae_u32)target-4);
+ write_log("Create jump to %p\n",veccode);
+
+ write_log("Handled one access!\n");
+ fflush(stdout);
+ segvcount++;
+ /* From here on, code is emitted into the veccode trampoline. */
+ target=veccode;
+
+ if (dir==SIG_READ) {
+ switch(size) {
+ case 1: raw_mov_b_ri(r,get_byte(addr)); break;
+ case 2: raw_mov_w_ri(r,get_word(addr)); break;
+ case 4: raw_mov_l_ri(r,get_long(addr)); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,*((uae_u16*)pr)); break;
+ case 4: put_long(addr,*((uae_u32*)pr)); break;
+ default: abort();
+ 
}
+ }
+ for (i=0;i<5;i++) /* trampoline restores the 5 patched bytes at the fault site */
+ raw_mov_b_mi(sc.eip+i,vecbuf[i]);
+ raw_mov_l_mi((uae_u32)&in_handler,0); /* trampoline clears in_handler when it runs */
+ emit_byte(0xe9); /* JMP rel32 back to the instruction after the faulting one */
+ emit_long(sc.eip+len-(uae_u32)target-4);
+ in_handler=1;
+ target=tmp;
+ }
+ bi=active; /* find the block whose code range contains the faulting instruction and invalidate it */
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return;
+ }
+ bi=bi->next;
+ }
+ /* Not found in the active list. Might be a rom routine that
+ is in the dormant list */
+ bi=dormant;
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return;
+ }
+ bi=bi->next;
+ }
+ write_log("Huh? Could not find trigger!\n");
+ return;
+ }
+ }
+ write_log("Can't handle access!\n");
+ for (j=0;j<10;j++) { /* dump the first 10 bytes at the fault site for diagnosis */
+ write_log("instruction byte %2d is %02x\n",j,i[j]);
+ }
+#if 0
+ write_log("Please send the above info (starting at \"fault address\") to\n"
+ "bmeyer@csse.monash.edu.au\n"
+ "This shouldn't happen ;-)\n");
+ fflush(stdout);
+#endif
+ signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
+}
+#endif
+#endif
+
+/*************************************************************************
+ * Checking for CPU features *
+ *************************************************************************/
+/* Register values captured by cpuid(); the generated code stores them at byte offsets 0, 4, 8 and 12. */
+typedef struct {
+ uae_u32 eax;
+ uae_u32 ecx;
+ uae_u32 edx;
+ uae_u32 ebx;
+} x86_regs;
+
+
+/* This could be so much easier if it could make assumptions about the
+ compiler... 
*/
+/* Scratch globals read by the generated CPUID stub: address of the result struct and the requested level. */
+static uae_u32 cpuid_ptr;
+static uae_u32 cpuid_level;
+/* Execute CPUID for the given level by emitting a small stub into a temporary cache buffer, calling it, and returning the captured registers. */
+static x86_regs cpuid(uae_u32 level)
+{
+ x86_regs answer;
+ uae_u8 *cpuid_space;
+ void* tmp=get_target(); /* saved emit pointer, restored after the stub is generated */
+
+ cpuid_ptr=(uae_u32)&answer;
+ cpuid_level=level;
+
+ cpuid_space = cache_alloc (256);
+ set_target(cpuid_space);
+ raw_push_l_r(0); /* eax */
+ raw_push_l_r(1); /* ecx */
+ raw_push_l_r(2); /* edx */
+ raw_push_l_r(3); /* ebx */
+ raw_push_l_r(7); /* edi */
+ raw_mov_l_rm(0,(uae_u32)&cpuid_level); /* register 0 = requested level */
+ raw_cpuid(0);
+ raw_mov_l_rm(7,(uae_u32)&cpuid_ptr); /* register 7 = address of answer */
+ raw_mov_l_Rr(7,0,0); /* store registers 0..3 at offsets 0, 4, 8, 12 (matches the x86_regs field order) */
+ raw_mov_l_Rr(7,1,4);
+ raw_mov_l_Rr(7,2,8);
+ raw_mov_l_Rr(7,3,12);
+ raw_pop_l_r(7);
+ raw_pop_l_r(3);
+ raw_pop_l_r(2);
+ raw_pop_l_r(1);
+ raw_pop_l_r(0);
+ raw_ret();
+ set_target(tmp);
+
+ ((cpuop_func*)cpuid_space)(0); /* run the stub just generated */
+ cache_free (cpuid_space);
+ return answer;
+}
+/* Query CPUID, log the vendor string, and set have_cmov (CPUID.1 EDX bit 15) and have_rat_stall (forced to 1 below). */
+static void raw_init_cpu(void)
+{
+ x86_regs x;
+ uae_u32 maxlev;
+
+ x=cpuid(0);
+ maxlev=x.eax;
+ write_log("Max CPUID level=%d Processor is %c%c%c%c%c%c%c%c%c%c%c%c\n",
+ maxlev,
+ x.ebx,
+ x.ebx>>8,
+ x.ebx>>16,
+ x.ebx>>24,
+ x.edx,
+ x.edx>>8,
+ x.edx>>16,
+ x.edx>>24,
+ x.ecx,
+ x.ecx>>8,
+ x.ecx>>16,
+ x.ecx>>24
+ );
+ have_rat_stall=(x.ecx==0x6c65746e); /* 0x6c65746e is "ntel" in little-endian byte order; overridden unconditionally below */
+
+ if (maxlev>=1) {
+ x=cpuid(1);
+ if (x.edx&(1<<15))
+ have_cmov=1;
+ }
+ have_rat_stall=1; /* always assume the stall, regardless of vendor */
+#if 0
+ if (!have_cmov)
+ have_rat_stall=0;
+#endif
+#if 0
+ write_log ("have_cmov=%d, avoid_cmov=%d, have_rat_stall=%d\n",
+ have_cmov,currprefs.avoid_cmov,have_rat_stall);
+ if (currprefs.avoid_cmov) {
+ write_log("Disabling cmov use despite processor claiming to support it!\n");
+ have_cmov=0;
+ }
+#else
+ /* Dear Bernie, I don't want to keep around options which are useless, and not
+ represented in the GUI anymore... Is this okay? */
+ write_log ("have_cmov=%d, have_rat_stall=%d\n", have_cmov, have_rat_stall);
+#endif
+#if 0 /* For testing of non-cmov code! 
*/
+ have_cmov=0;
+#endif
+#if 0 /* It appears that partial register writes are a bad idea even on
+ AMD K7 cores, even though they are not supposed to have the
+ dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
+ if (have_cmov)
+ have_rat_stall=1;
+#endif
+}
+
+/*************************************************************************
+ * FPU stuff *
+ *************************************************************************/
+
+/* NOTE(review): raw_fp_init is truncated in this copy -- see the note inside the body. */
+static __inline__ void raw_fp_init(void)
+{
+ int i;
+ /* NOTE(review): the next line is damaged: text between a '<' and a later '>' appears to have been stripped, taking the loop bound, the rest of this function and the head of the following FP-stack cleanup routine with it. Restore from upstream. */
+ for (i=0;i1) {
+ emit_byte(0x9b);
+ emit_byte(0xdb);
+ emit_byte(0xe3); /* 9B DB E3 = FINIT */
+ live.tos=-1;
+ }
+#endif
+ while (live.tos>=1) { /* DE D9 = FCOMPP: drops two stack entries per iteration */
+ emit_byte(0xde);
+ emit_byte(0xd9);
+ live.tos-=2;
+ }
+ while (live.tos>=0) { /* DD D8 = FSTP st(0): drops one remaining entry */
+ emit_byte(0xdd);
+ emit_byte(0xd8);
+ live.tos--;
+ }
+ raw_fp_init();
+}
+/* Bring FP register r to the top of the x87 stack as tracked in live.spos/live.onstack/live.tos, pushing a placeholder value if r is not on the stack yet. */
+static __inline__ void make_tos(int r)
+{
+ int p,q;
+
+ if (live.spos[r]<0) { /* Register not yet on stack */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
+ live.tos++;
+ live.spos[r]=live.tos;
+ live.onstack[live.tos]=r;
+ return;
+ }
+ /* Register is on stack */
+ if (live.tos==live.spos[r])
+ return;
+ p=live.spos[r];
+ q=live.onstack[live.tos];
+ /* D9 C8+i = FXCH st(i); afterwards swap the bookkeeping for r and the old top q. */
+ emit_byte(0xd9);
+ emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
+ live.onstack[live.tos]=r;
+ live.spos[r]=live.tos;
+ live.onstack[p]=q;
+ live.spos[q]=p;
+}
+/* Arrange for r to be on top of the x87 stack with r2 directly below it. */
+static __inline__ void make_tos2(int r, int r2)
+{
+ int q;
+
+ make_tos(r2); /* Put the reg that's supposed to end up in position2
+ on top */
+
+ if (live.spos[r]<0) { /* Register not yet on stack */
+ make_tos(r); /* This will extend the stack */
+ return;
+ }
+ /* Register is on stack */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* Move r2 into position 2 */
+ /* Mirror the FXCH st(1) in the bookkeeping: q moves to the top, r2 moves one below. */
+ q=live.onstack[live.tos-1];
+ live.onstack[live.tos]=q;
+ live.spos[q]=live.tos;
+ live.onstack[live.tos-1]=r2;
+ live.spos[r2]=live.tos-1;
+
+ make_tos(r); /* And r into 1 */
+}
+/* stackpos: aborts if r is not on the FP stack; callers use the result as an ST(i) index. NOTE(review): the rest of this function is missing in this copy. */
+static __inline__ int stackpos(int r)
+{
+ if (live.spos[r]<0)
+ abort();
+ if (live.tos=0) { /* NOTE(review): damaged line -- text between a '<' and a later '>' appears to have been stripped, merging the rest of stackpos() with the middle of raw_fmov_rr and dropping everything in between. Restore from upstream. */
+ /* source is on top of stack, and we already have the dest */
+ int dd=stackpos(d);
+ emit_byte(0xdd);
+ emit_byte(0xd0+dd); /* DD D0+i = FST st(i) */
+ }
+ else {
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source on tos */
+ tos_make(d); /* store to destination, pop if necessary */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+/* Emit FLDCW [index+base] (opcode D9 /5, register-indirect with a 32-bit displacement). */
+LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
+{
+ emit_byte(0xd9);
+ emit_byte(0xa8+index); /* ModRM: mod=10, /5 (FLDCW), rm=index */
+ emit_long(base);
+}
+LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
+
+/* d = sqrt(s): when d!=s, FSQRT is applied to a pushed copy of s that tos_make(d) then stores; when d==s it is applied in place on the stack top. */
+LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xfa); /* take square root */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xfa); /* take square root */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+/* d = |s| using FABS (D9 E1); same copy-or-in-place structure as raw_fsqrt_rr. */
+LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xe1); /* take fabs */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xe1); /* take fabs */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+/* d = s rounded to integer using FRNDINT (D9 FC); same copy-or-in-place structure as raw_fsqrt_rr. */
+LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* take frndint */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* take frndint */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+/* d = cos(s) using FCOS (D9 FF); same copy-or-in-place structure as raw_fsqrt_rr. */
+LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ 
emit_byte(0xff); /* take cos */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xff); /* take cos */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+/* d = sin(s) using FSIN (D9 FE): applied to a pushed copy when d!=s, in place on the stack top when d==s. */
+LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xfe); /* take sin */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xfe); /* take sin */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+/* Constant 1.0; generated code adds it by absolute address (the DC 05 sequences below). */
+double one=1;
+LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) /* d = 2^s: split s into rounded and fractional parts, F2XM1 on the fraction, add 1, then FSCALE by the rounded part. */
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* rndint */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two elements */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* subtract rounded from original */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 */
+ emit_byte(0xdc);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* and scale it */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* take the rounded value off */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+/* d = e^s: multiply s by log2(e), then the same 2^x sequence as raw_ftwotox_rr. */
+LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xea); /* fldl2e */
+ emit_byte(0xde);
+ emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
+
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* duplicate top of stack. 
Now up to 8 high */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* rndint */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two elements */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* subtract rounded from original */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 */
+ emit_byte(0xdc);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* and scale it */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* take the rounded value off */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+/* d = log2(s): push 1.0 above a copy of s, swap them, and apply FYL2X (D9 F1). */
+LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* push '1' */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two */
+ emit_byte(0xd9);
+ emit_byte(0xf1); /* take 1*log2(x) */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+
+/* d = -s using FCHS (D9 E0): applied to a pushed copy when d!=s, in place on the stack top when d==s. */
+LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* take fchs */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* take fchs */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+/* d += s: uses the DC form (FADD st(i),st(0)) when s is already on top, otherwise brings d to the top and uses the D8 form (FADD st(0),st(i)). */
+LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ if (live.spos[s]==live.tos) {
+ /* Source is on top of stack */
+ ds=stackpos(d);
+ emit_byte(0xdc);
+ emit_byte(0xc0+ds); /* add source to dest*/
+ }
+ else {
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xd8);
+ emit_byte(0xc0+ds); /* add source to dest*/
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+/* d -= s: same two-case structure as raw_fadd_rr, with the FSUB encodings. */
+LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ if (live.spos[s]==live.tos) {
+ /* Source is on top of stack */
+ 
ds=stackpos(d);
+ emit_byte(0xdc);
+ emit_byte(0xe8+ds); /* sub source from dest*/
+ }
+ else {
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xd8);
+ emit_byte(0xe0+ds); /* sub src from dest */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+/* Compare d with s: bring d to the top of the stack and emit FUCOM st(i) (DD E0+i) against s. */
+LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xdd);
+ emit_byte(0xe0+ds); /* cmp dest with source*/
+}
+LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
+/* d *= s: uses the DC form (FMUL st(i),st(0)) when s is already on top, otherwise brings d to the top and uses the D8 form (FMUL st(0),st(i)). */
+LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ if (live.spos[s]==live.tos) {
+ /* Source is on top of stack */
+ ds=stackpos(d);
+ emit_byte(0xdc);
+ emit_byte(0xc8+ds); /* mul dest by source*/
+ }
+ else {
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xd8);
+ emit_byte(0xc8+ds); /* mul dest by source*/
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+/* d /= s: same two-case structure as raw_fmul_rr, with the FDIV encodings. */
+LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ if (live.spos[s]==live.tos) {
+ /* Source is on top of stack */
+ ds=stackpos(d);
+ emit_byte(0xdc);
+ emit_byte(0xf8+ds); /* div dest by source */
+ }
+ else {
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xd8);
+ emit_byte(0xf0+ds); /* div dest by source*/
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+/* d = remainder of d by s using FPREM (D9 F8); make_tos2 puts d on top with s directly below, which the ds check verifies. */
+LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos2(d,s);
+ ds=stackpos(s);
+
+ if (ds!=1) {
+ printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
+ abort();
+ }
+ emit_byte(0xd9);
+ emit_byte(0xf8); /* take rem from dest by source */
+}
+LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
+/* d = remainder of d by s using FPREM1 (D9 F5); same stack setup and check as raw_frem_rr. */
+LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos2(d,s);
+ ds=stackpos(s);
+
+ if (ds!=1) {
+ printf("Failed horribly in raw_frem1_rr! 
ds is %d\n",ds); + abort(); + } + emit_byte(0xd9); + emit_byte(0xf5); /* take rem1 from dest by source */ +} +LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) + + +LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) +{ + make_tos(r); + emit_byte(0xd9); /* ftst */ + emit_byte(0xe4); +} +LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) + +static __inline__ void raw_fflags_into_flags(int r) +{ + int p; + + usereg(r); + p=stackpos(r); + + emit_byte(0xd9); + emit_byte(0xee); /* Push 0 */ + emit_byte(0xd9); + emit_byte(0xc9+p); /* swap top two around */ + if (have_cmov) { + // gb-- fucomi is for P6 cores only, not K6-2 then... + emit_byte(0xdb); + emit_byte(0xe9+p); /* fucomi them */ + } + else { + emit_byte(0xdd); + emit_byte(0xe1+p); /* fucom them */ + emit_byte(0x9b); + emit_byte(0xdf); + emit_byte(0xe0); /* fstsw ax */ + raw_sahf(0); /* sahf */ + } + emit_byte(0xdd); + emit_byte(0xd9+p); /* store value back, and get rid of 0 */ +} diff --git a/compemu_support.c b/compemu_support.c index b37a3c38..31803314 100755 --- a/compemu_support.c +++ b/compemu_support.c @@ -3989,7 +3989,6 @@ MIDFUNC(0,nop,(void)) } MENDFUNC(0,nop,(void)) - MIDFUNC(1,f_forget_about,(FW r)) { if (f_isinreg(r)) @@ -4197,6 +4196,38 @@ MIDFUNC(2,fcos_rr,(FW d, FR s)) } MENDFUNC(2,fcos_rr,(FW d, FR s)) +MIDFUNC(2,ftan_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftan_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftan_rr,(FW d, FR s)) + +MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) +{ + s=f_readreg(s); /* s for source */ + d=f_writereg(d); /* d for sine */ + c=f_writereg(c); /* c for cosine */ + raw_fsincos_rr(d,c,s); + f_unlock(s); + f_unlock(d); + f_unlock(c); +} +MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) + +MIDFUNC(2,fscale_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fscale_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fscale_rr,(FRW d, FR s)) + MIDFUNC(2,ftwotox_rr,(FW d, FR s)) { s=f_readreg(s); @@ -4217,6 +4248,26 @@ MIDFUNC(2,fetox_rr,(FW d, FR s)) } 
MENDFUNC(2,fetox_rr,(FW d, FR s)) +MIDFUNC(2,fetoxM1_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fetoxM1_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fetoxM1_rr,(FW d, FR s)) + +MIDFUNC(2,ftentox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftentox_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftentox_rr,(FW d, FR s)) + MIDFUNC(2,frndint_rr,(FW d, FR s)) { s=f_readreg(s); @@ -4237,6 +4288,106 @@ MIDFUNC(2,flog2_rr,(FW d, FR s)) } MENDFUNC(2,flog2_rr,(FW d, FR s)) +MIDFUNC(2,flogN_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flogN_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flogN_rr,(FW d, FR s)) + +MIDFUNC(2,flogNP1_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flogNP1_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flogNP1_rr,(FW d, FR s)) + +MIDFUNC(2,flog10_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flog10_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flog10_rr,(FW d, FR s)) + +MIDFUNC(2,fasin_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fasin_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fasin_rr,(FW d, FR s)) + +MIDFUNC(2,facos_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_facos_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,facos_rr,(FW d, FR s)) + +MIDFUNC(2,fatan_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fatan_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fatan_rr,(FW d, FR s)) + +MIDFUNC(2,fatanh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fatanh_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fatanh_rr,(FW d, FR s)) + +MIDFUNC(2,fsinh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsinh_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsinh_rr,(FW d, FR s)) + +MIDFUNC(2,fcosh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fcosh_rr(d,s); + f_unlock(s); + f_unlock(d); +} 
+MENDFUNC(2,fcosh_rr,(FW d, FR s)) + +MIDFUNC(2,ftanh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftanh_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftanh_rr,(FW d, FR s)) + MIDFUNC(2,fneg_rr,(FW d, FR s)) { s=f_readreg(s); diff --git a/compemu_support_old.c b/compemu_support_old.c new file mode 100755 index 00000000..b37a3c38 --- /dev/null +++ b/compemu_support_old.c @@ -0,0 +1,6182 @@ +#define writemem_special writemem +#define readmem_special readmem + +#define USE_MATCHSTATE 0 +#define setzflg_uses_bsf 0 +#include "sysconfig.h" +#include "sysdeps.h" +#include "config.h" +#include "options.h" +#include "events.h" +#include "include/memory.h" +#include "custom.h" +#include "newcpu.h" +#include "comptbl.h" +#include "compemu.h" + +// %%% BRIAN KING WAS HERE %%% +extern int canbang; +#include +#include /* for PAGESIZE */ + +cpuop_func *compfunctbl[65536]; +cpuop_func *nfcompfunctbl[65536]; +#ifdef NOFLAGS_SUPPORT +cpuop_func *nfcpufunctbl[65536]; +#endif +uae_u8* comp_pc_p; + +uae_u8* start_pc_p; +uae_u32 start_pc; +uae_u32 current_block_pc_p; +uae_u32 current_block_start_target; +uae_u32 needed_flags; +static uae_u32 next_pc_p; +static uae_u32 taken_pc_p; +static int branch_cc; +int segvcount=0; +int soft_flush_count=0; +int hard_flush_count=0; +int compile_count=0; +int checksum_count=0; +static uae_u8* current_compile_p=NULL; +static uae_u8* max_compile_start; +uae_u8* compiled_code=NULL; +static uae_s32 reg_alloc_run; +static int have_rat_stall=0; + +void* pushall_call_handler=NULL; +static void* popall_do_nothing=NULL; +static void* popall_exec_nostats=NULL; +static void* popall_execute_normal=NULL; +static void* popall_cache_miss=NULL; +static void* popall_recompile_block=NULL; +static void* popall_check_checksum=NULL; + +extern uae_u32 oink; +extern unsigned long foink3; +extern unsigned long foink; + +/* The 68k only ever executes from even addresses. 
So right now, we + waste half the entries in this array + UPDATE: We now use those entries to store the start of the linked + lists that we maintain for each hash result. */ +cacheline cache_tags[TAGSIZE]; +int letit=0; +blockinfo* hold_bi[MAX_HOLD_BI]; +blockinfo* active; +blockinfo* dormant; + +op_properties prop[65536]; + +#ifdef NOFLAGS_SUPPORT +/* 68040 */ +extern struct cputbl op_smalltbl_0_nf[]; +#endif +extern struct cputbl op_smalltbl_0_comp_nf[]; +extern struct cputbl op_smalltbl_0_comp_ff[]; +#ifdef NOFLAGS_SUPPORT +/* 68020 + 68881 */ +extern struct cputbl op_smalltbl_1_nf[]; +/* 68020 */ +extern struct cputbl op_smalltbl_2_nf[]; +/* 68010 */ +extern struct cputbl op_smalltbl_3_nf[]; +/* 68000 */ +extern struct cputbl op_smalltbl_4_nf[]; +/* 68000 slow but compatible. */ +extern struct cputbl op_smalltbl_5_nf[]; +#endif + +static void flush_icache_hard(int n); + + + +bigstate live; +smallstate empty_ss; +smallstate default_ss; +static int optlev; + +static int writereg(int r, int size); +static void unlock(int r); +static void setlock(int r); +static int readreg_specific(int r, int size, int spec); +static int writereg_specific(int r, int size, int spec); +static void prepare_for_call_1(void); +static void prepare_for_call_2(void); +static void align_target(uae_u32 a); + +static uae_s32 nextused[VREGS]; + +static uae_u8 *popallspace; + +uae_u32 m68k_pc_offset; + +/* Some arithmetic ooperations can be optimized away if the operands + are known to be constant. But that's only a good idea when the + side effects they would have on the flags are not important. This + variable indicates whether we need the side effects or not +*/ +uae_u32 needflags=0; + +/* Flag handling is complicated. + + x86 instructions create flags, which quite often are exactly what we + want. So at times, the "68k" flags are actually in the x86 flags. + + Then again, sometimes we do x86 instructions that clobber the x86 + flags, but don't represent a corresponding m68k instruction. 
In that + case, we have to save them. + + We used to save them to the stack, but now store them back directly + into the regflags.cznv of the traditional emulation. Thus some odd + names. + + So flags can be in either of two places (used to be three; boy were + things complicated back then!); And either place can contain either + valid flags or invalid trash (and on the stack, there was also the + option of "nothing at all", now gone). A couple of variables keep + track of the respective states. + + To make things worse, we might or might not be interested in the flags. + by default, we are, but a call to dont_care_flags can change that + until the next call to live_flags. If we are not, pretty much whatever + is in the register and/or the native flags is seen as valid. +*/ + + +static __inline__ blockinfo* get_blockinfo(uae_u32 cl) +{ + return cache_tags[cl+1].bi; +} + +static __inline__ blockinfo* get_blockinfo_addr(void* addr) +{ + blockinfo* bi=get_blockinfo(cacheline(addr)); + + while (bi) { + if (bi->pc_p==addr) + return bi; + bi=bi->next_same_cl; + } + return NULL; +} + + +/******************************************************************* + * All sorts of list related functions for all of the lists * + *******************************************************************/ + +static __inline__ void remove_from_cl_list(blockinfo* bi) +{ + uae_u32 cl=cacheline(bi->pc_p); + + if (bi->prev_same_cl_p) + *(bi->prev_same_cl_p)=bi->next_same_cl; + if (bi->next_same_cl) + bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p; + if (cache_tags[cl+1].bi) + cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use; + else + cache_tags[cl].handler=popall_execute_normal; +} + +static __inline__ void remove_from_list(blockinfo* bi) +{ + if (bi->prev_p) + *(bi->prev_p)=bi->next; + if (bi->next) + bi->next->prev_p=bi->prev_p; +} + +static __inline__ void remove_from_lists(blockinfo* bi) +{ + remove_from_list(bi); + remove_from_cl_list(bi); +} + +static __inline__ void 
add_to_cl_list(blockinfo* bi) +{ + uae_u32 cl=cacheline(bi->pc_p); + + if (cache_tags[cl+1].bi) + cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl); + bi->next_same_cl=cache_tags[cl+1].bi; + + cache_tags[cl+1].bi=bi; + bi->prev_same_cl_p=&(cache_tags[cl+1].bi); + + cache_tags[cl].handler=bi->handler_to_use; +} + +static __inline__ void raise_in_cl_list(blockinfo* bi) +{ + remove_from_cl_list(bi); + add_to_cl_list(bi); +} + +static __inline__ void add_to_active(blockinfo* bi) +{ + if (active) + active->prev_p=&(bi->next); + bi->next=active; + + active=bi; + bi->prev_p=&active; +} + +static __inline__ void add_to_dormant(blockinfo* bi) +{ + if (dormant) + dormant->prev_p=&(bi->next); + bi->next=dormant; + + dormant=bi; + bi->prev_p=&dormant; +} + +static __inline__ void remove_dep(dependency* d) +{ + if (d->prev_p) + *(d->prev_p)=d->next; + if (d->next) + d->next->prev_p=d->prev_p; + d->prev_p=NULL; + d->next=NULL; +} + +/* This block's code is about to be thrown away, so it no longer + depends on anything else */ +static __inline__ void remove_deps(blockinfo* bi) +{ + remove_dep(&(bi->dep[0])); + remove_dep(&(bi->dep[1])); +} + +static __inline__ void adjust_jmpdep(dependency* d, void* a) +{ + *(d->jmp_off)=(uae_u32)a-((uae_u32)d->jmp_off+4); +} + +/******************************************************************** + * Soft flush handling support functions * + ********************************************************************/ + +static __inline__ void set_dhtu(blockinfo* bi, void* dh) +{ + //printf("bi is %p\n",bi); + if (dh!=bi->direct_handler_to_use) { + dependency* x=bi->deplist; + //printf("bi->deplist=%p\n",bi->deplist); + while (x) { + //printf("x is %p\n",x); + //printf("x->next is %p\n",x->next); + //printf("x->prev_p is %p\n",x->prev_p); + + if (x->jmp_off) { + adjust_jmpdep(x,dh); + } + x=x->next; + } + bi->direct_handler_to_use=dh; + } +} + +static __inline__ void invalidate_block(blockinfo* bi) +{ + int i; + + bi->optlevel=0; + 
bi->count=currprefs.optcount[0]-1; + bi->handler=NULL; + bi->handler_to_use=popall_execute_normal; + bi->direct_handler=NULL; + set_dhtu(bi,bi->direct_pen); + bi->needed_flags=0xff; + + for (i=0;i<2;i++) { + bi->dep[i].jmp_off=NULL; + bi->dep[i].target=NULL; + } + remove_deps(bi); +} + +static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target) +{ + blockinfo* tbi=get_blockinfo_addr((void*)target); + + Dif(!tbi) { + printf("Could not create jmpdep!\n"); + abort(); + } + bi->dep[i].jmp_off=jmpaddr; + bi->dep[i].target=tbi; + bi->dep[i].next=tbi->deplist; + if (bi->dep[i].next) + bi->dep[i].next->prev_p=&(bi->dep[i].next); + bi->dep[i].prev_p=&(tbi->deplist); + tbi->deplist=&(bi->dep[i]); +} + +static __inline__ void big_to_small_state(bigstate* b, smallstate* s) +{ + int i; + int count=0; + + for (i=0;inat[i].validsize=0; + s->nat[i].dirtysize=0; + if (b->nat[i].nholds) { + int index=b->nat[i].nholds-1; + int r=b->nat[i].holds[index]; + s->nat[i].holds=r; + s->nat[i].validsize=b->state[r].validsize; + s->nat[i].dirtysize=b->state[r].dirtysize; + count++; + } + } + printf("count=%d\n",count); + for (i=0;inat[i].dirtysize=0; + } +} + +static __inline__ void attached_state(blockinfo* bi) +{ + bi->havestate=1; + if (bi->direct_handler_to_use==bi->direct_handler) + set_dhtu(bi,bi->direct_pen); + bi->direct_handler=bi->direct_pen; + bi->status=BI_TARGETTED; +} + +static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate) +{ + blockinfo* bi=get_blockinfo_addr(addr); + int i; + +#if USE_OPTIMIZER + if (reg_alloc_run) + return NULL; +#endif + if (!bi) { + for (i=0;ipc_p=addr; + invalidate_block(bi); + add_to_active(bi); + add_to_cl_list(bi); + + } + } + } + if (!bi) { + write_log ("Looking for blockinfo, can't find free one\n"); + abort(); + } + +#if USE_MATCHSTATE + if (setstate && + !bi->havestate) { + big_to_small_state(&live,&(bi->env)); + attached_state(bi); + } +#endif + return bi; +} + +static void 
prepare_block(blockinfo* bi); + +static __inline__ void alloc_blockinfos(void) +{ + int i; + blockinfo* bi; + + for (i=0;i : comptrustbyte is not 'direct' or 'afterpic'\n"); + if (currprefs.comptrustword!=0 && currprefs.comptrustword!=3) + stop = 1, write_log(" : comptrustword is not 'direct' or 'afterpic'\n"); + if (currprefs.comptrustlong!=0 && currprefs.comptrustlong!=3) + stop = 1, write_log(" : comptrustlong is not 'direct' or 'afterpic'\n"); + if (currprefs.comptrustnaddr!=0 && currprefs.comptrustnaddr!=3) + stop = 1, write_log(" : comptrustnaddr is not 'direct' or 'afterpic'\n"); + if (currprefs.compnf!=1) + stop = 1, write_log(" : compnf is not 'yes'\n"); + if (currprefs.cachesize<1024) + stop = 1, write_log(" : cachesize is less than 1024\n"); + if (currprefs.comp_hardflush) + stop = 1, write_log(" : comp_flushmode is 'hard'\n"); + if (!canbang) + stop = 1, write_log(" : Cannot use most direct memory access,\n" + " and unable to recover from failed guess!\n"); +#if 0 + if (stop) { + gui_message("JIT: Configuration problems were detected!\n" + "JIT: These will adversely affect performance, and should\n" + "JIT: not be used. For more info, please see README.JIT-tuning\n" + "JIT: in the UAE documentation directory. 
You can force\n" + "JIT: your settings to be used by setting\n" + "JIT: 'compforcesettings=yes'\n" + "JIT: in your config file\n"); + exit(1); + } +#endif + } +} + +/******************************************************************** + * Get the optimizer stuff * + ********************************************************************/ + +#include "compemu_optimizer.c" + +/******************************************************************** + * Functions to emit data into memory, and other general support * + ********************************************************************/ + +static uae_u8* target; + +static void emit_init(void) +{ +} + +static __inline__ void emit_byte(uae_u8 x) +{ + *target++=x; +} + +static __inline__ void emit_word(uae_u16 x) +{ + *((uae_u16*)target)=x; + target+=2; +} + +static __inline__ void emit_long(uae_u32 x) +{ + *((uae_u32*)target)=x; + target+=4; +} + +static __inline__ uae_u32 reverse32(uae_u32 oldv) +{ + return ((oldv>>24)&0xff) | ((oldv>>8)&0xff00) | + ((oldv<<8)&0xff0000) | ((oldv<<24)&0xff000000); +} + + +void set_target(uae_u8* t) +{ + lopt_emit_all(); + target=t; +} + +static __inline__ uae_u8* get_target_noopt(void) +{ + return target; +} + +__inline__ uae_u8* get_target(void) +{ + lopt_emit_all(); + return get_target_noopt(); +} + + +/******************************************************************** + * Getting the information about the target CPU * + ********************************************************************/ + +#include "compemu_raw_x86.c" + + +/******************************************************************** + * Flags status handling. EMIT TIME! 
* + ********************************************************************/ + +static void bt_l_ri_noclobber(R4 r, IMM i); + +static void make_flags_live_internal(void) +{ + if (live.flags_in_flags==VALID) + return; + Dif (live.flags_on_stack==TRASH) { + printf("Want flags, got something on stack, but it is TRASH\n"); + abort(); + } + if (live.flags_on_stack==VALID) { + int tmp; + tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2); + raw_reg_to_flags(tmp); + unlock(tmp); + + live.flags_in_flags=VALID; + return; + } + printf("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n", + live.flags_in_flags,live.flags_on_stack); + abort(); +} + +static void flags_to_stack(void) +{ + if (live.flags_on_stack==VALID) + return; + if (!live.flags_are_important) { + live.flags_on_stack=VALID; + return; + } + Dif (live.flags_in_flags!=VALID) + abort(); + else { + int tmp; + tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1); + raw_flags_to_reg(tmp); + unlock(tmp); + } + live.flags_on_stack=VALID; +} + +static __inline__ void clobber_flags(void) +{ + if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID) + flags_to_stack(); + live.flags_in_flags=TRASH; +} + +/* Prepare for leaving the compiled stuff */ +static __inline__ void flush_flags(void) +{ + flags_to_stack(); + return; +} + +int touchcnt; + +/******************************************************************** + * register allocation per block logging * + ********************************************************************/ + +static uae_s8 vstate[VREGS]; +static uae_s8 nstate[N_REGS]; + +#define L_UNKNOWN -127 +#define L_UNAVAIL -1 +#define L_NEEDED -2 +#define L_UNNEEDED -3 + +static __inline__ void log_startblock(void) +{ + int i; + for (i=0;i0) { + free_nreg(bestreg); + } + if (isinreg(r)) { + int rr=live.state[r].realreg; + /* This will happen if we read a partially dirty register at a + bigger size */ + Dif (willclobber || live.state[r].validsize>=size) + abort(); + Dif (live.nat[rr].nholds!=1) + 
abort(); + if (size==4 && live.state[r].validsize==2) { + log_isused(bestreg); + raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem); + raw_bswap_32(bestreg); + raw_zero_extend_16_rr(rr,rr); + raw_zero_extend_16_rr(bestreg,bestreg); + raw_bswap_32(bestreg); + raw_lea_l_brr_indexed(rr,rr,bestreg,1,0); + live.state[r].validsize=4; + live.nat[rr].touched=touchcnt++; + return rr; + } + if (live.state[r].validsize==1) { + /* Nothing yet */ + } + evict(r); + } + + if (!willclobber) { + if (live.state[r].status!=UNDEF) { + if (isconst(r)) { + raw_mov_l_ri(bestreg,live.state[r].val); + live.state[r].val=0; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + log_isused(bestreg); + } + else { + if (r==FLAGTMP) + raw_load_flagreg(bestreg,r); + else if (r==FLAGX) + raw_load_flagx(bestreg,r); + else { + raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem); + } + live.state[r].dirtysize=0; + set_status(r,CLEAN); + log_isreg(bestreg,r); + } + } + else { + live.state[r].val=0; + live.state[r].dirtysize=0; + set_status(r,CLEAN); + log_isused(bestreg); + } + live.state[r].validsize=4; + } + else { /* this is the easiest way, but not optimal. FIXME! 
*/ + /* Now it's trickier, but hopefully still OK */ + if (!isconst(r) || size==4) { + live.state[r].validsize=size; + live.state[r].dirtysize=size; + live.state[r].val=0; + set_status(r,DIRTY); + if (size==4) + log_isused(bestreg); + else + log_isreg(bestreg,r); + } + else { + if (live.state[r].status!=UNDEF) + raw_mov_l_ri(bestreg,live.state[r].val); + live.state[r].val=0; + live.state[r].validsize=4; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + log_isused(bestreg); + } + } + live.state[r].realreg=bestreg; + live.state[r].realind=live.nat[bestreg].nholds; + live.nat[bestreg].touched=touchcnt++; + live.nat[bestreg].holds[live.nat[bestreg].nholds]=r; + live.nat[bestreg].nholds++; + + return bestreg; +} + +static int alloc_reg(int r, int size, int willclobber) +{ + return alloc_reg_hinted(r,size,willclobber,-1); +} + +static void unlock(int r) +{ + Dif (!live.nat[r].locked) + abort(); + live.nat[r].locked--; +} + +static void setlock(int r) +{ + live.nat[r].locked++; +} + + +static void mov_nregs(int d, int s) +{ + int ns=live.nat[s].nholds; + int nd=live.nat[d].nholds; + int i; + + if (s==d) + return; + + if (nd>0) + free_nreg(d); + + raw_mov_l_rr(d,s); + log_isused(d); + + for (i=0;i=size) { + n=live.state[r].realreg; + switch(size) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + answer=n; + } + break; + case 4: + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec); + } + + if (spec>=0 && spec!=answer) { + /* Too bad */ + mov_nregs(spec,answer); + answer=spec; + } + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + return answer; +} + + + +static int readreg(int r, int size) +{ + return readreg_general(r,size,-1,0); +} + +static int readreg_specific(int r, int size, int 
spec) +{ + return readreg_general(r,size,spec,0); +} + +static int readreg_offset(int r, int size) +{ + return readreg_general(r,size,-1,1); +} + + +static __inline__ int writereg_general(int r, int size, int spec) +{ + int n; + int answer=-1; + + if (size<4) { + remove_offset(r,spec); + } + + make_exclusive(r,size,spec); + if (isinreg(r)) { + int nvsize=size>live.state[r].validsize?size:live.state[r].validsize; + int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; + n=live.state[r].realreg; + + Dif (live.nat[n].nholds!=1) + abort(); + switch(size) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + } + break; + case 4: + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,size,1,spec); + } + if (spec>=0 && spec!=answer) { + mov_nregs(spec,answer); + answer=spec; + } + if (live.state[r].status==UNDEF) + live.state[r].validsize=4; + live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; + live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize; + + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + if (size==4) { + live.state[r].val=0; + } + else { + Dif (live.state[r].val) { + printf("Problem with val\n"); + abort(); + } + } + set_status(r,DIRTY); + return answer; +} + +static int writereg(int r, int size) +{ + return writereg_general(r,size,-1); +} + +static int writereg_specific(int r, int size, int spec) +{ + return writereg_general(r,size,spec); +} + +static __inline__ int rmw_general(int r, int wsize, int rsize, int spec) +{ 
+ int n; + int answer=-1; + + if (live.state[r].status==UNDEF) { + printf("WARNING: Unexpected read of undefined register %d\n",r); + } + remove_offset(r,spec); + make_exclusive(r,0,spec); + + Dif (wsize=rsize) { + n=live.state[r].realreg; + Dif (live.nat[n].nholds!=1) + abort(); + + switch(rsize) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + answer=n; + } + break; + case 4: + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec); + } + + if (spec>=0 && spec!=answer) { + /* Too bad */ + mov_nregs(spec,answer); + answer=spec; + } + if (wsize>live.state[r].dirtysize) + live.state[r].dirtysize=wsize; + if (wsize>live.state[r].validsize) + live.state[r].validsize=wsize; + set_status(r,DIRTY); + + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + + Dif (live.state[r].val) { + printf("Problem with val(rmw)\n"); + abort(); + } + return answer; +} + +static int rmw(int r, int wsize, int rsize) +{ + return rmw_general(r,wsize,rsize,-1); +} + +static int rmw_specific(int r, int wsize, int rsize, int spec) +{ + return rmw_general(r,wsize,rsize,spec); +} + + +/* needed for restoring the carry flag on non-P6 cores */ +static void bt_l_ri_noclobber(R4 r, IMM i) +{ + int size=4; + if (i<16) + size=2; + r=readreg(r,size); + raw_bt_l_ri(r,i); + unlock(r); +} + +/******************************************************************** + * FPU register status handling. EMIT TIME! 
* + ********************************************************************/ + +static void f_tomem(int r) +{ + if (live.fate[r].status==DIRTY) { +#if USE_LONG_DOUBLE + raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg); +#else + raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg); +#endif + live.fate[r].status=CLEAN; + } +} + +static void f_tomem_drop(int r) +{ + if (live.fate[r].status==DIRTY) { +#if USE_LONG_DOUBLE + raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg); +#else + raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg); +#endif + live.fate[r].status=INMEM; + } +} + + +static __inline__ int f_isinreg(int r) +{ + return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY; +} + +static void f_evict(int r) +{ + int rr; + + if (!f_isinreg(r)) + return; + rr=live.fate[r].realreg; + if (live.fat[rr].nholds==1) + f_tomem_drop(r); + else + f_tomem(r); + + Dif (live.fat[rr].locked && + live.fat[rr].nholds==1) { + write_log ("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg); + abort(); + } + + live.fat[rr].nholds--; + if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */ + int topreg=live.fat[rr].holds[live.fat[rr].nholds]; + int thisind=live.fate[r].realind; + live.fat[rr].holds[thisind]=topreg; + live.fate[topreg].realind=thisind; + } + live.fate[r].status=INMEM; + live.fate[r].realreg=-1; +} + +static __inline__ void f_free_nreg(int r) +{ + int i=live.fat[r].nholds; + + while (i) { + int vr; + + --i; + vr=live.fat[r].holds[i]; + f_evict(vr); + } + Dif (live.fat[r].nholds!=0) { + printf("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds); + abort(); + } +} + + +/* Use with care! 
*/ +static __inline__ void f_isclean(int r) +{ + if (!f_isinreg(r)) + return; + live.fate[r].status=CLEAN; +} + +static __inline__ void f_disassociate(int r) +{ + f_isclean(r); + f_evict(r); +} + + + +static int f_alloc_reg(int r, int willclobber) +{ + int bestreg; + uae_s32 when; + int i; + uae_s32 badness; + bestreg=-1; + when=2000000000; + for (i=N_FREGS;i--;) { + badness=live.fat[i].touched; + if (live.fat[i].nholds==0) + badness=0; + + if (!live.fat[i].locked && badness0) { + f_free_nreg(bestreg); + } + if (f_isinreg(r)) { + f_evict(r); + } + + if (!willclobber) { + if (live.fate[r].status!=UNDEF) { +#if USE_LONG_DOUBLE + raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem); +#else + raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem); +#endif + } + live.fate[r].status=CLEAN; + } + else { + live.fate[r].status=DIRTY; + } + live.fate[r].realreg=bestreg; + live.fate[r].realind=live.fat[bestreg].nholds; + live.fat[bestreg].touched=touchcnt++; + live.fat[bestreg].holds[live.fat[bestreg].nholds]=r; + live.fat[bestreg].nholds++; + + return bestreg; +} + +static void f_unlock(int r) +{ + Dif (!live.fat[r].locked) + abort(); + live.fat[r].locked--; +} + +static void f_setlock(int r) +{ + live.fat[r].locked++; +} + +static __inline__ int f_readreg(int r) +{ + int n; + int answer=-1; + + if (f_isinreg(r)) { + n=live.fate[r].realreg; + answer=n; + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) + answer=f_alloc_reg(r,0); + + live.fat[answer].locked++; + live.fat[answer].touched=touchcnt++; + return answer; +} + +static __inline__ void f_make_exclusive(int r, int clobber) +{ + freg_status oldstate; + int rr=live.fate[r].realreg; + int nr; + int nind; + int ndirt=0; + int i; + + if (!f_isinreg(r)) + return; + if (live.fat[rr].nholds==1) + return; + for (i=0;i>=i; + return; + } + CLOBBER_SHRL; + r=rmw(r,4,4); + raw_shrl_l_ri(r,i); + unlock(r); +} +MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shrl_w_ri,(RW2 r, 
IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,2,2); + raw_shrl_w_ri(r,i); + unlock(r); +} +MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,1,1); + raw_shrl_b_ri(r,i); + unlock(r); +} +MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shra_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,4,4); + raw_shra_l_ri(r,i); + unlock(r); +} +MENDFUNC(2,shra_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shra_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,2,2); + raw_shra_w_ri(r,i); + unlock(r); +} +MENDFUNC(2,shra_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shra_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,1,1); + raw_shra_b_ri(r,i); + unlock(r); +} +MENDFUNC(2,shra_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shra_l_rr,(RW4 d, R1 r)) +{ + if (isconst(r)) { + COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=1) { + write_log ("Illegal register %d in raw_rol_b\n",r); + abort(); + } + raw_shra_l_rr(d,r) ; + unlock(r); + unlock(d); +} +MENDFUNC(2,shra_l_rr,(RW4 d, R1 r)) + +MIDFUNC(2,shra_w_rr,(RW2 d, R1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=1) { + write_log ("Illegal register %d in raw_shra_b\n",r); + abort(); + } + raw_shra_w_rr(d,r) ; + unlock(r); + unlock(d); +} +MENDFUNC(2,shra_w_rr,(RW2 d, R1 r)) + +MIDFUNC(2,shra_b_rr,(RW1 d, R1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r)) { + COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=1) { + write_log ("Illegal register %d in raw_shra_b\n",r); + abort(); + } + raw_shra_b_rr(d,r) ; + unlock(r); + unlock(d); +} +MENDFUNC(2,shra_b_rr,(RW1 d, R1 r)) + + +MIDFUNC(2,setcc,(W1 d, IMM cc)) +{ + CLOBBER_SETCC; + d=writereg(d,1); + raw_setcc(d,cc); + unlock(d); +} +MENDFUNC(2,setcc,(W1 d, IMM cc)) + +MIDFUNC(2,setcc_m,(IMM d, IMM cc)) +{ + CLOBBER_SETCC; + raw_setcc_m(d,cc); +} +MENDFUNC(2,setcc_m,(IMM d, IMM cc)) + +MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc)) +{ + if (d==s) + return; + CLOBBER_CMOV; + s=readreg(s,4); + d=rmw(d,4,4); + raw_cmov_l_rr(d,s,cc); + unlock(s); + unlock(d); +} +MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc)) + +MIDFUNC(1,setzflg_l,(RW4 r)) +{ + if (setzflg_uses_bsf) { + CLOBBER_BSF; + r=rmw(r,4,4); + raw_bsf_l_rr(r,r); + unlock(r); + } + else { + Dif (live.flags_in_flags!=VALID) { + write_log("setzflg() wanted flags in native flags, they are %d\n", + live.flags_in_flags); + abort(); + } + r=readreg(r,4); + { + int f=writereg(S11,4); + int t=writereg(S12,4); + raw_flags_set_zero(f,r,t); + unlock(f); + unlock(r); + unlock(t); + } + } +} +MENDFUNC(1,setzflg_l,(RW4 r)) + +MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) +{ + CLOBBER_CMOV; + d=rmw(d,4,4); + raw_cmov_l_rm(d,s,cc); + unlock(d); +} +MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) + +MIDFUNC(2,bsf_l_rr,(W4 d, R4 s)) +{ + CLOBBER_BSF; + s=readreg(s,4); + d=writereg(d,4); + raw_bsf_l_rr(d,s); + unlock(s); + unlock(d); +} +MENDFUNC(2,bsf_l_rr,(W4 d, R4 s)) + +MIDFUNC(2,imul_32_32,(RW4 d, R4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_imul_32_32(d,s); + unlock(s); + unlock(d); +} +MENDFUNC(2,imul_32_32,(RW4 d, R4 s)) + +MIDFUNC(2,imul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_imul_64_32(d,s); + unlock(s); + unlock(d); 
+} +MENDFUNC(2,imul_64_32,(RW4 d, RW4 s)) + +MIDFUNC(2,mul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_mul_64_32(d,s); + unlock(s); + unlock(d); +} +MENDFUNC(2,mul_64_32,(RW4 d, RW4 s)) + +MIDFUNC(2,mul_32_32,(RW4 d, R4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_mul_32_32(d,s); + unlock(s); + unlock(d); +} +MENDFUNC(2,mul_32_32,(RW4 d, R4 s)) + +MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s16)live.state[s].val); + return; + } + + CLOBBER_SE16; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,2); + } + raw_sign_extend_16_rr(d,s); + if (!isrmw) { + unlock(d); + unlock(s); + } + else { + unlock(s); + } +} +MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s)) + +MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_SE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,1); + } + + raw_sign_extend_8_rr(d,s); + + if (!isrmw) { + unlock(d); + unlock(s); + } + else { + unlock(s); + } +} +MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s)) + + +MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u16)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE16; + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,2); + } + raw_zero_extend_16_rr(d,s); + if (!isrmw) { + unlock(d); + unlock(s); + } + else { + unlock(s); + } +} +MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s)) + +MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s)) +{ + int isrmw; + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,1); + } + + raw_zero_extend_8_rr(d,s); + + if (!isrmw) { + unlock(d); + unlock(s); + } + else { + unlock(s); + } +} +MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s)) + +MIDFUNC(2,mov_b_rr,(W1 d, R1 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=writereg(d,1); + raw_mov_b_rr(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,mov_b_rr,(W1 d, R1 s)) + +MIDFUNC(2,mov_w_rr,(W2 d, R2 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=writereg(d,2); + raw_mov_w_rr(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,mov_w_rr,(W2 d, R2 s)) + + +MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_mov_l_rrm_indexed(d,baser,index,factor); + unlock(d); + unlock(baser); + unlock(index); +} +MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) + +MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,2); + + raw_mov_w_rrm_indexed(d,baser,index,factor); + unlock(d); + unlock(baser); + unlock(index); +} +MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) + +MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) +{ + CLOBBER_MOV; + 
baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,1); + + raw_mov_b_rrm_indexed(d,baser,index,factor); + + unlock(d); + unlock(baser); + unlock(index); +} +MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) + + +MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + s=readreg(s,4); + + Dif (baser==s || index==s) + abort(); + + + raw_mov_l_mrr_indexed(baser,index,factor,s); + unlock(s); + unlock(baser); + unlock(index); +} +MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) + +MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + s=readreg(s,2); + + raw_mov_w_mrr_indexed(baser,index,factor,s); + unlock(s); + unlock(baser); + unlock(index); +} +MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) + +MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) +{ + CLOBBER_MOV; + s=readreg(s,1); + baser=readreg(baser,4); + index=readreg(index,4); + + raw_mov_b_mrr_indexed(baser,index,factor,s); + unlock(s); + unlock(baser); + unlock(index); +} +MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) + + +MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,4); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_l_bmrr_indexed(base,baser,index,factor,s); + unlock(s); + unlock(baser); + unlock(index); +} +MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) + +MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,2); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + 
base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_w_bmrr_indexed(base,baser,index,factor,s); + unlock(s); + unlock(baser); + unlock(index); +} +MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) + +MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_b_bmrr_indexed(base,baser,index,factor,s); + unlock(s); + unlock(baser); + unlock(index); +} +MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) + + + +/* Read a long from base+baser+factor*index */ +MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,4); + raw_mov_l_brrm_indexed(d,base,baser,index,factor); + unlock(d); + unlock(baser); + unlock(index); +} +MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) + + +MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + remove_offset(d,-1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,2); + raw_mov_w_brrm_indexed(d,base,baser,index,factor); + unlock(d); + unlock(baser); + unlock(index); +} +MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) + + +MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + remove_offset(d,-1); + baser=readreg_offset(baser,4); + 
index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,1); + raw_mov_b_brrm_indexed(d,base,baser,index,factor); + unlock(d); + unlock(baser); + unlock(index); +} +MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) + +/* Read a long from base+factor*index */ +MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) +{ + int indexreg=index; + + if (isconst(index)) { + COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val); + return; + } + + CLOBBER_MOV; + index=readreg_offset(index,4); + base+=get_offset(indexreg)*factor; + d=writereg(d,4); + + raw_mov_l_rm_indexed(d,base,index,factor); + unlock(index); + unlock(d); +} +MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) + + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,4); + + raw_mov_l_rR(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,2); + + raw_mov_w_rR(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,1); + + raw_mov_b_rR(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_brR,(W4 d, R4 s, 
IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,4); + + raw_mov_l_brR(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,2); + + raw_mov_w_brR(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,1); + + raw_mov_b_brR(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) + +MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_l_Ri(d,i,offset); + unlock(d); +} +MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset)) + +MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_w_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_Ri(d,i,offset); + unlock(d); +} +MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset)) + +MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + 
d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_Ri(d,i,offset); + unlock(d); +} +MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset)) + + /* Warning! OFFSET is byte sized only! */ +MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg(d,4); + + raw_mov_l_Rr(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) + +MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg(d,4); + raw_mov_w_Rr(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset)) + +MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg(d,4); + raw_mov_b_Rr(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset)) + +MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val+offset); + return; + } +#if USE_OFFSET + if (d==s) { + add_offset(d,offset); + return; + } +#endif + CLOBBER_LEA; + s=readreg(s,4); + d=writereg(d,4); + raw_lea_l_brr(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset)) + +MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) +{ + if (!offset) { + COMPCALL(lea_l_rr_indexed)(d,s,index,factor); + return; + } + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_brr_indexed(d,s,index,factor,offset); + 
unlock(d); + unlock(index); + unlock(s); +} +MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) + +MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) +{ + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_rr_indexed(d,s,index,factor); + unlock(d); + unlock(index); + unlock(s); +} +MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) + +/* write d to the long at the address contained in s+offset */ +MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + + raw_mov_l_bRr(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) + +/* write the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) +{ + int dreg=d; + + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_bRr(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) + +MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_bRr(d,s,offset); + unlock(d); + unlock(s); +} +MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset)) + +MIDFUNC(1,bswap_32,(RW4 r)) +{ + int reg=r; + + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=reverse32(oldv); + return; + } + + CLOBBER_SW32; + r=rmw(r,4,4); + raw_bswap_32(r); + unlock(r); +} +MENDFUNC(1,bswap_32,(RW4 r)) + +MIDFUNC(1,bswap_16,(RW2 r)) +{ + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) | 
+ (oldv&0xffff0000); + return; + } + + CLOBBER_SW16; + r=rmw(r,2,2); + + raw_bswap_16(r); + unlock(r); +} +MENDFUNC(1,bswap_16,(RW2 r)) + + + +MIDFUNC(2,mov_l_rr,(W4 d, R4 s)) +{ + int olds; + + if (d==s) { /* How pointless! */ + return; + } + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val); + return; + } +#if USE_ALIAS + olds=s; + disassociate(d); + s=readreg_offset(s,4); + live.state[d].realreg=s; + live.state[d].realind=live.nat[s].nholds; + live.state[d].val=live.state[olds].val; + live.state[d].validsize=4; + live.state[d].dirtysize=4; + set_status(d,DIRTY); + + live.nat[s].holds[live.nat[s].nholds]=d; + live.nat[s].nholds++; + log_clobberreg(d); + + /* printf("Added %d to nreg %d(%d), now holds %d regs\n", + d,s,live.state[d].realind,live.nat[s].nholds); */ + unlock(s); +#else + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,4); + + raw_mov_l_rr(d,s); + unlock(d); + unlock(s); +#endif +} +MENDFUNC(2,mov_l_rr,(W4 d, R4 s)) + +MIDFUNC(2,mov_l_mr,(IMM d, R4 s)) +{ + if (isconst(s)) { + COMPCALL(mov_l_mi)(d,live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + + raw_mov_l_mr(d,s); + unlock(s); +} +MENDFUNC(2,mov_l_mr,(IMM d, R4 s)) + + +MIDFUNC(2,mov_w_mr,(IMM d, R2 s)) +{ + if (isconst(s)) { + COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,2); + + raw_mov_w_mr(d,s); + unlock(s); +} +MENDFUNC(2,mov_w_mr,(IMM d, R2 s)) + +MIDFUNC(2,mov_w_rm,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_rm(d,s); + unlock(d); +} +MENDFUNC(2,mov_w_rm,(W2 d, IMM s)) + +MIDFUNC(2,mov_b_mr,(IMM d, R1 s)) +{ + if (isconst(s)) { + COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + + raw_mov_b_mr(d,s); + unlock(s); +} +MENDFUNC(2,mov_b_mr,(IMM d, R1 s)) + +MIDFUNC(2,mov_b_rm,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_rm(d,s); + unlock(d); +} +MENDFUNC(2,mov_b_rm,(W1 d, IMM s)) + +MIDFUNC(2,mov_l_ri,(W4 d, IMM s)) +{ + 
set_const(d,s); + return; +} +MENDFUNC(2,mov_l_ri,(W4 d, IMM s)) + +MIDFUNC(2,mov_w_ri,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_ri(d,s); + unlock(d); +} +MENDFUNC(2,mov_w_ri,(W2 d, IMM s)) + +MIDFUNC(2,mov_b_ri,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_ri(d,s); + unlock(d); +} +MENDFUNC(2,mov_b_ri,(W1 d, IMM s)) + + +MIDFUNC(2,add_l_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_l_mi(d,s) ; +} +MENDFUNC(2,add_l_mi,(IMM d, IMM s)) + +MIDFUNC(2,add_w_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_w_mi(d,s) ; +} +MENDFUNC(2,add_w_mi,(IMM d, IMM s)) + +MIDFUNC(2,add_b_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_b_mi(d,s) ; +} +MENDFUNC(2,add_b_mi,(IMM d, IMM s)) + + +MIDFUNC(2,test_l_ri,(R4 d, IMM i)) +{ + CLOBBER_TEST; + d=readreg(d,4); + + raw_test_l_ri(d,i); + unlock(d); +} +MENDFUNC(2,test_l_ri,(R4 d, IMM i)) + +MIDFUNC(2,test_l_rr,(R4 d, R4 s)) +{ + CLOBBER_TEST; + d=readreg(d,4); + s=readreg(s,4); + + raw_test_l_rr(d,s);; + unlock(d); + unlock(s); +} +MENDFUNC(2,test_l_rr,(R4 d, R4 s)) + +MIDFUNC(2,test_w_rr,(R2 d, R2 s)) +{ + CLOBBER_TEST; + d=readreg(d,2); + s=readreg(s,2); + + raw_test_w_rr(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,test_w_rr,(R2 d, R2 s)) + +MIDFUNC(2,test_b_rr,(R1 d, R1 s)) +{ + CLOBBER_TEST; + d=readreg(d,1); + s=readreg(s,1); + + raw_test_b_rr(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,test_b_rr,(R1 d, R1 s)) + + +MIDFUNC(2,and_l_ri,(RW4 d, IMM i)) +{ + if (isconst (d) && ! 
needflags) { + live.state[d].val &= i; + return; + } + + CLOBBER_AND; + d=rmw(d,4,4); + + raw_and_l_ri(d,i); + unlock(d); +} +MENDFUNC(2,and_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,and_l,(RW4 d, R4 s)) +{ + CLOBBER_AND; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_and_l(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,and_l,(RW4 d, R4 s)) + +MIDFUNC(2,and_w,(RW2 d, R2 s)) +{ + CLOBBER_AND; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_and_w(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,and_w,(RW2 d, R2 s)) + +MIDFUNC(2,and_b,(RW1 d, R1 s)) +{ + CLOBBER_AND; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_and_b(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,and_b,(RW1 d, R1 s)) + +MIDFUNC(2,or_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val|=i; + return; + } + CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_ri(d,i); + unlock(d); +} +MENDFUNC(2,or_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,or_l,(RW4 d, R4 s)) +{ + if (isconst(d) && isconst(s) && !needflags) { + live.state[d].val|=live.state[s].val; + return; + } + CLOBBER_OR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_or_l(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,or_l,(RW4 d, R4 s)) + +MIDFUNC(2,or_w,(RW2 d, R2 s)) +{ + CLOBBER_OR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_or_w(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,or_w,(RW2 d, R2 s)) + +MIDFUNC(2,or_b,(RW1 d, R1 s)) +{ + CLOBBER_OR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_or_b(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,or_b,(RW1 d, R1 s)) + +MIDFUNC(2,adc_l,(RW4 d, R4 s)) +{ + CLOBBER_ADC; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_adc_l(d,s); + + unlock(d); + unlock(s); +} +MENDFUNC(2,adc_l,(RW4 d, R4 s)) + +MIDFUNC(2,adc_w,(RW2 d, R2 s)) +{ + CLOBBER_ADC; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_adc_w(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,adc_w,(RW2 d, R2 s)) + +MIDFUNC(2,adc_b,(RW1 d, R1 s)) +{ + CLOBBER_ADC; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_adc_b(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,adc_b,(RW1 d, R1 s)) + 
+MIDFUNC(2,add_l,(RW4 d, R4 s)) +{ + if (isconst(s)) { + COMPCALL(add_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_add_l(d,s); + + unlock(d); + unlock(s); +} +MENDFUNC(2,add_l,(RW4 d, R4 s)) + +MIDFUNC(2,add_w,(RW2 d, R2 s)) +{ + if (isconst(s)) { + COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_add_w(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,add_w,(RW2 d, R2 s)) + +MIDFUNC(2,add_b,(RW1 d, R1 s)) +{ + if (isconst(s)) { + COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_add_b(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,add_b,(RW1 d, R1 s)) + +MIDFUNC(2,sub_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val-=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,-(signed)i); + return; + } +#endif + + CLOBBER_SUB; + d=rmw(d,4,4); + + raw_sub_l_ri(d,i); + unlock(d); +} +MENDFUNC(2,sub_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,sub_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,2,2); + + raw_sub_w_ri(d,i); + unlock(d); +} +MENDFUNC(2,sub_w_ri,(RW2 d, IMM i)) + +MIDFUNC(2,sub_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,1,1); + + raw_sub_b_ri(d,i); + + unlock(d); +} +MENDFUNC(2,sub_b_ri,(RW1 d, IMM i)) + +MIDFUNC(2,add_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,i); + return; + } +#endif + CLOBBER_ADD; + d=rmw(d,4,4); + raw_add_l_ri(d,i); + unlock(d); +} +MENDFUNC(2,add_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,add_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,2,2); + + raw_add_w_ri(d,i); + unlock(d); +} +MENDFUNC(2,add_w_ri,(RW2 d, IMM i)) + 
+MIDFUNC(2,add_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,1,1); + + raw_add_b_ri(d,i); + + unlock(d); +} +MENDFUNC(2,add_b_ri,(RW1 d, IMM i)) + +MIDFUNC(2,sbb_l,(RW4 d, R4 s)) +{ + CLOBBER_SBB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sbb_l(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,sbb_l,(RW4 d, R4 s)) + +MIDFUNC(2,sbb_w,(RW2 d, R2 s)) +{ + CLOBBER_SBB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sbb_w(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,sbb_w,(RW2 d, R2 s)) + +MIDFUNC(2,sbb_b,(RW1 d, R1 s)) +{ + CLOBBER_SBB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sbb_b(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,sbb_b,(RW1 d, R1 s)) + +MIDFUNC(2,sub_l,(RW4 d, R4 s)) +{ + if (isconst(s)) { + COMPCALL(sub_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sub_l(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,sub_l,(RW4 d, R4 s)) + +MIDFUNC(2,sub_w,(RW2 d, R2 s)) +{ + if (isconst(s)) { + COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sub_w(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,sub_w,(RW2 d, R2 s)) + +MIDFUNC(2,sub_b,(RW1 d, R1 s)) +{ + if (isconst(s)) { + COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sub_b(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,sub_b,(RW1 d, R1 s)) + +MIDFUNC(2,cmp_l,(R4 d, R4 s)) +{ + CLOBBER_CMP; + s=readreg(s,4); + d=readreg(d,4); + + raw_cmp_l(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,cmp_l,(R4 d, R4 s)) + +MIDFUNC(2,cmp_l_ri,(R4 r, IMM i)) +{ + CLOBBER_CMP; + r=readreg(r,4); + + raw_cmp_l_ri(r,i); + unlock(r); +} +MENDFUNC(2,cmp_l_ri,(R4 r, IMM i)) + +MIDFUNC(2,cmp_w,(R2 d, R2 s)) +{ + CLOBBER_CMP; + s=readreg(s,2); + d=readreg(d,2); + + raw_cmp_w(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,cmp_w,(R2 d, R2 s)) + +MIDFUNC(2,cmp_b,(R1 d, R1 s)) +{ + CLOBBER_CMP; + s=readreg(s,1); 
+ d=readreg(d,1); + + raw_cmp_b(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,cmp_b,(R1 d, R1 s)) + + +MIDFUNC(2,xor_l,(RW4 d, R4 s)) +{ + CLOBBER_XOR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_xor_l(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,xor_l,(RW4 d, R4 s)) + +MIDFUNC(2,xor_w,(RW2 d, R2 s)) +{ + CLOBBER_XOR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_xor_w(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,xor_w,(RW2 d, R2 s)) + +MIDFUNC(2,xor_b,(RW1 d, R1 s)) +{ + CLOBBER_XOR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_xor_b(d,s); + unlock(d); + unlock(s); +} +MENDFUNC(2,xor_b,(RW1 d, R1 s)) + +MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize)) +{ + clobber_flags(); + remove_all_offsets(); + if (osize==4) { + if (out1!=in1 && out1!=r) { + COMPCALL(forget_about)(out1); + } + } + else { + tomem_c(out1); + } + + in1=readreg_specific(in1,isize,REG_PAR1); + r=readreg(r,4); + prepare_for_call_1(); /* This should ensure that there won't be + any need for swapping nregs in prepare_for_call_2 + */ +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(in1); +#endif + unlock(in1); + unlock(r); + + prepare_for_call_2(); + raw_call_r(r); + +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(4); +#endif + + + live.nat[REG_RESULT].holds[0]=out1; + live.nat[REG_RESULT].nholds=1; + live.nat[REG_RESULT].touched=touchcnt++; + + live.state[out1].realreg=REG_RESULT; + live.state[out1].realind=0; + live.state[out1].val=0; + live.state[out1].validsize=osize; + live.state[out1].dirtysize=osize; + set_status(out1,DIRTY); +} +MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize)) + +MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)) +{ + clobber_flags(); + remove_all_offsets(); + in1=readreg_specific(in1,isize1,REG_PAR1); + in2=readreg_specific(in2,isize2,REG_PAR2); + r=readreg(r,4); + prepare_for_call_1(); /* This should ensure that there won't be + any need for swapping nregs in prepare_for_call_2 + */ +#if USE_NORMAL_CALLING_CONVENTION + 
raw_push_l_r(in2); + raw_push_l_r(in1); +#endif + unlock(r); + unlock(in1); + unlock(in2); + prepare_for_call_2(); + raw_call_r(r); +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(8); +#endif +} +MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)) + + +MIDFUNC(1,forget_about,(W4 r)) +{ + if (isinreg(r)) + disassociate(r); + live.state[r].val=0; + set_status(r,UNDEF); +} +MENDFUNC(1,forget_about,(W4 r)) + +MIDFUNC(0,nop,(void)) +{ + raw_nop(); +} +MENDFUNC(0,nop,(void)) + + +MIDFUNC(1,f_forget_about,(FW r)) +{ + if (f_isinreg(r)) + f_disassociate(r); + live.fate[r].status=UNDEF; +} +MENDFUNC(1,f_forget_about,(FW r)) + +MIDFUNC(1,fmov_pi,(FW r)) +{ + r=f_writereg(r); + raw_fmov_pi(r); + f_unlock(r); +} +MENDFUNC(1,fmov_pi,(FW r)) + +MIDFUNC(1,fmov_log10_2,(FW r)) +{ + r=f_writereg(r); + raw_fmov_log10_2(r); + f_unlock(r); +} +MENDFUNC(1,fmov_log10_2,(FW r)) + +MIDFUNC(1,fmov_log2_e,(FW r)) +{ + r=f_writereg(r); + raw_fmov_log2_e(r); + f_unlock(r); +} +MENDFUNC(1,fmov_log2_e,(FW r)) + +MIDFUNC(1,fmov_loge_2,(FW r)) +{ + r=f_writereg(r); + raw_fmov_loge_2(r); + f_unlock(r); +} +MENDFUNC(1,fmov_loge_2,(FW r)) + +MIDFUNC(1,fmov_1,(FW r)) +{ + r=f_writereg(r); + raw_fmov_1(r); + f_unlock(r); +} +MENDFUNC(1,fmov_1,(FW r)) + +MIDFUNC(1,fmov_0,(FW r)) +{ + r=f_writereg(r); + raw_fmov_0(r); + f_unlock(r); +} +MENDFUNC(1,fmov_0,(FW r)) + +MIDFUNC(2,fmov_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmov_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmov_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovi_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmovi_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmovi_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovi_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmovi_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmovi_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmovs_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmovs_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmovs_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovs_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmovs_mr(m,r); + f_unlock(r); 
+} +MENDFUNC(2,fmovs_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmov_ext_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmov_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmov_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmov_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmov_ext_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmov_rr,(FW d, FR s)) +{ + if (d==s) { /* How pointless! */ + return; + } +#if USE_F_ALIAS + f_disassociate(d); + s=f_readreg(s); + live.fate[d].realreg=s; + live.fate[d].realind=live.fat[s].nholds; + live.fate[d].status=DIRTY; + live.fat[s].holds[live.fat[s].nholds]=d; + live.fat[s].nholds++; + f_unlock(s); +#else + s=f_readreg(s); + d=f_writereg(d); + raw_fmov_rr(d,s); + f_unlock(s); + f_unlock(d); +#endif +} +MENDFUNC(2,fmov_rr,(FW d, FR s)) + +MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base)) +{ + index=readreg(index,4); + + raw_fldcw_m_indexed(index,base); + unlock(index); +} +MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base)) + +MIDFUNC(1,ftst_r,(FR r)) +{ + r=f_readreg(r); + raw_ftst_r(r); + f_unlock(r); +} +MENDFUNC(1,ftst_r,(FR r)) + +MIDFUNC(0,dont_care_fflags,(void)) +{ + f_disassociate(FP_RESULT); +} +MENDFUNC(0,dont_care_fflags,(void)) + +MIDFUNC(2,fsqrt_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsqrt_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsqrt_rr,(FW d, FR s)) + +MIDFUNC(2,fabs_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fabs_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fabs_rr,(FW d, FR s)) + +MIDFUNC(2,fsin_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsin_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsin_rr,(FW d, FR s)) + +MIDFUNC(2,fcos_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fcos_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fcos_rr,(FW d, FR s)) + 
+MIDFUNC(2,ftwotox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftwotox_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftwotox_rr,(FW d, FR s)) + +MIDFUNC(2,fetox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fetox_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fetox_rr,(FW d, FR s)) + +MIDFUNC(2,frndint_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_frndint_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frndint_rr,(FW d, FR s)) + +MIDFUNC(2,flog2_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flog2_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flog2_rr,(FW d, FR s)) + +MIDFUNC(2,fneg_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fneg_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fneg_rr,(FW d, FR s)) + +MIDFUNC(2,fadd_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fadd_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fadd_rr,(FRW d, FR s)) + +MIDFUNC(2,fsub_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fsub_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsub_rr,(FRW d, FR s)) + +MIDFUNC(2,fcmp_rr,(FR d, FR s)) +{ + d=f_readreg(d); + s=f_readreg(s); + raw_fcmp_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fcmp_rr,(FR d, FR s)) + +MIDFUNC(2,fdiv_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fdiv_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fdiv_rr,(FRW d, FR s)) + +MIDFUNC(2,frem_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_frem_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frem_rr,(FRW d, FR s)) + +MIDFUNC(2,frem1_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_frem1_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frem1_rr,(FRW d, FR s)) + +MIDFUNC(2,fmul_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fmul_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fmul_rr,(FRW d, FR s)) + + 
+/******************************************************************** + * Support functions exposed to gencomp. CREATE time * + ********************************************************************/ + +int kill_rodent(int r) +{ + return KILLTHERAT && + have_rat_stall && + (live.state[r].status==INMEM || + live.state[r].status==CLEAN || + live.state[r].status==ISCONST || + live.state[r].dirtysize==4); +} + +uae_u32 get_const(int r) +{ +#if USE_OPTIMIZER + if (!reg_alloc_run) +#endif + Dif (!isconst(r)) { + printf("Register %d should be constant, but isn't\n",r); + abort(); + } + return live.state[r].val; +} + +void sync_m68k_pc(void) +{ + if (m68k_pc_offset) { + add_l_ri(PC_P,m68k_pc_offset); + comp_pc_p+=m68k_pc_offset; + m68k_pc_offset=0; + } +} + +/******************************************************************** + * Support functions exposed to newcpu * + ********************************************************************/ + +uae_u32 scratch[VREGS]; +fptype fscratch[VFREGS]; + +void init_comp(void) +{ + int i; + uae_u8* cb=can_byte; + uae_u8* cw=can_word; + uae_u8* au=always_used; + + for (i=0;i1) + abort(); + if (live.nat[n].nholds && depthnat[i].validsize) + vton[s->nat[i].holds]=i; + + flush_flags(); /* low level */ + sync_m68k_pc(); /* mid level */ + + /* We don't do FREGS yet, so this is raw flush() code */ + for (i=0;is->nat[vton[i]].dirtysize) + tomem(i); + /* Fall-through! 
*/ + case CLEAN: + if (vton[i]==-1 || + live.state[i].validsizenat[vton[i]].validsize) + evict(i); + else + make_exclusive(i,0,-1); + break; + case INMEM: + break; + case UNDEF: + break; + default: + printf("Weird status: %d\n",live.state[i].status); + abort(); + } + } + + /* Quick consistency check */ + for (i=0;is->nat[n].dirtysize) + abort; + if (live.state[i].validsizenat[n].validsize) + abort; + live.state[i].dirtysize=s->nat[n].dirtysize; + live.state[i].validsize=s->nat[n].validsize; + if (live.state[i].dirtysize) + set_status(i,DIRTY); + break; + case UNDEF: + break; + } + if (n!=-1) + live.nat[n].touched=touchcnt++; + } +} +#else +static __inline__ void match_states(smallstate* s) +{ + flush(1); +} +#endif + +/* Only do this if you really mean it! The next call should be to init!*/ +void flush(int save_regs) +{ + int fi,i; + + log_flush(); + flush_flags(); /* low level */ + sync_m68k_pc(); /* mid level */ + + if (save_regs) { + for (i=0;i=(uae_u32)kickmemory && + addr<(uae_u32)kickmemory+8*65536); +} + +static void flush_all(void) +{ + int i; + + log_flush(); + for (i=0;i0) + free_nreg(i); + + for (i=0;i0) + f_free_nreg(i); + + live.flags_in_flags=TRASH; /* Note: We assume we already rescued the + flags at the very start of the call_r + functions! 
*/
+}
+
+
+/********************************************************************
+ * Memory access and related functions, CREATE time                 *
+ ********************************************************************/
+/* register_branch: store the not-taken target, the taken target and the
+ * condition code in next_pc_p, taken_pc_p and branch_cc. */
+void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
+{
+    next_pc_p=not_taken;
+    taken_pc_p=taken;
+    branch_cc=cond;
+}
+
+/* get_handler_address: return the address OF the direct_handler_to_use field
+ * of the blockinfo obtained for addr (cast to uae_u32). With USE_OPTIMIZER,
+ * returns 0 when no blockinfo was obtained and reg_alloc_run is set. */
+static uae_u32 get_handler_address(uae_u32 addr)
+{
+    uae_u32 cl=cacheline(addr); /* NOTE(review): cl is not referenced again in this function */
+    blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
+
+#if USE_OPTIMIZER
+    if (!bi && reg_alloc_run)
+        return 0;
+#endif
+    return (uae_u32)&(bi->direct_handler_to_use);
+}
+/* get_handler: like get_handler_address, but returns the current VALUE of direct_handler_to_use. */
+static uae_u32 get_handler(uae_u32 addr)
+{
+    uae_u32 cl=cacheline(addr); /* NOTE(review): cl is not referenced again in this function */
+    blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
+
+#if USE_OPTIMIZER
+    if (!bi && reg_alloc_run)
+        return 0;
+#endif
+    return (uae_u32)bi->direct_handler_to_use;
+}
+/* load_handler: emit a load into reg from the memory location returned by get_handler_address(addr). */
+static void load_handler(int reg, uae_u32 addr)
+{
+    mov_l_rm(reg,get_handler_address(addr));
+}
+
+/* This version assumes that it is writing *real* memory, and *will* fail
+ * if that assumption is wrong! No branches, no second chances, just
+ * straight go-for-it attitude */
+/* Parameters: address/source/tmp are register numbers, size is 1, 2 or 4.
+ * offset is accepted but not used in this function. When clobber is set the
+ * direct (canbang) path byte-swaps source in place instead of copying it to
+ * tmp first. 16- and 32-bit values are byte-swapped before being stored. */
+static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
+{
+    int f=tmp;
+
+#ifdef NATMEM_OFFSET
+    if (canbang) { /* Woohoo! go directly at the memory! */
+        if (clobber)
+            f=source;
+        switch(size) {
+        case 1: mov_b_bRr(address,source,NATMEM_OFFSET); break;
+        case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,NATMEM_OFFSET); break;
+        case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,NATMEM_OFFSET); break;
+        }
+        forget_about(tmp);
+        forget_about(f);
+        return;
+    }
+#endif
+
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16); /* The index into the baseaddr table */
+    mov_l_rm_indexed(f,(uae_u32)(baseaddr),f,4);
+
+    if (address==source && size>1) { /* IBrowse does this! */
+        add_l(f,address); /* f now has the final address */
+        switch(size) {
+        /* source is swapped, stored, then swapped back so its value is preserved */
+        case 2: bswap_16(source); mov_w_Rr(f,source,0); bswap_16(source); break;
+        case 4: bswap_32(source); mov_l_Rr(f,source,0); bswap_32(source); break;
+        }
+    }
+    else {
+        /* f now holds the offset */
+        switch(size) {
+        case 1: mov_b_mrr_indexed(address,f,1,source); break;
+        case 2: bswap_16(source); mov_w_mrr_indexed(address,f,1,source); bswap_16(source); break;
+        case 4: bswap_32(source); mov_l_mrr_indexed(address,f,1,source); bswap_32(source); break;
+        }
+    }
+}
+
+
+/* writemem: generic write through the memory bank table. Looks up the bank
+ * for address>>16 in mem_banks, loads the function pointer stored at byte
+ * offset `offset` inside that bank, and emits a call to it via call_r_02. */
+static __inline__ void writemem(int address, int source, int offset, int size, int tmp)
+{
+    int f=tmp;
+
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16); /* The index into the mem bank table */
+    mov_l_rm_indexed(f,(uae_u32)mem_banks,f,4);
+    /* Now f holds a pointer to the actual membank */
+    mov_l_rR(f,f,offset);
+    /* Now f holds the address of the b/w/lput function */
+    call_r_02(f,address,source,4,size);
+    forget_about(tmp);
+}
+
+/* writebyte: emit a 1-byte store. currprefs.comptrustbyte selects the policy:
+ * 0 = never distrust, 1 = always distrust, 2 = distrust when
+ * (start_pc&0xF80000)==0xF80000, 3 = distrust until have_done_picasso is set;
+ * any other value aborts. Distrusted or S_WRITE-special accesses go through
+ * writemem_special (bank offset 20), everything else through writemem_real. */
+void writebyte(int address, int source, int tmp)
+{
+    int distrust;
+    switch (currprefs.comptrustbyte) {
+    case 0: distrust=0; break;
+    case 1: distrust=1; break;
+    case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+    case 3: distrust=!have_done_picasso; break;
+    default: abort();
+    }
+
+    if ((special_mem&S_WRITE) || distrust)
+        writemem_special(address,source,20,1,tmp);
+    else
+        writemem_real(address,source,20,1,tmp,0);
+}
+/* writeword_general: 2-byte store; same policy switch as writebyte but keyed on currprefs.comptrustword, bank offset 16. clobber is forwarded to writemem_real only. */
+static __inline__ void writeword_general(int address, int source, int tmp,
+                                         int clobber)
+{
+    int distrust;
+    switch (currprefs.comptrustword) {
+    case 0: distrust=0; break;
+    case 1: distrust=1; break;
+    case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+    case 3: distrust=!have_done_picasso; break;
+    default: abort();
+    }
+
+    if ((special_mem&S_WRITE) || distrust)
+        writemem_special(address,source,16,2,tmp);
+    else
+        writemem_real(address,source,16,2,tmp,clobber);
+}
+/* writeword_clobber: writeword_general with clobber=1 (source may be overwritten on the direct path). */
+void writeword_clobber(int address, int source, int tmp)
+{
+    writeword_general(address,source,tmp,1);
+}
+/* writeword: writeword_general with clobber=0. */
+void writeword(int address, int source, int tmp)
+{
+    writeword_general(address,source,tmp,0);
+}
+/* writelong_general: 4-byte store; same policy switch keyed on currprefs.comptrustlong, bank offset 12. clobber is forwarded to writemem_real only. */
+static __inline__ void writelong_general(int address, int source, int tmp,
+                                         int clobber)
+{
+    int distrust;
+    switch (currprefs.comptrustlong) {
+    case 0: distrust=0; break;
+    case 1: distrust=1; break;
+    case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+    case 3: distrust=!have_done_picasso; break;
+    default: abort();
+    }
+
+    if ((special_mem&S_WRITE) || distrust)
+        writemem_special(address,source,12,4,tmp);
+    else
+        writemem_real(address,source,12,4,tmp,clobber);
+}
+/* writelong_clobber: writelong_general with clobber=1 (source may be overwritten on the direct path). */
+void writelong_clobber(int address, int source, int tmp)
+{
+    writelong_general(address,source,tmp,1);
+}
+/* writelong: writelong_general with clobber=0. */
+void writelong(int address, int source, int tmp)
+{
+    writelong_general(address,source,tmp,0);
+}
+
+
+
+/* This version assumes that it is reading *real* memory, and *will* fail
+ * if that assumption is wrong! No branches, no second chances, just
+ * straight go-for-it attitude */
+/* Parameters: address/dest/tmp are register numbers, size is 1, 2 or 4. offset is accepted but not used in this function. */
+static void readmem_real(int address, int dest, int offset, int size, int tmp)
+{
+    int f=tmp;
+
+    /* for a long read into a different register, dest itself serves as the scratch register */
+    if (size==4 && address!=dest)
+        f=dest;
+
+#ifdef NATMEM_OFFSET
+    if (canbang) { /* Woohoo! go directly at the memory! */
+        switch(size) {
+        case 1: mov_b_brR(dest,address,NATMEM_OFFSET); break;
+        case 2: mov_w_brR(dest,address,NATMEM_OFFSET); bswap_16(dest); break;
+        case 4: mov_l_brR(dest,address,NATMEM_OFFSET); bswap_32(dest); break;
+        }
+        forget_about(tmp);
+        return;
+    }
+#endif
+
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16); /* The index into the baseaddr table */
+    mov_l_rm_indexed(f,(uae_u32)baseaddr,f,4);
+    /* f now holds the offset */
+
+    switch(size) {
+    case 1: mov_b_rrm_indexed(dest,address,f,1); break;
+    case 2: mov_w_rrm_indexed(dest,address,f,1); bswap_16(dest); break;
+    case 4: mov_l_rrm_indexed(dest,address,f,1); bswap_32(dest); break;
+    }
+    forget_about(tmp);
+}
+
+
+/* readmem: generic read through the memory bank table. Looks up the bank for
+ * address>>16 in mem_banks, loads the function pointer stored at byte offset
+ * `offset` inside that bank, and emits a call to it via call_r_11. */
+static __inline__ void readmem(int address, int dest, int offset, int size, int tmp)
+{
+    int f=tmp;
+
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16); /* The index into the mem bank table */
+    mov_l_rm_indexed(f,(uae_u32)mem_banks,f,4);
+    /* Now f holds a pointer to the actual membank */
+    mov_l_rR(f,f,offset);
+    /* Now f holds the address of the b/w/lget function */
+    call_r_11(dest,f,address,size,4);
+    forget_about(tmp);
+}
+/* readbyte: emit a 1-byte load; policy switch keyed on currprefs.comptrustbyte (values as in writebyte), S_READ-special or distrusted accesses use readmem_special with bank offset 8. */
+void readbyte(int address, int dest, int tmp)
+{
+    int distrust;
+    switch (currprefs.comptrustbyte) {
+    case 0: distrust=0; break;
+    case 1: distrust=1; break;
+    case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+    case 3: distrust=!have_done_picasso; break;
+    default: abort();
+    }
+
+    if ((special_mem&S_READ) || distrust)
+        readmem_special(address,dest,8,1,tmp);
+    else
+        readmem_real(address,dest,8,1,tmp);
+}
+/* readword: emit a 2-byte load; policy switch keyed on currprefs.comptrustword, bank offset 4. */
+void readword(int address, int dest, int tmp)
+{
+    int distrust;
+    switch (currprefs.comptrustword) {
+    case 0: distrust=0; break;
+    case 1: distrust=1; break;
+    case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+    case 3: distrust=!have_done_picasso; break;
+    default: abort();
+    }
+
+    if ((special_mem&S_READ) || distrust)
+        readmem_special(address,dest,4,2,tmp);
+    else
+        readmem_real(address,dest,4,2,tmp);
+}
+/* readlong: emit a 4-byte load (body continues below). */
+void readlong(int address, int dest, int tmp)
+{
+ 
int distrust; + switch (currprefs.comptrustlong) { + case 0: distrust=0; break; + case 1: distrust=1; break; + case 2: distrust=((start_pc&0xF80000)==0xF80000); break; + case 3: distrust=!have_done_picasso; break; + default: abort(); + } + + if ((special_mem&S_READ) || distrust) + readmem_special(address,dest,0,4,tmp); + else + readmem_real(address,dest,0,4,tmp); +} + + + +/* This one might appear a bit odd... */ +static __inline__ void get_n_addr_old(int address, int dest, int tmp) +{ + readmem(address,dest,24,4,tmp); +} + +static __inline__ void get_n_addr_real(int address, int dest, int tmp) +{ + int f=tmp; + if (address!=dest) + f=dest; + +#ifdef NATMEM_OFFSET + if (canbang) { + lea_l_brr(dest,address,NATMEM_OFFSET); + forget_about(tmp); + return; + } +#endif + mov_l_rr(f,address); + mov_l_rr(dest,address); // gb-- nop if dest==address + shrl_l_ri(f,16); + mov_l_rm_indexed(f, (uae_u32)baseaddr, f, 4); + add_l(dest,f); + forget_about(tmp); +} + +void get_n_addr(int address, int dest, int tmp) +{ + int distrust; + switch (currprefs.comptrustnaddr) { + case 0: distrust=0; break; + case 1: distrust=1; break; + case 2: distrust=((start_pc&0xF80000)==0xF80000); break; + case 3: distrust=!have_done_picasso; break; + default: abort(); + } + + if (special_mem || distrust) + get_n_addr_old(address,dest,tmp); + else + get_n_addr_real(address,dest,tmp); +} + +void get_n_addr_jmp(int address, int dest, int tmp) +{ +#if 0 /* For this, we need to get the same address as the rest of UAE + would --- otherwise we end up translating everything twice */ + get_n_addr(address,dest,tmp); +#else + int f=tmp; + if (address!=dest) + f=dest; + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the baseaddr bank table */ + mov_l_rm_indexed(dest,(uae_u32)baseaddr,f,4); + add_l(dest,address); + and_l_ri (dest, ~1); + forget_about(tmp); +#endif +} + + +/* base is a register, but dp is an actual value. 
+ target is a register, as is tmp */ +void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp) +{ + int reg = (dp >> 12) & 15; + int regd_shift=(dp >> 9) & 3; + + if (dp & 0x100) { + int ignorebase=(dp&0x80); + int ignorereg=(dp&0x40); + int addbase=0; + int outer=0; + + if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4); + + if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4); + + if ((dp & 0x4) == 0) { /* add regd *before* the get_long */ + if (!ignorereg) { + if ((dp & 0x800) == 0) + sign_extend_16_rr(target,reg); + else + mov_l_rr(target,reg); + shll_l_ri(target,regd_shift); + } + else + mov_l_ri(target,0); + + /* target is now regd */ + if (!ignorebase) + add_l(target,base); + add_l_ri(target,addbase); + if (dp&0x03) readlong(target,target,tmp); + } else { /* do the getlong first, then add regd */ + if (!ignorebase) { + mov_l_rr(target,base); + add_l_ri(target,addbase); + } + else + mov_l_ri(target,addbase); + if (dp&0x03) readlong(target,target,tmp); + + if (!ignorereg) { + if ((dp & 0x800) == 0) + sign_extend_16_rr(tmp,reg); + else + mov_l_rr(tmp,reg); + shll_l_ri(tmp,regd_shift); + /* tmp is now regd */ + add_l(target,tmp); + } + } + add_l_ri(target,outer); + } + else { /* 68000 version */ + if ((dp & 0x800) == 0) { /* Sign extend */ + sign_extend_16_rr(target,reg); + lea_l_brr_indexed(target,base,target,1<> 8) & 255) | ((f & 255) << 8); +} + +void set_cache_state(int enabled) +{ + if (enabled!=letit) + flush_icache_hard(77); + letit=enabled; +} + +int get_cache_state(void) +{ + return letit; +} + +uae_u32 get_jitted_size(void) +{ + if (compiled_code) + return current_compile_p-compiled_code; + return 0; +} + +void alloc_cache(void) +{ + if (compiled_code) { + flush_icache_hard(6); + cache_free(compiled_code); + } + if (veccode == 
NULL) + veccode = cache_alloc (256); + if (popallspace == NULL) + popallspace = cache_alloc (1024); + compiled_code = NULL; + if (currprefs.cachesize == 0) + return; + + while (!compiled_code && currprefs.cachesize) { + compiled_code=cache_alloc(currprefs.cachesize*1024); + if (!compiled_code) + currprefs.cachesize/=2; + } + if (compiled_code) { + max_compile_start=compiled_code+currprefs.cachesize*1024-BYTES_PER_INST; + current_compile_p=compiled_code; + } +} + +extern unsigned long op_illg_1 (uae_u32 opcode) REGPARAM; + +static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2) +{ + uae_u32 k1=0; + uae_u32 k2=0; + uae_s32 len=bi->len; + uae_u32 tmp=bi->min_pcp; + uae_u32* pos; + + len+=(tmp&3); + tmp&=(~3); + pos=(uae_u32*)tmp; + + if (len<0 || len>MAX_CHECKSUM_LEN) { + *c1=0; + *c2=0; + } + else { + while (len>0) { + k1+=*pos; + k2^=*pos; + pos++; + len-=4; + } + *c1=k1; + *c2=k2; + } +} + +static void show_checksum(blockinfo* bi) +{ + uae_u32 k1=0; + uae_u32 k2=0; + uae_s32 len=bi->len; + uae_u32 tmp=(uae_u32)bi->pc_p; + uae_u32* pos; + + len+=(tmp&3); + tmp&=(~3); + pos=(uae_u32*)tmp; + + if (len<0 || len>MAX_CHECKSUM_LEN) { + return; + } + else { + while (len>0) { + printf("%08x ",*pos); + pos++; + len-=4; + } + printf(" bla\n"); + } +} + + +int check_for_cache_miss(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + + if (bi) { + int cl=cacheline(regs.pc_p); + if (bi!=cache_tags[cl+1].bi) { + raise_in_cl_list(bi); + return 1; + } + } + return 0; +} + + +static void recompile_block(void) +{ + /* An existing block's countdown code has expired. We need to make + sure that execute_normal doesn't refuse to recompile due to a + perceived cache miss... 
*/ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + + Dif (!bi) + abort(); + raise_in_cl_list(bi); + execute_normal(); + return; +} + +static void cache_miss(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + uae_u32 cl=cacheline(regs.pc_p); + blockinfo* bi2=get_blockinfo(cl); + + if (!bi) { + execute_normal(); /* Compile this block now */ + return; + } + Dif (!bi2 || bi==bi2) { + write_log ("Unexplained cache miss %p %p\n",bi,bi2); + abort(); + } + raise_in_cl_list(bi); + return; +} + +static void check_checksum(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + uae_u32 cl=cacheline(regs.pc_p); + blockinfo* bi2=get_blockinfo(cl); + + uae_u32 c1,c2; + + checksum_count++; + /* These are not the droids you are looking for... */ + if (!bi) { + /* Whoever is the primary target is in a dormant state, but + calling it was accidental, and we should just compile this + new block */ + execute_normal(); + return; + } + if (bi!=bi2) { + /* The block was hit accidentally, but it does exist. Cache miss */ + cache_miss(); + return; + } + + if (bi->c1 || bi->c2) + calc_checksum(bi,&c1,&c2); + else { + c1=c2=1; /* Make sure it doesn't match */ + } + if (c1==bi->c1 && c2==bi->c2) { + /* This block is still OK. So we reactivate. Of course, that + means we have to move it into the needs-to-be-flushed list */ + bi->handler_to_use=bi->handler; + set_dhtu(bi,bi->direct_handler); + + /* printf("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p, + c1,c2,bi->c1,bi->c2);*/ + remove_from_list(bi); + add_to_active(bi); + raise_in_cl_list(bi); + } + else { + /* This block actually changed. 
We need to invalidate it, + and set it up to be recompiled */ + /* printf("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p, + c1,c2,bi->c1,bi->c2); */ + invalidate_block(bi); + raise_in_cl_list(bi); + execute_normal(); + } +} + + +static __inline__ void create_popalls(void) +{ + int i,r; + + current_compile_p=popallspace; + set_target(current_compile_p); +#if USE_PUSH_POP + /* If we can't use gcc inline assembly, we need to pop some + registers before jumping back to the various get-out routines. + This generates the code for it. + */ + popall_do_nothing=current_compile_p; + for (i=0;idirect_pen=(void*)get_target(); + raw_mov_l_rm(0,(uae_u32)&(bi->pc_p)); + raw_mov_l_mr((uae_u32)®s.pc_p,0); + raw_jmp((uae_u32)popall_execute_normal); + + align_target(32); + bi->direct_pcc=(void*)get_target(); + raw_mov_l_rm(0,(uae_u32)&(bi->pc_p)); + raw_mov_l_mr((uae_u32)®s.pc_p,0); + raw_jmp((uae_u32)popall_check_checksum); + + align_target(32); + current_compile_p=get_target(); + + bi->deplist=NULL; + for (i=0;i<2;i++) { + bi->dep[i].prev_p=NULL; + bi->dep[i].next=NULL; + } + bi->env=default_ss; + bi->status=BI_NEW; + bi->havestate=0; + //bi->env=empty_ss; +} + +void compemu_reset(void) +{ + int i; + + set_cache_state(0); +} + +void build_comp(void) +{ + int i; + int jumpcount=0; + unsigned long opcode; + struct cputbl* tbl=op_smalltbl_0_comp_ff; + struct cputbl* nftbl=op_smalltbl_0_comp_nf; + int count; +#ifdef NOFLAGS_SUPPORT + struct cputbl *nfctbl = (currprefs.cpu_level >= 4 ? op_smalltbl_0_nf + : currprefs.cpu_level == 3 ? op_smalltbl_1_nf + : currprefs.cpu_level == 2 ? op_smalltbl_2_nf + : currprefs.cpu_level == 1 ? op_smalltbl_3_nf + : ! currprefs.cpu_compatible ? 
op_smalltbl_4_nf + : op_smalltbl_5_nf); +#endif + raw_init_cpu(); +#ifdef NATMEM_OFFSET + write_log ("JIT: Setting signal handler\n"); +#ifndef _WIN32 + signal(SIGSEGV,vec); +#endif +#endif + write_log ("JIT: Building Compiler function table\n"); + for (opcode = 0; opcode < 65536; opcode++) { +#ifdef NOFLAGS_SUPPORT + nfcpufunctbl[opcode] = op_illg_1; +#endif + compfunctbl[opcode] = NULL; + nfcompfunctbl[opcode] = NULL; + prop[opcode].use_flags = 0x1f; + prop[opcode].set_flags = 0x1f; + prop[opcode].is_jump=1; + } + + for (i = 0; tbl[i].opcode < 65536; i++) { + int isjmp=(tbl[i].specific&1); + int isaddx=(tbl[i].specific&8); + int iscjmp=(tbl[i].specific&16); + + prop[tbl[i].opcode].is_jump=isjmp; + prop[tbl[i].opcode].is_const_jump=iscjmp; + prop[tbl[i].opcode].is_addx=isaddx; + compfunctbl[tbl[i].opcode] = tbl[i].handler; + } + for (i = 0; nftbl[i].opcode < 65536; i++) { + nfcompfunctbl[nftbl[i].opcode] = nftbl[i].handler; +#ifdef NOFLAGS_SUPPORT + nfcpufunctbl[nftbl[i].opcode] = nfctbl[i].handler; +#endif + } + +#ifdef NOFLAGS_SUPPORT + for (i = 0; nfctbl[i].handler; i++) { + nfcpufunctbl[nfctbl[i].opcode] = nfctbl[i].handler; + } +#endif + + for (opcode = 0; opcode < 65536; opcode++) { + cpuop_func *f; + cpuop_func *nff; +#ifdef NOFLAGS_SUPPORT + cpuop_func *nfcf; +#endif + int isjmp,isaddx,iscjmp; + + if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > currprefs.cpu_level) + continue; + + if (table68k[opcode].handler != -1) { + f = compfunctbl[table68k[opcode].handler]; + nff = nfcompfunctbl[table68k[opcode].handler]; +#ifdef NOFLAGS_SUPPORT + nfcf = nfcpufunctbl[table68k[opcode].handler]; +#endif + isjmp=prop[table68k[opcode].handler].is_jump; + iscjmp=prop[table68k[opcode].handler].is_const_jump; + isaddx=prop[table68k[opcode].handler].is_addx; + prop[opcode].is_jump=isjmp; + prop[opcode].is_const_jump=iscjmp; + prop[opcode].is_addx=isaddx; + compfunctbl[opcode] = f; + nfcompfunctbl[opcode] = nff; +#ifdef NOFLAGS_SUPPORT + Dif (nfcf == op_illg_1) 
+ abort(); + nfcpufunctbl[opcode] = nfcf; +#endif + } + prop[opcode].set_flags =table68k[opcode].flagdead; + prop[opcode].use_flags =table68k[opcode].flaglive; + /* Unconditional jumps don't evaluate condition codes, so they + don't actually use any flags themselves */ + if (prop[opcode].is_const_jump) + prop[opcode].use_flags=0; + } +#ifdef NOFLAGS_SUPPORT + for (i = 0; nfctbl[i].handler != NULL; i++) { + if (nfctbl[i].specific) + nfcpufunctbl[tbl[i].opcode] = nfctbl[i].handler; + } +#endif + + count=0; + for (opcode = 0; opcode < 65536; opcode++) { + if (compfunctbl[opcode]) + count++; + } + write_log ("Supposedly %d compileable opcodes!\n",count); + + /* Initialise state */ + alloc_cache(); + create_popalls(); + reset_lists(); + + for (i=0;ipc_p)].handler=(void*)popall_execute_normal; + cache_tags[cacheline(bi->pc_p)+1].bi=NULL; + bi=bi->next; + } + bi=dormant; + while(bi) { + cache_tags[cacheline(bi->pc_p)].handler=(void*)popall_execute_normal; + cache_tags[cacheline(bi->pc_p)+1].bi=NULL; + bi=bi->next; + } + + reset_lists(); + if (!compiled_code) + return; + current_compile_p=compiled_code; + set_special(0); /* To get out of compiled code */ +} + + +/* "Soft flushing" --- instead of actually throwing everything away, + we simply mark everything as "needs to be checked". 
+*/ + +void flush_icache(int n) +{ + uae_u32 i; + blockinfo* bi; + blockinfo* bi2; + + if (currprefs.comp_hardflush) { + flush_icache_hard(n); + return; + } + soft_flush_count++; + if (!active) + return; + + bi=active; + while (bi) { + uae_u32 cl=cacheline(bi->pc_p); + if (!bi->handler) { + /* invalidated block */ + if (bi==cache_tags[cl+1].bi) + cache_tags[cl].handler=popall_execute_normal; + bi->handler_to_use=popall_execute_normal; + set_dhtu(bi,bi->direct_pen); + } + else { + if (bi==cache_tags[cl+1].bi) + cache_tags[cl].handler=popall_check_checksum; + bi->handler_to_use=popall_check_checksum; + set_dhtu(bi,bi->direct_pcc); + } + bi2=bi; + bi=bi->next; + } + /* bi2 is now the last entry in the active list */ + bi2->next=dormant; + if (dormant) + dormant->prev_p=&(bi2->next); + + dormant=active; + active->prev_p=&dormant; + active=NULL; +} + + +static void catastrophe(void) +{ + abort(); +} + +int failure; + + +void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) +{ + if (letit && compiled_code && currprefs.cpu_level>=2) { + + /* OK, here we need to 'compile' a block */ + int i; + int r; + int was_comp=0; + uae_u8 liveflags[MAXRUN+1]; + uae_u32 max_pcp=(uae_u32)pc_hist[0].location; + uae_u32 min_pcp=max_pcp; + uae_u32 cl=cacheline(pc_hist[0].location); + void* specflags=(void*)®s.spcflags; + blockinfo* bi=NULL; + blockinfo* bi2; + int extra_len=0; + + compile_count++; + if (current_compile_p>=max_compile_start) + flush_icache_hard(7); + + alloc_blockinfos(); + + bi=get_blockinfo_addr_new(pc_hist[0].location,0); + bi2=get_blockinfo(cl); + + optlev=bi->optlevel; + if (bi->handler) { + Dif (bi!=bi2) { + /* I don't think it can happen anymore. Shouldn't, in + any case. So let's make sure... */ + printf("WOOOWOO count=%d, ol=%d %p %p\n", + bi->count,bi->optlevel,bi->handler_to_use, + cache_tags[cl].handler); + abort(); + } + + Dif (bi->count!=-1 && bi->status!=BI_TARGETTED) { + /* What the heck? We are not supposed to be here! 
*/ + abort(); + } + } + if (bi->count==-1) { + optlev++; + while (!currprefs.optcount[optlev]) + optlev++; + bi->count=currprefs.optcount[optlev]-1; + } + current_block_pc_p=(uae_u32)pc_hist[0].location; + + remove_deps(bi); /* We are about to create new code */ + bi->optlevel=optlev; + bi->pc_p=(uae_u8*)pc_hist[0].location; + + liveflags[blocklen]=0x1f; /* All flags needed afterwards */ + i=blocklen; + while (i--) { + uae_u16* currpcp=pc_hist[i].location; + int op=cft_map(*currpcp); + + if ((uae_u32)currpcpmax_pcp) + max_pcp=(uae_u32)currpcp; + + if (currprefs.compnf) { + liveflags[i]=((liveflags[i+1]& + (~prop[op].set_flags))| + prop[op].use_flags); + if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0) + liveflags[i]&= ~FLAG_Z; + } + else { + liveflags[i]=0x1f; + } + } + + bi->needed_flags=liveflags[0]; + + /* This is the non-direct handler */ + align_target(32); + set_target(get_target()+1); + align_target(16); + /* Now aligned at n*32+16 */ + + bi->handler= + bi->handler_to_use=(void*)get_target(); + raw_cmp_l_mi((uae_u32)®s.pc_p,(uae_u32)pc_hist[0].location); + raw_jnz((uae_u32)popall_cache_miss); + /* This was 16 bytes on the x86, so now aligned on (n+1)*32 */ + + was_comp=0; + +#if USE_MATCHSTATE + comp_pc_p=(uae_u8*)pc_hist[0].location; + init_comp(); + match_states(&(bi->env)); + was_comp=1; +#endif + + bi->direct_handler=(void*)get_target(); + set_dhtu(bi,bi->direct_handler); + current_block_start_target=(uae_u32)get_target(); + + if (bi->count>=0) { /* Need to generate countdown code */ + raw_mov_l_mi((uae_u32)®s.pc_p,(uae_u32)pc_hist[0].location); + raw_sub_l_mi((uae_u32)&(bi->count),1); + raw_jl((uae_u32)popall_recompile_block); + } + if (optlev==0) { /* No need to actually translate */ + /* Execute normally without keeping stats */ + raw_mov_l_mi((uae_u32)®s.pc_p,(uae_u32)pc_hist[0].location); + raw_jmp((uae_u32)popall_exec_nostats); + } + else { + reg_alloc_run=0; + next_pc_p=0; + taken_pc_p=0; + branch_cc=0; + + log_startblock(); + for (i=0;i1) { + 
failure=0; + if (!was_comp) { + comp_pc_p=(uae_u8*)pc_hist[i].location; + init_comp(); + } + was_comp++; + + comptbl[opcode](opcode); + freescratch(); + if (!(liveflags[i+1] & FLAG_CZNV)) { + /* We can forget about flags */ + dont_care_flags(); + } +#if INDIVIDUAL_INST + flush(1); + nop(); + flush(1); + was_comp=0; +#endif + } + else + failure=1; + if (failure) { + if (was_comp) { + flush(1); + was_comp=0; + } + raw_mov_l_ri(REG_PAR1,(uae_u32)opcode); +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(REG_PAR1); +#endif + raw_mov_l_mi((uae_u32)®s.pc_p, + (uae_u32)pc_hist[i].location); + raw_call((uae_u32)cputbl[opcode]); + //raw_add_l_mi((uae_u32)&oink,1); // FIXME +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(4); +#endif + if (needed_flags) { + //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode+65536); + } + else { + //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode); + } + + if (ineeded_flags; + + if (x==0xff || 1) { /* To be on the safe side */ + uae_u16* next=(uae_u16*)next_pc_p; + uae_u16 op=cft_map(*next); + + x=0x1f; + x&=(~prop[op].set_flags); + x|=prop[op].use_flags; + } + + x|=bi2->needed_flags; + if (!(x & FLAG_CZNV)) { + /* We can forget about flags */ + dont_care_flags(); + extra_len+=2; /* The next instruction now is part of this + block */ + } + + } +#endif + + if (next_pc_p) { /* A branch was registered */ + uae_u32 t1=next_pc_p; + uae_u32 t2=taken_pc_p; + int cc=branch_cc; + + uae_u32* branchadd; + uae_u32* tba; + bigstate tmp; + blockinfo* tbi; + + if (taken_pc_penv)); + //flush(1); /* Can only get here if was_comp==1 */ + raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles)); + raw_jcc_l_oponly(9); + tba=(uae_u32*)get_target(); + emit_long(get_handler(t1)-((uae_u32)tba+4)); + raw_mov_l_mi((uae_u32)®s.pc_p,t1); + raw_jmp((uae_u32)popall_do_nothing); + create_jmpdep(bi,0,tba,t1); + + align_target(16); + /* not-predicted outcome */ + *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4); + live=tmp; /* Ouch again */ + 
tbi=get_blockinfo_addr_new((void*)t2,1); + match_states(&(tbi->env)); + + //flush(1); /* Can only get here if was_comp==1 */ + raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles)); + raw_jcc_l_oponly(9); + tba=(uae_u32*)get_target(); + emit_long(get_handler(t2)-((uae_u32)tba+4)); + raw_mov_l_mi((uae_u32)®s.pc_p,t2); + raw_jmp((uae_u32)popall_do_nothing); + create_jmpdep(bi,1,tba,t2); + } + else + { + if (was_comp) { + flush(1); + } + + /* Let's find out where next_handler is... */ + if (was_comp && isinreg(PC_P)) { + int r2; + + r=live.state[PC_P].realreg; + + if (r==0) + r2=1; + else + r2=0; + + raw_and_l_ri(r,TAGMASK); + raw_mov_l_ri(r2,(uae_u32)popall_do_nothing); + raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles)); + raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,9); + raw_jmp_r(r2); + } + else if (was_comp && isconst(PC_P)) { + uae_u32 v=live.state[PC_P].val; + uae_u32* tba; + blockinfo* tbi; + + tbi=get_blockinfo_addr_new((void*)v,1); + match_states(&(tbi->env)); + + raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles)); + raw_jcc_l_oponly(9); + tba=(uae_u32*)get_target(); + emit_long(get_handler(v)-((uae_u32)tba+4)); + raw_mov_l_mi((uae_u32)®s.pc_p,v); + raw_jmp((uae_u32)popall_do_nothing); + create_jmpdep(bi,0,tba,v); + } + else { + int r2; + + r=REG_PC_TMP; + raw_mov_l_rm(r,(uae_u32)®s.pc_p); + if (r==0) + r2=1; + else + r2=0; + + raw_and_l_ri(r,TAGMASK); + raw_mov_l_ri(r2,(uae_u32)popall_do_nothing); + raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles)); + raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,9); + raw_jmp_r(r2); + } + } + } + + if (next_pc_p+extra_len>=max_pcp && + next_pc_p+extra_lenlen=max_pcp-min_pcp; + bi->min_pcp=min_pcp; + + remove_from_list(bi); + if (isinrom(min_pcp) && isinrom(max_pcp)) + add_to_dormant(bi); /* No need to checksum it on cache flush. + Please don't start changing ROMs in + flight! 
*/ + else { + calc_checksum(bi,&(bi->c1),&(bi->c2)); + add_to_active(bi); + } + + log_dump(); + align_target(32); + current_compile_p=get_target(); + + raise_in_cl_list(bi); + bi->nexthandler=current_compile_p; + + /* We will flush soon, anyway, so let's do it now */ + if (current_compile_p>=max_compile_start) + flush_icache_hard(7); + + do_extra_cycles(totcycles); /* for the compilation time */ + } +} + + diff --git a/disk.c b/disk.c index 6370464e..6d8ec58d 100755 --- a/disk.c +++ b/disk.c @@ -833,6 +833,7 @@ static int drive_insert (drive * drv, struct uae_prefs *p, int dnum, const char currprefs.df[dnum][255] = 0; strncpy (changed_prefs.df[dnum], fname, 255); changed_prefs.df[dnum][255] = 0; + strcpy (drv->newname, fname); gui_filename (dnum, fname); memset (buffer, 0, sizeof (buffer)); @@ -1657,6 +1658,9 @@ static void setdskchangetime(drive *drv, int dsktime) } } drv->dskchange_time = dsktime; +#ifdef DISK_DEBUG + write_dlog("delayed insert enable %d\n", dsktime); +#endif } void DISK_reinsert (int num) diff --git a/filesys.c b/filesys.c index 3ad38252..709c26eb 100755 --- a/filesys.c +++ b/filesys.c @@ -3414,7 +3414,7 @@ static void *filesys_thread (void *unit_v) ui->self->cmds_sent++; /* The message is sent by our interrupt handler, so make sure an interrupt * happens. */ - uae_int_requested = 1; + do_uae_int_requested(); /* Send back the locks. 
*/ if (get_long (ui->self->locklist) != 0) write_comm_pipe_int (ui->back_pipe, (int)(get_long (ui->self->locklist)), 0); diff --git a/include/compemu.h b/include/compemu.h index a8b2a756..b0ceb288 100755 --- a/include/compemu.h +++ b/include/compemu.h @@ -424,9 +424,24 @@ DECLARE(fabs_rr(FW d, FR s)); DECLARE(frndint_rr(FW d, FR s)); DECLARE(fsin_rr(FW d, FR s)); DECLARE(fcos_rr(FW d, FR s)); +DECLARE(ftan_rr(FW d, FR s)); +DECLARE(fsincos_rr(FW d, FW c, FR s)); +DECLARE(fscale_rr(FRW d, FR s)); DECLARE(ftwotox_rr(FW d, FR s)); DECLARE(fetox_rr(FW d, FR s)); +DECLARE(fetoxM1_rr(FW d, FR s)); +DECLARE(ftentox_rr(FW d, FR s)); DECLARE(flog2_rr(FW d, FR s)); +DECLARE(flogN_rr(FW d, FR s)); +DECLARE(flogNP1_rr(FW d, FR s)); +DECLARE(flog10_rr(FW d, FR s)); +DECLARE(fasin_rr(FW d, FR s)); +DECLARE(facos_rr(FW d, FR s)); +DECLARE(fatan_rr(FW d, FR s)); +DECLARE(fsinh_rr(FW d, FR s)); +DECLARE(fcosh_rr(FW d, FR s)); +DECLARE(ftanh_rr(FW d, FR s)); +DECLARE(fatanh_rr(FW d, FR s)); DECLARE(fneg_rr(FW d, FR s)); DECLARE(fadd_rr(FRW d, FR s)); DECLARE(fsub_rr(FRW d, FR s)); diff --git a/include/compemu_old.h b/include/compemu_old.h new file mode 100755 index 00000000..a8b2a756 --- /dev/null +++ b/include/compemu_old.h @@ -0,0 +1,527 @@ +#define USE_OPTIMIZER 0 +#define USE_LOW_OPTIMIZER 0 +#define USE_ALIAS 1 +#define USE_F_ALIAS 1 +#define USE_SOFT_FLUSH 1 +#define USE_OFFSET 1 +#define COMP_DEBUG 1 + +#if COMP_DEBUG +#define Dif(x) if (x) +#else +#define Dif(x) if (0) +#endif + +#define SCALE 2 +#define MAXCYCLES (1000 * CYCLE_UNIT) +#define MAXREGOPT 65536 + +#define BYTES_PER_INST 10240 /* paranoid ;-) */ +#define LONGEST_68K_INST 16 /* The number of bytes the longest possible + 68k instruction takes */ +#define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums + for. 
Anything larger will be flushed + unconditionally even with SOFT_FLUSH */ +#define MAX_HOLD_BI 3 /* One for the current block, and up to two + for jump targets */ + +#define INDIVIDUAL_INST 0 +#define FLAG_C 0x0010 +#define FLAG_V 0x0008 +#define FLAG_Z 0x0004 +#define FLAG_N 0x0002 +#define FLAG_X 0x0001 +#define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V) +#define FLAG_ZNV (FLAG_Z | FLAG_N | FLAG_V) + +#define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */ + +/* Whether to preserve registers across calls to JIT compiled routines */ +#if defined X86_ASSEMBLY +#define USE_PUSH_POP 0 +#else +#define USE_PUSH_POP 1 +#endif + +#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */ +#define N_FREGS 6 /* That leaves us two positions on the stack to play with */ + +/* Functions exposed to newcpu, or to what was moved from newcpu.c to + * compemu_support.c */ +extern void init_comp(void); +extern void flush(int save_regs); +extern void small_flush(int save_regs); +extern void set_target(uae_u8* t); +extern uae_u8* get_target(void); +extern void freescratch(void); +extern void build_comp(void); +extern void set_cache_state(int enabled); +extern int get_cache_state(void); +extern uae_u32 get_jitted_size(void); +#ifdef JIT +extern void flush_icache(int n); +#endif +extern void alloc_cache(void); +extern void compile_block(cpu_history* pc_hist, int blocklen, int totcyles); +extern void lopt_emit_all(void); +extern int check_for_cache_miss(void); + + +#define scaled_cycles(x) (currprefs.m68k_speed==-1?(((x)/SCALE)?(((x)/SCALE= REGALLOC */ +#define DECLARE(func) extern void func; extern void do_##func +#else +#define REGALLOC_O 2000000 +#define PEEPHOLE_O 2000000 +#define DECLARE(func) extern void func +#endif + + +/* What we expose to the outside */ +DECLARE(bt_l_ri(R4 r, IMM i)); +DECLARE(bt_l_rr(R4 r, R4 b)); +DECLARE(btc_l_ri(RW4 r, IMM i)); +DECLARE(btc_l_rr(RW4 r, R4 b)); +DECLARE(bts_l_ri(RW4 r, IMM i)); +DECLARE(bts_l_rr(RW4 r, R4 b)); 
+DECLARE(btr_l_ri(RW4 r, IMM i)); +DECLARE(btr_l_rr(RW4 r, R4 b)); +DECLARE(mov_l_rm(W4 d, IMM s)); +DECLARE(call_r(R4 r)); +DECLARE(sub_l_mi(IMM d, IMM s)); +DECLARE(mov_l_mi(IMM d, IMM s)); +DECLARE(mov_w_mi(IMM d, IMM s)); +DECLARE(mov_b_mi(IMM d, IMM s)); +DECLARE(rol_b_ri(RW1 r, IMM i)); +DECLARE(rol_w_ri(RW2 r, IMM i)); +DECLARE(rol_l_ri(RW4 r, IMM i)); +DECLARE(rol_l_rr(RW4 d, R1 r)); +DECLARE(rol_w_rr(RW2 d, R1 r)); +DECLARE(rol_b_rr(RW1 d, R1 r)); +DECLARE(shll_l_rr(RW4 d, R1 r)); +DECLARE(shll_w_rr(RW2 d, R1 r)); +DECLARE(shll_b_rr(RW1 d, R1 r)); +DECLARE(ror_b_ri(R1 r, IMM i)); +DECLARE(ror_w_ri(R2 r, IMM i)); +DECLARE(ror_l_ri(R4 r, IMM i)); +DECLARE(ror_l_rr(R4 d, R1 r)); +DECLARE(ror_w_rr(R2 d, R1 r)); +DECLARE(ror_b_rr(R1 d, R1 r)); +DECLARE(shrl_l_rr(RW4 d, R1 r)); +DECLARE(shrl_w_rr(RW2 d, R1 r)); +DECLARE(shrl_b_rr(RW1 d, R1 r)); +DECLARE(shra_l_rr(RW4 d, R1 r)); +DECLARE(shra_w_rr(RW2 d, R1 r)); +DECLARE(shra_b_rr(RW1 d, R1 r)); +DECLARE(shll_l_ri(RW4 r, IMM i)); +DECLARE(shll_w_ri(RW2 r, IMM i)); +DECLARE(shll_b_ri(RW1 r, IMM i)); +DECLARE(shrl_l_ri(RW4 r, IMM i)); +DECLARE(shrl_w_ri(RW2 r, IMM i)); +DECLARE(shrl_b_ri(RW1 r, IMM i)); +DECLARE(shra_l_ri(RW4 r, IMM i)); +DECLARE(shra_w_ri(RW2 r, IMM i)); +DECLARE(shra_b_ri(RW1 r, IMM i)); +DECLARE(setcc(W1 d, IMM cc)); +DECLARE(setcc_m(IMM d, IMM cc)); +DECLARE(cmov_l_rr(RW4 d, R4 s, IMM cc)); +DECLARE(cmov_l_rm(RW4 d, IMM s, IMM cc)); +DECLARE(bsf_l_rr(W4 d, R4 s)); +DECLARE(pop_m(IMM d)); +DECLARE(push_m(IMM d)); +DECLARE(pop_l(W4 d)); +DECLARE(push_l_i(IMM i)); +DECLARE(push_l(R4 s)); +DECLARE(clear_16(RW4 r)); +DECLARE(clear_8(RW4 r)); +DECLARE(sign_extend_16_rr(W4 d, R2 s)); +DECLARE(sign_extend_8_rr(W4 d, R1 s)); +DECLARE(zero_extend_16_rr(W4 d, R2 s)); +DECLARE(zero_extend_8_rr(W4 d, R1 s)); +DECLARE(imul_64_32(RW4 d, RW4 s)); +DECLARE(mul_64_32(RW4 d, RW4 s)); +DECLARE(imul_32_32(RW4 d, R4 s)); +DECLARE(mul_32_32(RW4 d, R4 s)); +DECLARE(mov_b_rr(W1 d, R1 s)); +DECLARE(mov_w_rr(W2 d, R2 
s)); +DECLARE(mov_l_rrm_indexed(W4 d,R4 baser, R4 index, IMM factor)); +DECLARE(mov_w_rrm_indexed(W2 d, R4 baser, R4 index, IMM factor)); +DECLARE(mov_b_rrm_indexed(W1 d, R4 baser, R4 index, IMM factor)); +DECLARE(mov_l_mrr_indexed(R4 baser, R4 index, IMM factor, R4 s)); +DECLARE(mov_w_mrr_indexed(R4 baser, R4 index, IMM factor, R2 s)); +DECLARE(mov_b_mrr_indexed(R4 baser, R4 index, IMM factor, R1 s)); +DECLARE(mov_l_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R4 s)); +DECLARE(mov_w_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R2 s)); +DECLARE(mov_b_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R1 s)); +DECLARE(mov_l_brrm_indexed(W4 d, IMM base, R4 baser, R4 index, IMM factor)); +DECLARE(mov_w_brrm_indexed(W2 d, IMM base, R4 baser, R4 index, IMM factor)); +DECLARE(mov_b_brrm_indexed(W1 d, IMM base, R4 baser, R4 index, IMM factor)); +DECLARE(mov_l_rm_indexed(W4 d, IMM base, R4 index, IMM factor)); +DECLARE(mov_l_rR(W4 d, R4 s, IMM offset)); +DECLARE(mov_w_rR(W2 d, R4 s, IMM offset)); +DECLARE(mov_b_rR(W1 d, R4 s, IMM offset)); +DECLARE(mov_l_brR(W4 d, R4 s, IMM offset)); +DECLARE(mov_w_brR(W2 d, R4 s, IMM offset)); +DECLARE(mov_b_brR(W1 d, R4 s, IMM offset)); +DECLARE(mov_l_Ri(R4 d, IMM i, IMM offset)); +DECLARE(mov_w_Ri(R4 d, IMM i, IMM offset)); +DECLARE(mov_b_Ri(R4 d, IMM i, IMM offset)); +DECLARE(mov_l_Rr(R4 d, R4 s, IMM offset)); +DECLARE(mov_w_Rr(R4 d, R2 s, IMM offset)); +DECLARE(mov_b_Rr(R4 d, R1 s, IMM offset)); +DECLARE(lea_l_brr(W4 d, R4 s, IMM offset)); +DECLARE(lea_l_brr_indexed(W4 d, R4 s, R4 index, IMM factor, IMM offset)); +DECLARE(lea_l_rr_indexed(W4 d, R4 s, R4 index, IMM factor)); +DECLARE(mov_l_bRr(R4 d, R4 s, IMM offset)); +DECLARE(mov_w_bRr(R4 d, R2 s, IMM offset)); +DECLARE(mov_b_bRr(R4 d, R1 s, IMM offset)); +DECLARE(bswap_32(RW4 r)); +DECLARE(bswap_16(RW2 r)); +DECLARE(mov_l_rr(W4 d, R4 s)); +DECLARE(mov_l_mr(IMM d, R4 s)); +DECLARE(mov_w_mr(IMM d, R2 s)); +DECLARE(mov_w_rm(W2 d, IMM s)); +DECLARE(mov_b_mr(IMM 
d, R1 s)); +DECLARE(mov_b_rm(W1 d, IMM s)); +DECLARE(mov_l_ri(W4 d, IMM s)); +DECLARE(mov_w_ri(W2 d, IMM s)); +DECLARE(mov_b_ri(W1 d, IMM s)); +DECLARE(add_l_mi(IMM d, IMM s) ); +DECLARE(add_w_mi(IMM d, IMM s) ); +DECLARE(add_b_mi(IMM d, IMM s) ); +DECLARE(test_l_ri(R4 d, IMM i)); +DECLARE(test_l_rr(R4 d, R4 s)); +DECLARE(test_w_rr(R2 d, R2 s)); +DECLARE(test_b_rr(R1 d, R1 s)); +DECLARE(and_l_ri(RW4 d, IMM i)); +DECLARE(and_l(RW4 d, R4 s)); +DECLARE(and_w(RW2 d, R2 s)); +DECLARE(and_b(RW1 d, R1 s)); +DECLARE(or_l_ri(RW4 d, IMM i)); +DECLARE(or_l(RW4 d, R4 s)); +DECLARE(or_w(RW2 d, R2 s)); +DECLARE(or_b(RW1 d, R1 s)); +DECLARE(adc_l(RW4 d, R4 s)); +DECLARE(adc_w(RW2 d, R2 s)); +DECLARE(adc_b(RW1 d, R1 s)); +DECLARE(add_l(RW4 d, R4 s)); +DECLARE(add_w(RW2 d, R2 s)); +DECLARE(add_b(RW1 d, R1 s)); +DECLARE(sub_l_ri(RW4 d, IMM i)); +DECLARE(sub_w_ri(RW2 d, IMM i)); +DECLARE(sub_b_ri(RW1 d, IMM i)); +DECLARE(add_l_ri(RW4 d, IMM i)); +DECLARE(add_w_ri(RW2 d, IMM i)); +DECLARE(add_b_ri(RW1 d, IMM i)); +DECLARE(sbb_l(RW4 d, R4 s)); +DECLARE(sbb_w(RW2 d, R2 s)); +DECLARE(sbb_b(RW1 d, R1 s)); +DECLARE(sub_l(RW4 d, R4 s)); +DECLARE(sub_w(RW2 d, R2 s)); +DECLARE(sub_b(RW1 d, R1 s)); +DECLARE(cmp_l(R4 d, R4 s)); +DECLARE(cmp_l_ri(R4 r, IMM i)); +DECLARE(cmp_w(R2 d, R2 s)); +DECLARE(cmp_b(R1 d, R1 s)); +DECLARE(xor_l(RW4 d, R4 s)); +DECLARE(xor_w(RW2 d, R2 s)); +DECLARE(xor_b(RW1 d, R1 s)); +DECLARE(live_flags(void)); +DECLARE(dont_care_flags(void)); +DECLARE(duplicate_carry(void)); +DECLARE(restore_carry(void)); +DECLARE(start_needflags(void)); +DECLARE(end_needflags(void)); +DECLARE(make_flags_live(void)); +DECLARE(call_r_11(R4 r, W4 out1, R4 in1, IMM osize, IMM isize)); +DECLARE(call_r_02(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)); +DECLARE(readmem_new(R4 address, W4 dest, IMM offset, IMM size, W4 tmp)); +DECLARE(writemem_new(R4 address, R4 source, IMM offset, IMM size, W4 tmp)); +DECLARE(forget_about(W4 r)); +DECLARE(nop(void)); + +DECLARE(f_forget_about(FW r)); 
+DECLARE(fmov_pi(FW r)); +DECLARE(fmov_log10_2(FW r)); +DECLARE(fmov_log2_e(FW r)); +DECLARE(fmov_loge_2(FW r)); +DECLARE(fmov_1(FW r)); +DECLARE(fmov_0(FW r)); +DECLARE(fmov_rm(FW r, MEMR m)); +DECLARE(fmovi_rm(FW r, MEMR m)); +DECLARE(fmovi_mr(MEMW m, FR r)); +DECLARE(fmovs_rm(FW r, MEMR m)); +DECLARE(fmovs_mr(MEMW m, FR r)); +DECLARE(fmov_mr(MEMW m, FR r)); +DECLARE(fmov_ext_mr(MEMW m, FR r)); +DECLARE(fmov_ext_rm(FW r, MEMR m)); +DECLARE(fmov_rr(FW d, FR s)); +DECLARE(fldcw_m_indexed(R4 index, IMM base)); +DECLARE(ftst_r(FR r)); +DECLARE(dont_care_fflags(void)); +DECLARE(fsqrt_rr(FW d, FR s)); +DECLARE(fabs_rr(FW d, FR s)); +DECLARE(frndint_rr(FW d, FR s)); +DECLARE(fsin_rr(FW d, FR s)); +DECLARE(fcos_rr(FW d, FR s)); +DECLARE(ftwotox_rr(FW d, FR s)); +DECLARE(fetox_rr(FW d, FR s)); +DECLARE(flog2_rr(FW d, FR s)); +DECLARE(fneg_rr(FW d, FR s)); +DECLARE(fadd_rr(FRW d, FR s)); +DECLARE(fsub_rr(FRW d, FR s)); +DECLARE(fmul_rr(FRW d, FR s)); +DECLARE(frem_rr(FRW d, FR s)); +DECLARE(frem1_rr(FRW d, FR s)); +DECLARE(fdiv_rr(FRW d, FR s)); +DECLARE(fcmp_rr(FR d, FR s)); +DECLARE(fflags_into_flags(W2 tmp)); + +extern int failure; +#define FAIL(x) do { failure|=x; } while (0) + +/* Convenience functions exposed to gencomp */ +extern uae_u32 m68k_pc_offset; +extern void readbyte(int address, int dest, int tmp); +extern void readword(int address, int dest, int tmp); +extern void readlong(int address, int dest, int tmp); +extern void writebyte(int address, int source, int tmp); +extern void writeword(int address, int source, int tmp); +extern void writelong(int address, int source, int tmp); +extern void writeword_clobber(int address, int source, int tmp); +extern void writelong_clobber(int address, int source, int tmp); +extern void get_n_addr(int address, int dest, int tmp); +extern void get_n_addr_jmp(int address, int dest, int tmp); +extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp); +extern int kill_rodent(int r); +extern void 
sync_m68k_pc(void); +extern uae_u32 get_const(int r); +extern int is_const(int r); +extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond); +extern void empty_optimizer(void); + +#define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1)) +#define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o))) +#define comp_get_ilong(o) do_get_mem_long((uae_u32 *)(comp_pc_p + (o))) + +/* Preferences handling */ +void check_prefs_changed_comp (void); + +struct blockinfo_t; + +typedef struct dep_t { + uae_u32* jmp_off; + struct blockinfo_t* target; + struct dep_t** prev_p; + struct dep_t* next; +} dependency; + +typedef struct blockinfo_t { + uae_s32 count; + cpuop_func* direct_handler_to_use; + cpuop_func* handler_to_use; + /* The direct handler does not check for the correct address */ + + cpuop_func* handler; + cpuop_func* direct_handler; + + cpuop_func* direct_pen; + cpuop_func* direct_pcc; + + uae_u8* nexthandler; + uae_u8* pc_p; + + uae_u32 c1; + uae_u32 c2; + uae_u32 len; + + struct blockinfo_t* next_same_cl; + struct blockinfo_t** prev_same_cl_p; + struct blockinfo_t* next; + struct blockinfo_t** prev_p; + + uae_u32 min_pcp; + uae_u8 optlevel; + uae_u8 needed_flags; + uae_u8 status; + uae_u8 havestate; + + dependency dep[2]; /* Holds things we depend on */ + dependency* deplist; /* List of things that depend on this */ + smallstate env; +} blockinfo; + +#define BI_NEW 0 +#define BI_COUNTING 1 +#define BI_TARGETTED 2 + +typedef struct { + uae_u8 type; + uae_u8 reg; + uae_u32 next; +} regacc; + +void execute_normal(void); +void exec_nostats(void); +void do_nothing(void); + diff --git a/include/native2amiga.h b/include/native2amiga.h index ca986dd0..606e0f8a 100755 --- a/include/native2amiga.h +++ b/include/native2amiga.h @@ -54,3 +54,10 @@ void native2amiga_startup (void); * It's emptied via exter_int_helper by the EXTER interrupt. 
*/ extern smp_comm_pipe native2amiga_pending; #endif + +STATIC_INLINE do_uae_int_requested(void) +{ + uae_int_requested = 1; + set_uae_int_flag (); + INTREQ (0x8000 | 0x0008); +} diff --git a/native2amiga.c b/native2amiga.c index 48a57875..fe628016 100755 --- a/native2amiga.c +++ b/native2amiga.c @@ -51,7 +51,7 @@ void uae_Cause(uaecptr interrupt) uae_sem_wait (&n2asem); write_comm_pipe_int (&native2amiga_pending, 3, 0); write_comm_pipe_u32 (&native2amiga_pending, interrupt, 1); - uae_int_requested = 1; + do_uae_int_requested(); uae_sem_post (&n2asem); } @@ -60,7 +60,7 @@ void uae_ReplyMsg(uaecptr msg) uae_sem_wait (&n2asem); write_comm_pipe_int (&native2amiga_pending, 2, 0); write_comm_pipe_u32 (&native2amiga_pending, msg, 1); - uae_int_requested = 1; + do_uae_int_requested(); uae_sem_post (&n2asem); } @@ -70,7 +70,7 @@ void uae_PutMsg(uaecptr port, uaecptr msg) write_comm_pipe_int (&native2amiga_pending, 1, 0); write_comm_pipe_u32 (&native2amiga_pending, port, 0); write_comm_pipe_u32 (&native2amiga_pending, msg, 1); - uae_int_requested = 1; + do_uae_int_requested(); uae_sem_post (&n2asem); } @@ -80,7 +80,7 @@ void uae_Signal(uaecptr task, uae_u32 mask) write_comm_pipe_int (&native2amiga_pending, 0, 0); write_comm_pipe_u32 (&native2amiga_pending, task, 0); write_comm_pipe_int (&native2amiga_pending, mask, 1); - uae_int_requested = 1; + do_uae_int_requested(); uae_sem_post (&n2asem); } @@ -90,7 +90,7 @@ void uae_NotificationHack(uaecptr port, uaecptr nr) write_comm_pipe_int (&native2amiga_pending, 4, 0); write_comm_pipe_int (&native2amiga_pending, port, 0); write_comm_pipe_int (&native2amiga_pending, nr, 1); - uae_int_requested = 1; + do_uae_int_requested(); uae_sem_post (&n2asem); } diff --git a/od-win32/ioport.c b/od-win32/ioport.c index c6dae9ad..a89347b6 100755 --- a/od-win32/ioport.c +++ b/od-win32/ioport.c @@ -64,9 +64,9 @@ void ioport_free (void) if (initialized) { pShutdownWinIo(); FreeLibrary (ioh); + io_log ("io freed\n"); } #endif - io_log ("io 
freed\n"); initialized = 0; } diff --git a/od-win32/mman.c b/od-win32/mman.c index e68baf17..74e768ef 100755 --- a/od-win32/mman.c +++ b/od-win32/mman.c @@ -241,9 +241,8 @@ void *shmat(int shmid, LPVOID shmaddr, int shmflg) if ((shmids[shmid].key == shmid) && shmids[shmid].size) { got = FALSE; if (got == FALSE) { - if (shmaddr) { - result = (void*)VirtualFree(shmaddr, 0, os_winnt ? MEM_RESET : MEM_RELEASE); - } + if (shmaddr) + VirtualFree(shmaddr, 0, os_winnt ? MEM_RESET : MEM_RELEASE); result = VirtualAlloc(shmaddr, size, os_winnt ? MEM_COMMIT : (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE); if (result == NULL) { diff --git a/od-win32/win32.h b/od-win32/win32.h index 0b254959..a1b942b2 100755 --- a/od-win32/win32.h +++ b/od-win32/win32.h @@ -22,7 +22,7 @@ extern int manual_palette_refresh_needed; extern int mouseactive, focus; extern int ignore_messages_all; #define WINUAEBETA 1 -#define WINUAEBETASTR " Beta 7" +#define WINUAEBETASTR " Beta 9" extern char start_path_exe[MAX_DPATH]; extern char start_path_data[MAX_DPATH]; diff --git a/od-win32/win32_filesys.c b/od-win32/win32_filesys.c index 46fc90e0..4cb21296 100755 --- a/od-win32/win32_filesys.c +++ b/od-win32/win32_filesys.c @@ -113,12 +113,12 @@ void filesys_init( void ) /* Is this drive-letter valid (it used to check for media in drive) */ if( ( dwDriveMask & 1 ) /* && CheckRM( volumepath ) */ ) { - BOOL inserted = CheckRM( volumepath ); /* Is there a disk inserted? */ - drivetype = GetDriveType( volumepath ); + BOOL inserted = CheckRM(volumepath); /* Is there a disk inserted? */ + drivetype = GetDriveType(volumepath); if (drivetype != DRIVE_CDROM) { get_volume_name( currprefs.mountinfo, volumepath, volumename, MAX_DPATH, inserted, drivetype, 1 ); - if( drivetype == DRIVE_REMOTE ) + if (drivetype == DRIVE_REMOTE) strcat( volumepath, "." ); else strcat( volumepath, ".." 
); diff --git a/od-win32/win32gui.c b/od-win32/win32gui.c index f97fc89c..5ea19569 100755 --- a/od-win32/win32gui.c +++ b/od-win32/win32gui.c @@ -4281,7 +4281,7 @@ static void values_to_miscdlg (HWND hDlg) CheckDlgButton (hDlg, IDC_CLOCKSYNC, workprefs.tod_hack); cw = catweasel_detect(); EnableWindow (GetDlgItem (hDlg, IDC_CATWEASEL), cw); - if (!cw) + if (!cw && workprefs.catweasel < 100) workprefs.catweasel = 0; CheckDlgButton (hDlg, IDC_CATWEASEL, workprefs.catweasel); CheckDlgButton (hDlg, IDC_STATE_CAPTURE, workprefs.statecapture); diff --git a/od-win32/winuae_msvc/winuae_msvc.vcproj b/od-win32/winuae_msvc/winuae_msvc.vcproj index efb0829e..10aa4cec 100755 --- a/od-win32/winuae_msvc/winuae_msvc.vcproj +++ b/od-win32/winuae_msvc/winuae_msvc.vcproj @@ -165,6 +165,7 @@ BufferSecurityCheck="false" EnableFunctionLevelLinking="false" EnableEnhancedInstructionSet="0" + FloatingPointModel="0" TreatWChar_tAsBuiltInType="false" RuntimeTypeInfo="false" UsePrecompiledHeader="0"