if (addr >= 0x100)
return 0;
buf1[0] = (uae_u8)addr;
- DeviceIoControl (handle, CW_PEEKREG_FULL, buf1, 1, buf2, 1, &did_read, 0);
+ if (handle != INVALID_HANDLE_VALUE)
+ DeviceIoControl (handle, CW_PEEKREG_FULL, buf1, 1, buf2, 1, &did_read, 0);
+ else
+ buf2[0] = ioport_read (cwc.iobase + addr);
//write_log ("G %02.2X %02.2X %d\n", buf1[0], buf2[0], did_read);
return buf2[0];
}
return;
buf[0] = (uae_u8)addr;
buf[1] = b;
- DeviceIoControl (handle, CW_POKEREG_FULL, buf, 2, 0, 0, &did_read, 0);
+ if (handle != INVALID_HANDLE_VALUE)
+ DeviceIoControl (handle, CW_POKEREG_FULL, buf, 2, 0, 0, &did_read, 0);
+ else
+ ioport_write (cwc.iobase + addr, b);
//write_log ("P %02.2X %02.2X %d\n", (uae_u8)addr, (uae_u8)b, did_read);
}
if (!currprefs.catweasel)
return 0;
- for (i = 0; i < 4; i++) {
- if (currprefs.catweasel > 0)
- i = currprefs.catweasel;
- sprintf (name, "\\\\.\\CAT%d_F0", i);
- handle = CreateFile (name, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0,
- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
- if (handle != INVALID_HANDLE_VALUE || currprefs.catweasel > 0)
- break;
- }
- if (handle == INVALID_HANDLE_VALUE) {
- write_log ("No Catweasel detected\n");
- goto fail;
- }
- if (!DeviceIoControl (handle, CW_GET_VERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) {
- write_log ("CW_GET_VERSION failed %d\n", GetLastError());
- goto fail;
- }
- write_log ("CW driver version string '%s'\n", buffer);
- if (!DeviceIoControl (handle, CW_GET_HWVERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) {
- write_log ("CW_GET_HWVERSION failed %d\n", GetLastError());
- goto fail;
- }
- write_log ("CW: v=%d 14=%d 28=%d 56=%d joy=%d dpm=%d sid=%d kb=%d sidfifo=%d\n",
+
+ if (currprefs.catweasel >= 100) {
+ cwc.type = currprefs.catweasel >= 0x400 ? 3 : 1;
+ cwc.iobase = currprefs.catweasel;
+ if (!ioport_init())
+ goto fail;
+ strcpy(name, "[DIRECT]");
+ } else {
+ for (i = 0; i < 4; i++) {
+ if (currprefs.catweasel > 0)
+ i = currprefs.catweasel;
+ sprintf (name, "\\\\.\\CAT%d_F0", i);
+ handle = CreateFile (name, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0,
+ OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
+ if (handle != INVALID_HANDLE_VALUE || currprefs.catweasel > 0)
+ break;
+ }
+ if (handle == INVALID_HANDLE_VALUE) {
+ write_log ("No Catweasel detected\n");
+ goto fail;
+ }
+ if (!DeviceIoControl (handle, CW_GET_VERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) {
+ write_log ("CW_GET_VERSION failed %d\n", GetLastError());
+ goto fail;
+ }
+ write_log ("CW driver version string '%s'\n", buffer);
+ if (!DeviceIoControl (handle, CW_GET_HWVERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) {
+ write_log ("CW_GET_HWVERSION failed %d\n", GetLastError());
+ goto fail;
+ }
+ write_log ("CW: v=%d 14=%d 28=%d 56=%d joy=%d dpm=%d sid=%d kb=%d sidfifo=%d\n",
buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5],
buffer[6], buffer[7], ((uae_u32*)(buffer + 8))[0]);
- if (!DeviceIoControl (handle, CW_LOCK_EXCLUSIVE, 0, 0, buffer, sizeof (buffer), &len, 0)) {
- write_log ("CW_LOCK_EXCLUSIVE failed %d\n", GetLastError());
- goto fail;
+ if (!DeviceIoControl (handle, CW_LOCK_EXCLUSIVE, 0, 0, buffer, sizeof (buffer), &len, 0)) {
+ write_log ("CW_LOCK_EXCLUSIVE failed %d\n", GetLastError());
+ goto fail;
+ }
+ model = *((uae_u32*)(buffer + 4));
+ base = *((uae_u32*)(buffer + 0));
+ cwc.type = model == 0 ? 1 : model == 2 ? 4 : 3;
+ cwc.iobase = base;
}
- model = *((uae_u32*)(buffer + 4));
- base = *((uae_u32*)(buffer + 0));
- cwc.type = model == 0 ? 1 : model == 2 ? 4 : 3;
- cwc.iobase = base;
write_log ("Catweasel MK%d @%p (%s) detected and enabled\n",
cwc.type, cwc.iobase, name);
- catweasel_do_bput (3, 0x41); /* enable MK3-mode */
+ if (cwc.type == CATWEASEL_TYPE_MK4)
+ catweasel_do_bput (3, 0x41); /* enable MK3-mode */
catweasel_init_controller (&cwc);
return 1;
fail:
if (handle != INVALID_HANDLE_VALUE)
CloseHandle (handle);
handle = INVALID_HANDLE_VALUE;
+ ioport_free();
cwc.type = 0;
}
{"floppy3", "Diskfile for drive 3" },
{"hardfile", "access,sectors, surfaces, reserved, blocksize, path format" },
{"filesystem", "access,'Amiga volume-name':'host directory path' - where 'access' can be 'read-only' or 'read-write'" },
- {"catweasel_io","Catweasel board io base address" }
+ {"catweasel", "Catweasel board io base address" }
};
static const char *guimode1[] = { "no", "yes", "nowait", 0 };
cfgfile_write (f, "blitter_cycle_exact=%s\n", p->blitter_cycle_exact ? "true" : "false");
cfgfile_write (f, "log_illegal_mem=%s\n", p->illegal_mem ? "true" : "false");
- cfgfile_write (f, "catweasel=%d\n", p->catweasel);
+ if (p->catweasel >= 100)
+ cfgfile_write (f, "catweasel=0x%x\n", p->catweasel);
+ else
+ cfgfile_write (f, "catweasel=%d\n", p->catweasel);
cfgfile_write (f, "kbd_lang=%s\n", (p->keyboard_lang == KBD_LANG_DE ? "de"
: p->keyboard_lang == KBD_LANG_DK ? "dk"
}
}
abort();
- return -1;
}
void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
m68k_areg (regs, opcode & 7) = ad;
}
-static fptype const_e=2.718281828; /* Got some more digits? */
-static fptype const_log10_e=0.4342944819;
-static fptype const_loge_10=2.302585093;
+static fptype const_e=2.718281828459045235360;
+static fptype const_log10_e=0.434294481903251827651;
+static fptype const_loge_10=2.302585092994045684018;
static fptype power10[]={1e0,1e1,1e2,1e4,1e8,1e16,1e32,1e64,1e128,1e256
#if USE_LONG_DOUBLE
, 1e512, 1e1024, 1e2048, 1e4096
MAKE_FPSR (src);
break;
case 0x01: /* FINT */
- FAIL(1);
- return;
dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ frndint_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
case 0x02: /* FSINH */
- FAIL(1);
- return;
-
dont_care_fflags();
- regs.fp[reg] = sinh (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fsinh_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x03: /* FINTRZ */
#if USE_X86_FPUCW
MAKE_FPSR (reg);
break;
case 0x06: /* FLOGNP1 */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = log (src + 1.0);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ flogNP1_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x08: /* FETOXM1 */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = exp (src) - 1.0;
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fetoxM1_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x09: /* FTANH */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = tanh (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ ftanh_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x0a: /* FATAN */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = atan (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fatan_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x0c: /* FASIN */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = asin (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fasin_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x0d: /* FATANH */
- FAIL(1);
- return;
dont_care_fflags();
-#if 1 /* The BeBox doesn't have atanh, and it isn't in the HPUX libm either */
- regs.fp[reg] = log ((1 + src) / (1 - src)) / 2;
-#else
- regs.fp[reg] = atanh (src);
-#endif
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fatanh_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x0e: /* FSIN */
dont_care_fflags();
MAKE_FPSR (reg);
break;
case 0x0f: /* FTAN */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = tan (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ ftan_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x10: /* FETOX */
dont_care_fflags();
MAKE_FPSR (reg);
break;
case 0x12: /* FTENTOX */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = pow (10.0, src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ ftentox_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x14: /* FLOGN */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = log (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ flogN_rr(reg,src);
+ MAKE_FPSR (reg);
break;
+
case 0x15: /* FLOG10 */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = log10 (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ flog10_rr(reg,src);
+ MAKE_FPSR (reg);
break;
+
case 0x16: /* FLOG2 */
dont_care_fflags();
src=get_fp_value (opcode, extra);
MAKE_FPSR (reg);
break;
case 0x19: /* FCOSH */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = cosh (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fcosh_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x1a: /* FNEG */
case 0x5a:
MAKE_FPSR (reg);
break;
case 0x1c: /* FACOS */
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = acos (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ facos_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x1d: /* FCOS */
dont_care_fflags();
break;
case 0x26: /* FSCALE */
dont_care_fflags();
- FAIL(1);
- return;
- regs.fp[reg] *= exp (log (2.0) * src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fscale_rr(reg,src);
+ MAKE_FPSR (reg);
break;
case 0x27: /* FSGLMUL */
dont_care_fflags();
case 0x35:
case 0x36:
case 0x37:
- FAIL(1);
- return;
dont_care_fflags();
- regs.fp[reg] = sin (src);
- regs.fp[extra & 7] = cos (src);
- MAKE_FPSR (regs.fp[reg]);
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fsincos_rr(reg,extra & 7,src);
+ MAKE_FPSR (extra & 7);
+ MAKE_FPSR (reg);
break;
case 0x38: /* FCMP */
src=get_fp_value (opcode, extra);
--- /dev/null
+/*
+ * UAE - The Un*x Amiga Emulator
+ *
+ * MC68881 emulation
+ *
+ * Copyright 1996 Herman ten Brugge
+ * Adapted for JIT compilation (c) Bernd Meyer, 2000
+ */
+
+#include <math.h>
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include "config.h"
+#include "options.h"
+#include "memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "ersatz.h"
+#include "md-fpp.h"
+#include "compemu.h"
+
+#define MAKE_FPSR(r) do { fmov_rr(FP_RESULT,r); } while (0)
+
+#define delay //nop() ;nop()
+#define delay2 //nop() ;nop()
+
+uae_s32 temp_fp[3]; /* To convert between FP/integer */
+
+/* return register number, or -1 for failure */
+STATIC_INLINE int get_fp_value (uae_u32 opcode, uae_u16 extra)
+{
+ uaecptr tmppc;
+ uae_u16 tmp;
+ int size;
+ int mode;
+ int reg;
+ double* src;
+ uae_u32 ad = 0;
+ static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
+ static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
+
+ if ((extra & 0x4000) == 0) {
+ return (extra >> 10) & 7;
+ }
+
+ mode = (opcode >> 3) & 7;
+ reg = opcode & 7;
+ size = (extra >> 10) & 7;
+ switch (mode) {
+ case 0:
+ switch (size) {
+ case 6:
+ sign_extend_8_rr(S1,reg);
+ mov_l_mr((uae_u32)temp_fp,S1);
+ delay2;
+ fmovi_rm(FS1,(uae_u32)temp_fp);
+ return FS1;
+ case 4:
+ sign_extend_16_rr(S1,reg);
+ mov_l_mr((uae_u32)temp_fp,S1);
+ delay2;
+ fmovi_rm(FS1,(uae_u32)temp_fp);
+ return FS1;
+ case 0:
+ mov_l_mr((uae_u32)temp_fp,reg);
+ delay2;
+ fmovi_rm(FS1,(uae_u32)temp_fp);
+ return FS1;
+ case 1:
+ mov_l_mr((uae_u32)temp_fp,reg);
+ delay2;
+ fmovs_rm(FS1,(uae_u32)temp_fp);
+ return FS1;
+ default:
+ return -1;
+ }
+ return -1; /* Should be unreachable */
+ case 1:
+ return -1; /* Genuine invalid instruction */
+ default:
+ break;
+ }
+ /* OK, we *will* have to load something from an address. Let's make
+ sure we know how to handle that, or quit early --- i.e. *before*
+ we do any postincrement/predecrement that we may regret */
+
+ switch (size) {
+ case 3:
+ return -1;
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 5:
+ case 6:
+ break;
+ default:
+ return -1;
+ }
+
+ switch (mode) {
+ case 2:
+ ad=S1; /* We will change it, anyway ;-) */
+ mov_l_rr(ad,reg+8);
+ break;
+ case 3:
+ ad=S1;
+ mov_l_rr(ad,reg+8);
+ lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size]));
+ break;
+ case 4:
+ ad=S1;
+
+ lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size]));
+ mov_l_rr(ad,reg+8);
+ break;
+ case 5:
+ {
+ uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ ad=S1;
+ mov_l_rr(ad,reg+8);
+ lea_l_brr(ad,ad,off);
+ break;
+ }
+ case 6:
+ {
+ uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2);
+ ad=S1;
+ calc_disp_ea_020(reg+8,dp,ad,S2);
+ break;
+ }
+ case 7:
+ switch (reg) {
+ case 0:
+ {
+ uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ ad=S1;
+ mov_l_ri(ad,off);
+ break;
+ }
+ case 1:
+ {
+ uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4);
+ ad=S1;
+ mov_l_ri(ad,off);
+ break;
+ }
+ case 2:
+ {
+ uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+ m68k_pc_offset;
+ uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ ad=S1;
+ mov_l_ri(ad,address+PC16off);
+ break;
+ }
+ case 3:
+ return -1;
+ tmppc = m68k_getpc ();
+ tmp = next_iword ();
+ ad = get_disp_ea_020 (tmppc, tmp);
+ break;
+ case 4:
+ {
+ uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+ m68k_pc_offset;
+ ad=S1;
+ if (size == 6)
+ address++;
+ mov_l_ri(ad,address);
+ m68k_pc_offset+=sz2[size];
+ break;
+ }
+ default:
+ return -1;
+ }
+ }
+
+ switch (size) {
+ case 0:
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)temp_fp,S2);
+ delay2;
+ fmovi_rm(FS1,(uae_u32)temp_fp);
+ break;
+ case 1:
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)temp_fp,S2);
+ delay2;
+ fmovs_rm(FS1,(uae_u32)temp_fp);
+ break;
+ case 2:
+ readword(ad,S2,S3);
+ mov_w_mr(((uae_u32)temp_fp)+8,S2);
+ add_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)(temp_fp)+4,S2);
+ add_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)(temp_fp),S2);
+ delay2;
+ fmov_ext_rm(FS1,(uae_u32)(temp_fp));
+ break;
+ case 3:
+ return -1; /* Some silly "packed" stuff */
+ case 4:
+ readword(ad,S2,S3);
+ sign_extend_16_rr(S2,S2);
+ mov_l_mr((uae_u32)temp_fp,S2);
+ delay2;
+ fmovi_rm(FS1,(uae_u32)temp_fp);
+ break;
+ case 5:
+ readlong(ad,S2,S3);
+ mov_l_mr(((uae_u32)temp_fp)+4,S2);
+ add_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)(temp_fp),S2);
+ delay2;
+ fmov_rm(FS1,(uae_u32)(temp_fp));
+ break;
+ case 6:
+ readbyte(ad,S2,S3);
+ sign_extend_8_rr(S2,S2);
+ mov_l_mr((uae_u32)temp_fp,S2);
+ delay2;
+ fmovi_rm(FS1,(uae_u32)temp_fp);
+ break;
+ default:
+ return -1;
+ }
+ return FS1;
+}
+
+/* return of -1 means failure, >=0 means OK */
+STATIC_INLINE int put_fp_value (int val, uae_u32 opcode, uae_u16 extra)
+{
+ uae_u16 tmp;
+ uaecptr tmppc;
+ int size;
+ int mode;
+ int reg;
+ uae_u32 ad;
+ static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
+ static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
+
+ if ((extra & 0x4000) == 0) {
+ fmov_rr((extra>>10)&7,val);
+ return 0;
+ }
+
+ mode = (opcode >> 3) & 7;
+ reg = opcode & 7;
+ size = (extra >> 10) & 7;
+ ad = -1;
+ switch (mode) {
+ case 0:
+ switch (size) {
+ case 6:
+ fmovi_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_b_rm(reg,(uae_u32)temp_fp);
+ return 0;
+ case 4:
+ fmovi_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_w_rm(reg,(uae_u32)temp_fp);
+ return 0;
+ case 0:
+ fmovi_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_l_rm(reg,(uae_u32)temp_fp);
+ return 0;
+ case 1:
+ fmovs_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_l_rm(reg,(uae_u32)temp_fp);
+ return 0;
+ default:
+ return -1;
+ }
+ case 1:
+ return -1; /* genuine invalid instruction */
+ default: break;
+ }
+
+ /* Let's make sure we get out *before* doing something silly if
+ we can't handle the size */
+ switch (size) {
+ case 0:
+ case 4:
+ case 5:
+ case 6:
+ case 2:
+ case 1:
+ break;
+ case 3:
+ default:
+ return -1;
+ }
+
+ switch (mode) {
+ case 2:
+ ad=S1;
+ mov_l_rr(ad,reg+8);
+ break;
+ case 3:
+ ad=S1;
+ mov_l_rr(ad,reg+8);
+ lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size]));
+ break;
+ case 4:
+ ad=S1;
+ lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size]));
+ mov_l_rr(ad,reg+8);
+ break;
+ case 5:
+ {
+ uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ ad=S1;
+ mov_l_rr(ad,reg+8);
+ add_l_ri(ad,off);
+ break;
+ }
+ case 6:
+ {
+ uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2);
+ ad=S1;
+ calc_disp_ea_020(reg+8,dp,ad,S2);
+ break;
+ }
+ case 7:
+ switch (reg) {
+ case 0:
+ {
+ uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ ad=S1;
+ mov_l_ri(ad,off);
+ break;
+ }
+ case 1:
+ {
+ uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4);
+ ad=S1;
+ mov_l_ri(ad,off);
+ break;
+ }
+ case 2:
+ {
+ uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+ m68k_pc_offset;
+ uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ ad=S1;
+ mov_l_ri(ad,address+PC16off);
+ break;
+ }
+ case 3:
+ return -1;
+ tmppc = m68k_getpc ();
+ tmp = next_iword ();
+ ad = get_disp_ea_020 (tmppc, tmp);
+ break;
+ case 4:
+ {
+ uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+ m68k_pc_offset;
+ ad=S1;
+ mov_l_ri(ad,address);
+ m68k_pc_offset+=sz2[size];
+ break;
+ }
+ default:
+ return -1;
+ }
+ }
+ switch (size) {
+ case 0:
+ fmovi_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_l_rm(S2,(uae_u32)temp_fp);
+ writelong_clobber(ad,S2,S3);
+ break;
+ case 1:
+ fmovs_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_l_rm(S2,(uae_u32)temp_fp);
+ writelong_clobber(ad,S2,S3);
+ break;
+ case 2:
+ fmov_ext_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_w_rm(S2,(uae_u32)temp_fp+8);
+ writeword_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ mov_l_rm(S2,(uae_u32)temp_fp+4);
+ writelong_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ mov_l_rm(S2,(uae_u32)temp_fp);
+ writelong_clobber(ad,S2,S3);
+ break;
+ case 3: return -1; /* Packed */
+
+ case 4:
+ fmovi_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_l_rm(S2,(uae_u32)temp_fp);
+ writeword_clobber(ad,S2,S3);
+ break;
+ case 5:
+ fmov_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_l_rm(S2,(uae_u32)temp_fp+4);
+ writelong_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ mov_l_rm(S2,(uae_u32)temp_fp);
+ writelong_clobber(ad,S2,S3);
+ break;
+ case 6:
+ fmovi_mr((uae_u32)temp_fp,val);
+ delay;
+ mov_l_rm(S2,(uae_u32)temp_fp);
+ writebyte(ad,S2,S3);
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+/* return -1 for failure, or register number for success */
+STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad)
+{
+ uae_u16 tmp;
+ uaecptr tmppc;
+ int mode;
+ int reg;
+ uae_s32 off;
+
+ mode = (opcode >> 3) & 7;
+ reg = opcode & 7;
+ switch (mode) {
+ case 0:
+ case 1:
+ return -1;
+ case 2:
+ case 3:
+ case 4:
+ mov_l_rr(S1,8+reg);
+ return S1;
+ *ad = m68k_areg (regs, reg);
+ break;
+ case 5:
+ off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+
+ mov_l_rr(S1,8+reg);
+ add_l_ri(S1,off);
+ return S1;
+ case 6:
+ return -1;
+ break;
+ case 7:
+ switch (reg) {
+ case 0:
+ off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ mov_l_ri(S1,off);
+ return S1;
+ case 1:
+ off=comp_get_ilong((m68k_pc_offset+=4)-4);
+ mov_l_ri(S1,off);
+ return S1;
+ case 2:
+ return -1;
+ *ad = m68k_getpc ();
+ *ad += (uae_s32) (uae_s16) next_iword ();
+ break;
+ case 3:
+ return -1;
+ tmppc = m68k_getpc ();
+ tmp = next_iword ();
+ *ad = get_disp_ea_020 (tmppc, tmp);
+ break;
+ default:
+ return -1;
+ }
+ }
+ abort();
+ return -1;
+}
+
+void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
+{
+ FAIL(1);
+ return;
+
+ if (!currprefs.compfpu) {
+ FAIL(1);
+ return;
+ }
+}
+
+void comp_fscc_opp (uae_u32 opcode, uae_u16 extra)
+{
+ uae_u32 ad;
+ int cc;
+ int reg;
+
+ if (!currprefs.compfpu) {
+ FAIL(1);
+ return;
+ }
+
+#if DEBUG_FPP
+ printf ("fscc_opp at %08lx\n", m68k_getpc ());
+ fflush (stdout);
+#endif
+
+
+ if (extra&0x20) { /* only cc from 00 to 1f are defined */
+ FAIL(1);
+ return;
+ }
+ if ((opcode & 0x38) != 0) { /* We can only do to integer register */
+ FAIL(1);
+ return;
+ }
+
+ fflags_into_flags(S2);
+ reg=(opcode&7);
+
+ mov_l_ri(S1,255);
+ mov_l_ri(S4,0);
+ switch(extra&0x0f) { /* according to fpp.c, the 0x10 bit is ignored
+ */
+ case 0: break; /* set never */
+ case 1: mov_l_rr(S2,S4);
+ cmov_l_rr(S4,S1,4);
+ cmov_l_rr(S4,S2,10); break;
+ case 2: cmov_l_rr(S4,S1,7); break;
+ case 3: cmov_l_rr(S4,S1,3); break;
+ case 4: mov_l_rr(S2,S4);
+ cmov_l_rr(S4,S1,2);
+ cmov_l_rr(S4,S2,10); break;
+ case 5: mov_l_rr(S2,S4);
+ cmov_l_rr(S4,S1,6);
+ cmov_l_rr(S4,S2,10); break;
+ case 6: cmov_l_rr(S4,S1,5); break;
+ case 7: cmov_l_rr(S4,S1,11); break;
+ case 8: cmov_l_rr(S4,S1,10); break;
+ case 9: cmov_l_rr(S4,S1,4); break;
+ case 10: cmov_l_rr(S4,S1,10); cmov_l_rr(S4,S1,7); break;
+ case 11: cmov_l_rr(S4,S1,4); cmov_l_rr(S4,S1,3); break;
+ case 12: cmov_l_rr(S4,S1,2); break;
+ case 13: cmov_l_rr(S4,S1,6); break;
+ case 14: cmov_l_rr(S4,S1,5); cmov_l_rr(S4,S1,10); break;
+ case 15: mov_l_rr(S4,S1); break;
+ }
+
+ if ((opcode & 0x38) == 0) {
+ mov_b_rr(reg,S4);
+ } else {
+ abort();
+ if (get_fp_ad (opcode, &ad) == 0) {
+ m68k_setpc (m68k_getpc () - 4);
+ op_illg (opcode);
+ } else
+ put_byte (ad, cc ? 0xff : 0x00);
+ }
+}
+
+void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc)
+{
+ int cc;
+
+ FAIL(1);
+ return;
+}
+
+extern unsigned long foink3, oink;
+
+void comp_fbcc_opp (uae_u32 opcode)
+{
+ uae_u32 start_68k_offset=m68k_pc_offset;
+ uae_u32 off;
+ uae_u32 v1;
+ uae_u32 v2;
+ uae_u32 nh;
+ int cc;
+
+ if (!currprefs.compfpu) {
+ FAIL(1);
+ return;
+ }
+
+ if (opcode&0x20) { /* only cc from 00 to 1f are defined */
+ FAIL(1);
+ return;
+ }
+ if ((opcode&0x40)==0) {
+ off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ }
+ else {
+ off=comp_get_ilong((m68k_pc_offset+=4)-4);
+ }
+ mov_l_ri(S1,(uae_u32)
+ (comp_pc_p+off-(m68k_pc_offset-start_68k_offset)));
+ mov_l_ri(PC_P,(uae_u32)comp_pc_p);
+
+ /* Now they are both constant. Might as well fold in m68k_pc_offset */
+ add_l_ri(S1,m68k_pc_offset);
+ add_l_ri(PC_P,m68k_pc_offset);
+ m68k_pc_offset=0;
+
+ /* according to fpp.c, the 0x10 bit is ignored
+ (it handles exception handling, which we don't
+ do, anyway ;-) */
+ cc=opcode&0x0f;
+ v1=get_const(PC_P);
+ v2=get_const(S1);
+ fflags_into_flags(S2);
+
+ // mov_l_mi((uae_u32)&foink3,cc);
+ switch(cc) {
+ case 0: break; /* jump never */
+ case 1:
+ mov_l_rr(S2,PC_P);
+ cmov_l_rr(PC_P,S1,4);
+ cmov_l_rr(PC_P,S2,10); break;
+ case 2: register_branch(v1,v2,7); break;
+ case 3: register_branch(v1,v2,3); break;
+ case 4:
+ mov_l_rr(S2,PC_P);
+ cmov_l_rr(PC_P,S1,2);
+ cmov_l_rr(PC_P,S2,10); break;
+ case 5:
+ mov_l_rr(S2,PC_P);
+ cmov_l_rr(PC_P,S1,6);
+ cmov_l_rr(PC_P,S2,10); break;
+ case 6: register_branch(v1,v2,5); break;
+ case 7: register_branch(v1,v2,11); break;
+ case 8: register_branch(v1,v2,10); break;
+ case 9: register_branch(v1,v2,4); break;
+ case 10:
+ cmov_l_rr(PC_P,S1,10);
+ cmov_l_rr(PC_P,S1,7); break;
+ case 11:
+ cmov_l_rr(PC_P,S1,4);
+ cmov_l_rr(PC_P,S1,3); break;
+ case 12: register_branch(v1,v2,2); break;
+ case 13: register_branch(v1,v2,6); break;
+ case 14:
+ cmov_l_rr(PC_P,S1,5);
+ cmov_l_rr(PC_P,S1,10); break;
+ case 15: mov_l_rr(PC_P,S1); break;
+ }
+}
+
+ /* Floating point conditions
+ The "NotANumber" part could be problematic; Howver, when NaN is
+ encountered, the ftst instruction sets bot N and Z to 1 on the x87,
+ so quite often things just fall into place. This is probably not
+ accurate wrt the 68k FPU, but it is *as* accurate as this was before.
+ However, some more thought should go into fixing this stuff up so
+ it accurately emulates the 68k FPU.
+>=<U
+0000 0x00: 0 --- Never jump
+0101 0x01: Z --- jump if zero (x86: 4)
+1000 0x02: !(NotANumber || Z || N) --- Neither Z nor N set (x86: 7)
+1101 0x03: Z || !(NotANumber || N); --- Z or !N (x86: 4 and 3)
+0010 0x04: N && !(NotANumber || Z); --- N and !Z (x86: hard!)
+0111 0x05: Z || (N && !NotANumber); --- Z or N (x86: 6)
+1010 0x06: !(NotANumber || Z); --- not Z (x86: 5)
+1110 0x07: !NotANumber; --- not NaN (x86: 11, not parity)
+0001 0x08: NotANumber; --- NaN (x86: 10)
+0101 0x09: NotANumber || Z; --- Z (x86: 4)
+1001 0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
+1101 0x0b: NotANumber || Z || !N; --- Z or !N (x86: 4 and 3)
+0011 0x0c: NotANumber || (N && !Z); --- N (x86: 2)
+0111 0x0d: NotANumber || Z || N; --- Z or N (x86: 6)
+1010 0x0e: !Z; --- not Z (x86: 5)
+1111 0x0f: 1; --- always
+
+This is not how the 68k handles things, though --- it sets Z to 0 and N
+to the NaN's sign.... ('o' and 'i' denote differences from the above
+table)
+
+>=<U
+0000 0x00: 0 --- Never jump
+010o 0x01: Z --- jump if zero (x86: 4, not 10)
+1000 0x02: !(NotANumber || Z || N) --- Neither Z nor N set (x86: 7)
+110o 0x03: Z || !(NotANumber || N); --- Z or !N (x86: 3)
+0010 0x04: N && !(NotANumber || Z); --- N and !Z (x86: 2, not 10)
+011o 0x05: Z || (N && !NotANumber); --- Z or N (x86: 6, not 10)
+1010 0x06: !(NotANumber || Z); --- not Z (x86: 5)
+1110 0x07: !NotANumber; --- not NaN (x86: 11, not parity)
+0001 0x08: NotANumber; --- NaN (x86: 10)
+0101 0x09: NotANumber || Z; --- Z (x86: 4)
+1001 0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
+1101 0x0b: NotANumber || Z || !N; --- Z or !N (x86: 4 and 3)
+0011 0x0c: NotANumber || (N && !Z); --- N (x86: 2)
+0111 0x0d: NotANumber || Z || N; --- Z or N (x86: 6)
+101i 0x0e: !Z; --- not Z (x86: 5 and 10)
+1111 0x0f: 1; --- always
+
+Of course, this *still* doesn't mean that the x86 and 68k conditions are
+equivalent --- the handling of infinities is different, for one thing.
+On the 68k, +infinity minus +infinity is NotANumber (as it should be). On
+the x86, it is +infinity, and some exception is raised (which I suspect
+is promptly ignored) STUPID!
+The more I learn about their CPUs, the more I detest Intel....
+
+You can see this in action if you have "Benoit" (see Aminet) and
+set the exponent to 16. Wait for a long time, and marvel at the extra black
+areas outside the center one. That's where Benoit expects NaN, and the x86
+gives +infinity. [Oops --- that must have been some kind of bug in my code.
+It no longer happens, and the resulting graphic looks much better, too]
+
+x86 conditions
+0011 : 2
+1100 : 3
+0101 : 4
+1010 : 5
+0111 : 6
+1000 : 7
+0001 : 10
+1110 : 11
+ */
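+
+/* Worked example (a sketch that simply mirrors the cc==1 case of comp_fbcc_opp
+   above, not additional functionality): "branch if equal" must branch on x86
+   condition 4 (Z set), but a NaN also sets Z on the x87, so the parity flag
+   (x86 condition 10) is checked afterwards to restore the fall-through PC:
+       mov_l_rr(S2,PC_P);        save the fall-through PC
+       cmov_l_rr(PC_P,S1,4);     Z set: move the branch target into PC_P
+       cmov_l_rr(PC_P,S2,10);    parity (NaN): restore the fall-through PC
+   matching the "0x01: Z (x86: 4, not 10)" row of the table above. */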
+void comp_fsave_opp (uae_u32 opcode)
+{
+ uae_u32 ad;
+ int incr = (opcode & 0x38) == 0x20 ? -1 : 1;
+ int i;
+
+ FAIL(1);
+ return;
+
+ if (!currprefs.compfpu) {
+ FAIL(1);
+ return;
+ }
+
+#if DEBUG_FPP
+ printf ("fsave_opp at %08lx\n", m68k_getpc ());
+ fflush (stdout);
+#endif
+ if (get_fp_ad (opcode, &ad) == 0) {
+ m68k_setpc (m68k_getpc () - 2);
+ op_illg (opcode);
+ return;
+ }
+
+ if (currprefs.cpu_level >= 4) {
+ /* 4 byte 68040 IDLE frame. */
+ if (incr < 0) {
+ ad -= 4;
+ put_long (ad, 0x41000000);
+ } else {
+ put_long (ad, 0x41000000);
+ ad += 4;
+ }
+ } else {
+ if (incr < 0) {
+ ad -= 4;
+ put_long (ad, 0x70000000);
+ for (i = 0; i < 5; i++) {
+ ad -= 4;
+ put_long (ad, 0x00000000);
+ }
+ ad -= 4;
+ put_long (ad, 0x1f180000);
+ } else {
+ put_long (ad, 0x1f180000);
+ ad += 4;
+ for (i = 0; i < 5; i++) {
+ put_long (ad, 0x00000000);
+ ad += 4;
+ }
+ put_long (ad, 0x70000000);
+ ad += 4;
+ }
+ }
+ if ((opcode & 0x38) == 0x18)
+ m68k_areg (regs, opcode & 7) = ad;
+ if ((opcode & 0x38) == 0x20)
+ m68k_areg (regs, opcode & 7) = ad;
+}
+
+void comp_frestore_opp (uae_u32 opcode)
+{
+ uae_u32 ad;
+ uae_u32 d;
+ int incr = (opcode & 0x38) == 0x20 ? -1 : 1;
+
+ FAIL(1);
+ return;
+
+ if (!currprefs.compfpu) {
+ FAIL(1);
+ return;
+ }
+
+#if DEBUG_FPP
+ printf ("frestore_opp at %08lx\n", m68k_getpc ());
+ fflush (stdout);
+#endif
+ if (get_fp_ad (opcode, &ad) == 0) {
+ m68k_setpc (m68k_getpc () - 2);
+ op_illg (opcode);
+ return;
+ }
+ if (currprefs.cpu_level >= 4) {
+ /* 68040 */
+ if (incr < 0) {
+ /* @@@ This may be wrong. */
+ ad -= 4;
+ d = get_long (ad);
+ if ((d & 0xff000000) != 0) { /* Not a NULL frame? */
+ if ((d & 0x00ff0000) == 0) { /* IDLE */
+ } else if ((d & 0x00ff0000) == 0x00300000) { /* UNIMP */
+ ad -= 44;
+ } else if ((d & 0x00ff0000) == 0x00600000) { /* BUSY */
+ ad -= 92;
+ }
+ }
+ } else {
+ d = get_long (ad);
+ ad += 4;
+ if ((d & 0xff000000) != 0) { /* Not a NULL frame? */
+ if ((d & 0x00ff0000) == 0) { /* IDLE */
+ } else if ((d & 0x00ff0000) == 0x00300000) { /* UNIMP */
+ ad += 44;
+ } else if ((d & 0x00ff0000) == 0x00600000) { /* BUSY */
+ ad += 92;
+ }
+ }
+ }
+ } else {
+ if (incr < 0) {
+ ad -= 4;
+ d = get_long (ad);
+ if ((d & 0xff000000) != 0) {
+ if ((d & 0x00ff0000) == 0x00180000)
+ ad -= 6 * 4;
+ else if ((d & 0x00ff0000) == 0x00380000)
+ ad -= 14 * 4;
+ else if ((d & 0x00ff0000) == 0x00b40000)
+ ad -= 45 * 4;
+ }
+ } else {
+ d = get_long (ad);
+ ad += 4;
+ if ((d & 0xff000000) != 0) {
+ if ((d & 0x00ff0000) == 0x00180000)
+ ad += 6 * 4;
+ else if ((d & 0x00ff0000) == 0x00380000)
+ ad += 14 * 4;
+ else if ((d & 0x00ff0000) == 0x00b40000)
+ ad += 45 * 4;
+ }
+ }
+ }
+ if ((opcode & 0x38) == 0x18)
+ m68k_areg (regs, opcode & 7) = ad;
+ if ((opcode & 0x38) == 0x20)
+ m68k_areg (regs, opcode & 7) = ad;
+}
+
+static fptype const_e=2.718281828; /* Got some more digits? */
+static fptype const_log10_e=0.4342944819;
+static fptype const_loge_10=2.302585093;
+static fptype power10[]={1e0,1e1,1e2,1e4,1e8,1e16,1e32,1e64,1e128,1e256
+#if USE_LONG_DOUBLE
+, 1e512, 1e1024, 1e2048, 1e4096
+#endif
+};
+
+/* 128 words, indexed through the low byte of the 68k fpu control word */
+static uae_u16 x86_fpucw[]={
+ 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p0r0 */
+ 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p0r1 */
+ 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p0r2 */
+ 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, /* p0r3 */
+
+ 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, /* p1r0 */
+ 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, /* p1r1 */
+ 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, /* p1r2 */
+ 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, /* p1r3 */
+
+ 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, /* p2r0 */
+ 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, /* p2r1 */
+ 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, /* p2r2 */
+ 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, /* p2r3 */
+
+ 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p3r0 */
+ 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p3r1 */
+ 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p3r2 */
+ 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f /* p3r3 */
+};
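+
+/* Usage sketch (it simply mirrors the FPCR handling in comp_fpp_opp below):
+   the 68k control word is masked to its rounding/precision bits before the
+   table lookup,
+       mov_l_rr(S1,opcode & 15);
+       and_l_ri(S1,0x000000f0);
+       fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+   so the low four FPCR bits never change the selected x86 control word; that
+   is why each block of eight entries above (one precision/rounding pair,
+   p0r0 ... p3r3) is identical. */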
+
+void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
+{
+ int reg;
+ int src;
+
+ if (!currprefs.compfpu) {
+ FAIL(1);
+ return;
+ }
+ switch ((extra >> 13) & 0x7) {
+ case 3: /* 2nd most common */
+ if (put_fp_value ((extra >> 7)&7 , opcode, extra) < 0) {
+ FAIL(1);
+ return;
+ }
+ return;
+ case 6:
+ case 7:
+ {
+ uae_u32 ad, list = 0;
+ int incr = 0;
+ if (extra & 0x2000) {
+ int ad;
+
+ /* FMOVEM FPP->memory */
+ switch ((extra >> 11) & 3) { /* Get out early if failure */
+ case 0:
+ case 2:
+ break;
+ case 1:
+ case 3:
+ default:
+ FAIL(1); return;
+ }
+ ad=get_fp_ad (opcode, &ad);
+ if (ad<0) {
+ FAIL(1);
+#if 0
+ m68k_setpc (m68k_getpc () - 4);
+ op_illg (opcode);
+#endif
+ return;
+ }
+ switch ((extra >> 11) & 3) {
+ case 0: /* static pred */
+ list = extra & 0xff;
+ incr = -1;
+ break;
+ case 2: /* static postinc */
+ list = extra & 0xff;
+ incr = 1;
+ break;
+ case 1: /* dynamic pred */
+ case 3: /* dynamic postinc */
+ abort();
+ }
+ while (list) {
+ uae_u32 wrd1, wrd2, wrd3;
+ if (incr < 0) { /* Predecrement */
+ fmov_ext_mr((uae_u32)temp_fp,fpp_movem_index2[list]);
+ delay;
+ sub_l_ri(ad,4);
+ mov_l_rm(S2,(uae_u32)temp_fp);
+ writelong_clobber(ad,S2,S3);
+ sub_l_ri(ad,4);
+ mov_l_rm(S2,(uae_u32)temp_fp+4);
+ writelong_clobber(ad,S2,S3);
+ sub_l_ri(ad,4);
+ mov_w_rm(S2,(uae_u32)temp_fp+8);
+ writeword_clobber(ad,S2,S3);
+ } else { /* postinc */
+ fmov_ext_mr((uae_u32)temp_fp,fpp_movem_index2[list]);
+ delay;
+ mov_w_rm(S2,(uae_u32)temp_fp+8);
+ writeword_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ mov_l_rm(S2,(uae_u32)temp_fp+4);
+ writelong_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ mov_l_rm(S2,(uae_u32)temp_fp);
+ writelong_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ }
+ list = fpp_movem_next[list];
+ }
+ if ((opcode & 0x38) == 0x18)
+ mov_l_rr((opcode & 7)+8,ad);
+ if ((opcode & 0x38) == 0x20)
+ mov_l_rr((opcode & 7)+8,ad);
+ } else {
+ /* FMOVEM memory->FPP */
+
+ int ad;
+ switch ((extra >> 11) & 3) { /* Get out early if failure */
+ case 0:
+ case 2:
+ break;
+ case 1:
+ case 3:
+ default:
+ FAIL(1); return;
+ }
+ ad=get_fp_ad (opcode, &ad);
+ if (ad<0) {
+ FAIL(1);
+#if 0
+ m68k_setpc (m68k_getpc () - 4);
+ op_illg (opcode);
+#endif
+ return;
+ }
+ switch ((extra >> 11) & 3) {
+ case 0: /* static pred */
+ list = extra & 0xff;
+ incr = -1;
+ break;
+ case 2: /* static postinc */
+ list = extra & 0xff;
+ incr = 1;
+ break;
+ case 1: /* dynamic pred */
+ case 3: /* dynamic postinc */
+ abort();
+ }
+
+ while (list) {
+ uae_u32 wrd1, wrd2, wrd3;
+ if (incr < 0) {
+ sub_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)(temp_fp),S2);
+ sub_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)(temp_fp)+4,S2);
+ sub_l_ri(ad,4);
+ readword(ad,S2,S3);
+ mov_w_mr(((uae_u32)temp_fp)+8,S2);
+ delay2;
+ fmov_ext_rm(fpp_movem_index2[list],(uae_u32)(temp_fp));
+ } else {
+ readword(ad,S2,S3);
+ mov_w_mr(((uae_u32)temp_fp)+8,S2);
+ add_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)(temp_fp)+4,S2);
+ add_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uae_u32)(temp_fp),S2);
+ add_l_ri(ad,4);
+ delay2;
+ fmov_ext_rm(fpp_movem_index1[list],(uae_u32)(temp_fp));
+ }
+ list = fpp_movem_next[list];
+ }
+ if ((opcode & 0x38) == 0x18)
+ mov_l_rr((opcode & 7)+8,ad);
+ if ((opcode & 0x38) == 0x20)
+ mov_l_rr((opcode & 7)+8,ad);
+ }
+ }
+ return;
+
+ case 4:
+ case 5: /* rare */
+ if ((opcode & 0x30) == 0) {
+ if (extra & 0x2000) {
+ if (extra & 0x1000) {
+ mov_l_rm(opcode & 15,(uae_u32)&regs.fpcr); return;
+ }
+ if (extra & 0x0800) {
+ FAIL(1);
+ return;
+ }
+ if (extra & 0x0400) {
+ mov_l_rm(opcode & 15,(uae_u32)&regs.fpiar); return;
+ }
+ } else {
+ if (extra & 0x1000) {
+ mov_l_mr((uae_u32)&regs.fpcr,opcode & 15);
+#if USE_X86_FPUCW
+ mov_l_rr(S1,opcode & 15);
+ and_l_ri(S1,0x000000f0);
+ fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+#endif
+ return;
+ }
+ if (extra & 0x0800) {
+ FAIL(1);
+ return;
+ // set_fpsr(m68k_dreg (regs, opcode & 15));
+ }
+ if (extra & 0x0400) {
+ mov_l_mr((uae_u32)&regs.fpiar,opcode & 15); return;
+ }
+ }
+ } else if ((opcode & 0x3f) == 0x3c) {
+ if ((extra & 0x2000) == 0) {
+ if (extra & 0x1000) {
+ uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4);
+ mov_l_mi((uae_u32)&regs.fpcr,val);
+#if USE_X86_FPUCW
+ mov_l_ri(S1,val&0x000000f0);
+ fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+#endif
+ return;
+ }
+ if (extra & 0x0800) {
+ FAIL(1);
+ return;
+ }
+ if (extra & 0x0400) {
+ uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4);
+ mov_l_mi((uae_u32)&regs.fpiar,val);
+ return;
+ }
+ }
+ FAIL(1);
+ return;
+ } else if (extra & 0x2000) {
+ FAIL(1);
+ return;
+ } else {
+ FAIL(1);
+ return;
+ }
+ FAIL(1);
+ return;
+
+ case 0:
+ case 2: /* Extremely common */
+ reg = (extra >> 7) & 7;
+ if ((extra & 0xfc00) == 0x5c00) {
+ switch (extra & 0x7f) {
+ case 0x00:
+ fmov_pi(reg);
+ break;
+ case 0x0b:
+ fmov_log10_2(reg);
+ break;
+ case 0x0c:
+ fmov_rm(reg,(uae_u32)&const_e);
+ break;
+ case 0x0d:
+ fmov_log2_e(reg);
+ break;
+ case 0x0e:
+ fmov_rm(reg,(uae_u32)&const_log10_e);
+ break;
+ case 0x0f:
+ fmov_0(reg);
+ break;
+ case 0x30:
+ fmov_loge_2(reg);
+ break;
+ case 0x31:
+ fmov_rm(reg,(uae_u32)&const_loge_10);
+ break;
+ case 0x32:
+ fmov_1(reg);
+ break;
+ case 0x33:
+ case 0x34:
+ case 0x35:
+ case 0x36:
+ case 0x37:
+ case 0x38:
+ case 0x39:
+ case 0x3a:
+ case 0x3b:
+ fmov_rm(reg,(uae_u32)(power10+(extra & 0x7f)-0x32));
+ break;
+ default:
+ /* This is not valid, so we fail */
+ FAIL(1);
+ return;
+ }
+ return;
+ }
+
+ switch (extra & 0x7f) {
+ case 0x00: /* FMOVE */
+ case 0x40: /* Explicit rounding. This is just a quick fix. Same
+ * for all other cases that have three choices */
+ case 0x44:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmov_rr(reg,src);
+ MAKE_FPSR (src);
+ break;
+ case 0x01: /* FINT */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ case 0x02: /* FSINH */
+ FAIL(1);
+ return;
+
+ dont_care_fflags();
+ regs.fp[reg] = sinh (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x03: /* FINTRZ */
+#if USE_X86_FPUCW
+ /* If we have control over the CW, we can do this */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ mov_l_ri(S1,16); /* Switch to "round to zero" mode */
+ fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+
+ frndint_rr(reg,src);
+
+ /* restore control word */
+ mov_l_rm(S1,(uae_u32)&regs.fpcr);
+ and_l_ri(S1,0x000000f0);
+ fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+
+ MAKE_FPSR (reg);
+ break;
+#endif
+ FAIL(1);
+ return;
+ regs.fp[reg] = (int) src;
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x04: /* FSQRT */
+ case 0x41:
+ case 0x45:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fsqrt_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x06: /* FLOGNP1 */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = log (src + 1.0);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x08: /* FETOXM1 */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = exp (src) - 1.0;
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x09: /* FTANH */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = tanh (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x0a: /* FATAN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = atan (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x0c: /* FASIN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = asin (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x0d: /* FATANH */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+#if 1 /* The BeBox doesn't have atanh, and it isn't in the HPUX libm either */
+ regs.fp[reg] = log ((1 + src) / (1 - src)) / 2;
+#else
+ regs.fp[reg] = atanh (src);
+#endif
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x0e: /* FSIN */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fsin_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x0f: /* FTAN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = tan (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x10: /* FETOX */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fetox_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x11: /* FTWOTOX */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ ftwotox_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x12: /* FTENTOX */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = pow (10.0, src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x14: /* FLOGN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = log (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x15: /* FLOG10 */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = log10 (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x16: /* FLOG2 */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ flog2_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x18: /* FABS */
+ case 0x58:
+ case 0x5c:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fabs_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x19: /* FCOSH */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = cosh (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x1a: /* FNEG */
+ case 0x5a:
+ case 0x5e:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fneg_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x1c: /* FACOS */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = acos (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x1d: /* FCOS */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fcos_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x1e: /* FGETEXP */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ {
+ int expon;
+ frexp (src, &expon);
+ regs.fp[reg] = (double) (expon - 1);
+ MAKE_FPSR (regs.fp[reg]);
+ }
+ break;
+ case 0x1f: /* FGETMAN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ {
+ int expon;
+ regs.fp[reg] = frexp (src, &expon) * 2.0;
+ MAKE_FPSR (regs.fp[reg]);
+ }
+ break;
+ case 0x20: /* FDIV */
+ case 0x60:
+ case 0x64:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fdiv_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x21: /* FMOD */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ frem_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x22: /* FADD */
+ case 0x62:
+ case 0x66:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fadd_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x23: /* FMUL */
+ case 0x63:
+ case 0x67:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmul_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x24: /* FSGLDIV */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fdiv_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x25: /* FREM */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ frem1_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x26: /* FSCALE */
+ dont_care_fflags();
+ FAIL(1);
+ return;
+ regs.fp[reg] *= exp (log (2.0) * src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x27: /* FSGLMUL */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmul_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x28: /* FSUB */
+ case 0x68:
+ case 0x6c:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fsub_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x30: /* FSINCOS */
+ case 0x31:
+ case 0x32:
+ case 0x33:
+ case 0x34:
+ case 0x35:
+ case 0x36:
+ case 0x37:
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ regs.fp[reg] = sin (src);
+ regs.fp[extra & 7] = cos (src);
+ MAKE_FPSR (regs.fp[reg]);
+ break;
+ case 0x38: /* FCMP */
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmov_rr(FP_RESULT,reg);
+ fsub_rr(FP_RESULT,src); /* Right way? */
+ break;
+ case 0x3a: /* FTST */
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmov_rr(FP_RESULT,src);
+ break;
+ default:
+ FAIL(1);
+ return;
+ break;
+ }
+ return;
+ }
+ m68k_setpc (m68k_getpc () - 4);
+ op_illg (opcode);
+}
live.spos[q]=p;
}
-static __inline__ void make_tos2(int r, int r2)
-{
- int q;
-
- make_tos(r2); /* Put the reg that's supposed to end up in position2
- on top */
-
- if (live.spos[r]<0) { /* Register not yet on stack */
- make_tos(r); /* This will extend the stack */
- return;
- }
- /* Register is on stack */
- emit_byte(0xd9);
- emit_byte(0xc9); /* Move r2 into position 2 */
-
- q=live.onstack[live.tos-1];
- live.onstack[live.tos]=q;
- live.spos[q]=live.tos;
- live.onstack[live.tos-1]=r2;
- live.spos[r2]=live.tos-1;
-
- make_tos(r); /* And r into 1 */
-}
-
static __inline__ int stackpos(int r)
{
if (live.spos[r]<0)
return live.tos-live.spos[r];
}
+/* IMO, calling usereg(r) makes no sense when the register r is supposed to supply our
+   function with an argument, because all arguments should already be on the stack by then.
+   Thus usereg(s) is always useless, and for every FRW d it is already too late here. PeterK
+*/
static __inline__ void usereg(int r)
{
- if (live.spos[r]<0)
+
+ if (live.spos[r]<0) {
+ // write_log ("usereg wants to push reg %d onto the x87 stack calling make_tos\n", r);
make_tos(r);
+ }
}
-/* This is called with one FP value in a reg *above* tos, which it will
- pop off the stack if necessary */
+/* This is called with one FP value in a reg *above* tos,
+ which it will pop off the stack if necessary */
static __inline__ void tos_make(int r)
{
if (live.spos[r]<0) {
return;
}
emit_byte(0xdd);
- emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
- and pop it*/
+ emit_byte(0xd8+(live.tos+1)-live.spos[r]);
+ /* store top of stack in reg and pop it*/
}
{
int ds;
- usereg(s);
ds=stackpos(s);
if (ds==0 && live.spos[d]>=0) {
/* source is on top of stack, and we already have the dest */
int ds;
if (d!=s) {
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xc0+ds); /* fld x */
emit_byte(0xd9);
- emit_byte(0xfa); /* take square root */
- tos_make(d); /* store to destination */
+ emit_byte(0xfa); /* fsqrt sqrt(x) */
+ tos_make(d); /* store to destination */
}
else {
make_tos(d);
emit_byte(0xd9);
- emit_byte(0xfa); /* take square root */
+ emit_byte(0xfa); /* fsqrt y=sqrt(x) */
}
}
LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
int ds;
if (d!=s) {
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xc0+ds); /* fld x */
emit_byte(0xd9);
- emit_byte(0xe1); /* take fabs */
- tos_make(d); /* store to destination */
+ emit_byte(0xe1); /* fabs abs(x) */
+ tos_make(d); /* store to destination */
}
else {
make_tos(d);
emit_byte(0xd9);
- emit_byte(0xe1); /* take fabs */
+ emit_byte(0xe1); /* fabs y=abs(x) */
}
}
LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
int ds;
if (d!=s) {
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xc0+ds); /* fld x */
emit_byte(0xd9);
- emit_byte(0xfc); /* take frndint */
- tos_make(d); /* store to destination */
+ emit_byte(0xfc); /* frndint int(x) */
+ tos_make(d); /* store to destination */
}
else {
make_tos(d);
emit_byte(0xd9);
- emit_byte(0xfc); /* take frndint */
+ emit_byte(0xfc); /* frndint y=int(x) */
}
}
LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
-LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
{
int ds;
if (d!=s) {
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xc0+ds); /* fld x */
emit_byte(0xd9);
- emit_byte(0xff); /* take cos */
- tos_make(d); /* store to destination */
+ emit_byte(0xfe); /* fsin sin(x) */
+ tos_make(d); /* store to destination */
}
else {
make_tos(d);
emit_byte(0xd9);
- emit_byte(0xff); /* take cos */
+ emit_byte(0xfe); /* fsin y=sin(x) */
}
}
-LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
-LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
{
int ds;
if (d!=s) {
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xc0+ds); /* fld x */
emit_byte(0xd9);
- emit_byte(0xfe); /* take sin */
- tos_make(d); /* store to destination */
+ emit_byte(0xff); /* fcos cos(x) */
+ tos_make(d); /* store to destination */
}
else {
make_tos(d);
emit_byte(0xd9);
- emit_byte(0xfe); /* take sin */
+ emit_byte(0xff); /* fcos y=cos(x) */
}
}
-LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
-double one=1;
-LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xf2); /* fptan tan(x)=y/1.0 */
+ emit_byte(0xdd);
+ emit_byte(0xd8); /* fstp pop 1.0 */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xf2); /* fptan tan(x)=y/1.0 */
+ emit_byte(0xdd);
+ emit_byte(0xd8); /* fstp pop 1.0 */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s))
{
int ds;
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xfb); /* fsincos sin(x) push cos(x) */
+ if ((live.spos[c]<0)&&(live.spos[d]<0)) {
+ live.tos++;
+ live.spos[d]=live.tos;
+ live.onstack[live.tos]=d; /* sin(x) comes first */
+ live.tos++;
+ live.spos[c]=live.tos;
+ live.onstack[live.tos]=c;
+ return; /* occupy both regs directly */
+ }
+ if (live.spos[c]<0) {
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap cos(x) with sin(x) */
+ emit_byte(0xdd); /* store sin(x) to d & pop */
+ emit_byte(0xd8+(live.tos+2)-live.spos[d]);
+ live.tos++; /* occupy a reg for cos(x) here */
+ live.spos[c]=live.tos;
+ live.onstack[live.tos]=c;
+ }
+ else {
+ emit_byte(0xdd); /* store cos(x) to c & pop */
+ emit_byte(0xd8+(live.tos+2)-live.spos[c]);
+ tos_make(d); /* store sin(x) to destination */
+ }
+}
+LENDFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s))
+
+float one=1;
+
+LOWFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s))
+{
+ int ds;
+
+ make_tos(s); /* tos=x */
+ ds=stackpos(d);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld y */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale y*(2^x) */
+ tos_make(d); /* store y=y*(2^x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+{
+ int ds;
+ ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
+ emit_byte(0xc0+ds); /* fld x */
emit_byte(0xd9);
- emit_byte(0xfc); /* rndint */
+ emit_byte(0xfc); /* frndint int(x) */
emit_byte(0xd9);
- emit_byte(0xc9); /* swap top two elements */
+ emit_byte(0xc1+ds); /* fld x again */
emit_byte(0xd8);
- emit_byte(0xe1); /* subtract rounded from original */
+ emit_byte(0xe1); /* fsub frac(x) = x - int(x) */
emit_byte(0xd9);
- emit_byte(0xf0); /* f2xm1 */
- emit_byte(0xdc);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
emit_byte(0x05);
- emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
emit_byte(0xd9);
- emit_byte(0xfd); /* and scale it */
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x) */
emit_byte(0xdd);
- emit_byte(0xd9); /* take he rounded value off */
- tos_make(d); /* store to destination */
+ emit_byte(0xd9); /* fstp copy & pop */
+ tos_make(d); /* store y=2^x */
}
LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
{
int ds;
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xc0+ds); /* fld x */
emit_byte(0xd9);
- emit_byte(0xea); /* fldl2e */
- emit_byte(0xde);
- emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
+ emit_byte(0xea); /* fldl2e log2(e) */
+ emit_byte(0xd8);
+ emit_byte(0xc9); /* fmul x*log2(e) */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy up */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* frndint int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap top two elements */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* fstp copy & pop */
+ tos_make(d); /* store y=e^x */
+}
+LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s))
+{
+ int ds;
+ ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xea); /* fldl2e log2(e) */
+ emit_byte(0xd8);
+ emit_byte(0xc9); /* fmul x*log2(e) */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy up */
emit_byte(0xd9);
- emit_byte(0xfc); /* rndint */
+ emit_byte(0xfc); /* frndint int(x*log2(e)) */
emit_byte(0xd9);
- emit_byte(0xc9); /* swap top two elements */
+ emit_byte(0xc9); /* fxch swap top two elements */
emit_byte(0xd8);
- emit_byte(0xe1); /* subtract rounded from original */
+ emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */
emit_byte(0xd9);
- emit_byte(0xf0); /* f2xm1 */
- emit_byte(0xdc);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale ((2^frac(x))-1)*2^int(x*log2(e)) */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* fstp copy & pop */
+ tos_make(d); /* store y=(e^x)-1 */
+}
+LENDFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s))
+{
+ int ds;
+
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xe9); /* fldl2t log2(10) */
+ emit_byte(0xd8);
+ emit_byte(0xc9); /* fmul x*log2(10) */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy up */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* frndint int(x*log2(10)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap top two elements */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* fsub x*log2(10) - int(x*log2(10)) */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
emit_byte(0x05);
- emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
emit_byte(0xd9);
- emit_byte(0xfd); /* and scale it */
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(10)) */
emit_byte(0xdd);
- emit_byte(0xd9); /* take he rounded value off */
- tos_make(d); /* store to destination */
+ emit_byte(0xd9); /* fstp copy & pop */
+ tos_make(d); /* store y=10^x */
}
-LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+LENDFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s))
LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
{
int ds;
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
- emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xc0+ds); /* fld x */
emit_byte(0xd9);
- emit_byte(0xe8); /* push '1' */
+ emit_byte(0xe8); /* fld1 1 */
emit_byte(0xd9);
- emit_byte(0xc9); /* swap top two */
+ emit_byte(0xc9); /* fxch swap 1 with x */
emit_byte(0xd9);
- emit_byte(0xf1); /* take 1*log2(x) */
- tos_make(d); /* store to destination */
+ emit_byte(0xf1); /* fyl2x 1*log2(x) */
+ tos_make(d); /* store y=log2(x) */
}
LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s))
+{
+ int ds;
+
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xed); /* fldln2 logN(2) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap logN(2) with x */
+ emit_byte(0xd9);
+ emit_byte(0xf1); /* fyl2x logN(2)*log2(x) */
+ tos_make(d); /* store y=logN(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s))
+{
+ int ds;
+
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xed); /* fldln2 logN(2) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap logN(2) with x */
+ emit_byte(0xd9);
+ emit_byte(0xf9); /* fyl2xp1 logN(2)*log2(x+1) */
+ tos_make(d); /* store y=logN(x+1) */
+}
+LENDFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s))
+{
+ int ds;
+
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xec); /* fldlg2 log10(2) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap log10(2) with x */
+ emit_byte(0xd9);
+ emit_byte(0xf1); /* fyl2x log10(2)*log2(x) */
+ tos_make(d); /* store y=log10(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s))
+{
+ int ds;
+
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* fld 1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xc1+ds); /* fld x */
+ emit_byte(0xd8);
+ emit_byte(0xc8); /* fmul x*x */
+ emit_byte(0xd8);
+ emit_byte(0xe9); /* fsubr 1 - (x^2) */
+ emit_byte(0xd9);
+ emit_byte(0xfa); /* fsqrt sqrt(1-(x^2)) */
+ emit_byte(0xd8);
+ emit_byte(0xfa+ds); /* fdivr x / sqrt(1-(x^2)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap with 1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xf3); /* fpatan atan(x)/1 & pop */
+ tos_make(d); /* store y=asin(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s))
+
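+/* acos(x) uses the same construction with the quotient inverted:
+   acos(x) = atan(sqrt(1-x^2)/x). */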
+LOWFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s))
+{
+ int ds;
+
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* fld 1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xc1+ds); /* fld x */
+ emit_byte(0xd8);
+ emit_byte(0xc8); /* fmul x*x */
+ emit_byte(0xd8);
+ emit_byte(0xe9); /* fsubr 1 - (x^2) */
+ emit_byte(0xd9);
+ emit_byte(0xfa); /* fsqrt sqrt(1-(x^2)) */
+ emit_byte(0xd8);
+ emit_byte(0xf2+ds); /* fdiv sqrt(1-(x^2)) / x */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap with 1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xf3); /* fpatan atan(x)/1 & pop */
+ tos_make(d); /* store y=acos(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s))
+{
+ int ds;
+
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* fld 1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xf3); /* fpatan atan(x)/1 */
+ tos_make(d); /* store y=atan(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s))
+
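+/* atanh(x) = 0.5*logN((1+x)/(1-x)); the log is done with fldln2/fyl2x
+   and the final halving with fscale by an exponent of -1. */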
+LOWFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s))
+{
+ int ds;
+
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* fld 1.0 */
+ emit_byte(0xdc);
+ emit_byte(0xc1); /* fadd 1 + x */
+ emit_byte(0xd8);
+ emit_byte(0xe2+ds); /* fsub 1 - x */
+ emit_byte(0xde);
+ emit_byte(0xf9); /* fdivp (1+x)/(1-x) */
+ emit_byte(0xd9);
+ emit_byte(0xed); /* fldln2 logN(2) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap logN(2) with (1+x)/(1-x) */
+ emit_byte(0xd9);
+ emit_byte(0xf1); /* fyl2x logN(2)*log2((1+x)/(1-x)) pop */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* fld 1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* fchs -1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale logN((1+x)/(1-x)) * 2^(-1) */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* fstp copy & pop */
+ tos_make(d); /* store y=atanh(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s))
+
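+/* sinh(x) = (e^x - e^-x)/2.  Both exponentials are expanded with the
+   usual f2xm1/fscale sequence; if a third value (tr) occupies the
+   needed stack slot, it is apparently spilled to [esp] around the
+   computation.  The final division by 2 is an fscale by -1. */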
+LOWFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s))
+{
+ int ds,tr;
+
+ tr=live.onstack[live.tos+3];
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xea); /* fldl2e log2(e) */
+ emit_byte(0xd8);
+ emit_byte(0xc9); /* fmul x*log2(e) */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy x*log2(e) */
+ if (tr>=0) {
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap with temp-reg */
+ emit_byte(0x83);
+ emit_byte(0xc4);
+ emit_byte(0xf4); /* add -12 to esp */
+ emit_byte(0xdb);
+ emit_byte(0x3c);
+ emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */
+ }
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* fchs -x*log2(e) */
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* fld -x*log2(e) again */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* frndint int(-x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap e^-x with x*log2(e) in tr */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy x*log2(e) */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* frndint int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* fstp copy e^x & pop */
+ if (tr>=0) {
+ emit_byte(0xdb);
+ emit_byte(0x2c);
+ emit_byte(0x24); /* fld load temp-reg from [esp] */
+ emit_byte(0x83);
+ emit_byte(0xc4);
+ emit_byte(0x0c); /* add +12 to esp */
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */
+ emit_byte(0xde);
+ emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */
+ }
+ else {
+ emit_byte(0xde);
+ emit_byte(0xe1); /* fsubrp (e^x)-(e^-x) */
+ }
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* fld 1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* fchs -1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale ((e^x)-(e^-x))/2 */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* fstp copy & pop */
+ tos_make(d); /* store y=sinh(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s))
+
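+/* cosh(x) = (e^x + e^-x)/2, built like sinh above but with an add
+   instead of a subtract before the final fscale by -1. */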
+LOWFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s))
+{
+ int ds,tr;
+
+ tr=live.onstack[live.tos+3];
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xea); /* fldl2e log2(e) */
+ emit_byte(0xd8);
+ emit_byte(0xc9); /* fmul x*log2(e) */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy x*log2(e) */
+ if (tr>=0) {
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap with temp-reg */
+ emit_byte(0x83);
+ emit_byte(0xc4);
+ emit_byte(0xf4); /* add -12 to esp */
+ emit_byte(0xdb);
+ emit_byte(0x3c);
+ emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */
+ }
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* fchs -x*log2(e) */
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* fld -x*log2(e) again */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* frndint int(-x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap e^-x with x*log2(e) in tr */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy x*log2(e) */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* frndint int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* fstp copy e^x & pop */
+ if (tr>=0) {
+ emit_byte(0xdb);
+ emit_byte(0x2c);
+ emit_byte(0x24); /* fld load temp-reg from [esp] */
+ emit_byte(0x83);
+ emit_byte(0xc4);
+ emit_byte(0x0c); /* add +12 to esp */
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */
+ }
+ emit_byte(0xde);
+ emit_byte(0xc1); /* faddp (e^x)+(e^-x) */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* fld 1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* fchs -1.0 */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale ((e^x)+(e^-x))/2 */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* fstp copy & pop */
+ tos_make(d); /* store y=cosh(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s))
+
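+/* tanh(x) = (e^x - e^-x)/(e^x + e^-x); sum and difference of the two
+   exponentials are formed and divided, so no final fscale is needed. */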
+LOWFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s))
+{
+ int ds,tr;
+
+ tr=live.onstack[live.tos+3];
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld x */
+ emit_byte(0xd9);
+ emit_byte(0xea); /* fldl2e log2(e) */
+ emit_byte(0xd8);
+ emit_byte(0xc9); /* fmul x*log2(e) */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy x*log2(e) */
+ if (tr>=0) {
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap with temp-reg */
+ emit_byte(0x83);
+ emit_byte(0xc4);
+ emit_byte(0xf4); /* add -12 to esp */
+ emit_byte(0xdb);
+ emit_byte(0x3c);
+ emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */
+ }
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* fchs -x*log2(e) */
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* fld -x*log2(e) again */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* frndint int(-x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap e^-x with x*log2(e) */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy x*log2(e) */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* frndint int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* fxch swap */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */
+ emit_byte(0xd8);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* fadd (2^frac(x))-1 + 1 */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */
+ emit_byte(0xdd);
+ emit_byte(0xd1); /* fst copy e^x */
+ emit_byte(0xd8);
+ emit_byte(0xc2); /* fadd (e^x)+(e^-x) */
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap with e^-x */
+ emit_byte(0xde);
+ emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */
+ if (tr>=0) {
+ emit_byte(0xdb);
+ emit_byte(0x2c);
+ emit_byte(0x24); /* fld load temp-reg from [esp] */
+ emit_byte(0x83);
+ emit_byte(0xc4);
+ emit_byte(0x0c); /* add +12 to esp */
+ emit_byte(0xd9);
+ emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */
+ emit_byte(0xde);
+ emit_byte(0xf9); /* fdivp ((e^x)-(e^-x))/((e^x)+(e^-x)) */
+ }
+ else {
+ emit_byte(0xde);
+ emit_byte(0xf1); /* fdivrp ((e^x)-(e^-x))/((e^x)+(e^-x)) */
+ }
+ tos_make(d); /* store y=tanh(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s))
LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
{
int ds;
if (d!=s) {
- usereg(s);
ds=stackpos(s);
emit_byte(0xd9);
emit_byte(0xc0+ds); /* duplicate source */
{
int ds;
- usereg(s);
- usereg(d);
-
if (live.spos[s]==live.tos) {
/* Source is on top of stack */
ds=stackpos(d);
LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
{
int ds;
-
- usereg(s);
- usereg(d);
if (live.spos[s]==live.tos) {
/* Source is on top of stack */
LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
{
int ds;
-
- usereg(s);
- usereg(d);
make_tos(d);
ds=stackpos(s);
LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
{
int ds;
-
- usereg(s);
- usereg(d);
if (live.spos[s]==live.tos) {
/* Source is on top of stack */
LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
{
int ds;
-
- usereg(s);
- usereg(d);
if (live.spos[s]==live.tos) {
/* Source is on top of stack */
LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
{
int ds;
-
- usereg(s);
- usereg(d);
- make_tos2(d,s);
- ds=stackpos(s);
-
- if (ds!=1) {
- printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
- abort();
- }
+ make_tos(s); /* tos=x */
+ ds=stackpos(d);
emit_byte(0xd9);
- emit_byte(0xf8); /* take rem from dest by source */
+ emit_byte(0xc0+ds); /* fld y */
+ emit_byte(0xd9);
+ emit_byte(0xf8); /* fprem rem(y/x) */
+ tos_make(d); /* store y=rem(y/x) */
}
LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
{
int ds;
- usereg(s);
- usereg(d);
-
- make_tos2(d,s);
- ds=stackpos(s);
-
- if (ds!=1) {
- printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
- abort();
- }
+ make_tos(s); /* tos=x */
+ ds=stackpos(d);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* fld y */
emit_byte(0xd9);
- emit_byte(0xf5); /* take rem1 from dest by source */
+ emit_byte(0xf5); /* fprem1 rem1(y/x) */
+ tos_make(d); /* store y=rem1(y/x) */
}
LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
--- /dev/null
+/* This should eventually end up in machdep/, but for now, x86 is the
+ only target, and it's easier this way... */
+
+/*************************************************************************
+ * Some basic information about the target CPU                          *
+ *************************************************************************/
+
+#define EAX 0
+#define ECX 1
+#define EDX 2
+#define EBX 3
+
+/* The register in which subroutines return an integer return value */
+#define REG_RESULT 0
+
+/* The registers subroutines take their first and second argument in */
+#define REG_PAR1 0
+#define REG_PAR2 2
+
+/* Three registers that are not used for any of the above */
+#define REG_NOPAR1 6
+#define REG_NOPAR2 5
+#define REG_NOPAR3 3
+
+#define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
+#define REG_PC_TMP 1 /* Another register that is not the above */
+
+#define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount.
+ -1 if any reg will do */
+#define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
+#define MUL_NREG2 2 /* %edx will hold the high 32 bits */
+
+uae_s8 always_used[]={4,-1};
+uae_s8 can_byte[]={0,1,2,3,-1};
+uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
+
+uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
+
+/* This *should* be the same as call_saved. But:
+ - We might not really know which registers are saved, and which aren't,
+ so we need to preserve some, but don't want to rely on everyone else
+ also saving those registers
+ - Special registers (such as the stack pointer) should not be "preserved"
+ by pushing, even though they are "saved" across function calls
+*/
+uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
+
+/* Whether classes of instructions do or don't clobber the native flags */
+#define CLOBBER_MOV
+#define CLOBBER_LEA
+#define CLOBBER_CMOV
+#define CLOBBER_POP
+#define CLOBBER_PUSH
+#define CLOBBER_SUB clobber_flags()
+#define CLOBBER_SBB clobber_flags()
+#define CLOBBER_CMP clobber_flags()
+#define CLOBBER_ADD clobber_flags()
+#define CLOBBER_ADC clobber_flags()
+#define CLOBBER_AND clobber_flags()
+#define CLOBBER_OR clobber_flags()
+#define CLOBBER_XOR clobber_flags()
+
+#define CLOBBER_ROL clobber_flags()
+#define CLOBBER_ROR clobber_flags()
+#define CLOBBER_SHLL clobber_flags()
+#define CLOBBER_SHRL clobber_flags()
+#define CLOBBER_SHRA clobber_flags()
+#define CLOBBER_TEST clobber_flags()
+#define CLOBBER_CL16
+#define CLOBBER_CL8
+#define CLOBBER_SE16
+#define CLOBBER_SE8
+#define CLOBBER_ZE16
+#define CLOBBER_ZE8
+#define CLOBBER_SW16 clobber_flags()
+#define CLOBBER_SW32
+#define CLOBBER_SETCC
+#define CLOBBER_MUL clobber_flags()
+#define CLOBBER_BT clobber_flags()
+#define CLOBBER_BSF clobber_flags()
+
+/*************************************************************************
+ * Actual encoding of the instructions on the target CPU *
+ *************************************************************************/
+
+static int have_cmov=0; /* We need to generate different code if
+ we don't have cmov */
+
+#include "compemu_optimizer_x86.c"
+
+static uae_u16 swap16(uae_u16 x)
+{
+ return ((x&0xff00)>>8)|((x&0x00ff)<<8);
+}
+
+static uae_u32 swap32(uae_u32 x)
+{
+ return ((x&0xff00)<<8)|((x&0x00ff)<<24)|((x&0xff0000)>>8)|((x&0xff000000)>>24);
+}
+
+static __inline__ int isbyte(uae_s32 x)
+{
+ return (x>=-128 && x<=127);
+}
+
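+/* Most register-to-register forms below hand-assemble the ModRM byte:
+   0xc0+8*reg+rm is mod=11 (register direct) with 'reg' in bits 5-3 and
+   'rm' in bits 2-0. */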
+LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
+{
+ emit_byte(0x50+r);
+}
+LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
+
+LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
+{
+ emit_byte(0x58+r);
+}
+LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
+
+LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
+{
+ emit_byte(0x0f);
+ emit_byte(0xba);
+ emit_byte(0xe0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
+{
+ emit_byte(0x0f);
+ emit_byte(0xa3);
+ emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0x0f);
+ emit_byte(0xba);
+ emit_byte(0xf8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
+{
+ emit_byte(0x0f);
+ emit_byte(0xbb);
+ emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
+
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0x0f);
+ emit_byte(0xba);
+ emit_byte(0xf0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
+{
+ emit_byte(0x0f);
+ emit_byte(0xb3);
+ emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0x0f);
+ emit_byte(0xba);
+ emit_byte(0xe8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
+{
+ emit_byte(0x0f);
+ emit_byte(0xab);
+ emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
+{
+ emit_byte(0x66);
+ if (isbyte(i)) {
+ emit_byte(0x83);
+ emit_byte(0xe8+d);
+ emit_byte(i);
+ }
+ else {
+ emit_byte(0x81);
+ emit_byte(0xe8+d);
+ emit_word(i);
+ }
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
+
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
+{
+ emit_byte(0xc7);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_long(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+{
+ emit_byte(0x66);
+ emit_byte(0xc7);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_word(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+{
+ emit_byte(0xc6);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_byte(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
+{
+ emit_byte(0xc0);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
+{
+ emit_byte(0xc0);
+ emit_byte(0xc0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xc0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0xc1);
+ emit_byte(0xc0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xc0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xc0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xc0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
+{
+ emit_byte(0xc0);
+ emit_byte(0xc8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xc8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0xc1);
+ emit_byte(0xc8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0xc1);
+ emit_byte(0xe0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xe0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+{
+ emit_byte(0xc0);
+ emit_byte(0xe0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0xc1);
+ emit_byte(0xe8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xe8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+{
+ emit_byte(0xc0);
+ emit_byte(0xe8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0xc1);
+ emit_byte(0xf8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xf8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
+{
+ emit_byte(0xc0);
+ emit_byte(0xf8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
+{
+ emit_byte(0x9e);
+}
+LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
+
+LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
+{
+ emit_byte(0x0f);
+ emit_byte(0xa2);
+}
+LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
+
+LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
+{
+ emit_byte(0x9f);
+}
+LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
+
+LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
+{
+ emit_byte(0x0f);
+ emit_byte(0x90+cc);
+ emit_byte(0xc0+d);
+}
+LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
+
+LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
+{
+ emit_byte(0x0f);
+ emit_byte(0x90+cc);
+ emit_byte(0x05);
+ emit_long(d);
+}
+LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
+
+LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
+{
+ if (have_cmov) {
+ emit_byte(0x0f);
+ emit_byte(0x40+cc);
+ emit_byte(0xc0+8*d+s);
+ }
+ else { /* replacement using branch and mov */
+ int uncc=(cc^1);
+ emit_byte(0x70+uncc);
+ emit_byte(2); /* skip next 2 bytes if not cc=true */
+ emit_byte(0x89);
+ emit_byte(0xc0+8*s+d);
+ }
+}
+LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
+
+LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xbc);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xbf);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xbe);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xb7);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
+
+LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xb6);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xaf);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
+{
+ if (d!=MUL_NREG1 || s!=MUL_NREG2)
+ abort();
+ emit_byte(0xf7);
+ emit_byte(0xea);
+}
+LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
+{
+ if (d!=MUL_NREG1 || s!=MUL_NREG2) {
+ printf("Bad register in MUL: d=%d, s=%d\n",d,s);
+ abort();
+ }
+ emit_byte(0xf7);
+ emit_byte(0xe2);
+}
+LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
+{
+ abort(); /* %^$&%^$%#^ x86! */
+ emit_byte(0x0f);
+ emit_byte(0xaf);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
+{
+ emit_byte(0x88);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x89);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
+
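+/* The *_indexed forms emit a SIB byte: ModRM 0x04+8*reg selects [sib],
+   and the SIB byte is base+8*index+0x40*fi, where fi is log2 of the
+   scale factor.  A base of %ebp cannot be encoded with mod=00, so that
+   case (isebp) switches to mod=01 and appends a zero displacement byte. */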
+LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+{
+ int isebp=(baser==5)?0x40:0;
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+
+ emit_byte(0x8b);
+ emit_byte(0x04+8*d+isebp);
+ emit_byte(baser+8*index+0x40*fi);
+ if (isebp)
+ emit_byte(0x00);
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+{
+ int fi;
+ int isebp;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+ isebp=(baser==5)?0x40:0;
+
+ emit_byte(0x66);
+ emit_byte(0x8b);
+ emit_byte(0x04+8*d+isebp);
+ emit_byte(baser+8*index+0x40*fi);
+ if (isebp)
+ emit_byte(0x00);
+}
+LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+{
+ int fi;
+ int isebp;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+ isebp=(baser==5)?0x40:0;
+
+ emit_byte(0x8a);
+ emit_byte(0x04+8*d+isebp);
+ emit_byte(baser+8*index+0x40*fi);
+ if (isebp)
+ emit_byte(0x00);
+}
+LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+{
+ int fi;
+ int isebp;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+
+ isebp=(baser==5)?0x40:0;
+
+ emit_byte(0x89);
+ emit_byte(0x04+8*s+isebp);
+ emit_byte(baser+8*index+0x40*fi);
+ if (isebp)
+ emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+{
+ int fi;
+ int isebp;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+ isebp=(baser==5)?0x40:0;
+
+ emit_byte(0x66);
+ emit_byte(0x89);
+ emit_byte(0x04+8*s+isebp);
+ emit_byte(baser+8*index+0x40*fi);
+ if (isebp)
+ emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+{
+ int fi;
+ int isebp;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+ isebp=(baser==5)?0x40:0;
+
+ emit_byte(0x88);
+ emit_byte(0x04+8*s+isebp);
+ emit_byte(baser+8*index+0x40*fi);
+ if (isebp)
+ emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+{
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+ emit_byte(0x89);
+ emit_byte(0x84+8*s);
+ emit_byte(baser+8*index+0x40*fi);
+ emit_long(base);
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+{
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+ emit_byte(0x66);
+ emit_byte(0x89);
+ emit_byte(0x84+8*s);
+ emit_byte(baser+8*index+0x40*fi);
+ emit_long(base);
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+{
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+ emit_byte(0x88);
+ emit_byte(0x84+8*s);
+ emit_byte(baser+8*index+0x40*fi);
+ emit_long(base);
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+
+LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+ emit_byte(0x8b);
+ emit_byte(0x84+8*d);
+ emit_byte(baser+8*index+0x40*fi);
+ emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+ emit_byte(0x66);
+ emit_byte(0x8b);
+ emit_byte(0x84+8*d);
+ emit_byte(baser+8*index+0x40*fi);
+ emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+ emit_byte(0x8a);
+ emit_byte(0x84+8*d);
+ emit_byte(baser+8*index+0x40*fi);
+ emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+{
+ int fi;
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default:
+ fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
+ abort();
+ }
+ emit_byte(0x8b);
+ emit_byte(0x04+8*d);
+ emit_byte(0x05+8*index+64*fi);
+ emit_long(base);
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
+{
+ int fi;
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default:
+ fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
+ abort();
+ }
+ if (have_cmov) {
+ emit_byte(0x0f);
+ emit_byte(0x40+cond);
+ emit_byte(0x04+8*d);
+ emit_byte(0x05+8*index+64*fi);
+ emit_long(base);
+ }
+ else { /* replacement using branch and mov */
+ int uncc=(cond^1);
+ emit_byte(0x70+uncc);
+ emit_byte(7); /* skip next 7 bytes if not cc=true */
+ emit_byte(0x8b);
+ emit_byte(0x04+8*d);
+ emit_byte(0x05+8*index+64*fi);
+ emit_long(base);
+ }
+}
+LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
+
+LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
+{
+ if (have_cmov) {
+ emit_byte(0x0f);
+ emit_byte(0x40+cond);
+ emit_byte(0x05+8*d);
+ emit_long(mem);
+ }
+ else { /* replacement using branch and mov */
+ int uncc=(cond^1);
+ emit_byte(0x70+uncc);
+ emit_byte(6); /* skip next 6 bytes if not cc=true */
+ emit_byte(0x8b);
+ emit_byte(0x05+8*d);
+ emit_long(mem);
+ }
+}
+LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
+
+LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
+{
+ emit_byte(0x8b);
+ emit_byte(0x40+8*d+s);
+ emit_byte(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+{
+ emit_byte(0x66);
+ emit_byte(0x8b);
+ emit_byte(0x40+8*d+s);
+ emit_byte(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+{
+ emit_byte(0x8a);
+ emit_byte(0x40+8*d+s);
+ emit_byte(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
+{
+ emit_byte(0x8b);
+ emit_byte(0x80+8*d+s);
+ emit_long(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+{
+ emit_byte(0x66);
+ emit_byte(0x8b);
+ emit_byte(0x80+8*d+s);
+ emit_long(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+{
+ emit_byte(0x8a);
+ emit_byte(0x80+8*d+s);
+ emit_long(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+{
+ emit_byte(0xc7);
+ emit_byte(0x40+d);
+ emit_byte(offset);
+ emit_long(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+{
+ emit_byte(0x66);
+ emit_byte(0xc7);
+ emit_byte(0x40+d);
+ emit_byte(offset);
+ emit_word(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+{
+ emit_byte(0xc6);
+ emit_byte(0x40+d);
+ emit_byte(offset);
+ emit_byte(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+{
+ emit_byte(0x89);
+ emit_byte(0x40+8*s+d);
+ emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+{
+ emit_byte(0x66);
+ emit_byte(0x89);
+ emit_byte(0x40+8*s+d);
+ emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+{
+ emit_byte(0x88);
+ emit_byte(0x40+8*s+d);
+ emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+{
+ emit_byte(0x8d);
+ emit_byte(0x80+8*d+s);
+ emit_long(offset);
+}
+LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+{
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+ emit_byte(0x8d);
+ emit_byte(0x84+8*d);
+ emit_byte(0x40*fi+8*index+s);
+ emit_long(offset);
+}
+LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+
+LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+{
+ int isebp=(s==5)?0x40:0;
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+
+ emit_byte(0x8d);
+ emit_byte(0x04+8*d+isebp);
+ emit_byte(0x40*fi+8*index+s);
+ if (isebp)
+ emit_byte(0);
+}
+LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+{
+ emit_byte(0x89);
+ emit_byte(0x80+8*s+d);
+ emit_long(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+{
+ emit_byte(0x66);
+ emit_byte(0x89);
+ emit_byte(0x80+8*s+d);
+ emit_long(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+{
+ emit_byte(0x88);
+ emit_byte(0x80+8*s+d);
+ emit_long(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+{
+ emit_byte(0x0f);
+ emit_byte(0xc8+r);
+}
+LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+
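+/* bswap only exists for 32-bit registers, so the 16-bit byte swap is
+   emitted as rol r16,8 (66 c1 /0 ib).  Unlike bswap, the rotate writes
+   the flags, which is presumably why this variant is declared WRITE. */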
+LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xc0+r);
+ emit_byte(0x08);
+}
+LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+{
+ emit_byte(0x89);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+{
+ emit_byte(0x89);
+ emit_byte(0x05+8*s);
+ emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+{
+ emit_byte(0x8b);
+ emit_byte(0x05+8*d);
+ emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x89);
+ emit_byte(0x05+8*s);
+ emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+{
+ emit_byte(0x66);
+ emit_byte(0x8b);
+ emit_byte(0x05+8*d);
+ emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+{
+ emit_byte(0x88);
+ emit_byte(0x05+8*s);
+ emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+{
+ emit_byte(0x8a);
+ emit_byte(0x05+8*d);
+ emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+{
+ emit_byte(0xb8+d);
+ emit_long(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+{
+ emit_byte(0x66);
+ emit_byte(0xb8+d);
+ emit_word(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+{
+ emit_byte(0xb0+d);
+ emit_byte(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+
+LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+{
+ emit_byte(0x81);
+ emit_byte(0x15);
+ emit_long(d);
+ emit_long(s);
+}
+LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
+{
+ emit_byte(0x81);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_long(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
+{
+ emit_byte(0x66);
+ emit_byte(0x81);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_word(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
+{
+ emit_byte(0x80);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_byte(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
+
+LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
+{
+ emit_byte(0xf7);
+ emit_byte(0xc0+d);
+ emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
+{
+ emit_byte(0x85);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x85);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
+{
+ emit_byte(0x84);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
+{
+ emit_byte(0x81);
+ emit_byte(0xe0+d);
+ emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0x81);
+ emit_byte(0xe0+d);
+ emit_word(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
+{
+ emit_byte(0x21);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x21);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
+{
+ emit_byte(0x20);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
+{
+ emit_byte(0x81);
+ emit_byte(0xc8+d);
+ emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
+{
+ emit_byte(0x09);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x09);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
+{
+ emit_byte(0x08);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
+{
+ emit_byte(0x11);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x11);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+{
+ emit_byte(0x10);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+{
+ emit_byte(0x01);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x01);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+{
+ emit_byte(0x00);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+{
+ if (isbyte(i)) {
+ emit_byte(0x83);
+ emit_byte(0xe8+d);
+ emit_byte(i);
+ }
+ else {
+ emit_byte(0x81);
+ emit_byte(0xe8+d);
+ emit_long(i);
+ }
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+{
+ emit_byte(0x80);
+ emit_byte(0xe8+d);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+{
+ if (isbyte(i)) {
+ emit_byte(0x83);
+ emit_byte(0xc0+d);
+ emit_byte(i);
+ }
+ else {
+ emit_byte(0x81);
+ emit_byte(0xc0+d);
+ emit_long(i);
+ }
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+{
+ if (isbyte(i)) {
+ emit_byte(0x66);
+ emit_byte(0x83);
+ emit_byte(0xc0+d);
+ emit_byte(i);
+ }
+ else {
+ emit_byte(0x66);
+ emit_byte(0x81);
+ emit_byte(0xc0+d);
+ emit_word(i);
+ }
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+{
+ emit_byte(0x80);
+ emit_byte(0xc0+d);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+{
+ emit_byte(0x19);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x19);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+{
+ emit_byte(0x18);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+{
+ emit_byte(0x29);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x29);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+{
+ emit_byte(0x28);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+{
+ emit_byte(0x39);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+{
+ emit_byte(0x81);
+ emit_byte(0xf8+r);
+ emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x39);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+{
+ emit_byte(0x80);
+ emit_byte(0xf8+d);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+{
+ emit_byte(0x38);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+
+LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+{
+ int fi;
+
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default: abort();
+ }
+ emit_byte(0x39);
+ emit_byte(0x04+8*d);
+ emit_byte(5+8*index+0x40*fi);
+ emit_long(offset);
+}
+LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+{
+ emit_byte(0x31);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x31);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+{
+ emit_byte(0x30);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+{
+ emit_byte(0x81);
+ emit_byte(0x2d);
+ emit_long(d);
+ emit_long(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+
+LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+{
+ emit_byte(0x81);
+ emit_byte(0x3d);
+ emit_long(d);
+ emit_long(s);
+}
+LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+{
+ emit_byte(0x87);
+ emit_byte(0xc0+8*r1+r2);
+}
+LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+
+LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
+{
+ emit_byte(0x9c);
+}
+LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
+
+LOWFUNC(WRITE,READ,0,raw_popfl,(void))
+{
+ emit_byte(0x9d);
+}
+LENDFUNC(WRITE,READ,0,raw_popfl,(void))
+
+/*************************************************************************
+ * Unoptimizable stuff --- jump *
+ *************************************************************************/
+
+static __inline__ void raw_call_r(R4 r)
+{
+ lopt_emit_all();
+ emit_byte(0xff);
+ emit_byte(0xd0+r);
+}
+
+static __inline__ void raw_jmp_r(R4 r)
+{
+ lopt_emit_all();
+ emit_byte(0xff);
+ emit_byte(0xe0+r);
+}
+
+static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
+{
+ int mu;
+ switch(m) {
+ case 1: mu=0; break;
+ case 2: mu=1; break;
+ case 4: mu=2; break;
+ case 8: mu=3; break;
+ default: abort();
+ }
+ lopt_emit_all();
+ emit_byte(0xff);
+ emit_byte(0x24);
+ emit_byte(0x05+8*r+0x40*mu);
+ emit_long(base);
+}
+
+static __inline__ void raw_jmp_m(uae_u32 base)
+{
+ lopt_emit_all();
+ emit_byte(0xff);
+ emit_byte(0x25);
+ emit_long(base);
+}
+
+
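+/* call/jmp rel32: the displacement is relative to the end of the
+   instruction, i.e. 4 bytes past the current emit position once the
+   opcode byte is out - hence 't-(uae_u32)target-4' below. */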
+static __inline__ void raw_call(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0xe8);
+ emit_long(t-(uae_u32)target-4);
+}
+
+static __inline__ void raw_jmp(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0xe9);
+ emit_long(t-(uae_u32)target-4);
+}
+
+static __inline__ void raw_jl(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x8c);
+ emit_long(t-(uae_u32)target-4);
+}
+
+static __inline__ void raw_jz(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x84);
+ emit_long(t-(uae_u32)target-4);
+}
+
+static __inline__ void raw_jnz(uae_u32 t)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x85);
+ emit_long(t-(uae_u32)target-4);
+}
+
+static __inline__ void raw_jnz_l_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x85);
+}
+
+static __inline__ void raw_jcc_l_oponly(int cc)
+{
+ lopt_emit_all();
+ emit_byte(0x0f);
+ emit_byte(0x80+cc);
+}
+
+static __inline__ void raw_jnz_b_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0x75);
+}
+
+static __inline__ void raw_jz_b_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0x74);
+}
+
+static __inline__ void raw_jmp_l_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0xe9);
+}
+
+static __inline__ void raw_jmp_b_oponly(void)
+{
+ lopt_emit_all();
+ emit_byte(0xeb);
+}
+
+static __inline__ void raw_ret(void)
+{
+ lopt_emit_all();
+ emit_byte(0xc3);
+}
+
+static __inline__ void raw_nop(void)
+{
+ lopt_emit_all();
+ emit_byte(0x90);
+}
+
+
+/*************************************************************************
+ * Flag handling, to and fro UAE flag register *
+ *************************************************************************/
+
+
+#define FLAG_NREG1 0 /* Set to -1 if any register will do */
+
+static __inline__ void raw_flags_to_reg(int r)
+{
+ raw_lahf(0); /* Most flags in AH */
+ //raw_setcc(r,0); /* V flag in AL */
+ raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0);
+
+#if 1 /* Let's avoid those nasty partial register stalls */
+ //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
+ raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
+ //live.state[FLAGTMP].status=CLEAN;
+ live.state[FLAGTMP].status=INMEM;
+ live.state[FLAGTMP].realreg=-1;
+ /* We just "evicted" FLAGTMP. */
+ if (live.nat[r].nholds!=1) {
+ /* Huh? */
+ abort();
+ }
+ live.nat[r].nholds=0;
+#endif
+}
+
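+/* Going back, the low byte of r presumably holds the saved V flag as
+   0 or 1; 'cmp r,-127' maps that onto the x86 overflow flag
+   (1 - (-127) overflows a signed byte, 0 - (-127) does not) and sahf
+   then restores C/Z/S/A/P from AH. */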
+#define FLAG_NREG2 0 /* Set to -1 if any register will do */
+static __inline__ void raw_reg_to_flags(int r)
+{
+ raw_cmp_b_ri(r,-127); /* set V */
+ raw_sahf(0);
+}
+
+/* Apparently, there are enough instructions between flag store and
+ flag reload to avoid the partial memory stall */
+static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
+{
+#if 1
+ raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
+#else
+ raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
+ raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1);
+#endif
+}
+
+/* FLAGX is byte sized, and we *do* write it at that size */
+static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
+{
+ if (live.nat[target].canbyte)
+ raw_mov_b_rm(target,(uae_u32)live.state[r].mem);
+ else if (live.nat[target].canword)
+ raw_mov_w_rm(target,(uae_u32)live.state[r].mem);
+ else
+ raw_mov_l_rm(target,(uae_u32)live.state[r].mem);
+}
+
+#define NATIVE_FLAG_Z 0x40
+#define NATIVE_CC_EQ 4
+static __inline__ void raw_flags_set_zero(int f, int r, int t)
+{
+ // FIXME: this is really suboptimal
+ raw_pushfl();
+ raw_pop_l_r(f);
+ raw_and_l_ri(f,~NATIVE_FLAG_Z);
+ raw_test_l_rr(r,r);
+ raw_mov_l_ri(r,0);
+ raw_mov_l_ri(t,NATIVE_FLAG_Z);
+ raw_cmov_l_rr(r,t,NATIVE_CC_EQ);
+ raw_or_l(f,r);
+ raw_push_l_r(f);
+ raw_popfl();
+}
+
+static __inline__ void raw_inc_sp(int off)
+{
+ raw_add_l_ri(4,off);
+}
+
+/*************************************************************************
+ * Handling mistaken direct memory access *
+ *************************************************************************/
+
+
+#ifdef NATMEM_OFFSET
+#ifdef _WIN32 // %%% BRIAN KING WAS HERE %%%
+#include <winbase.h>
+#else
+#include <asm/sigcontext.h>
+#endif
+#include <signal.h>
+
+#define SIG_READ 1
+#define SIG_WRITE 2
+
+static int in_handler=0;
+static uae_u8 *veccode;
+
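+/* The handlers below decode the faulting instruction by hand: an
+   optional 0x66 prefix selects 16-bit size, opcodes 0x8a/0x8b are
+   loads and 0x88/0x89 stores, and the ModRM mod bits give the
+   displacement size and hence the instruction length.  Presumably only
+   these forms are emitted for direct memory access; anything else
+   falls through to the "can't handle" path. */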
+#ifdef _WIN32
+int EvalException ( LPEXCEPTION_POINTERS blah, int n_except )
+{
+ PEXCEPTION_RECORD pExceptRecord = NULL;
+ PCONTEXT pContext = NULL;
+
+ uae_u8* i = NULL;
+ uae_u32 addr = 0;
+ int r=-1;
+ int size=4;
+ int dir=-1;
+ int len=0;
+ int j;
+
+ if( n_except != STATUS_ACCESS_VIOLATION || !canbang)
+ return EXCEPTION_CONTINUE_SEARCH;
+
+ pExceptRecord = blah->ExceptionRecord;
+ pContext = blah->ContextRecord;
+
+ if( pContext )
+ {
+ i = (uae_u8 *)(pContext->Eip);
+ }
+ if( pExceptRecord )
+ {
+ addr = (uae_u32)(pExceptRecord->ExceptionInformation[1]);
+ }
+#ifdef JIT_DEBUG
+ write_log("JIT: fault address is 0x%x at 0x%x\n",addr,i);
+#endif
+ if (!canbang || !currprefs.cachesize)
+ {
+#ifdef JIT_DEBUG
+ write_log("JIT: Not happy! Canbang or cachesize is 0 in SIGSEGV handler!\n");
+#endif
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+
+ if (in_handler)
+ write_log("JIT: Argh --- Am already in a handler. Shouldn't happen!\n");
+
+ if (canbang && i>=compiled_code && i<=current_compile_p) {
+ if (*i==0x66) {
+ i++;
+ size=2;
+ len++;
+ }
+
+ switch(i[0]) {
+ case 0x8a:
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+ case 0x88:
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+ case 0x8b:
+ switch(i[1]&0xc0) {
+ case 0x80:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=6;
+ break;
+ case 0x40:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=3;
+ break;
+ case 0x00:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=2;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x89:
+ switch(i[1]&0xc0) {
+ case 0x80:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=6;
+ break;
+ case 0x40:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=3;
+ break;
+ case 0x00:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=2;
+ break;
+ }
+ break;
+ }
+ }
+
+ if (r!=-1) {
+ void* pr=NULL;
+#ifdef JIT_DEBUG
+ write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
+#endif
+
+ switch(r) {
+ case 0: pr=&(pContext->Eax); break;
+ case 1: pr=&(pContext->Ecx); break;
+ case 2: pr=&(pContext->Edx); break;
+ case 3: pr=&(pContext->Ebx); break;
+ case 4: pr=(size>1)?NULL:(((uae_u8*)&(pContext->Eax))+1); break;
+ case 5: pr=(size>1)?
+ (void*)(&(pContext->Ebp)):
+ (void*)(((uae_u8*)&(pContext->Ecx))+1); break;
+ case 6: pr=(size>1)?
+ (void*)(&(pContext->Esi)):
+ (void*)(((uae_u8*)&(pContext->Edx))+1); break;
+ case 7: pr=(size>1)?
+ (void*)(&(pContext->Edi)):
+ (void*)(((uae_u8*)&(pContext->Ebx))+1); break;
+ default: abort();
+ }
+ if (pr) {
+ blockinfo* bi;
+
+ if (currprefs.comp_oldsegv) {
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+#ifdef JIT_DEBUG
+ write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+#endif
+ }
+ if (dir==SIG_READ) {
+ switch(size) {
+ case 1: *((uae_u8*)pr)=get_byte(addr); break;
+ case 2: *((uae_u16*)pr)=swap16(get_word(addr)); break;
+ case 4: *((uae_u32*)pr)=swap32(get_long(addr)); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,swap16(*((uae_u16*)pr))); break;
+ case 4: put_long(addr,swap32(*((uae_u32*)pr))); break;
+ default: abort();
+ }
+ }
+#ifdef JIT_DEBUG
+ write_log("Handled one access!\n");
+#endif
+ fflush(stdout);
+ segvcount++;
+ pContext->Eip+=len;
+ }
+ else {
+ void* tmp=target;
+ int i;
+ uae_u8 vecbuf[5];
+
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+#ifdef JIT_DEBUG
+ write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+#endif
+ }
+
+ target=(uae_u8*)pContext->Eip;
+ for (i=0;i<5;i++)
+ vecbuf[i]=target[i];
+ emit_byte(0xe9);
+ emit_long((uae_u32)veccode-(uae_u32)target-4);
+#ifdef JIT_DEBUG
+
+ write_log("Create jump to %p\n",veccode);
+ write_log("Handled one access!\n");
+#endif
+ segvcount++;
+
+ target=veccode;
+
+ if (dir==SIG_READ) {
+ switch(size) {
+ case 1: raw_mov_b_ri(r,get_byte(addr)); break;
+ case 2: raw_mov_w_ri(r,swap16(get_word(addr))); break;
+ case 4: raw_mov_l_ri(r,swap32(get_long(addr))); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,swap16(*((uae_u16*)pr))); break;
+ case 4: put_long(addr,swap32(*((uae_u32*)pr))); break;
+ default: abort();
+ }
+ }
+ for (i=0;i<5;i++)
+ raw_mov_b_mi(pContext->Eip+i,vecbuf[i]);
+ raw_mov_l_mi((uae_u32)&in_handler,0);
+ emit_byte(0xe9);
+ emit_long(pContext->Eip+len-(uae_u32)target-4);
+ in_handler=1;
+ target=tmp;
+ }
+ bi=active;
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+#ifdef JIT_DEBUG
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+#endif
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ bi=bi->next;
+ }
+ /* Not found in the active list. Might be a rom routine that
+ is in the dormant list */
+ bi=dormant;
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+#ifdef JIT_DEBUG
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+#endif
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ bi=bi->next;
+ }
+#ifdef JIT_DEBUG
+ write_log("Huh? Could not find trigger!\n");
+#endif
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ }
+ write_log("JIT: Can't handle access!\n");
+ if( i )
+ {
+ for (j=0;j<10;j++) {
+ write_log("JIT: instruction byte %2d is 0x%02x\n",j,i[j]);
+ }
+ }
+#if 0
+ write_log("Please send the above info (starting at \"fault address\") to\n"
+ "bmeyer@csse.monash.edu.au\n"
+ "This shouldn't happen ;-)\n");
+#endif
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+#else
+static void vec(int x, struct sigcontext sc)
+{
+ uae_u8* i=(uae_u8*)sc.eip;
+ uae_u32 addr=sc.cr2;
+ int r=-1;
+ int size=4;
+ int dir=-1;
+ int len=0;
+ int j;
+
+ write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
+ if (!canbang)
+ write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
+ if (in_handler)
+ write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
+
+ if (canbang && i>=compiled_code && i<=current_compile_p) {
+ if (*i==0x66) {
+ i++;
+ size=2;
+ len++;
+ }
+
+ switch(i[0]) {
+ case 0x8a:
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+ case 0x88:
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+
+ case 0x8b:
+ switch(i[1]&0xc0) {
+ case 0x80:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=6;
+ break;
+ case 0x40:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=3;
+ break;
+ case 0x00:
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=2;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case 0x89:
+ switch(i[1]&0xc0) {
+ case 0x80:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=6;
+ break;
+ case 0x40:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=3;
+ break;
+ case 0x00:
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=2;
+ break;
+ }
+ break;
+ }
+ }
+
+ if (r!=-1) {
+ void* pr=NULL;
+ write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
+
+ switch(r) {
+ case 0: pr=&(sc.eax); break;
+ case 1: pr=&(sc.ecx); break;
+ case 2: pr=&(sc.edx); break;
+ case 3: pr=&(sc.ebx); break;
+ case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
+ case 5: pr=(size>1)?
+ (void*)(&(sc.ebp)):
+ (void*)(((uae_u8*)&(sc.ecx))+1); break;
+ case 6: pr=(size>1)?
+ (void*)(&(sc.esi)):
+ (void*)(((uae_u8*)&(sc.edx))+1); break;
+ case 7: pr=(size>1)?
+ (void*)(&(sc.edi)):
+ (void*)(((uae_u8*)&(sc.ebx))+1); break;
+ default: abort();
+ }
+ if (pr) {
+ blockinfo* bi;
+
+ if (currprefs.comp_oldsegv) {
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+		write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+ }
+ if (dir==SIG_READ) {
+ switch(size) {
+ case 1: *((uae_u8*)pr)=get_byte(addr); break;
+ case 2: *((uae_u16*)pr)=get_word(addr); break;
+ case 4: *((uae_u32*)pr)=get_long(addr); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,*((uae_u16*)pr)); break;
+ case 4: put_long(addr,*((uae_u32*)pr)); break;
+ default: abort();
+ }
+ }
+ write_log("Handled one access!\n");
+ fflush(stdout);
+ segvcount++;
+ sc.eip+=len;
+ }
+ else {
+ void* tmp=target;
+ int i;
+ uae_u8 vecbuf[5];
+
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+ write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+ }
+
+ target=(uae_u8*)sc.eip;
+ for (i=0;i<5;i++)
+ vecbuf[i]=target[i];
+ emit_byte(0xe9);
+ emit_long((uae_u32)veccode-(uae_u32)target-4);
+ write_log("Create jump to %p\n",veccode);
+
+ write_log("Handled one access!\n");
+ fflush(stdout);
+ segvcount++;
+
+ target=veccode;
+
+ if (dir==SIG_READ) {
+ switch(size) {
+ case 1: raw_mov_b_ri(r,get_byte(addr)); break;
+ case 2: raw_mov_w_ri(r,get_word(addr)); break;
+ case 4: raw_mov_l_ri(r,get_long(addr)); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,*((uae_u16*)pr)); break;
+ case 4: put_long(addr,*((uae_u32*)pr)); break;
+ default: abort();
+ }
+ }
+ for (i=0;i<5;i++)
+ raw_mov_b_mi(sc.eip+i,vecbuf[i]);
+ raw_mov_l_mi((uae_u32)&in_handler,0);
+ emit_byte(0xe9);
+ emit_long(sc.eip+len-(uae_u32)target-4);
+ in_handler=1;
+ target=tmp;
+ }
+ bi=active;
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return;
+ }
+ bi=bi->next;
+ }
+ /* Not found in the active list. Might be a rom routine that
+ is in the dormant list */
+ bi=dormant;
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return;
+ }
+ bi=bi->next;
+ }
+ write_log("Huh? Could not find trigger!\n");
+ return;
+ }
+ }
+ write_log("Can't handle access!\n");
+ for (j=0;j<10;j++) {
+ write_log("instruction byte %2d is %02x\n",j,i[j]);
+ }
+#if 0
+ write_log("Please send the above info (starting at \"fault address\") to\n"
+ "bmeyer@csse.monash.edu.au\n"
+ "This shouldn't happen ;-)\n");
+ fflush(stdout);
+#endif
+ signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
+}
+#endif
+#endif
+
+/*************************************************************************
+ * Checking for CPU features *
+ *************************************************************************/
+
+typedef struct {
+ uae_u32 eax;
+ uae_u32 ecx;
+ uae_u32 edx;
+ uae_u32 ebx;
+} x86_regs;
+
+
+/* This could be so much easier if it could make assumptions about the
+ compiler... */
+
+static uae_u32 cpuid_ptr;
+static uae_u32 cpuid_level;
+
+static x86_regs cpuid(uae_u32 level)
+{
+ x86_regs answer;
+ uae_u8 *cpuid_space;
+ void* tmp=get_target();
+
+ cpuid_ptr=(uae_u32)&answer;
+ cpuid_level=level;
+
+ cpuid_space = cache_alloc (256);
+ set_target(cpuid_space);
+ raw_push_l_r(0); /* eax */
+ raw_push_l_r(1); /* ecx */
+ raw_push_l_r(2); /* edx */
+ raw_push_l_r(3); /* ebx */
+ raw_push_l_r(7); /* edi */
+ raw_mov_l_rm(0,(uae_u32)&cpuid_level);
+ raw_cpuid(0);
+ raw_mov_l_rm(7,(uae_u32)&cpuid_ptr);
+ raw_mov_l_Rr(7,0,0);
+ raw_mov_l_Rr(7,1,4);
+ raw_mov_l_Rr(7,2,8);
+ raw_mov_l_Rr(7,3,12);
+ raw_pop_l_r(7);
+ raw_pop_l_r(3);
+ raw_pop_l_r(2);
+ raw_pop_l_r(1);
+ raw_pop_l_r(0);
+ raw_ret();
+ set_target(tmp);
+
+ ((cpuop_func*)cpuid_space)(0);
+ cache_free (cpuid_space);
+ return answer;
+}
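+
+/* For reference, a minimal sketch of what the generated stub computes, under
+   the assumption of a GCC-style compiler with inline asm (an assumption this
+   code deliberately avoids, hence the runtime-generated stub above; kept
+   disabled): */
+#if 0
+static x86_regs cpuid_inline(uae_u32 level)
+{
+    x86_regs r;
+    __asm__ __volatile__ ("cpuid"
+			  : "=a" (r.eax), "=b" (r.ebx), "=c" (r.ecx), "=d" (r.edx)
+			  : "a" (level));
+    return r;
+}
+#endif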
+
+static void raw_init_cpu(void)
+{
+ x86_regs x;
+ uae_u32 maxlev;
+
+ x=cpuid(0);
+ maxlev=x.eax;
+ write_log("Max CPUID level=%d Processor is %c%c%c%c%c%c%c%c%c%c%c%c\n",
+ maxlev,
+ x.ebx,
+ x.ebx>>8,
+ x.ebx>>16,
+ x.ebx>>24,
+ x.edx,
+ x.edx>>8,
+ x.edx>>16,
+ x.edx>>24,
+ x.ecx,
+ x.ecx>>8,
+ x.ecx>>16,
+ x.ecx>>24
+ );
+  have_rat_stall=(x.ecx==0x6c65746e); /* ecx == "ntel" from "GenuineIntel" */
+
+ if (maxlev>=1) {
+ x=cpuid(1);
+    if (x.edx&(1<<15)) /* CPUID EDX bit 15: CMOV supported */
+ have_cmov=1;
+ }
+ have_rat_stall=1;
+#if 0
+ if (!have_cmov)
+ have_rat_stall=0;
+#endif
+#if 0
+ write_log ("have_cmov=%d, avoid_cmov=%d, have_rat_stall=%d\n",
+ have_cmov,currprefs.avoid_cmov,have_rat_stall);
+ if (currprefs.avoid_cmov) {
+ write_log("Disabling cmov use despite processor claiming to support it!\n");
+ have_cmov=0;
+ }
+#else
+ /* Dear Bernie, I don't want to keep around options which are useless, and not
+ represented in the GUI anymore... Is this okay? */
+ write_log ("have_cmov=%d, have_rat_stall=%d\n", have_cmov, have_rat_stall);
+#endif
+#if 0 /* For testing of non-cmov code! */
+ have_cmov=0;
+#endif
+#if 0 /* It appears that partial register writes are a bad idea even on
+ AMD K7 cores, even though they are not supposed to have the
+ dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
+ if (have_cmov)
+ have_rat_stall=1;
+#endif
+}
+
+/*************************************************************************
+ * FPU stuff *
+ *************************************************************************/
+
+
+static __inline__ void raw_fp_init(void)
+{
+ int i;
+
+ for (i=0;i<N_FREGS;i++)
+ live.spos[i]=-2;
+ live.tos=-1; /* Stack is empty */
+}
+
+static __inline__ void raw_fp_cleanup_drop(void)
+{
+#if 0
+ /* using FINIT instead of popping all the entries.
+ Seems to have side effects --- there is display corruption in
+ Quake when this is used */
+ if (live.tos>1) {
+ emit_byte(0x9b);
+ emit_byte(0xdb);
+ emit_byte(0xe3);
+ live.tos=-1;
+ }
+#endif
+ while (live.tos>=1) {
+ emit_byte(0xde);
+ emit_byte(0xd9);
+ live.tos-=2;
+ }
+ while (live.tos>=0) {
+ emit_byte(0xdd);
+ emit_byte(0xd8);
+ live.tos--;
+ }
+ raw_fp_init();
+}
+
+static __inline__ void make_tos(int r)
+{
+ int p,q;
+
+ if (live.spos[r]<0) { /* Register not yet on stack */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
+ live.tos++;
+ live.spos[r]=live.tos;
+ live.onstack[live.tos]=r;
+ return;
+ }
+ /* Register is on stack */
+ if (live.tos==live.spos[r])
+ return;
+ p=live.spos[r];
+ q=live.onstack[live.tos];
+
+ emit_byte(0xd9);
+ emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
+ live.onstack[live.tos]=r;
+ live.spos[r]=live.tos;
+ live.onstack[p]=q;
+ live.spos[q]=p;
+}
+
+static __inline__ void make_tos2(int r, int r2)
+{
+ int q;
+
+  make_tos(r2); /* Put the reg that's supposed to end up in position 2
+ on top */
+
+ if (live.spos[r]<0) { /* Register not yet on stack */
+ make_tos(r); /* This will extend the stack */
+ return;
+ }
+ /* Register is on stack */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* Move r2 into position 2 */
+
+ q=live.onstack[live.tos-1];
+ live.onstack[live.tos]=q;
+ live.spos[q]=live.tos;
+ live.onstack[live.tos-1]=r2;
+ live.spos[r2]=live.tos-1;
+
+ make_tos(r); /* And r into 1 */
+}
+
+static __inline__ int stackpos(int r)
+{
+ if (live.spos[r]<0)
+ abort();
+ if (live.tos<live.spos[r]) {
+ printf("Looking for spos for fnreg %d\n",r);
+ abort();
+ }
+ return live.tos-live.spos[r];
+}
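+
+/* Illustration (assumed state, purely for clarity): with live.tos==2 and
+   live.spos[r]==0, register r was pushed first and two more entries now sit
+   above it, so stackpos(r) returns 2, i.e. r currently lives in st(2). */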
+
+static __inline__ void usereg(int r)
+{
+ if (live.spos[r]<0)
+ make_tos(r);
+}
+
+/* This is called with one FP value in a reg *above* tos, which it will
+ pop off the stack if necessary */
+static __inline__ void tos_make(int r)
+{
+ if (live.spos[r]<0) {
+ live.tos++;
+ live.spos[r]=live.tos;
+ live.onstack[live.tos]=r;
+ return;
+ }
+ emit_byte(0xdd);
+ emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
+ and pop it*/
+}
+
+
+LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
+{
+ make_tos(r);
+ emit_byte(0xdd);
+ emit_byte(0x15);
+ emit_long(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
+
+LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
+{
+ make_tos(r);
+ emit_byte(0xdd);
+ emit_byte(0x1d);
+ emit_long(m);
+ live.onstack[live.tos]=-1;
+ live.tos--;
+ live.spos[r]=-2;
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
+
+LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
+{
+ emit_byte(0xdd);
+ emit_byte(0x05);
+ emit_long(m);
+ tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
+
+LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
+{
+ emit_byte(0xdb);
+ emit_byte(0x05);
+ emit_long(m);
+ tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
+
+LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
+{
+ make_tos(r);
+ emit_byte(0xdb);
+ emit_byte(0x15);
+ emit_long(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
+
+LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
+{
+ emit_byte(0xd9);
+ emit_byte(0x05);
+ emit_long(m);
+ tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
+
+LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
+{
+ make_tos(r);
+ emit_byte(0xd9);
+ emit_byte(0x15);
+ emit_long(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
+
+LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
+{
+ int rs;
+
+ /* Stupid x87 can't write a long double to mem without popping the
+ stack! */
+ usereg(r);
+ rs=stackpos(r);
+ emit_byte(0xd9); /* Get a copy to the top of stack */
+ emit_byte(0xc0+rs);
+
+ emit_byte(0xdb); /* store and pop it */
+ emit_byte(0x3d);
+ emit_long(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
+
+LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
+{
+ int rs;
+
+ make_tos(r);
+ emit_byte(0xdb); /* store and pop it */
+ emit_byte(0x3d);
+ emit_long(m);
+ live.onstack[live.tos]=-1;
+ live.tos--;
+ live.spos[r]=-2;
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
+
+LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
+{
+ emit_byte(0xdb);
+ emit_byte(0x2d);
+ emit_long(m);
+ tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xeb);
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xec);
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xea);
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xed);
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xe8);
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
+
+LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xee);
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
+
+LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ if (ds==0 && live.spos[d]>=0) {
+ /* source is on top of stack, and we already have the dest */
+ int dd=stackpos(d);
+ emit_byte(0xdd);
+ emit_byte(0xd0+dd);
+ }
+ else {
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source on tos */
+ tos_make(d); /* store to destination, pop if necessary */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+
+LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
+{
+ emit_byte(0xd9);
+ emit_byte(0xa8+index);
+ emit_long(base);
+}
+LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
+
+
+LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xfa); /* take square root */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xfa); /* take square root */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xe1); /* take fabs */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xe1); /* take fabs */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* take frndint */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* take frndint */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xff); /* take cos */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xff); /* take cos */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xfe); /* take sin */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xfe); /* take sin */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+
+double one=1;
+LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* rndint */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two elements */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* subtract rounded from original */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 */
+ emit_byte(0xdc);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* and scale it */
+ emit_byte(0xdd);
+  emit_byte(0xd9); /* take the rounded value off */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xea); /* fldl2e */
+ emit_byte(0xde);
+ emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
+
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* rndint */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two elements */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* subtract rounded from original */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 */
+ emit_byte(0xdc);
+ emit_byte(0x05);
+ emit_long((uae_u32)&one); /* Add '1' without using extra stack space */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* and scale it */
+ emit_byte(0xdd);
+  emit_byte(0xd9); /* take the rounded value off */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
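+
+/* The f2xm1/fscale sequences in raw_ftwotox_rr and raw_fetox_rr above rely on
+   the identities (math only, not emitted code):
+       2^x = 2^rndint(x) * 2^(x - rndint(x))
+       e^x = 2^(x * log2(e))
+   f2xm1 yields 2^frac - 1 for the fractional part (hence the later addition
+   of 'one'), and fscale applies the 2^rndint(x) factor. */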
+
+LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* push '1' */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two */
+ emit_byte(0xd9);
+ emit_byte(0xf1); /* take 1*log2(x) */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+
+
+LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+{
+ int ds;
+
+ if (d!=s) {
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* take fchs */
+ tos_make(d); /* store to destination */
+ }
+ else {
+ make_tos(d);
+ emit_byte(0xd9);
+ emit_byte(0xe0); /* take fchs */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ if (live.spos[s]==live.tos) {
+ /* Source is on top of stack */
+ ds=stackpos(d);
+ emit_byte(0xdc);
+ emit_byte(0xc0+ds); /* add source to dest*/
+ }
+ else {
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xd8);
+ emit_byte(0xc0+ds); /* add source to dest*/
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ if (live.spos[s]==live.tos) {
+ /* Source is on top of stack */
+ ds=stackpos(d);
+ emit_byte(0xdc);
+ emit_byte(0xe8+ds); /* sub source from dest*/
+ }
+ else {
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xd8);
+ emit_byte(0xe0+ds); /* sub src from dest */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xdd);
+ emit_byte(0xe0+ds); /* cmp dest with source*/
+}
+LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ if (live.spos[s]==live.tos) {
+ /* Source is on top of stack */
+ ds=stackpos(d);
+ emit_byte(0xdc);
+ emit_byte(0xc8+ds); /* mul dest by source*/
+ }
+ else {
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xd8);
+ emit_byte(0xc8+ds); /* mul dest by source*/
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ if (live.spos[s]==live.tos) {
+ /* Source is on top of stack */
+ ds=stackpos(d);
+ emit_byte(0xdc);
+ emit_byte(0xf8+ds); /* div dest by source */
+ }
+ else {
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xd8);
+ emit_byte(0xf0+ds); /* div dest by source*/
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos2(d,s);
+ ds=stackpos(s);
+
+ if (ds!=1) {
+ printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
+ abort();
+ }
+ emit_byte(0xd9);
+ emit_byte(0xf8); /* take rem from dest by source */
+}
+LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos2(d,s);
+ ds=stackpos(s);
+
+ if (ds!=1) {
+ printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
+ abort();
+ }
+ emit_byte(0xd9);
+ emit_byte(0xf5); /* take rem1 from dest by source */
+}
+LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+
+
+LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
+{
+ make_tos(r);
+ emit_byte(0xd9); /* ftst */
+ emit_byte(0xe4);
+}
+LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
+
+static __inline__ void raw_fflags_into_flags(int r)
+{
+ int p;
+
+ usereg(r);
+ p=stackpos(r);
+
+ emit_byte(0xd9);
+ emit_byte(0xee); /* Push 0 */
+ emit_byte(0xd9);
+ emit_byte(0xc9+p); /* swap top two around */
+ if (have_cmov) {
+ // gb-- fucomi is for P6 cores only, not K6-2 then...
+ emit_byte(0xdb);
+ emit_byte(0xe9+p); /* fucomi them */
+ }
+ else {
+ emit_byte(0xdd);
+ emit_byte(0xe1+p); /* fucom them */
+ emit_byte(0x9b);
+ emit_byte(0xdf);
+ emit_byte(0xe0); /* fstsw ax */
+ raw_sahf(0); /* sahf */
+ }
+ emit_byte(0xdd);
+ emit_byte(0xd9+p); /* store value back, and get rid of 0 */
+}
}
MENDFUNC(0,nop,(void))
-
MIDFUNC(1,f_forget_about,(FW r))
{
if (f_isinreg(r))
}
MENDFUNC(2,fcos_rr,(FW d, FR s))
+MIDFUNC(2,ftan_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_ftan_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,ftan_rr,(FW d, FR s))
+
+MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
+{
+ s=f_readreg(s); /* s for source */
+ d=f_writereg(d); /* d for sine */
+ c=f_writereg(c); /* c for cosine */
+ raw_fsincos_rr(d,c,s);
+ f_unlock(s);
+ f_unlock(d);
+ f_unlock(c);
+}
+MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
+
+MIDFUNC(2,fscale_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fscale_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fscale_rr,(FRW d, FR s))
+
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
s=f_readreg(s);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))
+MIDFUNC(2,fetoxM1_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fetoxM1_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fetoxM1_rr,(FW d, FR s))
+
+MIDFUNC(2,ftentox_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_ftentox_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,ftentox_rr,(FW d, FR s))
+
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
s=f_readreg(s);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))
+MIDFUNC(2,flogN_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_flogN_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,flogN_rr,(FW d, FR s))
+
+MIDFUNC(2,flogNP1_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_flogNP1_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,flogNP1_rr,(FW d, FR s))
+
+MIDFUNC(2,flog10_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_flog10_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,flog10_rr,(FW d, FR s))
+
+MIDFUNC(2,fasin_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fasin_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fasin_rr,(FW d, FR s))
+
+MIDFUNC(2,facos_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_facos_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,facos_rr,(FW d, FR s))
+
+MIDFUNC(2,fatan_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fatan_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fatan_rr,(FW d, FR s))
+
+MIDFUNC(2,fatanh_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fatanh_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fatanh_rr,(FW d, FR s))
+
+MIDFUNC(2,fsinh_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fsinh_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fsinh_rr,(FW d, FR s))
+
+MIDFUNC(2,fcosh_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fcosh_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fcosh_rr,(FW d, FR s))
+
+MIDFUNC(2,ftanh_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_ftanh_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,ftanh_rr,(FW d, FR s))
+
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
s=f_readreg(s);
--- /dev/null
+#define writemem_special writemem
+#define readmem_special readmem
+
+#define USE_MATCHSTATE 0
+#define setzflg_uses_bsf 0
+#include "sysconfig.h"
+#include "sysdeps.h"
+#include "config.h"
+#include "options.h"
+#include "events.h"
+#include "include/memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "comptbl.h"
+#include "compemu.h"
+
+// %%% BRIAN KING WAS HERE %%%
+extern int canbang;
+#include <sys/mman.h>
+#include <limits.h> /* for PAGESIZE */
+
+cpuop_func *compfunctbl[65536];
+cpuop_func *nfcompfunctbl[65536];
+#ifdef NOFLAGS_SUPPORT
+cpuop_func *nfcpufunctbl[65536];
+#endif
+uae_u8* comp_pc_p;
+
+uae_u8* start_pc_p;
+uae_u32 start_pc;
+uae_u32 current_block_pc_p;
+uae_u32 current_block_start_target;
+uae_u32 needed_flags;
+static uae_u32 next_pc_p;
+static uae_u32 taken_pc_p;
+static int branch_cc;
+int segvcount=0;
+int soft_flush_count=0;
+int hard_flush_count=0;
+int compile_count=0;
+int checksum_count=0;
+static uae_u8* current_compile_p=NULL;
+static uae_u8* max_compile_start;
+uae_u8* compiled_code=NULL;
+static uae_s32 reg_alloc_run;
+static int have_rat_stall=0;
+
+void* pushall_call_handler=NULL;
+static void* popall_do_nothing=NULL;
+static void* popall_exec_nostats=NULL;
+static void* popall_execute_normal=NULL;
+static void* popall_cache_miss=NULL;
+static void* popall_recompile_block=NULL;
+static void* popall_check_checksum=NULL;
+
+extern uae_u32 oink;
+extern unsigned long foink3;
+extern unsigned long foink;
+
+/* The 68k only ever executes from even addresses. So right now, we
+ waste half the entries in this array
+ UPDATE: We now use those entries to store the start of the linked
+ lists that we maintain for each hash result. */
+cacheline cache_tags[TAGSIZE];
+int letit=0;
+blockinfo* hold_bi[MAX_HOLD_BI];
+blockinfo* active;
+blockinfo* dormant;
+
+op_properties prop[65536];
+
+#ifdef NOFLAGS_SUPPORT
+/* 68040 */
+extern struct cputbl op_smalltbl_0_nf[];
+#endif
+extern struct cputbl op_smalltbl_0_comp_nf[];
+extern struct cputbl op_smalltbl_0_comp_ff[];
+#ifdef NOFLAGS_SUPPORT
+/* 68020 + 68881 */
+extern struct cputbl op_smalltbl_1_nf[];
+/* 68020 */
+extern struct cputbl op_smalltbl_2_nf[];
+/* 68010 */
+extern struct cputbl op_smalltbl_3_nf[];
+/* 68000 */
+extern struct cputbl op_smalltbl_4_nf[];
+/* 68000 slow but compatible. */
+extern struct cputbl op_smalltbl_5_nf[];
+#endif
+
+static void flush_icache_hard(int n);
+
+
+
+bigstate live;
+smallstate empty_ss;
+smallstate default_ss;
+static int optlev;
+
+static int writereg(int r, int size);
+static void unlock(int r);
+static void setlock(int r);
+static int readreg_specific(int r, int size, int spec);
+static int writereg_specific(int r, int size, int spec);
+static void prepare_for_call_1(void);
+static void prepare_for_call_2(void);
+static void align_target(uae_u32 a);
+
+static uae_s32 nextused[VREGS];
+
+static uae_u8 *popallspace;
+
+uae_u32 m68k_pc_offset;
+
+/* Some arithmetic operations can be optimized away if the operands
+ are known to be constant. But that's only a good idea when the
+ side effects they would have on the flags are not important. This
+ variable indicates whether we need the side effects or not
+*/
+uae_u32 needflags=0;
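+
+/* Illustrative sketch (an assumed usage pattern, not lifted verbatim from the
+   emitters below): an addition whose flag results are not needed can be
+   folded when both operands are compile-time constants:
+
+       if (!needflags && isconst(s) && isconst(d))
+           set_const(d, live.state[d].val + live.state[s].val);
+       else
+           ... emit the real x86 add, clobbering the x86 flags ...
+*/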
+
+/* Flag handling is complicated.
+
+ x86 instructions create flags, which quite often are exactly what we
+ want. So at times, the "68k" flags are actually in the x86 flags.
+
+ Then again, sometimes we do x86 instructions that clobber the x86
+ flags, but don't represent a corresponding m68k instruction. In that
+ case, we have to save them.
+
+ We used to save them to the stack, but now store them back directly
+ into the regflags.cznv of the traditional emulation. Thus some odd
+ names.
+
+ So flags can be in either of two places (used to be three; boy were
+ things complicated back then!); And either place can contain either
+ valid flags or invalid trash (and on the stack, there was also the
+ option of "nothing at all", now gone). A couple of variables keep
+ track of the respective states.
+
+ To make things worse, we might or might not be interested in the flags.
+ by default, we are, but a call to dont_care_flags can change that
+ until the next call to live_flags. If we are not, pretty much whatever
+ is in the register and/or the native flags is seen as valid.
+*/
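+
+/* Rough decision table implied by the above (a summary of the code below,
+   not additional logic):
+
+     flags_in_flags   flags_on_stack   when the 68k flags are needed
+     VALID            (anything)       use the x86 flags as they are
+     not VALID        VALID            copy them back from regflags.cznv
+                                       (see make_flags_live_internal)
+     not VALID        not VALID        an error, if the flags are important
+*/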
+
+
+static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
+{
+ return cache_tags[cl+1].bi;
+}
+
+static __inline__ blockinfo* get_blockinfo_addr(void* addr)
+{
+ blockinfo* bi=get_blockinfo(cacheline(addr));
+
+ while (bi) {
+ if (bi->pc_p==addr)
+ return bi;
+ bi=bi->next_same_cl;
+ }
+ return NULL;
+}
+
+
+/*******************************************************************
+ * All sorts of list related functions for all of the lists *
+ *******************************************************************/
+
+static __inline__ void remove_from_cl_list(blockinfo* bi)
+{
+ uae_u32 cl=cacheline(bi->pc_p);
+
+ if (bi->prev_same_cl_p)
+ *(bi->prev_same_cl_p)=bi->next_same_cl;
+ if (bi->next_same_cl)
+ bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
+ if (cache_tags[cl+1].bi)
+ cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
+ else
+ cache_tags[cl].handler=popall_execute_normal;
+}
+
+static __inline__ void remove_from_list(blockinfo* bi)
+{
+ if (bi->prev_p)
+ *(bi->prev_p)=bi->next;
+ if (bi->next)
+ bi->next->prev_p=bi->prev_p;
+}
+
+static __inline__ void remove_from_lists(blockinfo* bi)
+{
+ remove_from_list(bi);
+ remove_from_cl_list(bi);
+}
+
+static __inline__ void add_to_cl_list(blockinfo* bi)
+{
+ uae_u32 cl=cacheline(bi->pc_p);
+
+ if (cache_tags[cl+1].bi)
+ cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
+ bi->next_same_cl=cache_tags[cl+1].bi;
+
+ cache_tags[cl+1].bi=bi;
+ bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
+
+ cache_tags[cl].handler=bi->handler_to_use;
+}
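+
+/* Layout note for the above: cache_tags[] is used in pairs -- the even entry
+   of a pair caches the handler to jump to, while the odd entry anchors the
+   linked list of blockinfos that hash to that cacheline. */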
+
+static __inline__ void raise_in_cl_list(blockinfo* bi)
+{
+ remove_from_cl_list(bi);
+ add_to_cl_list(bi);
+}
+
+static __inline__ void add_to_active(blockinfo* bi)
+{
+ if (active)
+ active->prev_p=&(bi->next);
+ bi->next=active;
+
+ active=bi;
+ bi->prev_p=&active;
+}
+
+static __inline__ void add_to_dormant(blockinfo* bi)
+{
+ if (dormant)
+ dormant->prev_p=&(bi->next);
+ bi->next=dormant;
+
+ dormant=bi;
+ bi->prev_p=&dormant;
+}
+
+static __inline__ void remove_dep(dependency* d)
+{
+ if (d->prev_p)
+ *(d->prev_p)=d->next;
+ if (d->next)
+ d->next->prev_p=d->prev_p;
+ d->prev_p=NULL;
+ d->next=NULL;
+}
+
+/* This block's code is about to be thrown away, so it no longer
+ depends on anything else */
+static __inline__ void remove_deps(blockinfo* bi)
+{
+ remove_dep(&(bi->dep[0]));
+ remove_dep(&(bi->dep[1]));
+}
+
+static __inline__ void adjust_jmpdep(dependency* d, void* a)
+{
+ *(d->jmp_off)=(uae_u32)a-((uae_u32)d->jmp_off+4);
+}
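+
+/* Worked example (addresses made up for illustration): if the 32-bit
+   displacement field of a jmp lives at 0x1000 and the new handler is at
+   0x2000, the value stored is 0x2000 - (0x1000 + 4) = 0xffc, i.e. relative
+   to the first byte after the displacement field, as x86 rel32 requires. */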
+
+/********************************************************************
+ * Soft flush handling support functions *
+ ********************************************************************/
+
+static __inline__ void set_dhtu(blockinfo* bi, void* dh)
+{
+ //printf("bi is %p\n",bi);
+ if (dh!=bi->direct_handler_to_use) {
+ dependency* x=bi->deplist;
+ //printf("bi->deplist=%p\n",bi->deplist);
+ while (x) {
+ //printf("x is %p\n",x);
+ //printf("x->next is %p\n",x->next);
+ //printf("x->prev_p is %p\n",x->prev_p);
+
+ if (x->jmp_off) {
+ adjust_jmpdep(x,dh);
+ }
+ x=x->next;
+ }
+ bi->direct_handler_to_use=dh;
+ }
+}
+
+static __inline__ void invalidate_block(blockinfo* bi)
+{
+ int i;
+
+ bi->optlevel=0;
+ bi->count=currprefs.optcount[0]-1;
+ bi->handler=NULL;
+ bi->handler_to_use=popall_execute_normal;
+ bi->direct_handler=NULL;
+ set_dhtu(bi,bi->direct_pen);
+ bi->needed_flags=0xff;
+
+ for (i=0;i<2;i++) {
+ bi->dep[i].jmp_off=NULL;
+ bi->dep[i].target=NULL;
+ }
+ remove_deps(bi);
+}
+
+static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
+{
+ blockinfo* tbi=get_blockinfo_addr((void*)target);
+
+ Dif(!tbi) {
+ printf("Could not create jmpdep!\n");
+ abort();
+ }
+ bi->dep[i].jmp_off=jmpaddr;
+ bi->dep[i].target=tbi;
+ bi->dep[i].next=tbi->deplist;
+ if (bi->dep[i].next)
+ bi->dep[i].next->prev_p=&(bi->dep[i].next);
+ bi->dep[i].prev_p=&(tbi->deplist);
+ tbi->deplist=&(bi->dep[i]);
+}
+
+static __inline__ void big_to_small_state(bigstate* b, smallstate* s)
+{
+ int i;
+ int count=0;
+
+ for (i=0;i<N_REGS;i++) {
+ s->nat[i].validsize=0;
+ s->nat[i].dirtysize=0;
+ if (b->nat[i].nholds) {
+ int index=b->nat[i].nholds-1;
+ int r=b->nat[i].holds[index];
+ s->nat[i].holds=r;
+ s->nat[i].validsize=b->state[r].validsize;
+ s->nat[i].dirtysize=b->state[r].dirtysize;
+ count++;
+ }
+ }
+ printf("count=%d\n",count);
+ for (i=0;i<N_REGS;i++) { // FIXME --- don't do dirty yet
+ s->nat[i].dirtysize=0;
+ }
+}
+
+static __inline__ void attached_state(blockinfo* bi)
+{
+ bi->havestate=1;
+ if (bi->direct_handler_to_use==bi->direct_handler)
+ set_dhtu(bi,bi->direct_pen);
+ bi->direct_handler=bi->direct_pen;
+ bi->status=BI_TARGETTED;
+}
+
+static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
+{
+ blockinfo* bi=get_blockinfo_addr(addr);
+ int i;
+
+#if USE_OPTIMIZER
+ if (reg_alloc_run)
+ return NULL;
+#endif
+ if (!bi) {
+ for (i=0;i<MAX_HOLD_BI && !bi;i++) {
+ if (hold_bi[i]) {
+ uae_u32 cl=cacheline(addr);
+
+ bi=hold_bi[i];
+ hold_bi[i]=NULL;
+ bi->pc_p=addr;
+ invalidate_block(bi);
+ add_to_active(bi);
+ add_to_cl_list(bi);
+
+ }
+ }
+ }
+ if (!bi) {
+ write_log ("Looking for blockinfo, can't find free one\n");
+ abort();
+ }
+
+#if USE_MATCHSTATE
+ if (setstate &&
+ !bi->havestate) {
+ big_to_small_state(&live,&(bi->env));
+ attached_state(bi);
+ }
+#endif
+ return bi;
+}
+
+static void prepare_block(blockinfo* bi);
+
+static __inline__ void alloc_blockinfos(void)
+{
+ int i;
+ blockinfo* bi;
+
+ for (i=0;i<MAX_HOLD_BI;i++) {
+ if (hold_bi[i])
+ return;
+ bi=hold_bi[i]=(blockinfo*)current_compile_p;
+ current_compile_p+=sizeof(blockinfo);
+
+ prepare_block(bi);
+ }
+}
+
+/********************************************************************
+ * Preferences handling. This is just a convenient place to put it *
+ ********************************************************************/
+extern int have_done_picasso;
+
+void check_prefs_changed_comp (void)
+{
+ currprefs.comptrustbyte = changed_prefs.comptrustbyte;
+ currprefs.comptrustword = changed_prefs.comptrustword;
+ currprefs.comptrustlong = changed_prefs.comptrustlong;
+ currprefs.comptrustnaddr= changed_prefs.comptrustnaddr;
+ currprefs.compnf = changed_prefs.compnf;
+ currprefs.comp_hardflush= changed_prefs.comp_hardflush;
+ currprefs.comp_constjump= changed_prefs.comp_constjump;
+ currprefs.comp_oldsegv= changed_prefs.comp_oldsegv;
+ currprefs.compfpu= changed_prefs.compfpu;
+
+ if (currprefs.cachesize!=changed_prefs.cachesize) {
+ currprefs.cachesize = changed_prefs.cachesize;
+ alloc_cache();
+ }
+
+	// Illegal-mem logging: when the JIT is enabled this just tracks changed_prefs (the forced "= 0" below is disabled).
+ if( currprefs.cachesize )
+ currprefs.illegal_mem = changed_prefs.illegal_mem;// = 0;
+
+ currprefs.comp_midopt=changed_prefs.comp_midopt;
+ currprefs.comp_lowopt=changed_prefs.comp_lowopt;
+
+ if ( ( !canbang || !currprefs.cachesize ) &&
+ currprefs.comptrustbyte != 1 )
+ {
+ // Set all of these to indirect when canbang == 0
+ // Basically, set the compforcesettings option...
+ currprefs.comptrustbyte = 1;
+ currprefs.comptrustword = 1;
+ currprefs.comptrustlong = 1;
+ currprefs.comptrustnaddr= 1;
+ currprefs.compforcesettings = 1;
+
+ changed_prefs.comptrustbyte = 1;
+ changed_prefs.comptrustword = 1;
+ changed_prefs.comptrustlong = 1;
+ changed_prefs.comptrustnaddr= 1;
+ changed_prefs.compforcesettings = 1;
+
+ if( currprefs.cachesize )
+ {
+ write_log( "JIT: Reverting to \"indirect\" access, because canbang is zero!\n" );
+ }
+ }
+
+ if (!currprefs.compforcesettings && !have_done_picasso) {
+ int stop=0;
+ if (currprefs.comptrustbyte!=0 && currprefs.comptrustbyte!=3)
+ stop = 1, write_log("<JIT compiler> : comptrustbyte is not 'direct' or 'afterpic'\n");
+ if (currprefs.comptrustword!=0 && currprefs.comptrustword!=3)
+ stop = 1, write_log("<JIT compiler> : comptrustword is not 'direct' or 'afterpic'\n");
+ if (currprefs.comptrustlong!=0 && currprefs.comptrustlong!=3)
+ stop = 1, write_log("<JIT compiler> : comptrustlong is not 'direct' or 'afterpic'\n");
+ if (currprefs.comptrustnaddr!=0 && currprefs.comptrustnaddr!=3)
+ stop = 1, write_log("<JIT compiler> : comptrustnaddr is not 'direct' or 'afterpic'\n");
+ if (currprefs.compnf!=1)
+ stop = 1, write_log("<JIT compiler> : compnf is not 'yes'\n");
+ if (currprefs.cachesize<1024)
+ stop = 1, write_log("<JIT compiler> : cachesize is less than 1024\n");
+ if (currprefs.comp_hardflush)
+ stop = 1, write_log("<JIT compiler> : comp_flushmode is 'hard'\n");
+ if (!canbang)
+ stop = 1, write_log("<JIT compiler> : Cannot use most direct memory access,\n"
+ " and unable to recover from failed guess!\n");
+#if 0
+ if (stop) {
+ gui_message("JIT: Configuration problems were detected!\n"
+ "JIT: These will adversely affect performance, and should\n"
+ "JIT: not be used. For more info, please see README.JIT-tuning\n"
+ "JIT: in the UAE documentation directory. You can force\n"
+ "JIT: your settings to be used by setting\n"
+ "JIT: 'compforcesettings=yes'\n"
+ "JIT: in your config file\n");
+ exit(1);
+ }
+#endif
+ }
+}
+
+/********************************************************************
+ * Get the optimizer stuff *
+ ********************************************************************/
+
+#include "compemu_optimizer.c"
+
+/********************************************************************
+ * Functions to emit data into memory, and other general support *
+ ********************************************************************/
+
+static uae_u8* target;
+
+static void emit_init(void)
+{
+}
+
+static __inline__ void emit_byte(uae_u8 x)
+{
+ *target++=x;
+}
+
+static __inline__ void emit_word(uae_u16 x)
+{
+ *((uae_u16*)target)=x;
+ target+=2;
+}
+
+static __inline__ void emit_long(uae_u32 x)
+{
+ *((uae_u32*)target)=x;
+ target+=4;
+}
+
+static __inline__ uae_u32 reverse32(uae_u32 oldv)
+{
+ return ((oldv>>24)&0xff) | ((oldv>>8)&0xff00) |
+ ((oldv<<8)&0xff0000) | ((oldv<<24)&0xff000000);
+}
+
+
+void set_target(uae_u8* t)
+{
+ lopt_emit_all();
+ target=t;
+}
+
+static __inline__ uae_u8* get_target_noopt(void)
+{
+ return target;
+}
+
+__inline__ uae_u8* get_target(void)
+{
+ lopt_emit_all();
+ return get_target_noopt();
+}
+
+
+/********************************************************************
+ * Getting the information about the target CPU *
+ ********************************************************************/
+
+#include "compemu_raw_x86.c"
+
+
+/********************************************************************
+ * Flags status handling. EMIT TIME! *
+ ********************************************************************/
+
+static void bt_l_ri_noclobber(R4 r, IMM i);
+
+static void make_flags_live_internal(void)
+{
+ if (live.flags_in_flags==VALID)
+ return;
+ Dif (live.flags_on_stack==TRASH) {
+ printf("Want flags, got something on stack, but it is TRASH\n");
+ abort();
+ }
+ if (live.flags_on_stack==VALID) {
+ int tmp;
+ tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
+ raw_reg_to_flags(tmp);
+ unlock(tmp);
+
+ live.flags_in_flags=VALID;
+ return;
+ }
+ printf("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
+ live.flags_in_flags,live.flags_on_stack);
+ abort();
+}
+
+static void flags_to_stack(void)
+{
+ if (live.flags_on_stack==VALID)
+ return;
+ if (!live.flags_are_important) {
+ live.flags_on_stack=VALID;
+ return;
+ }
+ Dif (live.flags_in_flags!=VALID)
+ abort();
+ else {
+ int tmp;
+ tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
+ raw_flags_to_reg(tmp);
+ unlock(tmp);
+ }
+ live.flags_on_stack=VALID;
+}
+
+static __inline__ void clobber_flags(void)
+{
+ if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
+ flags_to_stack();
+ live.flags_in_flags=TRASH;
+}
+
+/* Prepare for leaving the compiled stuff */
+static __inline__ void flush_flags(void)
+{
+ flags_to_stack();
+ return;
+}
+
+int touchcnt;
+
+/********************************************************************
+ * register allocation per block logging *
+ ********************************************************************/
+
+static uae_s8 vstate[VREGS];
+static uae_s8 nstate[N_REGS];
+
+#define L_UNKNOWN -127
+#define L_UNAVAIL -1
+#define L_NEEDED -2
+#define L_UNNEEDED -3
+
+static __inline__ void log_startblock(void)
+{
+ int i;
+ for (i=0;i<VREGS;i++)
+ vstate[i]=L_UNKNOWN;
+ for (i=0;i<N_REGS;i++)
+ nstate[i]=L_UNKNOWN;
+}
+
+static __inline__ void log_isused(int n)
+{
+ if (nstate[n]==L_UNKNOWN)
+ nstate[n]=L_UNAVAIL;
+}
+
+static __inline__ void log_isreg(int n, int r)
+{
+ if (nstate[n]==L_UNKNOWN)
+ nstate[n]=r;
+ if (vstate[r]==L_UNKNOWN)
+ vstate[r]=L_NEEDED;
+}
+
+static __inline__ void log_clobberreg(int r)
+{
+ if (vstate[r]==L_UNKNOWN)
+ vstate[r]=L_UNNEEDED;
+}
+
+/* This ends all possibility of clever register allocation */
+
+static __inline__ void log_flush(void)
+{
+ int i;
+ for (i=0;i<VREGS;i++)
+ if (vstate[i]==L_UNKNOWN)
+ vstate[i]=L_NEEDED;
+ for (i=0;i<N_REGS;i++)
+ if (nstate[i]==L_UNKNOWN)
+ nstate[i]=L_UNAVAIL;
+}
+
+static __inline__ void log_dump(void)
+{
+ int i;
+
+ return;
+
+ write_log("----------------------\n");
+ for (i=0;i<N_REGS;i++) {
+ switch(nstate[i]) {
+ case L_UNKNOWN: write_log("Nat %d : UNKNOWN\n",i); break;
+ case L_UNAVAIL: write_log("Nat %d : UNAVAIL\n",i); break;
+ default: write_log("Nat %d : %d\n",i,nstate[i]); break;
+ }
+ }
+ for (i=0;i<VREGS;i++) {
+ if (vstate[i]==L_UNNEEDED)
+ write_log("Virt %d: UNNEEDED\n",i);
+ }
+}
+
+/********************************************************************
+ * register status handling. EMIT TIME! *
+ ********************************************************************/
+
+static __inline__ void set_status(int r, int status)
+{
+ if (status==ISCONST)
+ log_clobberreg(r);
+ live.state[r].status=status;
+}
+
+
+static __inline__ int isinreg(int r)
+{
+ return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
+}
+
+static __inline__ void adjust_nreg(int r, uae_u32 val)
+{
+ if (!val)
+ return;
+ raw_lea_l_brr(r,r,val);
+}
+
+static void tomem(int r)
+{
+ int rr=live.state[r].realreg;
+
+ if (isinreg(r)) {
+ if (live.state[r].val &&
+ live.nat[rr].nholds==1 &&
+ !live.nat[rr].locked) {
+ // printf("RemovingA offset %x from reg %d (%d) at %p\n",
+ // live.state[r].val,r,rr,target);
+ adjust_nreg(rr,live.state[r].val);
+ live.state[r].val=0;
+ live.state[r].dirtysize=4;
+ set_status(r,DIRTY);
+ }
+ }
+
+ if (live.state[r].status==DIRTY) {
+ switch (live.state[r].dirtysize) {
+ case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
+ case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
+ case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
+ default: abort();
+ }
+ set_status(r,CLEAN);
+ live.state[r].dirtysize=0;
+ }
+}
+
+static __inline__ int isconst(int r)
+{
+ return live.state[r].status==ISCONST;
+}
+
+int is_const(int r)
+{
+ return isconst(r);
+}
+
+static __inline__ void writeback_const(int r)
+{
+ if (!isconst(r))
+ return;
+ Dif (live.state[r].needflush==NF_HANDLER) {
+ write_log ("Trying to write back constant NF_HANDLER!\n");
+ abort();
+ }
+
+ raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
+ live.state[r].val=0;
+ set_status(r,INMEM);
+}
+
+static __inline__ void tomem_c(int r)
+{
+ if (isconst(r)) {
+ writeback_const(r);
+ }
+ else
+ tomem(r);
+}
+
+static void evict(int r)
+{
+ int rr;
+
+ if (!isinreg(r))
+ return;
+ tomem(r);
+ rr=live.state[r].realreg;
+
+ Dif (live.nat[rr].locked &&
+ live.nat[rr].nholds==1) {
+ write_log ("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
+ abort();
+ }
+
+ live.nat[rr].nholds--;
+ if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
+ int topreg=live.nat[rr].holds[live.nat[rr].nholds];
+ int thisind=live.state[r].realind;
+ live.nat[rr].holds[thisind]=topreg;
+ live.state[topreg].realind=thisind;
+ }
+ live.state[r].realreg=-1;
+ set_status(r,INMEM);
+}
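+
+/* Note on the bookkeeping above: the holds[] array of a native register is
+   kept dense by moving its last entry into the slot vacated by the evicted
+   virtual register, so nholds always counts contiguous valid entries. */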
+
+static __inline__ void free_nreg(int r)
+{
+ int i=live.nat[r].nholds;
+
+ while (i) {
+ int vr;
+
+ --i;
+ vr=live.nat[r].holds[i];
+ evict(vr);
+ }
+ Dif (live.nat[r].nholds!=0) {
+ printf("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
+ abort();
+ }
+}
+
+/* Use with care! */
+static __inline__ void isclean(int r)
+{
+ if (!isinreg(r))
+ return;
+ live.state[r].validsize=4;
+ live.state[r].dirtysize=0;
+ live.state[r].val=0;
+ set_status(r,CLEAN);
+}
+
+static __inline__ void disassociate(int r)
+{
+ isclean(r);
+ evict(r);
+}
+
+static __inline__ void set_const(int r, uae_u32 val)
+{
+ disassociate(r);
+ live.state[r].val=val;
+ set_status(r,ISCONST);
+}
+
+static __inline__ uae_u32 get_offset(int r)
+{
+ return live.state[r].val;
+}
+
+static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
+{
+ int bestreg;
+ uae_s32 when;
+ int i;
+ uae_s32 badness=0; /* to shut up gcc */
+ bestreg=-1;
+ when=2000000000;
+
+ for (i=N_REGS;i--;) {
+ badness=live.nat[i].touched;
+ if (live.nat[i].nholds==0)
+ badness=0;
+ if (i==hint)
+ badness-=200000000;
+ if (!live.nat[i].locked && badness<when) {
+ if ((size==1 && live.nat[i].canbyte) ||
+ (size==2 && live.nat[i].canword) ||
+ (size==4)) {
+ bestreg=i;
+ when=badness;
+ if (live.nat[i].nholds==0 && hint<0)
+ break;
+ if (i==hint)
+ break;
+ }
+ }
+ }
+ Dif (bestreg==-1)
+ abort();
+
+ if (live.nat[bestreg].nholds>0) {
+ free_nreg(bestreg);
+ }
+ if (isinreg(r)) {
+ int rr=live.state[r].realreg;
+ /* This will happen if we read a partially dirty register at a
+ bigger size */
+ Dif (willclobber || live.state[r].validsize>=size)
+ abort();
+ Dif (live.nat[rr].nholds!=1)
+ abort();
+ if (size==4 && live.state[r].validsize==2) {
+ log_isused(bestreg);
+ raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
+ raw_bswap_32(bestreg);
+ raw_zero_extend_16_rr(rr,rr);
+ raw_zero_extend_16_rr(bestreg,bestreg);
+ raw_bswap_32(bestreg);
+ raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
+ live.state[r].validsize=4;
+ live.nat[rr].touched=touchcnt++;
+ return rr;
+ }
+ if (live.state[r].validsize==1) {
+ /* Nothing yet */
+ }
+ evict(r);
+ }
+
+ if (!willclobber) {
+ if (live.state[r].status!=UNDEF) {
+ if (isconst(r)) {
+ raw_mov_l_ri(bestreg,live.state[r].val);
+ live.state[r].val=0;
+ live.state[r].dirtysize=4;
+ set_status(r,DIRTY);
+ log_isused(bestreg);
+ }
+ else {
+ if (r==FLAGTMP)
+ raw_load_flagreg(bestreg,r);
+ else if (r==FLAGX)
+ raw_load_flagx(bestreg,r);
+ else {
+ raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
+ }
+ live.state[r].dirtysize=0;
+ set_status(r,CLEAN);
+ log_isreg(bestreg,r);
+ }
+ }
+ else {
+ live.state[r].val=0;
+ live.state[r].dirtysize=0;
+ set_status(r,CLEAN);
+ log_isused(bestreg);
+ }
+ live.state[r].validsize=4;
+ }
+ else { /* this is the easiest way, but not optimal. FIXME! */
+ /* Now it's trickier, but hopefully still OK */
+ if (!isconst(r) || size==4) {
+ live.state[r].validsize=size;
+ live.state[r].dirtysize=size;
+ live.state[r].val=0;
+ set_status(r,DIRTY);
+ if (size==4)
+ log_isused(bestreg);
+ else
+ log_isreg(bestreg,r);
+ }
+ else {
+ if (live.state[r].status!=UNDEF)
+ raw_mov_l_ri(bestreg,live.state[r].val);
+ live.state[r].val=0;
+ live.state[r].validsize=4;
+ live.state[r].dirtysize=4;
+ set_status(r,DIRTY);
+ log_isused(bestreg);
+ }
+ }
+ live.state[r].realreg=bestreg;
+ live.state[r].realind=live.nat[bestreg].nholds;
+ live.nat[bestreg].touched=touchcnt++;
+ live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
+ live.nat[bestreg].nholds++;
+
+ return bestreg;
+}
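+
+/* The selection loop above is a cheap least-recently-used policy with two
+   overrides: a native register that holds nothing gets badness 0, and the
+   caller's hint gets a large bonus, so free or hinted registers win over
+   registers that are merely old. */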
+
+static int alloc_reg(int r, int size, int willclobber)
+{
+ return alloc_reg_hinted(r,size,willclobber,-1);
+}
+
+static void unlock(int r)
+{
+ Dif (!live.nat[r].locked)
+ abort();
+ live.nat[r].locked--;
+}
+
+static void setlock(int r)
+{
+ live.nat[r].locked++;
+}
+
+
+static void mov_nregs(int d, int s)
+{
+ int ns=live.nat[s].nholds;
+ int nd=live.nat[d].nholds;
+ int i;
+
+ if (s==d)
+ return;
+
+ if (nd>0)
+ free_nreg(d);
+
+ raw_mov_l_rr(d,s);
+ log_isused(d);
+
+ for (i=0;i<live.nat[s].nholds;i++) {
+ int vs=live.nat[s].holds[i];
+
+ live.state[vs].realreg=d;
+ live.state[vs].realind=i;
+ live.nat[d].holds[i]=vs;
+ }
+ live.nat[d].nholds=live.nat[s].nholds;
+
+ live.nat[s].nholds=0;
+}
+
+
+static __inline__ void make_exclusive(int r, int size, int spec)
+{
+ int clobber;
+ reg_status oldstate;
+ int rr=live.state[r].realreg;
+ int nr;
+ int nind;
+ int ndirt=0;
+ int i;
+
+ if (!isinreg(r))
+ return;
+ if (live.nat[rr].nholds==1)
+ return;
+ for (i=0;i<live.nat[rr].nholds;i++) {
+ int vr=live.nat[rr].holds[i];
+ if (vr!=r &&
+ (live.state[vr].status==DIRTY || live.state[vr].val))
+ ndirt++;
+ }
+ if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
+ /* Everything else is clean, so let's keep this register */
+ for (i=0;i<live.nat[rr].nholds;i++) {
+ int vr=live.nat[rr].holds[i];
+ if (vr!=r) {
+ evict(vr);
+ i--; /* Try that index again! */
+ }
+ }
+ Dif (live.nat[rr].nholds!=1) {
+ printf("natreg %d holds %d vregs, %d not exclusive\n",
+ rr,live.nat[rr].nholds,r);
+ abort();
+ }
+ return;
+ }
+
+ /* We have to split the register */
+ oldstate=live.state[r];
+
+ setlock(rr); /* Make sure this doesn't go away */
+ /* Forget about r being in the register rr */
+ disassociate(r);
+ /* Get a new register, that we will clobber completely */
+ if (oldstate.status==DIRTY) {
+ /* If dirtysize is <4, we need a register that can handle the
+ eventual smaller memory store! Thanks to Quake68k for exposing
+ this detail ;-) */
+ nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
+ }
+ else {
+ nr=alloc_reg_hinted(r,4,1,spec);
+ }
+ nind=live.state[r].realind;
+ live.state[r]=oldstate; /* Keep all the old state info */
+ live.state[r].realreg=nr;
+ live.state[r].realind=nind;
+
+ if (size<live.state[r].validsize) {
+ if (live.state[r].val) {
+ /* Might as well compensate for the offset now */
+ raw_lea_l_brr(nr,rr,oldstate.val);
+ live.state[r].val=0;
+ live.state[r].dirtysize=4;
+ set_status(r,DIRTY);
+ }
+ else
+ raw_mov_l_rr(nr,rr); /* Make another copy */
+ }
+ unlock(rr);
+}
+
+static __inline__ void add_offset(int r, uae_u32 off)
+{
+ live.state[r].val+=off;
+}
+
+static __inline__ void remove_offset(int r, int spec)
+{
+ reg_status oldstate;
+ int rr;
+
+ if (isconst(r))
+ return;
+ if (live.state[r].val==0)
+ return;
+ if (isinreg(r) && live.state[r].validsize<4)
+ evict(r);
+
+ if (!isinreg(r))
+ alloc_reg_hinted(r,4,0,spec);
+
+ Dif (live.state[r].validsize!=4) {
+ printf("Validsize=%d in remove_offset\n",live.state[r].validsize);
+ abort();
+ }
+ make_exclusive(r,0,-1);
+ /* make_exclusive might have done the job already */
+ if (live.state[r].val==0)
+ return;
+
+ rr=live.state[r].realreg;
+
+ if (live.nat[rr].nholds==1) {
+ //printf("RemovingB offset %x from reg %d (%d) at %p\n",
+ // live.state[r].val,r,rr,target);
+ adjust_nreg(rr,live.state[r].val);
+ live.state[r].dirtysize=4;
+ live.state[r].val=0;
+ set_status(r,DIRTY);
+ return;
+ }
+ printf("Failed in remove_offset\n");
+ abort();
+}
+
+STATIC_INLINE void remove_all_offsets(void)
+{
+ int i;
+
+ for (i=0;i<VREGS;i++)
+ remove_offset(i,-1);
+}
+
+static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
+{
+ int n;
+ int answer=-1;
+
+ if (live.state[r].status==UNDEF) {
+ printf("WARNING: Unexpected read of undefined register %d\n",r);
+ }
+ if (!can_offset)
+ remove_offset(r,spec);
+
+ if (isinreg(r) && live.state[r].validsize>=size) {
+ n=live.state[r].realreg;
+ switch(size) {
+ case 1:
+ if (live.nat[n].canbyte || spec>=0) {
+ answer=n;
+ }
+ break;
+ case 2:
+ if (live.nat[n].canword || spec>=0) {
+ answer=n;
+ }
+ break;
+ case 4:
+ answer=n;
+ break;
+ default: abort();
+ }
+ if (answer<0)
+ evict(r);
+ }
+ /* either the value was in memory to start with, or it was evicted and
+ is in memory now */
+ if (answer<0) {
+ answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
+ }
+
+ if (spec>=0 && spec!=answer) {
+ /* Too bad */
+ mov_nregs(spec,answer);
+ answer=spec;
+ }
+ live.nat[answer].locked++;
+ live.nat[answer].touched=touchcnt++;
+ return answer;
+}
+
+
+
+static int readreg(int r, int size)
+{
+ return readreg_general(r,size,-1,0);
+}
+
+static int readreg_specific(int r, int size, int spec)
+{
+ return readreg_general(r,size,spec,0);
+}
+
+static int readreg_offset(int r, int size)
+{
+ return readreg_general(r,size,-1,1);
+}
+
+
+static __inline__ int writereg_general(int r, int size, int spec)
+{
+ int n;
+ int answer=-1;
+
+ if (size<4) {
+ remove_offset(r,spec);
+ }
+
+ make_exclusive(r,size,spec);
+ if (isinreg(r)) {
+ int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
+ int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
+ n=live.state[r].realreg;
+
+ Dif (live.nat[n].nholds!=1)
+ abort();
+ switch(size) {
+ case 1:
+ if (live.nat[n].canbyte || spec>=0) {
+ live.state[r].dirtysize=ndsize;
+ live.state[r].validsize=nvsize;
+ answer=n;
+ }
+ break;
+ case 2:
+ if (live.nat[n].canword || spec>=0) {
+ live.state[r].dirtysize=ndsize;
+ live.state[r].validsize=nvsize;
+ answer=n;
+ }
+ break;
+ case 4:
+ live.state[r].dirtysize=ndsize;
+ live.state[r].validsize=nvsize;
+ answer=n;
+ break;
+ default: abort();
+ }
+ if (answer<0)
+ evict(r);
+ }
+ /* either the value was in memory to start with, or it was evicted and
+ is in memory now */
+ if (answer<0) {
+ answer=alloc_reg_hinted(r,size,1,spec);
+ }
+ if (spec>=0 && spec!=answer) {
+ mov_nregs(spec,answer);
+ answer=spec;
+ }
+ if (live.state[r].status==UNDEF)
+ live.state[r].validsize=4;
+ live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
+ live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
+
+ live.nat[answer].locked++;
+ live.nat[answer].touched=touchcnt++;
+ if (size==4) {
+ live.state[r].val=0;
+ }
+ else {
+ Dif (live.state[r].val) {
+ printf("Problem with val\n");
+ abort();
+ }
+ }
+ set_status(r,DIRTY);
+ return answer;
+}
+
+static int writereg(int r, int size)
+{
+ return writereg_general(r,size,-1);
+}
+
+static int writereg_specific(int r, int size, int spec)
+{
+ return writereg_general(r,size,spec);
+}
+
+static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
+{
+ int n;
+ int answer=-1;
+
+ if (live.state[r].status==UNDEF) {
+ printf("WARNING: Unexpected read of undefined register %d\n",r);
+ }
+ remove_offset(r,spec);
+ make_exclusive(r,0,spec);
+
+ Dif (wsize<rsize) {
+ printf("Cannot handle wsize<rsize in rmw_general()\n");
+ abort();
+ }
+ if (isinreg(r) && live.state[r].validsize>=rsize) {
+ n=live.state[r].realreg;
+ Dif (live.nat[n].nholds!=1)
+ abort();
+
+ switch(rsize) {
+ case 1:
+ if (live.nat[n].canbyte || spec>=0) {
+ answer=n;
+ }
+ break;
+ case 2:
+ if (live.nat[n].canword || spec>=0) {
+ answer=n;
+ }
+ break;
+ case 4:
+ answer=n;
+ break;
+ default: abort();
+ }
+ if (answer<0)
+ evict(r);
+ }
+ /* either the value was in memory to start with, or it was evicted and
+ is in memory now */
+ if (answer<0) {
+ answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
+ }
+
+ if (spec>=0 && spec!=answer) {
+ /* Too bad */
+ mov_nregs(spec,answer);
+ answer=spec;
+ }
+ if (wsize>live.state[r].dirtysize)
+ live.state[r].dirtysize=wsize;
+ if (wsize>live.state[r].validsize)
+ live.state[r].validsize=wsize;
+ set_status(r,DIRTY);
+
+ live.nat[answer].locked++;
+ live.nat[answer].touched=touchcnt++;
+
+ Dif (live.state[r].val) {
+ printf("Problem with val(rmw)\n");
+ abort();
+ }
+ return answer;
+}
+
+static int rmw(int r, int wsize, int rsize)
+{
+ return rmw_general(r,wsize,rsize,-1);
+}
+
+static int rmw_specific(int r, int wsize, int rsize, int spec)
+{
+ return rmw_general(r,wsize,rsize,spec);
+}
+
+
+/* needed for restoring the carry flag on non-P6 cores */
+static void bt_l_ri_noclobber(R4 r, IMM i)
+{
+ int size=4;
+ if (i<16)
+ size=2;
+ r=readreg(r,size);
+ raw_bt_l_ri(r,i);
+ unlock(r);
+}
+
+/********************************************************************
+ * FPU register status handling. EMIT TIME! *
+ ********************************************************************/
+
+static void f_tomem(int r)
+{
+ if (live.fate[r].status==DIRTY) {
+#if USE_LONG_DOUBLE
+ raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
+#else
+ raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
+#endif
+ live.fate[r].status=CLEAN;
+ }
+}
+
+static void f_tomem_drop(int r)
+{
+ if (live.fate[r].status==DIRTY) {
+#if USE_LONG_DOUBLE
+ raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
+#else
+ raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
+#endif
+ live.fate[r].status=INMEM;
+ }
+}
+
+
+static __inline__ int f_isinreg(int r)
+{
+ return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
+}
+
+static void f_evict(int r)
+{
+ int rr;
+
+ if (!f_isinreg(r))
+ return;
+ rr=live.fate[r].realreg;
+ if (live.fat[rr].nholds==1)
+ f_tomem_drop(r);
+ else
+ f_tomem(r);
+
+ Dif (live.fat[rr].locked &&
+ live.fat[rr].nholds==1) {
+ write_log ("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
+ abort();
+ }
+
+ live.fat[rr].nholds--;
+ if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
+ int topreg=live.fat[rr].holds[live.fat[rr].nholds];
+ int thisind=live.fate[r].realind;
+ live.fat[rr].holds[thisind]=topreg;
+ live.fate[topreg].realind=thisind;
+ }
+ live.fate[r].status=INMEM;
+ live.fate[r].realreg=-1;
+}
+
+static __inline__ void f_free_nreg(int r)
+{
+ int i=live.fat[r].nholds;
+
+ while (i) {
+ int vr;
+
+ --i;
+ vr=live.fat[r].holds[i];
+ f_evict(vr);
+ }
+ Dif (live.fat[r].nholds!=0) {
+ printf("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
+ abort();
+ }
+}
+
+
+/* Use with care! */
+static __inline__ void f_isclean(int r)
+{
+ if (!f_isinreg(r))
+ return;
+ live.fate[r].status=CLEAN;
+}
+
+static __inline__ void f_disassociate(int r)
+{
+ f_isclean(r);
+ f_evict(r);
+}
+
+
+
+static int f_alloc_reg(int r, int willclobber)
+{
+ int bestreg;
+ uae_s32 when;
+ int i;
+ uae_s32 badness;
+ bestreg=-1;
+ when=2000000000;
+ for (i=N_FREGS;i--;) {
+ badness=live.fat[i].touched;
+ if (live.fat[i].nholds==0)
+ badness=0;
+
+ if (!live.fat[i].locked && badness<when) {
+ bestreg=i;
+ when=badness;
+ if (live.fat[i].nholds==0)
+ break;
+ }
+ }
+ Dif (bestreg==-1)
+ abort();
+
+ if (live.fat[bestreg].nholds>0) {
+ f_free_nreg(bestreg);
+ }
+ if (f_isinreg(r)) {
+ f_evict(r);
+ }
+
+ if (!willclobber) {
+ if (live.fate[r].status!=UNDEF) {
+#if USE_LONG_DOUBLE
+ raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
+#else
+ raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
+#endif
+ }
+ live.fate[r].status=CLEAN;
+ }
+ else {
+ live.fate[r].status=DIRTY;
+ }
+ live.fate[r].realreg=bestreg;
+ live.fate[r].realind=live.fat[bestreg].nholds;
+ live.fat[bestreg].touched=touchcnt++;
+ live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
+ live.fat[bestreg].nholds++;
+
+ return bestreg;
+}
+
+static void f_unlock(int r)
+{
+ Dif (!live.fat[r].locked)
+ abort();
+ live.fat[r].locked--;
+}
+
+static void f_setlock(int r)
+{
+ live.fat[r].locked++;
+}
+
+static __inline__ int f_readreg(int r)
+{
+ int n;
+ int answer=-1;
+
+ if (f_isinreg(r)) {
+ n=live.fate[r].realreg;
+ answer=n;
+ }
+ /* either the value was in memory to start with, or it was evicted and
+ is in memory now */
+ if (answer<0)
+ answer=f_alloc_reg(r,0);
+
+ live.fat[answer].locked++;
+ live.fat[answer].touched=touchcnt++;
+ return answer;
+}
+
+static __inline__ void f_make_exclusive(int r, int clobber)
+{
+ freg_status oldstate;
+ int rr=live.fate[r].realreg;
+ int nr;
+ int nind;
+ int ndirt=0;
+ int i;
+
+ if (!f_isinreg(r))
+ return;
+ if (live.fat[rr].nholds==1)
+ return;
+ for (i=0;i<live.fat[rr].nholds;i++) {
+ int vr=live.fat[rr].holds[i];
+ if (vr!=r && live.fate[vr].status==DIRTY)
+ ndirt++;
+ }
+ if (!ndirt && !live.fat[rr].locked) {
+ /* Everything else is clean, so let's keep this register */
+ for (i=0;i<live.fat[rr].nholds;i++) {
+ int vr=live.fat[rr].holds[i];
+ if (vr!=r) {
+ f_evict(vr);
+ i--; /* Try that index again! */
+ }
+ }
+ Dif (live.fat[rr].nholds!=1) {
+ printf("realreg %d holds %d (",rr,live.fat[rr].nholds);
+ for (i=0;i<live.fat[rr].nholds;i++) {
+ printf(" %d(%d,%d)",live.fat[rr].holds[i],
+ live.fate[live.fat[rr].holds[i]].realreg,
+ live.fate[live.fat[rr].holds[i]].realind);
+ }
+ printf("\n");
+ abort();
+ }
+ return;
+ }
+
+ /* We have to split the register */
+ oldstate=live.fate[r];
+
+ f_setlock(rr); /* Make sure this doesn't go away */
+ /* Forget about r being in the register rr */
+ f_disassociate(r);
+ /* Get a new register, that we will clobber completely */
+ nr=f_alloc_reg(r,1);
+ nind=live.fate[r].realind;
+ if (!clobber)
+ raw_fmov_rr(nr,rr); /* Make another copy */
+ live.fate[r]=oldstate; /* Keep all the old state info */
+ live.fate[r].realreg=nr;
+ live.fate[r].realind=nind;
+ f_unlock(rr);
+}
+
+
+static __inline__ int f_writereg(int r)
+{
+ int n;
+ int answer=-1;
+
+ f_make_exclusive(r,1);
+ if (f_isinreg(r)) {
+ n=live.fate[r].realreg;
+ answer=n;
+ }
+ if (answer<0) {
+ answer=f_alloc_reg(r,1);
+ }
+ live.fate[r].status=DIRTY;
+ live.fat[answer].locked++;
+ live.fat[answer].touched=touchcnt++;
+ return answer;
+}
+
+static int f_rmw(int r)
+{
+ int n;
+
+ f_make_exclusive(r,0);
+ if (f_isinreg(r)) {
+ n=live.fate[r].realreg;
+ }
+ else
+ n=f_alloc_reg(r,0);
+ live.fate[r].status=DIRTY;
+ live.fat[n].locked++;
+ live.fat[n].touched=touchcnt++;
+ return n;
+}
+
+static void fflags_into_flags_internal(uae_u32 tmp)
+{
+ int r;
+
+ clobber_flags();
+ r=f_readreg(FP_RESULT);
+ raw_fflags_into_flags(r);
+ f_unlock(r);
+}
+
+
+
+
+/********************************************************************
+ * CPU functions exposed to gencomp. Both CREATE and EMIT time *
+ ********************************************************************/
+
+/*
+ * RULES FOR HANDLING REGISTERS:
+ *
+ * * In the function headers, order the parameters
+ * - 1st registers written to
+ * - 2nd read/modify/write registers
+ * - 3rd registers read from
+ * * Before calling raw_*, you must call readreg, writereg or rmw for
+ * each register
+ * * The order for this is
+ * - 1st call remove_offset for all registers written to with size<4
+ * - 2nd call readreg for all registers read without offset
+ * - 3rd call rmw for all rmw registers
+ * - 4th call readreg_offset for all registers that can handle offsets
+ * - 5th call get_offset for all the registers from the previous step
+ * - 6th call writereg for all written-to registers
+ * - 7th call raw_*
+ * - 8th unlock all registers that were locked
+ */
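+
+/* A minimal sketch of the ordering above, focusing on steps 4-6 (the less
+   obvious part): load a long from s+offset into d while s may still carry a
+   pending constant offset.  The helpers used (CLOBBER_MOV, readreg_offset,
+   get_offset, writereg, raw_mov_l_brR, unlock) are the ones defined in this
+   file; the function itself is illustrative only and is compiled out. */
+#if 0
+static void example_load_l(int d, int s, uae_u32 offset)
+{
+    int sreg=s;
+    CLOBBER_MOV;
+    s=readreg_offset(s,4);     /* 4th: reads that can tolerate an offset */
+    offset+=get_offset(sreg);  /* 5th: fold the pending offset into the disp */
+    d=writereg(d,4);           /* 6th: registers written to */
+    raw_mov_l_brR(d,s,offset); /* 7th: emit the native instruction */
+    unlock(d);                 /* 8th: unlock everything locked above */
+    unlock(s);
+}
+#endif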
+
+MIDFUNC(0,live_flags,(void))
+{
+ live.flags_on_stack=TRASH;
+ live.flags_in_flags=VALID;
+ live.flags_are_important=1;
+}
+MENDFUNC(0,live_flags,(void))
+
+MIDFUNC(0,dont_care_flags,(void))
+{
+ live.flags_are_important=0;
+}
+MENDFUNC(0,dont_care_flags,(void))
+
+
+MIDFUNC(0,duplicate_carry,(void))
+{
+ evict(FLAGX);
+ make_flags_live_internal();
+ COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
+}
+MENDFUNC(0,duplicate_carry,(void))
+
+MIDFUNC(0,restore_carry,(void))
+{
+ if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
+ bt_l_ri_noclobber(FLAGX,0);
+ }
+ else { /* Avoid the stall the above creates.
+ This is slow on non-P6, though.
+ */
+ COMPCALL(rol_b_ri(FLAGX,8));
+ isclean(FLAGX);
+ /* Why is the above faster than the below? */
+ //raw_rol_b_mi((uae_u32)live.state[FLAGX].mem,8);
+ }
+}
+MENDFUNC(0,restore_carry,(void))
+
+MIDFUNC(0,start_needflags,(void))
+{
+ needflags=1;
+}
+MENDFUNC(0,start_needflags,(void))
+
+MIDFUNC(0,end_needflags,(void))
+{
+ needflags=0;
+}
+MENDFUNC(0,end_needflags,(void))
+
+MIDFUNC(0,make_flags_live,(void))
+{
+ make_flags_live_internal();
+}
+MENDFUNC(0,make_flags_live,(void))
+
+MIDFUNC(1,fflags_into_flags,(W2 tmp))
+{
+ clobber_flags();
+ fflags_into_flags_internal(tmp);
+}
+MENDFUNC(1,fflags_into_flags,(W2 tmp))
+
+
+MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
+{
+ int size=4;
+ if (i<16)
+ size=2;
+ CLOBBER_BT;
+ r=readreg(r,size);
+ raw_bt_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
+
+MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
+{
+ CLOBBER_BT;
+ r=readreg(r,4);
+ b=readreg(b,4);
+ raw_bt_l_rr(r,b);
+ unlock(r);
+ unlock(b);
+}
+MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
+
+MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
+{
+ int size=4;
+ if (i<16)
+ size=2;
+ CLOBBER_BT;
+ r=rmw(r,size,size);
+ raw_btc_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
+{
+ CLOBBER_BT;
+ b=readreg(b,4);
+ r=rmw(r,4,4);
+ raw_btc_l_rr(r,b);
+ unlock(r);
+ unlock(b);
+}
+MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
+
+
+MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
+{
+ int size=4;
+ if (i<16)
+ size=2;
+ CLOBBER_BT;
+ r=rmw(r,size,size);
+ raw_btr_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
+{
+ CLOBBER_BT;
+ b=readreg(b,4);
+ r=rmw(r,4,4);
+ raw_btr_l_rr(r,b);
+ unlock(r);
+ unlock(b);
+}
+MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
+
+
+MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
+{
+ int size=4;
+ if (i<16)
+ size=2;
+ CLOBBER_BT;
+ r=rmw(r,size,size);
+ raw_bts_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
+{
+ CLOBBER_BT;
+ b=readreg(b,4);
+ r=rmw(r,4,4);
+ raw_bts_l_rr(r,b);
+ unlock(r);
+ unlock(b);
+}
+MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
+
+MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,4);
+ raw_mov_l_rm(d,s);
+ unlock(d);
+}
+MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
+
+
+MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
+{
+ r=readreg(r,4);
+ raw_call_r(r);
+ unlock(r);
+}
+MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
+
+MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
+{
+ CLOBBER_SUB;
+ raw_sub_l_mi(d,s) ;
+}
+MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
+
+MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
+{
+ CLOBBER_MOV;
+ raw_mov_l_mi(d,s) ;
+}
+MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
+
+MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
+{
+ CLOBBER_MOV;
+ raw_mov_w_mi(d,s) ;
+}
+MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
+
+MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
+{
+ CLOBBER_MOV;
+ raw_mov_b_mi(d,s) ;
+}
+MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
+
+MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROL;
+ r=rmw(r,1,1);
+ raw_rol_b_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
+
+MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROL;
+ r=rmw(r,2,2);
+ raw_rol_w_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
+
+MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROL;
+ r=rmw(r,4,4);
+ raw_rol_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_ROL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,4,4);
+ Dif (r!=1) {
+	  write_log ("Illegal register %d in raw_rol_l\n",r);
+ abort();
+ }
+ raw_rol_l_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
+
+MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_ROL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,2,2);
+ Dif (r!=1) {
+	  write_log ("Illegal register %d in raw_rol_w\n",r);
+ abort();
+ }
+ raw_rol_w_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
+
+MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_ROL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ Dif (r!=1) {
+ write_log ("Illegal register %d in raw_rol_b\n",r);
+ abort();
+ }
+ raw_rol_b_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
+
+
+MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_SHLL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,4,4);
+ Dif (r!=1) {
+	  write_log ("Illegal register %d in raw_shll_l\n",r);
+ abort();
+ }
+ raw_shll_l_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
+
+MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_SHLL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,2,2);
+ Dif (r!=1) {
+	  write_log ("Illegal register %d in raw_shll_w\n",r);
+ abort();
+ }
+ raw_shll_w_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
+
+MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_SHLL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ Dif (r!=1) {
+ write_log ("Illegal register %d in raw_shll_b\n",r);
+ abort();
+ }
+ raw_shll_b_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
+
+
+MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROR;
+ r=rmw(r,1,1);
+ raw_ror_b_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
+
+MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROR;
+ r=rmw(r,2,2);
+ raw_ror_w_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
+
+MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROR;
+ r=rmw(r,4,4);
+ raw_ror_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
+
+MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_ROR;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,4,4);
+ raw_ror_l_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
+
+MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_ROR;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,2,2);
+ raw_ror_w_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
+
+MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_ROR;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ raw_ror_b_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
+
+MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_SHRL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,4,4);
+ Dif (r!=1) {
+	  write_log ("Illegal register %d in raw_shrl_l\n",r);
+ abort();
+ }
+ raw_shrl_l_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
+
+MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_SHRL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,2,2);
+ Dif (r!=1) {
+	  write_log ("Illegal register %d in raw_shrl_w\n",r);
+ abort();
+ }
+ raw_shrl_w_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
+
+MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_SHRL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ Dif (r!=1) {
+ write_log ("Illegal register %d in raw_shrl_b\n",r);
+ abort();
+ }
+ raw_shrl_b_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
+
+
+
+MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ if (isconst(r) && !needflags) {
+ live.state[r].val<<=i;
+ return;
+ }
+ CLOBBER_SHLL;
+ r=rmw(r,4,4);
+ raw_shll_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHLL;
+ r=rmw(r,2,2);
+ raw_shll_w_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
+
+MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHLL;
+ r=rmw(r,1,1);
+ raw_shll_b_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
+
+MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ if (isconst(r) && !needflags) {
+ live.state[r].val>>=i;
+ return;
+ }
+ CLOBBER_SHRL;
+ r=rmw(r,4,4);
+ raw_shrl_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRL;
+ r=rmw(r,2,2);
+ raw_shrl_w_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
+
+MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRL;
+ r=rmw(r,1,1);
+ raw_shrl_b_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
+
+MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRA;
+ r=rmw(r,4,4);
+ raw_shra_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRA;
+ r=rmw(r,2,2);
+ raw_shra_w_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
+
+MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRA;
+ r=rmw(r,1,1);
+ raw_shra_b_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
+
+MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_SHRA;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,4,4);
+ Dif (r!=1) {
+	  write_log ("Illegal register %d in raw_shra_l\n",r);
+ abort();
+ }
+ raw_shra_l_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
+
+MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_SHRA;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,2,2);
+ Dif (r!=1) {
+	  write_log ("Illegal register %d in raw_shra_w\n",r);
+ abort();
+ }
+ raw_shra_w_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
+
+MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_SHRA;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ Dif (r!=1) {
+ write_log ("Illegal register %d in raw_shra_b\n",r);
+ abort();
+ }
+ raw_shra_b_rr(d,r) ;
+ unlock(r);
+ unlock(d);
+}
+MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
+
+
+MIDFUNC(2,setcc,(W1 d, IMM cc))
+{
+ CLOBBER_SETCC;
+ d=writereg(d,1);
+ raw_setcc(d,cc);
+ unlock(d);
+}
+MENDFUNC(2,setcc,(W1 d, IMM cc))
+
+MIDFUNC(2,setcc_m,(IMM d, IMM cc))
+{
+ CLOBBER_SETCC;
+ raw_setcc_m(d,cc);
+}
+MENDFUNC(2,setcc_m,(IMM d, IMM cc))
+
+MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
+{
+ if (d==s)
+ return;
+ CLOBBER_CMOV;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+ raw_cmov_l_rr(d,s,cc);
+ unlock(s);
+ unlock(d);
+}
+MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
+
+MIDFUNC(1,setzflg_l,(RW4 r))
+{
+ if (setzflg_uses_bsf) {
+ CLOBBER_BSF;
+ r=rmw(r,4,4);
+ raw_bsf_l_rr(r,r);
+ unlock(r);
+ }
+ else {
+ Dif (live.flags_in_flags!=VALID) {
+ write_log("setzflg() wanted flags in native flags, they are %d\n",
+ live.flags_in_flags);
+ abort();
+ }
+ r=readreg(r,4);
+ {
+ int f=writereg(S11,4);
+ int t=writereg(S12,4);
+ raw_flags_set_zero(f,r,t);
+ unlock(f);
+ unlock(r);
+ unlock(t);
+ }
+ }
+}
+MENDFUNC(1,setzflg_l,(RW4 r))
+
+MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
+{
+ CLOBBER_CMOV;
+ d=rmw(d,4,4);
+ raw_cmov_l_rm(d,s,cc);
+ unlock(d);
+}
+MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
+
+MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
+{
+ CLOBBER_BSF;
+ s=readreg(s,4);
+ d=writereg(d,4);
+ raw_bsf_l_rr(d,s);
+ unlock(s);
+ unlock(d);
+}
+MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))
+
+MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
+{
+ CLOBBER_MUL;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+ raw_imul_32_32(d,s);
+ unlock(s);
+ unlock(d);
+}
+MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
+
+MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
+{
+ CLOBBER_MUL;
+ s=rmw_specific(s,4,4,MUL_NREG2);
+ d=rmw_specific(d,4,4,MUL_NREG1);
+ raw_imul_64_32(d,s);
+ unlock(s);
+ unlock(d);
+}
+MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
+
+MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
+{
+ CLOBBER_MUL;
+ s=rmw_specific(s,4,4,MUL_NREG2);
+ d=rmw_specific(d,4,4,MUL_NREG1);
+ raw_mul_64_32(d,s);
+ unlock(s);
+ unlock(d);
+}
+MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
+
+MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
+{
+ CLOBBER_MUL;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+ raw_mul_32_32(d,s);
+ unlock(s);
+ unlock(d);
+}
+MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
+
+MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
+{
+ int isrmw;
+
+ if (isconst(s)) {
+ set_const(d,(uae_s32)(uae_s16)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SE16;
+ isrmw=(s==d);
+ if (!isrmw) {
+ s=readreg(s,2);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+		  are in trouble! */
+ s=d=rmw(s,4,2);
+ }
+ raw_sign_extend_16_rr(d,s);
+ if (!isrmw) {
+ unlock(d);
+ unlock(s);
+ }
+ else {
+ unlock(s);
+ }
+}
+MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
+
+MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
+{
+ int isrmw;
+
+ if (isconst(s)) {
+ set_const(d,(uae_s32)(uae_s8)live.state[s].val);
+ return;
+ }
+
+ isrmw=(s==d);
+ CLOBBER_SE8;
+ if (!isrmw) {
+ s=readreg(s,1);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+		  are in trouble! */
+ s=d=rmw(s,4,1);
+ }
+
+ raw_sign_extend_8_rr(d,s);
+
+ if (!isrmw) {
+ unlock(d);
+ unlock(s);
+ }
+ else {
+ unlock(s);
+ }
+}
+MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
+
+
+MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
+{
+ int isrmw;
+
+ if (isconst(s)) {
+ set_const(d,(uae_u32)(uae_u16)live.state[s].val);
+ return;
+ }
+
+ isrmw=(s==d);
+ CLOBBER_ZE16;
+ if (!isrmw) {
+ s=readreg(s,2);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+		  are in trouble! */
+ s=d=rmw(s,4,2);
+ }
+ raw_zero_extend_16_rr(d,s);
+ if (!isrmw) {
+ unlock(d);
+ unlock(s);
+ }
+ else {
+ unlock(s);
+ }
+}
+MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
+
+MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
+{
+ int isrmw;
+ if (isconst(s)) {
+ set_const(d,(uae_u32)(uae_u8)live.state[s].val);
+ return;
+ }
+
+ isrmw=(s==d);
+ CLOBBER_ZE8;
+ if (!isrmw) {
+ s=readreg(s,1);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+		  are in trouble! */
+ s=d=rmw(s,4,1);
+ }
+
+ raw_zero_extend_8_rr(d,s);
+
+ if (!isrmw) {
+ unlock(d);
+ unlock(s);
+ }
+ else {
+ unlock(s);
+ }
+}
+MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
+
+MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
+{
+ if (d==s)
+ return;
+ if (isconst(s)) {
+ COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ d=writereg(d,1);
+ raw_mov_b_rr(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
+
+MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
+{
+ if (d==s)
+ return;
+ if (isconst(s)) {
+ COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,2);
+ d=writereg(d,2);
+ raw_mov_w_rr(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
+
+
+MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ d=writereg(d,4);
+
+ raw_mov_l_rrm_indexed(d,baser,index,factor);
+ unlock(d);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+
+MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ d=writereg(d,2);
+
+ raw_mov_w_rrm_indexed(d,baser,index,factor);
+ unlock(d);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+
+MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ d=writereg(d,1);
+
+ raw_mov_b_rrm_indexed(d,baser,index,factor);
+
+ unlock(d);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+
+
+MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ s=readreg(s,4);
+
+ Dif (baser==s || index==s)
+ abort();
+
+
+ raw_mov_l_mrr_indexed(baser,index,factor,s);
+ unlock(s);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+
+MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ s=readreg(s,2);
+
+ raw_mov_w_mrr_indexed(baser,index,factor,s);
+ unlock(s);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+
+MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+{
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+
+ raw_mov_b_mrr_indexed(baser,index,factor,s);
+ unlock(s);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+
+
+MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+
+ raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
+ unlock(s);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+
+MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ s=readreg(s,2);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+
+ raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
+ unlock(s);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+
+MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+
+ raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
+ unlock(s);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+
+
+
+/* Read a long from base+baser+factor*index */
+MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+ d=writereg(d,4);
+ raw_mov_l_brrm_indexed(d,base,baser,index,factor);
+ unlock(d);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+
+
+MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ remove_offset(d,-1);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+ d=writereg(d,2);
+ raw_mov_w_brrm_indexed(d,base,baser,index,factor);
+ unlock(d);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+
+
+MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ remove_offset(d,-1);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+ d=writereg(d,1);
+ raw_mov_b_brrm_indexed(d,base,baser,index,factor);
+ unlock(d);
+ unlock(baser);
+ unlock(index);
+}
+MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+
+/* Read a long from base+factor*index */
+MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+{
+ int indexreg=index;
+
+ if (isconst(index)) {
+ COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
+ return;
+ }
+
+ CLOBBER_MOV;
+ index=readreg_offset(index,4);
+ base+=get_offset(indexreg)*factor;
+ d=writereg(d,4);
+
+ raw_mov_l_rm_indexed(d,base,index,factor);
+ unlock(index);
+ unlock(d);
+}
+MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+
+
+/* read the long at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=writereg(d,4);
+
+ raw_mov_l_rR(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
+
+/* read the word at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=writereg(d,2);
+
+ raw_mov_w_rR(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
+
+/* read the byte at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=writereg(d,1);
+
+ raw_mov_b_rR(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
+
+/* read the long at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
+{
+ int sreg=s;
+ if (isconst(s)) {
+ COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg_offset(s,4);
+ offset+=get_offset(sreg);
+ d=writereg(d,4);
+
+ raw_mov_l_brR(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
+
+/* read the word at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
+{
+ int sreg=s;
+ if (isconst(s)) {
+ COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ remove_offset(d,-1);
+ s=readreg_offset(s,4);
+ offset+=get_offset(sreg);
+ d=writereg(d,2);
+
+ raw_mov_w_brR(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
+
+/* read the byte at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
+{
+ int sreg=s;
+ if (isconst(s)) {
+ COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ remove_offset(d,-1);
+ s=readreg_offset(s,4);
+ offset+=get_offset(sreg);
+ d=writereg(d,1);
+
+ raw_mov_b_brR(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
+
+MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
+ return;
+ }
+
+ CLOBBER_MOV;
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_l_Ri(d,i,offset);
+ unlock(d);
+}
+MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
+
+MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
+ return;
+ }
+
+ CLOBBER_MOV;
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_w_Ri(d,i,offset);
+ unlock(d);
+}
+MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
+
+MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
+ return;
+ }
+
+ CLOBBER_MOV;
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_b_Ri(d,i,offset);
+ unlock(d);
+}
+MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
+
+ /* Warning! OFFSET is byte sized only! */
+MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
+{
+ if (isconst(d)) {
+ COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
+ return;
+ }
+ if (isconst(s)) {
+ COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=readreg(d,4);
+
+ raw_mov_l_Rr(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
+
+MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
+{
+ if (isconst(d)) {
+ COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
+ return;
+ }
+ if (isconst(s)) {
+ COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,2);
+ d=readreg(d,4);
+ raw_mov_w_Rr(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
+
+MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
+{
+ if (isconst(d)) {
+ COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
+ return;
+ }
+ if (isconst(s)) {
+ COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ d=readreg(d,4);
+ raw_mov_b_Rr(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
+
+MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
+ return;
+ }
+#if USE_OFFSET
+ if (d==s) {
+ add_offset(d,offset);
+ return;
+ }
+#endif
+ CLOBBER_LEA;
+ s=readreg(s,4);
+ d=writereg(d,4);
+ raw_lea_l_brr(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
+
+MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+{
+ if (!offset) {
+ COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
+ return;
+ }
+ CLOBBER_LEA;
+ s=readreg(s,4);
+ index=readreg(index,4);
+ d=writereg(d,4);
+
+ raw_lea_l_brr_indexed(d,s,index,factor,offset);
+ unlock(d);
+ unlock(index);
+ unlock(s);
+}
+MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+
+MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+{
+ CLOBBER_LEA;
+ s=readreg(s,4);
+ index=readreg(index,4);
+ d=writereg(d,4);
+
+ raw_lea_l_rr_indexed(d,s,index,factor);
+ unlock(d);
+ unlock(index);
+ unlock(s);
+}
+MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+
+/* write s to the long at the address contained in d+offset */
+MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+
+ raw_mov_l_bRr(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
+
+/* write s to the word at the address contained in d+offset */
+MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
+{
+ int dreg=d;
+
+ if (isconst(d)) {
+ COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,2);
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_w_bRr(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_b_bRr(d,s,offset);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+MIDFUNC(1,bswap_32,(RW4 r))
+{
+ int reg=r;
+
+ if (isconst(r)) {
+ uae_u32 oldv=live.state[r].val;
+ live.state[r].val=reverse32(oldv);
+ return;
+ }
+
+ CLOBBER_SW32;
+ r=rmw(r,4,4);
+ raw_bswap_32(r);
+ unlock(r);
+}
+MENDFUNC(1,bswap_32,(RW4 r))
+
+MIDFUNC(1,bswap_16,(RW2 r))
+{
+ if (isconst(r)) {
+ uae_u32 oldv=live.state[r].val;
+ live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
+ (oldv&0xffff0000);
+ return;
+ }
+
+ CLOBBER_SW16;
+ r=rmw(r,2,2);
+
+ raw_bswap_16(r);
+ unlock(r);
+}
+MENDFUNC(1,bswap_16,(RW2 r))
+
+
+
+MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
+{
+ int olds;
+
+ if (d==s) { /* How pointless! */
+ return;
+ }
+ if (isconst(s)) {
+ COMPCALL(mov_l_ri)(d,live.state[s].val);
+ return;
+ }
+#if USE_ALIAS
+ olds=s;
+ disassociate(d);
+ s=readreg_offset(s,4);
+ live.state[d].realreg=s;
+ live.state[d].realind=live.nat[s].nholds;
+ live.state[d].val=live.state[olds].val;
+ live.state[d].validsize=4;
+ live.state[d].dirtysize=4;
+ set_status(d,DIRTY);
+
+ live.nat[s].holds[live.nat[s].nholds]=d;
+ live.nat[s].nholds++;
+ log_clobberreg(d);
+
+ /* printf("Added %d to nreg %d(%d), now holds %d regs\n",
+ d,s,live.state[d].realind,live.nat[s].nholds); */
+ unlock(s);
+#else
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=writereg(d,4);
+
+ raw_mov_l_rr(d,s);
+ unlock(d);
+ unlock(s);
+#endif
+}
+MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
+
+MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_l_mi)(d,live.state[s].val);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,4);
+
+ raw_mov_l_mr(d,s);
+ unlock(s);
+}
+MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
+
+
+MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,2);
+
+ raw_mov_w_mr(d,s);
+ unlock(s);
+}
+MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
+
+MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,2);
+
+ raw_mov_w_rm(d,s);
+ unlock(d);
+}
+MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
+
+MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+
+ raw_mov_b_mr(d,s);
+ unlock(s);
+}
+MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
+
+MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,1);
+
+ raw_mov_b_rm(d,s);
+ unlock(d);
+}
+MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
+
+MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
+{
+ set_const(d,s);
+ return;
+}
+MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
+
+MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,2);
+
+ raw_mov_w_ri(d,s);
+ unlock(d);
+}
+MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
+
+MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,1);
+
+ raw_mov_b_ri(d,s);
+ unlock(d);
+}
+MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
+
+
+MIDFUNC(2,add_l_mi,(IMM d, IMM s))
+{
+ CLOBBER_ADD;
+ raw_add_l_mi(d,s) ;
+}
+MENDFUNC(2,add_l_mi,(IMM d, IMM s))
+
+MIDFUNC(2,add_w_mi,(IMM d, IMM s))
+{
+ CLOBBER_ADD;
+ raw_add_w_mi(d,s) ;
+}
+MENDFUNC(2,add_w_mi,(IMM d, IMM s))
+
+MIDFUNC(2,add_b_mi,(IMM d, IMM s))
+{
+ CLOBBER_ADD;
+ raw_add_b_mi(d,s) ;
+}
+MENDFUNC(2,add_b_mi,(IMM d, IMM s))
+
+
+MIDFUNC(2,test_l_ri,(R4 d, IMM i))
+{
+ CLOBBER_TEST;
+ d=readreg(d,4);
+
+ raw_test_l_ri(d,i);
+ unlock(d);
+}
+MENDFUNC(2,test_l_ri,(R4 d, IMM i))
+
+MIDFUNC(2,test_l_rr,(R4 d, R4 s))
+{
+ CLOBBER_TEST;
+ d=readreg(d,4);
+ s=readreg(s,4);
+
+	raw_test_l_rr(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,test_l_rr,(R4 d, R4 s))
+
+MIDFUNC(2,test_w_rr,(R2 d, R2 s))
+{
+ CLOBBER_TEST;
+ d=readreg(d,2);
+ s=readreg(s,2);
+
+ raw_test_w_rr(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,test_w_rr,(R2 d, R2 s))
+
+MIDFUNC(2,test_b_rr,(R1 d, R1 s))
+{
+ CLOBBER_TEST;
+ d=readreg(d,1);
+ s=readreg(s,1);
+
+ raw_test_b_rr(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,test_b_rr,(R1 d, R1 s))
+
+
+MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
+{
+ if (isconst (d) && ! needflags) {
+ live.state[d].val &= i;
+ return;
+ }
+
+ CLOBBER_AND;
+ d=rmw(d,4,4);
+
+ raw_and_l_ri(d,i);
+ unlock(d);
+}
+MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
+
+MIDFUNC(2,and_l,(RW4 d, R4 s))
+{
+ CLOBBER_AND;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_and_l(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,and_l,(RW4 d, R4 s))
+
+MIDFUNC(2,and_w,(RW2 d, R2 s))
+{
+ CLOBBER_AND;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_and_w(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,and_w,(RW2 d, R2 s))
+
+MIDFUNC(2,and_b,(RW1 d, R1 s))
+{
+ CLOBBER_AND;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_and_b(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,and_b,(RW1 d, R1 s))
+
+MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
+{
+ if (isconst(d) && !needflags) {
+ live.state[d].val|=i;
+ return;
+ }
+ CLOBBER_OR;
+ d=rmw(d,4,4);
+
+ raw_or_l_ri(d,i);
+ unlock(d);
+}
+MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
+
+MIDFUNC(2,or_l,(RW4 d, R4 s))
+{
+ if (isconst(d) && isconst(s) && !needflags) {
+ live.state[d].val|=live.state[s].val;
+ return;
+ }
+ CLOBBER_OR;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_or_l(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,or_l,(RW4 d, R4 s))
+
+MIDFUNC(2,or_w,(RW2 d, R2 s))
+{
+ CLOBBER_OR;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_or_w(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,or_w,(RW2 d, R2 s))
+
+MIDFUNC(2,or_b,(RW1 d, R1 s))
+{
+ CLOBBER_OR;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_or_b(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,or_b,(RW1 d, R1 s))
+
+MIDFUNC(2,adc_l,(RW4 d, R4 s))
+{
+ CLOBBER_ADC;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_adc_l(d,s);
+
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,adc_l,(RW4 d, R4 s))
+
+MIDFUNC(2,adc_w,(RW2 d, R2 s))
+{
+ CLOBBER_ADC;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_adc_w(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,adc_w,(RW2 d, R2 s))
+
+MIDFUNC(2,adc_b,(RW1 d, R1 s))
+{
+ CLOBBER_ADC;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_adc_b(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,adc_b,(RW1 d, R1 s))
+
+MIDFUNC(2,add_l,(RW4 d, R4 s))
+{
+ if (isconst(s)) {
+ COMPCALL(add_l_ri)(d,live.state[s].val);
+ return;
+ }
+
+ CLOBBER_ADD;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_add_l(d,s);
+
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,add_l,(RW4 d, R4 s))
+
+MIDFUNC(2,add_w,(RW2 d, R2 s))
+{
+ if (isconst(s)) {
+ COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_ADD;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_add_w(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,add_w,(RW2 d, R2 s))
+
+MIDFUNC(2,add_b,(RW1 d, R1 s))
+{
+ if (isconst(s)) {
+ COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_ADD;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_add_b(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,add_b,(RW1 d, R1 s))
+
+MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ if (isconst(d) && !needflags) {
+ live.state[d].val-=i;
+ return;
+ }
+#if USE_OFFSET
+ if (!needflags) {
+ add_offset(d,-(signed)i);
+ return;
+ }
+#endif
+
+ CLOBBER_SUB;
+ d=rmw(d,4,4);
+
+ raw_sub_l_ri(d,i);
+ unlock(d);
+}
+MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
+
+MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+
+ CLOBBER_SUB;
+ d=rmw(d,2,2);
+
+ raw_sub_w_ri(d,i);
+ unlock(d);
+}
+MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
+
+MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+
+ CLOBBER_SUB;
+ d=rmw(d,1,1);
+
+ raw_sub_b_ri(d,i);
+
+ unlock(d);
+}
+MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
+
+MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ if (isconst(d) && !needflags) {
+ live.state[d].val+=i;
+ return;
+ }
+#if USE_OFFSET
+ if (!needflags) {
+ add_offset(d,i);
+ return;
+ }
+#endif
+ CLOBBER_ADD;
+ d=rmw(d,4,4);
+ raw_add_l_ri(d,i);
+ unlock(d);
+}
+MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
+
+MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+
+ CLOBBER_ADD;
+ d=rmw(d,2,2);
+
+ raw_add_w_ri(d,i);
+ unlock(d);
+}
+MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
+
+MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+
+ CLOBBER_ADD;
+ d=rmw(d,1,1);
+
+ raw_add_b_ri(d,i);
+
+ unlock(d);
+}
+MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
+
+MIDFUNC(2,sbb_l,(RW4 d, R4 s))
+{
+ CLOBBER_SBB;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_sbb_l(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,sbb_l,(RW4 d, R4 s))
+
+MIDFUNC(2,sbb_w,(RW2 d, R2 s))
+{
+ CLOBBER_SBB;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_sbb_w(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,sbb_w,(RW2 d, R2 s))
+
+MIDFUNC(2,sbb_b,(RW1 d, R1 s))
+{
+ CLOBBER_SBB;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_sbb_b(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,sbb_b,(RW1 d, R1 s))
+
+MIDFUNC(2,sub_l,(RW4 d, R4 s))
+{
+ if (isconst(s)) {
+ COMPCALL(sub_l_ri)(d,live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SUB;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_sub_l(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,sub_l,(RW4 d, R4 s))
+
+MIDFUNC(2,sub_w,(RW2 d, R2 s))
+{
+ if (isconst(s)) {
+ COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SUB;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_sub_w(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,sub_w,(RW2 d, R2 s))
+
+MIDFUNC(2,sub_b,(RW1 d, R1 s))
+{
+ if (isconst(s)) {
+ COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SUB;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_sub_b(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,sub_b,(RW1 d, R1 s))
+
+MIDFUNC(2,cmp_l,(R4 d, R4 s))
+{
+ CLOBBER_CMP;
+ s=readreg(s,4);
+ d=readreg(d,4);
+
+ raw_cmp_l(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,cmp_l,(R4 d, R4 s))
+
+MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
+{
+ CLOBBER_CMP;
+ r=readreg(r,4);
+
+ raw_cmp_l_ri(r,i);
+ unlock(r);
+}
+MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
+
+MIDFUNC(2,cmp_w,(R2 d, R2 s))
+{
+ CLOBBER_CMP;
+ s=readreg(s,2);
+ d=readreg(d,2);
+
+ raw_cmp_w(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,cmp_w,(R2 d, R2 s))
+
+MIDFUNC(2,cmp_b,(R1 d, R1 s))
+{
+ CLOBBER_CMP;
+ s=readreg(s,1);
+ d=readreg(d,1);
+
+ raw_cmp_b(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,cmp_b,(R1 d, R1 s))
+
+
+MIDFUNC(2,xor_l,(RW4 d, R4 s))
+{
+ CLOBBER_XOR;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_xor_l(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,xor_l,(RW4 d, R4 s))
+
+MIDFUNC(2,xor_w,(RW2 d, R2 s))
+{
+ CLOBBER_XOR;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_xor_w(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,xor_w,(RW2 d, R2 s))
+
+MIDFUNC(2,xor_b,(RW1 d, R1 s))
+{
+ CLOBBER_XOR;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_xor_b(d,s);
+ unlock(d);
+ unlock(s);
+}
+MENDFUNC(2,xor_b,(RW1 d, R1 s))
+
+MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
+{
+ clobber_flags();
+ remove_all_offsets();
+ if (osize==4) {
+ if (out1!=in1 && out1!=r) {
+ COMPCALL(forget_about)(out1);
+ }
+ }
+ else {
+ tomem_c(out1);
+ }
+
+ in1=readreg_specific(in1,isize,REG_PAR1);
+ r=readreg(r,4);
+ prepare_for_call_1(); /* This should ensure that there won't be
+ any need for swapping nregs in prepare_for_call_2
+ */
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_push_l_r(in1);
+#endif
+ unlock(in1);
+ unlock(r);
+
+ prepare_for_call_2();
+ raw_call_r(r);
+
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_inc_sp(4);
+#endif
+
+
+ live.nat[REG_RESULT].holds[0]=out1;
+ live.nat[REG_RESULT].nholds=1;
+ live.nat[REG_RESULT].touched=touchcnt++;
+
+ live.state[out1].realreg=REG_RESULT;
+ live.state[out1].realind=0;
+ live.state[out1].val=0;
+ live.state[out1].validsize=osize;
+ live.state[out1].dirtysize=osize;
+ set_status(out1,DIRTY);
+}
+MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
+
+MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
+{
+ clobber_flags();
+ remove_all_offsets();
+ in1=readreg_specific(in1,isize1,REG_PAR1);
+ in2=readreg_specific(in2,isize2,REG_PAR2);
+ r=readreg(r,4);
+ prepare_for_call_1(); /* This should ensure that there won't be
+ any need for swapping nregs in prepare_for_call_2
+ */
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_push_l_r(in2);
+ raw_push_l_r(in1);
+#endif
+ unlock(r);
+ unlock(in1);
+ unlock(in2);
+ prepare_for_call_2();
+ raw_call_r(r);
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_inc_sp(8);
+#endif
+}
+MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
+
+
+MIDFUNC(1,forget_about,(W4 r))
+{
+ if (isinreg(r))
+ disassociate(r);
+ live.state[r].val=0;
+ set_status(r,UNDEF);
+}
+MENDFUNC(1,forget_about,(W4 r))
+
+MIDFUNC(0,nop,(void))
+{
+ raw_nop();
+}
+MENDFUNC(0,nop,(void))
+
+
+MIDFUNC(1,f_forget_about,(FW r))
+{
+ if (f_isinreg(r))
+ f_disassociate(r);
+ live.fate[r].status=UNDEF;
+}
+MENDFUNC(1,f_forget_about,(FW r))
+
+MIDFUNC(1,fmov_pi,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_pi(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_pi,(FW r))
+
+MIDFUNC(1,fmov_log10_2,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_log10_2(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_log10_2,(FW r))
+
+MIDFUNC(1,fmov_log2_e,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_log2_e(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_log2_e,(FW r))
+
+MIDFUNC(1,fmov_loge_2,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_loge_2(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_loge_2,(FW r))
+
+MIDFUNC(1,fmov_1,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_1(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_1,(FW r))
+
+MIDFUNC(1,fmov_0,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_0(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_0,(FW r))
+
+MIDFUNC(2,fmov_rm,(FW r, MEMR m))
+{
+ r=f_writereg(r);
+ raw_fmov_rm(r,m);
+ f_unlock(r);
+}
+MENDFUNC(2,fmov_rm,(FW r, MEMR m))
+
+MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
+{
+ r=f_writereg(r);
+ raw_fmovi_rm(r,m);
+ f_unlock(r);
+}
+MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
+
+MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
+{
+ r=f_readreg(r);
+ raw_fmovi_mr(m,r);
+ f_unlock(r);
+}
+MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
+
+MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
+{
+ r=f_writereg(r);
+ raw_fmovs_rm(r,m);
+ f_unlock(r);
+}
+MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
+
+MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
+{
+ r=f_readreg(r);
+ raw_fmovs_mr(m,r);
+ f_unlock(r);
+}
+MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
+
+MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
+{
+ r=f_readreg(r);
+ raw_fmov_ext_mr(m,r);
+ f_unlock(r);
+}
+MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
+
+MIDFUNC(2,fmov_mr,(MEMW m, FR r))
+{
+ r=f_readreg(r);
+ raw_fmov_mr(m,r);
+ f_unlock(r);
+}
+MENDFUNC(2,fmov_mr,(MEMW m, FR r))
+
+MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
+{
+ r=f_writereg(r);
+ raw_fmov_ext_rm(r,m);
+ f_unlock(r);
+}
+MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
+
+MIDFUNC(2,fmov_rr,(FW d, FR s))
+{
+ if (d==s) { /* How pointless! */
+ return;
+ }
+#if USE_F_ALIAS
+ f_disassociate(d);
+ s=f_readreg(s);
+ live.fate[d].realreg=s;
+ live.fate[d].realind=live.fat[s].nholds;
+ live.fate[d].status=DIRTY;
+ live.fat[s].holds[live.fat[s].nholds]=d;
+ live.fat[s].nholds++;
+ f_unlock(s);
+#else
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fmov_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+#endif
+}
+MENDFUNC(2,fmov_rr,(FW d, FR s))
+
+MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
+{
+ index=readreg(index,4);
+
+ raw_fldcw_m_indexed(index,base);
+ unlock(index);
+}
+MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
+
+MIDFUNC(1,ftst_r,(FR r))
+{
+ r=f_readreg(r);
+ raw_ftst_r(r);
+ f_unlock(r);
+}
+MENDFUNC(1,ftst_r,(FR r))
+
+MIDFUNC(0,dont_care_fflags,(void))
+{
+ f_disassociate(FP_RESULT);
+}
+MENDFUNC(0,dont_care_fflags,(void))
+
+MIDFUNC(2,fsqrt_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fsqrt_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fsqrt_rr,(FW d, FR s))
+
+MIDFUNC(2,fabs_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fabs_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fabs_rr,(FW d, FR s))
+
+MIDFUNC(2,fsin_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fsin_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fsin_rr,(FW d, FR s))
+
+MIDFUNC(2,fcos_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fcos_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fcos_rr,(FW d, FR s))
+
+MIDFUNC(2,ftwotox_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_ftwotox_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,ftwotox_rr,(FW d, FR s))
+
+MIDFUNC(2,fetox_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fetox_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fetox_rr,(FW d, FR s))
+
+MIDFUNC(2,frndint_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_frndint_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,frndint_rr,(FW d, FR s))
+
+MIDFUNC(2,flog2_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_flog2_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,flog2_rr,(FW d, FR s))
+
+MIDFUNC(2,fneg_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fneg_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fneg_rr,(FW d, FR s))
+
+MIDFUNC(2,fadd_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fadd_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fadd_rr,(FRW d, FR s))
+
+MIDFUNC(2,fsub_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fsub_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fsub_rr,(FRW d, FR s))
+
+MIDFUNC(2,fcmp_rr,(FR d, FR s))
+{
+ d=f_readreg(d);
+ s=f_readreg(s);
+ raw_fcmp_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fcmp_rr,(FR d, FR s))
+
+MIDFUNC(2,fdiv_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fdiv_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fdiv_rr,(FRW d, FR s))
+
+MIDFUNC(2,frem_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_frem_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,frem_rr,(FRW d, FR s))
+
+MIDFUNC(2,frem1_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_frem1_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,frem1_rr,(FRW d, FR s))
+
+MIDFUNC(2,fmul_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fmul_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fmul_rr,(FRW d, FR s))
+
+
+/********************************************************************
+ * Support functions exposed to gencomp. CREATE time *
+ ********************************************************************/
+
+int kill_rodent(int r)
+{
+ return KILLTHERAT &&
+ have_rat_stall &&
+ (live.state[r].status==INMEM ||
+ live.state[r].status==CLEAN ||
+ live.state[r].status==ISCONST ||
+ live.state[r].dirtysize==4);
+}
+
+uae_u32 get_const(int r)
+{
+#if USE_OPTIMIZER
+ if (!reg_alloc_run)
+#endif
+ Dif (!isconst(r)) {
+ printf("Register %d should be constant, but isn't\n",r);
+ abort();
+ }
+ return live.state[r].val;
+}
+
+void sync_m68k_pc(void)
+{
+ if (m68k_pc_offset) {
+ add_l_ri(PC_P,m68k_pc_offset);
+ comp_pc_p+=m68k_pc_offset;
+ m68k_pc_offset=0;
+ }
+}
+
+/********************************************************************
+ * Support functions exposed to newcpu *
+ ********************************************************************/
+
+uae_u32 scratch[VREGS];
+fptype fscratch[VFREGS];
+
+void init_comp(void)
+{
+ int i;
+ uae_u8* cb=can_byte;
+ uae_u8* cw=can_word;
+ uae_u8* au=always_used;
+
+ for (i=0;i<VREGS;i++) {
+ live.state[i].realreg=-1;
+ live.state[i].needflush=NF_SCRATCH;
+ live.state[i].val=0;
+ set_status(i,UNDEF);
+ }
+
+ for (i=0;i<VFREGS;i++) {
+ live.fate[i].status=UNDEF;
+ live.fate[i].realreg=-1;
+ live.fate[i].needflush=NF_SCRATCH;
+ }
+
+ for (i=0;i<VREGS;i++) {
+ if (i<16) { /* First 16 registers map to 68k registers */
+	 live.state[i].mem=((uae_u32*)&regs)+i;
+ live.state[i].needflush=NF_TOMEM;
+ set_status(i,INMEM);
+ }
+ else
+ live.state[i].mem=scratch+i;
+ }
+ live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
+ live.state[PC_P].needflush=NF_TOMEM;
+ set_const(PC_P,(uae_u32)comp_pc_p);
+
+ live.state[FLAGX].mem=&(regflags.x);
+ live.state[FLAGX].needflush=NF_TOMEM;
+ set_status(FLAGX,INMEM);
+
+ live.state[FLAGTMP].mem=&(regflags.cznv);
+ live.state[FLAGTMP].needflush=NF_TOMEM;
+ set_status(FLAGTMP,INMEM);
+
+ live.state[NEXT_HANDLER].needflush=NF_HANDLER;
+ set_status(NEXT_HANDLER,UNDEF);
+
+ for (i=0;i<VFREGS;i++) {
+ if (i<8) { /* First 8 registers map to 68k FPU registers */
+ live.fate[i].mem=(uae_u32*)(((fptype*)regs.fp)+i);
+ live.fate[i].needflush=NF_TOMEM;
+ live.fate[i].status=INMEM;
+ }
+ else if (i==FP_RESULT) {
+ live.fate[i].mem=(uae_u32*)(&regs.fp_result);
+ live.fate[i].needflush=NF_TOMEM;
+ live.fate[i].status=INMEM;
+ }
+ else
+ live.fate[i].mem=(uae_u32*)(fscratch+i);
+ }
+
+
+ for (i=0;i<N_REGS;i++) {
+ live.nat[i].touched=0;
+ live.nat[i].nholds=0;
+ live.nat[i].locked=0;
+ if (*cb==i) {
+ live.nat[i].canbyte=1; cb++;
+ } else live.nat[i].canbyte=0;
+ if (*cw==i) {
+ live.nat[i].canword=1; cw++;
+ } else live.nat[i].canword=0;
+ if (*au==i) {
+ live.nat[i].locked=1; au++;
+ }
+ }
+
+ for (i=0;i<N_FREGS;i++) {
+ live.fat[i].touched=0;
+ live.fat[i].nholds=0;
+ live.fat[i].locked=0;
+ }
+
+ touchcnt=1;
+ m68k_pc_offset=0;
+ live.flags_in_flags=TRASH;
+ live.flags_on_stack=VALID;
+ live.flags_are_important=1;
+
+ raw_fp_init();
+}
+
+
+static void vinton(int i, uae_s8* vton, int depth)
+{
+ int n;
+ int rr;
+
+ Dif (vton[i]==-1) {
+ printf("Asked to load register %d, but nowhere to go\n",i);
+ abort();
+ }
+ n=vton[i];
+ Dif (live.nat[n].nholds>1)
+ abort();
+ if (live.nat[n].nholds && depth<N_REGS) {
+ vinton(live.nat[n].holds[0],vton,depth+1);
+ }
+ if (!isinreg(i))
+ return; /* Oops --- got rid of that one in the recursive calls */
+ rr=live.state[i].realreg;
+ if (rr!=n)
+ mov_nregs(n,rr);
+}
+
+#if USE_MATCHSTATE
+/* This is going to be, amongst other things, a more elaborate version of
+ flush() */
+static __inline__ void match_states(smallstate* s)
+{
+ uae_s8 vton[VREGS];
+ uae_s8 ndone[N_REGS];
+ int i;
+ int again=0;
+
+ for (i=0;i<VREGS;i++)
+ vton[i]=-1;
+
+ for (i=0;i<N_REGS;i++)
+ if (s->nat[i].validsize)
+ vton[s->nat[i].holds]=i;
+
+ flush_flags(); /* low level */
+ sync_m68k_pc(); /* mid level */
+
+ /* We don't do FREGS yet, so this is raw flush() code */
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_SCRATCH ||
+ live.fate[i].status==CLEAN) {
+ f_disassociate(i);
+ }
+ }
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_TOMEM &&
+ live.fate[i].status==DIRTY) {
+ f_evict(i);
+ }
+ }
+ raw_fp_cleanup_drop();
+
+ /* Now comes the fun part. First, we need to remove all offsets */
+ for (i=0;i<VREGS;i++)
+ if (!isconst(i) && live.state[i].val)
+ remove_offset(i,-1);
+
+ /* Next, we evict everything that does not end up in registers,
+ write back overly dirty registers, and write back constants */
+ for (i=0;i<VREGS;i++) {
+ switch (live.state[i].status) {
+ case ISCONST:
+ if (i!=PC_P)
+ writeback_const(i);
+ break;
+ case DIRTY:
+ if (vton[i]==-1) {
+ evict(i);
+ break;
+ }
+ if (live.state[i].dirtysize>s->nat[vton[i]].dirtysize)
+ tomem(i);
+ /* Fall-through! */
+ case CLEAN:
+ if (vton[i]==-1 ||
+ live.state[i].validsize<s->nat[vton[i]].validsize)
+ evict(i);
+ else
+ make_exclusive(i,0,-1);
+ break;
+ case INMEM:
+ break;
+ case UNDEF:
+ break;
+ default:
+ printf("Weird status: %d\n",live.state[i].status);
+ abort();
+ }
+ }
+
+ /* Quick consistency check */
+ for (i=0;i<VREGS;i++) {
+ if (isinreg(i)) {
+ int n=live.state[i].realreg;
+
+ if (live.nat[n].nholds!=1) {
+ printf("Register %d isn't alone in nreg %d\n",
+ i,n);
+ abort();
+ }
+ if (vton[i]==-1) {
+ printf("Register %d is still in register, shouldn't be\n",
+ i);
+ abort();
+ }
+ }
+ }
+
+ /* Now we need to shuffle things around so the VREGs are in the
+ right N_REGs. */
+ for (i=0;i<VREGS;i++) {
+ if (isinreg(i) && vton[i]!=live.state[i].realreg)
+ vinton(i,vton,0);
+ }
+
+ /* And now we may need to load some registers from memory */
+ for (i=0;i<VREGS;i++) {
+ int n=vton[i];
+ if (n==-1) {
+ Dif (isinreg(i)) {
+ printf("Register %d unexpectedly in nreg %d\n",
+ i,live.state[i].realreg);
+ abort();
+ }
+ }
+ else {
+ switch(live.state[i].status) {
+ case CLEAN:
+ case DIRTY:
+ Dif (n!=live.state[i].realreg)
+ abort();
+ break;
+ case INMEM:
+ Dif (live.nat[n].nholds) {
+ printf("natreg %d holds %d vregs, should be empty\n",
+ n,live.nat[n].nholds);
+ }
+ raw_mov_l_rm(n,(uae_u32)live.state[i].mem);
+ live.state[i].validsize=4;
+ live.state[i].dirtysize=0;
+ live.state[i].realreg=n;
+ live.state[i].realind=0;
+ live.state[i].val=0;
+ live.state[i].is_swapped=0;
+ live.nat[n].nholds=1;
+ live.nat[n].holds[0]=i;
+
+ set_status(i,CLEAN);
+ break;
+ case ISCONST:
+ if (i!=PC_P) {
+ printf("Got constant in matchstate for reg %d. Bad!\n",i);
+ abort();
+ }
+ break;
+ case UNDEF:
+ break;
+ }
+ }
+ }
+
+ /* One last consistency check, and adjusting the states in live
+ to those in s */
+ for (i=0;i<VREGS;i++) {
+ int n=vton[i];
+ switch(live.state[i].status) {
+ case INMEM:
+ if (n!=-1)
+ abort();
+ break;
+ case ISCONST:
+ if (i!=PC_P)
+ abort();
+ break;
+ case CLEAN:
+ case DIRTY:
+ if (n==-1)
+ abort();
+ if (live.state[i].dirtysize>s->nat[n].dirtysize)
+ abort();
+ if (live.state[i].validsize<s->nat[n].validsize)
+ abort();
+ live.state[i].dirtysize=s->nat[n].dirtysize;
+ live.state[i].validsize=s->nat[n].validsize;
+ if (live.state[i].dirtysize)
+ set_status(i,DIRTY);
+ break;
+ case UNDEF:
+ break;
+ }
+ if (n!=-1)
+ live.nat[n].touched=touchcnt++;
+ }
+}
+#else
+static __inline__ void match_states(smallstate* s)
+{
+ flush(1);
+}
+#endif
+
+/* Only do this if you really mean it! The next call should be to init! */
+void flush(int save_regs)
+{
+ int fi,i;
+
+ log_flush();
+ flush_flags(); /* low level */
+ sync_m68k_pc(); /* mid level */
+
+ if (save_regs) {
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_SCRATCH ||
+ live.fate[i].status==CLEAN) {
+ f_disassociate(i);
+ }
+ }
+ for (i=0;i<VREGS;i++) {
+ if (live.state[i].needflush==NF_TOMEM) {
+ switch(live.state[i].status) {
+ case INMEM:
+ if (live.state[i].val) {
+ raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
+ live.state[i].val=0;
+ }
+ break;
+ case CLEAN:
+ case DIRTY:
+ remove_offset(i,-1); tomem(i); break;
+ case ISCONST:
+ if (i!=PC_P)
+ writeback_const(i);
+ break;
+ default: break;
+ }
+ Dif (live.state[i].val && i!=PC_P) {
+ printf("Register %d still has val %x\n",
+ i,live.state[i].val);
+ }
+ }
+ }
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_TOMEM &&
+ live.fate[i].status==DIRTY) {
+ f_evict(i);
+ }
+ }
+ raw_fp_cleanup_drop();
+ }
+ if (needflags) {
+ printf("Warning! flush with needflags=1!\n");
+ }
+
+ lopt_emit_all();
+}
+
+static void flush_keepflags(void)
+{
+ int fi,i;
+
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_SCRATCH ||
+ live.fate[i].status==CLEAN) {
+ f_disassociate(i);
+ }
+ }
+ for (i=0;i<VREGS;i++) {
+ if (live.state[i].needflush==NF_TOMEM) {
+ switch(live.state[i].status) {
+ case INMEM:
+ /* Can't adjust the offset here --- that needs "add" */
+ break;
+ case CLEAN:
+ case DIRTY:
+ remove_offset(i,-1); tomem(i); break;
+ case ISCONST:
+ if (i!=PC_P)
+ writeback_const(i);
+ break;
+ default: break;
+ }
+ }
+ }
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_TOMEM &&
+ live.fate[i].status==DIRTY) {
+ f_evict(i);
+ }
+ }
+ raw_fp_cleanup_drop();
+ lopt_emit_all();
+}
+
+void freescratch(void)
+{
+ int i;
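+ /* Native reg 4 is the x86 stack pointer and is expected to stay locked,
+    hence the exception below. */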
+ for (i=0;i<N_REGS;i++)
+ if (live.nat[i].locked && i!=4)
+ printf("Warning! %d is locked\n",i);
+
+ for (i=0;i<VREGS;i++)
+ if (live.state[i].needflush==NF_SCRATCH) {
+ forget_about(i);
+ }
+
+ for (i=0;i<VFREGS;i++)
+ if (live.fate[i].needflush==NF_SCRATCH) {
+ f_forget_about(i);
+ }
+}
+
+/********************************************************************
+ * Support functions, internal *
+ ********************************************************************/
+
+
+static void align_target(uae_u32 a)
+{
+ lopt_emit_all();
+ /* Fill with NOPs --- makes debugging with gdb easier */
+ while ((uae_u32)target&(a-1))
+ *target++=0x90;
+}
+
+extern uae_u8* kickmemory;
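+/* A block that lies entirely inside the 512KB (8*65536 byte) Kickstart image
+   can be treated as immutable; see the dormant-list handling in compile_block(). */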
+static __inline__ int isinrom(uae_u32 addr)
+{
+ return (addr>=(uae_u32)kickmemory &&
+ addr<(uae_u32)kickmemory+8*65536);
+}
+
+static void flush_all(void)
+{
+ int i;
+
+ log_flush();
+ for (i=0;i<VREGS;i++)
+ if (live.state[i].status==DIRTY) {
+ if (!call_saved[live.state[i].realreg]) {
+ tomem(i);
+ }
+ }
+ for (i=0;i<VFREGS;i++)
+ if (f_isinreg(i))
+ f_evict(i);
+ raw_fp_cleanup_drop();
+}
+
+/* Make sure all registers that will get clobbered by a call are
+ safe and sound in memory */
+static void prepare_for_call_1(void)
+{
+ flush_all(); /* If there are registers that don't get clobbered,
+ * we should be a bit more selective here */
+}
+
+/* We will call a C routine in a moment. That will clobber all registers,
+ so we need to disassociate everything */
+static void prepare_for_call_2(void)
+{
+ int i;
+ for (i=0;i<N_REGS;i++)
+ if (!call_saved[i] && live.nat[i].nholds>0)
+ free_nreg(i);
+
+ for (i=0;i<N_FREGS;i++)
+ if (live.fat[i].nholds>0)
+ f_free_nreg(i);
+
+ live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
+ flags at the very start of the call_r
+ functions! */
+}
+
+
+/********************************************************************
+ * Memory access and related functions, CREATE time *
+ ********************************************************************/
+
+void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
+{
+ next_pc_p=not_taken;
+ taken_pc_p=taken;
+ branch_cc=cond;
+}
+
+
+static uae_u32 get_handler_address(uae_u32 addr)
+{
+ uae_u32 cl=cacheline(addr);
+ blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
+
+#if USE_OPTIMIZER
+ if (!bi && reg_alloc_run)
+ return 0;
+#endif
+ return (uae_u32)&(bi->direct_handler_to_use);
+}
+
+static uae_u32 get_handler(uae_u32 addr)
+{
+ uae_u32 cl=cacheline(addr);
+ blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
+
+#if USE_OPTIMIZER
+ if (!bi && reg_alloc_run)
+ return 0;
+#endif
+ return (uae_u32)bi->direct_handler_to_use;
+}
+
+static void load_handler(int reg, uae_u32 addr)
+{
+ mov_l_rm(reg,get_handler_address(addr));
+}
+
+/* This version assumes that it is writing *real* memory, and *will* fail
+ * if that assumption is wrong! No branches, no second chances, just
+ * straight go-for-it attitude */
+
+static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
+{
+ int f=tmp;
+
+#ifdef NATMEM_OFFSET
+ if (canbang) { /* Woohoo! go directly at the memory! */
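+ /* The emulated address space is assumed to be mapped linearly at host
+    offset NATMEM_OFFSET, so we can store straight to address+NATMEM_OFFSET;
+    word and long data are byte-swapped to big-endian first. */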
+ if (clobber)
+ f=source;
+ switch(size) {
+ case 1: mov_b_bRr(address,source,NATMEM_OFFSET); break;
+ case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,NATMEM_OFFSET); break;
+ case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,NATMEM_OFFSET); break;
+ }
+ forget_about(tmp);
+ forget_about(f);
+ return;
+ }
+#endif
+
+ mov_l_rr(f,address);
+ shrl_l_ri(f,16); /* The index into the baseaddr table */
+ mov_l_rm_indexed(f,(uae_u32)(baseaddr),f,4);
+
+ if (address==source && size>1) { /* IBrowse does this! */
+ add_l(f,address); /* f now has the final address */
+ switch(size) {
+ case 2: bswap_16(source); mov_w_Rr(f,source,0); bswap_16(source); break;
+ case 4: bswap_32(source); mov_l_Rr(f,source,0); bswap_32(source); break;
+ }
+ }
+ else {
+ /* f now holds the offset */
+ switch(size) {
+ case 1: mov_b_mrr_indexed(address,f,1,source); break;
+ case 2: bswap_16(source); mov_w_mrr_indexed(address,f,1,source); bswap_16(source); break;
+ case 4: bswap_32(source); mov_l_mrr_indexed(address,f,1,source); bswap_32(source); break;
+ }
+ }
+}
+
+
+
+static __inline__ void writemem(int address, int source, int offset, int size, int tmp)
+{
+ int f=tmp;
+
+ mov_l_rr(f,address);
+ shrl_l_ri(f,16); /* The index into the mem bank table */
+ mov_l_rm_indexed(f,(uae_u32)mem_banks,f,4);
+ /* Now f holds a pointer to the actual membank */
+ mov_l_rR(f,f,offset);
+ /* Now f holds the address of the b/w/lput function */
+ call_r_02(f,address,source,4,size);
+ forget_about(tmp);
+}
+
+
+void writebyte(int address, int source, int tmp)
+{
+ int distrust;
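+ /* comptrustbyte: 0 = always use direct access, 1 = never, 2 = distrust only
+    when the code being compiled lives in the 0xF8xxxx ROM area, 3 = distrust
+    until have_done_picasso is set. */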
+ switch (currprefs.comptrustbyte) {
+ case 0: distrust=0; break;
+ case 1: distrust=1; break;
+ case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+ case 3: distrust=!have_done_picasso; break;
+ default: abort();
+ }
+
+ if ((special_mem&S_WRITE) || distrust)
+ writemem_special(address,source,20,1,tmp);
+ else
+ writemem_real(address,source,20,1,tmp,0);
+}
+
+static __inline__ void writeword_general(int address, int source, int tmp,
+ int clobber)
+{
+ int distrust;
+ switch (currprefs.comptrustword) {
+ case 0: distrust=0; break;
+ case 1: distrust=1; break;
+ case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+ case 3: distrust=!have_done_picasso; break;
+ default: abort();
+ }
+
+ if ((special_mem&S_WRITE) || distrust)
+ writemem_special(address,source,16,2,tmp);
+ else
+ writemem_real(address,source,16,2,tmp,clobber);
+}
+
+void writeword_clobber(int address, int source, int tmp)
+{
+ writeword_general(address,source,tmp,1);
+}
+
+void writeword(int address, int source, int tmp)
+{
+ writeword_general(address,source,tmp,0);
+}
+
+static __inline__ void writelong_general(int address, int source, int tmp,
+ int clobber)
+{
+ int distrust;
+ switch (currprefs.comptrustlong) {
+ case 0: distrust=0; break;
+ case 1: distrust=1; break;
+ case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+ case 3: distrust=!have_done_picasso; break;
+ default: abort();
+ }
+
+ if ((special_mem&S_WRITE) || distrust)
+ writemem_special(address,source,12,4,tmp);
+ else
+ writemem_real(address,source,12,4,tmp,clobber);
+}
+
+void writelong_clobber(int address, int source, int tmp)
+{
+ writelong_general(address,source,tmp,1);
+}
+
+void writelong(int address, int source, int tmp)
+{
+ writelong_general(address,source,tmp,0);
+}
+
+
+
+/* This version assumes that it is reading *real* memory, and *will* fail
+ * if that assumption is wrong! No branches, no second chances, just
+ * straight go-for-it attitude */
+
+static void readmem_real(int address, int dest, int offset, int size, int tmp)
+{
+ int f=tmp;
+
+ if (size==4 && address!=dest)
+ f=dest;
+
+#ifdef NATMEM_OFFSET
+ if (canbang) { /* Woohoo! go directly at the memory! */
+ switch(size) {
+ case 1: mov_b_brR(dest,address,NATMEM_OFFSET); break;
+ case 2: mov_w_brR(dest,address,NATMEM_OFFSET); bswap_16(dest); break;
+ case 4: mov_l_brR(dest,address,NATMEM_OFFSET); bswap_32(dest); break;
+ }
+ forget_about(tmp);
+ return;
+ }
+#endif
+
+ mov_l_rr(f,address);
+ shrl_l_ri(f,16); /* The index into the baseaddr table */
+ mov_l_rm_indexed(f,(uae_u32)baseaddr,f,4);
+ /* f now holds the offset */
+
+ switch(size) {
+ case 1: mov_b_rrm_indexed(dest,address,f,1); break;
+ case 2: mov_w_rrm_indexed(dest,address,f,1); bswap_16(dest); break;
+ case 4: mov_l_rrm_indexed(dest,address,f,1); bswap_32(dest); break;
+ }
+ forget_about(tmp);
+}
+
+
+
+static __inline__ void readmem(int address, int dest, int offset, int size, int tmp)
+{
+ int f=tmp;
+
+ mov_l_rr(f,address);
+ shrl_l_ri(f,16); /* The index into the mem bank table */
+ mov_l_rm_indexed(f,(uae_u32)mem_banks,f,4);
+ /* Now f holds a pointer to the actual membank */
+ mov_l_rR(f,f,offset);
+ /* Now f holds the address of the b/w/lget function */
+ call_r_11(dest,f,address,size,4);
+ forget_about(tmp);
+}
+
+void readbyte(int address, int dest, int tmp)
+{
+ int distrust;
+ switch (currprefs.comptrustbyte) {
+ case 0: distrust=0; break;
+ case 1: distrust=1; break;
+ case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+ case 3: distrust=!have_done_picasso; break;
+ default: abort();
+ }
+
+ if ((special_mem&S_READ) || distrust)
+ readmem_special(address,dest,8,1,tmp);
+ else
+ readmem_real(address,dest,8,1,tmp);
+}
+
+void readword(int address, int dest, int tmp)
+{
+ int distrust;
+ switch (currprefs.comptrustword) {
+ case 0: distrust=0; break;
+ case 1: distrust=1; break;
+ case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+ case 3: distrust=!have_done_picasso; break;
+ default: abort();
+ }
+
+ if ((special_mem&S_READ) || distrust)
+ readmem_special(address,dest,4,2,tmp);
+ else
+ readmem_real(address,dest,4,2,tmp);
+}
+
+void readlong(int address, int dest, int tmp)
+{
+ int distrust;
+ switch (currprefs.comptrustlong) {
+ case 0: distrust=0; break;
+ case 1: distrust=1; break;
+ case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+ case 3: distrust=!have_done_picasso; break;
+ default: abort();
+ }
+
+ if ((special_mem&S_READ) || distrust)
+ readmem_special(address,dest,0,4,tmp);
+ else
+ readmem_real(address,dest,0,4,tmp);
+}
+
+
+
+/* This one might appear a bit odd... */
+static __inline__ void get_n_addr_old(int address, int dest, int tmp)
+{
+ readmem(address,dest,24,4,tmp);
+}
+
+static __inline__ void get_n_addr_real(int address, int dest, int tmp)
+{
+ int f=tmp;
+ if (address!=dest)
+ f=dest;
+
+#ifdef NATMEM_OFFSET
+ if (canbang) {
+ lea_l_brr(dest,address,NATMEM_OFFSET);
+ forget_about(tmp);
+ return;
+ }
+#endif
+ mov_l_rr(f,address);
+ mov_l_rr(dest,address); // gb-- nop if dest==address
+ shrl_l_ri(f,16);
+ mov_l_rm_indexed(f, (uae_u32)baseaddr, f, 4);
+ add_l(dest,f);
+ forget_about(tmp);
+}
+
+void get_n_addr(int address, int dest, int tmp)
+{
+ int distrust;
+ switch (currprefs.comptrustnaddr) {
+ case 0: distrust=0; break;
+ case 1: distrust=1; break;
+ case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+ case 3: distrust=!have_done_picasso; break;
+ default: abort();
+ }
+
+ if (special_mem || distrust)
+ get_n_addr_old(address,dest,tmp);
+ else
+ get_n_addr_real(address,dest,tmp);
+}
+
+void get_n_addr_jmp(int address, int dest, int tmp)
+{
+#if 0 /* For this, we need to get the same address as the rest of UAE
+ would --- otherwise we end up translating everything twice */
+ get_n_addr(address,dest,tmp);
+#else
+ int f=tmp;
+ if (address!=dest)
+ f=dest;
+ mov_l_rr(f,address);
+ shrl_l_ri(f,16); /* The index into the baseaddr bank table */
+ mov_l_rm_indexed(dest,(uae_u32)baseaddr,f,4);
+ add_l(dest,address);
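+ /* Clear bit 0: 68k instruction addresses are always even. */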
+ and_l_ri (dest, ~1);
+ forget_about(tmp);
+#endif
+}
+
+
+/* base is a register, but dp is an actual value.
+ target is a register, as is tmp */
+void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
+{
+ int reg = (dp >> 12) & 15;
+ int regd_shift=(dp >> 9) & 3;
+
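+ /* Bit 8 of the extension word selects the 68020 full format (base displacement,
+    optional memory indirection, outer displacement); otherwise this is the brief
+    format also used by the 68000. */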
+ if (dp & 0x100) {
+ int ignorebase=(dp&0x80);
+ int ignorereg=(dp&0x40);
+ int addbase=0;
+ int outer=0;
+
+ if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
+
+ if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
+
+ if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
+ if (!ignorereg) {
+ if ((dp & 0x800) == 0)
+ sign_extend_16_rr(target,reg);
+ else
+ mov_l_rr(target,reg);
+ shll_l_ri(target,regd_shift);
+ }
+ else
+ mov_l_ri(target,0);
+
+ /* target is now regd */
+ if (!ignorebase)
+ add_l(target,base);
+ add_l_ri(target,addbase);
+ if (dp&0x03) readlong(target,target,tmp);
+ } else { /* do the getlong first, then add regd */
+ if (!ignorebase) {
+ mov_l_rr(target,base);
+ add_l_ri(target,addbase);
+ }
+ else
+ mov_l_ri(target,addbase);
+ if (dp&0x03) readlong(target,target,tmp);
+
+ if (!ignorereg) {
+ if ((dp & 0x800) == 0)
+ sign_extend_16_rr(tmp,reg);
+ else
+ mov_l_rr(tmp,reg);
+ shll_l_ri(tmp,regd_shift);
+ /* tmp is now regd */
+ add_l(target,tmp);
+ }
+ }
+ add_l_ri(target,outer);
+ }
+ else { /* 68000 version */
+ if ((dp & 0x800) == 0) { /* Sign extend */
+ sign_extend_16_rr(target,reg);
+ lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
+ }
+ else {
+ lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
+ }
+ }
+ forget_about(tmp);
+}
+
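+/* Swap the two bytes of an opcode word: 68k code is big-endian, the x86 host
+   is little-endian. */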
+static __inline__ unsigned int cft_map (unsigned int f)
+{
+ return ((f >> 8) & 255) | ((f & 255) << 8);
+}
+
+void set_cache_state(int enabled)
+{
+ if (enabled!=letit)
+ flush_icache_hard(77);
+ letit=enabled;
+}
+
+int get_cache_state(void)
+{
+ return letit;
+}
+
+uae_u32 get_jitted_size(void)
+{
+ if (compiled_code)
+ return current_compile_p-compiled_code;
+ return 0;
+}
+
+void alloc_cache(void)
+{
+ if (compiled_code) {
+ flush_icache_hard(6);
+ cache_free(compiled_code);
+ }
+ if (veccode == NULL)
+ veccode = cache_alloc (256);
+ if (popallspace == NULL)
+ popallspace = cache_alloc (1024);
+ compiled_code = NULL;
+ if (currprefs.cachesize == 0)
+ return;
+
+ while (!compiled_code && currprefs.cachesize) {
+ compiled_code=cache_alloc(currprefs.cachesize*1024);
+ if (!compiled_code)
+ currprefs.cachesize/=2;
+ }
+ if (compiled_code) {
+ max_compile_start=compiled_code+currprefs.cachesize*1024-BYTES_PER_INST;
+ current_compile_p=compiled_code;
+ }
+}
+
+extern unsigned long op_illg_1 (uae_u32 opcode) REGPARAM;
+
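+/* Compute two cheap checksums (a running sum and a running XOR) over the 68k
+   source words of a block, so check_checksum() can detect self-modifying code
+   later. */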
+static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
+{
+ uae_u32 k1=0;
+ uae_u32 k2=0;
+ uae_s32 len=bi->len;
+ uae_u32 tmp=bi->min_pcp;
+ uae_u32* pos;
+
+ len+=(tmp&3);
+ tmp&=(~3);
+ pos=(uae_u32*)tmp;
+
+ if (len<0 || len>MAX_CHECKSUM_LEN) {
+ *c1=0;
+ *c2=0;
+ }
+ else {
+ while (len>0) {
+ k1+=*pos;
+ k2^=*pos;
+ pos++;
+ len-=4;
+ }
+ *c1=k1;
+ *c2=k2;
+ }
+}
+
+static void show_checksum(blockinfo* bi)
+{
+ uae_u32 k1=0;
+ uae_u32 k2=0;
+ uae_s32 len=bi->len;
+ uae_u32 tmp=(uae_u32)bi->pc_p;
+ uae_u32* pos;
+
+ len+=(tmp&3);
+ tmp&=(~3);
+ pos=(uae_u32*)tmp;
+
+ if (len<0 || len>MAX_CHECKSUM_LEN) {
+ return;
+ }
+ else {
+ while (len>0) {
+ printf("%08x ",*pos);
+ pos++;
+ len-=4;
+ }
+ printf(" bla\n");
+ }
+}
+
+
+int check_for_cache_miss(void)
+{
+ blockinfo* bi=get_blockinfo_addr(regs.pc_p);
+
+ if (bi) {
+ int cl=cacheline(regs.pc_p);
+ if (bi!=cache_tags[cl+1].bi) {
+ raise_in_cl_list(bi);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+static void recompile_block(void)
+{
+ /* An existing block's countdown code has expired. We need to make
+ sure that execute_normal doesn't refuse to recompile due to a
+ perceived cache miss... */
+ blockinfo* bi=get_blockinfo_addr(regs.pc_p);
+
+ Dif (!bi)
+ abort();
+ raise_in_cl_list(bi);
+ execute_normal();
+ return;
+}
+
+static void cache_miss(void)
+{
+ blockinfo* bi=get_blockinfo_addr(regs.pc_p);
+ uae_u32 cl=cacheline(regs.pc_p);
+ blockinfo* bi2=get_blockinfo(cl);
+
+ if (!bi) {
+ execute_normal(); /* Compile this block now */
+ return;
+ }
+ Dif (!bi2 || bi==bi2) {
+ write_log ("Unexplained cache miss %p %p\n",bi,bi2);
+ abort();
+ }
+ raise_in_cl_list(bi);
+ return;
+}
+
+static void check_checksum(void)
+{
+ blockinfo* bi=get_blockinfo_addr(regs.pc_p);
+ uae_u32 cl=cacheline(regs.pc_p);
+ blockinfo* bi2=get_blockinfo(cl);
+
+ uae_u32 c1,c2;
+
+ checksum_count++;
+ /* These are not the droids you are looking for... */
+ if (!bi) {
+ /* Whoever is the primary target is in a dormant state, but
+ calling it was accidental, and we should just compile this
+ new block */
+ execute_normal();
+ return;
+ }
+ if (bi!=bi2) {
+ /* The block was hit accidentally, but it does exist. Cache miss */
+ cache_miss();
+ return;
+ }
+
+ if (bi->c1 || bi->c2)
+ calc_checksum(bi,&c1,&c2);
+ else {
+ c1=c2=1; /* Make sure it doesn't match */
+ }
+ if (c1==bi->c1 && c2==bi->c2) {
+ /* This block is still OK. So we reactivate. Of course, that
+ means we have to move it into the needs-to-be-flushed list */
+ bi->handler_to_use=bi->handler;
+ set_dhtu(bi,bi->direct_handler);
+
+ /* printf("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
+ c1,c2,bi->c1,bi->c2);*/
+ remove_from_list(bi);
+ add_to_active(bi);
+ raise_in_cl_list(bi);
+ }
+ else {
+ /* This block actually changed. We need to invalidate it,
+ and set it up to be recompiled */
+ /* printf("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
+ c1,c2,bi->c1,bi->c2); */
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ execute_normal();
+ }
+}
+
+
+static __inline__ void create_popalls(void)
+{
+ int i,r;
+
+ current_compile_p=popallspace;
+ set_target(current_compile_p);
+#if USE_PUSH_POP
+ /* If we can't use gcc inline assembly, we need to pop some
+ registers before jumping back to the various get-out routines.
+ This generates the code for it.
+ */
+ popall_do_nothing=current_compile_p;
+ for (i=0;i<N_REGS;i++) {
+ if (need_to_preserve[i])
+ raw_pop_l_r(i);
+ }
+ raw_jmp((uae_u32)do_nothing);
+ align_target(32);
+
+ popall_execute_normal=get_target();
+ for (i=0;i<N_REGS;i++) {
+ if (need_to_preserve[i])
+ raw_pop_l_r(i);
+ }
+ raw_jmp((uae_u32)execute_normal);
+ align_target(32);
+
+ popall_cache_miss=get_target();
+ for (i=0;i<N_REGS;i++) {
+ if (need_to_preserve[i])
+ raw_pop_l_r(i);
+ }
+ raw_jmp((uae_u32)cache_miss);
+ align_target(32);
+
+ popall_recompile_block=get_target();
+ for (i=0;i<N_REGS;i++) {
+ if (need_to_preserve[i])
+ raw_pop_l_r(i);
+ }
+ raw_jmp((uae_u32)recompile_block);
+ align_target(32);
+
+ popall_exec_nostats=get_target();
+ for (i=0;i<N_REGS;i++) {
+ if (need_to_preserve[i])
+ raw_pop_l_r(i);
+ }
+ raw_jmp((uae_u32)exec_nostats);
+ align_target(32);
+
+ popall_check_checksum=get_target();
+ for (i=0;i<N_REGS;i++) {
+ if (need_to_preserve[i])
+ raw_pop_l_r(i);
+ }
+ raw_jmp((uae_u32)check_checksum);
+ align_target(32);
+
+ current_compile_p=get_target();
+#else
+ popall_exec_nostats=exec_nostats;
+ popall_execute_normal=execute_normal;
+ popall_cache_miss=cache_miss;
+ popall_recompile_block=recompile_block;
+ popall_do_nothing=do_nothing;
+ popall_check_checksum=check_checksum;
+#endif
+
+ /* And now, the code to do the matching pushes and then jump
+ into a handler routine */
+ pushall_call_handler=get_target();
+#if USE_PUSH_POP
+ for (i=N_REGS;i--;) {
+ if (need_to_preserve[i])
+ raw_push_l_r(i);
+ }
+#endif
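+ /* Fetch regs.pc_p, reduce it to a cache-tag index with TAGMASK, and jump
+    through the handler stored in cache_tags. */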
+ r=REG_PC_TMP;
+ raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
+ raw_and_l_ri(r,TAGMASK);
+ raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
+}
+
+static __inline__ void reset_lists(void)
+{
+ int i;
+
+ for (i=0;i<MAX_HOLD_BI;i++)
+ hold_bi[i]=NULL;
+ active=NULL;
+ dormant=NULL;
+}
+
+static void prepare_block(blockinfo* bi)
+{
+ int i;
+
+ set_target(current_compile_p);
+ align_target(32);
+ bi->direct_pen=(void*)get_target();
+ raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
+ raw_mov_l_mr((uae_u32)&regs.pc_p,0);
+ raw_jmp((uae_u32)popall_execute_normal);
+
+ align_target(32);
+ bi->direct_pcc=(void*)get_target();
+ raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
+ raw_mov_l_mr((uae_u32)&regs.pc_p,0);
+ raw_jmp((uae_u32)popall_check_checksum);
+
+ align_target(32);
+ current_compile_p=get_target();
+
+ bi->deplist=NULL;
+ for (i=0;i<2;i++) {
+ bi->dep[i].prev_p=NULL;
+ bi->dep[i].next=NULL;
+ }
+ bi->env=default_ss;
+ bi->status=BI_NEW;
+ bi->havestate=0;
+ //bi->env=empty_ss;
+}
+
+void compemu_reset(void)
+{
+ int i;
+
+ set_cache_state(0);
+}
+
+void build_comp(void)
+{
+ int i;
+ int jumpcount=0;
+ unsigned long opcode;
+ struct cputbl* tbl=op_smalltbl_0_comp_ff;
+ struct cputbl* nftbl=op_smalltbl_0_comp_nf;
+ int count;
+#ifdef NOFLAGS_SUPPORT
+ struct cputbl *nfctbl = (currprefs.cpu_level >= 4 ? op_smalltbl_0_nf
+ : currprefs.cpu_level == 3 ? op_smalltbl_1_nf
+ : currprefs.cpu_level == 2 ? op_smalltbl_2_nf
+ : currprefs.cpu_level == 1 ? op_smalltbl_3_nf
+ : ! currprefs.cpu_compatible ? op_smalltbl_4_nf
+ : op_smalltbl_5_nf);
+#endif
+ raw_init_cpu();
+#ifdef NATMEM_OFFSET
+ write_log ("JIT: Setting signal handler\n");
+#ifndef _WIN32
+ signal(SIGSEGV,vec);
+#endif
+#endif
+ write_log ("JIT: Building Compiler function table\n");
+ for (opcode = 0; opcode < 65536; opcode++) {
+#ifdef NOFLAGS_SUPPORT
+ nfcpufunctbl[opcode] = op_illg_1;
+#endif
+ compfunctbl[opcode] = NULL;
+ nfcompfunctbl[opcode] = NULL;
+ prop[opcode].use_flags = 0x1f;
+ prop[opcode].set_flags = 0x1f;
+ prop[opcode].is_jump=1;
+ }
+
+ for (i = 0; tbl[i].opcode < 65536; i++) {
+ int isjmp=(tbl[i].specific&1);
+ int isaddx=(tbl[i].specific&8);
+ int iscjmp=(tbl[i].specific&16);
+
+ prop[tbl[i].opcode].is_jump=isjmp;
+ prop[tbl[i].opcode].is_const_jump=iscjmp;
+ prop[tbl[i].opcode].is_addx=isaddx;
+ compfunctbl[tbl[i].opcode] = tbl[i].handler;
+ }
+ for (i = 0; nftbl[i].opcode < 65536; i++) {
+ nfcompfunctbl[nftbl[i].opcode] = nftbl[i].handler;
+#ifdef NOFLAGS_SUPPORT
+ nfcpufunctbl[nftbl[i].opcode] = nfctbl[i].handler;
+#endif
+ }
+
+#ifdef NOFLAGS_SUPPORT
+ for (i = 0; nfctbl[i].handler; i++) {
+ nfcpufunctbl[nfctbl[i].opcode] = nfctbl[i].handler;
+ }
+#endif
+
+ for (opcode = 0; opcode < 65536; opcode++) {
+ cpuop_func *f;
+ cpuop_func *nff;
+#ifdef NOFLAGS_SUPPORT
+ cpuop_func *nfcf;
+#endif
+ int isjmp,isaddx,iscjmp;
+
+ if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > currprefs.cpu_level)
+ continue;
+
+ if (table68k[opcode].handler != -1) {
+ f = compfunctbl[table68k[opcode].handler];
+ nff = nfcompfunctbl[table68k[opcode].handler];
+#ifdef NOFLAGS_SUPPORT
+ nfcf = nfcpufunctbl[table68k[opcode].handler];
+#endif
+ isjmp=prop[table68k[opcode].handler].is_jump;
+ iscjmp=prop[table68k[opcode].handler].is_const_jump;
+ isaddx=prop[table68k[opcode].handler].is_addx;
+ prop[opcode].is_jump=isjmp;
+ prop[opcode].is_const_jump=iscjmp;
+ prop[opcode].is_addx=isaddx;
+ compfunctbl[opcode] = f;
+ nfcompfunctbl[opcode] = nff;
+#ifdef NOFLAGS_SUPPORT
+ Dif (nfcf == op_illg_1)
+ abort();
+ nfcpufunctbl[opcode] = nfcf;
+#endif
+ }
+ prop[opcode].set_flags =table68k[opcode].flagdead;
+ prop[opcode].use_flags =table68k[opcode].flaglive;
+ /* Unconditional jumps don't evaluate condition codes, so they
+ don't actually use any flags themselves */
+ if (prop[opcode].is_const_jump)
+ prop[opcode].use_flags=0;
+ }
+#ifdef NOFLAGS_SUPPORT
+ for (i = 0; nfctbl[i].handler != NULL; i++) {
+ if (nfctbl[i].specific)
+ nfcpufunctbl[tbl[i].opcode] = nfctbl[i].handler;
+ }
+#endif
+
+ count=0;
+ for (opcode = 0; opcode < 65536; opcode++) {
+ if (compfunctbl[opcode])
+ count++;
+ }
+ write_log ("Supposedly %d compileable opcodes!\n",count);
+
+ /* Initialise state */
+ alloc_cache();
+ create_popalls();
+ reset_lists();
+
+ for (i=0;i<TAGSIZE;i+=2) {
+ cache_tags[i].handler=(void*)popall_execute_normal;
+ cache_tags[i+1].bi=NULL;
+ }
+ compemu_reset();
+
+ for (i=0;i<N_REGS;i++) {
+ empty_ss.nat[i].holds=-1;
+ empty_ss.nat[i].validsize=0;
+ empty_ss.nat[i].dirtysize=0;
+ }
+ default_ss=empty_ss;
+#if 0
+ default_ss.nat[6].holds=11;
+ default_ss.nat[6].validsize=4;
+ default_ss.nat[5].holds=12;
+ default_ss.nat[5].validsize=4;
+#endif
+}
+
+
+static void flush_icache_hard(int n)
+{
+ uae_u32 i;
+ blockinfo* bi;
+
+ hard_flush_count++;
+#if 0
+ printf("Flush Icache_hard(%d/%x/%p), %u instruction bytes\n",
+ n,regs.pc,regs.pc_p,current_compile_p-compiled_code);
+#endif
+ bi=active;
+ while(bi) {
+ cache_tags[cacheline(bi->pc_p)].handler=(void*)popall_execute_normal;
+ cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
+ bi=bi->next;
+ }
+ bi=dormant;
+ while(bi) {
+ cache_tags[cacheline(bi->pc_p)].handler=(void*)popall_execute_normal;
+ cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
+ bi=bi->next;
+ }
+
+ reset_lists();
+ if (!compiled_code)
+ return;
+ current_compile_p=compiled_code;
+ set_special(0); /* To get out of compiled code */
+}
+
+
+/* "Soft flushing" --- instead of actually throwing everything away,
+ we simply mark everything as "needs to be checked".
+*/
+
+void flush_icache(int n)
+{
+ uae_u32 i;
+ blockinfo* bi;
+ blockinfo* bi2;
+
+ if (currprefs.comp_hardflush) {
+ flush_icache_hard(n);
+ return;
+ }
+ soft_flush_count++;
+ if (!active)
+ return;
+
+ bi=active;
+ while (bi) {
+ uae_u32 cl=cacheline(bi->pc_p);
+ if (!bi->handler) {
+ /* invalidated block */
+ if (bi==cache_tags[cl+1].bi)
+ cache_tags[cl].handler=popall_execute_normal;
+ bi->handler_to_use=popall_execute_normal;
+ set_dhtu(bi,bi->direct_pen);
+ }
+ else {
+ if (bi==cache_tags[cl+1].bi)
+ cache_tags[cl].handler=popall_check_checksum;
+ bi->handler_to_use=popall_check_checksum;
+ set_dhtu(bi,bi->direct_pcc);
+ }
+ bi2=bi;
+ bi=bi->next;
+ }
+ /* bi2 is now the last entry in the active list */
+ bi2->next=dormant;
+ if (dormant)
+ dormant->prev_p=&(bi2->next);
+
+ dormant=active;
+ active->prev_p=&dormant;
+ active=NULL;
+}
+
+
+static void catastrophe(void)
+{
+ abort();
+}
+
+int failure;
+
+
+void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
+{
+ if (letit && compiled_code && currprefs.cpu_level>=2) {
+
+ /* OK, here we need to 'compile' a block */
+ int i;
+ int r;
+ int was_comp=0;
+ uae_u8 liveflags[MAXRUN+1];
+ uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
+ uae_u32 min_pcp=max_pcp;
+ uae_u32 cl=cacheline(pc_hist[0].location);
+ void* specflags=(void*)&regs.spcflags;
+ blockinfo* bi=NULL;
+ blockinfo* bi2;
+ int extra_len=0;
+
+ compile_count++;
+ if (current_compile_p>=max_compile_start)
+ flush_icache_hard(7);
+
+ alloc_blockinfos();
+
+ bi=get_blockinfo_addr_new(pc_hist[0].location,0);
+ bi2=get_blockinfo(cl);
+
+ optlev=bi->optlevel;
+ if (bi->handler) {
+ Dif (bi!=bi2) {
+ /* I don't think it can happen anymore. Shouldn't, in
+ any case. So let's make sure... */
+ printf("WOOOWOO count=%d, ol=%d %p %p\n",
+ bi->count,bi->optlevel,bi->handler_to_use,
+ cache_tags[cl].handler);
+ abort();
+ }
+
+ Dif (bi->count!=-1 && bi->status!=BI_TARGETTED) {
+ /* What the heck? We are not supposed to be here! */
+ abort();
+ }
+ }
+ if (bi->count==-1) {
+ optlev++;
+ while (!currprefs.optcount[optlev])
+ optlev++;
+ bi->count=currprefs.optcount[optlev]-1;
+ }
+ current_block_pc_p=(uae_u32)pc_hist[0].location;
+
+ remove_deps(bi); /* We are about to create new code */
+ bi->optlevel=optlev;
+ bi->pc_p=(uae_u8*)pc_hist[0].location;
+
+ liveflags[blocklen]=0x1f; /* All flags needed afterwards */
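+ /* Walk the block backwards: an instruction only needs to generate the flags
+    that a later instruction (or the block exit) actually consumes; this is
+    what lets us pick the no-flags handlers below. */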
+ i=blocklen;
+ while (i--) {
+ uae_u16* currpcp=pc_hist[i].location;
+ int op=cft_map(*currpcp);
+
+ if ((uae_u32)currpcp<min_pcp)
+ min_pcp=(uae_u32)currpcp;
+ if ((uae_u32)currpcp>max_pcp)
+ max_pcp=(uae_u32)currpcp;
+
+ if (currprefs.compnf) {
+ liveflags[i]=((liveflags[i+1]&
+ (~prop[op].set_flags))|
+ prop[op].use_flags);
+ if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
+ liveflags[i]&= ~FLAG_Z;
+ }
+ else {
+ liveflags[i]=0x1f;
+ }
+ }
+
+ bi->needed_flags=liveflags[0];
+
+ /* This is the non-direct handler */
+ align_target(32);
+ set_target(get_target()+1);
+ align_target(16);
+ /* Now aligned at n*32+16 */
+
+ bi->handler=
+ bi->handler_to_use=(void*)get_target();
+ raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
+ raw_jnz((uae_u32)popall_cache_miss);
+ /* This was 16 bytes on the x86, so now aligned on (n+1)*32 */
+
+ was_comp=0;
+
+#if USE_MATCHSTATE
+ comp_pc_p=(uae_u8*)pc_hist[0].location;
+ init_comp();
+ match_states(&(bi->env));
+ was_comp=1;
+#endif
+
+ bi->direct_handler=(void*)get_target();
+ set_dhtu(bi,bi->direct_handler);
+ current_block_start_target=(uae_u32)get_target();
+
+ if (bi->count>=0) { /* Need to generate countdown code */
+ raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
+ raw_sub_l_mi((uae_u32)&(bi->count),1);
+ raw_jl((uae_u32)popall_recompile_block);
+ }
+ if (optlev==0) { /* No need to actually translate */
+ /* Execute normally without keeping stats */
+ raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
+ raw_jmp((uae_u32)popall_exec_nostats);
+ }
+ else {
+ reg_alloc_run=0;
+ next_pc_p=0;
+ taken_pc_p=0;
+ branch_cc=0;
+
+ log_startblock();
+ for (i=0;i<blocklen &&
+ get_target_noopt()<max_compile_start;i++) {
+ cpuop_func **cputbl;
+ cpuop_func **comptbl;
+ uae_u16 opcode;
+
+ opcode=cft_map((uae_u16)*pc_hist[i].location);
+ special_mem=pc_hist[i].specmem;
+ needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
+ if (!needed_flags && currprefs.compnf) {
+#ifdef NOFLAGS_SUPPORT
+ cputbl=nfcpufunctbl;
+#else
+ cputbl=cpufunctbl;
+#endif
+ comptbl=nfcompfunctbl;
+ }
+ else {
+ cputbl=cpufunctbl;
+ comptbl=compfunctbl;
+ }
+
+ if (comptbl[opcode] && optlev>1) {
+ failure=0;
+ if (!was_comp) {
+ comp_pc_p=(uae_u8*)pc_hist[i].location;
+ init_comp();
+ }
+ was_comp++;
+
+ comptbl[opcode](opcode);
+ freescratch();
+ if (!(liveflags[i+1] & FLAG_CZNV)) {
+ /* We can forget about flags */
+ dont_care_flags();
+ }
+#if INDIVIDUAL_INST
+ flush(1);
+ nop();
+ flush(1);
+ was_comp=0;
+#endif
+ }
+ else
+ failure=1;
+ if (failure) {
+ if (was_comp) {
+ flush(1);
+ was_comp=0;
+ }
+ raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_push_l_r(REG_PAR1);
+#endif
+ raw_mov_l_mi((uae_u32)&regs.pc_p,
+ (uae_u32)pc_hist[i].location);
+ raw_call((uae_u32)cputbl[opcode]);
+ //raw_add_l_mi((uae_u32)&oink,1); // FIXME
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_inc_sp(4);
+#endif
+ if (needed_flags) {
+ //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode+65536);
+ }
+ else {
+ //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode);
+ }
+
+ if (i<blocklen-1) {
+ uae_s8* branchadd;
+
+ raw_mov_l_rm(0,(uae_u32)specflags);
+ raw_test_l_rr(0,0);
+ raw_jz_b_oponly();
+ branchadd=get_target();
+ emit_byte(0);
+ raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+ raw_jmp((uae_u32)popall_do_nothing);
+ *branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
+ }
+ }
+ }
+#if 0 /* This isn't completely kosher yet; it really needs to be
+ integrated into a general inter-block-dependency scheme */
+ if (next_pc_p && taken_pc_p &&
+ was_comp && taken_pc_p==current_block_pc_p) {
+ blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
+ blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
+ uae_u8 x=bi1->needed_flags;
+
+ if (x==0xff || 1) { /* To be on the safe side */
+ uae_u16* next=(uae_u16*)next_pc_p;
+ uae_u16 op=cft_map(*next);
+
+ x=0x1f;
+ x&=(~prop[op].set_flags);
+ x|=prop[op].use_flags;
+ }
+
+ x|=bi2->needed_flags;
+ if (!(x & FLAG_CZNV)) {
+ /* We can forget about flags */
+ dont_care_flags();
+ extra_len+=2; /* The next instruction now is part of this
+ block */
+ }
+
+ }
+#endif
+
+ if (next_pc_p) { /* A branch was registered */
+ uae_u32 t1=next_pc_p;
+ uae_u32 t2=taken_pc_p;
+ int cc=branch_cc;
+
+ uae_u32* branchadd;
+ uae_u32* tba;
+ bigstate tmp;
+ blockinfo* tbi;
+
+ if (taken_pc_p<next_pc_p) {
+ /* backward branch. Optimize for the "taken" case ---
+ which means the raw_jcc should fall through when
+ the 68k branch is taken. */
+ t1=taken_pc_p;
+ t2=next_pc_p;
+ cc=branch_cc^1;
+ }
+
+#if !USE_MATCHSTATE
+ flush_keepflags();
+#endif
+ tmp=live; /* ouch! This is big... */
+ raw_jcc_l_oponly(cc);
+ branchadd=(uae_u32*)get_target();
+ emit_long(0);
+ /* predicted outcome */
+ tbi=get_blockinfo_addr_new((void*)t1,1);
+ match_states(&(tbi->env));
+ //flush(1); /* Can only get here if was_comp==1 */
+ raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+ raw_jcc_l_oponly(9);
+ tba=(uae_u32*)get_target();
+ emit_long(get_handler(t1)-((uae_u32)tba+4));
+ raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
+ raw_jmp((uae_u32)popall_do_nothing);
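+ /* Register the emitted jump offset as a dependency on the target block so
+    it can be re-linked if that block is recompiled or invalidated. */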
+ create_jmpdep(bi,0,tba,t1);
+
+ align_target(16);
+ /* not-predicted outcome */
+ *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
+ live=tmp; /* Ouch again */
+ tbi=get_blockinfo_addr_new((void*)t2,1);
+ match_states(&(tbi->env));
+
+ //flush(1); /* Can only get here if was_comp==1 */
+ raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+ raw_jcc_l_oponly(9);
+ tba=(uae_u32*)get_target();
+ emit_long(get_handler(t2)-((uae_u32)tba+4));
+ raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
+ raw_jmp((uae_u32)popall_do_nothing);
+ create_jmpdep(bi,1,tba,t2);
+ }
+ else
+ {
+ if (was_comp) {
+ flush(1);
+ }
+
+ /* Let's find out where next_handler is... */
+ if (was_comp && isinreg(PC_P)) {
+ int r2;
+
+ r=live.state[PC_P].realreg;
+
+ if (r==0)
+ r2=1;
+ else
+ r2=0;
+
+ raw_and_l_ri(r,TAGMASK);
+ raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
+ raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+ raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,9);
+ raw_jmp_r(r2);
+ }
+ else if (was_comp && isconst(PC_P)) {
+ uae_u32 v=live.state[PC_P].val;
+ uae_u32* tba;
+ blockinfo* tbi;
+
+ tbi=get_blockinfo_addr_new((void*)v,1);
+ match_states(&(tbi->env));
+
+ raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+ raw_jcc_l_oponly(9);
+ tba=(uae_u32*)get_target();
+ emit_long(get_handler(v)-((uae_u32)tba+4));
+ raw_mov_l_mi((uae_u32)&regs.pc_p,v);
+ raw_jmp((uae_u32)popall_do_nothing);
+ create_jmpdep(bi,0,tba,v);
+ }
+ else {
+ int r2;
+
+ r=REG_PC_TMP;
+ raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
+ if (r==0)
+ r2=1;
+ else
+ r2=0;
+
+ raw_and_l_ri(r,TAGMASK);
+ raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
+ raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+ raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,9);
+ raw_jmp_r(r2);
+ }
+ }
+ }
+
+ if (next_pc_p+extra_len>=max_pcp &&
+ next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
+ max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
+ else
+ max_pcp+=LONGEST_68K_INST;
+ bi->len=max_pcp-min_pcp;
+ bi->min_pcp=min_pcp;
+
+ remove_from_list(bi);
+ if (isinrom(min_pcp) && isinrom(max_pcp))
+ add_to_dormant(bi); /* No need to checksum it on cache flush.
+ Please don't start changing ROMs in
+ flight! */
+ else {
+ calc_checksum(bi,&(bi->c1),&(bi->c2));
+ add_to_active(bi);
+ }
+
+ log_dump();
+ align_target(32);
+ current_compile_p=get_target();
+
+ raise_in_cl_list(bi);
+ bi->nexthandler=current_compile_p;
+
+ /* We will flush soon, anyway, so let's do it now */
+ if (current_compile_p>=max_compile_start)
+ flush_icache_hard(7);
+
+ do_extra_cycles(totcycles); /* for the compilation time */
+ }
+}
+
+
currprefs.df[dnum][255] = 0;
strncpy (changed_prefs.df[dnum], fname, 255);
changed_prefs.df[dnum][255] = 0;
+ strcpy (drv->newname, fname);
gui_filename (dnum, fname);
memset (buffer, 0, sizeof (buffer));
}
}
drv->dskchange_time = dsktime;
+#ifdef DISK_DEBUG
+ write_dlog("delayed insert enable %d\n", dsktime);
+#endif
}
void DISK_reinsert (int num)
ui->self->cmds_sent++;
/* The message is sent by our interrupt handler, so make sure an interrupt
* happens. */
- uae_int_requested = 1;
+ do_uae_int_requested();
/* Send back the locks. */
if (get_long (ui->self->locklist) != 0)
write_comm_pipe_int (ui->back_pipe, (int)(get_long (ui->self->locklist)), 0);
DECLARE(frndint_rr(FW d, FR s));
DECLARE(fsin_rr(FW d, FR s));
DECLARE(fcos_rr(FW d, FR s));
+DECLARE(ftan_rr(FW d, FR s));
+DECLARE(fsincos_rr(FW d, FW c, FR s));
+DECLARE(fscale_rr(FRW d, FR s));
DECLARE(ftwotox_rr(FW d, FR s));
DECLARE(fetox_rr(FW d, FR s));
+DECLARE(fetoxM1_rr(FW d, FR s));
+DECLARE(ftentox_rr(FW d, FR s));
DECLARE(flog2_rr(FW d, FR s));
+DECLARE(flogN_rr(FW d, FR s));
+DECLARE(flogNP1_rr(FW d, FR s));
+DECLARE(flog10_rr(FW d, FR s));
+DECLARE(fasin_rr(FW d, FR s));
+DECLARE(facos_rr(FW d, FR s));
+DECLARE(fatan_rr(FW d, FR s));
+DECLARE(fsinh_rr(FW d, FR s));
+DECLARE(fcosh_rr(FW d, FR s));
+DECLARE(ftanh_rr(FW d, FR s));
+DECLARE(fatanh_rr(FW d, FR s));
DECLARE(fneg_rr(FW d, FR s));
DECLARE(fadd_rr(FRW d, FR s));
DECLARE(fsub_rr(FRW d, FR s));
--- /dev/null
+#define USE_OPTIMIZER 0
+#define USE_LOW_OPTIMIZER 0
+#define USE_ALIAS 1
+#define USE_F_ALIAS 1
+#define USE_SOFT_FLUSH 1
+#define USE_OFFSET 1
+#define COMP_DEBUG 1
+
+#if COMP_DEBUG
+#define Dif(x) if (x)
+#else
+#define Dif(x) if (0)
+#endif
+
+#define SCALE 2
+#define MAXCYCLES (1000 * CYCLE_UNIT)
+#define MAXREGOPT 65536
+
+#define BYTES_PER_INST 10240 /* paranoid ;-) */
+#define LONGEST_68K_INST 16 /* The number of bytes the longest possible
+ 68k instruction takes */
+#define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums
+ for. Anything larger will be flushed
+ unconditionally even with SOFT_FLUSH */
+#define MAX_HOLD_BI 3 /* One for the current block, and up to two
+ for jump targets */
+
+#define INDIVIDUAL_INST 0
+#define FLAG_C 0x0010
+#define FLAG_V 0x0008
+#define FLAG_Z 0x0004
+#define FLAG_N 0x0002
+#define FLAG_X 0x0001
+#define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V)
+#define FLAG_ZNV (FLAG_Z | FLAG_N | FLAG_V)
+
+#define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */
+
+/* Whether to preserve registers across calls to JIT compiled routines */
+#if defined X86_ASSEMBLY
+#define USE_PUSH_POP 0
+#else
+#define USE_PUSH_POP 1
+#endif
+
+#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
+#define N_FREGS 6 /* That leaves us two positions on the stack to play with */
+
+/* Functions exposed to newcpu, or to what was moved from newcpu.c to
+ * compemu_support.c */
+extern void init_comp(void);
+extern void flush(int save_regs);
+extern void small_flush(int save_regs);
+extern void set_target(uae_u8* t);
+extern uae_u8* get_target(void);
+extern void freescratch(void);
+extern void build_comp(void);
+extern void set_cache_state(int enabled);
+extern int get_cache_state(void);
+extern uae_u32 get_jitted_size(void);
+#ifdef JIT
+extern void flush_icache(int n);
+#endif
+extern void alloc_cache(void);
+extern void compile_block(cpu_history* pc_hist, int blocklen, int totcycles);
+extern void lopt_emit_all(void);
+extern int check_for_cache_miss(void);
+
+
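+/* With m68k_speed==-1 ("fastest possible") the cycles charged per block are
+   divided by SCALE and clamped to the range [1, MAXCYCLES]; otherwise the real
+   count is used unchanged. */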
+#define scaled_cycles(x) (currprefs.m68k_speed==-1?(((x)/SCALE)?(((x)/SCALE<MAXCYCLES?((x)/SCALE):MAXCYCLES)):1):(x))
+
+
+extern uae_u32 needed_flags;
+extern cacheline cache_tags[];
+extern uae_u8* comp_pc_p;
+extern void* pushall_call_handler;
+
+#define VREGS 32
+#define VFREGS 16
+
+#define INMEM 1
+#define CLEAN 2
+#define DIRTY 3
+#define UNDEF 4
+#define ISCONST 5
+
+typedef struct {
+ uae_u32* mem;
+ uae_u32 val;
+ uae_u8 is_swapped;
+ uae_u8 status;
+ uae_u8 realreg;
+ uae_u8 realind; /* The index in the holds[] array */
+ uae_u8 needflush;
+ uae_u8 validsize;
+ uae_u8 dirtysize;
+ uae_u8 dummy;
+} reg_status;
+
+typedef struct {
+ uae_u32* mem;
+ double val;
+ uae_u8 status;
+ uae_u8 realreg;
+ uae_u8 realind;
+ uae_u8 needflush;
+} freg_status;
+
+typedef struct {
+ uae_u8 use_flags;
+ uae_u8 set_flags;
+ uae_u8 is_jump;
+ uae_u8 is_addx;
+ uae_u8 is_const_jump;
+} op_properties;
+extern op_properties prop[65536];
+
+static __inline__ int end_block(uae_u16 opcode)
+{
+ return prop[opcode].is_jump ||
+ (prop[opcode].is_const_jump && !currprefs.comp_constjump);
+}
+
+#define PC_P 16
+#define FLAGX 17
+#define FLAGTMP 18
+#define NEXT_HANDLER 19
+#define S1 20
+#define S2 21
+#define S3 22
+#define S4 23
+#define S5 24
+#define S6 25
+#define S7 26
+#define S8 27
+#define S9 28
+#define S10 29
+#define S11 30
+#define S12 31
+
+#define FP_RESULT 8
+#define FS1 9
+#define FS2 10
+#define FS3 11
+
+typedef struct {
+ uae_u32 touched;
+ uae_s8 holds[VREGS];
+ uae_u8 nholds;
+ uae_u8 canbyte;
+ uae_u8 canword;
+ uae_u8 locked;
+} n_status;
+
+typedef struct {
+ uae_s8 holds;
+ uae_u8 validsize;
+ uae_u8 dirtysize;
+} n_smallstatus;
+
+typedef struct {
+ uae_u32 touched;
+ uae_s8 holds[VFREGS];
+ uae_u8 nholds;
+ uae_u8 locked;
+} fn_status;
+
+/* For flag handling */
+#define NADA 1
+#define TRASH 2
+#define VALID 3
+
+/* needflush values */
+#define NF_SCRATCH 0
+#define NF_TOMEM 1
+#define NF_HANDLER 2
+
+typedef struct {
+ /* Integer part */
+ reg_status state[VREGS];
+ n_status nat[N_REGS];
+ uae_u32 flags_on_stack;
+ uae_u32 flags_in_flags;
+ uae_u32 flags_are_important;
+ /* FPU part */
+ freg_status fate[VFREGS];
+ fn_status fat[N_FREGS];
+
+ /* x86 FPU part */
+ uae_s8 spos[N_FREGS];
+ uae_s8 onstack[6];
+ uae_s8 tos;
+} bigstate;
+
+typedef struct {
+ /* Integer part */
+ n_smallstatus nat[N_REGS];
+} smallstate;
+
+extern bigstate live;
+extern int touchcnt;
+
+
+#define IMM uae_u32
+#define R1 uae_u32
+#define R2 uae_u32
+#define R4 uae_u32
+#define W1 uae_u32
+#define W2 uae_u32
+#define W4 uae_u32
+#define RW1 uae_u32
+#define RW2 uae_u32
+#define RW4 uae_u32
+#define MEMR uae_u32
+#define MEMW uae_u32
+#define MEMRW uae_u32
+
+#define FW uae_u32
+#define FR uae_u32
+#define FRW uae_u32
+
+#define MIDFUNC(nargs,func,args) void func args
+#define MENDFUNC(nargs,func,args)
+#define COMPCALL(func) func
+
+#define LOWFUNC(flags,mem,nargs,func,args) static __inline__ void func args
+#define LENDFUNC(flags,mem,nargs,func,args)
+
+#if USE_OPTIMIZER
+#define REGALLOC_O 2
+#define PEEPHOLE_O 3 /* Has to be >= REGALLOC */
+#define DECLARE(func) extern void func; extern void do_##func
+#else
+#define REGALLOC_O 2000000
+#define PEEPHOLE_O 2000000
+#define DECLARE(func) extern void func
+#endif
+
+
+/* What we expose to the outside */
+DECLARE(bt_l_ri(R4 r, IMM i));
+DECLARE(bt_l_rr(R4 r, R4 b));
+DECLARE(btc_l_ri(RW4 r, IMM i));
+DECLARE(btc_l_rr(RW4 r, R4 b));
+DECLARE(bts_l_ri(RW4 r, IMM i));
+DECLARE(bts_l_rr(RW4 r, R4 b));
+DECLARE(btr_l_ri(RW4 r, IMM i));
+DECLARE(btr_l_rr(RW4 r, R4 b));
+DECLARE(mov_l_rm(W4 d, IMM s));
+DECLARE(call_r(R4 r));
+DECLARE(sub_l_mi(IMM d, IMM s));
+DECLARE(mov_l_mi(IMM d, IMM s));
+DECLARE(mov_w_mi(IMM d, IMM s));
+DECLARE(mov_b_mi(IMM d, IMM s));
+DECLARE(rol_b_ri(RW1 r, IMM i));
+DECLARE(rol_w_ri(RW2 r, IMM i));
+DECLARE(rol_l_ri(RW4 r, IMM i));
+DECLARE(rol_l_rr(RW4 d, R1 r));
+DECLARE(rol_w_rr(RW2 d, R1 r));
+DECLARE(rol_b_rr(RW1 d, R1 r));
+DECLARE(shll_l_rr(RW4 d, R1 r));
+DECLARE(shll_w_rr(RW2 d, R1 r));
+DECLARE(shll_b_rr(RW1 d, R1 r));
+DECLARE(ror_b_ri(R1 r, IMM i));
+DECLARE(ror_w_ri(R2 r, IMM i));
+DECLARE(ror_l_ri(R4 r, IMM i));
+DECLARE(ror_l_rr(R4 d, R1 r));
+DECLARE(ror_w_rr(R2 d, R1 r));
+DECLARE(ror_b_rr(R1 d, R1 r));
+DECLARE(shrl_l_rr(RW4 d, R1 r));
+DECLARE(shrl_w_rr(RW2 d, R1 r));
+DECLARE(shrl_b_rr(RW1 d, R1 r));
+DECLARE(shra_l_rr(RW4 d, R1 r));
+DECLARE(shra_w_rr(RW2 d, R1 r));
+DECLARE(shra_b_rr(RW1 d, R1 r));
+DECLARE(shll_l_ri(RW4 r, IMM i));
+DECLARE(shll_w_ri(RW2 r, IMM i));
+DECLARE(shll_b_ri(RW1 r, IMM i));
+DECLARE(shrl_l_ri(RW4 r, IMM i));
+DECLARE(shrl_w_ri(RW2 r, IMM i));
+DECLARE(shrl_b_ri(RW1 r, IMM i));
+DECLARE(shra_l_ri(RW4 r, IMM i));
+DECLARE(shra_w_ri(RW2 r, IMM i));
+DECLARE(shra_b_ri(RW1 r, IMM i));
+DECLARE(setcc(W1 d, IMM cc));
+DECLARE(setcc_m(IMM d, IMM cc));
+DECLARE(cmov_l_rr(RW4 d, R4 s, IMM cc));
+DECLARE(cmov_l_rm(RW4 d, IMM s, IMM cc));
+DECLARE(bsf_l_rr(W4 d, R4 s));
+DECLARE(pop_m(IMM d));
+DECLARE(push_m(IMM d));
+DECLARE(pop_l(W4 d));
+DECLARE(push_l_i(IMM i));
+DECLARE(push_l(R4 s));
+DECLARE(clear_16(RW4 r));
+DECLARE(clear_8(RW4 r));
+DECLARE(sign_extend_16_rr(W4 d, R2 s));
+DECLARE(sign_extend_8_rr(W4 d, R1 s));
+DECLARE(zero_extend_16_rr(W4 d, R2 s));
+DECLARE(zero_extend_8_rr(W4 d, R1 s));
+DECLARE(imul_64_32(RW4 d, RW4 s));
+DECLARE(mul_64_32(RW4 d, RW4 s));
+DECLARE(imul_32_32(RW4 d, R4 s));
+DECLARE(mul_32_32(RW4 d, R4 s));
+DECLARE(mov_b_rr(W1 d, R1 s));
+DECLARE(mov_w_rr(W2 d, R2 s));
+DECLARE(mov_l_rrm_indexed(W4 d,R4 baser, R4 index, IMM factor));
+DECLARE(mov_w_rrm_indexed(W2 d, R4 baser, R4 index, IMM factor));
+DECLARE(mov_b_rrm_indexed(W1 d, R4 baser, R4 index, IMM factor));
+DECLARE(mov_l_mrr_indexed(R4 baser, R4 index, IMM factor, R4 s));
+DECLARE(mov_w_mrr_indexed(R4 baser, R4 index, IMM factor, R2 s));
+DECLARE(mov_b_mrr_indexed(R4 baser, R4 index, IMM factor, R1 s));
+DECLARE(mov_l_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R4 s));
+DECLARE(mov_w_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R2 s));
+DECLARE(mov_b_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R1 s));
+DECLARE(mov_l_brrm_indexed(W4 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE(mov_w_brrm_indexed(W2 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE(mov_b_brrm_indexed(W1 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE(mov_l_rm_indexed(W4 d, IMM base, R4 index, IMM factor));
+DECLARE(mov_l_rR(W4 d, R4 s, IMM offset));
+DECLARE(mov_w_rR(W2 d, R4 s, IMM offset));
+DECLARE(mov_b_rR(W1 d, R4 s, IMM offset));
+DECLARE(mov_l_brR(W4 d, R4 s, IMM offset));
+DECLARE(mov_w_brR(W2 d, R4 s, IMM offset));
+DECLARE(mov_b_brR(W1 d, R4 s, IMM offset));
+DECLARE(mov_l_Ri(R4 d, IMM i, IMM offset));
+DECLARE(mov_w_Ri(R4 d, IMM i, IMM offset));
+DECLARE(mov_b_Ri(R4 d, IMM i, IMM offset));
+DECLARE(mov_l_Rr(R4 d, R4 s, IMM offset));
+DECLARE(mov_w_Rr(R4 d, R2 s, IMM offset));
+DECLARE(mov_b_Rr(R4 d, R1 s, IMM offset));
+DECLARE(lea_l_brr(W4 d, R4 s, IMM offset));
+DECLARE(lea_l_brr_indexed(W4 d, R4 s, R4 index, IMM factor, IMM offset));
+DECLARE(lea_l_rr_indexed(W4 d, R4 s, R4 index, IMM factor));
+DECLARE(mov_l_bRr(R4 d, R4 s, IMM offset));
+DECLARE(mov_w_bRr(R4 d, R2 s, IMM offset));
+DECLARE(mov_b_bRr(R4 d, R1 s, IMM offset));
+DECLARE(bswap_32(RW4 r));
+DECLARE(bswap_16(RW2 r));
+DECLARE(mov_l_rr(W4 d, R4 s));
+DECLARE(mov_l_mr(IMM d, R4 s));
+DECLARE(mov_w_mr(IMM d, R2 s));
+DECLARE(mov_w_rm(W2 d, IMM s));
+DECLARE(mov_b_mr(IMM d, R1 s));
+DECLARE(mov_b_rm(W1 d, IMM s));
+DECLARE(mov_l_ri(W4 d, IMM s));
+DECLARE(mov_w_ri(W2 d, IMM s));
+DECLARE(mov_b_ri(W1 d, IMM s));
+DECLARE(add_l_mi(IMM d, IMM s) );
+DECLARE(add_w_mi(IMM d, IMM s) );
+DECLARE(add_b_mi(IMM d, IMM s) );
+DECLARE(test_l_ri(R4 d, IMM i));
+DECLARE(test_l_rr(R4 d, R4 s));
+DECLARE(test_w_rr(R2 d, R2 s));
+DECLARE(test_b_rr(R1 d, R1 s));
+DECLARE(and_l_ri(RW4 d, IMM i));
+DECLARE(and_l(RW4 d, R4 s));
+DECLARE(and_w(RW2 d, R2 s));
+DECLARE(and_b(RW1 d, R1 s));
+DECLARE(or_l_ri(RW4 d, IMM i));
+DECLARE(or_l(RW4 d, R4 s));
+DECLARE(or_w(RW2 d, R2 s));
+DECLARE(or_b(RW1 d, R1 s));
+DECLARE(adc_l(RW4 d, R4 s));
+DECLARE(adc_w(RW2 d, R2 s));
+DECLARE(adc_b(RW1 d, R1 s));
+DECLARE(add_l(RW4 d, R4 s));
+DECLARE(add_w(RW2 d, R2 s));
+DECLARE(add_b(RW1 d, R1 s));
+DECLARE(sub_l_ri(RW4 d, IMM i));
+DECLARE(sub_w_ri(RW2 d, IMM i));
+DECLARE(sub_b_ri(RW1 d, IMM i));
+DECLARE(add_l_ri(RW4 d, IMM i));
+DECLARE(add_w_ri(RW2 d, IMM i));
+DECLARE(add_b_ri(RW1 d, IMM i));
+DECLARE(sbb_l(RW4 d, R4 s));
+DECLARE(sbb_w(RW2 d, R2 s));
+DECLARE(sbb_b(RW1 d, R1 s));
+DECLARE(sub_l(RW4 d, R4 s));
+DECLARE(sub_w(RW2 d, R2 s));
+DECLARE(sub_b(RW1 d, R1 s));
+DECLARE(cmp_l(R4 d, R4 s));
+DECLARE(cmp_l_ri(R4 r, IMM i));
+DECLARE(cmp_w(R2 d, R2 s));
+DECLARE(cmp_b(R1 d, R1 s));
+DECLARE(xor_l(RW4 d, R4 s));
+DECLARE(xor_w(RW2 d, R2 s));
+DECLARE(xor_b(RW1 d, R1 s));
+DECLARE(live_flags(void));
+DECLARE(dont_care_flags(void));
+DECLARE(duplicate_carry(void));
+DECLARE(restore_carry(void));
+DECLARE(start_needflags(void));
+DECLARE(end_needflags(void));
+DECLARE(make_flags_live(void));
+DECLARE(call_r_11(R4 r, W4 out1, R4 in1, IMM osize, IMM isize));
+DECLARE(call_r_02(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2));
+DECLARE(readmem_new(R4 address, W4 dest, IMM offset, IMM size, W4 tmp));
+DECLARE(writemem_new(R4 address, R4 source, IMM offset, IMM size, W4 tmp));
+DECLARE(forget_about(W4 r));
+DECLARE(nop(void));
+
+DECLARE(f_forget_about(FW r));
+DECLARE(fmov_pi(FW r));
+DECLARE(fmov_log10_2(FW r));
+DECLARE(fmov_log2_e(FW r));
+DECLARE(fmov_loge_2(FW r));
+DECLARE(fmov_1(FW r));
+DECLARE(fmov_0(FW r));
+DECLARE(fmov_rm(FW r, MEMR m));
+DECLARE(fmovi_rm(FW r, MEMR m));
+DECLARE(fmovi_mr(MEMW m, FR r));
+DECLARE(fmovs_rm(FW r, MEMR m));
+DECLARE(fmovs_mr(MEMW m, FR r));
+DECLARE(fmov_mr(MEMW m, FR r));
+DECLARE(fmov_ext_mr(MEMW m, FR r));
+DECLARE(fmov_ext_rm(FW r, MEMR m));
+DECLARE(fmov_rr(FW d, FR s));
+DECLARE(fldcw_m_indexed(R4 index, IMM base));
+DECLARE(ftst_r(FR r));
+DECLARE(dont_care_fflags(void));
+DECLARE(fsqrt_rr(FW d, FR s));
+DECLARE(fabs_rr(FW d, FR s));
+DECLARE(frndint_rr(FW d, FR s));
+DECLARE(fsin_rr(FW d, FR s));
+DECLARE(fcos_rr(FW d, FR s));
+DECLARE(ftwotox_rr(FW d, FR s));
+DECLARE(fetox_rr(FW d, FR s));
+DECLARE(flog2_rr(FW d, FR s));
+DECLARE(fneg_rr(FW d, FR s));
+DECLARE(fadd_rr(FRW d, FR s));
+DECLARE(fsub_rr(FRW d, FR s));
+DECLARE(fmul_rr(FRW d, FR s));
+DECLARE(frem_rr(FRW d, FR s));
+DECLARE(frem1_rr(FRW d, FR s));
+DECLARE(fdiv_rr(FRW d, FR s));
+DECLARE(fcmp_rr(FR d, FR s));
+DECLARE(fflags_into_flags(W2 tmp));
+
+extern int failure;
+#define FAIL(x) do { failure|=x; } while (0)
+
+/* Convenience functions exposed to gencomp */
+extern uae_u32 m68k_pc_offset;
+extern void readbyte(int address, int dest, int tmp);
+extern void readword(int address, int dest, int tmp);
+extern void readlong(int address, int dest, int tmp);
+extern void writebyte(int address, int source, int tmp);
+extern void writeword(int address, int source, int tmp);
+extern void writelong(int address, int source, int tmp);
+extern void writeword_clobber(int address, int source, int tmp);
+extern void writelong_clobber(int address, int source, int tmp);
+extern void get_n_addr(int address, int dest, int tmp);
+extern void get_n_addr_jmp(int address, int dest, int tmp);
+extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
+extern int kill_rodent(int r);
+extern void sync_m68k_pc(void);
+extern uae_u32 get_const(int r);
+extern int is_const(int r);
+extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond);
+extern void empty_optimizer(void);
+
+#define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1))
+#define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o)))
+#define comp_get_ilong(o) do_get_mem_long((uae_u32 *)(comp_pc_p + (o)))
+
+/* Preferences handling */
+void check_prefs_changed_comp (void);
+
+struct blockinfo_t;
+
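+/* A dependency records an inter-block jump: jmp_off is the location of the
+   jump in translated code and target is the blockinfo it points at;
+   prev_p/next link it into the target's deplist (see blockinfo below). */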
+typedef struct dep_t {
+ uae_u32* jmp_off;
+ struct blockinfo_t* target;
+ struct dep_t** prev_p;
+ struct dep_t* next;
+} dependency;
+
+typedef struct blockinfo_t {
+ uae_s32 count;
+ cpuop_func* direct_handler_to_use;
+ cpuop_func* handler_to_use;
+ /* The direct handler does not check for the correct address */
+
+ cpuop_func* handler;
+ cpuop_func* direct_handler;
+
+ cpuop_func* direct_pen;
+ cpuop_func* direct_pcc;
+
+ uae_u8* nexthandler;
+ uae_u8* pc_p;
+
+ uae_u32 c1;
+ uae_u32 c2;
+ uae_u32 len;
+
+ struct blockinfo_t* next_same_cl;
+ struct blockinfo_t** prev_same_cl_p;
+ struct blockinfo_t* next;
+ struct blockinfo_t** prev_p;
+
+ uae_u32 min_pcp;
+ uae_u8 optlevel;
+ uae_u8 needed_flags;
+ uae_u8 status;
+ uae_u8 havestate;
+
+ dependency dep[2]; /* Holds things we depend on */
+ dependency* deplist; /* List of things that depend on this */
+ smallstate env;
+} blockinfo;
+
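+/* Values stored in blockinfo.status */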
+#define BI_NEW 0
+#define BI_COUNTING 1
+#define BI_TARGETTED 2
+
+typedef struct {
+ uae_u8 type;
+ uae_u8 reg;
+ uae_u32 next;
+} regacc;
+
+void execute_normal(void);
+void exec_nostats(void);
+void do_nothing(void);
+
* It's emptied via exter_int_helper by the EXTER interrupt. */
extern smp_comm_pipe native2amiga_pending;
#endif
+
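+/* Signal the emulated side that a native2amiga message is pending: set the
+   request flag and raise the corresponding interrupt via INTREQ. */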
+STATIC_INLINE void do_uae_int_requested(void)
+{
+ uae_int_requested = 1;
+ set_uae_int_flag ();
+ INTREQ (0x8000 | 0x0008);
+}
uae_sem_wait (&n2asem);
write_comm_pipe_int (&native2amiga_pending, 3, 0);
write_comm_pipe_u32 (&native2amiga_pending, interrupt, 1);
- uae_int_requested = 1;
+ do_uae_int_requested();
uae_sem_post (&n2asem);
}
uae_sem_wait (&n2asem);
write_comm_pipe_int (&native2amiga_pending, 2, 0);
write_comm_pipe_u32 (&native2amiga_pending, msg, 1);
- uae_int_requested = 1;
+ do_uae_int_requested();
uae_sem_post (&n2asem);
}
write_comm_pipe_int (&native2amiga_pending, 1, 0);
write_comm_pipe_u32 (&native2amiga_pending, port, 0);
write_comm_pipe_u32 (&native2amiga_pending, msg, 1);
- uae_int_requested = 1;
+ do_uae_int_requested();
uae_sem_post (&n2asem);
}
write_comm_pipe_int (&native2amiga_pending, 0, 0);
write_comm_pipe_u32 (&native2amiga_pending, task, 0);
write_comm_pipe_int (&native2amiga_pending, mask, 1);
- uae_int_requested = 1;
+ do_uae_int_requested();
uae_sem_post (&n2asem);
}
write_comm_pipe_int (&native2amiga_pending, 4, 0);
write_comm_pipe_int (&native2amiga_pending, port, 0);
write_comm_pipe_int (&native2amiga_pending, nr, 1);
- uae_int_requested = 1;
+ do_uae_int_requested();
uae_sem_post (&n2asem);
}
if (initialized) {
pShutdownWinIo();
FreeLibrary (ioh);
+ io_log ("io freed\n");
}
#endif
- io_log ("io freed\n");
initialized = 0;
}
if ((shmids[shmid].key == shmid) && shmids[shmid].size) {
got = FALSE;
if (got == FALSE) {
- if (shmaddr) {
- result = (void*)VirtualFree(shmaddr, 0, os_winnt ? MEM_RESET : MEM_RELEASE);
- }
+ if (shmaddr)
+ VirtualFree(shmaddr, 0, os_winnt ? MEM_RESET : MEM_RELEASE);
result = VirtualAlloc(shmaddr, size, os_winnt ? MEM_COMMIT : (MEM_RESERVE | MEM_COMMIT),
PAGE_EXECUTE_READWRITE);
if (result == NULL) {
extern int mouseactive, focus;
extern int ignore_messages_all;
#define WINUAEBETA 1
-#define WINUAEBETASTR " Beta 7"
+#define WINUAEBETASTR " Beta 9"
extern char start_path_exe[MAX_DPATH];
extern char start_path_data[MAX_DPATH];
/* Is this drive-letter valid (it used to check for media in drive) */
if( ( dwDriveMask & 1 ) /* && CheckRM( volumepath ) */ )
{
- BOOL inserted = CheckRM( volumepath ); /* Is there a disk inserted? */
- drivetype = GetDriveType( volumepath );
+ BOOL inserted = CheckRM(volumepath); /* Is there a disk inserted? */
+ drivetype = GetDriveType(volumepath);
if (drivetype != DRIVE_CDROM) {
get_volume_name( currprefs.mountinfo, volumepath, volumename, MAX_DPATH, inserted, drivetype, 1 );
- if( drivetype == DRIVE_REMOTE )
+ if (drivetype == DRIVE_REMOTE)
strcat( volumepath, "." );
else
strcat( volumepath, ".." );
CheckDlgButton (hDlg, IDC_CLOCKSYNC, workprefs.tod_hack);
cw = catweasel_detect();
EnableWindow (GetDlgItem (hDlg, IDC_CATWEASEL), cw);
- if (!cw)
+ if (!cw && workprefs.catweasel < 100)
workprefs.catweasel = 0;
CheckDlgButton (hDlg, IDC_CATWEASEL, workprefs.catweasel);
CheckDlgButton (hDlg, IDC_STATE_CAPTURE, workprefs.statecapture);
BufferSecurityCheck="false"
EnableFunctionLevelLinking="false"
EnableEnhancedInstructionSet="0"
+ FloatingPointModel="0"
TreatWChar_tAsBuiltInType="false"
RuntimeTypeInfo="false"
UsePrecompiledHeader="0"