]> git.unchartedbackwaters.co.uk Git - francis/winuae.git/commitdiff
imported winuaesrc1000b9.zip
authorToni Wilen <twilen@winuae.net>
Thu, 26 May 2005 17:09:11 +0000 (20:09 +0300)
committerToni Wilen <twilen@winuae.net>
Mon, 22 Feb 2010 19:25:45 +0000 (21:25 +0200)
20 files changed:
catweasel.c
cfgfile.c
compemu_fpp.c
compemu_fpp_old.c [new file with mode: 0755]
compemu_raw_x86.c
compemu_raw_x86_old.c [new file with mode: 0755]
compemu_support.c
compemu_support_old.c [new file with mode: 0755]
disk.c
filesys.c
include/compemu.h
include/compemu_old.h [new file with mode: 0755]
include/native2amiga.h
native2amiga.c
od-win32/ioport.c
od-win32/mman.c
od-win32/win32.h
od-win32/win32_filesys.c
od-win32/win32gui.c
od-win32/winuae_msvc/winuae_msvc.vcproj

index 6fc1813369c90094788e2e0f2adfbe45775e1941..7d9d919dfdc440b855d7d0d5851c0552944c1b3b 100755 (executable)
@@ -84,7 +84,10 @@ uae_u32 catweasel_do_bget (uaecptr addr)
     if (addr >= 0x100)
        return 0;
     buf1[0] = (uae_u8)addr;
-    DeviceIoControl (handle, CW_PEEKREG_FULL, buf1, 1, buf2, 1, &did_read, 0);
+    if (handle != INVALID_HANDLE_VALUE)
+       DeviceIoControl (handle, CW_PEEKREG_FULL, buf1, 1, buf2, 1, &did_read, 0);
+    else
+       buf2[0] = ioport_read (cwc.iobase + addr);
     //write_log ("G %02.2X %02.2X %d\n", buf1[0], buf2[0], did_read);
     return buf2[0];
 }
@@ -98,7 +101,10 @@ void catweasel_do_bput (uaecptr addr, uae_u32 b)
        return;
     buf[0] = (uae_u8)addr;
     buf[1] = b;
-    DeviceIoControl (handle, CW_POKEREG_FULL, buf, 2, 0, 0, &did_read, 0);
+    if (handle != INVALID_HANDLE_VALUE)
+        DeviceIoControl (handle, CW_POKEREG_FULL, buf, 2, 0, 0, &did_read, 0);
+    else
+       ioport_write (cwc.iobase + addr, b);
     //write_log ("P %02.2X %02.2X %d\n", (uae_u8)addr, (uae_u8)b, did_read);
 }
 
@@ -114,42 +120,52 @@ int catweasel_init (void)
 
     if (!currprefs.catweasel)
        return 0;
-    for (i = 0; i < 4; i++) {
-       if (currprefs.catweasel > 0)
-           i = currprefs.catweasel;
-        sprintf (name, "\\\\.\\CAT%d_F0", i);
-       handle = CreateFile (name, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0,
-           OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
-       if (handle != INVALID_HANDLE_VALUE || currprefs.catweasel > 0)
-           break;
-    }
-    if (handle == INVALID_HANDLE_VALUE) {
-       write_log ("No Catweasel detected\n");
-       goto fail;
-    }
-    if (!DeviceIoControl (handle, CW_GET_VERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) {
-       write_log ("CW_GET_VERSION failed %d\n", GetLastError());
-       goto fail;
-    }
-    write_log ("CW driver version string '%s'\n", buffer);
-    if (!DeviceIoControl (handle, CW_GET_HWVERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) {
-       write_log ("CW_GET_HWVERSION failed %d\n", GetLastError());
-       goto fail;
-    }
-    write_log ("CW: v=%d 14=%d 28=%d 56=%d joy=%d dpm=%d sid=%d kb=%d sidfifo=%d\n",
+
+    if (currprefs.catweasel >= 100) {
+       cwc.type = currprefs.catweasel >= 0x400 ? 3 : 1;
+       cwc.iobase = currprefs.catweasel;
+       if (!ioport_init())
+           goto fail;
+       strcpy(name, "[DIRECT]");
+    } else {
+       for (i = 0; i < 4; i++) {
+           if (currprefs.catweasel > 0)
+               i = currprefs.catweasel;
+           sprintf (name, "\\\\.\\CAT%d_F0", i);
+           handle = CreateFile (name, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0,
+               OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
+           if (handle != INVALID_HANDLE_VALUE || currprefs.catweasel > 0)
+               break;
+       }
+       if (handle == INVALID_HANDLE_VALUE) {
+           write_log ("No Catweasel detected\n");
+           goto fail;
+       }
+       if (!DeviceIoControl (handle, CW_GET_VERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) {
+           write_log ("CW_GET_VERSION failed %d\n", GetLastError());
+           goto fail;
+       }
+       write_log ("CW driver version string '%s'\n", buffer);
+       if (!DeviceIoControl (handle, CW_GET_HWVERSION, 0, 0, buffer, sizeof (buffer), &len, 0)) {
+           write_log ("CW_GET_HWVERSION failed %d\n", GetLastError());
+           goto fail;
+       }
+       write_log ("CW: v=%d 14=%d 28=%d 56=%d joy=%d dpm=%d sid=%d kb=%d sidfifo=%d\n",
        buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5],
        buffer[6], buffer[7], ((uae_u32*)(buffer + 8))[0]);
-    if (!DeviceIoControl (handle, CW_LOCK_EXCLUSIVE, 0, 0, buffer, sizeof (buffer), &len, 0)) {
-       write_log ("CW_LOCK_EXCLUSIVE failed %d\n", GetLastError());
-       goto fail;
+       if (!DeviceIoControl (handle, CW_LOCK_EXCLUSIVE, 0, 0, buffer, sizeof (buffer), &len, 0)) {
+           write_log ("CW_LOCK_EXCLUSIVE failed %d\n", GetLastError());
+           goto fail;
+       }
+       model = *((uae_u32*)(buffer + 4));
+       base = *((uae_u32*)(buffer + 0));
+       cwc.type = model == 0 ? 1 : model == 2 ? 4 : 3;
+       cwc.iobase = base;
     }
-    model = *((uae_u32*)(buffer + 4));
-    base = *((uae_u32*)(buffer + 0));
-    cwc.type = model == 0 ? 1 : model == 2 ? 4 : 3;
-    cwc.iobase = base;
     write_log ("Catweasel MK%d @%p (%s) detected and enabled\n",
        cwc.type, cwc.iobase, name);
-    catweasel_do_bput (3, 0x41); /* enable MK3-mode */
+    if (cwc.type == CATWEASEL_TYPE_MK4)
+        catweasel_do_bput (3, 0x41); /* enable MK3-mode */
     catweasel_init_controller (&cwc);
     return 1;
 fail:
@@ -163,6 +179,7 @@ void catweasel_free (void)
     if (handle != INVALID_HANDLE_VALUE)
        CloseHandle (handle);
     handle = INVALID_HANDLE_VALUE;
+    ioport_free();
     cwc.type = 0;
 }
 
index 2a24fae4d364ce101164c283db1e22dcf795502f..86d48fc7e0546ca118f1cad8614a7430e0e13bee 100755 (executable)
--- a/cfgfile.c
+++ b/cfgfile.c
@@ -112,7 +112,7 @@ static struct cfg_lines opttable[] =
     {"floppy3", "Diskfile for drive 3" },
     {"hardfile", "access,sectors, surfaces, reserved, blocksize, path format" },
     {"filesystem", "access,'Amiga volume-name':'host directory path' - where 'access' can be 'read-only' or 'read-write'" },
-    {"catweasel_io","Catweasel board io base address" }
+    {"catweasel", "Catweasel board io base address" }
 };
 
 static const char *guimode1[] = { "no", "yes", "nowait", 0 };
@@ -412,7 +412,10 @@ static void save_options (struct zfile *f, struct uae_prefs *p, int type)
     cfgfile_write (f, "blitter_cycle_exact=%s\n", p->blitter_cycle_exact ? "true" : "false");
 
     cfgfile_write (f, "log_illegal_mem=%s\n", p->illegal_mem ? "true" : "false");
-    cfgfile_write (f, "catweasel=%d\n", p->catweasel);
+    if (p->catweasel >= 100)
+       cfgfile_write (f, "catweasel=0x%x\n", p->catweasel);
+    else
+       cfgfile_write (f, "catweasel=%d\n", p->catweasel);
 
     cfgfile_write (f, "kbd_lang=%s\n", (p->keyboard_lang == KBD_LANG_DE ? "de"
                                  : p->keyboard_lang == KBD_LANG_DK ? "dk"
index db9564f4b967df53c9e072e66bcf5bc306cde71e..9ff0945fcb3bf15fcf5476f802a7a48290f610da 100755 (executable)
@@ -486,7 +486,6 @@ STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad)
        }
     }
     abort();
-    return -1;
 }
 
 void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
@@ -866,9 +865,9 @@ void comp_frestore_opp (uae_u32 opcode)
        m68k_areg (regs, opcode & 7) = ad;
 }
 
-static fptype const_e=2.718281828;  /* Got some more digits? */
-static fptype const_log10_e=0.4342944819;
-static fptype const_loge_10=2.302585093;
+static fptype       const_e=2.718281828459045235360;
+static fptype const_log10_e=0.434294481903251827651;
+static fptype const_loge_10=2.302585092994045684018;
 static fptype power10[]={1e0,1e1,1e2,1e4,1e8,1e16,1e32,1e64,1e128,1e256
 #if USE_LONG_DOUBLE
 ,       1e512, 1e1024, 1e2048, 1e4096
@@ -1192,16 +1191,24 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
            MAKE_FPSR (src);
            break;
         case 0x01:             /* FINT */
-           FAIL(1);    
-           return;
            dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           frndint_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
         case 0x02:             /* FSINH */
-           FAIL(1);  
-           return;
-
            dont_care_fflags();
-           regs.fp[reg] = sinh (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fsinh_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x03:             /* FINTRZ */
 #if USE_X86_FPUCW 
@@ -1243,50 +1250,64 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
            MAKE_FPSR (reg);
            break;
         case 0x06:             /* FLOGNP1 */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = log (src + 1.0);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           flogNP1_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x08:             /* FETOXM1 */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = exp (src) - 1.0;
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fetoxM1_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x09:             /* FTANH */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = tanh (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           ftanh_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x0a:             /* FATAN */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = atan (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fatan_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x0c:             /* FASIN */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = asin (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fasin_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x0d:             /* FATANH */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-#if 1                          /* The BeBox doesn't have atanh, and it isn't in the HPUX libm either */
-           regs.fp[reg] = log ((1 + src) / (1 - src)) / 2;
-#else
-           regs.fp[reg] = atanh (src);
-#endif
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fatanh_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x0e:             /* FSIN */
            dont_care_fflags();
@@ -1299,11 +1320,14 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
            MAKE_FPSR (reg);
            break;
         case 0x0f:             /* FTAN */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = tan (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           ftan_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x10:             /* FETOX */
            dont_care_fflags();
@@ -1326,26 +1350,37 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
            MAKE_FPSR (reg);
            break;
         case 0x12:             /* FTENTOX */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = pow (10.0, src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           ftentox_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x14:             /* FLOGN */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = log (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           flogN_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
+
         case 0x15:             /* FLOG10 */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = log10 (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           flog10_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
+
         case 0x16:             /* FLOG2 */
            dont_care_fflags();
            src=get_fp_value (opcode, extra);
@@ -1369,11 +1404,14 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
            MAKE_FPSR (reg);
            break;
         case 0x19:             /* FCOSH */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = cosh (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fcosh_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x1a:             /* FNEG */
         case 0x5a:
@@ -1388,11 +1426,14 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
            MAKE_FPSR (reg);
            break;
         case 0x1c:             /* FACOS */
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = acos (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           facos_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x1d:             /* FCOS */
            dont_care_fflags();
@@ -1493,10 +1534,13 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
            break;
         case 0x26:             /* FSCALE */
            dont_care_fflags();
-           FAIL(1);  
-           return;
-           regs.fp[reg] *= exp (log (2.0) * src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fscale_rr(reg,src);
+           MAKE_FPSR (reg);
            break;
         case 0x27:             /* FSGLMUL */
            dont_care_fflags();
@@ -1528,12 +1572,15 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
         case 0x35:
         case 0x36:
         case 0x37:
-           FAIL(1);  
-           return;
            dont_care_fflags();
-           regs.fp[reg] = sin (src);
-           regs.fp[extra & 7] = cos (src);
-           MAKE_FPSR (regs.fp[reg]);
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fsincos_rr(reg,extra & 7,src);
+           MAKE_FPSR (extra & 7);
+           MAKE_FPSR (reg);
            break;
         case 0x38:             /* FCMP */
            src=get_fp_value (opcode, extra);
diff --git a/compemu_fpp_old.c b/compemu_fpp_old.c
new file mode 100755 (executable)
index 0000000..db9564f
--- /dev/null
@@ -0,0 +1,1564 @@
+/*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * MC68881 emulation
+  *
+  * Copyright 1996 Herman ten Brugge
+  * Adapted for JIT compilation (c) Bernd Meyer, 2000
+  */
+
+#include <math.h>
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include "config.h"
+#include "options.h"
+#include "memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "ersatz.h"
+#include "md-fpp.h"
+#include "compemu.h"
+
+#define MAKE_FPSR(r) do { fmov_rr(FP_RESULT,r); } while (0)
+
+#define delay   //nop() ;nop()  
+#define delay2  //nop() ;nop()   
+
+uae_s32 temp_fp[3];  /* To convert between FP/integer */
+
+/* Emit, through the compemu helpers, the code that makes the source
+ * operand of an FPU instruction available in an FP register.
+ *
+ * opcode - instruction word; bits 5-3 select the addressing mode and
+ *          bits 2-0 the register number.
+ * extra  - extension word; when bit 0x4000 is clear the operand already
+ *          lives in the FP register numbered by bits 12-10, otherwise
+ *          bits 12-10 are the operand size/format code.
+ *
+ * return register number, or -1 for failure.  Whenever a value has to be
+ * converted or loaded it is staged in the temp_fp scratch buffer and the
+ * register returned is FS1. */
+STATIC_INLINE int get_fp_value (uae_u32 opcode, uae_u16 extra)
+{
+    uaecptr tmppc;   /* only referenced by the unreachable code under mode 7, reg 3 */
+    uae_u16 tmp;     /* likewise */
+    int size;
+    int mode;
+    int reg;
+    double* src;     /* never used in this function */
+    uae_u32 ad = 0;  /* register that will hold the operand's address */
+    static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; /* operand bytes, indexed by size code */
+    static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; /* same, except a byte operand counts as 2 */
+
+    if ((extra & 0x4000) == 0) { /* operand is already in an FP register */
+       return (extra >> 10) & 7;
+    }
+
+    mode = (opcode >> 3) & 7;
+    reg = opcode & 7;
+    size = (extra >> 10) & 7;
+    switch (mode) {
+     case 0: /* operand is taken straight from integer register `reg` */
+       switch (size) {
+        case 6: /* byte: sign-extend 8 bits, then load as an integer */
+           sign_extend_8_rr(S1,reg);
+           mov_l_mr((uae_u32)temp_fp,S1);
+           delay2;
+           fmovi_rm(FS1,(uae_u32)temp_fp);
+           return FS1;
+        case 4: /* word: sign-extend 16 bits, then load as an integer */
+           sign_extend_16_rr(S1,reg);
+           mov_l_mr((uae_u32)temp_fp,S1);
+           delay2;
+           fmovi_rm(FS1,(uae_u32)temp_fp);
+           return FS1;
+        case 0: /* the register's 32 bits, loaded with fmovi_rm */
+           mov_l_mr((uae_u32)temp_fp,reg);
+           delay2;
+           fmovi_rm(FS1,(uae_u32)temp_fp);
+           return FS1;
+        case 1: /* the same 32 bits, loaded with fmovs_rm instead (presumably a single-precision float - confirm) */
+           mov_l_mr((uae_u32)temp_fp,reg);
+           delay2;
+           fmovs_rm(FS1,(uae_u32)temp_fp);
+           return FS1;
+        default: /* the remaining sizes do not fit in one integer register */
+           return -1;
+       }
+       return -1; /* Should be unreachable */
+     case 1:
+       return -1; /* Genuine invalid instruction */
+     default:
+       break;
+    }
+    /* OK, we *will* have to load something from an address. Let's make
+       sure we know how to handle that, or quit early --- i.e. *before*
+       we do any postincrement/predecrement that we may regret */
+
+    switch (size) {
+     case 3: /* rejected here and again in the load switch at the end ("packed") */
+       return -1;
+     case 0:
+     case 1:
+     case 2:
+     case 4:
+     case 5:
+     case 6:
+       break; 
+     default:
+       return -1;
+    }
+
+    switch (mode) { /* compute the operand address into S1 */
+     case 2: /* address is the current value of register reg+8 */
+       ad=S1;  /* We will change it, anyway ;-) */
+       mov_l_rr(ad,reg+8);
+       break;
+     case 3: /* as mode 2, then advance reg+8 past the operand (postincrement) */
+       ad=S1;
+       mov_l_rr(ad,reg+8);
+       lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size]));
+       break;
+     case 4: /* step reg+8 back by the operand size first, then use it (predecrement) */
+       ad=S1;
+       
+       lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size]));
+       mov_l_rr(ad,reg+8);
+       break;
+     case 5: /* reg+8 plus a sign-extended 16-bit displacement read from the instruction stream */
+     {
+        uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+        ad=S1;
+        mov_l_rr(ad,reg+8);
+        lea_l_brr(ad,ad,off);
+        break;
+     }
+     case 6: /* address computation is delegated to calc_disp_ea_020 with the next instruction word */
+     {
+       uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2);
+       ad=S1;
+       calc_disp_ea_020(reg+8,dp,ad,S2);
+       break;
+     }
+     case 7: /* sub-mode selected by reg */
+       switch (reg) {
+        case 0: /* address is a sign-extended 16-bit constant */
+        {
+            uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+            ad=S1;
+            mov_l_ri(ad,off);
+            break;
+        }
+        case 1: /* address is a 32-bit constant */
+        {
+            uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4);
+            ad=S1;
+            mov_l_ri(ad,off);
+            break;
+        }
+        case 2: /* PC-relative: the address computed for the offset word, plus the signed 16-bit offset */
+        {
+            uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+                m68k_pc_offset;
+            uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+            ad=S1;
+            mov_l_ri(ad,address+PC16off);
+            break;
+        }
+        case 3:
+           return -1; /* not supported; the four statements below can never run */
+           tmppc = m68k_getpc ();
+           tmp = next_iword ();
+           ad = get_disp_ea_020 (tmppc, tmp);
+           break;
+        case 4: 
+        { /* immediate: point ad at the operand inside the instruction stream */
+            uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+                m68k_pc_offset;
+            ad=S1;
+            if (size == 6)
+                address++; /* presumably a byte immediate sits in the second byte of its word - confirm */
+            mov_l_ri(ad,address);
+            m68k_pc_offset+=sz2[size]; /* skip the immediate data */
+            break;
+        }
+        default:
+           return -1;
+       }
+    }
+
+    switch (size) { /* read the operand at ad and load it into FS1 via temp_fp */
+     case 0: /* one long, loaded with fmovi_rm */
+       readlong(ad,S2,S3);
+       mov_l_mr((uae_u32)temp_fp,S2);
+       delay2;
+       fmovi_rm(FS1,(uae_u32)temp_fp);
+       break;
+     case 1: /* one long, loaded with fmovs_rm */
+       readlong(ad,S2,S3);
+       mov_l_mr((uae_u32)temp_fp,S2);
+       delay2;
+       fmovs_rm(FS1,(uae_u32)temp_fp);
+       break;
+     case 2: /* a word at ad, then longs at ad+4 and ad+8, stored into temp_fp in reverse order */
+       readword(ad,S2,S3);
+       mov_w_mr(((uae_u32)temp_fp)+8,S2);
+       add_l_ri(ad,4);
+       readlong(ad,S2,S3);
+       mov_l_mr((uae_u32)(temp_fp)+4,S2);
+       add_l_ri(ad,4);
+       readlong(ad,S2,S3);
+       mov_l_mr((uae_u32)(temp_fp),S2);
+       delay2;
+       fmov_ext_rm(FS1,(uae_u32)(temp_fp));
+       break;
+     case 3:
+       return -1; /* Some silly "packed" stuff */
+     case 4: /* one word, sign-extended, loaded as an integer */
+       readword(ad,S2,S3);
+       sign_extend_16_rr(S2,S2);
+       mov_l_mr((uae_u32)temp_fp,S2);
+       delay2;
+       fmovi_rm(FS1,(uae_u32)temp_fp);
+       break;
+     case 5: /* two longs, stored into temp_fp in reverse order, loaded with fmov_rm */
+       readlong(ad,S2,S3);
+       mov_l_mr(((uae_u32)temp_fp)+4,S2);
+       add_l_ri(ad,4);
+       readlong(ad,S2,S3);
+       mov_l_mr((uae_u32)(temp_fp),S2);
+       delay2;
+       fmov_rm(FS1,(uae_u32)(temp_fp));
+       break;
+     case 6: /* one byte, sign-extended, loaded as an integer */
+       readbyte(ad,S2,S3);
+       sign_extend_8_rr(S2,S2);
+       mov_l_mr((uae_u32)temp_fp,S2);
+       delay2;
+       fmovi_rm(FS1,(uae_u32)temp_fp);
+       break;
+     default:
+       return -1;
+    }
+    return FS1;
+}
+
+/* Emit, through the compemu helpers, the code that stores FP register
+ * `val` to the destination of an FPU instruction.
+ *
+ * val    - number of the FP register holding the value to store.
+ * opcode - instruction word; bits 5-3 select the addressing mode and
+ *          bits 2-0 the register number.
+ * extra  - extension word; when bit 0x4000 is clear the destination is
+ *          the FP register numbered by bits 12-10, otherwise bits 12-10
+ *          are the operand size/format code.
+ *
+ * return of -1 means failure, >=0 means OK (every success path in this
+ * function returns 0).  Values headed for memory or an integer register
+ * are staged in the temp_fp scratch buffer. */
+STATIC_INLINE int put_fp_value (int val, uae_u32 opcode, uae_u16 extra)
+{
+    uae_u16 tmp;     /* only referenced by the unreachable code under mode 7, reg 3 */
+    uaecptr tmppc;   /* likewise */
+    int size;
+    int mode;
+    int reg;
+    uae_u32 ad;      /* register that will hold the destination address */
+    static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; /* operand bytes, indexed by size code */
+    static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; /* same, except a byte operand counts as 2 */
+
+    if ((extra & 0x4000) == 0) { /* FP register destination: a plain register move */
+       fmov_rr((extra>>10)&7,val);
+       return 0;
+    }
+
+    mode = (opcode >> 3) & 7;
+    reg = opcode & 7;
+    size = (extra >> 10) & 7;
+    ad = -1;
+    switch (mode) {
+     case 0: /* destination is integer register `reg` itself */
+       switch (size) {
+        case 6: /* store as an integer, then move a byte into reg */
+           fmovi_mr((uae_u32)temp_fp,val);
+           delay;
+           mov_b_rm(reg,(uae_u32)temp_fp);
+           return 0;
+        case 4: /* store as an integer, then move a word into reg */
+           fmovi_mr((uae_u32)temp_fp,val);
+           delay;
+           mov_w_rm(reg,(uae_u32)temp_fp);
+           return 0;
+        case 0: /* store as an integer, then move the long into reg */
+           fmovi_mr((uae_u32)temp_fp,val);
+           delay;
+           mov_l_rm(reg,(uae_u32)temp_fp);
+           return 0;
+        case 1: /* store with fmovs_mr instead, then move the long into reg */
+           fmovs_mr((uae_u32)temp_fp,val);
+           delay;
+           mov_l_rm(reg,(uae_u32)temp_fp);
+           return 0;
+        default: /* the remaining sizes do not fit in one integer register */
+           return -1;
+       }
+     case 1:
+       return -1; /* genuine invalid instruction */
+     default: break;
+    }
+
+    /* Let's make sure we get out *before* doing something silly if
+       we can't handle the size */
+    switch (size) {
+     case 0:
+     case 4:
+     case 5:
+     case 6:
+     case 2:
+     case 1:
+       break;
+     case 3: /* rejected here and again in the store switch at the end ("Packed") */
+     default:
+       return -1;
+    }
+    
+    switch (mode) { /* compute the destination address into S1 */
+     case 2: /* address is the current value of register reg+8 */
+       ad=S1;
+       mov_l_rr(ad,reg+8);
+       break;
+     case 3: /* as mode 2, then advance reg+8 past the operand (postincrement) */
+       ad=S1;
+       mov_l_rr(ad,reg+8);
+       lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size]));
+       break;
+     case 4: /* step reg+8 back by the operand size first, then use it (predecrement) */
+       ad=S1;
+       lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size]));
+       mov_l_rr(ad,reg+8);
+       break;
+     case 5: /* reg+8 plus a sign-extended 16-bit displacement read from the instruction stream */
+     {
+        uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+        ad=S1;
+        mov_l_rr(ad,reg+8);
+        add_l_ri(ad,off);
+        break;
+     }
+     case 6: /* address computation is delegated to calc_disp_ea_020 with the next instruction word */
+     {
+       uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2);
+       ad=S1;
+       calc_disp_ea_020(reg+8,dp,ad,S2);
+       break;
+     }
+     case 7: /* sub-mode selected by reg */
+       switch (reg) {
+        case 0: /* address is a sign-extended 16-bit constant */
+        {
+            uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+            ad=S1;
+            mov_l_ri(ad,off);
+            break;
+        }
+        case 1: /* address is a 32-bit constant */
+        {
+            uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4);
+            ad=S1;
+            mov_l_ri(ad,off);
+            break;
+        }
+        case 2: /* PC-relative: the address computed for the offset word, plus the signed 16-bit offset */
+        {
+            uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+                m68k_pc_offset;
+            uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+            ad=S1;
+            mov_l_ri(ad,address+PC16off);
+            break;
+        }
+        case 3:
+           return -1; /* not supported; the four statements below can never run */
+           tmppc = m68k_getpc ();
+           tmp = next_iword ();
+           ad = get_disp_ea_020 (tmppc, tmp);
+           break;
+        case 4:
+        { /* destination is the location inside the instruction stream itself */
+            uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+                m68k_pc_offset;
+            ad=S1;
+            mov_l_ri(ad,address);
+            m68k_pc_offset+=sz2[size]; /* skip over that location */
+            break;
+        }
+        default:
+           return -1;
+       }
+    }
+    switch (size) { /* convert val into temp_fp, then write it out at ad */
+     case 0: /* stored as an integer, written as one long */
+       fmovi_mr((uae_u32)temp_fp,val);
+       delay;
+       mov_l_rm(S2,(uae_u32)temp_fp);
+       writelong_clobber(ad,S2,S3);
+       break;
+     case 1: /* stored with fmovs_mr, written as one long */
+       fmovs_mr((uae_u32)temp_fp,val);
+       delay;
+       mov_l_rm(S2,(uae_u32)temp_fp);
+       writelong_clobber(ad,S2,S3);
+       break;
+     case 2: /* stored with fmov_ext_mr; temp_fp+8, +4, +0 are written to ad, ad+4, ad+8 */
+       fmov_ext_mr((uae_u32)temp_fp,val);
+       delay;
+       mov_w_rm(S2,(uae_u32)temp_fp+8);
+       writeword_clobber(ad,S2,S3);
+       add_l_ri(ad,4);
+       mov_l_rm(S2,(uae_u32)temp_fp+4);
+       writelong_clobber(ad,S2,S3);
+       add_l_ri(ad,4);
+       mov_l_rm(S2,(uae_u32)temp_fp);
+       writelong_clobber(ad,S2,S3);
+       break;
+     case 3: return -1; /* Packed */
+
+     case 4: /* stored as an integer, written as one word */
+       fmovi_mr((uae_u32)temp_fp,val);
+       delay;
+       mov_l_rm(S2,(uae_u32)temp_fp);
+       writeword_clobber(ad,S2,S3);
+       break;
+     case 5: /* stored with fmov_mr; temp_fp+4 is written to ad, temp_fp+0 to ad+4 */
+       fmov_mr((uae_u32)temp_fp,val);
+       delay;
+       mov_l_rm(S2,(uae_u32)temp_fp+4);
+       writelong_clobber(ad,S2,S3);
+       add_l_ri(ad,4);
+       mov_l_rm(S2,(uae_u32)temp_fp);
+       writelong_clobber(ad,S2,S3);
+       break;
+     case 6: /* stored as an integer, written as one byte */
+       fmovi_mr((uae_u32)temp_fp,val);
+       delay;
+       mov_l_rm(S2,(uae_u32)temp_fp);
+       writebyte(ad,S2,S3);
+       break;
+     default:
+       return -1;
+    }
+    return 0;
+}
+
+/* Emit the code that places the effective address selected by opcode
+ * (bits 5-3 = mode, bits 2-0 = register) into scratch register S1.
+ *
+ * return -1 for failure, or register number for success (always S1).
+ *
+ * NOTE(review): the `ad` out-parameter is only assigned in statements that
+ * follow a return, so it is never written; callers have to rely on the
+ * return value alone. */
+STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad)
+{
+    uae_u16 tmp;     /* only referenced by unreachable code (mode 7, reg 3) */
+    uaecptr tmppc;   /* likewise */
+    int mode;
+    int reg;
+    uae_s32 off;
+
+    mode = (opcode >> 3) & 7;
+    reg = opcode & 7;
+    switch (mode) {
+     case 0:
+     case 1:
+       return -1; /* these modes are rejected */
+     case 2:
+     case 3:
+     case 4: /* all three are handled identically: register 8+reg is copied unchanged */
+       mov_l_rr(S1,8+reg);
+       return S1;
+       *ad = m68k_areg (regs, reg); /* unreachable */
+       break;
+     case 5: /* register 8+reg plus a sign-extended 16-bit displacement from the instruction stream */
+       off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+
+       mov_l_rr(S1,8+reg);
+       add_l_ri(S1,off);
+       return S1;
+     case 6:
+       return -1; /* not supported here */
+       break;
+     case 7: /* sub-mode selected by reg */
+       switch (reg) {
+        case 0: /* address is a sign-extended 16-bit constant */
+           off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+           mov_l_ri(S1,off);
+           return S1;
+        case 1: /* address is a 32-bit constant */
+           off=comp_get_ilong((m68k_pc_offset+=4)-4);
+           mov_l_ri(S1,off);
+           return S1;
+        case 2:
+           return -1; /* not supported; the statements below can never run */
+           *ad = m68k_getpc ();
+           *ad += (uae_s32) (uae_s16) next_iword ();
+           break;
+        case 3:
+           return -1; /* not supported; the statements below can never run */
+           tmppc = m68k_getpc ();
+           tmp = next_iword ();
+           *ad = get_disp_ea_020 (tmppc, tmp);
+           break;
+        default:
+           return -1;
+       }
+    }
+    abort(); /* every path through the switch above returns, so this is never reached */
+    return -1;
+}
+
+void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
+{   /* Compile hook for FDBcc.  Nothing is compiled: the function
+     * unconditionally reports FAIL(1) (macro defined elsewhere) and
+     * returns; neither parameter is examined. */
+    FAIL(1);
+    return;
+
+    /* Unreachable: left behind after the unconditional return above. */
+    if (!currprefs.compfpu) {
+       FAIL(1);
+       return;
+    }
+}
+
+void comp_fscc_opp (uae_u32 opcode, uae_u16 extra)
+{   /* Compile hook for FScc with an integer-register destination.
+     *
+     * opcode - instruction word; bits 5-3 must be 0 and bits 2-0 name the
+     *          destination register.
+     * extra  - extension word; bit 0x20 must be clear and the low four
+     *          bits select the condition.
+     *
+     * S4 is built up as 0, or 255 when the condition holds, and is then
+     * moved into the destination register with mov_b_rr.  FAIL(1) is
+     * reported when currprefs.compfpu is off or one of the two
+     * restrictions above is violated. */
+    uae_u32 ad;  /* only referenced by the unreachable else branch at the end */
+    int cc;      /* never assigned; only read by that same unreachable branch */
+    int reg;
+
+    if (!currprefs.compfpu) {
+       FAIL(1);
+       return;
+    }
+
+#if DEBUG_FPP
+    printf ("fscc_opp at %08lx\n", m68k_getpc ());
+    fflush (stdout);
+#endif
+
+
+    if (extra&0x20) {  /* only cc from 00 to 1f are defined */
+       FAIL(1);
+       return;
+    }
+    if ((opcode & 0x38) != 0) { /* We can only do to integer register */
+       FAIL(1); 
+       return;
+    }
+
+    fflags_into_flags(S2);
+    reg=(opcode&7);
+    
+    mov_l_ri(S1,255); /* value selected when the condition is true */
+    mov_l_ri(S4,0);   /* result register, false by default */
+    /* The third argument of cmov_l_rr is a condition number; presumably an
+       x86 condition code - confirm against the emitter.  In cases 1, 4 and
+       5 S2 is first set to S4 (0) so that a following cmov on condition 10
+       can put the 0 back. */
+    switch(extra&0x0f) {  /* according to fpp.c, the 0x10 bit is ignored
+                           */
+     case 0: break;  /* set never */
+     case 1: mov_l_rr(S2,S4); 
+       cmov_l_rr(S4,S1,4); 
+       cmov_l_rr(S4,S2,10); break;
+     case 2: cmov_l_rr(S4,S1,7); break;
+     case 3: cmov_l_rr(S4,S1,3); break;
+     case 4: mov_l_rr(S2,S4); 
+       cmov_l_rr(S4,S1,2); 
+       cmov_l_rr(S4,S2,10); break;
+     case 5: mov_l_rr(S2,S4); 
+       cmov_l_rr(S4,S1,6); 
+       cmov_l_rr(S4,S2,10); break;
+     case 6: cmov_l_rr(S4,S1,5); break;
+     case 7: cmov_l_rr(S4,S1,11); break;
+     case 8: cmov_l_rr(S4,S1,10); break;
+     case 9: cmov_l_rr(S4,S1,4); break;
+     case 10: cmov_l_rr(S4,S1,10); cmov_l_rr(S4,S1,7); break;
+     case 11: cmov_l_rr(S4,S1,4); cmov_l_rr(S4,S1,3); break;
+     case 12: cmov_l_rr(S4,S1,2); break;
+     case 13: cmov_l_rr(S4,S1,6); break;
+     case 14: cmov_l_rr(S4,S1,5); cmov_l_rr(S4,S1,10); break;
+     case 15: mov_l_rr(S4,S1); break; /* set always */
+    }
+
+    if ((opcode & 0x38) == 0) { /* always true here: the other case returned above */
+       mov_b_rr(reg,S4);
+    } else {
+       /* NOTE(review): unreachable because of the early return on
+          (opcode & 0x38) != 0; if it ever ran it would abort(), and the
+          code after that reads cc without it having been set. */
+       abort();
+       if (get_fp_ad (opcode, &ad) == 0) {
+           m68k_setpc (m68k_getpc () - 4);
+           op_illg (opcode);
+       } else
+           put_byte (ad, cc ? 0xff : 0x00);
+    }
+}
+
+void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc)
+{   /* Compile hook for FTRAPcc.  Nothing is compiled: the function
+     * unconditionally reports FAIL(1) (macro defined elsewhere) and
+     * returns; neither parameter is examined. */
+    int cc; /* never used */
+
+    FAIL(1);
+    return;
+}
+
+extern unsigned long foink3, oink;
+
+void comp_fbcc_opp (uae_u32 opcode)
+{   /* Compile hook for FBcc (FPU conditional branch).
+     *
+     * opcode - instruction word; bit 0x20 must be clear, bit 0x40 selects
+     *          a 32-bit displacement instead of a 16-bit one, and the low
+     *          four bits select the condition.
+     *
+     * S1 is loaded with the branch-taken pointer and PC_P with the
+     * fall-through pointer.  Conditions that need a single test are handed
+     * to register_branch(); the others patch PC_P with conditional moves.
+     * FAIL(1) is reported when currprefs.compfpu is off or bit 0x20 of
+     * opcode is set. */
+    uae_u32 start_68k_offset=m68k_pc_offset; /* offset at which the displacement begins */
+    uae_u32 off;
+    uae_u32 v1;
+    uae_u32 v2;
+    uae_u32 nh; /* never used */
+    int cc;
+
+    if (!currprefs.compfpu) {
+       FAIL(1);
+       return;
+    }
+
+    if (opcode&0x20) {  /* only cc from 00 to 1f are defined */
+       FAIL(1);
+       return;
+    }
+    if ((opcode&0x40)==0) { /* 16-bit displacement, sign-extended */
+       off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+    }
+    else { /* 32-bit displacement */
+       off=comp_get_ilong((m68k_pc_offset+=4)-4);
+    }
+    mov_l_ri(S1,(uae_u32)
+            (comp_pc_p+off-(m68k_pc_offset-start_68k_offset)));
+    mov_l_ri(PC_P,(uae_u32)comp_pc_p);
+
+    /* Now they are both constant. Might as well fold in m68k_pc_offset */
+    add_l_ri(S1,m68k_pc_offset);   /* S1 is now comp_pc_p + start_68k_offset + off */
+    add_l_ri(PC_P,m68k_pc_offset); /* PC_P is now comp_pc_p + the offset just past the displacement */
+    m68k_pc_offset=0;
+
+    /* according to fpp.c, the 0x10 bit is ignored
+       (it handles exception handling, which we don't
+       do, anyway ;-) */
+    cc=opcode&0x0f;
+    v1=get_const(PC_P); /* presumably the constant currently held for PC_P (fall-through) - confirm */
+    v2=get_const(S1);   /* presumably the constant currently held for S1 (branch taken) - confirm */
+    fflags_into_flags(S2);
+
+    // mov_l_mi((uae_u32)&foink3,cc);
+    /* The last argument of register_branch / cmov_l_rr is a condition
+       number; presumably an x86 condition code - confirm against the
+       emitter.  In cases 1, 4 and 5 S2 keeps a copy of the fall-through
+       pointer so that a following cmov on condition 10 can restore it. */
+    switch(cc) {  
+     case 0: break;  /* jump never */
+     case 1: 
+       mov_l_rr(S2,PC_P); 
+       cmov_l_rr(PC_P,S1,4); 
+       cmov_l_rr(PC_P,S2,10); break;
+     case 2: register_branch(v1,v2,7); break;
+     case 3: register_branch(v1,v2,3); break;
+     case 4: 
+       mov_l_rr(S2,PC_P); 
+       cmov_l_rr(PC_P,S1,2); 
+       cmov_l_rr(PC_P,S2,10); break;
+     case 5:
+       mov_l_rr(S2,PC_P); 
+       cmov_l_rr(PC_P,S1,6); 
+       cmov_l_rr(PC_P,S2,10); break;
+     case 6: register_branch(v1,v2,5); break;
+     case 7: register_branch(v1,v2,11); break;
+     case 8: register_branch(v1,v2,10); break;
+     case 9: register_branch(v1,v2,4); break;
+     case 10: 
+       cmov_l_rr(PC_P,S1,10); 
+       cmov_l_rr(PC_P,S1,7); break;
+     case 11: 
+       cmov_l_rr(PC_P,S1,4); 
+       cmov_l_rr(PC_P,S1,3); break;
+     case 12: register_branch(v1,v2,2); break;
+     case 13: register_branch(v1,v2,6); break;
+     case 14: 
+       cmov_l_rr(PC_P,S1,5); 
+       cmov_l_rr(PC_P,S1,10); break;
+     case 15: mov_l_rr(PC_P,S1); break; /* jump always */
+    }
+}
+
+    /* Floating point conditions 
+       The "NotANumber" part could be problematic; however, when NaN is
+       encountered, the ftst instruction sets both N and Z to 1 on the x87,
+       so quite often things just fall into place. This is probably not
+       accurate wrt the 68k FPU, but it is *as* accurate as this was before.
+       However, some more thought should go into fixing this stuff up so
+       it accurately emulates the 68k FPU.
+>=<U 
+0000    0x00: 0                        ---   Never jump
+0101    0x01: Z                        ---   jump if zero (x86: 4)
+1000    0x02: !(NotANumber || Z || N)  --- Neither Z nor N set (x86: 7)
+1101    0x03: Z || !(NotANumber || N); --- Z or !N (x86: 4 and 3)
+0010    0x04: N && !(NotANumber || Z); --- N and !Z (x86: hard!)
+0111    0x05: Z || (N && !NotANumber); --- Z or N (x86: 6)
+1010    0x06: !(NotANumber || Z);      --- not Z (x86: 5)
+1110    0x07: !NotANumber;             --- not NaN (x86: 11, not parity)
+0001    0x08: NotANumber;              --- NaN (x86: 10)
+0101    0x09: NotANumber || Z;         --- Z (x86: 4)
+1001    0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
+1101    0x0b: NotANumber || Z || !N;   --- Z or !N (x86: 4 and 3)
+0011    0x0c: NotANumber || (N && !Z); --- N (x86: 2)
+0111    0x0d: NotANumber || Z || N;    --- Z or N (x86: 6)
+1010    0x0e: !Z;                      --- not Z (x86: 5)
+1111    0x0f: 1;                       --- always
+
+This is not how the 68k handles things, though --- it sets Z to 0 and N
+to the NaN's sign.... ('o' and 'i' denote differences from the above
+table)
+
+>=<U 
+0000    0x00: 0                        ---   Never jump
+010o    0x01: Z                        ---   jump if zero (x86: 4, not 10)
+1000    0x02: !(NotANumber || Z || N)  --- Neither Z nor N set (x86: 7)
+110o    0x03: Z || !(NotANumber || N); --- Z or !N (x86: 3)
+0010    0x04: N && !(NotANumber || Z); --- N and !Z (x86: 2, not 10)
+011o    0x05: Z || (N && !NotANumber); --- Z or N (x86: 6, not 10)
+1010    0x06: !(NotANumber || Z);      --- not Z (x86: 5)
+1110    0x07: !NotANumber;             --- not NaN (x86: 11, not parity)
+0001    0x08: NotANumber;              --- NaN (x86: 10)
+0101    0x09: NotANumber || Z;         --- Z (x86: 4)
+1001    0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
+1101    0x0b: NotANumber || Z || !N;   --- Z or !N (x86: 4 and 3)
+0011    0x0c: NotANumber || (N && !Z); --- N (x86: 2)
+0111    0x0d: NotANumber || Z || N;    --- Z or N (x86: 6)
+101i    0x0e: !Z;                      --- not Z (x86: 5 and 10)
+1111    0x0f: 1;                       --- always
+
+Of course, this *still* doesn't mean that the x86 and 68k conditions are
+equivalent --- the handling of infinities is different, for one thing.
+On the 68k, +infinity minus +infinity is NotANumber (as it should be). On
+the x86, it is +infinity, and some exception is raised (which I suspect
+is promptly ignored) STUPID! 
+The more I learn about their CPUs, the more I detest Intel....
+
+You can see this in action if you have "Benoit" (see Aminet) and
+set the exponent to 16. Wait for a long time, and marvel at the extra black
+areas outside the center one. That's where Benoit expects NaN, and the x86
+gives +infinity. [Ooops --- that must have been some kind of bug in my code.
+it no longer happens, and the resulting graphic looks much better, too]
+
+x86 conditions
+0011    : 2
+1100    : 3
+0101    : 4
+1010    : 5
+0111    : 6
+1000    : 7
+0001    : 10
+1110    : 11
+    */
+void comp_fsave_opp (uae_u32 opcode)
+{
+    uae_u32 ad;
+    int incr = (opcode & 0x38) == 0x20 ? -1 : 1; /* -1 when opcode bits 3-5 equal 4, +1 otherwise */
+    int i;
+    /* FSAVE is not compiled: the handler signals FAIL(1) and returns at
+       once, so nothing below the return is ever executed. */
+    FAIL(1);
+    return;
+    /* Unreachable reference code.  It stores a state frame at the
+       effective address with put_long(): one long 0x41000000 when
+       currprefs.cpu_level >= 4, otherwise seven longs (0x1f180000, five
+       zeros, 0x70000000).  The final address is written back to the
+       address register for modes 0x18 and 0x20. */
+    if (!currprefs.compfpu) {
+       FAIL(1);
+       return;
+    }
+
+#if DEBUG_FPP
+    printf ("fsave_opp at %08lx\n", m68k_getpc ());
+    fflush (stdout);
+#endif
+    if (get_fp_ad (opcode, &ad) == 0) {
+       m68k_setpc (m68k_getpc () - 2);
+       op_illg (opcode);
+       return;
+    }
+
+    if (currprefs.cpu_level >= 4) {
+       /* 4 byte 68040 IDLE frame.  */
+       if (incr < 0) {
+           ad -= 4;
+           put_long (ad, 0x41000000);
+       } else {
+           put_long (ad, 0x41000000);
+           ad += 4;
+       }
+    } else { /* 28-byte frame; both directions leave the same layout in memory */
+       if (incr < 0) {
+           ad -= 4;
+           put_long (ad, 0x70000000);
+           for (i = 0; i < 5; i++) {
+               ad -= 4;
+               put_long (ad, 0x00000000);
+           }
+           ad -= 4;
+           put_long (ad, 0x1f180000);
+       } else {
+           put_long (ad, 0x1f180000);
+           ad += 4;
+           for (i = 0; i < 5; i++) {
+               put_long (ad, 0x00000000);
+               ad += 4;
+           }
+           put_long (ad, 0x70000000);
+           ad += 4;
+       }
+    }
+    if ((opcode & 0x38) == 0x18)
+       m68k_areg (regs, opcode & 7) = ad;
+    if ((opcode & 0x38) == 0x20)
+       m68k_areg (regs, opcode & 7) = ad;
+}
+/* FRESTORE handler.  It signals FAIL(1) and returns immediately, so no
+ * code is generated; the remainder of the body is unreachable. */
+void comp_frestore_opp (uae_u32 opcode)
+{
+    uae_u32 ad;
+    uae_u32 d;
+    int incr = (opcode & 0x38) == 0x20 ? -1 : 1; /* -1 when opcode bits 3-5 equal 4, +1 otherwise */
+
+    FAIL(1);
+    return;
+    /* Unreachable reference code.  It reads the first long of the state
+       frame with get_long() and moves ad past the frame; the distance
+       depends on bits 16-23 of that long.  No FPU state is loaded, only
+       the address register is updated for modes 0x18 and 0x20. */
+    if (!currprefs.compfpu) {
+       FAIL(1);
+       return;
+    }
+
+#if DEBUG_FPP
+    printf ("frestore_opp at %08lx\n", m68k_getpc ());
+    fflush (stdout);
+#endif
+    if (get_fp_ad (opcode, &ad) == 0) {
+       m68k_setpc (m68k_getpc () - 2);
+       op_illg (opcode);
+       return;
+    }
+    if (currprefs.cpu_level >= 4) {
+       /* 68040 */
+       if (incr < 0) {
+           /* @@@ This may be wrong.  */
+           ad -= 4;
+           d = get_long (ad);
+           if ((d & 0xff000000) != 0) { /* Not a NULL frame? */
+               if ((d & 0x00ff0000) == 0) { /* IDLE */
+               } else if ((d & 0x00ff0000) == 0x00300000) { /* UNIMP */
+                   ad -= 44;
+               } else if ((d & 0x00ff0000) == 0x00600000) { /* BUSY */
+                   ad -= 92;
+               }
+           }
+       } else {
+           d = get_long (ad);
+           ad += 4;
+           if ((d & 0xff000000) != 0) { /* Not a NULL frame? */
+               if ((d & 0x00ff0000) == 0) { /* IDLE */
+               } else if ((d & 0x00ff0000) == 0x00300000) { /* UNIMP */
+                   ad += 44;
+               } else if ((d & 0x00ff0000) == 0x00600000) { /* BUSY */
+                   ad += 92;
+               }
+           }
+       }
+    } else { /* cpu_level below 4: frame sizes 0x18, 0x38 and 0xb4 are recognised */
+       if (incr < 0) {
+           ad -= 4;
+           d = get_long (ad);
+           if ((d & 0xff000000) != 0) {
+               if ((d & 0x00ff0000) == 0x00180000)
+                   ad -= 6 * 4;
+               else if ((d & 0x00ff0000) == 0x00380000)
+                   ad -= 14 * 4;
+               else if ((d & 0x00ff0000) == 0x00b40000)
+                   ad -= 45 * 4;
+           }
+       } else {
+           d = get_long (ad);
+           ad += 4;
+           if ((d & 0xff000000) != 0) {
+               if ((d & 0x00ff0000) == 0x00180000)
+                   ad += 6 * 4;
+               else if ((d & 0x00ff0000) == 0x00380000)
+                   ad += 14 * 4;
+               else if ((d & 0x00ff0000) == 0x00b40000)
+                   ad += 45 * 4;
+           }
+       }
+    }
+    if ((opcode & 0x38) == 0x18)
+       m68k_areg (regs, opcode & 7) = ad;
+    if ((opcode & 0x38) == 0x20)
+       m68k_areg (regs, opcode & 7) = ad;
+}
+
+/* Constants that comp_fpp_opp() loads from memory with fmov_rm().
+   They are spelled with enough digits to be exact to the last bit of a
+   double (the previous 10-digit values lost about 20 bits of precision). */
+static fptype const_e=2.7182818284590452354;         /* e */
+static fptype const_log10_e=0.43429448190325182765;  /* log10(e) */
+static fptype const_loge_10=2.30258509299404568402;  /* ln(10) */
+/* Powers of ten: 10^0 followed by 10^(2^k) for k = 0, 1, 2, ...
+   The entries above 1e256 do not fit into a double, so they are written
+   as long double literals and only exist when USE_LONG_DOUBLE is set. */
+static fptype power10[]={1e0,1e1,1e2,1e4,1e8,1e16,1e32,1e64,1e128,1e256
+#if USE_LONG_DOUBLE
+,       1e512L, 1e1024L, 1e2048L, 1e4096L
+#endif
+};
+
+/* 128 words, indexed through the low byte of the 68k fpu control word.
+ * Every row holds eight identical x87 control words and is labelled pXrY.
+ * Judging by the values, X selects the x87 precision-control field
+ * (bits 8-9: p0 and p3 -> 3, p1 -> 0, p2 -> 2) and Y the x87
+ * rounding-control field (bits 10-11: r0 -> 0, r1 -> 3, r2 -> 1, r3 -> 2).
+ * NOTE(review): the users in this file mask the index with 0xf0 before
+ * calling fldcw_m_indexed(), so the index is presumably a byte offset
+ * (16 bytes per row) -- confirm against fldcw_m_indexed(). */
+static uae_u16 x86_fpucw[]={
+    0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p0r0 */
+    0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p0r1 */
+    0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p0r2 */
+    0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, /* p0r3 */
+
+    0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, /* p1r0 */
+    0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, /* p1r1 */
+    0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, /* p1r2 */
+    0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, /* p1r3 */
+
+    0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, /* p2r0 */
+    0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, /* p2r1 */
+    0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, /* p2r2 */
+    0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, /* p2r3 */
+
+    0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p3r0 */
+    0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p3r1 */
+    0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p3r2 */
+    0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f  /* p3r3 */
+};
+/* Emit JIT code for a general FPU instruction (opcode word plus extension
+ * word `extra').
+ *
+ * Gives up via FAIL(1) when currprefs.compfpu is off.  Otherwise bits 13-15
+ * of `extra' select the handling:
+ *   3      - put_fp_value() with the FP register from extra bits 7-9
+ *   6, 7   - FMOVEM of FP registers, static register lists only
+ *   4, 5   - moves to/from regs.fpcr and regs.fpiar; every other form,
+ *            including the ones selected by extra bit 0x0800, fails
+ *   0, 2   - constant loads when (extra & 0xfc00) == 0x5c00, otherwise the
+ *            arithmetic operation selected by extra bits 0-6
+ * All of these return from inside the switch; only the value 1 reaches the
+ * m68k_setpc()/op_illg() pair at the bottom.
+ * Many cases return straight after FAIL(1); the statements that follow
+ * such a return are unreachable. */
+void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
+{
+    int reg;
+    int src;
+    
+    if (!currprefs.compfpu) {
+       FAIL(1);
+       return;
+    }
+    switch ((extra >> 13) & 0x7) {
+     case 3: /* 2nd most common */
+       if (put_fp_value ((extra >> 7)&7 , opcode, extra) < 0) {
+           FAIL(1);
+           return;
+       }
+       return;
+     case 6:
+     case 7: 
+       {
+           uae_u32 ad, list = 0; /* NOTE(review): this ad is shadowed by the int ad declared in both branches below */
+           int incr = 0;
+           if (extra & 0x2000) {
+               int ad;
+
+               /* FMOVEM FPP->memory */
+               switch ((extra >> 11) & 3) { /* Get out early if failure */
+                case 0:
+                case 2:
+                   break;
+                case 1:
+                case 3: 
+                default:
+                   FAIL(1); return;
+               }
+               ad=get_fp_ad (opcode, &ad); /* the return value replaces whatever was stored through &ad */
+               if (ad<0) {
+                   FAIL(1);
+#if 0
+                   m68k_setpc (m68k_getpc () - 4);
+                   op_illg (opcode);
+#endif
+                   return;
+               }
+               switch ((extra >> 11) & 3) {
+               case 0: /* static pred */
+                   list = extra & 0xff;
+                   incr = -1;
+                   break;
+               case 2: /* static postinc */
+                   list = extra & 0xff;
+                   incr = 1;
+                   break;
+               case 1: /* dynamic pred */
+               case 3: /* dynamic postinc */
+                  abort(); /* not reached: cases 1 and 3 already failed in the first switch */
+               }
+               while (list) {
+                   uae_u32 wrd1, wrd2, wrd3; /* unused */
+                   if (incr < 0) { /* Predecrement */
+                       fmov_ext_mr((uae_u32)temp_fp,fpp_movem_index2[list]);
+                       delay;
+                       sub_l_ri(ad,4); 
+                       mov_l_rm(S2,(uae_u32)temp_fp);
+                       writelong_clobber(ad,S2,S3);
+                       sub_l_ri(ad,4); 
+                       mov_l_rm(S2,(uae_u32)temp_fp+4);
+                       writelong_clobber(ad,S2,S3);
+                       sub_l_ri(ad,4); 
+                       mov_w_rm(S2,(uae_u32)temp_fp+8);
+                       writeword_clobber(ad,S2,S3);
+                   } else { /* postinc */
+                       fmov_ext_mr((uae_u32)temp_fp,fpp_movem_index2[list]); /* NOTE(review): the memory->FPP postinc path below uses fpp_movem_index1 -- verify index2 is intended here */
+                       delay;
+                       mov_w_rm(S2,(uae_u32)temp_fp+8);
+                       writeword_clobber(ad,S2,S3);
+                       add_l_ri(ad,4);
+                       mov_l_rm(S2,(uae_u32)temp_fp+4);
+                       writelong_clobber(ad,S2,S3);
+                       add_l_ri(ad,4);
+                       mov_l_rm(S2,(uae_u32)temp_fp);
+                       writelong_clobber(ad,S2,S3);
+                       add_l_ri(ad,4);
+                   }
+                   list = fpp_movem_next[list];
+               }
+               if ((opcode & 0x38) == 0x18) /* modes 0x18 and 0x20 copy the final address into register (opcode & 7)+8 */
+                   mov_l_rr((opcode & 7)+8,ad);
+               if ((opcode & 0x38) == 0x20)
+                   mov_l_rr((opcode & 7)+8,ad);
+           } else {
+               /* FMOVEM memory->FPP */
+
+               int ad;
+               switch ((extra >> 11) & 3) { /* Get out early if failure */
+                case 0:
+                case 2:
+                   break;
+                case 1:
+                case 3: 
+                default:
+                   FAIL(1); return;
+               }
+               ad=get_fp_ad (opcode, &ad); /* the return value replaces whatever was stored through &ad */
+               if (ad<0) {
+                   FAIL(1);
+#if 0
+                   m68k_setpc (m68k_getpc () - 4);
+                   op_illg (opcode);
+#endif
+                   return;
+               }
+               switch ((extra >> 11) & 3) {
+               case 0: /* static pred */
+                   list = extra & 0xff;
+                   incr = -1;
+                   break;
+               case 2: /* static postinc */
+                   list = extra & 0xff;
+                   incr = 1;
+                   break;
+               case 1: /* dynamic pred */
+               case 3: /* dynamic postinc */
+                  abort(); /* not reached: cases 1 and 3 already failed in the first switch */
+               }
+
+               while (list) {
+                   uae_u32 wrd1, wrd2, wrd3; /* unused */
+                   if (incr < 0) {
+                       sub_l_ri(ad,4);
+                       readlong(ad,S2,S3);
+                       mov_l_mr((uae_u32)(temp_fp),S2);
+                       sub_l_ri(ad,4);
+                       readlong(ad,S2,S3);
+                       mov_l_mr((uae_u32)(temp_fp)+4,S2);
+                       sub_l_ri(ad,4);
+                       readword(ad,S2,S3);
+                       mov_w_mr(((uae_u32)temp_fp)+8,S2);
+                       delay2;
+                       fmov_ext_rm(fpp_movem_index2[list],(uae_u32)(temp_fp));
+                   } else {
+                       readword(ad,S2,S3);
+                       mov_w_mr(((uae_u32)temp_fp)+8,S2);
+                       add_l_ri(ad,4);
+                       readlong(ad,S2,S3);
+                       mov_l_mr((uae_u32)(temp_fp)+4,S2);
+                       add_l_ri(ad,4);
+                       readlong(ad,S2,S3);
+                       mov_l_mr((uae_u32)(temp_fp),S2);
+                       add_l_ri(ad,4);
+                       delay2;
+                       fmov_ext_rm(fpp_movem_index1[list],(uae_u32)(temp_fp));
+                   }
+                   list = fpp_movem_next[list];
+               }
+               if ((opcode & 0x38) == 0x18) /* modes 0x18 and 0x20 copy the final address into register (opcode & 7)+8 */
+                   mov_l_rr((opcode & 7)+8,ad);
+               if ((opcode & 0x38) == 0x20)
+                   mov_l_rr((opcode & 7)+8,ad);
+           }
+       }
+       return;
+
+     case 4:
+     case 5:  /* rare */
+       if ((opcode & 0x30) == 0) { /* register operand: opcode & 15 is used as the register number */
+           if (extra & 0x2000) {
+               if (extra & 0x1000) {
+                   mov_l_rm(opcode & 15,(uae_u32)&regs.fpcr); return;
+               }
+               if (extra & 0x0800) {
+                   FAIL(1);
+                   return;
+               }
+               if (extra & 0x0400) {
+                   mov_l_rm(opcode & 15,(uae_u32)&regs.fpiar); return;
+               }
+           } else {
+               if (extra & 0x1000) {
+                   mov_l_mr((uae_u32)&regs.fpcr,opcode & 15); 
+#if USE_X86_FPUCW
+                   mov_l_rr(S1,opcode & 15);
+                   and_l_ri(S1,0x000000f0);
+                   fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+#endif             
+                   return;
+               }
+               if (extra & 0x0800) {
+                   FAIL(1);
+                   return;
+                   // set_fpsr(m68k_dreg (regs, opcode & 15));
+               }
+               if (extra & 0x0400) {
+                   mov_l_mr((uae_u32)&regs.fpiar,opcode & 15); return;
+               }
+           }
+       } else if ((opcode & 0x3f) == 0x3c) { /* operand is a 32-bit value taken from the instruction stream */
+           if ((extra & 0x2000) == 0) {
+               if (extra & 0x1000) {
+                   uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4);
+                   mov_l_mi((uae_u32)&regs.fpcr,val);
+#if USE_X86_FPUCW
+                   mov_l_ri(S1,val&0x000000f0);
+                   fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+#endif             
+                   return;
+               }
+               if (extra & 0x0800) {
+                   FAIL(1);
+                   return;
+               }
+               if (extra & 0x0400) {
+                   uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4);
+                   mov_l_mi((uae_u32)&regs.fpiar,val);
+                   return;
+               }
+           }
+           FAIL(1);
+           return;
+       } else if (extra & 0x2000) {
+           FAIL(1);
+           return;
+       } else {
+           FAIL(1);
+           return;
+       }
+       FAIL(1); /* reached when the register-operand branch matched none of the bits 0x1000/0x0800/0x0400 */
+       return;
+
+     case 0:
+     case 2: /* Extremely common */
+       reg = (extra >> 7) & 7;
+       if ((extra & 0xfc00) == 0x5c00) { /* constant load: extra bits 0-6 pick the constant */
+           switch (extra & 0x7f) {
+            case 0x00:
+               fmov_pi(reg);
+               break;
+            case 0x0b:
+               fmov_log10_2(reg);
+               break;
+            case 0x0c:
+               fmov_rm(reg,(uae_u32)&const_e);
+               break;
+            case 0x0d:
+               fmov_log2_e(reg);
+               break;
+            case 0x0e:
+               fmov_rm(reg,(uae_u32)&const_log10_e);
+               break;
+            case 0x0f:
+               fmov_0(reg);
+               break;
+            case 0x30:
+               fmov_loge_2(reg);
+               break;
+            case 0x31:
+               fmov_rm(reg,(uae_u32)&const_loge_10);
+               break;
+            case 0x32:
+               fmov_1(reg);
+               break;
+            case 0x33:
+            case 0x34:
+            case 0x35:
+            case 0x36:
+            case 0x37:
+            case 0x38:
+            case 0x39:
+            case 0x3a:
+            case 0x3b:
+               fmov_rm(reg,(uae_u32)(power10+(extra & 0x7f)-0x32)); /* power10[1] .. power10[9] */
+               break;
+            default:
+               /* This is not valid, so we fail */
+               FAIL(1);
+               return;
+           }
+           return;
+       }
+       
+       switch (extra & 0x7f) {
+        case 0x00:             /* FMOVE */
+        case 0x40:  /* Explicit rounding. This is just a quick fix. Same
+                     * for all other cases that have three choices */
+        case 0x44:   
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fmov_rr(reg,src);
+           MAKE_FPSR (src);
+           break;
+        case 0x01:             /* FINT */
+           FAIL(1);    
+           return;
+           dont_care_fflags(); /* unreachable */
+        case 0x02:             /* FSINH */
+           FAIL(1);  
+           return;
+
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = sinh (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x03:             /* FINTRZ */
+#if USE_X86_FPUCW 
+           /* If we have control over the CW, we can do this */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           mov_l_ri(S1,16);  /* Switch to "round to zero" mode */
+           fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+           
+           frndint_rr(reg,src);
+
+           /* restore control word */
+           mov_l_rm(S1,(uae_u32)&regs.fpcr); 
+           and_l_ri(S1,0x000000f0);
+           fldcw_m_indexed(S1,(uae_u32)x86_fpucw);
+
+           MAKE_FPSR (reg);
+           break;
+#endif             
+           FAIL(1);  /* only reachable when USE_X86_FPUCW is 0; otherwise the break above ends the case */
+           return;
+           regs.fp[reg] = (int) src; /* unreachable */
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x04:             /* FSQRT */
+        case 0x41:
+        case 0x45:
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fsqrt_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x06:             /* FLOGNP1 */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = log (src + 1.0);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x08:             /* FETOXM1 */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = exp (src) - 1.0;
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x09:             /* FTANH */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = tanh (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x0a:             /* FATAN */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = atan (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x0c:             /* FASIN */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = asin (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x0d:             /* FATANH */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+#if 1                          /* The BeBox doesn't have atanh, and it isn't in the HPUX libm either */
+           regs.fp[reg] = log ((1 + src) / (1 - src)) / 2;
+#else
+           regs.fp[reg] = atanh (src);
+#endif
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x0e:             /* FSIN */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fsin_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x0f:             /* FTAN */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = tan (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x10:             /* FETOX */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fetox_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x11:             /* FTWOTOX */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           ftwotox_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x12:             /* FTENTOX */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = pow (10.0, src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x14:             /* FLOGN */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = log (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x15:             /* FLOG10 */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = log10 (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x16:             /* FLOG2 */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           flog2_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x18:             /* FABS */
+        case 0x58:
+        case 0x5c:
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fabs_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x19:             /* FCOSH */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = cosh (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x1a:             /* FNEG */
+        case 0x5a:
+        case 0x5e:
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fneg_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x1c:             /* FACOS */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = acos (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x1d:             /* FCOS */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fcos_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x1e:             /* FGETEXP */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           {
+               int expon;
+               frexp (src, &expon);
+               regs.fp[reg] = (double) (expon - 1);
+               MAKE_FPSR (regs.fp[reg]);
+           }
+           break;
+        case 0x1f:             /* FGETMAN */
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           {
+               int expon;
+               regs.fp[reg] = frexp (src, &expon) * 2.0;
+               MAKE_FPSR (regs.fp[reg]);
+           }
+           break;
+        case 0x20:             /* FDIV */
+        case 0x60:
+        case 0x64:
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fdiv_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x21:             /* FMOD */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           frem_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x22:             /* FADD */
+        case 0x62:
+        case 0x66:
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fadd_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x23:             /* FMUL */
+        case 0x63:
+        case 0x67:
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fmul_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x24:             /* FSGLDIV */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fdiv_rr(reg,src); /* same emitter as FDIV */
+           MAKE_FPSR (reg);
+           break;
+        case 0x25:             /* FREM */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           frem1_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x26:             /* FSCALE */
+           dont_care_fflags();
+           FAIL(1);  
+           return;
+           regs.fp[reg] *= exp (log (2.0) * src); /* unreachable from here to the break */
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x27:             /* FSGLMUL */
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fmul_rr(reg,src); /* same emitter as FMUL */
+           MAKE_FPSR (reg);
+           break;
+        case 0x28:             /* FSUB */
+        case 0x68:
+        case 0x6c:
+           dont_care_fflags();
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fsub_rr(reg,src);
+           MAKE_FPSR (reg);
+           break;
+        case 0x30:             /* FSINCOS */
+        case 0x31:
+        case 0x32:
+        case 0x33:
+        case 0x34:
+        case 0x35:
+        case 0x36:
+        case 0x37:
+           FAIL(1);  
+           return;
+           dont_care_fflags(); /* unreachable from here to the break */
+           regs.fp[reg] = sin (src);
+           regs.fp[extra & 7] = cos (src);
+           MAKE_FPSR (regs.fp[reg]);
+           break;
+        case 0x38:             /* FCMP */
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fmov_rr(FP_RESULT,reg); /* the difference is formed in FP_RESULT; reg is not written */
+           fsub_rr(FP_RESULT,src); /* Right way? */
+           break;
+        case 0x3a:             /* FTST */
+           src=get_fp_value (opcode, extra);
+           if (src < 0) {
+               FAIL(1);  /* Illegal instruction */
+               return;
+           }
+           fmov_rr(FP_RESULT,src);
+           break;
+        default:
+           FAIL(1);  
+           return;
+           break;
+       }
+       return;
+    }
+    m68k_setpc (m68k_getpc () - 4); /* only reached when ((extra >> 13) & 7) == 1 */
+    op_illg (opcode);
+}
index 3670a3cb7a0751b1f5b92b6f98d074bb8fabfbb7..a64895bac11fb088ca845fd108e5718047b37a96 100755 (executable)
@@ -2546,30 +2546,6 @@ static __inline__ void make_tos(int r)
     live.spos[q]=p;
 }
 
-static __inline__ void make_tos2(int r, int r2)
-{
-    int q;
-
-    make_tos(r2); /* Put the reg that's supposed to end up in position2
-                    on top */
-
-    if (live.spos[r]<0) { /* Register not yet on stack */
-       make_tos(r); /* This will extend the stack */
-       return;
-    }
-    /* Register is on stack */
-    emit_byte(0xd9);
-    emit_byte(0xc9); /* Move r2 into position 2 */
-
-    q=live.onstack[live.tos-1];
-    live.onstack[live.tos]=q;
-    live.spos[q]=live.tos;
-    live.onstack[live.tos-1]=r2;
-    live.spos[r2]=live.tos-1;
-
-    make_tos(r); /* And r into 1 */
-}
-
 static __inline__ int stackpos(int r)
 {
     if (live.spos[r]<0)
@@ -2581,14 +2557,21 @@ static __inline__ int stackpos(int r)
     return live.tos-live.spos[r];
 }
 
+/* IMO, calling usereg(r) makes no sense, if the register r should supply our function with
+   an argument, because I would expect all arguments to be on the stack already, won't they?
+   Thus, usereg(s) is always useless and also for every FRW d it's too late here now. PeterK
+*/
 static __inline__ void usereg(int r)
 {
-    if (live.spos[r]<0)
+
+    if (live.spos[r]<0) {
+       // write_log ("usereg wants to push reg %d onto the x87 stack calling make_tos\n", r);
        make_tos(r);
+    }
 }
 
-/* This is called with one FP value in a reg *above* tos, which it will
-   pop off the stack if necessary */
+/* This is called with one FP value in a reg *above* tos,
+   which it will pop off the stack if necessary */
 static __inline__ void tos_make(int r)
 {
     if (live.spos[r]<0) {
@@ -2598,8 +2581,8 @@ static __inline__ void tos_make(int r)
        return;
     }
     emit_byte(0xdd);
-    emit_byte(0xd8+(live.tos+1)-live.spos[r]);  /* store top of stack in reg, 
-                                        and pop it*/
+    emit_byte(0xd8+(live.tos+1)-live.spos[r]);
+    /* store top of stack in reg and pop it*/
 }
     
        
@@ -2761,7 +2744,6 @@ LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
 {
     int ds;
 
-    usereg(s);
     ds=stackpos(s);
     if (ds==0 && live.spos[d]>=0) {
        /* source is on top of stack, and we already have the dest */
@@ -2791,18 +2773,17 @@ LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
     int ds;
 
     if (d!=s) {
-       usereg(s);
        ds=stackpos(s);
        emit_byte(0xd9);
-       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xc0+ds); /* fld x */
        emit_byte(0xd9);
-       emit_byte(0xfa); /* take square root */
-       tos_make(d); /* store to destination */
+       emit_byte(0xfa);    /* fsqrt sqrt(x) */
+       tos_make(d);        /* store to destination */
     }
     else {
        make_tos(d);
        emit_byte(0xd9);
-       emit_byte(0xfa); /* take square root */
+       emit_byte(0xfa);    /* fsqrt y=sqrt(x) */
     }  
 }
 LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
@@ -2812,18 +2793,17 @@ LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
     int ds;
 
     if (d!=s) {
-       usereg(s);
        ds=stackpos(s);
        emit_byte(0xd9);
-       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xc0+ds); /* fld x */
        emit_byte(0xd9);
-       emit_byte(0xe1); /* take fabs */
-       tos_make(d); /* store to destination */
+       emit_byte(0xe1);    /* fabs abs(x) */
+       tos_make(d);        /* store to destination */
     }
     else {
        make_tos(d);
        emit_byte(0xd9);
-       emit_byte(0xe1); /* take fabs */
+       emit_byte(0xe1);    /* fabs y=abs(x) */
     }  
 }
 LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
@@ -2833,92 +2813,159 @@ LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
     int ds;
 
     if (d!=s) {
-       usereg(s);
        ds=stackpos(s);
        emit_byte(0xd9);
-       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xc0+ds); /* fld x */
        emit_byte(0xd9);
-       emit_byte(0xfc); /* take frndint */
-       tos_make(d); /* store to destination */
+       emit_byte(0xfc);    /* frndint int(x) */
+       tos_make(d);        /* store to destination */
     }
     else {
        make_tos(d);
        emit_byte(0xd9);
-       emit_byte(0xfc); /* take frndint */
+       emit_byte(0xfc);    /* frndint y=int(x) */
     }  
 }
 LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
 
-LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
 {
     int ds;
 
     if (d!=s) {
-       usereg(s);
        ds=stackpos(s);
        emit_byte(0xd9);
-       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xc0+ds); /* fld x */
        emit_byte(0xd9);
-       emit_byte(0xff); /* take cos */
-       tos_make(d); /* store to destination */
+       emit_byte(0xfe);    /* fsin sin(x) */
+       tos_make(d);        /* store to destination */
     }
     else {
        make_tos(d);
        emit_byte(0xd9);
-       emit_byte(0xff); /* take cos */
+       emit_byte(0xfe);    /* fsin y=sin(x) */
     }  
 }
-LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
 
-LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
 {
     int ds;
 
     if (d!=s) {
-       usereg(s);
        ds=stackpos(s);
        emit_byte(0xd9);
-       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xc0+ds); /* fld x */
        emit_byte(0xd9);
-       emit_byte(0xfe); /* take sin */
-       tos_make(d); /* store to destination */
+       emit_byte(0xff);    /* fcos cos(x) */
+       tos_make(d);        /* store to destination */
     }
     else {
        make_tos(d);
        emit_byte(0xd9);
-       emit_byte(0xfe); /* take sin */
+       emit_byte(0xff);    /* fcos y=cos(x) */
     }  
 }
-LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
 
-double one=1;
-LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s))
+{
+    int ds;
+    /* Emit x87 code for d = tan(s). NOTE(review): fptan pushes nothing when |x| >= 2^63 (C2 set). */
+    if (d!=s) {
+       ds=stackpos(s);     /* depth of s below the x87 top of stack */
+       emit_byte(0xd9);
+       emit_byte(0xc0+ds); /* fld st(ds): push a copy of x */
+       emit_byte(0xd9);
+       emit_byte(0xf2);    /* fptan: st(0)=tan(x), then pushes 1.0 */
+       emit_byte(0xdd);
+       emit_byte(0xd8);    /* fstp st(0): discard the 1.0 */
+       tos_make(d);        /* store to destination */
+    }
+    else {
+       make_tos(d);        /* in-place case: work on d at the top of stack */
+       emit_byte(0xd9);
+       emit_byte(0xf2);    /* fptan: st(0)=tan(x), then pushes 1.0 */
+       emit_byte(0xdd);
+       emit_byte(0xd8);    /* fstp st(0): discard the 1.0 */
+    }  
+}
+LENDFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s))
 {
     int ds;
 
-    usereg(s);
     ds=stackpos(s);
     emit_byte(0xd9);
-    emit_byte(0xc0+ds); /* duplicate source */
+    emit_byte(0xc0+ds);  /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xfb);     /* fsincos sin(x) push cos(x) */
+    if ((live.spos[c]<0)&&(live.spos[d]<0)) {
+       live.tos++;
+       live.spos[d]=live.tos;
+       live.onstack[live.tos]=d; /* sin(x) comes first */
+       live.tos++;
+       live.spos[c]=live.tos;
+       live.onstack[live.tos]=c;
+       return;          /* occupy both regs directly */
+    }
+    if (live.spos[c]<0) {
+       emit_byte(0xd9);
+       emit_byte(0xc9); /* fxch swap cos(x) with sin(x) */
+       emit_byte(0xdd); /* store sin(x) to d & pop */
+       emit_byte(0xd8+(live.tos+2)-live.spos[d]);
+       live.tos++;      /* occupy a reg for cos(x) here */
+       live.spos[c]=live.tos;
+       live.onstack[live.tos]=c;
+    }
+    else {
+       emit_byte(0xdd); /* store cos(x) to c & pop */
+       emit_byte(0xd8+(live.tos+2)-live.spos[c]);
+       tos_make(d);     /* store sin(x) to destination */
+    }
+}
+LENDFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s))
+
+float one=1;
+
+LOWFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s))
+{
+    int ds;
+    /* Emit x87 code for d = d * 2^x with x = s; fscale truncates x toward zero. */
+    make_tos(s);        /* tos=x */
+    ds=stackpos(d);     /* depth of y (= d) below the top of stack */
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* fld y: st(0)=y, st(1)=x */
+    emit_byte(0xd9);
+    emit_byte(0xfd);    /* fscale y*(2^x), x stays in st(1) */
+    tos_make(d);        /* store y=y*(2^x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+{
+    int ds;
 
+    ds=stackpos(s);
     emit_byte(0xd9);
-    emit_byte(0xc0);  /* duplicate top of stack. Now up to 8 high */
+    emit_byte(0xc0+ds); /* fld x */
     emit_byte(0xd9);
-    emit_byte(0xfc);  /* rndint */
+    emit_byte(0xfc);    /* frndint int(x) */
     emit_byte(0xd9);
-    emit_byte(0xc9);  /* swap top two elements */
+    emit_byte(0xc1+ds); /* fld x again */
     emit_byte(0xd8);
-    emit_byte(0xe1);  /* subtract rounded from original */
+    emit_byte(0xe1);    /* fsub frac(x) = x - int(x) */
     emit_byte(0xd9);
-    emit_byte(0xf0);  /* f2xm1 */
-    emit_byte(0xdc);
+    emit_byte(0xf0);    /* f2xm1 (2^frac(x))-1 */
+    emit_byte(0xd8);
     emit_byte(0x05);
-    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
+    emit_long((uae_u32)&one);  /* fadd (2^frac(x))-1 + 1 */
     emit_byte(0xd9);
-    emit_byte(0xfd);  /* and scale it */
+    emit_byte(0xfd);    /* fscale (2^frac(x))*2^int(x) */
     emit_byte(0xdd);
-    emit_byte(0xd9);  /* take he rounded value off */
-    tos_make(d); /* store to destination */
+    emit_byte(0xd9);    /* fstp copy & pop */
+    tos_make(d);        /* store y=2^x */
 }
 LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
 
@@ -2926,61 +2973,530 @@ LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
 {
     int ds;
 
-    usereg(s);
     ds=stackpos(s);
     emit_byte(0xd9);
-    emit_byte(0xc0+ds); /* duplicate source */
+    emit_byte(0xc0+ds); /* fld x */
     emit_byte(0xd9);
-    emit_byte(0xea);   /* fldl2e */
-    emit_byte(0xde);
-    emit_byte(0xc9);  /* fmulp --- multiply source by log2(e) */
+    emit_byte(0xea);    /* fldl2e log2(e) */
+    emit_byte(0xd8);
+    emit_byte(0xc9);    /* fmul x*log2(e) */
+    emit_byte(0xdd);
+    emit_byte(0xd1);    /* fst copy up */
+    emit_byte(0xd9);
+    emit_byte(0xfc);    /* frndint int(x*log2(e)) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);    /* fxch swap top two elements */
+    emit_byte(0xd8);
+    emit_byte(0xe1);    /* fsub x*log2(e) - int(x*log2(e))  */
+    emit_byte(0xd9);
+    emit_byte(0xf0);    /* f2xm1 (2^frac(x))-1 */
+    emit_byte(0xd8);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* fadd (2^frac(x))-1 + 1 */
+    emit_byte(0xd9);
+    emit_byte(0xfd);    /* fscale (2^frac(x))*2^int(x*log2(e)) */
+    emit_byte(0xdd);
+    emit_byte(0xd9);    /* fstp copy & pop */
+    tos_make(d);        /* store y=e^x */
+}
+LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s))
+{   /* Emit x87 code for d = (e^s)-1, computed as 2^(s*log2(e)) - 1. */
+    int ds;             /* depth of s below the x87 top of stack */
 
+    ds=stackpos(s);
     emit_byte(0xd9);
-    emit_byte(0xc0);  /* duplicate top of stack. Now up to 8 high */
+    emit_byte(0xc0+ds); /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xea);    /* fldl2e log2(e) */
+    emit_byte(0xd8);
+    emit_byte(0xc9);    /* fmul x*log2(e) */
+    emit_byte(0xdd);
+    emit_byte(0xd1);    /* fst copy up */
     emit_byte(0xd9);
-    emit_byte(0xfc);  /* rndint */
+    emit_byte(0xfc);    /* frndint int(x*log2(e)) */
     emit_byte(0xd9);
-    emit_byte(0xc9);  /* swap top two elements */
+    emit_byte(0xc9);    /* fxch swap top two elements */
     emit_byte(0xd8);
-    emit_byte(0xe1);  /* subtract rounded from original */
+    emit_byte(0xe1);    /* fsub x*log2(e) - int(x*log2(e))  */
     emit_byte(0xd9);
-    emit_byte(0xf0);  /* f2xm1 */
-    emit_byte(0xdc);
+    emit_byte(0xf0);    /* f2xm1 (2^frac(x))-1 */
+    emit_byte(0xd8); emit_byte(0x05); emit_long((uae_u32)&one); /* fadd m32: 2^frac(x); scaling (2^frac-1) alone gives e^x - 2^int */
+    emit_byte(0xd9); emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) = e^x */
+    emit_byte(0xdd); emit_byte(0xd9); /* fstp st(1): drop int(x*log2(e)), keep e^x */
+    emit_byte(0xd8); emit_byte(0x25); emit_long((uae_u32)&one); /* fsub m32: (e^x)-1 */
+    tos_make(d);        /* store y=(e^x)-1 */
+}
+LENDFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s))
+{
+    int ds;
+
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xe9);    /* fldl2t log2(10) */
+    emit_byte(0xd8);
+    emit_byte(0xc9);    /* fmul x*log2(10) */
+    emit_byte(0xdd);
+    emit_byte(0xd1);    /* fst copy up */
+    emit_byte(0xd9);
+    emit_byte(0xfc);    /* frndint int(x*log2(10)) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);    /* fxch swap top two elements */
+    emit_byte(0xd8);
+    emit_byte(0xe1);    /* fsub x*log2(10) - int(x*log2(10))  */
+    emit_byte(0xd9);
+    emit_byte(0xf0);    /* f2xm1 (2^frac(x))-1 */
+    emit_byte(0xd8);
     emit_byte(0x05);
-    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
+    emit_long((uae_u32)&one);  /* fadd (2^frac(x))-1 + 1 */
     emit_byte(0xd9);
-    emit_byte(0xfd);  /* and scale it */
+    emit_byte(0xfd);    /* fscale (2^frac(x))*2^int(x*log2(10)) */
     emit_byte(0xdd);
-    emit_byte(0xd9);  /* take he rounded value off */
-    tos_make(d); /* store to destination */
+    emit_byte(0xd9);    /* fstp copy & pop */
+    tos_make(d);        /* store y=10^x */
 }
-LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+LENDFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s))
  
 LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
 {
     int ds;
 
-    usereg(s);
     ds=stackpos(s);
     emit_byte(0xd9);
-    emit_byte(0xc0+ds); /* duplicate source */
+    emit_byte(0xc0+ds); /* fld x */
     emit_byte(0xd9);
-    emit_byte(0xe8); /* push '1' */
+    emit_byte(0xe8);    /* fld1 1 */
     emit_byte(0xd9);
-    emit_byte(0xc9); /* swap top two */
+    emit_byte(0xc9);    /* fxch swap 1 with x */
     emit_byte(0xd9);
-    emit_byte(0xf1); /* take 1*log2(x) */
-    tos_make(d); /* store to destination */
+    emit_byte(0xf1);    /* fyl2x 1*log2(x) */
+    tos_make(d);        /* store y=log2(x) */
 }
 LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
 
+LOWFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s))
+{
+    int ds;
+    /* Emit x87 code for d = ln(s), computed as ln(2)*log2(x). */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xed);    /* fldln2 logN(2) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);    /* fxch swap logN(2) with x */
+    emit_byte(0xd9);
+    emit_byte(0xf1);    /* fyl2x logN(2)*log2(x) & pop */
+    tos_make(d);        /* store y=logN(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s))
+{
+    int ds;
+    /* d = ln(s+1). NOTE(review): fyl2xp1 is only defined for |x| < 1-sqrt(2)/2; confirm callers stay in range. */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xed);    /* fldln2 logN(2) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);    /* fxch swap logN(2) with x */
+    emit_byte(0xd9);
+    emit_byte(0xf9);    /* fyl2xp1 logN(2)*log2(x+1) & pop */
+    tos_make(d);        /* store y=logN(x+1) */
+}
+LENDFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s))
+{
+    int ds;
+    /* Emit x87 code for d = log10(s), computed as log10(2)*log2(x). */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xec);    /* fldlg2 log10(2) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);    /* fxch swap log10(2) with x */
+    emit_byte(0xd9);
+    emit_byte(0xf1);    /* fyl2x log10(2)*log2(x) & pop */
+    tos_make(d);        /* store y=log10(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s))
+{
+    int ds;
+    /* Emit x87 code for d = asin(s), computed as atan(x / sqrt(1-x^2)). */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xe8);    /* fld1 1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xc1+ds); /* fld x (one slot deeper after the fld1) */
+    emit_byte(0xd8);
+    emit_byte(0xc8);    /* fmul x*x */
+    emit_byte(0xd8);
+    emit_byte(0xe9);    /* fsubr 1 - (x^2) */
+    emit_byte(0xd9);
+    emit_byte(0xfa);    /* fsqrt sqrt(1-(x^2)) */
+    emit_byte(0xd8);
+    emit_byte(0xfa+ds); /* fdivr x / sqrt(1-(x^2)), x read from st(2+ds) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);    /* fxch swap with 1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xf3);    /* fpatan atan((x/sqrt(1-(x^2)))/1) & pop */
+    tos_make(d);        /* store y=asin(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s))
+{
+    int ds;
+    /* d = acos(s) = atan2(sqrt(1-x^2), x); dividing by x first would put x<0 in (-pi/2,0). */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xe8);    /* fld1 1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xc1+ds); /* fld x (one slot deeper after the fld1) */
+    emit_byte(0xd8);
+    emit_byte(0xc8);    /* fmul x*x */
+    emit_byte(0xd8);
+    emit_byte(0xe9);    /* fsubr 1 - (x^2) */
+    emit_byte(0xd9);
+    emit_byte(0xfa);    /* fsqrt sqrt(1-(x^2)) */
+    emit_byte(0xdd);
+    emit_byte(0xd9);    /* fstp st(1): sqrt replaces the 1.0, pop */
+    emit_byte(0xd9);
+    emit_byte(0xc1+ds); /* fld x again: st(0)=x, st(1)=sqrt(1-(x^2)) */
+    emit_byte(0xd9);
+    emit_byte(0xf3);    /* fpatan atan2(st(1),st(0)) in [0,pi] & pop */
+    tos_make(d);        /* store y=acos(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s))
+{
+    int ds;
+    /* Emit x87 code for d = atan(s), using fpatan with a denominator of 1.0. */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xe8);    /* fld1 1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xf3);    /* fpatan atan(x/1) & pop */
+    tos_make(d);        /* store y=atan(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s))
+
+LOWFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s))
+{
+    int ds;
+    /* Emit x87 code for d = atanh(s), computed as logN((1+x)/(1-x)) / 2. */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xe8);    /* fld1 1.0 */
+    emit_byte(0xdc);
+    emit_byte(0xc1);    /* fadd st(1),st(0): 1 + x */
+    emit_byte(0xd8);
+    emit_byte(0xe2+ds); /* fsub 1 - x, x read from st(2+ds) */
+    emit_byte(0xde);
+    emit_byte(0xf9);    /* fdivp (1+x)/(1-x) */
+    emit_byte(0xd9);
+    emit_byte(0xed);    /* fldln2 logN(2) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);    /* fxch swap logN(2) with (1+x)/(1-x) */
+    emit_byte(0xd9);
+    emit_byte(0xf1);    /* fyl2x logN(2)*log2((1+x)/(1-x)) pop */
+    emit_byte(0xd9);
+    emit_byte(0xe8);    /* fld1 1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xe0);    /* fchs -1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xc9);    /* fxch swap */
+    emit_byte(0xd9);
+    emit_byte(0xfd);    /* fscale logN((1+x)/(1-x)) * 2^(-1) */
+    emit_byte(0xdd);
+    emit_byte(0xd9);    /* fstp st(1): drop the -1.0 */
+    tos_make(d);        /* store y=atanh(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s))
+{
+    int ds,tr;
+    /* Emit x87 code for d = sinh(s) = ((e^x)-(e^-x))/2; needs three temporaries on the x87 stack. */
+    tr=live.onstack[live.tos+3]; /* presumably >=0 means slot tos+3 is occupied - TODO confirm; NOTE(review): index is not bounds-checked here */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds);  /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xea);     /* fldl2e log2(e) */
+    emit_byte(0xd8);
+    emit_byte(0xc9);     /* fmul x*log2(e) */
+    emit_byte(0xdd);
+    emit_byte(0xd1);     /* fst copy x*log2(e) */
+    if (tr>=0) {         /* spill the register that was at tos to the CPU stack */
+       emit_byte(0xd9);
+       emit_byte(0xca); /* fxch swap with temp-reg */
+       emit_byte(0x83);
+       emit_byte(0xc4);
+       emit_byte(0xf4); /* add -12 to esp */
+       emit_byte(0xdb);
+       emit_byte(0x3c);
+       emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */
+    }
+    emit_byte(0xd9);
+    emit_byte(0xe0);     /* fchs -x*log2(e) */
+    emit_byte(0xd9);
+    emit_byte(0xc0);     /* fld -x*log2(e) again */
+    emit_byte(0xd9);
+    emit_byte(0xfc);     /* frndint int(-x*log2(e)) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);     /* fxch swap */
+    emit_byte(0xd8);
+    emit_byte(0xe1);     /* fsub -x*log2(e) - int(-x*log2(e))  */
+    emit_byte(0xd9);
+    emit_byte(0xf0);     /* f2xm1 (2^frac(-x))-1 */
+    emit_byte(0xd8);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* fadd (2^frac(-x))-1 + 1 */
+    emit_byte(0xd9);
+    emit_byte(0xfd);     /* fscale (2^frac(-x))*2^int(-x*log2(e)) = e^-x */
+    emit_byte(0xd9);
+    emit_byte(0xca);     /* fxch swap e^-x with x*log2(e) in tr */
+    emit_byte(0xdd);
+    emit_byte(0xd1);     /* fst copy x*log2(e) */
+    emit_byte(0xd9);
+    emit_byte(0xfc);     /* frndint int(x*log2(e)) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);     /* fxch swap */
+    emit_byte(0xd8);
+    emit_byte(0xe1);     /* fsub x*log2(e) - int(x*log2(e))  */
+    emit_byte(0xd9);
+    emit_byte(0xf0);     /* f2xm1 (2^frac(x))-1 */
+    emit_byte(0xd8);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* fadd (2^frac(x))-1 + 1 */
+    emit_byte(0xd9);
+    emit_byte(0xfd);     /* fscale (2^frac(x))*2^int(x*log2(e)) = e^x */
+    emit_byte(0xdd);
+    emit_byte(0xd9);     /* fstp copy e^x & pop */
+    if (tr>=0) {         /* reload the spilled register, then st(0)=e^-x, st(1)=e^x */
+       emit_byte(0xdb);
+       emit_byte(0x2c);
+       emit_byte(0x24); /* fld load temp-reg from [esp] */
+       emit_byte(0x83);
+       emit_byte(0xc4);
+       emit_byte(0x0c); /* add +12 to esp */
+       emit_byte(0xd9);
+       emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */
+       emit_byte(0xde);
+       emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */
+    }
+    else {               /* no spill: st(0)=e^x, st(1)=e^-x, hence the reversed form */
+       emit_byte(0xde);
+       emit_byte(0xe1); /* fsubrp (e^x)-(e^-x) */
+    }
+    emit_byte(0xd9);
+    emit_byte(0xe8);     /* fld 1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xe0);     /* fchs -1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xc9);     /* fxch swap */
+    emit_byte(0xd9);
+    emit_byte(0xfd);     /* fscale ((e^x)-(e^-x))/2 */
+    emit_byte(0xdd);
+    emit_byte(0xd9);     /* fstp copy & pop */
+    tos_make(d);         /* store y=sinh(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s))
+{
+    int ds,tr;
+    /* Emit x87 code for d = cosh(s) = ((e^x)+(e^-x))/2; needs three temporaries on the x87 stack. */
+    tr=live.onstack[live.tos+3]; /* presumably >=0 means slot tos+3 is occupied - TODO confirm; NOTE(review): index is not bounds-checked here */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds);  /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xea);     /* fldl2e log2(e) */
+    emit_byte(0xd8);
+    emit_byte(0xc9);     /* fmul x*log2(e) */
+    emit_byte(0xdd);
+    emit_byte(0xd1);     /* fst copy x*log2(e) */
+    if (tr>=0) {         /* spill the register that was at tos to the CPU stack */
+       emit_byte(0xd9);
+       emit_byte(0xca); /* fxch swap with temp-reg */
+       emit_byte(0x83);
+       emit_byte(0xc4);
+       emit_byte(0xf4); /* add -12 to esp */
+       emit_byte(0xdb);
+       emit_byte(0x3c);
+       emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */
+    }
+    emit_byte(0xd9);
+    emit_byte(0xe0);     /* fchs -x*log2(e) */
+    emit_byte(0xd9);
+    emit_byte(0xc0);     /* fld -x*log2(e) again */
+    emit_byte(0xd9);
+    emit_byte(0xfc);     /* frndint int(-x*log2(e)) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);     /* fxch swap */
+    emit_byte(0xd8);
+    emit_byte(0xe1);     /* fsub -x*log2(e) - int(-x*log2(e))  */
+    emit_byte(0xd9);
+    emit_byte(0xf0);     /* f2xm1 (2^frac(-x))-1 */
+    emit_byte(0xd8);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* fadd (2^frac(-x))-1 + 1 */
+    emit_byte(0xd9);
+    emit_byte(0xfd);     /* fscale (2^frac(-x))*2^int(-x*log2(e)) = e^-x */
+    emit_byte(0xd9);
+    emit_byte(0xca);     /* fxch swap e^-x with x*log2(e) in tr */
+    emit_byte(0xdd);
+    emit_byte(0xd1);     /* fst copy x*log2(e) */
+    emit_byte(0xd9);
+    emit_byte(0xfc);     /* frndint int(x*log2(e)) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);     /* fxch swap */
+    emit_byte(0xd8);
+    emit_byte(0xe1);     /* fsub x*log2(e) - int(x*log2(e))  */
+    emit_byte(0xd9);
+    emit_byte(0xf0);     /* f2xm1 (2^frac(x))-1 */
+    emit_byte(0xd8);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* fadd (2^frac(x))-1 + 1 */
+    emit_byte(0xd9);
+    emit_byte(0xfd);     /* fscale (2^frac(x))*2^int(x*log2(e)) = e^x */
+    emit_byte(0xdd);
+    emit_byte(0xd9);     /* fstp copy e^x & pop */
+    if (tr>=0) {         /* reload the spilled register and move it back below the operands */
+       emit_byte(0xdb);
+       emit_byte(0x2c);
+       emit_byte(0x24); /* fld load temp-reg from [esp] */
+       emit_byte(0x83);
+       emit_byte(0xc4);
+       emit_byte(0x0c); /* add +12 to esp */
+       emit_byte(0xd9);
+       emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */
+    }
+    emit_byte(0xde);
+    emit_byte(0xc1);     /* faddp (e^x)+(e^-x) */
+    emit_byte(0xd9);
+    emit_byte(0xe8);     /* fld 1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xe0);     /* fchs -1.0 */
+    emit_byte(0xd9);
+    emit_byte(0xc9);     /* fxch swap */
+    emit_byte(0xd9);
+    emit_byte(0xfd);     /* fscale ((e^x)+(e^-x))/2 */
+    emit_byte(0xdd);
+    emit_byte(0xd9);     /* fstp copy & pop */
+    tos_make(d);         /* store y=cosh(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s))
+LOWFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s))
+{
+    int ds,tr;
+    /* Emit x87 code for d = tanh(s) = ((e^x)-(e^-x))/((e^x)+(e^-x)); needs three x87 temporaries. */
+    tr=live.onstack[live.tos+3]; /* presumably >=0 means slot tos+3 is occupied - TODO confirm; NOTE(review): index is not bounds-checked here */
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds);  /* fld x */
+    emit_byte(0xd9);
+    emit_byte(0xea);     /* fldl2e log2(e) */
+    emit_byte(0xd8);
+    emit_byte(0xc9);     /* fmul x*log2(e) */
+    emit_byte(0xdd);
+    emit_byte(0xd1);     /* fst copy x*log2(e) */
+    if (tr>=0) {         /* spill the register that was at tos to the CPU stack */
+       emit_byte(0xd9);
+       emit_byte(0xca); /* fxch swap with temp-reg */
+       emit_byte(0x83);
+       emit_byte(0xc4);
+       emit_byte(0xf4); /* add -12 to esp */
+       emit_byte(0xdb);
+       emit_byte(0x3c);
+       emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */
+    }
+    emit_byte(0xd9);
+    emit_byte(0xe0);     /* fchs -x*log2(e) */
+    emit_byte(0xd9);
+    emit_byte(0xc0);     /* fld -x*log2(e) again */
+    emit_byte(0xd9);
+    emit_byte(0xfc);     /* frndint int(-x*log2(e)) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);     /* fxch swap */
+    emit_byte(0xd8);
+    emit_byte(0xe1);     /* fsub -x*log2(e) - int(-x*log2(e))  */
+    emit_byte(0xd9);
+    emit_byte(0xf0);     /* f2xm1 (2^frac(-x))-1 */
+    emit_byte(0xd8);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* fadd (2^frac(-x))-1 + 1 */
+    emit_byte(0xd9);
+    emit_byte(0xfd);     /* fscale (2^frac(-x))*2^int(-x*log2(e)) = e^-x */
+    emit_byte(0xd9);
+    emit_byte(0xca);     /* fxch swap e^-x with x*log2(e) */
+    emit_byte(0xdd);
+    emit_byte(0xd1);     /* fst copy x*log2(e) */
+    emit_byte(0xd9);
+    emit_byte(0xfc);     /* frndint int(x*log2(e)) */
+    emit_byte(0xd9);
+    emit_byte(0xc9);     /* fxch swap */
+    emit_byte(0xd8);
+    emit_byte(0xe1);     /* fsub x*log2(e) - int(x*log2(e))  */
+    emit_byte(0xd9);
+    emit_byte(0xf0);     /* f2xm1 (2^frac(x))-1 */
+    emit_byte(0xd8);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* fadd (2^frac(x))-1 + 1 */
+    emit_byte(0xd9);
+    emit_byte(0xfd);     /* fscale (2^frac(x))*2^int(x*log2(e)) = e^x */
+    emit_byte(0xdd);
+    emit_byte(0xd1);     /* fst copy e^x over int(x*log2(e)) */
+    emit_byte(0xd8);
+    emit_byte(0xc2);     /* fadd (e^x)+(e^-x) */
+    emit_byte(0xd9);
+    emit_byte(0xca);     /* fxch swap with e^-x */
+    emit_byte(0xde);
+    emit_byte(0xe9);     /* fsubp (e^x)-(e^-x) */
+    if (tr>=0) {         /* reload the spilled register, then st(0)=sum, st(1)=difference */
+       emit_byte(0xdb);
+       emit_byte(0x2c);
+       emit_byte(0x24); /* fld load temp-reg from [esp] */
+       emit_byte(0x83);
+       emit_byte(0xc4);
+       emit_byte(0x0c); /* add +12 to esp */
+       emit_byte(0xd9);
+       emit_byte(0xca); /* fxch swap temp-reg with (e^x)+(e^-x) */
+       emit_byte(0xde);
+       emit_byte(0xf9); /* fdivp ((e^x)-(e^-x))/((e^x)+(e^-x)) */
+    }
+    else {               /* no spill: st(0)=difference, st(1)=sum, hence the reversed form */
+       emit_byte(0xde);
+       emit_byte(0xf1); /* fdivrp ((e^x)-(e^-x))/((e^x)+(e^-x)) */
+    }
+    tos_make(d);         /* store y=tanh(x) */
+}
+LENDFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s))
 
 LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
 {
     int ds;
 
     if (d!=s) {
-       usereg(s);
        ds=stackpos(s);
        emit_byte(0xd9);
        emit_byte(0xc0+ds); /* duplicate source */
@@ -3000,9 +3516,6 @@ LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
 {
     int ds;
 
-    usereg(s);
-    usereg(d);
-    
     if (live.spos[s]==live.tos) {
        /* Source is on top of stack */
        ds=stackpos(d);
@@ -3022,9 +3535,6 @@ LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
 LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
 {
     int ds;
-
-    usereg(s);
-    usereg(d);
     
     if (live.spos[s]==live.tos) {
        /* Source is on top of stack */
@@ -3045,9 +3555,6 @@ LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
 LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
 {
     int ds;
-
-    usereg(s);
-    usereg(d);
     
     make_tos(d);
     ds=stackpos(s);
@@ -3060,9 +3567,6 @@ LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
 LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
 {
     int ds;
-
-    usereg(s);
-    usereg(d);
     
     if (live.spos[s]==live.tos) {
        /* Source is on top of stack */
@@ -3083,9 +3587,6 @@ LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
 LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
 {
     int ds;
-
-    usereg(s);
-    usereg(d);
     
     if (live.spos[s]==live.tos) {
        /* Source is on top of stack */
@@ -3106,19 +3607,14 @@ LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
 LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
 {
     int ds;
-
-    usereg(s);
-    usereg(d);
     
-    make_tos2(d,s);
-    ds=stackpos(s);
-
-    if (ds!=1) {
-       printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
-       abort();
-    }
+    make_tos(s);        /* tos=x */
+    ds=stackpos(d);
     emit_byte(0xd9);
-    emit_byte(0xf8); /* take rem from dest by source */
+    emit_byte(0xc0+ds); /* fld y */
+    emit_byte(0xd9);
+    emit_byte(0xf8);    /* fprem rem(y/x) */
+    tos_make(d);        /* store y=rem(y/x) */
 }
 LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
 
@@ -3126,18 +3622,13 @@ LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
 {
     int ds;
 
-    usereg(s);
-    usereg(d);
-    
-    make_tos2(d,s);
-    ds=stackpos(s);
-
-    if (ds!=1) {
-       printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
-       abort();
-    }
+    make_tos(s);        /* tos=x */
+    ds=stackpos(d);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* fld y */
     emit_byte(0xd9);
-    emit_byte(0xf5); /* take rem1 from dest by source */
+    emit_byte(0xf5);    /* fprem1 rem1(y/x), x stays in st(1) */
+    tos_make(d);        /* store y=rem1(y/x) */
 }
 LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
 
diff --git a/compemu_raw_x86_old.c b/compemu_raw_x86_old.c
new file mode 100755 (executable)
index 0000000..3670a3c
--- /dev/null
@@ -0,0 +1,3179 @@
+/* This should eventually end up in machdep/, but for now, x86 is the
+   only target, and it's easier this way... */
+
+/*************************************************************************
+ * Some basic information about the target CPU                           *
+ *************************************************************************/
+
+#define EAX 0
+#define ECX 1
+#define EDX 2
+#define EBX 3
+
+/* The register in which subroutines return an integer return value */
+#define REG_RESULT 0
+
+/* The registers subroutines take their first and second argument in */
+#define REG_PAR1 0
+#define REG_PAR2 2
+
+/* Three registers that are not used for any of the above */
+#define REG_NOPAR1 6
+#define REG_NOPAR2 5
+#define REG_NOPAR3 3
+
+#define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
+#define REG_PC_TMP 1 /* Another register that is not the above */
+
+#define SHIFTCOUNT_NREG 1  /* Register that can be used for shiftcount.
+                             -1 if any reg will do */
+#define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */
+#define MUL_NREG2 2 /* %edx will hold the high 32 bits */
+
+uae_s8 always_used[]={4,-1};
+uae_s8 can_byte[]={0,1,2,3,-1};
+uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
+
+uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
+
+/* This *should* be the same as call_saved. But:
+   - We might not really know which registers are saved, and which aren't,
+     so we need to preserve some, but don't want to rely on everyone else
+     also saving those registers
+   - Special registers (such as the stack pointer) should not be "preserved"
+     by pushing, even though they are "saved" across function calls
+*/
+uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
+
+/* Whether classes of instructions do or don't clobber the native flags */
+#define CLOBBER_MOV
+#define CLOBBER_LEA
+#define CLOBBER_CMOV
+#define CLOBBER_POP
+#define CLOBBER_PUSH
+#define CLOBBER_SUB  clobber_flags()
+#define CLOBBER_SBB  clobber_flags()
+#define CLOBBER_CMP  clobber_flags()
+#define CLOBBER_ADD  clobber_flags()
+#define CLOBBER_ADC  clobber_flags()
+#define CLOBBER_AND  clobber_flags()
+#define CLOBBER_OR   clobber_flags()
+#define CLOBBER_XOR  clobber_flags()
+
+#define CLOBBER_ROL  clobber_flags()
+#define CLOBBER_ROR  clobber_flags()
+#define CLOBBER_SHLL clobber_flags()
+#define CLOBBER_SHRL clobber_flags()
+#define CLOBBER_SHRA clobber_flags()
+#define CLOBBER_TEST clobber_flags()
+#define CLOBBER_CL16 
+#define CLOBBER_CL8  
+#define CLOBBER_SE16
+#define CLOBBER_SE8
+#define CLOBBER_ZE16
+#define CLOBBER_ZE8
+#define CLOBBER_SW16 clobber_flags()
+#define CLOBBER_SW32
+#define CLOBBER_SETCC
+#define CLOBBER_MUL  clobber_flags()
+#define CLOBBER_BT   clobber_flags()
+#define CLOBBER_BSF  clobber_flags()
+
+/*************************************************************************
+ * Actual encoding of the instructions on the target CPU                 *
+ *************************************************************************/
+
+static int have_cmov=0;  /* We need to generate different code if 
+                           we don't have cmov */
+
+#include "compemu_optimizer_x86.c"
+
+static uae_u16 swap16(uae_u16 x) /* Return x with its two bytes exchanged. */
+{
+    return ((x&0xff00)>>8)|((x&0x00ff)<<8); /* high byte moves down, low byte moves up */
+}
+
+static uae_u32 swap32(uae_u32 x) /* Return x with its four bytes in reverse order. */
+{
+    return ((x&0xff00)<<8)|((x&0x00ff)<<24)|((x&0xff0000)>>8)|((x&0xff000000)>>24); /* byte1->byte2, byte0->byte3, byte2->byte1, byte3->byte0 */
+}
+
+static __inline__ int isbyte(uae_s32 x) /* Nonzero when x lies in the signed 8-bit range. */
+{
+  return (x>=-128 && x<=127); /* used by the emitters below to pick the short imm8 encoding */
+}
+
+LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) /* Emit a one-byte PUSH of 32-bit register r. */
+{
+       emit_byte(0x50+r); /* opcode 0x50+reg: PUSH r32 */
+}
+LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
+
+LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) /* Emit a one-byte POP into 32-bit register r. */
+{
+       emit_byte(0x58+r); /* opcode 0x58+reg: POP r32 */
+}
+LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
+
+LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) /* Emit BT r, imm8: test bit i of register r. */
+{
+       emit_byte(0x0f);
+       emit_byte(0xba); /* 0F BA: bit-test group with an imm8 bit index */
+       emit_byte(0xe0+r); /* ModRM: register-direct, opcode extension /4 = BT */
+       emit_byte(i); /* bit index, emitted as a single byte */
+}
+LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) /* Emit BT r, b: test the bit of r selected by register b. */
+{
+       emit_byte(0x0f);
+       emit_byte(0xa3); /* 0F A3: BT r/m32, r32 */
+       emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg=b (bit index), rm=r (operand) */
+}
+LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) /* Emit BTC r, imm8: test and complement bit i of register r. */
+{
+       emit_byte(0x0f);
+       emit_byte(0xba); /* 0F BA: bit-test group with an imm8 bit index */
+       emit_byte(0xf8+r); /* ModRM: register-direct, opcode extension /7 = BTC */
+       emit_byte(i); /* bit index, emitted as a single byte */
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) /* Emit BTC r, b: test and complement the bit of r selected by register b. */
+{
+       emit_byte(0x0f);
+       emit_byte(0xbb); /* 0F BB: BTC r/m32, r32 */
+       emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg=b (bit index), rm=r (operand) */
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
+
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) /* Emit BTR r, imm8: test and clear bit i of register r. */
+{
+       emit_byte(0x0f);
+       emit_byte(0xba); /* 0F BA: bit-test group with an imm8 bit index */
+       emit_byte(0xf0+r); /* ModRM: register-direct, opcode extension /6 = BTR */
+       emit_byte(i); /* bit index, emitted as a single byte */
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) /* Emit BTR r, b: test and clear the bit of r selected by register b. */
+{
+       emit_byte(0x0f);
+       emit_byte(0xb3); /* 0F B3: BTR r/m32, r32 */
+       emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg=b (bit index), rm=r (operand) */
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) /* Emit BTS r, imm8: test and set bit i of register r. */
+{
+       emit_byte(0x0f);
+       emit_byte(0xba); /* 0F BA: bit-test group with an imm8 bit index */
+       emit_byte(0xe8+r); /* ModRM: register-direct, opcode extension /5 = BTS */
+       emit_byte(i); /* bit index, emitted as a single byte */
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) /* Emit BTS r, b: test and set the bit of r selected by register b. */
+{
+       emit_byte(0x0f);
+       emit_byte(0xab); /* 0F AB: BTS r/m32, r32 */
+       emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg=b (bit index), rm=r (operand) */
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) /* Emit a 16-bit SUB of immediate i from register d. */
+{
+    emit_byte(0x66); /* operand-size prefix: selects the 16-bit form */
+    if (isbyte(i)) { /* short form: imm8 is sign-extended by the CPU */
+       emit_byte(0x83);
+       emit_byte(0xe8+d); /* ModRM: register-direct, opcode extension /5 = SUB */
+       emit_byte(i);
+    }
+    else { /* long form: full 16-bit immediate */
+       emit_byte(0x81);
+       emit_byte(0xe8+d); /* ModRM: register-direct, opcode extension /5 = SUB */
+       emit_word(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
+
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) /* Emit MOV dword [d], s: store immediate s at absolute address d. */
+{
+    emit_byte(0xc7); /* C7 /0: MOV r/m32, imm32 */
+    emit_byte(0x05); /* ModRM: mod=00, rm=101 -> absolute disp32 operand */
+    emit_long(d); /* the 32-bit address */
+    emit_long(s); /* the 32-bit immediate */
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+{
+    emit_byte(0x66);
+    emit_byte(0xc7);
+    emit_byte(0x05);
+    emit_long(d);
+    emit_word(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+{
+    emit_byte(0xc6);
+    emit_byte(0x05);
+    emit_long(d);
+    emit_byte(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
+{
+    emit_byte(0xc0);
+    emit_byte(0x05);
+    emit_long(d);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
+{
+    emit_byte(0xc0);
+    emit_byte(0xc0+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
+{
+    emit_byte(0x66);
+    emit_byte(0xc1);
+    emit_byte(0xc0+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
+{
+    emit_byte(0xc1);
+    emit_byte(0xc0+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) /* Emit ROL d, CL: rotate 32-bit register d left by the count in CL. */
+{
+    emit_byte(0xd3); /* D3: shift/rotate r/m32 by CL */
+    emit_byte(0xc0+d); /* ModRM: register-direct, /0 = ROL; r is never encoded - presumably the caller has placed it in ECX (see SHIFTCOUNT_NREG) */
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
+{
+    emit_byte(0x66);
+    emit_byte(0xd3);
+    emit_byte(0xc0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
+{
+    emit_byte(0xd2);
+    emit_byte(0xc0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
+{
+    emit_byte(0xd3);
+    emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
+{
+    emit_byte(0x66);
+    emit_byte(0xd3);
+    emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
+{
+    emit_byte(0xd2);
+    emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
+{
+    emit_byte(0xc0);
+    emit_byte(0xc8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
+{
+    emit_byte(0x66);
+    emit_byte(0xc1);
+    emit_byte(0xc8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
+{
+    emit_byte(0xc1);
+    emit_byte(0xc8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
+{
+    emit_byte(0xd3);
+    emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
+{
+    emit_byte(0x66);
+    emit_byte(0xd3);
+    emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
+{
+    emit_byte(0xd2);
+    emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
+{
+    emit_byte(0xd3);
+    emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
+{
+    emit_byte(0x66);
+    emit_byte(0xd3);
+    emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
+{
+    emit_byte(0xd2);
+    emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
+{
+    emit_byte(0xd3);
+    emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
+{
+    emit_byte(0x66);
+    emit_byte(0xd3);
+    emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+{
+    emit_byte(0xd2);
+    emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+{
+    emit_byte(0xc1);
+    emit_byte(0xe0+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+{
+    emit_byte(0x66);
+    emit_byte(0xc1);
+    emit_byte(0xe0+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+{
+    emit_byte(0xc0);
+    emit_byte(0xe0+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+{
+    emit_byte(0xc1);
+    emit_byte(0xe8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+{
+    emit_byte(0x66);
+    emit_byte(0xc1);
+    emit_byte(0xe8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+{
+    emit_byte(0xc0);
+    emit_byte(0xe8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
+{
+    emit_byte(0xc1);
+    emit_byte(0xf8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
+{
+    emit_byte(0x66);
+    emit_byte(0xc1);
+    emit_byte(0xf8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
+{
+    emit_byte(0xc0);
+    emit_byte(0xf8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) /* Emit SAHF (load flags from AH); the argument is not encoded. */
+{
+    emit_byte(0x9e); /* 9E: SAHF */
+}
+LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
+
+LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) /* Emit CPUID; the argument is not encoded. */
+{
+    emit_byte(0x0f);
+    emit_byte(0xa2); /* 0F A2: CPUID */
+}
+LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
+
+LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) /* Emit LAHF (store flags into AH); the argument is not encoded. */
+{
+    emit_byte(0x9f); /* 9F: LAHF */
+}
+LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
+
+LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) /* Emit SETcc d: set byte register d to 1 if condition cc holds, else 0. */
+{
+    emit_byte(0x0f);
+    emit_byte(0x90+cc); /* 0F 90+cc: SETcc r/m8 */
+    emit_byte(0xc0+d); /* ModRM: register-direct, rm=d */
+}
+LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
+
+LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) /* Emit SETcc byte [d]: store 1 or 0 at absolute address d according to cc. */
+{
+    emit_byte(0x0f);
+    emit_byte(0x90+cc); /* 0F 90+cc: SETcc r/m8 */
+    emit_byte(0x05); /* ModRM: mod=00, rm=101 -> absolute disp32 operand */
+    emit_long(d); /* the 32-bit address */
+}
+LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
+
+LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) /* Emit "if condition cc holds, d = s" for 32-bit registers. */
+{
+    if (have_cmov) {
+       emit_byte(0x0f);
+       emit_byte(0x40+cc); /* 0F 40+cc: CMOVcc r32, r/m32 */
+       emit_byte(0xc0+8*d+s); /* ModRM: register-direct, reg=d (destination), rm=s (source) */
+    }
+    else { /* replacement using branch and mov */
+       int uncc=(cc^1); /* flipping the low bit yields the opposite x86 condition code */
+       emit_byte(0x70+uncc); /* 70+cc: short Jcc, taken when cc does NOT hold */
+       emit_byte(2);  /* skip next 2 bytes if not cc=true */
+       emit_byte(0x89); /* 89 /r: MOV r/m32, r32 */
+       emit_byte(0xc0+8*s+d); /* ModRM: register-direct, reg=s (source), rm=d (destination) */
+    }
+}
+LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
+
+LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
+{
+    emit_byte(0x0f);
+    emit_byte(0xbc);
+    emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
+{
+    emit_byte(0x0f);
+    emit_byte(0xbf);
+    emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
+{
+    emit_byte(0x0f);
+    emit_byte(0xbe);
+    emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
+{
+    emit_byte(0x0f);
+    emit_byte(0xb7);
+    emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
+
+LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
+{
+    emit_byte(0x0f);
+    emit_byte(0xb6);
+    emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
+{
+    emit_byte(0x0f);
+    emit_byte(0xaf);
+    emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) /* Emit the one-operand signed multiply IMUL EDX (EDX:EAX = EAX * EDX). */
+{
+    if (d!=MUL_NREG1 || s!=MUL_NREG2) /* the encoding is fixed to EAX/EDX, so any other pair is a caller bug */
+       abort(); /* note: unlike raw_mul_64_32, no diagnostic is printed first */
+    emit_byte(0xf7);
+    emit_byte(0xea); /* ModRM: register-direct, /5 = IMUL, rm=EDX */
+}
+LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) /* Emit the one-operand unsigned multiply MUL EDX (EDX:EAX = EAX * EDX). */
+{
+    if (d!=MUL_NREG1 || s!=MUL_NREG2) { /* the encoding is fixed to EAX/EDX, so any other pair is a caller bug */
+       printf("Bad register in MUL: d=%d, s=%d\n",d,s);
+       abort();
+    }
+    emit_byte(0xf7);
+    emit_byte(0xe2); /* ModRM: register-direct, /4 = MUL, rm=EDX */
+}
+LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) /* Deliberately unusable: aborts before emitting anything. */
+{
+    abort(); /* %^$&%^$%#^ x86! */
+    emit_byte(0x0f); /* unreachable from here on; these bytes match raw_imul_32_32 (0F AF) */
+    emit_byte(0xaf);
+    emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
+{
+    emit_byte(0x88);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
+{
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
+
+LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) /* Emit MOV d, dword [baser + index*factor]. */
+{
+    int isebp=(baser==5)?0x40:0; /* register 5 (EBP) cannot be a SIB base with mod=00, so switch to mod=01 */
+    int fi;
+    
+    switch(factor) { /* map the scale factor to the 2-bit SIB scale field */
+     case 1: fi=0; break;
+     case 2: fi=1; break;
+     case 4: fi=2; break;
+     case 8: fi=3; break;
+     default: abort(); /* any other factor cannot be encoded */
+    }
+
+
+    emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 */
+    emit_byte(0x04+8*d+isebp); /* ModRM: reg=d, rm=100 (SIB follows), mod=00 or 01 */
+    emit_byte(baser+8*index+0x40*fi); /* SIB: scale=fi, index, base=baser */
+    if (isebp)
+       emit_byte(0x00); /* the zero disp8 required by mod=01 */
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+{
+    int fi;
+    int isebp;
+    
+    switch(factor) {
+     case 1: fi=0; break;
+     case 2: fi=1; break;
+     case 4: fi=2; break;
+     case 8: fi=3; break;
+     default: abort();
+    }
+    isebp=(baser==5)?0x40:0;
+    
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(0x04+8*d+isebp);
+    emit_byte(baser+8*index+0x40*fi);
+    if (isebp)
+       emit_byte(0x00);
+}
+LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+{
+   int fi;
+  int isebp;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+  isebp=(baser==5)?0x40:0;
+
+   emit_byte(0x8a);
+    emit_byte(0x04+8*d+isebp);
+    emit_byte(baser+8*index+0x40*fi);
+    if (isebp)
+       emit_byte(0x00);
+}
+LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+{
+  int fi;
+  int isebp;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+
+  
+  isebp=(baser==5)?0x40:0;
+
+    emit_byte(0x89);
+    emit_byte(0x04+8*s+isebp);
+    emit_byte(baser+8*index+0x40*fi);
+    if (isebp)
+       emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+{
+  int fi;
+  int isebp;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+  isebp=(baser==5)?0x40:0;
+
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(0x04+8*s+isebp);
+    emit_byte(baser+8*index+0x40*fi);
+    if (isebp)
+       emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+{
+  int fi;
+  int isebp;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+  isebp=(baser==5)?0x40:0;
+
+    emit_byte(0x88);
+    emit_byte(0x04+8*s+isebp);
+    emit_byte(baser+8*index+0x40*fi);
+    if (isebp)
+       emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) /* Emit MOV dword [base + baser + index*factor], s. */
+{
+  int fi;
+
+  switch(factor) { /* map the scale factor to the 2-bit SIB scale field */
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort(); /* any other factor cannot be encoded */
+  }
+
+    emit_byte(0x89); /* 89 /r: MOV r/m32, r32 */
+    emit_byte(0x84+8*s); /* ModRM: mod=10 (disp32), reg=s, rm=100 (SIB follows) */
+    emit_byte(baser+8*index+0x40*fi); /* SIB: scale=fi, index, base=baser */
+    emit_long(base); /* the 32-bit displacement */
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+{
+  int fi;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(0x84+8*s);
+    emit_byte(baser+8*index+0x40*fi);
+    emit_long(base);
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+{
+  int fi;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+
+    emit_byte(0x88);
+    emit_byte(0x84+8*s);
+    emit_byte(baser+8*index+0x40*fi);
+    emit_long(base);
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+
+LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+  int fi;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+
+    emit_byte(0x8b);
+    emit_byte(0x84+8*d);
+    emit_byte(baser+8*index+0x40*fi);
+    emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+  int fi;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(0x84+8*d);
+    emit_byte(baser+8*index+0x40*fi);
+    emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+  int fi;
+
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+
+    emit_byte(0x8a);
+    emit_byte(0x84+8*d);
+    emit_byte(baser+8*index+0x40*fi);
+    emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) /* Emit MOV d, dword [base + index*factor] with no base register. */
+{
+  int fi;
+  switch(factor) { /* map the scale factor to the 2-bit SIB scale field */
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: 
+    fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
+    abort();
+  }
+    emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 */
+    emit_byte(0x04+8*d); /* ModRM: mod=00, reg=d, rm=100 (SIB follows) */
+    emit_byte(0x05+8*index+64*fi); /* SIB: scale=fi, index, base=101 -> disp32 with no base register */
+    emit_long(base); /* the 32-bit displacement */
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) /* Emit "if cond holds, d = dword [base + index*factor]". */
+{
+    int fi;
+    switch(factor) { /* map the scale factor to the 2-bit SIB scale field */
+     case 1: fi=0; break;
+     case 2: fi=1; break;
+     case 4: fi=2; break;
+     case 8: fi=3; break;
+     default: 
+       fprintf(stderr,"Bad factor %d in cmov_l_rm_indexed!\n",factor);
+       abort();
+    }
+    if (have_cmov) {
+       emit_byte(0x0f);
+       emit_byte(0x40+cond); /* 0F 40+cc: CMOVcc r32, r/m32 */
+       emit_byte(0x04+8*d); /* ModRM: mod=00, reg=d, rm=100 (SIB follows) */
+       emit_byte(0x05+8*index+64*fi); /* SIB: scale=fi, index, base=101 -> disp32 with no base register */
+       emit_long(base); /* the 32-bit displacement */
+    }
+    else { /* replacement using branch and mov */
+       int uncc=(cond^1); /* flipping the low bit yields the opposite x86 condition code */
+       emit_byte(0x70+uncc); /* 70+cc: short Jcc, taken when cond does NOT hold */
+       emit_byte(7);  /* skip next 7 bytes if not cc=true */
+       emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 (opcode + ModRM + SIB + disp32 = 7 bytes) */
+       emit_byte(0x04+8*d);
+       emit_byte(0x05+8*index+64*fi);
+       emit_long(base);
+    }
+}
+LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
+
+LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) /* Emit "if cond holds, d = dword [mem]"; NOTE(review): d is W4 here but RW4 in raw_cmov_l_rr although d keeps its value when cond is false - confirm */
+{
+    if (have_cmov) {
+       emit_byte(0x0f);
+       emit_byte(0x40+cond); /* 0F 40+cc: CMOVcc r32, r/m32 */
+       emit_byte(0x05+8*d); /* ModRM: mod=00, reg=d, rm=101 -> absolute disp32 operand */
+       emit_long(mem); /* the 32-bit address */
+    }
+    else { /* replacement using branch and mov */
+       int uncc=(cond^1); /* flipping the low bit yields the opposite x86 condition code */
+       emit_byte(0x70+uncc); /* 70+cc: short Jcc, taken when cond does NOT hold */
+       emit_byte(6);  /* skip next 6 bytes if not cc=true */
+       emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 (opcode + ModRM + disp32 = 6 bytes) */
+       emit_byte(0x05+8*d);
+       emit_long(mem);
+    }
+}
+LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
+
+LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) /* Emit MOV d, dword [s + offset] using the short disp8 form. */
+{
+    emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 */
+    emit_byte(0x40+8*d+s); /* ModRM: mod=01 (disp8), reg=d, rm=s */
+    emit_byte(offset); /* only one displacement byte is emitted, so offset must fit in a signed byte */
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+{
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(0x40+8*d+s);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+{
+    emit_byte(0x8a);
+    emit_byte(0x40+8*d+s);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) /* Emit MOV d, dword [s + offset] using the full disp32 form. */
+{
+    emit_byte(0x8b); /* 8B /r: MOV r32, r/m32 */
+    emit_byte(0x80+8*d+s); /* ModRM: mod=10 (disp32), reg=d, rm=s */
+    emit_long(offset); /* the 32-bit displacement */
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+{
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(0x80+8*d+s);
+    emit_long(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+{
+    emit_byte(0x8a);
+    emit_byte(0x80+8*d+s);
+    emit_long(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+{
+    emit_byte(0xc7);
+    emit_byte(0x40+d);
+    emit_byte(offset);
+    emit_long(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+{
+    emit_byte(0x66);
+    emit_byte(0xc7);
+    emit_byte(0x40+d);
+    emit_byte(offset);
+    emit_word(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+{
+    emit_byte(0xc6);
+    emit_byte(0x40+d);
+    emit_byte(offset);
+    emit_byte(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+{
+    emit_byte(0x89);
+    emit_byte(0x40+8*s+d);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+{
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(0x40+8*s+d);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+{
+    emit_byte(0x88);
+    emit_byte(0x40+8*s+d);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+{
+    emit_byte(0x8d);
+    emit_byte(0x80+8*d+s);
+    emit_long(offset);
+}
+LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+{
+  int fi;
+  
+  switch(factor) {
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort();
+  }
+
+    emit_byte(0x8d);
+    emit_byte(0x84+8*d);
+    emit_byte(0x40*fi+8*index+s);
+    emit_long(offset);
+}
+LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+
+LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) /* Emit LEA d, [s + index*factor]. */
+{
+  int isebp=(s==5)?0x40:0; /* register 5 (EBP) cannot be a SIB base with mod=00, so switch to mod=01 */
+  int fi;
+  
+  switch(factor) { /* map the scale factor to the 2-bit SIB scale field */
+  case 1: fi=0; break;
+  case 2: fi=1; break;
+  case 4: fi=2; break;
+  case 8: fi=3; break;
+  default: abort(); /* any other factor cannot be encoded */
+  }
+
+    emit_byte(0x8d); /* 8D /r: LEA r32, m */
+    emit_byte(0x04+8*d+isebp); /* ModRM: reg=d, rm=100 (SIB follows), mod=00 or 01 */
+    emit_byte(0x40*fi+8*index+s); /* SIB: scale=fi, index, base=s */
+    if (isebp)
+      emit_byte(0); /* the zero disp8 required by mod=01 */
+}
+LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+{
+    emit_byte(0x89);
+    emit_byte(0x80+8*s+d);
+    emit_long(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+{
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(0x80+8*s+d);
+    emit_long(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+{
+    emit_byte(0x88);
+    emit_byte(0x80+8*s+d);
+    emit_long(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) /* Emit BSWAP r: reverse the byte order of 32-bit register r. */
+{
+    emit_byte(0x0f);
+    emit_byte(0xc8+r); /* 0F C8+reg: BSWAP r32 */
+}
+LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+
+LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) /* Swap the two bytes of 16-bit register r by emitting ROL r, 8. */
+{
+    emit_byte(0x66); /* operand-size prefix: selects the 16-bit form */
+    emit_byte(0xc1); /* C1: shift/rotate r/m by imm8 */
+    emit_byte(0xc0+r); /* ModRM: register-direct, /0 = ROL */
+    emit_byte(0x08); /* rotate count 8 exchanges the high and low bytes */
+}
+LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+{
+    emit_byte(0x89);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+{
+    emit_byte(0x89);
+    emit_byte(0x05+8*s);
+    emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+{
+    emit_byte(0x8b);
+    emit_byte(0x05+8*d);
+    emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+{
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(0x05+8*s);
+    emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+{
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(0x05+8*d);
+    emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+{
+    emit_byte(0x88);
+    emit_byte(0x05+8*s);
+    emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+{
+    emit_byte(0x8a);
+    emit_byte(0x05+8*d);
+    emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+{
+    emit_byte(0xb8+d);
+    emit_long(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+{
+    emit_byte(0x66);
+    emit_byte(0xb8+d);
+    emit_word(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+{
+    emit_byte(0xb0+d);
+    emit_byte(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+
+LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+{
+    emit_byte(0x81);
+    emit_byte(0x15);
+    emit_long(d);
+    emit_long(s);
+}
+LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) 
+{
+    emit_byte(0x81);
+    emit_byte(0x05);
+    emit_long(d);
+    emit_long(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) 
+
+LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) 
+{
+    emit_byte(0x66);
+    emit_byte(0x81);
+    emit_byte(0x05);
+    emit_long(d);
+    emit_word(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) 
+
+LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) 
+{
+    emit_byte(0x80);
+    emit_byte(0x05);
+    emit_long(d);
+    emit_byte(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) 
+
+LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
+{
+    emit_byte(0xf7);
+    emit_byte(0xc0+d);
+    emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
+{
+    emit_byte(0x85);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
+{
+    emit_byte(0x66);
+    emit_byte(0x85);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
+{
+    emit_byte(0x84);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
+{
+    emit_byte(0x81);
+    emit_byte(0xe0+d);
+    emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
+{
+    emit_byte(0x66);
+    emit_byte(0x81);
+    emit_byte(0xe0+d);
+    emit_word(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
+{
+    emit_byte(0x21);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
+{
+    emit_byte(0x66);
+    emit_byte(0x21);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
+{
+    emit_byte(0x20);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
+{
+    emit_byte(0x81);
+    emit_byte(0xc8+d);
+    emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
+{
+    /* OR r/m32, r32 -- opcode 09 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x09);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
+{
+    /* OR r/m16, r16 -- operand-size prefix 66, then opcode 09 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x66);
+    emit_byte(0x09);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
+{
+    /* OR r/m8, r8 -- opcode 08 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x08);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
+{
+    /* ADC r/m32, r32 -- opcode 11 /r (add with carry) */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x11);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
+{
+    /* ADC r/m16, r16 -- operand-size prefix 66, then opcode 11 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x66);
+    emit_byte(0x11);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+{
+    /* ADC r/m8, r8 -- opcode 10 /r (add with carry) */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x10);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+{
+    /* ADD r/m32, r32 -- opcode 01 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x01);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+{
+    /* ADD r/m16, r16 -- operand-size prefix 66, then opcode 01 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x66);
+    emit_byte(0x01);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+{
+    /* ADD r/m8, r8 -- opcode 00 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x00);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+{
+    /* SUB r/m32, imm -- short form 83 /5 ib when isbyte(i) says the
+       immediate fits, long form 81 /5 id otherwise */
+    const int short_form=isbyte(i);
+    const int modrm=0xe8+d;   /* mod=11, reg=/5 (SUB), rm=d */
+
+    if (!short_form) {
+       emit_byte(0x81);
+       emit_byte(modrm);
+       emit_long(i);
+       return;
+    }
+    emit_byte(0x83);
+    emit_byte(modrm);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+{
+    /* SUB r/m8, imm8 -- opcode 80 /5 on byte register d */
+    const int modrm=0xe8+d;   /* mod=11, reg=/5 (SUB), rm=d */
+
+    emit_byte(0x80);
+    emit_byte(modrm);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+{
+    /* ADD r/m32, imm -- short form 83 /0 ib when isbyte(i) says the
+       immediate fits, long form 81 /0 id otherwise */
+    const int short_form=isbyte(i);
+    const int modrm=0xc0+d;   /* mod=11, reg=/0 (ADD), rm=d */
+
+    if (!short_form) {
+       emit_byte(0x81);
+       emit_byte(modrm);
+       emit_long(i);
+       return;
+    }
+    emit_byte(0x83);
+    emit_byte(modrm);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+{
+    /* ADD r/m16, imm -- always 66-prefixed; 83 /0 ib when isbyte(i) says the
+       immediate fits, 81 /0 iw otherwise */
+    const int short_form=isbyte(i);
+    const int modrm=0xc0+d;   /* mod=11, reg=/0 (ADD), rm=d */
+
+    emit_byte(0x66);
+    if (!short_form) {
+       emit_byte(0x81);
+       emit_byte(modrm);
+       emit_word(i);
+       return;
+    }
+    emit_byte(0x83);
+    emit_byte(modrm);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+{
+    /* ADD r/m8, imm8 -- opcode 80 /0 on byte register d */
+    const int modrm=0xc0+d;   /* mod=11, reg=/0 (ADD), rm=d */
+
+    emit_byte(0x80);
+    emit_byte(modrm);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+{
+    /* SBB r/m32, r32 -- opcode 19 /r (subtract with borrow) */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x19);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+{
+    /* SBB r/m16, r16 -- operand-size prefix 66, then opcode 19 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x66);
+    emit_byte(0x19);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+{
+    /* SBB r/m8, r8 -- opcode 18 /r (subtract with borrow) */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x18);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+{
+    /* SUB r/m32, r32 -- opcode 29 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x29);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+{
+    /* SUB r/m16, r16 -- operand-size prefix 66, then opcode 29 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x66);
+    emit_byte(0x29);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+{
+    /* SUB r/m8, r8 -- opcode 28 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x28);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+{
+    /* CMP r/m32, r32 -- opcode 39 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x39);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+{
+    /* CMP r/m32, imm32 -- opcode 81 /7 on register r */
+    const int modrm=0xf8+r;   /* mod=11, reg=/7 (CMP), rm=r */
+
+    emit_byte(0x81);
+    emit_byte(modrm);
+    emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+{
+    /* CMP r/m16, r16 -- operand-size prefix 66, then opcode 39 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x66);
+    emit_byte(0x39);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+{
+    /* CMP r/m8, imm8 -- opcode 80 /7 on byte register d */
+    const int modrm=0xf8+d;   /* mod=11, reg=/7 (CMP), rm=d */
+
+    emit_byte(0x80);
+    emit_byte(modrm);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+{
+    /* CMP r/m8, r8 -- opcode 38 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x38);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+
+LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+{
+    /* CMP dword ptr [offset + index*factor], d -- opcode 39 /r with a SIB
+       byte.  Only factors 1, 2, 4 and 8 are encodable; anything else aborts
+       before a single byte is emitted. */
+    int scale_bits;
+
+    /* Find the 2-bit SIB scale field, i.e. log2(factor). */
+    for (scale_bits=0; scale_bits<4; scale_bits++) {
+       if (factor==(1<<scale_bits))
+           break;
+    }
+    if (scale_bits==4)
+       abort();
+
+    emit_byte(0x39);
+    emit_byte(0x04+8*d);                    /* mod=00, reg=d, rm=100: SIB follows */
+    emit_byte(5+8*index+0x40*scale_bits);   /* base=101: disp32, no base register */
+    emit_long(offset);
+}
+LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+{
+    /* XOR r/m32, r32 -- opcode 31 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x31);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+{
+    /* XOR r/m16, r16 -- operand-size prefix 66, then opcode 31 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x66);
+    emit_byte(0x31);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+{
+    /* XOR r/m8, r8 -- opcode 30 /r */
+    const int modrm=0xc0+8*s+d;   /* mod=11, reg=s, rm=d */
+
+    emit_byte(0x30);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+{
+    /* SUB dword ptr [d], imm32 -- opcode 81 /5 with an absolute disp32 operand */
+    const int opcode=0x81;
+    const int modrm_abs32=0x2d;   /* mod=00, reg=/5 (SUB), rm=101 (disp32) */
+
+    emit_byte(opcode);
+    emit_byte(modrm_abs32);
+    emit_long(d);
+    emit_long(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+
+LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+{
+    /* CMP dword ptr [d], imm32 -- opcode 81 /7 with an absolute disp32 operand */
+    const int opcode=0x81;
+    const int modrm_abs32=0x3d;   /* mod=00, reg=/7 (CMP), rm=101 (disp32) */
+
+    emit_byte(opcode);
+    emit_byte(modrm_abs32);
+    emit_long(d);
+    emit_long(s);
+}
+LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+{
+    /* XCHG r/m32, r32 -- opcode 87 /r */
+    const int modrm=0xc0+8*r1+r2;   /* mod=11, reg=r1, rm=r2 */
+
+    emit_byte(0x87);
+    emit_byte(modrm);
+}
+LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+
+LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
+{
+    /* PUSHFD: push EFLAGS onto the native stack */
+    const int op_pushfd=0x9c;
+
+    emit_byte(op_pushfd);
+}
+LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
+
+LOWFUNC(WRITE,READ,0,raw_popfl,(void))
+{
+    /* POPFD: pop the top of the native stack into EFLAGS */
+    const int op_popfd=0x9d;
+
+    emit_byte(op_popfd);
+}
+LENDFUNC(WRITE,READ,0,raw_popfl,(void))
+
+/*************************************************************************
+ * Unoptimizable stuff --- jump                                          *
+ *************************************************************************/
+
+static __inline__ void raw_call_r(R4 r)
+{
+    /* CALL r32 -- opcode FF /2, register-direct.  lopt_emit_all() is called
+       first, as in every emitter of this section. */
+    const int modrm=0xd0+r;   /* mod=11, reg=/2 (CALL), rm=r */
+
+    lopt_emit_all();
+    emit_byte(0xff);
+    emit_byte(modrm);
+}
+
+static __inline__ void raw_jmp_r(R4 r)
+{
+    /* JMP r32 -- opcode FF /4, register-direct */
+    const int modrm=0xe0+r;   /* mod=11, reg=/4 (JMP), rm=r */
+
+    lopt_emit_all();
+    emit_byte(0xff);
+    emit_byte(modrm);
+}
+
+static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
+{
+    /* JMP dword ptr [base + r*m] -- opcode FF /4 with a SIB byte.
+       m must be 1, 2, 4 or 8; any other multiplier aborts before anything
+       is emitted. */
+    int scale_bits;
+
+    /* Find the 2-bit SIB scale field, i.e. log2(m). */
+    for (scale_bits=0; scale_bits<4; scale_bits++) {
+       if (m==(1u<<scale_bits))
+           break;
+    }
+    if (scale_bits==4)
+       abort();
+
+    lopt_emit_all();
+    emit_byte(0xff);
+    emit_byte(0x24);                        /* mod=00, reg=/4 (JMP), rm=100: SIB follows */
+    emit_byte(0x05+8*r+0x40*scale_bits);    /* base=101: disp32, no base register */
+    emit_long(base);
+}
+
+static __inline__ void raw_jmp_m(uae_u32 base)
+{
+    /* JMP dword ptr [base] -- opcode FF /4 with an absolute disp32 operand */
+    const int modrm_abs32=0x25;   /* mod=00, reg=/4 (JMP), rm=101 (disp32) */
+
+    lopt_emit_all();
+    emit_byte(0xff);
+    emit_byte(modrm_abs32);
+    emit_long(base);
+}
+
+
+static __inline__ void raw_call(uae_u32 t)
+{
+    /* CALL rel32 (opcode E8) to absolute address t */
+    uae_u32 rel32;
+
+    lopt_emit_all();
+    emit_byte(0xe8);
+    /* Read target only now, after the opcode is out: the displacement is
+       taken relative to the end of the 4-byte field written next. */
+    rel32=t-(uae_u32)target-4;
+    emit_long(rel32);
+}
+
+static __inline__ void raw_jmp(uae_u32 t)
+{
+    /* JMP rel32 (opcode E9) to absolute address t */
+    uae_u32 rel32;
+
+    lopt_emit_all();
+    emit_byte(0xe9);
+    /* Read target only now, after the opcode is out: the displacement is
+       taken relative to the end of the 4-byte field written next. */
+    rel32=t-(uae_u32)target-4;
+    emit_long(rel32);
+}
+
+static __inline__ void raw_jl(uae_u32 t)
+{
+    /* JL rel32 (opcode 0F 8C) to absolute address t */
+    uae_u32 rel32;
+
+    lopt_emit_all();
+    emit_byte(0x0f);
+    emit_byte(0x8c);
+    /* Read target only after both opcode bytes are out: the displacement is
+       taken relative to the end of the 4-byte field written next. */
+    rel32=t-(uae_u32)target-4;
+    emit_long(rel32);
+}
+
+static __inline__ void raw_jz(uae_u32 t)
+{
+    /* JZ rel32 (opcode 0F 84) to absolute address t */
+    uae_u32 rel32;
+
+    lopt_emit_all();
+    emit_byte(0x0f);
+    emit_byte(0x84);
+    /* Read target only after both opcode bytes are out: the displacement is
+       taken relative to the end of the 4-byte field written next. */
+    rel32=t-(uae_u32)target-4;
+    emit_long(rel32);
+}
+
+static __inline__ void raw_jnz(uae_u32 t)
+{
+    /* JNZ rel32 (opcode 0F 85) to absolute address t */
+    uae_u32 rel32;
+
+    lopt_emit_all();
+    emit_byte(0x0f);
+    emit_byte(0x85);
+    /* Read target only after both opcode bytes are out: the displacement is
+       taken relative to the end of the 4-byte field written next. */
+    rel32=t-(uae_u32)target-4;
+    emit_long(rel32);
+}
+
+static __inline__ void raw_jnz_l_oponly(void)
+{
+    /* Opcode bytes of JNZ rel32 (0F 85) only; no displacement is emitted
+       here. */
+    const int escape=0x0f;
+    const int jnz_near=0x85;
+
+    lopt_emit_all();
+    emit_byte(escape);
+    emit_byte(jnz_near);
+}
+
+static __inline__ void raw_jcc_l_oponly(int cc)
+{
+    /* Opcode bytes of Jcc rel32 (0F 80+cc) only; no displacement is emitted
+       here. */
+    const int jcc_near=0x80+cc;
+
+    lopt_emit_all();
+    emit_byte(0x0f);
+    emit_byte(jcc_near);
+}
+
+static __inline__ void raw_jnz_b_oponly(void)
+{
+    /* Opcode byte of JNZ rel8 (75) only; no displacement is emitted here. */
+    const int jnz_short=0x75;
+
+    lopt_emit_all();
+    emit_byte(jnz_short);
+}
+
+static __inline__ void raw_jz_b_oponly(void)
+{
+    /* Opcode byte of JZ rel8 (74) only; no displacement is emitted here. */
+    const int jz_short=0x74;
+
+    lopt_emit_all();
+    emit_byte(jz_short);
+}
+
+static __inline__ void raw_jmp_l_oponly(void)
+{
+    /* Opcode byte of JMP rel32 (E9) only; no displacement is emitted here. */
+    const int jmp_near=0xe9;
+
+    lopt_emit_all();
+    emit_byte(jmp_near);
+}
+
+static __inline__ void raw_jmp_b_oponly(void)
+{
+    /* Opcode byte of JMP rel8 (EB) only; no displacement is emitted here. */
+    const int jmp_short=0xeb;
+
+    lopt_emit_all();
+    emit_byte(jmp_short);
+}
+
+static __inline__ void raw_ret(void)
+{
+    /* RET (near return, opcode C3) */
+    const int op_ret=0xc3;
+
+    lopt_emit_all();
+    emit_byte(op_ret);
+}
+
+static __inline__ void raw_nop(void)
+{
+    /* NOP (opcode 90) */
+    const int op_nop=0x90;
+
+    lopt_emit_all();
+    emit_byte(op_nop);
+}
+
+
+/*************************************************************************
+ * Flag handling, to and fro UAE flag register                           *
+ *************************************************************************/
+
+
+#define FLAG_NREG1 0  /* Set to -1 if any register will do */
+
+/*
+ * Emit code that saves the native flags into the FLAGTMP memory slot.
+ *
+ * r - native register that currently holds FLAGTMP.
+ *     NOTE(review): presumably always 0 (EAX, cf. FLAG_NREG1), in which case
+ *     byte register r+4 is AH -- confirm against the callers.
+ *
+ * Byte 0 of the slot receives the result of a condition-0 SETcc (the V flag,
+ * per the original remark below); byte 1 receives byte register r+4, which
+ * is where LAHF has just put the remaining flags.  FLAGTMP is then recorded
+ * as living in memory only and register r is released.
+ */
+static __inline__ void raw_flags_to_reg(int r)
+{
+  raw_lahf(0);  /* Most flags in AH */
+  //raw_setcc(r,0); /* V flag in AL */
+  raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0); 
+  
+#if 1   /* Let's avoid those nasty partial register stalls */
+  //raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r);
+  raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4);
+  //live.state[FLAGTMP].status=CLEAN;
+  live.state[FLAGTMP].status=INMEM;
+  live.state[FLAGTMP].realreg=-1;
+  /* We just "evicted" FLAGTMP. */
+  if (live.nat[r].nholds!=1) {
+      /* Huh? */
+      /* Register r was expected to hold exactly one value (FLAGTMP). */
+      abort();
+  }
+  live.nat[r].nholds=0;
+#endif
+}
+
+#define FLAG_NREG2 0  /* Set to -1 if any register will do */
+static __inline__ void raw_reg_to_flags(int r)
+{
+    /* Comparing the low byte of r with -127 overflows exactly when that byte
+       is 1, which recreates the V flag; SAHF then supplies the other flags. */
+    const int v_probe=-127;
+
+    raw_cmp_b_ri(r,v_probe); /* set V */
+    raw_sahf(0);
+}
+
+/* Apparently, there are enough instructions between flag store and
+   flag reload to avoid the partial memory stall */
+static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
+{
+    /* Load the memory copy of virtual register r into native register
+       target.  The enabled variant uses one 32-bit load; the disabled one
+       would use two byte loads (into target and target+4). */
+    const uae_u32 flag_mem=(uae_u32)live.state[r].mem;
+
+#if 1
+    raw_mov_l_rm(target,flag_mem);
+#else
+    raw_mov_b_rm(target,flag_mem);
+    raw_mov_b_rm(target+4,flag_mem+1);
+#endif
+}
+
+/* FLAGX is byte sized, and we *do* write it at that size */
+static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
+{
+    /* Load the memory copy of virtual register r with the narrowest move
+       that native register target supports: byte, then word, then long. */
+    const uae_u32 src=(uae_u32)live.state[r].mem;
+
+    if (live.nat[target].canbyte) {
+       raw_mov_b_rm(target,src);
+       return;
+    }
+    if (live.nat[target].canword) {
+       raw_mov_w_rm(target,src);
+       return;
+    }
+    raw_mov_l_rm(target,src);
+}
+
+#define NATIVE_FLAG_Z 0x40
+#define NATIVE_CC_EQ  4
+/*
+ * Emit code that recomputes only the native Z flag from register r, leaving
+ * the other bits of EFLAGS as they were.
+ *
+ * f - scratch register; receives EFLAGS and is pushed back at the end.
+ * r - register whose value is tested against zero; it is overwritten.
+ * t - scratch register; loaded with NATIVE_FLAG_Z.
+ *
+ * NOTE(review): raw_cmov_l_rr(r,t,cc) is presumed to move t into r when
+ * condition cc holds -- confirm against its definition.
+ */
+static __inline__ void raw_flags_set_zero(int f, int r, int t)
+{
+    // FIXME: this is really suboptimal
+    raw_pushfl();
+    raw_pop_l_r(f);                     /* f = current EFLAGS */
+    raw_and_l_ri(f,~NATIVE_FLAG_Z);     /* clear the Z bit in the copy */
+    raw_test_l_rr(r,r);                 /* sets native Z if r is zero */
+    raw_mov_l_ri(r,0);
+    raw_mov_l_ri(t,NATIVE_FLAG_Z);
+    raw_cmov_l_rr(r,t,NATIVE_CC_EQ);    /* r = Z mask if the test was "equal" */
+    raw_or_l(f,r);                      /* merge the new Z bit into the copy */
+    raw_push_l_r(f);
+    raw_popfl();                        /* write the copy back to EFLAGS */
+}
+
+static __inline__ void raw_inc_sp(int off)
+{
+    /* Add off to register number 4, which is ESP in the x86 encoding. */
+    const int esp_reg=4;
+
+    raw_add_l_ri(esp_reg,off);
+}
+
+/*************************************************************************
+ * Handling mistaken direct memory access                                *
+ *************************************************************************/
+
+
+#ifdef NATMEM_OFFSET
+#ifdef _WIN32 // %%% BRIAN KING WAS HERE %%%
+#include <winbase.h>
+#else
+#include <asm/sigcontext.h>
+#endif
+#include <signal.h>
+
+#define SIG_READ 1
+#define SIG_WRITE 2
+
+static int in_handler=0;
+static uae_u8 *veccode;
+
+#ifdef _WIN32
+/*
+ * Windows exception filter for access violations raised by JIT-compiled code
+ * that accessed emulated memory directly.
+ *
+ * blah     - exception record and CPU context supplied to the filter.
+ * n_except - exception code; anything other than STATUS_ACCESS_VIOLATION is
+ *            declined.
+ *
+ * The faulting instruction is decoded (only MOV opcodes 0x88/0x89/0x8a/0x8b,
+ * optionally with a 0x66 prefix, inside the compiled-code range).  The access
+ * is then carried out through get_byte/get_word/get_long or
+ * put_byte/put_word/put_long, and execution resumes past the instruction:
+ *   - with currprefs.comp_oldsegv set, by patching the saved register and
+ *     advancing Eip;
+ *   - otherwise, by overwriting the instruction with a jump to a stub built
+ *     at veccode, which restores the original bytes and jumps back.
+ * Finally the translated block containing the instruction is invalidated.
+ *
+ * Returns EXCEPTION_CONTINUE_EXECUTION when the access was handled and
+ * EXCEPTION_CONTINUE_SEARCH otherwise.
+ */
+int EvalException ( LPEXCEPTION_POINTERS blah, int n_except )
+{
+    PEXCEPTION_RECORD pExceptRecord = NULL;
+    PCONTEXT          pContext = NULL;
+
+    uae_u8* i = NULL;   /* address of the faulting instruction */
+    uae_u32 addr = 0;   /* faulting data address */
+    int r=-1;           /* register field of the decoded MOV; -1 = not decoded */
+    int size=4;         /* access size in bytes */
+    int dir=-1;         /* SIG_READ or SIG_WRITE */
+    int len=0;          /* length of the faulting instruction in bytes */
+    int j;
+
+    if( n_except != STATUS_ACCESS_VIOLATION || !canbang)
+        return EXCEPTION_CONTINUE_SEARCH;
+
+    pExceptRecord = blah->ExceptionRecord;
+    pContext = blah->ContextRecord;
+
+    if( pContext )
+    {
+       i = (uae_u8 *)(pContext->Eip);
+    }
+    if( pExceptRecord )
+    {
+       /* For an access violation, ExceptionInformation[1] is the address
+          that was accessed. */
+       addr = (uae_u32)(pExceptRecord->ExceptionInformation[1]);
+    }
+#ifdef JIT_DEBUG
+    /* NOTE(review): i is a pointer but is printed with %x. */
+    write_log("JIT: fault address is 0x%x at 0x%x\n",addr,i);
+#endif
+    if (!canbang || !currprefs.cachesize) 
+    {
+#ifdef JIT_DEBUG
+       write_log("JIT: Not happy! Canbang or cachesize is 0 in SIGSEGV handler!\n");
+#endif
+       return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    if (in_handler) 
+       write_log("JIT: Argh --- Am already in a handler. Shouldn't happen!\n");
+    
+    /* Only instructions inside the translation cache are decoded. */
+    if (canbang && i>=compiled_code && i<=current_compile_p) {
+       if (*i==0x66) {
+           /* Operand-size prefix: 16-bit access, one extra byte to skip. */
+           i++;
+           size=2;
+           len++;
+       }
+       
+       /* NOTE(review): the lengths below are fixed per ModRM mod field
+          (6, 3 or 2 bytes); a SIB byte or the mod=00/rm=101 disp32 form is
+          not accounted for -- confirm the JIT never emits those here. */
+       switch(i[0]) {
+       case 0x8a:      /* MOV r8, r/m8 */
+           if ((i[1]&0xc0)==0x80) {
+               r=(i[1]>>3)&7;
+               dir=SIG_READ;
+               size=1;
+               len+=6;
+               break;
+           }
+           break;
+       case 0x88:      /* MOV r/m8, r8 */
+           if ((i[1]&0xc0)==0x80) {
+               r=(i[1]>>3)&7;
+               dir=SIG_WRITE;
+               size=1;
+               len+=6;
+               break;
+           }
+           break;
+       case 0x8b:      /* MOV r16/32, r/m16/32 */
+           switch(i[1]&0xc0) {
+           case 0x80:
+               r=(i[1]>>3)&7;
+               dir=SIG_READ;
+               len+=6;
+               break;
+           case 0x40:
+               r=(i[1]>>3)&7;
+               dir=SIG_READ;
+               len+=3;
+               break;
+           case 0x00:
+               r=(i[1]>>3)&7;
+               dir=SIG_READ;
+               len+=2;
+               break;
+           default: 
+               break;
+           }
+           break;
+           case 0x89:  /* MOV r/m16/32, r16/32 */
+               switch(i[1]&0xc0) {
+               case 0x80:
+                   r=(i[1]>>3)&7;
+                   dir=SIG_WRITE;
+                   len+=6;
+                   break;
+               case 0x40:
+                   r=(i[1]>>3)&7;
+                   dir=SIG_WRITE;
+                   len+=3;
+                   break;
+               case 0x00:
+                   r=(i[1]>>3)&7;
+                   dir=SIG_WRITE;
+                   len+=2;
+                   break;
+               }
+               break;
+       }       
+    }
+    
+    if (r!=-1) { 
+       void* pr=NULL;  /* location of register r inside the saved context */
+#ifdef JIT_DEBUG
+       write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
+#endif
+
+       /* For byte accesses, register numbers 4..7 select the second byte of
+          Eax/Ecx/Edx/Ebx; for wider accesses 5..7 are Ebp/Esi/Edi and 4 is
+          left unsupported (pr stays NULL). */
+       switch(r) {
+       case 0: pr=&(pContext->Eax); break;
+       case 1: pr=&(pContext->Ecx); break;
+       case 2: pr=&(pContext->Edx); break;
+       case 3: pr=&(pContext->Ebx); break;
+       case 4: pr=(size>1)?NULL:(((uae_u8*)&(pContext->Eax))+1); break;
+       case 5: pr=(size>1)?
+                   (void*)(&(pContext->Ebp)):
+           (void*)(((uae_u8*)&(pContext->Ecx))+1); break;
+       case 6: pr=(size>1)?
+                   (void*)(&(pContext->Esi)):
+           (void*)(((uae_u8*)&(pContext->Edx))+1); break;
+       case 7: pr=(size>1)?
+                   (void*)(&(pContext->Edi)):
+           (void*)(((uae_u8*)&(pContext->Ebx))+1); break;
+       default: abort();
+       }
+       if (pr) {
+           blockinfo* bi;
+           
+           if (currprefs.comp_oldsegv) {
+               /* Old scheme: emulate the access here and skip the
+                  instruction by advancing Eip. */
+               addr-=NATMEM_OFFSET;
+               
+               if ((addr>=0x10000000 && addr<0x40000000) ||
+                   (addr>=0x50000000)) {
+#ifdef JIT_DEBUG
+                   write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+#endif
+               }
+               if (dir==SIG_READ) {
+                   /* Word and long values are byte-swapped before being
+                      stored into the saved register. */
+                   switch(size) {
+                   case 1: *((uae_u8*)pr)=get_byte(addr); break;
+                   case 2: *((uae_u16*)pr)=swap16(get_word(addr)); break;
+                   case 4: *((uae_u32*)pr)=swap32(get_long(addr)); break;
+                   default: abort();
+                   }
+               }
+               else { /* write */
+                   switch(size) {
+                   case 1: put_byte(addr,*((uae_u8*)pr)); break;
+                   case 2: put_word(addr,swap16(*((uae_u16*)pr))); break;
+                   case 4: put_long(addr,swap32(*((uae_u32*)pr))); break;
+                   default: abort();
+                   }
+               }
+#ifdef JIT_DEBUG
+               write_log("Handled one access!\n");
+#endif
+               fflush(stdout);
+               segvcount++;
+               pContext->Eip+=len;
+           }
+           else {
+               /* New scheme: replace the first 5 bytes at Eip with a jump
+                  to a stub at veccode.  The emitter's target pointer is
+                  borrowed for this and restored from tmp at the end.
+                  Note that this int i shadows the outer instruction
+                  pointer i for the duration of this block. */
+               void* tmp=target;
+               int i;
+               uae_u8 vecbuf[5];
+               
+               addr-=NATMEM_OFFSET;
+               
+               if ((addr>=0x10000000 && addr<0x40000000) ||
+                   (addr>=0x50000000)) {
+#ifdef JIT_DEBUG
+                   write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+#endif
+               }
+       
+               /* Save the bytes about to be overwritten, then write
+                  JMP rel32 to veccode over them. */
+               target=(uae_u8*)pContext->Eip;
+               for (i=0;i<5;i++)
+                   vecbuf[i]=target[i];
+               emit_byte(0xe9);
+               emit_long((uae_u32)veccode-(uae_u32)target-4);
+#ifdef JIT_DEBUG
+
+               write_log("Create jump to %p\n",veccode);
+               write_log("Handled one access!\n");
+#endif
+               segvcount++;
+               
+               /* Build the stub.  It always starts at veccode, so each
+                  fault overwrites the previous stub. */
+               target=veccode;
+               
+               if (dir==SIG_READ) {
+                   /* The value is read now and baked into the stub as an
+                      immediate load of register r. */
+                   switch(size) {
+                   case 1: raw_mov_b_ri(r,get_byte(addr)); break;
+                   case 2: raw_mov_w_ri(r,swap16(get_word(addr))); break;
+                   case 4: raw_mov_l_ri(r,swap32(get_long(addr))); break;
+                   default: abort();
+                   }
+               }
+               else { /* write */
+                   /* Writes are performed immediately; nothing is emitted
+                      into the stub for them. */
+                   switch(size) {
+                   case 1: put_byte(addr,*((uae_u8*)pr)); break;
+                   case 2: put_word(addr,swap16(*((uae_u16*)pr))); break;
+                   case 4: put_long(addr,swap32(*((uae_u32*)pr))); break;
+                   default: abort();
+                   }
+               }
+               /* Stub tail: restore the 5 original bytes, clear in_handler
+                  and jump back to the instruction after the faulting one. */
+               for (i=0;i<5;i++)
+                   raw_mov_b_mi(pContext->Eip+i,vecbuf[i]);
+               raw_mov_l_mi((uae_u32)&in_handler,0);
+               emit_byte(0xe9);
+               emit_long(pContext->Eip+len-(uae_u32)target-4);
+               in_handler=1;
+               target=tmp;
+           }
+           /* Invalidate the block whose code range contains the faulting
+              instruction: search the active list first. */
+           bi=active;
+           while (bi) {
+               if (bi->handler && 
+                   (uae_u8*)bi->direct_handler<=i &&
+                   (uae_u8*)bi->nexthandler>i) {
+#ifdef JIT_DEBUG
+                   write_log("deleted trigger (%p<%p<%p) %p\n",
+                       bi->handler,
+                       i,
+                       bi->nexthandler,
+                       bi->pc_p);
+#endif
+                   invalidate_block(bi);
+                   raise_in_cl_list(bi);
+                   set_special(0);
+                   return EXCEPTION_CONTINUE_EXECUTION;
+               }
+               bi=bi->next;
+           }
+           /* Not found in the active list. Might be a rom routine that
+           is in the dormant list */
+           bi=dormant;
+           while (bi) {
+               if (bi->handler && 
+                   (uae_u8*)bi->direct_handler<=i &&
+                   (uae_u8*)bi->nexthandler>i) {
+#ifdef JIT_DEBUG
+                   write_log("deleted trigger (%p<%p<%p) %p\n",
+                       bi->handler,
+                       i,
+                       bi->nexthandler,
+                       bi->pc_p);
+#endif
+                   invalidate_block(bi);
+                   raise_in_cl_list(bi);
+                   set_special(0);
+                   return EXCEPTION_CONTINUE_EXECUTION;
+               }
+               bi=bi->next;
+           }
+#ifdef JIT_DEBUG
+           write_log("Huh? Could not find trigger!\n");
+#endif
+           /* The access itself was handled, so execution still resumes. */
+           return EXCEPTION_CONTINUE_EXECUTION;
+       }
+    }
+    /* Undecodable instruction or unsupported register: dump the first
+       instruction bytes and let the next handler deal with the fault. */
+    write_log("JIT: Can't handle access!\n");
+    if( i )
+    {
+       for (j=0;j<10;j++) {
+           write_log("JIT: instruction byte %2d is 0x%02x\n",j,i[j]);
+       }
+    }
+#if 0
+    write_log("Please send the above info (starting at \"fault address\") to\n"
+          "bmeyer@csse.monash.edu.au\n"
+          "This shouldn't happen ;-)\n");
+#endif
+    return EXCEPTION_CONTINUE_SEARCH;
+}
+#else
+/*
+ * Non-Windows counterpart of EvalException: a SIGSEGV handler for faults
+ * caused by JIT-compiled code accessing emulated memory directly.
+ *
+ * x  - signal number (unused).
+ * sc - CPU context at the time of the fault.
+ *      NOTE(review): sc is received by value yet its fields (eip, saved
+ *      registers) are modified with the apparent intent of changing the
+ *      resumed context; this depends on how the signal frame is laid out --
+ *      confirm on the target platform.
+ *
+ * The decoding and both handling schemes mirror the Windows handler.
+ * NOTE(review): unlike the Windows handler, word and long values are not
+ * passed through swap16()/swap32() here -- confirm whether that difference
+ * is intended.
+ *
+ * If the access cannot be handled, the default SIGSEGV disposition is
+ * restored so that returning triggers a real crash.
+ */
+static void vec(int x, struct sigcontext sc)
+{
+    uae_u8* i=(uae_u8*)sc.eip;  /* address of the faulting instruction */
+    uae_u32 addr=sc.cr2;        /* faulting data address */
+    int r=-1;                   /* register field of the decoded MOV; -1 = not decoded */
+    int size=4;                 /* access size in bytes */
+    int dir=-1;                 /* SIG_READ or SIG_WRITE */
+    int len=0;                  /* length of the faulting instruction in bytes */
+    int j;
+    
+    write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
+    if (!canbang) 
+       write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
+    if (in_handler) 
+       write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
+
+    /* Only instructions inside the translation cache are decoded. */
+    if (canbang && i>=compiled_code && i<=current_compile_p) {
+       if (*i==0x66) {
+           /* Operand-size prefix: 16-bit access, one extra byte to skip. */
+           i++;
+           size=2;
+           len++;
+       }
+       
+       /* NOTE(review): the lengths below are fixed per ModRM mod field
+          (6, 3 or 2 bytes); a SIB byte or the mod=00/rm=101 disp32 form is
+          not accounted for. */
+       switch(i[0]) {
+        case 0x8a:     /* MOV r8, r/m8 */
+           if ((i[1]&0xc0)==0x80) {
+               r=(i[1]>>3)&7;
+               dir=SIG_READ;
+               size=1;
+               len+=6;
+               break;
+           }
+           break;
+        case 0x88:     /* MOV r/m8, r8 */
+           if ((i[1]&0xc0)==0x80) {
+               r=(i[1]>>3)&7;
+               dir=SIG_WRITE;
+               size=1;
+               len+=6;
+               break;
+           }
+           break;
+
+        case 0x8b:     /* MOV r16/32, r/m16/32 */
+          switch(i[1]&0xc0) {
+          case 0x80:
+            r=(i[1]>>3)&7;
+            dir=SIG_READ;
+            len+=6;
+            break;
+          case 0x40:
+            r=(i[1]>>3)&7;
+            dir=SIG_READ;
+            len+=3;
+            break;
+          case 0x00:
+            r=(i[1]>>3)&7;
+            dir=SIG_READ;
+            len+=2;
+            break;
+          default: 
+            break;
+          }
+          break;
+           
+        case 0x89:     /* MOV r/m16/32, r16/32 */
+          switch(i[1]&0xc0) {
+          case 0x80:
+            r=(i[1]>>3)&7;
+            dir=SIG_WRITE;
+            len+=6;
+            break;
+          case 0x40:
+            r=(i[1]>>3)&7;
+            dir=SIG_WRITE;
+            len+=3;
+            break;
+          case 0x00:
+            r=(i[1]>>3)&7;
+            dir=SIG_WRITE;
+            len+=2;
+            break;
+          }
+          break;
+       }       
+    }
+
+    if (r!=-1) { 
+       void* pr=NULL;  /* location of register r inside the saved context */
+       write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
+       
+       /* For byte accesses, register numbers 4..7 select the second byte of
+          eax/ecx/edx/ebx; for wider accesses 5..7 are ebp/esi/edi and 4 is
+          left unsupported (pr stays NULL). */
+       switch(r) {
+        case 0: pr=&(sc.eax); break;
+        case 1: pr=&(sc.ecx); break;
+        case 2: pr=&(sc.edx); break;
+        case 3: pr=&(sc.ebx); break;
+        case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
+        case 5: pr=(size>1)?
+                    (void*)(&(sc.ebp)):
+                        (void*)(((uae_u8*)&(sc.ecx))+1); break;
+        case 6: pr=(size>1)?
+                    (void*)(&(sc.esi)):
+                        (void*)(((uae_u8*)&(sc.edx))+1); break;
+        case 7: pr=(size>1)?
+                    (void*)(&(sc.edi)):
+                        (void*)(((uae_u8*)&(sc.ebx))+1); break;
+        default: abort();
+       }
+       if (pr) {
+           blockinfo* bi;
+
+           if (currprefs.comp_oldsegv) {
+           /* Old scheme: emulate the access here and skip the instruction
+              by advancing eip. */
+           addr-=NATMEM_OFFSET;
+               
+           if ((addr>=0x10000000 && addr<0x40000000) ||
+               (addr>=0x50000000)) {
+               /* NOTE(review): the message text reads "in %x SEGV handler";
+                  the other copies of this message put the address first. */
+               write_log("Suspicious address in %x SEGV handler.\n",addr);
+           }
+           if (dir==SIG_READ) {
+               switch(size) {
+                case 1: *((uae_u8*)pr)=get_byte(addr); break;
+                case 2: *((uae_u16*)pr)=get_word(addr); break;
+                case 4: *((uae_u32*)pr)=get_long(addr); break;
+                default: abort();
+               }
+           }
+           else { /* write */
+               switch(size) {
+                case 1: put_byte(addr,*((uae_u8*)pr)); break;
+                case 2: put_word(addr,*((uae_u16*)pr)); break;
+                case 4: put_long(addr,*((uae_u32*)pr)); break;
+                default: abort();
+               }
+           }
+           write_log("Handled one access!\n");
+           fflush(stdout);
+           segvcount++;
+           sc.eip+=len;
+           }
+           else {
+               /* New scheme: replace the first 5 bytes at eip with a jump
+                  to a stub at veccode.  The emitter's target pointer is
+                  borrowed for this and restored from tmp at the end.
+                  Note that this int i shadows the outer instruction
+                  pointer i for the duration of this block. */
+               void* tmp=target;
+               int i;
+               uae_u8 vecbuf[5];
+               
+               addr-=NATMEM_OFFSET;
+               
+               if ((addr>=0x10000000 && addr<0x40000000) ||
+                   (addr>=0x50000000)) {
+                   write_log("Suspicious address 0x%x in SEGV handler.\n",addr);
+               }
+               
+               /* Save the bytes about to be overwritten, then write
+                  JMP rel32 to veccode over them. */
+               target=(uae_u8*)sc.eip;
+               for (i=0;i<5;i++)
+                   vecbuf[i]=target[i];
+               emit_byte(0xe9);
+               emit_long((uae_u32)veccode-(uae_u32)target-4);
+               write_log("Create jump to %p\n",veccode);
+
+               write_log("Handled one access!\n");
+               fflush(stdout);
+               segvcount++;
+               
+               /* Build the stub.  It always starts at veccode, so each
+                  fault overwrites the previous stub. */
+               target=veccode;
+
+               if (dir==SIG_READ) {
+                   /* The value is read now and baked into the stub as an
+                      immediate load of register r. */
+                   switch(size) {
+                    case 1: raw_mov_b_ri(r,get_byte(addr)); break;
+                    case 2: raw_mov_w_ri(r,get_word(addr)); break;
+                    case 4: raw_mov_l_ri(r,get_long(addr)); break;
+                    default: abort();
+                   }
+               }
+               else { /* write */
+                   /* Writes are performed immediately; nothing is emitted
+                      into the stub for them. */
+                   switch(size) {
+                    case 1: put_byte(addr,*((uae_u8*)pr)); break;
+                    case 2: put_word(addr,*((uae_u16*)pr)); break;
+                    case 4: put_long(addr,*((uae_u32*)pr)); break;
+                    default: abort();
+                   }
+               }
+               /* Stub tail: restore the 5 original bytes, clear in_handler
+                  and jump back to the instruction after the faulting one. */
+               for (i=0;i<5;i++)
+                   raw_mov_b_mi(sc.eip+i,vecbuf[i]);
+               raw_mov_l_mi((uae_u32)&in_handler,0);
+               emit_byte(0xe9);
+               emit_long(sc.eip+len-(uae_u32)target-4);
+               in_handler=1;
+               target=tmp;
+           }
+           /* Invalidate the block whose code range contains the faulting
+              instruction: search the active list first. */
+           bi=active;
+           while (bi) {
+               if (bi->handler && 
+                   (uae_u8*)bi->direct_handler<=i &&
+                   (uae_u8*)bi->nexthandler>i) {
+                   write_log("deleted trigger (%p<%p<%p) %p\n",
+                             bi->handler,
+                             i,
+                             bi->nexthandler,
+                             bi->pc_p);
+                   invalidate_block(bi);
+                   raise_in_cl_list(bi);
+                   set_special(0);
+                   return;
+               }
+               bi=bi->next;
+           }
+           /* Not found in the active list. Might be a rom routine that
+              is in the dormant list */
+           bi=dormant;
+           while (bi) {
+               if (bi->handler && 
+                   (uae_u8*)bi->direct_handler<=i &&
+                   (uae_u8*)bi->nexthandler>i) {
+                   write_log("deleted trigger (%p<%p<%p) %p\n",
+                             bi->handler,
+                             i,
+                             bi->nexthandler,
+                             bi->pc_p);
+                   invalidate_block(bi);
+                   raise_in_cl_list(bi);
+                   set_special(0);
+                   return;
+               }
+               bi=bi->next;
+           }
+           write_log("Huh? Could not find trigger!\n");
+           return;
+       }
+    }
+    /* Undecodable instruction or unsupported register: dump the first
+       instruction bytes and give up. */
+    write_log("Can't handle access!\n");
+    for (j=0;j<10;j++) {
+       write_log("instruction byte %2d is %02x\n",j,i[j]);
+    }
+#if 0
+    write_log("Please send the above info (starting at \"fault address\") to\n"
+          "bmeyer@csse.monash.edu.au\n"
+          "This shouldn't happen ;-)\n");
+    fflush(stdout);
+#endif
+    signal(SIGSEGV,SIG_DFL);  /* returning here will cause a "real" SEGV */
+}
+#endif
+#endif
+
+/*************************************************************************
+ * Checking for CPU features                                             *
+ *************************************************************************/
+
+/* Result of one CPUID query.  The field order is significant: the stub
+   generated by cpuid() stores native registers 0..3 (eax, ecx, edx, ebx) at
+   byte offsets 0, 4, 8 and 12 of this structure. */
+typedef struct {
+    uae_u32 eax;
+    uae_u32 ecx;
+    uae_u32 edx;
+    uae_u32 ebx;
+} x86_regs;
+
+
+/* This could be so much easier if it could make assumptions about the
+   compiler... */
+
+static uae_u32 cpuid_ptr;
+static uae_u32 cpuid_level;
+
+/*
+ * Execute the CPUID instruction for the given level and return the four
+ * result registers.
+ *
+ * Instead of relying on compiler-specific inline assembly, a small stub is
+ * generated with the raw_* emitters into memory obtained from cache_alloc(),
+ * called once, and freed again.  The stub finds its input and output through
+ * the file-scope variables cpuid_level and cpuid_ptr.
+ *
+ * NOTE(review): the result of cache_alloc() is not checked before use.
+ */
+static x86_regs cpuid(uae_u32 level)
+{
+    x86_regs answer;
+    uae_u8 *cpuid_space;
+    void* tmp=get_target();     /* emitter position to restore afterwards */
+
+    cpuid_ptr=(uae_u32)&answer;
+    cpuid_level=level;
+
+    cpuid_space = cache_alloc (256);
+    set_target(cpuid_space);
+    /* Save every register the stub modifies. */
+    raw_push_l_r(0); /* eax */
+    raw_push_l_r(1); /* ecx */
+    raw_push_l_r(2); /* edx */
+    raw_push_l_r(3); /* ebx */
+    raw_push_l_r(7); /* edi */
+    raw_mov_l_rm(0,(uae_u32)&cpuid_level);      /* eax = requested level */
+    raw_cpuid(0);
+    raw_mov_l_rm(7,(uae_u32)&cpuid_ptr);        /* edi = &answer */
+    /* Store registers 0..3 at offsets 0, 4, 8, 12 from edi. */
+    raw_mov_l_Rr(7,0,0);
+    raw_mov_l_Rr(7,1,4);
+    raw_mov_l_Rr(7,2,8);
+    raw_mov_l_Rr(7,3,12);
+    /* Restore in reverse order of the pushes. */
+    raw_pop_l_r(7);
+    raw_pop_l_r(3);
+    raw_pop_l_r(2);
+    raw_pop_l_r(1);
+    raw_pop_l_r(0);
+    raw_ret();
+    set_target(tmp);
+
+    /* Run the freshly generated stub. */
+    ((cpuop_func*)cpuid_space)(0);
+    cache_free (cpuid_space);
+    return answer;
+}
+
+/*
+ * Query the host CPU and set the feature flags used by the code generator:
+ * have_cmov (from CPUID level 1, EDX bit 15) and have_rat_stall.
+ * The maximum CPUID level and the 12-character vendor string are logged.
+ */
+static void raw_init_cpu(void)
+{
+    x86_regs x;
+    uae_u32 maxlev;
+    
+    x=cpuid(0);
+    maxlev=x.eax;
+    /* The vendor string is returned in ebx, edx, ecx, four characters each,
+       lowest byte first. */
+    write_log("Max CPUID level=%d Processor is %c%c%c%c%c%c%c%c%c%c%c%c\n",
+             maxlev,
+             x.ebx,
+             x.ebx>>8,
+             x.ebx>>16,
+             x.ebx>>24,
+             x.edx,
+             x.edx>>8,
+             x.edx>>16,
+             x.edx>>24,
+             x.ecx,
+             x.ecx>>8,
+             x.ecx>>16,
+             x.ecx>>24
+             );
+    /* 0x6c65746e is "ntel", the last four characters of "GenuineIntel".
+       NOTE(review): this result is overwritten unconditionally a few lines
+       further down, so the vendor test currently has no effect. */
+    have_rat_stall=(x.ecx==0x6c65746e);
+
+    if (maxlev>=1) {
+       x=cpuid(1);
+       if (x.edx&(1<<15)) 
+           have_cmov=1;
+    }
+    have_rat_stall=1;
+#if 0
+    if (!have_cmov)
+       have_rat_stall=0;
+#endif
+#if 0
+    write_log ("have_cmov=%d, avoid_cmov=%d, have_rat_stall=%d\n",
+              have_cmov,currprefs.avoid_cmov,have_rat_stall);
+    if (currprefs.avoid_cmov) {
+       write_log("Disabling cmov use despite processor claiming to support it!\n");
+       have_cmov=0;
+    }
+#else
+    /* Dear Bernie, I don't want to keep around options which are useless, and not
+       represented in the GUI anymore... Is this okay? */
+    write_log ("have_cmov=%d, have_rat_stall=%d\n", have_cmov, have_rat_stall);
+#endif
+#if 0   /* For testing of non-cmov code! */
+    have_cmov=0;
+#endif
+#if 0 /* It appears that partial register writes are a bad idea even on
+        AMD K7 cores, even though they are not supposed to have the
+        dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
+    if (have_cmov)
+      have_rat_stall=1;
+#endif
+}
+
+/*************************************************************************
+ * FPU stuff                                                             *
+ *************************************************************************/
+
+
+static __inline__ void raw_fp_init(void)
+{
+    /* Reset the FP stack model: no FP register has a stack slot (negative
+       spos[] entries are treated as "not on the stack" by the code below)
+       and the top-of-stack index is -1. */
+    int fr=0;
+
+    while (fr<N_FREGS) {
+       live.spos[fr]=-2;
+       fr++;
+    }
+    live.tos=-1;  /* Stack is empty */
+}
+
+static __inline__ void raw_fp_cleanup_drop(void)
+{
+#if 0
+    /* using FINIT instead of popping all the entries.
+       Seems to have side effects --- there is display corruption in
+       Quake when this is used */
+    if (live.tos>1) {
+       emit_byte(0x9b);
+       emit_byte(0xdb);
+       emit_byte(0xe3);
+       live.tos=-1;
+    }
+#endif
+    /* Drop entries in pairs with FCOMPP (DE D9), which pops twice... */
+    for (; live.tos>=1; live.tos-=2) {
+       emit_byte(0xde);
+       emit_byte(0xd9);
+    }
+    /* ...then a possible single leftover with FSTP ST(0) (DD D8). */
+    for (; live.tos>=0; live.tos--) {
+       emit_byte(0xdd);
+       emit_byte(0xd8);
+    }
+    /* Bring the bookkeeping back to the empty state. */
+    raw_fp_init();
+}
+
+/* Make FP register r the top of the x87 stack.
+   If r has no stack slot yet, a dummy value is pushed to create one.
+   Otherwise r is exchanged with the current top (FXCH) and the
+   spos/onstack maps are updated to describe the swap. */
+static __inline__ void make_tos(int r)
+{
+    int p,q;
+
+    if (live.spos[r]<0) { /* Register not yet on stack */
+       emit_byte(0xd9);
+       emit_byte(0xe8);  /* Push '1' on the stack, just to grow it */
+       live.tos++;
+       live.spos[r]=live.tos;
+       live.onstack[live.tos]=r;
+       return;
+    }
+    /* Register is on stack */
+    if (live.tos==live.spos[r])
+       return;
+    p=live.spos[r];            /* slot r occupies before the swap */
+    q=live.onstack[live.tos];  /* register currently on top */
+
+    emit_byte(0xd9);
+    emit_byte(0xc8+live.tos-live.spos[r]);  /* exchange it with top of stack */
+    live.onstack[live.tos]=r;
+    live.spos[r]=live.tos;
+    live.onstack[p]=q;
+    live.spos[q]=p;
+}
+
+/* Arrange the x87 stack so that r is on top and r2 is in the slot
+   directly below it (the layout raw_frem_rr/raw_frem1_rr check for). */
+static __inline__ void make_tos2(int r, int r2)
+{
+    int q;
+
+    make_tos(r2); /* Put the reg that's supposed to end up in position2
+                    on top */
+
+    if (live.spos[r]<0) { /* Register not yet on stack */
+       make_tos(r); /* This will extend the stack */
+       return;
+    }
+    /* Register is on stack */
+    emit_byte(0xd9);
+    emit_byte(0xc9); /* Move r2 into position 2 */
+
+    /* Mirror the FXCH ST(1) in the model: q is whatever sat below r2 */
+    q=live.onstack[live.tos-1];
+    live.onstack[live.tos]=q;
+    live.spos[q]=live.tos;
+    live.onstack[live.tos-1]=r2;
+    live.spos[r2]=live.tos-1;
+
+    make_tos(r); /* And r into 1 */
+}
+
+/* Return how far below the top of the x87 stack register r currently
+   sits (0 == top).  Aborts if r has no stack slot, or if its recorded
+   slot lies above the modelled top of stack. */
+static __inline__ int stackpos(int r)
+{
+    int slot=live.spos[r];
+
+    if (slot<0)
+       abort();
+    if (slot>live.tos) {
+       printf("Looking for spos for fnreg %d\n",r);
+       abort();
+    }
+    return live.tos-slot;
+}
+
+/* Ensure FP register r has a slot on the x87 stack; a register that
+   already has one is left where it is. */
+static __inline__ void usereg(int r)
+{
+    if (live.spos[r]>=0)
+       return;
+    make_tos(r);
+}
+
+/* This is called with one FP value in a reg *above* tos, which it will
+   pop off the stack if necessary.
+   If r has no slot yet, the extra value simply becomes r's slot (the
+   model's tos is bumped, no code is emitted).  Otherwise the value is
+   stored into r's existing slot and popped. */
+static __inline__ void tos_make(int r)
+{
+    if (live.spos[r]<0) {
+       live.tos++;
+       live.spos[r]=live.tos;
+       live.onstack[live.tos]=r;
+       return;
+    }
+    emit_byte(0xdd);
+    emit_byte(0xd8+(live.tos+1)-live.spos[r]);  /* store top of stack in reg, 
+                                        and pop it*/
+}
+    
+       
+/* Store FP register r as a 64-bit double at absolute address m, keeping
+   it on the stack (DD /2 = FST m64fp). */
+LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
+{
+    make_tos(r);
+    emit_byte(0xdd);
+    emit_byte(0x15);
+    emit_long(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
+
+/* Store FP register r as a 64-bit double at absolute address m and drop
+   it from the x87 stack (DD /3 = FSTP m64fp).  The stack model is updated
+   so that r no longer has a slot. */
+LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
+{
+    make_tos(r);
+    emit_byte(0xdd);
+    emit_byte(0x1d);
+    emit_long(m);
+    live.onstack[live.tos]=-1;
+    live.tos--;
+    live.spos[r]=-2;
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
+
+/* Load the 64-bit double at absolute address m (DD /0 = FLD m64fp) and
+   assign it to FP register r via tos_make(). */
+LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
+{
+    emit_byte(0xdd);
+    emit_byte(0x05);
+    emit_long(m);
+    tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
+
+/* Load the 32-bit integer at absolute address m (DB /0 = FILD m32int)
+   and assign it to FP register r via tos_make(). */
+LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
+{
+    emit_byte(0xdb);
+    emit_byte(0x05);
+    emit_long(m);
+    tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
+
+/* Store FP register r as a 32-bit integer at absolute address m, keeping
+   it on the stack (DB /2 = FIST m32int). */
+LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
+{
+    make_tos(r);
+    emit_byte(0xdb);
+    emit_byte(0x15);
+    emit_long(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
+
+/* Load the 32-bit single at absolute address m (D9 /0 = FLD m32fp) and
+   assign it to FP register r via tos_make(). */
+LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
+{
+    emit_byte(0xd9);
+    emit_byte(0x05);
+    emit_long(m);
+    tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
+
+/* Store FP register r as a 32-bit single at absolute address m, keeping
+   it on the stack (D9 /2 = FST m32fp). */
+LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
+{
+    make_tos(r);
+    emit_byte(0xd9);
+    emit_byte(0x15);
+    emit_long(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
+
+/* Store FP register r as an 80-bit extended value at absolute address m
+   while keeping r on the stack.  Because the only 80-bit store pops, a
+   temporary copy of r is pushed first and that copy is what gets
+   stored and popped, so the stack model is unchanged on exit. */
+LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
+{
+    int rs;
+
+    /* Stupid x87 can't write a long double to mem without popping the 
+       stack! */
+    usereg(r);
+    rs=stackpos(r);
+    emit_byte(0xd9);     /* Get a copy to the top of stack */
+    emit_byte(0xc0+rs);
+
+    emit_byte(0xdb);  /* store and pop it */
+    emit_byte(0x3d);
+    emit_long(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
+
+/* Store FP register r as an 80-bit extended value at absolute address m
+   and drop it from the x87 stack (DB /7 = FSTP m80fp).  The stack model
+   is updated so that r no longer has a slot. */
+LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
+{
+    make_tos(r);
+    emit_byte(0xdb);  /* store and pop it */
+    emit_byte(0x3d);
+    emit_long(m);
+    live.onstack[live.tos]=-1;
+    live.tos--;
+    live.spos[r]=-2;
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
+
+/* Load the 80-bit extended value at absolute address m (DB /5 = FLD
+   m80fp) and assign it to FP register r via tos_make(). */
+LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
+{
+    emit_byte(0xdb);
+    emit_byte(0x2d);
+    emit_long(m);
+    tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
+
+/* Load the constant pi (D9 EB = FLDPI) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
+{
+    emit_byte(0xd9);
+    emit_byte(0xeb);
+    tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
+
+/* Load the constant log10(2) (D9 EC = FLDLG2) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
+{
+    emit_byte(0xd9);
+    emit_byte(0xec);
+    tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
+
+/* Load the constant log2(e) (D9 EA = FLDL2E) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
+{
+    emit_byte(0xd9);
+    emit_byte(0xea);
+    tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
+
+/* Load the constant ln(2) (D9 ED = FLDLN2) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
+{
+    emit_byte(0xd9);
+    emit_byte(0xed);
+    tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
+
+/* Load the constant 1.0 (D9 E8 = FLD1) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
+{
+    emit_byte(0xd9);
+    emit_byte(0xe8);
+    tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
+
+/* Load the constant 0.0 (D9 EE = FLDZ) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
+{
+    emit_byte(0xd9);
+    emit_byte(0xee);
+    tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
+
+/* Copy FP register s into FP register d.
+   Fast path: when s is already on top and d has a slot, a single
+   FST ST(i) writes it in place.  Otherwise s is duplicated onto the top
+   of the stack and tos_make() turns that copy into d. */
+LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    ds=stackpos(s);
+    if (ds==0 && live.spos[d]>=0) {
+       /* source is on top of stack, and we already have the dest */
+       int dd=stackpos(d);
+       emit_byte(0xdd);
+       emit_byte(0xd0+dd);
+    }
+    else {
+       emit_byte(0xd9);
+       emit_byte(0xc0+ds); /* duplicate source on tos */
+       tos_make(d); /* store to destination, pop if necessary */
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+
+/* Emit FLDCW [index+base]: opcode D9 /5 with a ModRM byte that selects
+   "register + 32-bit displacement" addressing, followed by base as the
+   displacement.
+   NOTE(review): register number 4 would select a SIB byte in this
+   encoding rather than a plain base register -- confirm callers never
+   pass it. */
+LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
+{
+    emit_byte(0xd9);
+    emit_byte(0xa8+index);
+    emit_long(base);
+}
+LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
+
+
+/* d = sqrt(s) (D9 FA = FSQRT).
+   For d != s the source is duplicated, the copy is transformed and then
+   assigned to d; for d == s the register is brought to the top of the
+   stack and transformed in place. */
+LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+{
+    int ds;
+
+    if (d!=s) {
+       usereg(s);
+       ds=stackpos(s);
+       emit_byte(0xd9);
+       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xd9);
+       emit_byte(0xfa); /* take square root */
+       tos_make(d); /* store to destination */
+    }
+    else {
+       make_tos(d);
+       emit_byte(0xd9);
+       emit_byte(0xfa); /* take square root */
+    }  
+}
+LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+
+/* d = |s| (D9 E1 = FABS).
+   For d != s the source is duplicated, the copy is transformed and then
+   assigned to d; for d == s the register is brought to the top of the
+   stack and transformed in place. */
+LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+{
+    int ds;
+
+    if (d!=s) {
+       usereg(s);
+       ds=stackpos(s);
+       emit_byte(0xd9);
+       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xd9);
+       emit_byte(0xe1); /* take fabs */
+       tos_make(d); /* store to destination */
+    }
+    else {
+       make_tos(d);
+       emit_byte(0xd9);
+       emit_byte(0xe1); /* take fabs */
+    }  
+}
+LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+
+/* d = s rounded to an integer (D9 FC = FRNDINT).
+   For d != s the source is duplicated, the copy is transformed and then
+   assigned to d; for d == s the register is brought to the top of the
+   stack and transformed in place. */
+LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+{
+    int ds;
+
+    if (d!=s) {
+       usereg(s);
+       ds=stackpos(s);
+       emit_byte(0xd9);
+       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xd9);
+       emit_byte(0xfc); /* take frndint */
+       tos_make(d); /* store to destination */
+    }
+    else {
+       make_tos(d);
+       emit_byte(0xd9);
+       emit_byte(0xfc); /* take frndint */
+    }  
+}
+LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+
+/* d = cos(s) (D9 FF = FCOS).
+   For d != s the source is duplicated, the copy is transformed and then
+   assigned to d; for d == s the register is brought to the top of the
+   stack and transformed in place. */
+LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+{
+    int ds;
+
+    if (d!=s) {
+       usereg(s);
+       ds=stackpos(s);
+       emit_byte(0xd9);
+       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xd9);
+       emit_byte(0xff); /* take cos */
+       tos_make(d); /* store to destination */
+    }
+    else {
+       make_tos(d);
+       emit_byte(0xd9);
+       emit_byte(0xff); /* take cos */
+    }  
+}
+LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+
+/* d = sin(s) (D9 FE = FSIN).
+   For d != s the source is duplicated, the copy is transformed and then
+   assigned to d; for d == s the register is brought to the top of the
+   stack and transformed in place. */
+LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+{
+    int ds;
+
+    if (d!=s) {
+       usereg(s);
+       ds=stackpos(s);
+       emit_byte(0xd9);
+       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xd9);
+       emit_byte(0xfe); /* take sin */
+       tos_make(d); /* store to destination */
+    }
+    else {
+       make_tos(d);
+       emit_byte(0xd9);
+       emit_byte(0xfe); /* take sin */
+    }  
+}
+LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+
+/* Memory operand holding 1.0; the generated code adds it straight from
+   memory so that no extra x87 stack slot is needed. */
+double one=1;
+/* d = 2^s.
+   The source is split into a rounded integer part and the remaining
+   fraction; F2XM1 plus 1.0 gives 2^fraction, and FSCALE then applies the
+   integer part as a power-of-two scale. */
+LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* duplicate source */
+
+    emit_byte(0xd9);
+    emit_byte(0xc0);  /* duplicate top of stack. Now up to 8 high */
+    emit_byte(0xd9);
+    emit_byte(0xfc);  /* rndint */
+    emit_byte(0xd9);
+    emit_byte(0xc9);  /* swap top two elements */
+    emit_byte(0xd8);
+    emit_byte(0xe1);  /* subtract rounded from original */
+    emit_byte(0xd9);
+    emit_byte(0xf0);  /* f2xm1 */
+    emit_byte(0xdc);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
+    emit_byte(0xd9);
+    emit_byte(0xfd);  /* and scale it */
+    emit_byte(0xdd);
+    emit_byte(0xd9);  /* take the rounded value off */
+    tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+
+/* d = e^s, computed as 2^(s*log2(e)).
+   After the multiplication by log2(e) the sequence is the same as in
+   raw_ftwotox_rr: split into rounded integer part and fraction, F2XM1
+   plus 1.0 on the fraction, then FSCALE by the integer part. */
+LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* duplicate source */
+    emit_byte(0xd9);
+    emit_byte(0xea);   /* fldl2e */
+    emit_byte(0xde);
+    emit_byte(0xc9);  /* fmulp --- multiply source by log2(e) */
+
+    emit_byte(0xd9);
+    emit_byte(0xc0);  /* duplicate top of stack. Now up to 8 high */
+    emit_byte(0xd9);
+    emit_byte(0xfc);  /* rndint */
+    emit_byte(0xd9);
+    emit_byte(0xc9);  /* swap top two elements */
+    emit_byte(0xd8);
+    emit_byte(0xe1);  /* subtract rounded from original */
+    emit_byte(0xd9);
+    emit_byte(0xf0);  /* f2xm1 */
+    emit_byte(0xdc);
+    emit_byte(0x05);
+    emit_long((uae_u32)&one);  /* Add '1' without using extra stack space */
+    emit_byte(0xd9);
+    emit_byte(0xfd);  /* and scale it */
+    emit_byte(0xdd);
+    emit_byte(0xd9);  /* take the rounded value off */
+    tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+/* d = log2(s).  FYL2X computes ST(1)*log2(ST(0)) and pops, so a 1.0 is
+   pushed and swapped below the copy of s to act as the multiplier. */
+LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    ds=stackpos(s);
+    emit_byte(0xd9);
+    emit_byte(0xc0+ds); /* duplicate source */
+    emit_byte(0xd9);
+    emit_byte(0xe8); /* push '1' */
+    emit_byte(0xd9);
+    emit_byte(0xc9); /* swap top two */
+    emit_byte(0xd9);
+    emit_byte(0xf1); /* take 1*log2(x) */
+    tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+
+
+/* d = -s (D9 E0 = FCHS).
+   For d != s the source is duplicated, the copy is negated and then
+   assigned to d; for d == s the register is brought to the top of the
+   stack and negated in place. */
+LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+{
+    int ds;
+
+    if (d!=s) {
+       usereg(s);
+       ds=stackpos(s);
+       emit_byte(0xd9);
+       emit_byte(0xc0+ds); /* duplicate source */
+       emit_byte(0xd9);
+       emit_byte(0xe0); /* take fchs */
+       tos_make(d); /* store to destination */
+    }
+    else {
+       make_tos(d);
+       emit_byte(0xd9);
+       emit_byte(0xe0); /* take fchs */
+    }  
+}
+LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+
+/* d += s.
+   If s already is the top of the stack, the "ST(i) op= ST(0)" form
+   (DC C0+i) is used so nothing has to be moved; otherwise d is brought
+   to the top and the "ST(0) op= ST(i)" form (D8 C0+i) is used. */
+LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    usereg(d);
+    
+    if (live.spos[s]==live.tos) {
+       /* Source is on top of stack */
+       ds=stackpos(d);
+       emit_byte(0xdc);
+       emit_byte(0xc0+ds); /* add source to dest*/
+    }
+    else {
+       make_tos(d);
+       ds=stackpos(s);
+       
+       emit_byte(0xd8);
+       emit_byte(0xc0+ds); /* add source to dest*/
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+
+/* d -= s.
+   If s already is the top of the stack, the "ST(i) = ST(i) - ST(0)" form
+   (DC E8+i) is used so nothing has to be moved; otherwise d is brought
+   to the top and the "ST(0) = ST(0) - ST(i)" form (D8 E0+i) is used. */
+LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    usereg(d);
+    
+    if (live.spos[s]==live.tos) {
+       /* Source is on top of stack */
+       ds=stackpos(d);
+       emit_byte(0xdc);
+       emit_byte(0xe8+ds); /* sub source from dest*/
+    }
+    else {
+       make_tos(d);
+       ds=stackpos(s);
+       
+       emit_byte(0xd8);
+       emit_byte(0xe0+ds); /* sub src from dest */
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+
+/* Compare d with s: d is brought to the top of the stack and an
+   unordered compare against s's slot is emitted (DD E0+i = FUCOM ST(i)).
+   The result is left in the x87 status word; neither register is
+   modified. */
+LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    usereg(d);
+    
+    make_tos(d);
+    ds=stackpos(s);
+
+    emit_byte(0xdd);
+    emit_byte(0xe0+ds); /* cmp dest with source*/
+}
+LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
+
+/* d *= s.
+   If s already is the top of the stack, the "ST(i) op= ST(0)" form
+   (DC C8+i) is used so nothing has to be moved; otherwise d is brought
+   to the top and the "ST(0) op= ST(i)" form (D8 C8+i) is used. */
+LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    usereg(d);
+    
+    if (live.spos[s]==live.tos) {
+       /* Source is on top of stack */
+       ds=stackpos(d);
+       emit_byte(0xdc);
+       emit_byte(0xc8+ds); /* mul dest by source*/
+    }
+    else {
+       make_tos(d);
+       ds=stackpos(s);
+       
+       emit_byte(0xd8);
+       emit_byte(0xc8+ds); /* mul dest by source*/
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+
+/* d /= s.
+   If s already is the top of the stack, the "ST(i) = ST(i) / ST(0)" form
+   (DC F8+i) is used so nothing has to be moved; otherwise d is brought
+   to the top and the "ST(0) = ST(0) / ST(i)" form (D8 F0+i) is used. */
+LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    usereg(d);
+    
+    if (live.spos[s]==live.tos) {
+       /* Source is on top of stack */
+       ds=stackpos(d);
+       emit_byte(0xdc);
+       emit_byte(0xf8+ds); /* div dest by source */
+    }
+    else {
+       make_tos(d);
+       ds=stackpos(s);
+       
+       emit_byte(0xd8);
+       emit_byte(0xf0+ds); /* div dest by source*/
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+
+/* d = partial remainder of d / s (D9 F8 = FPREM).
+   FPREM works on fixed operands ST(0) and ST(1), so make_tos2() is used
+   to put d on top with s directly below; the layout is verified and the
+   program aborts if it is not as expected. */
+LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    usereg(d);
+    
+    make_tos2(d,s);
+    ds=stackpos(s);
+
+    if (ds!=1) {
+       printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
+       abort();
+    }
+    emit_byte(0xd9);
+    emit_byte(0xf8); /* take rem from dest by source */
+}
+LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
+
+/* d = partial remainder of d / s using FPREM1 (D9 F5).
+   FPREM1 works on fixed operands ST(0) and ST(1), so make_tos2() is used
+   to put d on top with s directly below; the layout is verified and the
+   program aborts if it is not as expected. */
+LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+{
+    int ds;
+
+    usereg(s);
+    usereg(d);
+    
+    make_tos2(d,s);
+    ds=stackpos(s);
+
+    if (ds!=1) {
+       printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
+       abort();
+    }
+    emit_byte(0xd9);
+    emit_byte(0xf5); /* take rem1 from dest by source */
+}
+LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+
+
+/* Bring r to the top of the stack and compare it against 0.0 (FTST);
+   the result is left in the x87 status word. */
+LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
+{
+    make_tos(r);
+    emit_byte(0xd9);  /* ftst */
+    emit_byte(0xe4);
+}
+LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
+
+/* Set the native CPU flags from a comparison of FP register r against
+   0.0.  A temporary zero is pushed for the comparison and removed again,
+   so the modelled x87 stack is the same on exit as on entry. */
+static __inline__ void raw_fflags_into_flags(int r)
+{
+    int depth;
+
+    usereg(r);
+    depth=stackpos(r);
+
+    /* FLDZ: push 0.0; r now sits one slot deeper, at ST(depth+1) */
+    emit_byte(0xd9);
+    emit_byte(0xee);
+    /* FXCH ST(depth+1): r's value goes on top, the 0.0 into r's slot */
+    emit_byte(0xd9);
+    emit_byte(0xc9+depth);
+    if (!have_cmov) {
+       /* FUCOM ST(depth+1), then FSTSW AX and SAHF to carry the result
+          over into the CPU flags */
+       emit_byte(0xdd);
+       emit_byte(0xe1+depth);
+       emit_byte(0x9b);
+       emit_byte(0xdf);
+       emit_byte(0xe0);
+       raw_sahf(0);
+    }
+    else {
+       /* FUCOMI ST(0),ST(depth+1) sets the CPU flags directly.  It only
+          exists on P6-class cores (not e.g. K6-2), hence the have_cmov
+          test. */
+       emit_byte(0xdb);
+       emit_byte(0xe9+depth);
+    }
+    /* FSTP ST(depth+1): store the value back into r's slot and pop,
+       which discards the 0.0 */
+    emit_byte(0xdd);
+    emit_byte(0xd9+depth);
+}
index b37a3c38b6beff90fb736dcc126c855a21b59aaa..3180331437fafd0d83e27ba977b95161497611ac 100755 (executable)
@@ -3989,7 +3989,6 @@ MIDFUNC(0,nop,(void))
 }
 MENDFUNC(0,nop,(void))
 
-
 MIDFUNC(1,f_forget_about,(FW r))
 {
     if (f_isinreg(r))
@@ -4197,6 +4196,38 @@ MIDFUNC(2,fcos_rr,(FW d, FR s))
 }
 MENDFUNC(2,fcos_rr,(FW d, FR s))
 
+/* Wrapper around raw_ftan_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,ftan_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_ftan_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,ftan_rr,(FW d, FR s))
+
+/* Wrapper around raw_fsincos_rr, which has two destinations (d for the
+   sine, c for the cosine) and one source.  s goes through f_readreg(),
+   d and c through f_writereg(); all three are f_unlock()ed afterwards. */
+MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
+{
+    s=f_readreg(s);  /* s for source */
+    d=f_writereg(d); /* d for sine   */
+    c=f_writereg(c); /* c for cosine */
+    raw_fsincos_rr(d,c,s);
+    f_unlock(s);
+    f_unlock(d);
+    f_unlock(c);
+}
+MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
+
+/* Wrapper around raw_fscale_rr.  Unlike the pure-destination ops, d is
+   both read and written here, so it is obtained with f_rmw() instead of
+   f_writereg(); s goes through f_readreg().  Both are f_unlock()ed
+   afterwards. */
+MIDFUNC(2,fscale_rr,(FRW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_rmw(d);
+    raw_fscale_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fscale_rr,(FRW d, FR s))
+
 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
 {
     s=f_readreg(s);
@@ -4217,6 +4248,26 @@ MIDFUNC(2,fetox_rr,(FW d, FR s))
 }
 MENDFUNC(2,fetox_rr,(FW d, FR s))
 
+/* Wrapper around raw_fetoxM1_rr: s goes through f_readreg() and d
+   through f_writereg() before the call, and both are f_unlock()ed
+   afterwards. */
+MIDFUNC(2,fetoxM1_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fetoxM1_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fetoxM1_rr,(FW d, FR s))
+
+/* Wrapper around raw_ftentox_rr: s goes through f_readreg() and d
+   through f_writereg() before the call, and both are f_unlock()ed
+   afterwards. */
+MIDFUNC(2,ftentox_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_ftentox_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,ftentox_rr,(FW d, FR s))
+
 MIDFUNC(2,frndint_rr,(FW d, FR s))
 {
     s=f_readreg(s);
@@ -4237,6 +4288,106 @@ MIDFUNC(2,flog2_rr,(FW d, FR s))
 }
 MENDFUNC(2,flog2_rr,(FW d, FR s))
 
+/* Wrapper around raw_flogN_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,flogN_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_flogN_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,flogN_rr,(FW d, FR s))
+
+/* Wrapper around raw_flogNP1_rr: s goes through f_readreg() and d
+   through f_writereg() before the call, and both are f_unlock()ed
+   afterwards. */
+MIDFUNC(2,flogNP1_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_flogNP1_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,flogNP1_rr,(FW d, FR s))
+
+/* Wrapper around raw_flog10_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,flog10_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_flog10_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,flog10_rr,(FW d, FR s))
+
+/* Wrapper around raw_fasin_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,fasin_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fasin_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fasin_rr,(FW d, FR s))
+
+/* Wrapper around raw_facos_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,facos_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_facos_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,facos_rr,(FW d, FR s))
+
+/* Wrapper around raw_fatan_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,fatan_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fatan_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fatan_rr,(FW d, FR s))
+
+/* Wrapper around raw_fatanh_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,fatanh_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fatanh_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fatanh_rr,(FW d, FR s))
+
+/* Wrapper around raw_fsinh_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,fsinh_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fsinh_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fsinh_rr,(FW d, FR s))
+
+/* Wrapper around raw_fcosh_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,fcosh_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fcosh_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fcosh_rr,(FW d, FR s))
+
+/* Wrapper around raw_ftanh_rr: s goes through f_readreg() and d through
+   f_writereg() before the call, and both are f_unlock()ed afterwards. */
+MIDFUNC(2,ftanh_rr,(FW d, FR s))
+{
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_ftanh_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,ftanh_rr,(FW d, FR s))
+
 MIDFUNC(2,fneg_rr,(FW d, FR s))
 {
     s=f_readreg(s);
diff --git a/compemu_support_old.c b/compemu_support_old.c
new file mode 100755 (executable)
index 0000000..b37a3c3
--- /dev/null
@@ -0,0 +1,6182 @@
+#define writemem_special writemem
+#define readmem_special  readmem
+
+#define USE_MATCHSTATE 0
+#define setzflg_uses_bsf 0
+#include "sysconfig.h"
+#include "sysdeps.h"
+#include "config.h"
+#include "options.h"
+#include "events.h"
+#include "include/memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "comptbl.h"
+#include "compemu.h"
+
+// %%% BRIAN KING WAS HERE %%%
+extern int canbang;
+#include <sys/mman.h>
+#include <limits.h>    /* for PAGESIZE */
+
+cpuop_func *compfunctbl[65536];
+cpuop_func *nfcompfunctbl[65536];
+#ifdef NOFLAGS_SUPPORT
+cpuop_func *nfcpufunctbl[65536];
+#endif
+uae_u8* comp_pc_p;
+
+uae_u8* start_pc_p;
+uae_u32 start_pc;
+uae_u32 current_block_pc_p;
+uae_u32 current_block_start_target;
+uae_u32 needed_flags;
+static uae_u32 next_pc_p;
+static uae_u32 taken_pc_p;
+static int     branch_cc;
+int segvcount=0;
+int soft_flush_count=0;
+int hard_flush_count=0;
+int compile_count=0;
+int checksum_count=0;
+static uae_u8* current_compile_p=NULL;
+static uae_u8* max_compile_start;
+uae_u8* compiled_code=NULL;
+static uae_s32 reg_alloc_run;
+static int have_rat_stall=0;
+
+void* pushall_call_handler=NULL;
+static void* popall_do_nothing=NULL;
+static void* popall_exec_nostats=NULL;
+static void* popall_execute_normal=NULL;
+static void* popall_cache_miss=NULL;
+static void* popall_recompile_block=NULL;
+static void* popall_check_checksum=NULL;
+
+extern uae_u32 oink;
+extern unsigned long foink3;
+extern unsigned long foink;
+
+/* The 68k only ever executes from even addresses. So right now, we
+   waste half the entries in this array
+   UPDATE: We now use those entries to store the start of the linked
+   lists that we maintain for each hash result.
+   In this file, for a cache line cl, cache_tags[cl].handler holds the
+   handler and cache_tags[cl+1].bi the head of the blockinfo list. */
+cacheline cache_tags[TAGSIZE];
+int letit=0;
+/* Pool of spare blockinfos: filled by alloc_blockinfos(), emptied one at
+   a time by get_blockinfo_addr_new() */
+blockinfo* hold_bi[MAX_HOLD_BI];
+/* Heads of the lists maintained by add_to_active()/add_to_dormant() */
+blockinfo* active;
+blockinfo* dormant;
+
+op_properties prop[65536];
+
+#ifdef NOFLAGS_SUPPORT
+/* 68040 */
+extern struct cputbl op_smalltbl_0_nf[];
+#endif
+extern struct cputbl op_smalltbl_0_comp_nf[];
+extern struct cputbl op_smalltbl_0_comp_ff[];
+#ifdef NOFLAGS_SUPPORT
+/* 68020 + 68881 */
+extern struct cputbl op_smalltbl_1_nf[];
+/* 68020 */
+extern struct cputbl op_smalltbl_2_nf[];
+/* 68010 */
+extern struct cputbl op_smalltbl_3_nf[];
+/* 68000 */
+extern struct cputbl op_smalltbl_4_nf[];
+/* 68000 slow but compatible.  */
+extern struct cputbl op_smalltbl_5_nf[];
+#endif
+
+static void flush_icache_hard(int n);
+
+
+
+bigstate live;
+smallstate empty_ss;
+smallstate default_ss;
+static int optlev;
+
+static int writereg(int r, int size);
+static void unlock(int r);
+static void setlock(int r);
+static int readreg_specific(int r, int size, int spec);
+static int writereg_specific(int r, int size, int spec);
+static void prepare_for_call_1(void);
+static void prepare_for_call_2(void);
+static void align_target(uae_u32 a);
+
+static uae_s32 nextused[VREGS];
+
+static uae_u8 *popallspace;
+
+uae_u32 m68k_pc_offset;
+
+/* Some arithmetic operations can be optimized away if the operands
+   are known to be constant. But that's only a good idea when the
+   side effects they would have on the flags are not important. This
+   variable indicates whether we need the side effects or not.
+*/
+uae_u32 needflags=0;
+
+/* Flag handling is complicated.
+
+   x86 instructions create flags, which quite often are exactly what we
+   want. So at times, the "68k" flags are actually in the x86 flags.
+
+   Then again, sometimes we do x86 instructions that clobber the x86
+   flags, but don't represent a corresponding m68k instruction. In that
+   case, we have to save them. 
+
+   We used to save them to the stack, but now store them back directly
+   into the regflags.cznv of the traditional emulation. Thus some odd
+   names.
+
+   So flags can be in either of two places (used to be three; boy were
+   things complicated back then!); And either place can contain either
+   valid flags or invalid trash (and on the stack, there was also the
+   option of "nothing at all", now gone). A couple of variables keep
+   track of the respective states.
+
+   To make things worse, we might or might not be interested in the flags.
+   By default, we are, but a call to dont_care_flags can change that
+   until the next call to live_flags. If we are not, pretty much whatever
+   is in the register and/or the native flags is seen as valid.
+*/
+
+
+/* Return the head of the blockinfo list for cache line cl.  The list
+   head lives in the odd entry following the line's own entry, i.e. in
+   cache_tags[cl+1].bi. */
+static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
+{
+    return cache_tags[cl+1].bi;
+}
+
+/* Look up the blockinfo whose pc_p equals addr by walking the list of
+   the cache line that addr maps to.  Returns NULL when no block for
+   addr is on that list. */
+static __inline__ blockinfo* get_blockinfo_addr(void* addr)
+{
+    blockinfo* cur;
+
+    for (cur=get_blockinfo(cacheline(addr)); cur; cur=cur->next_same_cl) {
+       if (cur->pc_p==addr)
+           return cur;
+    }
+    return NULL;
+}
+
+               
+/*******************************************************************
+ * All sorts of list related functions for all of the lists        *
+ *******************************************************************/
+
+/* Unlink bi from the list of its cache line and refresh that line's
+   handler: the handler_to_use of the new list head if the list is still
+   non-empty, otherwise popall_execute_normal. */
+static __inline__ void remove_from_cl_list(blockinfo* bi)
+{
+    uae_u32 cl=cacheline(bi->pc_p);
+
+    if (bi->prev_same_cl_p) 
+       *(bi->prev_same_cl_p)=bi->next_same_cl;
+    if (bi->next_same_cl)
+       bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
+    if (cache_tags[cl+1].bi) 
+       cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
+    else 
+       cache_tags[cl].handler=popall_execute_normal;
+}
+
+/* Unlink bi from the list it is on via its next/prev_p links.  prev_p
+   points at whichever pointer currently refers to bi (a list head or the
+   previous element's next field). */
+static __inline__ void remove_from_list(blockinfo* bi)
+{
+    if (bi->prev_p) 
+       *(bi->prev_p)=bi->next;
+    if (bi->next)
+       bi->next->prev_p=bi->prev_p;
+}
+
+/* Unlink bi from both lists it can be on: the next/prev_p list and the
+   per-cache-line list. */
+static __inline__ void remove_from_lists(blockinfo* bi)
+{
+    remove_from_list(bi);
+    remove_from_cl_list(bi);
+}
+
+/* Insert bi at the head of the list of the cache line its pc_p maps to
+   and make the line's handler bi->handler_to_use. */
+static __inline__ void add_to_cl_list(blockinfo* bi)
+{
+    uae_u32 cl=cacheline(bi->pc_p);
+    
+    /* The old head, if any, is now reached through bi->next_same_cl */
+    if (cache_tags[cl+1].bi)
+       cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
+    bi->next_same_cl=cache_tags[cl+1].bi;
+
+    cache_tags[cl+1].bi=bi;
+    bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
+
+    cache_tags[cl].handler=bi->handler_to_use;
+}
+
+/* Move bi to the head of its cache line's list by removing and
+   re-inserting it; this also makes the line's handler
+   bi->handler_to_use. */
+static __inline__ void raise_in_cl_list(blockinfo* bi)
+{
+    remove_from_cl_list(bi);
+    add_to_cl_list(bi);
+}
+
+/* Insert bi at the head of the global `active` list. */
+static __inline__ void add_to_active(blockinfo* bi)
+{
+    /* The old head, if any, is now reached through bi->next */
+    if (active) 
+       active->prev_p=&(bi->next);
+    bi->next=active;
+
+    active=bi;
+    bi->prev_p=&active;
+}
+
+/* Insert bi at the head of the global `dormant` list. */
+static __inline__ void add_to_dormant(blockinfo* bi)
+{
+    /* The old head, if any, is now reached through bi->next */
+    if (dormant) 
+       dormant->prev_p=&(bi->next);
+    bi->next=dormant;
+
+    dormant=bi;
+    bi->prev_p=&dormant;
+}
+
+/* Unlink dependency d from whatever dependency list it is on and clear
+   its links.  Safe to call on a d that is not linked (both links NULL). */
+static __inline__ void remove_dep(dependency* d)
+{
+    if (d->prev_p) 
+       *(d->prev_p)=d->next;
+    if (d->next)
+       d->next->prev_p=d->prev_p;
+    d->prev_p=NULL;
+    d->next=NULL;
+}
+
+/* This block's code is about to be thrown away, so it no longer
+   depends on anything else.  Both of the block's dependency records
+   (dep[0] and dep[1]) are unlinked. */
+static __inline__ void remove_deps(blockinfo* bi)
+{
+    remove_dep(&(bi->dep[0]));
+    remove_dep(&(bi->dep[1]));
+}
+
+/* Overwrite the 32-bit value at d->jmp_off with the distance from the
+   end of that 4-byte field to address a -- presumably the relative
+   displacement of an already emitted jump, so that it now lands on a.
+   The casts assume pointers fit in 32 bits. */
+static __inline__ void adjust_jmpdep(dependency* d, void* a)
+{
+    *(d->jmp_off)=(uae_u32)a-((uae_u32)d->jmp_off+4);
+}
+
+/********************************************************************
+ * Soft flush handling support functions                            *
+ ********************************************************************/
+
+/* Set bi's direct_handler_to_use to dh.  When the value actually
+   changes, every dependency on bi's deplist that has a recorded jump
+   location is re-pointed at dh first. */
+static __inline__ void set_dhtu(blockinfo* bi, void* dh)
+{
+    dependency* dep;
+
+    if (dh==bi->direct_handler_to_use)
+       return;
+    for (dep=bi->deplist; dep; dep=dep->next) {
+       if (dep->jmp_off)
+           adjust_jmpdep(dep,dh);
+    }
+    bi->direct_handler_to_use=dh;
+}
+
+/* Return bi to its "nothing compiled" state: optimisation level 0,
+   execution counter reloaded from currprefs.optcount[0], no handlers of
+   its own (execution goes through popall_execute_normal and direct jumps
+   through bi->direct_pen), all flags needed, and no outgoing
+   dependencies. */
+static __inline__ void invalidate_block(blockinfo* bi)
+{
+    int i;
+
+    bi->optlevel=0;
+    bi->count=currprefs.optcount[0]-1;
+    bi->handler=NULL;
+    bi->handler_to_use=popall_execute_normal;
+    bi->direct_handler=NULL;
+    set_dhtu(bi,bi->direct_pen);
+    bi->needed_flags=0xff;
+
+    for (i=0;i<2;i++) {
+       bi->dep[i].jmp_off=NULL;
+       bi->dep[i].target=NULL;
+    }
+    remove_deps(bi);
+}
+
+/* Record that dependency slot i of bi refers to the block at `target`
+   through the jump field at jmpaddr, and push that record onto the front
+   of the target block's deplist.  The target block must already exist;
+   under Dif() a missing target aborts. */
+static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
+{
+    blockinfo*  tbi=get_blockinfo_addr((void*)target);
+    
+    Dif(!tbi) {
+       printf("Could not create jmpdep!\n");
+       abort();
+    }
+    bi->dep[i].jmp_off=jmpaddr;
+    bi->dep[i].target=tbi;
+    bi->dep[i].next=tbi->deplist;
+    if (bi->dep[i].next) 
+       bi->dep[i].next->prev_p=&(bi->dep[i].next);
+    bi->dep[i].prev_p=&(tbi->deplist);
+    tbi->deplist=&(bi->dep[i]);
+}
+
+/* Condense bigstate b into smallstate s.  For every native register that
+   holds at least one virtual register, the most recently added one
+   (holds[nholds-1]) and its validsize are recorded; registers holding
+   nothing get validsize 0.  All dirtysize fields end up 0 because of the
+   final loop (see FIXME).
+   NOTE(review): the printf() of the occupied-register count looks like
+   leftover debug output. */
+static __inline__ void big_to_small_state(bigstate* b, smallstate* s)
+{
+    int i;
+    int count=0;
+
+    for (i=0;i<N_REGS;i++) {
+       s->nat[i].validsize=0;
+       s->nat[i].dirtysize=0;
+       if (b->nat[i].nholds) {
+           int index=b->nat[i].nholds-1;
+           int r=b->nat[i].holds[index];
+           s->nat[i].holds=r;
+           s->nat[i].validsize=b->state[r].validsize;
+           s->nat[i].dirtysize=b->state[r].dirtysize;
+           count++;
+       }
+    }
+    printf("count=%d\n",count);
+    for (i=0;i<N_REGS;i++) {  // FIXME --- don't do dirty yet 
+       s->nat[i].dirtysize=0;
+    }  
+}
+
+/* Mark bi as having a register state attached (havestate=1, status
+   BI_TARGETTED) and make bi->direct_pen its direct handler.  If direct
+   jumps currently go to the old direct_handler they are redirected to
+   direct_pen first. */
+static __inline__ void attached_state(blockinfo* bi)
+{
+    bi->havestate=1;
+    if (bi->direct_handler_to_use==bi->direct_handler)
+       set_dhtu(bi,bi->direct_pen);
+    bi->direct_handler=bi->direct_pen;
+    bi->status=BI_TARGETTED;
+}
+
+/* Return the blockinfo for addr, creating it if none exists yet.
+   A new blockinfo is taken from the hold_bi pool, bound to addr,
+   invalidated and put on the active list and on addr's cache-line list.
+   Aborts if the pool is empty.  With USE_OPTIMIZER, NULL is returned
+   while reg_alloc_run is set.  setstate is only looked at when
+   USE_MATCHSTATE is enabled: the current live state is then attached to
+   a block that has none yet. */
+static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
+{
+    blockinfo*  bi=get_blockinfo_addr(addr);
+    int i;
+
+#if USE_OPTIMIZER
+    if (reg_alloc_run) 
+       return NULL;
+#endif
+    if (!bi) {
+       /* Take the first spare entry from the pool; the !bi test in the
+          loop condition stops the scan once one has been claimed */
+       for (i=0;i<MAX_HOLD_BI && !bi;i++) {
+           if (hold_bi[i]) {
+               bi=hold_bi[i];
+               hold_bi[i]=NULL;
+               bi->pc_p=addr;
+               invalidate_block(bi);
+               add_to_active(bi);
+               add_to_cl_list(bi);
+           }
+       }
+    }
+    if (!bi) {
+       write_log ("Looking for blockinfo, can't find free one\n");
+       abort();
+    }
+
+#if USE_MATCHSTATE
+    if (setstate &&
+       !bi->havestate) {
+       big_to_small_state(&live,&(bi->env));
+       attached_state(bi);
+    }
+#endif
+    return bi;
+}
+
+static void prepare_block(blockinfo* bi);
+
+/* Refill the hold_bi pool.  Slots are filled in order with blockinfo
+   structs carved out of the buffer at current_compile_p, each one
+   initialised with prepare_block().  The function returns at the first
+   slot that is already occupied, so later slots are not examined. */
+static __inline__ void alloc_blockinfos(void) 
+{
+    int i;
+    blockinfo* bi;
+
+    for (i=0;i<MAX_HOLD_BI;i++) {
+       if (hold_bi[i])
+           return;
+       bi=hold_bi[i]=(blockinfo*)current_compile_p;
+       current_compile_p+=sizeof(blockinfo);
+       
+       prepare_block(bi);
+    }
+}
+
+/********************************************************************
+ * Preferences handling. This is just a convenient place to put it  *
+ ********************************************************************/
+extern int have_done_picasso;
+
+/* Copy the JIT-related settings from changed_prefs into currprefs and
+   act on them:
+     - a changed cachesize triggers alloc_cache();
+     - when direct memory access is not possible (canbang == 0) or the
+       JIT cache is disabled, all comptrust* settings are forced to 1
+       ("indirect") in both currprefs and changed_prefs;
+     - unless compforcesettings is set or have_done_picasso is non-zero,
+       settings that differ from the recommended ones are reported via
+       write_log().  The code that would stop on such problems is
+       compiled out (#if 0). */
+void check_prefs_changed_comp (void)
+{
+    currprefs.comptrustbyte = changed_prefs.comptrustbyte;
+    currprefs.comptrustword = changed_prefs.comptrustword;
+    currprefs.comptrustlong = changed_prefs.comptrustlong;
+    currprefs.comptrustnaddr= changed_prefs.comptrustnaddr;
+    currprefs.compnf = changed_prefs.compnf;
+    currprefs.comp_hardflush= changed_prefs.comp_hardflush;
+    currprefs.comp_constjump= changed_prefs.comp_constjump;
+    currprefs.comp_oldsegv= changed_prefs.comp_oldsegv;
+    currprefs.compfpu= changed_prefs.compfpu;
+
+    if (currprefs.cachesize!=changed_prefs.cachesize) {
+       currprefs.cachesize = changed_prefs.cachesize;
+       alloc_cache();
+    }
+
+    // Turn off illegal-mem logging when using JIT...
+    // NOTE(review): with the "= 0" commented out this only copies the
+    // setting; it no longer turns anything off.
+    if( currprefs.cachesize )
+       currprefs.illegal_mem = changed_prefs.illegal_mem;// = 0;
+
+    currprefs.comp_midopt=changed_prefs.comp_midopt;
+    currprefs.comp_lowopt=changed_prefs.comp_lowopt;
+
+    /* Only comptrustbyte is tested; the other three are assumed to be
+       forced together with it */
+    if ( ( !canbang || !currprefs.cachesize ) && 
+       currprefs.comptrustbyte != 1 )
+    {
+       // Set all of these to indirect when canbang == 0
+       // Basically, set the  compforcesettings option...
+       currprefs.comptrustbyte = 1;
+       currprefs.comptrustword = 1;
+       currprefs.comptrustlong = 1;
+       currprefs.comptrustnaddr= 1;
+       currprefs.compforcesettings = 1;
+
+       changed_prefs.comptrustbyte = 1;
+       changed_prefs.comptrustword = 1;
+       changed_prefs.comptrustlong = 1;
+       changed_prefs.comptrustnaddr= 1;
+       changed_prefs.compforcesettings = 1;
+
+       if( currprefs.cachesize )
+       {
+           write_log( "JIT: Reverting to \"indirect\" access, because canbang is zero!\n" );
+       }
+    }
+
+    if (!currprefs.compforcesettings && !have_done_picasso) {
+       /* stop is only consumed by the disabled block below */
+       int stop=0;
+       if (currprefs.comptrustbyte!=0 && currprefs.comptrustbyte!=3) 
+           stop = 1, write_log("<JIT compiler> : comptrustbyte is not 'direct' or 'afterpic'\n");
+       if (currprefs.comptrustword!=0 && currprefs.comptrustword!=3) 
+           stop = 1, write_log("<JIT compiler> : comptrustword is not 'direct' or 'afterpic'\n");
+       if (currprefs.comptrustlong!=0 && currprefs.comptrustlong!=3) 
+           stop = 1, write_log("<JIT compiler> : comptrustlong is not 'direct' or 'afterpic'\n");
+       if (currprefs.comptrustnaddr!=0 && currprefs.comptrustnaddr!=3) 
+           stop = 1, write_log("<JIT compiler> : comptrustnaddr is not 'direct' or 'afterpic'\n");
+       if (currprefs.compnf!=1) 
+           stop = 1, write_log("<JIT compiler> : compnf is not 'yes'\n");
+       if (currprefs.cachesize<1024) 
+           stop = 1, write_log("<JIT compiler> : cachesize is less than 1024\n");
+       if (currprefs.comp_hardflush) 
+           stop = 1, write_log("<JIT compiler> : comp_flushmode is 'hard'\n");
+       if (!canbang) 
+           stop = 1, write_log("<JIT compiler> : Cannot use most direct memory access,\n"
+                               "                 and unable to recover from failed guess!\n");
+#if 0
+       if (stop) {
+           gui_message("JIT: Configuration problems were detected!\n"
+                     "JIT: These will adversely affect performance, and should\n"
+                     "JIT: not be used. For more info, please see README.JIT-tuning\n"
+                     "JIT: in the UAE documentation directory. You can force\n"
+                     "JIT: your settings to be used by setting\n"
+                     "JIT:      'compforcesettings=yes'\n"
+                     "JIT: in your config file\n");
+           exit(1);
+       }
+#endif
+    }
+}
+
+/********************************************************************
+ * Get the optimizer stuff                                          *
+ ********************************************************************/
+
+#include "compemu_optimizer.c"
+
+/********************************************************************
+ * Functions to emit data into memory, and other general support    *
+ ********************************************************************/
+
+static uae_u8* target; /* current write position for emitted data; advanced by emit_byte/emit_word/emit_long */
+/* Emitter initialisation hook; it currently does nothing.
+ */
+static  void emit_init(void)
+{
+}
+
+/* Store one byte at the current emit position and step past it. */
+static __inline__ void emit_byte(uae_u8 x)
+{
+    target[0]=x;
+    target+=1;
+}
+
+/* Store the 16-bit value x at the current emit position (in host byte
+ * order, with no alignment adjustment) and advance by two bytes. */
+static __inline__ void emit_word(uae_u16 x)
+{
+    uae_u16* slot=(uae_u16*)target;
+
+    slot[0]=x;
+    target+=2;
+}
+
+/* Store the 32-bit value x at the current emit position (in host byte
+ * order, with no alignment adjustment) and advance by four bytes. */
+static __inline__ void emit_long(uae_u32 x)
+{
+    uae_u32* slot=(uae_u32*)target;
+
+    slot[0]=x;
+    target+=4;
+}
+
+/* Return oldv with the order of its four bytes reversed. */
+static __inline__ uae_u32 reverse32(uae_u32 oldv)
+{
+    uae_u32 top_to_bottom=(oldv>>24)&0xff;
+    uae_u32 upper_to_lower=(oldv>>8)&0xff00;
+    uae_u32 lower_to_upper=(oldv<<8)&0xff0000;
+    uae_u32 bottom_to_top=(oldv<<24)&0xff000000;
+
+    return top_to_bottom | upper_to_lower | lower_to_upper | bottom_to_top;
+}
+
+/* Redirect emission to t.  lopt_emit_all() runs first, while the previous
+ * target is still in effect.  NOTE(review): presumably that call writes out
+ * anything the low-level optimizer still has queued -- confirm in
+ * compemu_optimizer.c.
+ */
+void set_target(uae_u8* t)
+{
+    lopt_emit_all();
+    target=t;
+}
+/* Return the current emit position without calling lopt_emit_all().
+ */
+static __inline__ uae_u8* get_target_noopt(void)
+{
+    return target;
+}
+/* Call lopt_emit_all() and then return the resulting emit position.
+ */
+__inline__ uae_u8* get_target(void)
+{
+    lopt_emit_all();
+    return get_target_noopt();
+}
+
+
+/********************************************************************
+ * Getting the information about the target CPU                     *
+ ********************************************************************/
+
+#include "compemu_raw_x86.c"
+
+
+/********************************************************************
+ * Flags status handling. EMIT TIME!                                *
+ ********************************************************************/
+
+static void bt_l_ri_noclobber(R4 r, IMM i);
+/* Make sure live.flags_in_flags is VALID.
+ * Nothing is done when it already is.  When only the "on stack" copy is
+ * valid, FLAGTMP is read into native register FLAG_NREG2 and passed to
+ * raw_reg_to_flags().  If neither copy is valid the function reports the
+ * state with printf() and aborts.
+ * NOTE(review): Dif is defined outside this chunk; it looks like a
+ * debug-only "if" -- confirm.
+ */
+static void make_flags_live_internal(void)
+{
+    if (live.flags_in_flags==VALID)
+       return;
+    Dif (live.flags_on_stack==TRASH) {
+       printf("Want flags, got something on stack, but it is TRASH\n");
+       abort();
+    }
+    if (live.flags_on_stack==VALID) {
+       int tmp;
+       tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
+       raw_reg_to_flags(tmp);
+       unlock(tmp); /* readreg_specific() locked the register */
+
+       live.flags_in_flags=VALID;
+       return;
+    }
+    printf("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
+          live.flags_in_flags,live.flags_on_stack);
+    abort();
+}
+/* Make sure live.flags_on_stack is VALID.
+ * When the flags are not marked important the saved copy is simply declared
+ * valid and nothing is emitted.  Otherwise raw_flags_to_reg() is applied to
+ * the native register obtained for writing FLAGTMP (requested as
+ * FLAG_NREG1).
+ * NOTE(review): the Dif/else pairing below depends on how the Dif macro
+ * expands; it is defined outside this chunk.
+ */
+static void flags_to_stack(void)
+{
+    if (live.flags_on_stack==VALID)
+       return;
+    if (!live.flags_are_important) {
+       live.flags_on_stack=VALID;
+       return;
+    }
+    Dif (live.flags_in_flags!=VALID)
+       abort();
+    else  {
+       int tmp;
+       tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
+       raw_flags_to_reg(tmp);
+       unlock(tmp); /* writereg_specific() locked the register */
+    }
+    live.flags_on_stack=VALID;
+}
+
+/* Declare the "in flags" copy of the flags trashed.  If that copy was the
+ * only valid one, flags_to_stack() is called first. */
+static __inline__ void clobber_flags(void)
+{
+    int only_in_flags=(live.flags_in_flags==VALID) && (live.flags_on_stack!=VALID);
+
+    if (only_in_flags)
+       flags_to_stack();
+    live.flags_in_flags=TRASH;
+}
+
+/* Prepare for leaving the compiled stuff: all that is needed for the flags
+ * is a valid "on stack" copy. */
+static __inline__ void flush_flags(void)
+{
+    flags_to_stack();
+}
+
+int touchcnt; /* running counter; its value is stamped into a native register's 'touched' field on each use */
+
+/********************************************************************
+ * register allocation per block logging                            *
+ ********************************************************************/
+
+static uae_s8 vstate[VREGS]; /* per virtual register: L_UNKNOWN, L_NEEDED or L_UNNEEDED */
+static uae_s8 nstate[N_REGS]; /* per native register: L_UNKNOWN, L_UNAVAIL or a virtual register number */
+
+#define L_UNKNOWN -127 /* value every entry gets in log_startblock() */
+#define L_UNAVAIL -1 /* stored in nstate[] by log_isused() and log_flush() */
+#define L_NEEDED -2 /* stored in vstate[] by log_isreg() and log_flush() */
+#define L_UNNEEDED -3 /* stored in vstate[] by log_clobberreg() */
+
+/* Reset the per-block log: every virtual and native register entry
+ * becomes L_UNKNOWN. */
+static __inline__ void log_startblock(void)
+{
+    int v=0;
+    int n=0;
+
+    while (v<VREGS) {
+       vstate[v]=L_UNKNOWN;
+       v++;
+    }
+    while (n<N_REGS) {
+       nstate[n]=L_UNKNOWN;
+       n++;
+    }
+}
+
+/* Record native register n as L_UNAVAIL, unless something has already
+ * been logged for it in this block. */
+static __inline__ void log_isused(int n)
+{
+    if (nstate[n]!=L_UNKNOWN)
+       return;
+    nstate[n]=L_UNAVAIL;
+}
+
+/* Record that native register n was loaded with virtual register r.
+ * Only the first event per register is kept: n remembers r, and r is
+ * marked L_NEEDED. */
+static __inline__ void log_isreg(int n, int r)
+{
+    if (L_UNKNOWN==nstate[n]) {
+       nstate[n]=r;
+    }
+    if (L_UNKNOWN==vstate[r]) {
+       vstate[r]=L_NEEDED;
+    }
+}
+
+/* Mark virtual register r as L_UNNEEDED, unless something has already
+ * been logged for it in this block. */
+static __inline__ void log_clobberreg(int r)
+{
+    if (vstate[r]!=L_UNKNOWN)
+       return;
+    vstate[r]=L_UNNEEDED;
+}
+
+/* This ends all possibility of clever register allocation */
+
+/* Close the log for this block: every virtual register still L_UNKNOWN
+ * becomes L_NEEDED, every native register still L_UNKNOWN becomes
+ * L_UNAVAIL. */
+static __inline__ void log_flush(void)
+{
+    int v;
+    int n;
+
+    for (v=0;v<VREGS;v++) {
+       if (vstate[v]!=L_UNKNOWN)
+           continue;
+       vstate[v]=L_NEEDED;
+    }
+    for (n=0;n<N_REGS;n++) {
+       if (nstate[n]!=L_UNKNOWN)
+           continue;
+       nstate[n]=L_UNAVAIL;
+    }
+}
+/* Write the per-block register log to write_log().
+ * The dump is currently switched off: the unconditional return below makes
+ * everything after it unreachable.
+ */
+static __inline__ void log_dump(void)
+{
+    int i;
+    
+    return; /* dump disabled */
+
+    write_log("----------------------\n");
+    for (i=0;i<N_REGS;i++) {
+       switch(nstate[i]) {
+        case L_UNKNOWN: write_log("Nat %d : UNKNOWN\n",i); break;
+        case L_UNAVAIL: write_log("Nat %d : UNAVAIL\n",i); break;
+        default:        write_log("Nat %d : %d\n",i,nstate[i]); break;
+       }
+    }
+    for (i=0;i<VREGS;i++) {
+       if (vstate[i]==L_UNNEEDED)
+           write_log("Virt %d: UNNEEDED\n",i);
+    }
+}
+
+/********************************************************************
+ * register status handling. EMIT TIME!                             *
+ ********************************************************************/
+
+/* Set the status of virtual register r.  A register that becomes a
+ * constant is first reported to the block log via log_clobberreg(). */
+static __inline__ void set_status(int r, int status)
+{
+    if (ISCONST==status) {
+       log_clobberreg(r);
+    }
+    live.state[r].status=status;
+}
+
+
+/* Non-zero when the status of virtual register r is CLEAN or DIRTY. */
+static __inline__ int isinreg(int r)
+{
+    if (live.state[r].status==CLEAN)
+       return 1;
+    return live.state[r].status==DIRTY;
+}
+
+/* Emit raw_lea_l_brr(r,r,val) for native register r; nothing is emitted
+ * when val is zero. */
+static __inline__ void adjust_nreg(int r, uae_u32 val)
+{
+    if (val) {
+       raw_lea_l_brr(r,r,val);
+    }
+}
+/* Bring the memory copy of virtual register r up to date.
+ * If r is the only occupant of an unlocked native register and carries a
+ * non-zero val, val is first applied to the native register through
+ * adjust_nreg() and r becomes dirty over all 4 bytes.  A DIRTY r is then
+ * stored to live.state[r].mem with a store of dirtysize bytes and marked
+ * CLEAN.  r is not removed from its native register.
+ */
+static  void tomem(int r)
+{
+    int rr=live.state[r].realreg;
+
+    if (isinreg(r)) {
+       if (live.state[r].val &&
+           live.nat[rr].nholds==1 &&
+           !live.nat[rr].locked) {
+           // printf("RemovingA offset %x from reg %d (%d) at %p\n",
+           //   live.state[r].val,r,rr,target); 
+           adjust_nreg(rr,live.state[r].val);
+           live.state[r].val=0;
+           live.state[r].dirtysize=4;
+           set_status(r,DIRTY);
+       }
+    }
+
+    if (live.state[r].status==DIRTY) {
+       switch (live.state[r].dirtysize) {
+        case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
+        case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
+        case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
+        default: abort();
+       }
+       set_status(r,CLEAN);
+       live.state[r].dirtysize=0;
+    }
+}
+
+/* Non-zero when the status of virtual register r is ISCONST. */
+static __inline__ int isconst(int r)
+{
+    return ISCONST==live.state[r].status;
+}
+/* Non-static wrapper around isconst().
+ */
+int is_const(int r)
+{
+    return isconst(r);
+}
+/* If virtual register r currently is a constant, emit a store of that
+ * constant to live.state[r].mem, clear val and mark r INMEM.  Does nothing
+ * for non-constant registers.
+ */
+static __inline__ void writeback_const(int r)
+{
+    if (!isconst(r))
+       return;
+    Dif (live.state[r].needflush==NF_HANDLER) {
+       write_log ("Trying to write back constant NF_HANDLER!\n");
+       abort();
+    }
+
+    raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
+    live.state[r].val=0;
+    set_status(r,INMEM);
+}
+
+/* Write virtual register r back to memory whether it is a constant
+ * (writeback_const) or not (tomem). */
+static __inline__ void tomem_c(int r)
+{
+    if (!isconst(r)) {
+       tomem(r);
+       return;
+    }
+    writeback_const(r);
+}
+/* Remove virtual register r from the native register holding it.
+ * r is written back with tomem() first, then taken out of the native
+ * register's holds[] list (the last entry is moved into the freed slot).
+ * Afterwards r is INMEM with realreg==-1.  Does nothing when r is not in a
+ * register.
+ */
+static  void evict(int r)
+{
+    int rr;
+
+    if (!isinreg(r))
+       return;
+    tomem(r);
+    rr=live.state[r].realreg;
+
+    Dif (live.nat[rr].locked &&
+       live.nat[rr].nholds==1) {
+       write_log ("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
+       abort();
+    }
+
+    live.nat[rr].nholds--;
+    if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
+       int topreg=live.nat[rr].holds[live.nat[rr].nholds];
+       int thisind=live.state[r].realind;
+       live.nat[rr].holds[thisind]=topreg;
+       live.state[topreg].realind=thisind;
+    }
+    live.state[r].realreg=-1;
+    set_status(r,INMEM);
+}
+
+/* Evict every virtual register currently held by native register r,
+ * walking holds[] from the highest index down. */
+static __inline__ void free_nreg(int r)
+{
+    int slot;
+
+    for (slot=live.nat[r].nholds;slot--;)
+       evict(live.nat[r].holds[slot]);
+
+    Dif (live.nat[r].nholds!=0) {
+       printf("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
+       abort();
+    }
+}
+
+/* Use with care!
+ * Declares a register-resident virtual register r clean and fully valid
+ * (validsize 4, dirtysize 0, val 0) without emitting any code. */
+static __inline__ void isclean(int r)
+{
+    if (isinreg(r)) {
+       live.state[r].validsize=4;
+       live.state[r].dirtysize=0;
+       live.state[r].val=0;
+       set_status(r,CLEAN);
+    }
+}
+/* Detach virtual register r from its native register.  isclean() runs
+ * first, so the evict() that follows finds r CLEAN.
+ */
+static __inline__ void disassociate(int r)
+{
+    isclean(r);
+    evict(r);
+}
+/* Turn virtual register r into the constant val: r is disassociated from
+ * any native register, val is recorded and the status becomes ISCONST.
+ */
+static __inline__ void set_const(int r, uae_u32 val)
+{
+    disassociate(r);
+    live.state[r].val=val;
+    set_status(r,ISCONST);
+}
+/* Return the val field recorded for virtual register r.
+ */
+static __inline__ uae_u32 get_offset(int r)
+{
+    return live.state[r].val;
+}
+/* Choose a native register for virtual register r and attach r to it.
+ *
+ *   r           virtual register to place
+ *   size        access size in bytes (1, 2 or 4); for 1 and 2 the native
+ *               register must have canbyte / canword set
+ *   willclobber non-zero if the caller overwrites the value, so the old
+ *               contents are not loaded
+ *   hint        preferred native register, or a negative value for none
+ *
+ * Only unlocked native registers are considered; the one with the lowest
+ * "badness" wins (its touched stamp, 0 when it holds nothing, strongly
+ * reduced when it equals hint).  Whatever the winner holds is evicted.
+ * If r is already in a register with a 2-byte valid part and 4 bytes are
+ * requested, the missing half is merged in from memory and r's existing
+ * native register is returned instead.  Returns the native register index.
+ */
+static  int alloc_reg_hinted(int r, int size, int willclobber, int hint)
+{
+    int bestreg;
+    uae_s32 when;
+    int i;
+    uae_s32 badness=0; /* to shut up gcc */
+    bestreg=-1;
+    when=2000000000;
+
+    for (i=N_REGS;i--;) {
+       badness=live.nat[i].touched;
+       if (live.nat[i].nholds==0)
+           badness=0;
+       if (i==hint)  
+           badness-=200000000;
+       if (!live.nat[i].locked && badness<when) {
+           if ((size==1 && live.nat[i].canbyte) ||
+               (size==2 && live.nat[i].canword) ||
+               (size==4)) {
+               bestreg=i;
+               when=badness;
+               if (live.nat[i].nholds==0 && hint<0)
+                   break;
+               if (i==hint)
+                   break;
+           }
+       }
+    }
+    Dif (bestreg==-1)
+       abort();
+
+    if (live.nat[bestreg].nholds>0) {
+       free_nreg(bestreg);
+    }
+    if (isinreg(r)) {
+       int rr=live.state[r].realreg;
+       /* This will happen if we read a partially dirty register at a
+          bigger size */
+       Dif (willclobber || live.state[r].validsize>=size)
+           abort();
+       Dif (live.nat[rr].nholds!=1)
+           abort();
+       if (size==4 && live.state[r].validsize==2) {
+           log_isused(bestreg);
+           raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
+           raw_bswap_32(bestreg);
+           raw_zero_extend_16_rr(rr,rr);
+           raw_zero_extend_16_rr(bestreg,bestreg);
+           raw_bswap_32(bestreg);
+           raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
+           live.state[r].validsize=4;
+           live.nat[rr].touched=touchcnt++;
+           return rr; /* r stays in rr; bestreg was only a scratch register */
+       }
+       if (live.state[r].validsize==1) {
+           /* Nothing yet */
+       }
+       evict(r);
+    }
+
+    if (!willclobber) {
+       if (live.state[r].status!=UNDEF) {
+           if (isconst(r)) {
+               raw_mov_l_ri(bestreg,live.state[r].val);
+               live.state[r].val=0;
+               live.state[r].dirtysize=4;
+               set_status(r,DIRTY);
+               log_isused(bestreg);
+           }
+           else {
+               if (r==FLAGTMP)
+                   raw_load_flagreg(bestreg,r);
+               else if (r==FLAGX)
+                   raw_load_flagx(bestreg,r);
+               else {
+                   raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
+               }
+               live.state[r].dirtysize=0;
+               set_status(r,CLEAN);
+               log_isreg(bestreg,r);
+           }
+       }
+       else {
+           live.state[r].val=0;
+           live.state[r].dirtysize=0;
+           set_status(r,CLEAN);
+           log_isused(bestreg);
+       }
+       live.state[r].validsize=4;
+    }
+    else { /* this is the easiest way, but not optimal. FIXME! */
+       /* Now it's trickier, but hopefully still OK */
+       if (!isconst(r) || size==4) {
+           live.state[r].validsize=size;
+           live.state[r].dirtysize=size;
+           live.state[r].val=0;
+           set_status(r,DIRTY);
+           if (size==4)
+               log_isused(bestreg);
+           else
+               log_isreg(bestreg,r);
+       }
+       else {
+           if (live.state[r].status!=UNDEF)
+               raw_mov_l_ri(bestreg,live.state[r].val);
+           live.state[r].val=0;
+           live.state[r].validsize=4;
+           live.state[r].dirtysize=4;
+           set_status(r,DIRTY);
+           log_isused(bestreg);
+       }
+    }
+    live.state[r].realreg=bestreg;
+    live.state[r].realind=live.nat[bestreg].nholds;
+    live.nat[bestreg].touched=touchcnt++;
+    live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
+    live.nat[bestreg].nholds++;
+
+    return bestreg;
+}
+/* alloc_reg_hinted() without a preferred native register (hint -1).
+ */
+static  int alloc_reg(int r, int size, int willclobber)
+{
+    return alloc_reg_hinted(r,size,willclobber,-1);
+}
+/* Decrement the lock count of native register r.
+ */
+static  void unlock(int r)
+{
+    Dif (!live.nat[r].locked)
+       abort();
+    live.nat[r].locked--;
+}
+/* Increment the lock count of native register r.
+ */
+static  void setlock(int r)
+{
+    live.nat[r].locked++;
+}
+
+
+/* Copy native register s into native register d and re-home every
+ * virtual register held by s onto d.
+ *
+ * Anything d held before is flushed with free_nreg() first.  Afterwards
+ * s holds nothing.  Does nothing when s and d are the same register.
+ */
+static void mov_nregs(int d, int s)
+{
+    int i;
+
+    if (s==d)
+       return;
+
+    if (live.nat[d].nholds>0)
+       free_nreg(d);
+
+    raw_mov_l_rr(d,s);
+    log_isused(d);
+
+    /* Hand over the bookkeeping: same slots, new native register */
+    for (i=0;i<live.nat[s].nholds;i++) {
+       int vs=live.nat[s].holds[i];
+
+       live.state[vs].realreg=d;
+       live.state[vs].realind=i;
+       live.nat[d].holds[i]=vs;
+    }
+    live.nat[d].nholds=live.nat[s].nholds;
+
+    live.nat[s].nholds=0;
+}
+
+
+/* Make virtual register r the only occupant of its native register.
+ *
+ *   r     virtual register; nothing happens unless it is in a native
+ *         register that it shares with other virtual registers
+ *   size  number of bytes the caller is about to overwrite; the old value
+ *         is only carried over when size is smaller than r's validsize
+ *   spec  hint passed to alloc_reg_hinted() when r has to move
+ *
+ * If all co-occupants are clean with no pending offset, size is below
+ * validsize and the register is unlocked, the co-occupants are evicted and
+ * r keeps the register.  Otherwise r is moved to a newly allocated native
+ * register while keeping its old state information.
+ */
+static __inline__ void make_exclusive(int r, int size, int spec)
+{
+    reg_status oldstate;
+    int rr=live.state[r].realreg;
+    int nr;
+    int nind;
+    int ndirt=0;
+    int i;
+
+    if (!isinreg(r))
+       return;
+    if (live.nat[rr].nholds==1)
+       return;
+    /* Count co-occupants that would need work if they were evicted */
+    for (i=0;i<live.nat[rr].nholds;i++) {
+       int vr=live.nat[rr].holds[i];
+       if (vr!=r && 
+           (live.state[vr].status==DIRTY || live.state[vr].val))
+           ndirt++;
+    }
+    if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) { 
+       /* Everything else is clean, so let's keep this register */
+       for (i=0;i<live.nat[rr].nholds;i++) {
+           int vr=live.nat[rr].holds[i];
+           if (vr!=r) {
+               evict(vr);
+               i--; /* Try that index again! */
+           }
+       }
+       Dif (live.nat[rr].nholds!=1) {
+           printf("natreg %d holds %d vregs, %d not exclusive\n",
+                  rr,live.nat[rr].nholds,r);
+           abort();
+       }
+       return;
+    }
+
+    /* We have to split the register */
+    oldstate=live.state[r];
+
+    setlock(rr); /* Make sure this doesn't go away */
+    /* Forget about r being in the register rr */
+    disassociate(r);
+    /* Get a new register, that we will clobber completely */
+    if (oldstate.status==DIRTY) {
+       /* If dirtysize is <4, we need a register that can handle the
+          eventual smaller memory store! Thanks to Quake68k for exposing
+          this detail ;-) */
+       nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
+    }
+    else {
+       nr=alloc_reg_hinted(r,4,1,spec);
+    }
+    nind=live.state[r].realind;
+    live.state[r]=oldstate;   /* Keep all the old state info */
+    live.state[r].realreg=nr;
+    live.state[r].realind=nind;
+
+    if (size<live.state[r].validsize) {
+       if (live.state[r].val) {
+           /* Might as well compensate for the offset now */
+           raw_lea_l_brr(nr,rr,oldstate.val);
+           live.state[r].val=0;
+           live.state[r].dirtysize=4;
+           set_status(r,DIRTY);
+       }
+       else
+           raw_mov_l_rr(nr,rr);  /* Make another copy */
+    }
+    unlock(rr); 
+}
+/* Add off to the val field of virtual register r; no code is emitted here.
+ */
+static __inline__ void add_offset(int r, uae_u32 off)
+{
+    live.state[r].val+=off;
+}
+/* Apply the pending offset (val) of virtual register r to its native
+ * register so that val becomes 0.
+ * Constants and registers whose val is already 0 are left alone.  r is
+ * brought into a native register at full width if necessary (spec is the
+ * allocation hint) and made exclusive before the offset is applied with
+ * adjust_nreg(); r ends up DIRTY over 4 bytes.  Aborts if r could not be
+ * made the sole occupant.
+ */
+static __inline__ void remove_offset(int r, int spec)
+{
+    reg_status oldstate;
+    int rr;
+
+    if (isconst(r))
+       return;
+    if (live.state[r].val==0)
+       return;
+    if (isinreg(r) && live.state[r].validsize<4) 
+       evict(r);
+
+    if (!isinreg(r)) 
+       alloc_reg_hinted(r,4,0,spec);
+
+    Dif (live.state[r].validsize!=4) {
+       printf("Validsize=%d in remove_offset\n",live.state[r].validsize);
+       abort();
+    }
+    make_exclusive(r,0,-1);
+    /* make_exclusive might have done the job already */
+    if (live.state[r].val==0)
+       return;
+    
+    rr=live.state[r].realreg;
+
+    if (live.nat[rr].nholds==1) {
+       //printf("RemovingB offset %x from reg %d (%d) at %p\n",
+       //       live.state[r].val,r,rr,target); 
+       adjust_nreg(rr,live.state[r].val);
+       live.state[r].dirtysize=4;
+       live.state[r].val=0;
+       set_status(r,DIRTY);
+       return;
+    }
+    printf("Failed in remove_offset\n");
+    abort();
+}
+
+/* Run remove_offset() over every virtual register, without a register
+ * hint. */
+STATIC_INLINE void remove_all_offsets(void)
+{
+    int vreg=0;
+
+    while (vreg<VREGS) {
+       remove_offset(vreg,-1);
+       vreg++;
+    }
+}
+/* Obtain a locked native register holding virtual register r for reading.
+ *
+ *   r          virtual register to read
+ *   size       number of bytes that must be valid (1, 2 or 4)
+ *   spec       native register the value must end up in, or negative for
+ *              any register
+ *   can_offset non-zero if the caller copes with a pending offset (val);
+ *              otherwise the offset is removed first
+ *
+ * Returns the native register index; its lock count has been incremented
+ * and the caller is expected to unlock() it.
+ */
+static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
+{
+    int n;
+    int answer=-1;
+    
+    if (live.state[r].status==UNDEF) {
+      printf("WARNING: Unexpected read of undefined register %d\n",r);
+    }
+    if (!can_offset)
+       remove_offset(r,spec);
+    
+    if (isinreg(r) && live.state[r].validsize>=size) {
+       n=live.state[r].realreg;
+       switch(size) {
+        case 1: 
+           if (live.nat[n].canbyte || spec>=0) { 
+               answer=n; 
+           }
+           break;
+        case 2: 
+           if (live.nat[n].canword || spec>=0) { 
+               answer=n; 
+           }
+           break;
+        case 4: 
+           answer=n; 
+           break;
+        default: abort();
+       }
+       if (answer<0)
+           evict(r);
+    }
+    /* either the value was in memory to start with, or it was evicted and 
+       is in memory now */
+    if (answer<0) {
+       answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
+    }
+
+    if (spec>=0 && spec!=answer) {
+       /* Too bad */
+       mov_nregs(spec,answer);
+       answer=spec;
+    }
+    live.nat[answer].locked++;
+    live.nat[answer].touched=touchcnt++;
+    return answer;
+}
+
+
+/* readreg_general() with no required native register and offsets removed.
+ */
+static int readreg(int r, int size)
+{
+    return readreg_general(r,size,-1,0);
+}
+/* readreg_general() into native register spec, with offsets removed.
+ */
+static int readreg_specific(int r, int size, int spec)
+{
+    return readreg_general(r,size,spec,0);
+}
+/* readreg_general() with no required native register; a pending offset is
+ * left in place (can_offset=1).
+ */
+static int readreg_offset(int r, int size)
+{
+    return readreg_general(r,size,-1,1);
+}
+
+/* Obtain a locked native register that virtual register r may be written
+ * to.
+ *
+ *   r     virtual register to write
+ *   size  number of bytes the caller writes (1, 2 or 4)
+ *   spec  native register that must be used, or negative for any register
+ *
+ * For partial writes (size<4) a pending offset is removed first.  r is made
+ * the sole occupant of its native register, its dirtysize and validsize are
+ * raised to at least size, val is cleared for 4-byte writes and r is marked
+ * DIRTY.  Returns the native register index with its lock count
+ * incremented; the caller is expected to unlock() it.
+ */
+static __inline__ int writereg_general(int r, int size, int spec)
+{
+    int n;
+    int answer=-1;
+
+    if (size<4) {
+       remove_offset(r,spec);
+    }
+
+    make_exclusive(r,size,spec);
+    if (isinreg(r)) {
+       int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
+       int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
+       n=live.state[r].realreg;
+
+       Dif (live.nat[n].nholds!=1)
+           abort();
+       switch(size) {
+        case 1: 
+           if (live.nat[n].canbyte || spec>=0) { 
+               live.state[r].dirtysize=ndsize;
+               live.state[r].validsize=nvsize;
+               answer=n;
+           }
+           break;
+        case 2: 
+           if (live.nat[n].canword || spec>=0) { 
+               live.state[r].dirtysize=ndsize;
+               live.state[r].validsize=nvsize;
+               answer=n;
+           }
+           break;
+        case 4: 
+           live.state[r].dirtysize=ndsize;
+           live.state[r].validsize=nvsize;
+           answer=n;
+           break;
+        default: abort();
+       }
+       if (answer<0)
+           evict(r);
+    }
+    /* either the value was in memory to start with, or it was evicted and 
+       is in memory now */
+    if (answer<0) {
+       answer=alloc_reg_hinted(r,size,1,spec);
+    }
+    if (spec>=0 && spec!=answer) {
+       mov_nregs(spec,answer);
+       answer=spec;
+    }
+    if (live.state[r].status==UNDEF)
+       live.state[r].validsize=4;
+    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
+    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
+    
+    live.nat[answer].locked++;
+    live.nat[answer].touched=touchcnt++;
+    if (size==4) {
+       live.state[r].val=0;
+    }
+    else {
+       Dif (live.state[r].val) {
+           printf("Problem with val\n");
+           abort();
+       }
+    }
+    set_status(r,DIRTY);
+    return answer;
+}
+/* writereg_general() with no required native register.
+ */
+static int writereg(int r, int size)
+{
+    return writereg_general(r,size,-1);
+}
+/* writereg_general() into native register spec.
+ */
+static int writereg_specific(int r, int size, int spec)
+{
+    return writereg_general(r,size,spec);
+}
+/* Obtain a locked native register for a read-modify-write of virtual
+ * register r.
+ *
+ *   r      virtual register
+ *   wsize  number of bytes written (must not be smaller than rsize)
+ *   rsize  number of bytes that must be valid for the read
+ *   spec   native register that must be used, or negative for any register
+ *
+ * A pending offset is removed and r is made the sole occupant of its
+ * native register.  dirtysize and validsize are raised to at least wsize
+ * and r is marked DIRTY.  Returns the native register index with its lock
+ * count incremented; the caller is expected to unlock() it.
+ */
+static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
+{
+    int n;
+    int answer=-1;
+    
+    if (live.state[r].status==UNDEF) {
+      printf("WARNING: Unexpected read of undefined register %d\n",r);
+    }
+    remove_offset(r,spec);
+    make_exclusive(r,0,spec);
+
+    Dif (wsize<rsize) {
+       printf("Cannot handle wsize<rsize in rmw_general()\n");
+       abort();
+    }
+    if (isinreg(r) && live.state[r].validsize>=rsize) {
+       n=live.state[r].realreg;
+       Dif (live.nat[n].nholds!=1)
+           abort();
+
+       switch(rsize) {
+        case 1: 
+           if (live.nat[n].canbyte || spec>=0) { 
+               answer=n; 
+           }
+           break;
+        case 2: 
+           if (live.nat[n].canword || spec>=0) { 
+               answer=n; 
+           }
+           break;
+        case 4: 
+           answer=n; 
+           break;
+        default: abort();
+       }
+       if (answer<0)
+           evict(r);
+    }
+    /* either the value was in memory to start with, or it was evicted and 
+       is in memory now */
+    if (answer<0) {
+       answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
+    }
+
+    if (spec>=0 && spec!=answer) {
+       /* Too bad */
+       mov_nregs(spec,answer);
+       answer=spec;
+    }
+    if (wsize>live.state[r].dirtysize)
+       live.state[r].dirtysize=wsize;
+    if (wsize>live.state[r].validsize)
+       live.state[r].validsize=wsize;
+    set_status(r,DIRTY);
+
+    live.nat[answer].locked++;
+    live.nat[answer].touched=touchcnt++;
+
+    Dif (live.state[r].val) {
+       printf("Problem with val(rmw)\n");
+       abort();
+    }
+    return answer;
+}
+/* rmw_general() with no required native register.
+ */
+static int rmw(int r, int wsize, int rsize) 
+{
+    return rmw_general(r,wsize,rsize,-1);
+}
+/* rmw_general() into native register spec.
+ */
+static int rmw_specific(int r, int wsize, int rsize, int spec) 
+{
+    return rmw_general(r,wsize,rsize,spec);
+}
+
+
+/* needed for restoring the carry flag on non-P6 cores.
+ * Emits a bit test of bit i of virtual register r; no CLOBBER_BT step is
+ * performed here. */
+static void bt_l_ri_noclobber(R4 r, IMM i)
+{
+    /* a 2-byte read is requested for bits below 16, a 4-byte read otherwise */
+    int width=(i<16)?2:4;
+
+    r=readreg(r,width);
+    raw_bt_l_ri(r,i);
+    unlock(r);
+}
+
+/********************************************************************
+ * FPU register status handling. EMIT TIME!                         *
+ ********************************************************************/
+/* If FPU virtual register r is DIRTY, store its native register to
+ * live.fate[r].mem and mark it CLEAN.  The store routine is selected by
+ * USE_LONG_DOUBLE.
+ */
+static  void f_tomem(int r)
+{
+    if (live.fate[r].status==DIRTY) {
+#if USE_LONG_DOUBLE
+       raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg); 
+#else
+       raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg); 
+#endif
+       live.fate[r].status=CLEAN;
+    }
+}
+/* Like f_tomem(), but uses the *_drop store routines and leaves r INMEM
+ * instead of CLEAN.  NOTE(review): the _drop variants presumably discard
+ * the native register contents after storing -- confirm in
+ * compemu_raw_x86.c.
+ */
+static  void f_tomem_drop(int r)
+{
+    if (live.fate[r].status==DIRTY) {
+#if USE_LONG_DOUBLE
+       raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg); 
+#else
+       raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg); 
+#endif
+       live.fate[r].status=INMEM;
+    }
+}
+
+
+/* Non-zero when the status of FPU virtual register r is CLEAN or DIRTY. */
+static __inline__ int f_isinreg(int r)
+{
+    if (live.fate[r].status==CLEAN)
+       return 1;
+    return live.fate[r].status==DIRTY;
+}
+/* Remove FPU virtual register r from the native FPU register holding it.
+ * r is written back first (with f_tomem_drop() when it is the only
+ * occupant, f_tomem() otherwise), then taken out of the holds[] list (the
+ * last entry is moved into the freed slot).  Afterwards r is INMEM with
+ * realreg==-1.  Does nothing when r is not in a register.
+ */
+static void f_evict(int r)
+{
+    int rr;
+
+    if (!f_isinreg(r))
+       return;
+    rr=live.fate[r].realreg;
+    if (live.fat[rr].nholds==1)
+       f_tomem_drop(r);
+    else
+       f_tomem(r);
+
+    Dif (live.fat[rr].locked &&
+       live.fat[rr].nholds==1) {
+       write_log ("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
+       abort();
+    }
+
+    live.fat[rr].nholds--;
+    if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
+       int topreg=live.fat[rr].holds[live.fat[rr].nholds];
+       int thisind=live.fate[r].realind;
+       live.fat[rr].holds[thisind]=topreg;
+       live.fate[topreg].realind=thisind;
+    }
+    live.fate[r].status=INMEM;
+    live.fate[r].realreg=-1;
+}
+
+/* Evict every FPU virtual register currently held by native FPU register
+ * r, walking holds[] from the highest index down. */
+static __inline__ void f_free_nreg(int r)
+{
+    int slot;
+
+    for (slot=live.fat[r].nholds;slot--;)
+       f_evict(live.fat[r].holds[slot]);
+
+    Dif (live.fat[r].nholds!=0) {
+       printf("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
+       abort();
+    }
+}
+
+
+/* Use with care!
+ * Declares a register-resident FPU virtual register r CLEAN without
+ * emitting any code. */
+static __inline__ void f_isclean(int r)
+{
+    if (f_isinreg(r)) {
+       live.fate[r].status=CLEAN;
+    }
+}
+/* Detach FPU virtual register r from its native register.  f_isclean()
+ * runs first, so the f_evict() that follows finds r CLEAN.
+ */
+static __inline__ void f_disassociate(int r)
+{
+    f_isclean(r);
+    f_evict(r);
+}
+
+
+/* Choose a native FPU register for FPU virtual register r and attach r to
+ * it.
+ *
+ *   r           FPU virtual register to place
+ *   willclobber non-zero if the caller overwrites the value; then nothing
+ *               is loaded and r is marked DIRTY
+ *
+ * Only unlocked native registers are considered; the one with the lowest
+ * touched stamp wins, an empty one immediately.  Whatever the winner holds
+ * is evicted, as is r itself if it was in a register.  Without willclobber
+ * the value is loaded from live.fate[r].mem (unless r is UNDEF) and r is
+ * marked CLEAN.  Returns the native register index.
+ */
+static  int f_alloc_reg(int r, int willclobber)
+{
+    int bestreg;
+    uae_s32 when;
+    int i;
+    uae_s32 badness;
+    bestreg=-1;
+    when=2000000000;
+    for (i=N_FREGS;i--;) {
+       badness=live.fat[i].touched;
+       if (live.fat[i].nholds==0)
+           badness=0;
+
+       if (!live.fat[i].locked && badness<when) {
+           bestreg=i;
+           when=badness;
+           if (live.fat[i].nholds==0)
+               break;
+       }
+    }
+    Dif (bestreg==-1)
+       abort();
+
+    if (live.fat[bestreg].nholds>0) {
+       f_free_nreg(bestreg);
+    }
+    if (f_isinreg(r)) {
+       f_evict(r);
+    }
+
+    if (!willclobber) {
+       if (live.fate[r].status!=UNDEF) {
+#if USE_LONG_DOUBLE
+           raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
+#else
+           raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
+#endif
+       }
+       live.fate[r].status=CLEAN;
+    }
+    else { 
+       live.fate[r].status=DIRTY;
+    }
+    live.fate[r].realreg=bestreg;
+    live.fate[r].realind=live.fat[bestreg].nholds;
+    live.fat[bestreg].touched=touchcnt++;
+    live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
+    live.fat[bestreg].nholds++;
+
+    return bestreg;
+}
+/* Decrement the lock count of native FPU register r.
+ */
+static  void f_unlock(int r)
+{
+    Dif (!live.fat[r].locked)
+       abort();
+    live.fat[r].locked--;
+}
+/* Increment the lock count of native FPU register r.
+ */
+static  void f_setlock(int r)
+{
+    live.fat[r].locked++;
+}
+/* Obtain a locked native FPU register holding FPU virtual register r for
+ * reading, loading it through f_alloc_reg() when it is not in a register.
+ * Returns the native register index; the caller is expected to f_unlock()
+ * it.
+ */
+static __inline__ int f_readreg(int r)
+{
+    int n;
+    int answer=-1;
+
+    if (f_isinreg(r)) {
+       n=live.fate[r].realreg;
+       answer=n; 
+    }
+    /* either the value was in memory to start with, or it was evicted and 
+       is in memory now */
+    if (answer<0) 
+       answer=f_alloc_reg(r,0);
+
+    live.fat[answer].locked++;
+    live.fat[answer].touched=touchcnt++;
+    return answer;
+}
+/* Make FPU virtual register r the only occupant of its native FPU
+ * register.
+ *
+ *   r        FPU virtual register; nothing happens unless it shares a
+ *            native register with other virtual registers
+ *   clobber  non-zero if the caller overwrites the value, in which case it
+ *            is not copied when r has to move
+ *
+ * If no co-occupant is DIRTY and the register is unlocked, the
+ * co-occupants are evicted and r keeps the register.  Otherwise r is moved
+ * to a newly allocated native register while keeping its old state
+ * information.
+ */
+static __inline__ void f_make_exclusive(int r, int clobber)
+{
+    freg_status oldstate;
+    int rr=live.fate[r].realreg;
+    int nr;
+    int nind;
+    int ndirt=0;
+    int i;
+
+    if (!f_isinreg(r))
+       return;
+    if (live.fat[rr].nholds==1)
+       return;
+    for (i=0;i<live.fat[rr].nholds;i++) {
+       int vr=live.fat[rr].holds[i];
+       if (vr!=r && live.fate[vr].status==DIRTY)
+           ndirt++;
+    }
+    if (!ndirt && !live.fat[rr].locked) { 
+       /* Everything else is clean, so let's keep this register */
+       for (i=0;i<live.fat[rr].nholds;i++) {
+           int vr=live.fat[rr].holds[i];
+           if (vr!=r) {
+               f_evict(vr);
+               i--; /* Try that index again! */
+           }
+       }
+       Dif (live.fat[rr].nholds!=1) {
+           printf("realreg %d holds %d (",rr,live.fat[rr].nholds);
+           for (i=0;i<live.fat[rr].nholds;i++) {
+               printf(" %d(%d,%d)",live.fat[rr].holds[i],
+                      live.fate[live.fat[rr].holds[i]].realreg,
+                      live.fate[live.fat[rr].holds[i]].realind);
+           }
+           printf("\n");
+           abort();
+       }
+       return;
+    }
+
+    /* We have to split the register */
+    oldstate=live.fate[r];
+
+    f_setlock(rr); /* Make sure this doesn't go away */
+    /* Forget about r being in the register rr */
+    f_disassociate(r);
+    /* Get a new register, that we will clobber completely */
+    nr=f_alloc_reg(r,1);
+    nind=live.fate[r].realind;
+    if (!clobber)
+       raw_fmov_rr(nr,rr);  /* Make another copy */
+    live.fate[r]=oldstate;   /* Keep all the old state info */
+    live.fate[r].realreg=nr;
+    live.fate[r].realind=nind;
+    f_unlock(rr); 
+}
+
+/* Obtain a locked native FPU register that FPU virtual register r may be
+ * written to.  r is made the sole occupant of its register (or gets a new
+ * one without loading the old value) and is marked DIRTY.  Returns the
+ * native register index; the caller is expected to f_unlock() it.
+ */
+static __inline__ int f_writereg(int r)
+{
+    int n;
+    int answer=-1;
+
+    f_make_exclusive(r,1);
+    if (f_isinreg(r)) {
+       n=live.fate[r].realreg;
+       answer=n;
+    }
+    if (answer<0) {
+       answer=f_alloc_reg(r,1);
+    }
+    live.fate[r].status=DIRTY;
+    live.fat[answer].locked++;
+    live.fat[answer].touched=touchcnt++;
+    return answer;
+}
+/* Obtain a locked native FPU register for a read-modify-write of FPU
+ * virtual register r.  r is made the sole occupant of its register with its
+ * value preserved (or loaded through f_alloc_reg()) and is marked DIRTY.
+ * Returns the native register index; the caller is expected to f_unlock()
+ * it.
+ */
+static int f_rmw(int r)
+{
+    int n;
+
+    f_make_exclusive(r,0);
+    if (f_isinreg(r)) {
+       n=live.fate[r].realreg;
+    }
+    else 
+       n=f_alloc_reg(r,0);
+    live.fate[r].status=DIRTY;
+    live.fat[n].locked++;
+    live.fat[n].touched=touchcnt++;
+    return n;
+}
+/* Clobber the CPU flags, then apply raw_fflags_into_flags() to the native
+ * FPU register holding FP_RESULT.  The tmp parameter is not used here.
+ */
+static void fflags_into_flags_internal(uae_u32 tmp)
+{
+    int r;
+
+    clobber_flags();
+    r=f_readreg(FP_RESULT);
+    raw_fflags_into_flags(r);
+    f_unlock(r);
+}
+
+
+
+
+/********************************************************************
+ * CPU functions exposed to gencomp. Both CREATE and EMIT time      *
+ ********************************************************************/
+
+/* 
+ *  RULES FOR HANDLING REGISTERS:
+ *
+ *  * In the function headers, order the parameters 
+ *     - 1st registers written to
+ *     - 2nd read/modify/write registers
+ *     - 3rd registers read from
+ *  * Before calling raw_*, you must call readreg, writereg or rmw for
+ *    each register
+ *  * The order for this is
+ *     - 1st call remove_offset for all registers written to with size<4
+ *     - 2nd call readreg for all registers read without offset
+ *     - 3rd call rmw for all rmw registers
+ *     - 4th call readreg_offset for all registers that can handle offsets
+ *     - 5th call get_offset for all the registers from the previous step
+ *     - 6th call writereg for all written-to registers
+ *     - 7th call raw_*
+ *     - 8th unlock all registers that were locked
+ */
+
+/* Record in the compile-time state that the flags live in the native
+ * flags: the stacked copy becomes TRASH, the native copy VALID, and the
+ * flags are marked important.  No raw_* call is made. */
+MIDFUNC(0,live_flags,(void))
+{
+    live.flags_on_stack=TRASH;
+    live.flags_in_flags=VALID;
+    live.flags_are_important=1;
+}
+MENDFUNC(0,live_flags,(void))
+
+/* Mark the current flags as unimportant in the compile-time state. */
+MIDFUNC(0,dont_care_flags,(void))
+{
+    live.flags_are_important=0;
+}
+MENDFUNC(0,dont_care_flags,(void))
+
+
+/* Copy a condition into FLAGX: evict FLAGX from any native register, make
+ * the flags live, then setcc condition 2 straight into FLAGX's memory slot.
+ * NOTE(review): condition code 2 is presumably "carry", going by the
+ * function name - confirm against the raw_setcc_m encoding. */
+MIDFUNC(0,duplicate_carry,(void))
+{
+    evict(FLAGX);
+    make_flags_live_internal();
+    COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
+}
+MENDFUNC(0,duplicate_carry,(void))
+
+/* Bring FLAGX back into the native flags.  Two strategies, selected by
+ * have_rat_stall: a bit test of bit 0 of FLAGX, or a byte rotate of FLAGX
+ * by 8 followed by isclean(FLAGX).
+ * NOTE(review): the COMPCALL below wraps the whole call expression, while
+ * every other use in this file is COMPCALL(name)(args); verify both forms
+ * expand identically for all COMPCALL definitions. */
+MIDFUNC(0,restore_carry,(void))
+{
+    if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
+       bt_l_ri_noclobber(FLAGX,0);
+    }
+    else {  /* Avoid the stall the above creates.
+              This is slow on non-P6, though.
+           */
+       COMPCALL(rol_b_ri(FLAGX,8));
+       isclean(FLAGX);
+       /* Why is the above faster than the below? */
+       //raw_rol_b_mi((uae_u32)live.state[FLAGX].mem,8);
+    }
+}
+MENDFUNC(0,restore_carry,(void))
+
+/* Set needflags; while set, the "count==0" shortcuts in the shift/rotate
+ * emitters in this file are disabled. */
+MIDFUNC(0,start_needflags,(void))
+{
+    needflags=1;
+}
+MENDFUNC(0,start_needflags,(void))
+
+/* Clear needflags again (counterpart of start_needflags). */
+MIDFUNC(0,end_needflags,(void))
+{
+    needflags=0;
+}
+MENDFUNC(0,end_needflags,(void))
+
+/* Exposed wrapper around make_flags_live_internal(). */
+MIDFUNC(0,make_flags_live,(void))
+{
+    make_flags_live_internal();
+}
+MENDFUNC(0,make_flags_live,(void))
+
+/* Exposed wrapper: clobber the flags and transfer the FP result flags
+ * into the integer flags via fflags_into_flags_internal().
+ * NOTE(review): the internal helper also calls clobber_flags(), so it is
+ * invoked twice on this path - presumably harmless, confirm. */
+MIDFUNC(1,fflags_into_flags,(W2 tmp))
+{
+    clobber_flags();
+    fflags_into_flags_internal(tmp);
+}
+MENDFUNC(1,fflags_into_flags,(W2 tmp))
+
+
+/* Bit test of register r against immediate bit index i.  The register is
+ * locked for reading with size 2 when i<16 and size 4 otherwise, passed
+ * to raw_bt_l_ri and unlocked. */
+MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
+{    
+    int size=4;
+    if (i<16)
+       size=2;   /* bit lies in the low word, a 2-byte read suffices */
+    CLOBBER_BT;
+    r=readreg(r,size);
+    raw_bt_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
+
+/* Bit test of register r with the bit index taken from register b.  Both
+ * are locked for a 4-byte read, passed to raw_bt_l_rr and unlocked. */
+MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
+{
+    CLOBBER_BT;
+    r=readreg(r,4);
+    b=readreg(b,4);
+    raw_bt_l_rr(r,b);
+    unlock(r);
+    unlock(b);
+}
+MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
+
+/* raw_btc_l_ri on register r with immediate bit index i.  r is taken
+ * read-modify-write with size 2 when i<16, size 4 otherwise. */
+MIDFUNC(2,btc_l_ri,(RW4 r, IMM i)) 
+{    
+    int size=4;
+    if (i<16)
+       size=2;
+    CLOBBER_BT;
+    r=rmw(r,size,size);
+    raw_btc_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,btc_l_ri,(RW4 r, IMM i)) 
+
+/* raw_btc_l_rr on register r with the bit index in register b.  b is
+ * locked for reading first, then r read-modify-write (the order required
+ * by the register handling rules in this file). */
+MIDFUNC(2,btc_l_rr,(RW4 r, R4 b)) 
+{
+    CLOBBER_BT;
+    b=readreg(b,4);
+    r=rmw(r,4,4);
+    raw_btc_l_rr(r,b);
+    unlock(r);
+    unlock(b);
+}
+MENDFUNC(2,btc_l_rr,(RW4 r, R4 b)) 
+
+
+/* raw_btr_l_ri on register r with immediate bit index i.  r is taken
+ * read-modify-write with size 2 when i<16, size 4 otherwise. */
+MIDFUNC(2,btr_l_ri,(RW4 r, IMM i)) 
+{    
+    int size=4;
+    if (i<16)
+       size=2;
+    CLOBBER_BT;
+    r=rmw(r,size,size);
+    raw_btr_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,btr_l_ri,(RW4 r, IMM i)) 
+
+/* raw_btr_l_rr on register r with the bit index in register b.  b is
+ * locked for reading first, then r read-modify-write. */
+MIDFUNC(2,btr_l_rr,(RW4 r, R4 b)) 
+{
+    CLOBBER_BT;
+    b=readreg(b,4);
+    r=rmw(r,4,4);
+    raw_btr_l_rr(r,b);
+    unlock(r);
+    unlock(b);
+}
+MENDFUNC(2,btr_l_rr,(RW4 r, R4 b)) 
+
+
+/* raw_bts_l_ri on register r with immediate bit index i.  r is taken
+ * read-modify-write with size 2 when i<16, size 4 otherwise. */
+MIDFUNC(2,bts_l_ri,(RW4 r, IMM i)) 
+{    
+    int size=4;
+    if (i<16)
+       size=2;
+    CLOBBER_BT;
+    r=rmw(r,size,size);
+    raw_bts_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,bts_l_ri,(RW4 r, IMM i)) 
+
+/* raw_bts_l_rr on register r with the bit index in register b.  b is
+ * locked for reading first, then r read-modify-write. */
+MIDFUNC(2,bts_l_rr,(RW4 r, R4 b)) 
+{
+    CLOBBER_BT;
+    b=readreg(b,4);
+    r=rmw(r,4,4);
+    raw_bts_l_rr(r,b);
+    unlock(r);
+    unlock(b);
+}
+MENDFUNC(2,bts_l_rr,(RW4 r, R4 b)) 
+
+/* Load register d (4-byte write lock) from the immediate memory address s
+ * via raw_mov_l_rm. */
+MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
+{
+    CLOBBER_MOV;
+    d=writereg(d,4);
+    raw_mov_l_rm(d,s);
+    unlock(d);
+}
+MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
+
+
+/* Emit raw_call_r on register r (locked for a 4-byte read).  No CLOBBER_*
+ * macro is used here; see the trailing comment. */
+MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
+{
+    r=readreg(r,4);
+    raw_call_r(r);
+    unlock(r);
+}
+MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
+
+/* Emit raw_sub_l_mi with immediate address d and immediate operand s.
+ * No virtual registers are involved, so nothing is locked. */
+MIDFUNC(2,sub_l_mi,(IMM d, IMM s)) 
+{
+    CLOBBER_SUB;
+    raw_sub_l_mi(d,s) ;
+}
+MENDFUNC(2,sub_l_mi,(IMM d, IMM s)) 
+
+/* Emit raw_mov_l_mi with immediate address d and immediate value s
+ * (long-sized variant).  No registers are locked. */
+MIDFUNC(2,mov_l_mi,(IMM d, IMM s)) 
+{
+    CLOBBER_MOV;
+    raw_mov_l_mi(d,s) ;
+}
+MENDFUNC(2,mov_l_mi,(IMM d, IMM s)) 
+
+/* Emit raw_mov_w_mi with immediate address d and immediate value s
+ * (word-sized variant).  No registers are locked. */
+MIDFUNC(2,mov_w_mi,(IMM d, IMM s)) 
+{
+    CLOBBER_MOV;
+    raw_mov_w_mi(d,s) ;
+}
+MENDFUNC(2,mov_w_mi,(IMM d, IMM s)) 
+
+/* Emit raw_mov_b_mi with immediate address d and immediate value s
+ * (byte-sized variant).  No registers are locked. */
+MIDFUNC(2,mov_b_mi,(IMM d, IMM s)) 
+{
+    CLOBBER_MOV;
+    raw_mov_b_mi(d,s) ;
+}
+MENDFUNC(2,mov_b_mi,(IMM d, IMM s)) 
+
+/* Byte rotate-left of register r by immediate i.  Nothing is emitted when
+ * i is 0 and needflags is clear; otherwise r is taken read-modify-write
+ * (1 byte) around raw_rol_b_ri. */
+MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
+{
+               if (!i && !needflags)
+               return;
+    CLOBBER_ROL;
+    r=rmw(r,1,1);
+    raw_rol_b_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
+
+/* Word rotate-left of register r by immediate i.  Nothing is emitted when
+ * i is 0 and needflags is clear; otherwise r is taken read-modify-write
+ * (2 bytes) around raw_rol_w_ri. */
+MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
+{
+               if (!i && !needflags)
+               return;
+    CLOBBER_ROL;
+    r=rmw(r,2,2);
+    raw_rol_w_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
+
+/* Long rotate-left of register r by immediate i.  Nothing is emitted when
+ * i is 0 and needflags is clear; otherwise r is taken read-modify-write
+ * (4 bytes) around raw_rol_l_ri. */
+MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
+{
+               if (!i && !needflags)
+               return;
+    CLOBBER_ROL;
+    r=rmw(r,4,4);
+    raw_rol_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
+
+/* Long rotate-left of register d by the count held in register r.
+ * A count known at compile time is forwarded to rol_l_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_rol_l_rr is emitted.  The Dif check aborts if
+ * the count did not end up in native register 1. */
+MIDFUNC(2,rol_l_rr,(RW4 d, R1 r)) 
+{ 
+    if (isconst(r)) {
+       COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_ROL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    Dif (r!=1) {
+       /* Diagnostic now names this routine instead of raw_rol_b */
+       write_log ("Illegal register %d in raw_rol_l\n",r);
+       abort();
+    }
+    raw_rol_l_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,rol_l_rr,(RW4 d, R1 r)) 
+
+/* Word rotate-left of register d by the count held in register r.
+ * A count known at compile time is forwarded to rol_w_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_rol_w_rr is emitted. */
+MIDFUNC(2,rol_w_rr,(RW2 d, R1 r)) 
+{ /* Can only do this with r==1, i.e. cl */
+  
+    if (isconst(r)) {
+       COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_ROL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    Dif (r!=1) {
+       /* Diagnostic now names this routine instead of raw_rol_b */
+       write_log ("Illegal register %d in raw_rol_w\n",r);
+       abort();
+    }
+    raw_rol_w_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,rol_w_rr,(RW2 d, R1 r)) 
+
+/* Byte rotate-left of register d by the count held in register r.
+ * A count known at compile time is forwarded to rol_b_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_rol_b_rr is emitted. */
+MIDFUNC(2,rol_b_rr,(RW1 d, R1 r)) 
+{ /* Can only do this with r==1, i.e. cl */
+  
+    if (isconst(r)) {
+       COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+
+    CLOBBER_ROL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,1,1);
+    Dif (r!=1) {
+       write_log ("Illegal register %d in raw_rol_b\n",r);
+       abort();
+    }
+    raw_rol_b_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,rol_b_rr,(RW1 d, R1 r)) 
+
+
+/* Long logical shift-left of register d by the count held in register r.
+ * A count known at compile time is forwarded to shll_l_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shll_l_rr is emitted. */
+MIDFUNC(2,shll_l_rr,(RW4 d, R1 r)) 
+{ 
+    if (isconst(r)) {
+       COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_SHLL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    Dif (r!=1) {
+       /* Diagnostic now names this routine instead of raw_rol_b */
+       write_log ("Illegal register %d in raw_shll_l\n",r);
+       abort();
+    }
+    raw_shll_l_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shll_l_rr,(RW4 d, R1 r)) 
+
+/* Word logical shift-left of register d by the count held in register r.
+ * A count known at compile time is forwarded to shll_w_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shll_w_rr is emitted. */
+MIDFUNC(2,shll_w_rr,(RW2 d, R1 r)) 
+{ /* Can only do this with r==1, i.e. cl */
+  
+    if (isconst(r)) {
+       COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_SHLL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    Dif (r!=1) {
+       /* Diagnostic now names this routine instead of raw_shll_b */
+       write_log ("Illegal register %d in raw_shll_w\n",r);
+       abort();
+    }
+    raw_shll_w_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shll_w_rr,(RW2 d, R1 r)) 
+
+/* Byte logical shift-left of register d by the count held in register r.
+ * A count known at compile time is forwarded to shll_b_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shll_b_rr is emitted. */
+MIDFUNC(2,shll_b_rr,(RW1 d, R1 r)) 
+{ /* Can only do this with r==1, i.e. cl */
+  
+    if (isconst(r)) {
+       COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+
+    CLOBBER_SHLL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,1,1);
+    Dif (r!=1) {
+       write_log ("Illegal register %d in raw_shll_b\n",r);
+       abort();
+    }
+    raw_shll_b_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shll_b_rr,(RW1 d, R1 r)) 
+
+
+/* Byte rotate-right of register r by immediate i.  Nothing is emitted when
+ * i is 0 and needflags is clear.
+ * NOTE(review): r is annotated R1 (read) but is taken with rmw and
+ * modified; the rol_* siblings annotate it RW1. */
+MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
+{
+               if (!i && !needflags)
+               return;
+    CLOBBER_ROR;
+    r=rmw(r,1,1);
+    raw_ror_b_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
+
+/* Word rotate-right of register r by immediate i.  Nothing is emitted when
+ * i is 0 and needflags is clear.
+ * NOTE(review): r is annotated R2 (read) but is taken with rmw and
+ * modified; the rol_* siblings annotate it RW2. */
+MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
+{
+               if (!i && !needflags)
+               return;
+    CLOBBER_ROR;
+    r=rmw(r,2,2);
+    raw_ror_w_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
+
+/* Long rotate-right of register r by immediate i.  Nothing is emitted when
+ * i is 0 and needflags is clear.
+ * NOTE(review): r is annotated R4 (read) but is taken with rmw and
+ * modified; the rol_* siblings annotate it RW4. */
+MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
+{
+               if (!i && !needflags)
+               return;
+    CLOBBER_ROR;
+    r=rmw(r,4,4);
+    raw_ror_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
+
+/* Long rotate-right of register d by the count held in register r.
+ * A count known at compile time is forwarded to ror_l_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG and d is
+ * taken read-modify-write.  Unlike the rol_*_rr routines there is no Dif
+ * check on the resulting count register here. */
+MIDFUNC(2,ror_l_rr,(R4 d, R1 r)) 
+{ 
+    if (isconst(r)) {
+       COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_ROR;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    raw_ror_l_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,ror_l_rr,(R4 d, R1 r)) 
+
+/* Word rotate-right of register d by the count held in register r.
+ * A count known at compile time is forwarded to ror_w_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG and d is
+ * taken read-modify-write around raw_ror_w_rr. */
+MIDFUNC(2,ror_w_rr,(R2 d, R1 r)) 
+{ 
+    if (isconst(r)) {
+       COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_ROR;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    raw_ror_w_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,ror_w_rr,(R2 d, R1 r)) 
+
+/* Byte rotate-right of register d by the count held in register r.
+ * A count known at compile time is forwarded to ror_b_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG and d is
+ * taken read-modify-write around raw_ror_b_rr. */
+MIDFUNC(2,ror_b_rr,(R1 d, R1 r)) 
+{   
+    if (isconst(r)) {
+       COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+
+    CLOBBER_ROR;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,1,1);
+    raw_ror_b_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,ror_b_rr,(R1 d, R1 r)) 
+
+/* Long logical shift-right of register d by the count held in register r.
+ * A count known at compile time is forwarded to shrl_l_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shrl_l_rr is emitted. */
+MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r)) 
+{ 
+    if (isconst(r)) {
+       COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_SHRL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    Dif (r!=1) {
+       /* Diagnostic now names this routine instead of raw_rol_b */
+       write_log ("Illegal register %d in raw_shrl_l\n",r);
+       abort();
+    }
+    raw_shrl_l_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r)) 
+
+/* Word logical shift-right of register d by the count held in register r.
+ * A count known at compile time is forwarded to shrl_w_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shrl_w_rr is emitted. */
+MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r)) 
+{ /* Can only do this with r==1, i.e. cl */
+  
+    if (isconst(r)) {
+       COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_SHRL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    Dif (r!=1) {
+       /* Diagnostic now names this routine instead of raw_shrl_b */
+       write_log ("Illegal register %d in raw_shrl_w\n",r);
+       abort();
+    }
+    raw_shrl_w_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r)) 
+
+/* Byte logical shift-right of register d by the count held in register r.
+ * A count known at compile time is forwarded to shrl_b_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shrl_b_rr is emitted. */
+MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r)) 
+{ /* Can only do this with r==1, i.e. cl */
+  
+    if (isconst(r)) {
+       COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+
+    CLOBBER_SHRL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,1,1);
+    Dif (r!=1) {
+       write_log ("Illegal register %d in raw_shrl_b\n",r);
+       abort();
+    }
+    raw_shrl_b_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r)) 
+
+
+
+/* Long logical shift-left of register r by immediate i.
+ *  - i==0 with needflags clear: nothing to do.
+ *  - r holds a compile-time constant and flags are not needed: the
+ *    constant is shifted in place and no code is emitted.
+ *  - otherwise r is taken read-modify-write around raw_shll_l_ri. */
+MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    if (isconst(r) && !needflags) {
+       /* The *_rr callers pass counts truncated to uae_u8, so i may be
+          >= 32.  Shifting a 32-bit value that far is undefined in C;
+          mask to 5 bits, which is what the x86 shift that would
+          otherwise be emitted does with its count. */
+       live.state[r].val<<=(i&31);
+       return;
+    }
+    CLOBBER_SHLL;
+    r=rmw(r,4,4);
+    raw_shll_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
+
+/* Word logical shift-left of register r by immediate i.  Nothing is
+ * emitted when i is 0 and needflags is clear; otherwise r is taken
+ * read-modify-write (2 bytes) around raw_shll_w_ri. */
+MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    CLOBBER_SHLL;
+    r=rmw(r,2,2);
+    raw_shll_w_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
+
+/* Byte logical shift-left of register r by immediate i.  Nothing is
+ * emitted when i is 0 and needflags is clear; otherwise r is taken
+ * read-modify-write (1 byte) around raw_shll_b_ri. */
+MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    CLOBBER_SHLL;
+    r=rmw(r,1,1);
+    raw_shll_b_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
+
+/* Long logical shift-right of register r by immediate i.
+ *  - i==0 with needflags clear: nothing to do.
+ *  - r holds a compile-time constant and flags are not needed: the
+ *    constant is shifted in place and no code is emitted.
+ *  - otherwise r is taken read-modify-write around raw_shrl_l_ri. */
+MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    if (isconst(r) && !needflags) {
+       /* The *_rr callers pass counts truncated to uae_u8, so i may be
+          >= 32.  Shifting a 32-bit value that far is undefined in C;
+          mask to 5 bits, which is what the x86 shift that would
+          otherwise be emitted does with its count. */
+       live.state[r].val>>=(i&31);
+       return;
+    }
+    CLOBBER_SHRL;
+    r=rmw(r,4,4);
+    raw_shrl_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
+
+/* Word logical shift-right of register r by immediate i.  Nothing is
+ * emitted when i is 0 and needflags is clear; otherwise r is taken
+ * read-modify-write (2 bytes) around raw_shrl_w_ri. */
+MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    CLOBBER_SHRL;
+    r=rmw(r,2,2);
+    raw_shrl_w_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
+
+/* Byte logical shift-right of register r by immediate i.  Nothing is
+ * emitted when i is 0 and needflags is clear; otherwise r is taken
+ * read-modify-write (1 byte) around raw_shrl_b_ri. */
+MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    CLOBBER_SHRL;
+    r=rmw(r,1,1);
+    raw_shrl_b_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
+
+/* Long arithmetic shift-right of register r by immediate i.  Nothing is
+ * emitted when i is 0 and needflags is clear; otherwise r is taken
+ * read-modify-write (4 bytes) around raw_shra_l_ri.  Unlike shrl_l_ri
+ * there is no constant-folding shortcut. */
+MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    CLOBBER_SHRA;
+    r=rmw(r,4,4);
+    raw_shra_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
+
+/* Word arithmetic shift-right of register r by immediate i.  Nothing is
+ * emitted when i is 0 and needflags is clear; otherwise r is taken
+ * read-modify-write (2 bytes) around raw_shra_w_ri. */
+MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    CLOBBER_SHRA;
+    r=rmw(r,2,2);
+    raw_shra_w_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
+
+/* Byte arithmetic shift-right of register r by immediate i.  Nothing is
+ * emitted when i is 0 and needflags is clear; otherwise r is taken
+ * read-modify-write (1 byte) around raw_shra_b_ri. */
+MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
+{
+    if (!i && !needflags)
+       return;
+    CLOBBER_SHRA;
+    r=rmw(r,1,1);
+    raw_shra_b_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
+
+/* Long arithmetic shift-right of register d by the count in register r.
+ * A count known at compile time is forwarded to shra_l_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shra_l_rr is emitted. */
+MIDFUNC(2,shra_l_rr,(RW4 d, R1 r)) 
+{ 
+    if (isconst(r)) {
+       COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_SHRA;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    Dif (r!=1) {
+       /* Diagnostic now names this routine instead of raw_rol_b */
+       write_log ("Illegal register %d in raw_shra_l\n",r);
+       abort();
+    }
+    raw_shra_l_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shra_l_rr,(RW4 d, R1 r)) 
+
+/* Word arithmetic shift-right of register d by the count in register r.
+ * A count known at compile time is forwarded to shra_w_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shra_w_rr is emitted. */
+MIDFUNC(2,shra_w_rr,(RW2 d, R1 r)) 
+{ /* Can only do this with r==1, i.e. cl */
+  
+    if (isconst(r)) {
+       COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+    CLOBBER_SHRA;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    Dif (r!=1) {
+       /* Diagnostic now names this routine instead of raw_shra_b */
+       write_log ("Illegal register %d in raw_shra_w\n",r);
+       abort();
+    }
+    raw_shra_w_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shra_w_rr,(RW2 d, R1 r)) 
+
+/* Byte arithmetic shift-right of register d by the count in register r.
+ * A count known at compile time is forwarded to shra_b_ri (truncated to
+ * uae_u8).  Otherwise the count is forced into SHIFTCOUNT_NREG, d is taken
+ * read-modify-write and raw_shra_b_rr is emitted. */
+MIDFUNC(2,shra_b_rr,(RW1 d, R1 r)) 
+{ /* Can only do this with r==1, i.e. cl */
+  
+    if (isconst(r)) {
+       COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
+       return;
+    }
+
+    CLOBBER_SHRA;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,1,1);
+    Dif (r!=1) {
+       write_log ("Illegal register %d in raw_shra_b\n",r);
+       abort();
+    }
+    raw_shra_b_rr(d,r) ;
+    unlock(r);
+    unlock(d);
+}
+MENDFUNC(2,shra_b_rr,(RW1 d, R1 r)) 
+
+
+/* Set byte register d according to condition code cc via raw_setcc.
+ * d is locked for a 1-byte write. */
+MIDFUNC(2,setcc,(W1 d, IMM cc))
+{
+    CLOBBER_SETCC;
+    d=writereg(d,1);
+    raw_setcc(d,cc);
+    unlock(d);
+}
+MENDFUNC(2,setcc,(W1 d, IMM cc))
+
+/* Memory form of setcc: raw_setcc_m with immediate address d and
+ * condition code cc.  No registers are locked. */
+MIDFUNC(2,setcc_m,(IMM d, IMM cc))
+{
+    CLOBBER_SETCC;
+    raw_setcc_m(d,cc);
+}
+MENDFUNC(2,setcc_m,(IMM d, IMM cc))
+
+/* Conditional long move from register s to register d under condition cc.
+ * A move onto itself emits nothing.  s is locked for reading first, then
+ * d read-modify-write (d keeps its old value when the move is not taken). */
+MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
+{
+    if (d==s)
+       return;
+    CLOBBER_CMOV;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+    raw_cmov_l_rr(d,s,cc);
+    unlock(s);
+    unlock(d);
+}
+MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
+
+/* Set the Z flag from register r, using one of two strategies:
+ *  - setzflg_uses_bsf: emit raw_bsf_l_rr(r,r) with r taken
+ *    read-modify-write.
+ *  - otherwise: require the flags to already be VALID in the native flags
+ *    (Dif check aborts if not), then call raw_flags_set_zero with r and
+ *    the two scratch registers S11 and S12. */
+MIDFUNC(1,setzflg_l,(RW4 r))
+{
+       if (setzflg_uses_bsf) {
+               CLOBBER_BSF;
+               r=rmw(r,4,4);
+               raw_bsf_l_rr(r,r);
+               unlock(r);
+       }
+       else {
+               Dif (live.flags_in_flags!=VALID) {
+                       write_log("setzflg() wanted flags in native flags, they are %d\n",
+                                         live.flags_in_flags);
+                       abort();
+               }
+               r=readreg(r,4);
+               {
+               int f=writereg(S11,4);   /* scratch register */
+               int t=writereg(S12,4);   /* scratch register */
+               raw_flags_set_zero(f,r,t);
+               unlock(f);
+               unlock(r);
+               unlock(t);
+               }
+       }
+}
+MENDFUNC(1,setzflg_l,(RW4 r))
+
+/* Conditional long move into register d from immediate memory address s
+ * under condition cc.  d is taken read-modify-write. */
+MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
+{
+    CLOBBER_CMOV;
+    d=rmw(d,4,4);
+    raw_cmov_l_rm(d,s,cc);
+    unlock(d);
+}
+MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
+
+/* Emit raw_bsf_l_rr with destination d (4-byte write lock) and source s
+ * (4-byte read lock).  The source is locked before the destination. */
+MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
+{
+    CLOBBER_BSF;
+    s=readreg(s,4);
+    d=writereg(d,4);
+    raw_bsf_l_rr(d,s);
+    unlock(s);
+    unlock(d);
+}
+MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))
+
+/* Emit raw_imul_32_32 on d (read-modify-write) with source s (read). */
+MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
+{
+    CLOBBER_MUL;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+    raw_imul_32_32(d,s);
+    unlock(s);
+    unlock(d);
+}
+MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
+
+/* Emit raw_imul_64_32.  Both operands are modified, so both are taken
+ * read-modify-write, and each is forced into a specific native register:
+ * s into MUL_NREG2 and d into MUL_NREG1. */
+MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
+{
+    CLOBBER_MUL;
+    s=rmw_specific(s,4,4,MUL_NREG2);
+    d=rmw_specific(d,4,4,MUL_NREG1);
+    raw_imul_64_32(d,s);
+    unlock(s);
+    unlock(d);
+}
+MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
+
+/* Emit raw_mul_64_32.  Same register discipline as imul_64_32: s is
+ * forced into MUL_NREG2 and d into MUL_NREG1, both read-modify-write. */
+MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
+{
+    CLOBBER_MUL;
+    s=rmw_specific(s,4,4,MUL_NREG2);
+    d=rmw_specific(d,4,4,MUL_NREG1);
+    raw_mul_64_32(d,s);
+    unlock(s);
+    unlock(d);
+}
+MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
+
+/* Emit raw_mul_32_32 on d (read-modify-write) with source s (read). */
+MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
+{
+    CLOBBER_MUL;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+    raw_mul_32_32(d,s);
+    unlock(s);
+    unlock(d);
+}
+MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
+
+/* Sign-extend the low 16 bits of s into the 32-bit register d.
+ * A constant source is folded at compile time via set_const.  When s and
+ * d are the same virtual register a single rmw lock (write 4, read 2) is
+ * taken instead of two separate locks. */
+MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
+{
+    int isrmw;
+
+    if (isconst(s)) {
+       set_const(d,(uae_s32)(uae_s16)live.state[s].val);
+       return;
+    }
+
+    CLOBBER_SE16;
+    isrmw=(s==d);
+    if (!isrmw) {
+       s=readreg(s,2);
+       d=writereg(d,4);
+    }
+    else {  /* If we try to lock this twice, with different sizes, we
+              are in trouble! */
+       s=d=rmw(s,4,2);
+    }
+    raw_sign_extend_16_rr(d,s);
+    if (!isrmw) {
+       unlock(d);
+       unlock(s);
+    }
+    else {
+       unlock(s);   /* only one lock was taken */
+    }
+}
+MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
+
+/* Sign-extend the low 8 bits of s into the 32-bit register d.
+ * A constant source is folded at compile time via set_const.  When s and
+ * d are the same virtual register a single rmw lock (write 4, read 1) is
+ * taken instead of two separate locks. */
+MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
+{
+    int isrmw;
+
+    if (isconst(s)) {
+       set_const(d,(uae_s32)(uae_s8)live.state[s].val);
+       return;
+    }
+
+    isrmw=(s==d);
+    CLOBBER_SE8;
+    if (!isrmw) {
+       s=readreg(s,1);
+       d=writereg(d,4);
+    }
+    else {  /* If we try to lock this twice, with different sizes, we
+              are in trouble! */
+       s=d=rmw(s,4,1);
+    }
+  
+    raw_sign_extend_8_rr(d,s);
+
+    if (!isrmw) {
+       unlock(d);
+       unlock(s);
+    }
+    else {
+       unlock(s);   /* only one lock was taken */
+    }
+}
+MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
+
+
+/* Zero-extend the low 16 bits of s into the 32-bit register d.
+ * A constant source is folded at compile time via set_const.  When s and
+ * d are the same virtual register a single rmw lock (write 4, read 2) is
+ * taken instead of two separate locks. */
+MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
+{
+    int isrmw;
+
+    if (isconst(s)) {
+       set_const(d,(uae_u32)(uae_u16)live.state[s].val);
+       return;
+    }
+
+    isrmw=(s==d);
+    CLOBBER_ZE16;
+    if (!isrmw) {
+       s=readreg(s,2);
+       d=writereg(d,4);
+    }
+    else {  /* If we try to lock this twice, with different sizes, we
+              are in trouble! */
+       s=d=rmw(s,4,2);
+    }
+    raw_zero_extend_16_rr(d,s);
+    if (!isrmw) {
+       unlock(d);
+       unlock(s);
+    }
+    else {
+       unlock(s);   /* only one lock was taken */
+    }
+}
+MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
+
+/* Zero-extend the low 8 bits of s into the 32-bit register d.
+ * A constant source is folded at compile time via set_const.  When s and
+ * d are the same virtual register a single rmw lock (write 4, read 1) is
+ * taken instead of two separate locks. */
+MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
+{
+    int isrmw;
+    if (isconst(s)) {
+       set_const(d,(uae_u32)(uae_u8)live.state[s].val);
+       return;
+    }
+
+    isrmw=(s==d);
+    CLOBBER_ZE8;
+    if (!isrmw) {
+       s=readreg(s,1);
+       d=writereg(d,4);
+    }
+    else {  /* If we try to lock this twice, with different sizes, we
+              are in trouble! */
+       s=d=rmw(s,4,1);
+    }
+  
+    raw_zero_extend_8_rr(d,s);
+
+    if (!isrmw) {
+       unlock(d);
+       unlock(s);
+    }
+    else {
+       unlock(s);   /* only one lock was taken */
+    }
+}
+MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
+
+/* Byte move from register s to register d.  A move onto itself emits
+ * nothing; a constant source is forwarded to mov_b_ri with the value
+ * truncated to uae_u8. */
+MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
+{
+    if (d==s)
+       return;
+    if (isconst(s)) {
+       COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,1);
+    d=writereg(d,1);
+    raw_mov_b_rr(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
+
+/* Word move from register s to register d.  A move onto itself emits
+ * nothing; a constant source is forwarded to mov_w_ri with the value
+ * truncated to uae_u16. */
+MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
+{
+    if (d==s)
+       return;
+    if (isconst(s)) {
+       COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,2);
+    d=writereg(d,2);
+    raw_mov_w_rr(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
+
+
+/* Long load into d through raw_mov_l_rrm_indexed, addressed by baser and
+ * index with scale factor.  Both address registers are locked for a
+ * 4-byte read before d is locked for writing. */
+MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+{
+    CLOBBER_MOV;
+    baser=readreg(baser,4);
+    index=readreg(index,4);
+    d=writereg(d,4);
+
+    raw_mov_l_rrm_indexed(d,baser,index,factor);
+    unlock(d);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+
+/* Word load into d through raw_mov_w_rrm_indexed, addressed by baser and
+ * index with scale factor.  d is locked for a 2-byte write. */
+MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+{
+    CLOBBER_MOV;
+    baser=readreg(baser,4);
+    index=readreg(index,4);
+    d=writereg(d,2);
+
+    raw_mov_w_rrm_indexed(d,baser,index,factor);
+    unlock(d);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+
+/* Byte load into d through raw_mov_b_rrm_indexed, addressed by baser and
+ * index with scale factor.  d is locked for a 1-byte write. */
+MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+{
+    CLOBBER_MOV;
+    baser=readreg(baser,4);
+    index=readreg(index,4);
+    d=writereg(d,1);
+
+    raw_mov_b_rrm_indexed(d,baser,index,factor);
+
+    unlock(d);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+
+
+/* Long store of register s through raw_mov_l_mrr_indexed, addressed by
+ * baser and index with scale factor.  All three registers are only read.
+ * The Dif check aborts if s ended up in the same native register as
+ * baser or index. */
+MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+{
+    CLOBBER_MOV;
+    baser=readreg(baser,4);
+    index=readreg(index,4);
+    s=readreg(s,4);
+
+    Dif (baser==s || index==s) 
+       abort();
+
+
+    raw_mov_l_mrr_indexed(baser,index,factor,s);
+    unlock(s);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+
+/* Word store of register s through raw_mov_w_mrr_indexed, addressed by
+ * baser and index with scale factor.  s is locked for a 2-byte read. */
+MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+{
+    CLOBBER_MOV;
+    baser=readreg(baser,4);
+    index=readreg(index,4);
+    s=readreg(s,2);
+
+    raw_mov_w_mrr_indexed(baser,index,factor,s);
+    unlock(s);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+
+/* Byte store of register s through raw_mov_b_mrr_indexed, addressed by
+ * baser and index with scale factor.  Here s is locked (1-byte read)
+ * before the address registers, unlike the word and long variants. */
+MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+{
+    CLOBBER_MOV;
+    s=readreg(s,1);
+    baser=readreg(baser,4);
+    index=readreg(index,4);
+
+    raw_mov_b_mrr_indexed(baser,index,factor,s);
+    unlock(s);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+
+
+/* Long store of register s through raw_mov_l_bmrr_indexed, addressed by
+ * the immediate base plus baser and index with scale factor.  The address
+ * registers are taken with readreg_offset, and their pending offsets
+ * (get_offset) are folded into the immediate base instead - the index
+ * offset multiplied by factor. */
+MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+{
+    int basereg=baser;     /* virtual register numbers, kept for get_offset */
+    int indexreg=index;
+
+    CLOBBER_MOV;
+    s=readreg(s,4);
+    baser=readreg_offset(baser,4);
+    index=readreg_offset(index,4);
+
+    base+=get_offset(basereg);
+    base+=factor*get_offset(indexreg);
+
+    raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
+    unlock(s);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+
+/* Word store of register s through raw_mov_w_bmrr_indexed, addressed by
+ * the immediate base plus baser and index with scale factor.  Pending
+ * offsets of the address registers are folded into base (the index offset
+ * multiplied by factor). */
+MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+{
+    int basereg=baser;     /* virtual register numbers, kept for get_offset */
+    int indexreg=index;
+
+    CLOBBER_MOV;
+    s=readreg(s,2);
+    baser=readreg_offset(baser,4);
+    index=readreg_offset(index,4);
+
+    base+=get_offset(basereg);
+    base+=factor*get_offset(indexreg);
+
+    raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
+    unlock(s);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+
+/* Byte store of register s through raw_mov_b_bmrr_indexed, addressed by
+ * the immediate base plus baser and index with scale factor.  Pending
+ * offsets of the address registers are folded into base (the index offset
+ * multiplied by factor). */
+MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+{
+    int basereg=baser;     /* virtual register numbers, kept for get_offset */
+    int indexreg=index;
+
+    CLOBBER_MOV;
+    s=readreg(s,1);
+    baser=readreg_offset(baser,4);
+    index=readreg_offset(index,4);
+
+    base+=get_offset(basereg);
+    base+=factor*get_offset(indexreg);
+
+    raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
+    unlock(s);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+
+
+
+/* Read a long from base+baser+factor*index into d.
+ * The address registers are taken with readreg_offset and their pending
+ * offsets are folded into the immediate base (the index offset multiplied
+ * by factor) before d is locked for writing. */
+MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+    int basereg=baser;     /* virtual register numbers, kept for get_offset */
+    int indexreg=index;
+
+    CLOBBER_MOV;
+    baser=readreg_offset(baser,4);
+    index=readreg_offset(index,4);
+    base+=get_offset(basereg);
+    base+=factor*get_offset(indexreg);    
+    d=writereg(d,4);
+    raw_mov_l_brrm_indexed(d,base,baser,index,factor);
+    unlock(d);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+
+
+/* Read a word from base+baser+factor*index into d.
+ * Because d is written with size<4, remove_offset(d,-1) is called first,
+ * as the register handling rules in this file require.  Pending offsets of
+ * the address registers are folded into base. */
+MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+    int basereg=baser;     /* virtual register numbers, kept for get_offset */
+    int indexreg=index;
+
+    CLOBBER_MOV;
+    remove_offset(d,-1);
+    baser=readreg_offset(baser,4);
+    index=readreg_offset(index,4);
+    base+=get_offset(basereg);
+    base+=factor*get_offset(indexreg);    
+    d=writereg(d,2);
+    raw_mov_w_brrm_indexed(d,base,baser,index,factor);
+    unlock(d);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+
+
+/* Read a byte from base+baser+factor*index into d.
+ * Because d is written with size<4, remove_offset(d,-1) is called first,
+ * as the register handling rules in this file require.  Pending offsets of
+ * the address registers are folded into base. */
+MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+    int basereg=baser;     /* virtual register numbers, kept for get_offset */
+    int indexreg=index;
+
+    CLOBBER_MOV;
+    remove_offset(d,-1);
+    baser=readreg_offset(baser,4);
+    index=readreg_offset(index,4);
+    base+=get_offset(basereg);
+    base+=factor*get_offset(indexreg);    
+    d=writereg(d,1);
+    raw_mov_b_brrm_indexed(d,base,baser,index,factor);
+    unlock(d);
+    unlock(baser);
+    unlock(index);
+}
+MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+
+/* Read a long from base+factor*index into d.
+ * A constant index is folded into an absolute address and forwarded to
+ * mov_l_rm.  Otherwise the index is taken with readreg_offset and its
+ * pending offset, multiplied by factor, is folded into base. */
+MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+{
+    int indexreg=index;    /* virtual register number, kept for get_offset */
+
+    if (isconst(index)) {
+       COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
+       return;
+    }
+
+    CLOBBER_MOV;
+    index=readreg_offset(index,4);
+    base+=get_offset(indexreg)*factor;
+    d=writereg(d,4);
+
+    raw_mov_l_rm_indexed(d,base,index,factor);
+    unlock(index);
+    unlock(d);
+}
+MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+
+
+/* Read the long at the address contained in s+offset and store it in d.
+ * A constant s is folded into an absolute address and forwarded to
+ * mov_l_rm. */
+MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
+{
+    if (isconst(s)) {
+       COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
+       return;
+    }
+    CLOBBER_MOV;
+    s=readreg(s,4);
+    d=writereg(d,4);
+
+    raw_mov_l_rR(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
+
+/* Read the word at the address contained in s+offset and store it in d.
+ * A constant s is folded into an absolute address and forwarded to
+ * mov_w_rm. */
+MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
+{
+    if (isconst(s)) {
+       COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
+       return;
+    }
+    CLOBBER_MOV;
+    s=readreg(s,4);
+    d=writereg(d,2);
+
+    raw_mov_w_rR(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
+
+/* Read the byte at the address contained in s+offset and store it in d.
+ * A constant s is folded into an absolute address and forwarded to
+ * mov_b_rm. */
+MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
+{
+    if (isconst(s)) {
+       COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
+       return;
+    }
+    CLOBBER_MOV;
+    s=readreg(s,4);
+    d=writereg(d,1);
+
+    raw_mov_b_rR(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
+
+/* Read the long at the address contained in s+offset and store it in d.
+ * Differs from mov_l_rR in that s is taken with readreg_offset and its
+ * pending offset is folded into the immediate offset. */
+MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
+{
+    int sreg=s;            /* virtual register number, kept for get_offset */
+    if (isconst(s)) {
+       COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
+       return;
+    }
+    CLOBBER_MOV;
+    s=readreg_offset(s,4);
+    offset+=get_offset(sreg);
+    d=writereg(d,4);
+    
+    raw_mov_l_brR(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
+
+/* Read the word at the address contained in s+offset and store it in d.
+ * d is written with size<4, so remove_offset(d,-1) is called first; s is
+ * taken with readreg_offset and its pending offset is folded into the
+ * immediate offset. */
+MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
+{
+    int sreg=s;            /* virtual register number, kept for get_offset */
+    if (isconst(s)) {
+       COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
+       return;
+    }
+    CLOBBER_MOV;
+    remove_offset(d,-1);
+    s=readreg_offset(s,4);
+    offset+=get_offset(sreg);
+    d=writereg(d,2);
+
+    raw_mov_w_brR(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
+
+/* Read the byte at the address contained in s+offset and store it in d.
+ * d is written with size<4, so remove_offset(d,-1) is called first; s is
+ * taken with readreg_offset and its pending offset is folded into the
+ * immediate offset. */
+MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
+{
+    int sreg=s;            /* virtual register number, kept for get_offset */
+    if (isconst(s)) {
+       COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
+       return;
+    }
+    CLOBBER_MOV;
+    remove_offset(d,-1);
+    s=readreg_offset(s,4);
+    offset+=get_offset(sreg);
+    d=writereg(d,1);
+
+    raw_mov_b_brR(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
+
+/* Store the immediate long i at the address contained in d+offset.
+ * d is an address register here and is only read.  A constant d is folded
+ * into an absolute address and forwarded to mov_l_mi; otherwise d's
+ * pending offset is folded into the immediate offset. */
+MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
+{
+    int dreg=d;            /* virtual register number, kept for get_offset */
+    if (isconst(d)) {
+       COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
+       return;
+    }
+
+    CLOBBER_MOV;
+    d=readreg_offset(d,4);
+    offset+=get_offset(dreg);
+    raw_mov_l_Ri(d,i,offset);
+    unlock(d);
+}
+MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
+
+/* Store the immediate word i at the address contained in d+offset.
+ * A constant d is folded into an absolute address and forwarded to
+ * mov_w_mi; otherwise d's pending offset is folded into the immediate
+ * offset. */
+MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
+{
+    int dreg=d;            /* virtual register number, kept for get_offset */
+    if (isconst(d)) {
+       COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
+       return;
+    }
+
+    CLOBBER_MOV;
+    d=readreg_offset(d,4);
+    offset+=get_offset(dreg);
+    raw_mov_w_Ri(d,i,offset);
+    unlock(d);
+}
+MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
+
+/* Store the immediate byte i at the address contained in d+offset.
+ * A constant d is folded into an absolute address and forwarded to
+ * mov_b_mi; otherwise d's pending offset is folded into the immediate
+ * offset. */
+MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
+{
+    int dreg=d;            /* virtual register number, kept for get_offset */
+    if (isconst(d)) {
+       COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
+       return;
+    }
+
+    CLOBBER_MOV;
+    d=readreg_offset(d,4);
+    offset+=get_offset(dreg);
+    raw_mov_b_Ri(d,i,offset);
+    unlock(d);
+}
+MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
+
+/* Store the long in register s at the address contained in d+offset.
+ * Warning! OFFSET is byte sized only!
+ * A constant d is forwarded to mov_l_mr with the absolute address; a
+ * constant s is forwarded to mov_l_Ri.  Both registers are only read. */
+MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
+{
+    if (isconst(d)) {
+       COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
+       return;
+    }
+    if (isconst(s)) {
+       COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,4);
+    d=readreg(d,4);
+
+    raw_mov_l_Rr(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
+
+/* Store the word in register s at the address contained in d+offset.
+ * A constant d is forwarded to mov_w_mr with the absolute address; a
+ * constant s is forwarded to mov_w_Ri with the value truncated to
+ * uae_u16.  Both registers are only read. */
+MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
+{
+    if (isconst(d)) {
+       COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
+       return;
+    }
+    if (isconst(s)) {
+       COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,2);
+    d=readreg(d,4);
+    raw_mov_w_Rr(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
+
+/* Store the byte in register s at the address contained in d+offset.
+ * A constant d is forwarded to mov_b_mr with the absolute address; a
+ * constant s is forwarded to mov_b_Ri with the value truncated to
+ * uae_u8.  Both registers are only read. */
+MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
+{
+    if (isconst(d)) {
+       COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
+       return;
+    }
+    if (isconst(s)) {
+       COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,1);
+    d=readreg(d,4);
+    raw_mov_b_Rr(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
+
+/* Compute d = s + offset.
+ *  - constant s: folded and forwarded to mov_l_ri.
+ *  - USE_OFFSET builds with d==s: only the pending offset of d is
+ *    adjusted (add_offset); no code is emitted.
+ *  - otherwise raw_lea_l_brr is emitted with s read and d written. */
+MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
+{
+    if (isconst(s)) {
+       COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
+       return;
+    }
+#if USE_OFFSET
+    if (d==s) {
+       add_offset(d,offset);
+       return;
+    }
+#endif
+    CLOBBER_LEA;
+    s=readreg(s,4);
+    d=writereg(d,4);
+    raw_lea_l_brr(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
+
+/* Emit raw_lea_l_brr_indexed into d from s, index, factor and an
+ * immediate offset.  A zero offset is forwarded to lea_l_rr_indexed. */
+MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+{
+    if (!offset) {
+       COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
+       return;
+    }
+    CLOBBER_LEA;
+    s=readreg(s,4);
+    index=readreg(index,4);
+    d=writereg(d,4);
+
+    raw_lea_l_brr_indexed(d,s,index,factor,offset);
+    unlock(d);
+    unlock(index);
+    unlock(s);
+}
+MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+
+/* Emit raw_lea_l_rr_indexed into d from s, index and factor.  s and index
+ * are locked for reading before d is locked for writing. */
+MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+{
+    CLOBBER_LEA;
+    s=readreg(s,4);
+    index=readreg(index,4);
+    d=writereg(d,4);
+
+    raw_lea_l_rr_indexed(d,s,index,factor);
+    unlock(d);
+    unlock(index);
+    unlock(s);
+}
+MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+
+/* Write the long in s to the address contained in d+offset.
+ * A constant d is folded into an absolute address and forwarded to
+ * mov_l_mr; otherwise d is taken with readreg_offset and its pending
+ * offset is folded into the immediate offset. */
+MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
+{
+    int dreg=d;            /* virtual register number, kept for get_offset */
+    if (isconst(d)) {
+       COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,4);
+    d=readreg_offset(d,4);
+    offset+=get_offset(dreg);
+
+    raw_mov_l_bRr(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
+
+/* write the word in s to the address contained in d+offset */
+MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
+{
+    int dreg=d; /* keep the vreg number; d becomes a native reg below */
+
+    if (isconst(d)) { /* known base: store to an absolute address */
+       COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,2);
+    d=readreg_offset(d,4);
+    offset+=get_offset(dreg); /* fold d's pending offset into the displacement */
+    raw_mov_w_bRr(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
+{   /* Byte variant of mov_l_bRr: move the byte in s to d+offset. */
+    int dreg=d; /* keep the vreg number; d becomes a native reg below */
+    if (isconst(d)) { /* known base: store to an absolute address */
+       COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,1);
+    d=readreg_offset(d,4);
+    offset+=get_offset(dreg); /* fold d's pending offset into the displacement */
+    raw_mov_b_bRr(d,s,offset);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+/* Reverse the byte order of the 32-bit value in r.
+ * A register holding a known constant is swapped at compile time with
+ * reverse32() and no code is emitted; otherwise r is locked for a
+ * 4-byte read-modify-write and raw_bswap_32 is emitted. */
+MIDFUNC(1,bswap_32,(RW4 r))
+{
+    if (isconst(r)) {
+       uae_u32 oldv=live.state[r].val;
+       live.state[r].val=reverse32(oldv);
+       return;
+    }
+
+    CLOBBER_SW32;
+    r=rmw(r,4,4);
+    raw_bswap_32(r);
+    unlock(r);
+}
+MENDFUNC(1,bswap_32,(RW4 r))
+
+MIDFUNC(1,bswap_16,(RW2 r))
+{   /* Swap the two low bytes of r; the upper 16 bits are preserved. */
+    if (isconst(r)) { /* known constant: swap at compile time */
+       uae_u32 oldv=live.state[r].val;
+       live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
+           (oldv&0xffff0000);
+       return;
+    }
+
+    CLOBBER_SW16;
+    r=rmw(r,2,2);
+  
+    raw_bswap_16(r);
+    unlock(r);
+}
+MENDFUNC(1,bswap_16,(RW2 r))
+
+
+
+MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
+{   /* 32-bit register copy d = s.  With USE_ALIAS no code is emitted:
+     * d is simply recorded as an additional vreg held by s's native
+     * register.  Otherwise a real raw_mov_l_rr is emitted. */
+    int olds;
+
+    if (d==s) { /* How pointless! */
+       return;
+    }
+    if (isconst(s)) {
+       COMPCALL(mov_l_ri)(d,live.state[s].val);
+       return;
+    }
+#if USE_ALIAS
+    olds=s; /* vreg number of s; s becomes a native reg below */
+    disassociate(d);
+    s=readreg_offset(s,4);
+    live.state[d].realreg=s;
+    live.state[d].realind=live.nat[s].nholds;
+    live.state[d].val=live.state[olds].val; /* inherit s's pending offset */
+    live.state[d].validsize=4;
+    live.state[d].dirtysize=4;
+    set_status(d,DIRTY);
+
+    live.nat[s].holds[live.nat[s].nholds]=d; /* append d to the holder list */
+    live.nat[s].nholds++;
+    log_clobberreg(d);
+    
+    /* printf("Added %d to nreg %d(%d), now holds %d regs\n",
+       d,s,live.state[d].realind,live.nat[s].nholds); */
+    unlock(s);
+#else
+    CLOBBER_MOV;
+    s=readreg(s,4);
+    d=writereg(d,4);
+
+    raw_mov_l_rr(d,s);
+    unlock(d);
+    unlock(s);
+#endif
+}
+MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
+
+MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
+{   /* Long move of register s to the absolute address d.  A constant s
+     * is forwarded to mov_l_mi as an immediate. */
+    if (isconst(s)) {
+       COMPCALL(mov_l_mi)(d,live.state[s].val);
+       return;
+    }
+    CLOBBER_MOV;
+    s=readreg(s,4);
+
+    raw_mov_l_mr(d,s);
+    unlock(s);
+}
+MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
+
+
+MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
+{   /* Word move of register s to the absolute address d.  A constant s
+     * is truncated to 16 bits and forwarded to mov_w_mi. */
+    if (isconst(s)) {
+       COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
+       return;
+    }
+    CLOBBER_MOV;
+    s=readreg(s,2);
+
+    raw_mov_w_mr(d,s);
+    unlock(s);
+}
+MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
+
+MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
+{   /* Word move from the absolute address s into d: d is locked for a
+     * 2-byte write and raw_mov_w_rm(d,s) is emitted. */
+    CLOBBER_MOV;
+    d=writereg(d,2);
+
+    raw_mov_w_rm(d,s);
+    unlock(d);
+}
+MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
+
+MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
+{   /* Byte move of register s to the absolute address d.  A constant s
+     * is truncated to 8 bits and forwarded to mov_b_mi. */
+    if (isconst(s)) {
+       COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
+       return;
+    }
+
+    CLOBBER_MOV;
+    s=readreg(s,1);
+
+    raw_mov_b_mr(d,s);
+    unlock(s);
+}
+MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
+
+MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
+{   /* Byte move from the absolute address s into d: d is locked for a
+     * 1-byte write and raw_mov_b_rm(d,s) is emitted. */
+    CLOBBER_MOV;
+    d=writereg(d,1);
+
+    raw_mov_b_rm(d,s);
+    unlock(d);
+}
+MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
+
+MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
+{   /* 32-bit immediate load: only records d as the constant s via
+     * set_const(); no raw instruction is emitted here. */
+    set_const(d,s);
+    return;
+}
+MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
+
+MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
+{   /* 16-bit immediate load: d is locked for a 2-byte write and
+     * raw_mov_w_ri(d,s) is emitted (no constant tracking, unlike
+     * mov_l_ri). */
+    CLOBBER_MOV;
+    d=writereg(d,2);
+
+    raw_mov_w_ri(d,s);
+    unlock(d);
+}
+MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
+
+MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
+{   /* 8-bit immediate load: d is locked for a 1-byte write and
+     * raw_mov_b_ri(d,s) is emitted. */
+    CLOBBER_MOV;
+    d=writereg(d,1);
+
+    raw_mov_b_ri(d,s);
+    unlock(d);
+}
+MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
+
+
+MIDFUNC(2,add_l_mi,(IMM d, IMM s)) 
+{   /* Emits raw_add_l_mi(d,s); both operands are immediates, so no
+     * register is locked. */
+    CLOBBER_ADD;
+    raw_add_l_mi(d,s) ;
+}
+MENDFUNC(2,add_l_mi,(IMM d, IMM s)) 
+
+MIDFUNC(2,add_w_mi,(IMM d, IMM s)) 
+{   /* Emits raw_add_w_mi(d,s); both operands are immediates, so no
+     * register is locked. */
+    CLOBBER_ADD;
+    raw_add_w_mi(d,s) ;
+}
+MENDFUNC(2,add_w_mi,(IMM d, IMM s)) 
+
+MIDFUNC(2,add_b_mi,(IMM d, IMM s)) 
+{   /* Emits raw_add_b_mi(d,s); both operands are immediates, so no
+     * register is locked. */
+    CLOBBER_ADD;
+    raw_add_b_mi(d,s) ;
+}
+MENDFUNC(2,add_b_mi,(IMM d, IMM s)) 
+
+
+MIDFUNC(2,test_l_ri,(R4 d, IMM i))
+{   /* 32-bit test of d against immediate i: d is only read, then
+     * raw_test_l_ri(d,i) is emitted. */
+    CLOBBER_TEST;
+    d=readreg(d,4);
+
+    raw_test_l_ri(d,i);
+    unlock(d);
+}
+MENDFUNC(2,test_l_ri,(R4 d, IMM i))
+
+/* 32-bit register/register test: both d and s are locked for 4-byte
+ * reads (neither is written), then raw_test_l_rr(d,s) is emitted. */
+MIDFUNC(2,test_l_rr,(R4 d, R4 s))
+{
+    CLOBBER_TEST;
+    d=readreg(d,4);
+    s=readreg(s,4);
+
+    raw_test_l_rr(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,test_l_rr,(R4 d, R4 s))
+
+MIDFUNC(2,test_w_rr,(R2 d, R2 s))
+{   /* 16-bit register/register test: d and s are locked for 2-byte
+     * reads, then raw_test_w_rr(d,s) is emitted. */
+    CLOBBER_TEST;
+    d=readreg(d,2);
+    s=readreg(s,2);
+
+    raw_test_w_rr(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,test_w_rr,(R2 d, R2 s))
+
+MIDFUNC(2,test_b_rr,(R1 d, R1 s))
+{   /* 8-bit register/register test: d and s are locked for 1-byte
+     * reads, then raw_test_b_rr(d,s) is emitted. */
+    CLOBBER_TEST;
+    d=readreg(d,1);
+    s=readreg(s,1);
+
+    raw_test_b_rr(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,test_b_rr,(R1 d, R1 s))
+
+
+MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
+{   /* d &= i (32 bit).  If d is a known constant and the flags are
+     * not needed, the AND is folded at compile time and nothing is
+     * emitted. */
+    if (isconst (d) && ! needflags) {
+       live.state[d].val &= i;
+       return;
+    }
+
+    CLOBBER_AND;
+    d=rmw(d,4,4);
+
+    raw_and_l_ri(d,i);
+    unlock(d);
+}
+MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
+
+MIDFUNC(2,and_l,(RW4 d, R4 s))
+{   /* 32-bit AND: s is locked for a 4-byte read and d for a 4-byte
+     * read-modify-write, then raw_and_l(d,s) is emitted. */
+    CLOBBER_AND;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+
+    raw_and_l(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,and_l,(RW4 d, R4 s))
+
+MIDFUNC(2,and_w,(RW2 d, R2 s))
+{   /* 16-bit AND: s is locked for a 2-byte read and d for a 2-byte
+     * read-modify-write, then raw_and_w(d,s) is emitted. */
+    CLOBBER_AND;
+    s=readreg(s,2);
+    d=rmw(d,2,2);
+
+    raw_and_w(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,and_w,(RW2 d, R2 s))
+
+MIDFUNC(2,and_b,(RW1 d, R1 s))
+{   /* 8-bit AND: s is locked for a 1-byte read and d for a 1-byte
+     * read-modify-write, then raw_and_b(d,s) is emitted. */
+    CLOBBER_AND;
+    s=readreg(s,1);
+    d=rmw(d,1,1);
+
+    raw_and_b(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,and_b,(RW1 d, R1 s))
+
+MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
+{   /* d |= i (32 bit).  Folded at compile time when d is a known
+     * constant and the flags are not needed. */
+    if (isconst(d) && !needflags) {
+       live.state[d].val|=i;
+       return;
+    }
+    CLOBBER_OR;
+    d=rmw(d,4,4);
+
+    raw_or_l_ri(d,i);
+    unlock(d);
+}
+MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
+
+MIDFUNC(2,or_l,(RW4 d, R4 s))
+{   /* d |= s (32 bit).  Folded at compile time only when both
+     * operands are known constants and the flags are not needed. */
+    if (isconst(d) && isconst(s) && !needflags) {
+       live.state[d].val|=live.state[s].val;
+       return;
+    }
+    CLOBBER_OR;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+
+    raw_or_l(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,or_l,(RW4 d, R4 s))
+
+MIDFUNC(2,or_w,(RW2 d, R2 s))
+{   /* 16-bit OR: s is locked for a 2-byte read and d for a 2-byte
+     * read-modify-write, then raw_or_w(d,s) is emitted. */
+    CLOBBER_OR;
+    s=readreg(s,2);
+    d=rmw(d,2,2);
+
+    raw_or_w(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,or_w,(RW2 d, R2 s))
+
+MIDFUNC(2,or_b,(RW1 d, R1 s))
+{   /* 8-bit OR: s is locked for a 1-byte read and d for a 1-byte
+     * read-modify-write, then raw_or_b(d,s) is emitted. */
+    CLOBBER_OR;
+    s=readreg(s,1);
+    d=rmw(d,1,1);
+
+    raw_or_b(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,or_b,(RW1 d, R1 s))
+
+MIDFUNC(2,adc_l,(RW4 d, R4 s))
+{   /* 32-bit add-with-carry (per the raw_adc_l name): s is read, d is
+     * read-modify-write.  No constant folding is attempted. */
+    CLOBBER_ADC;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+
+    raw_adc_l(d,s);
+
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,adc_l,(RW4 d, R4 s))
+
+MIDFUNC(2,adc_w,(RW2 d, R2 s))
+{   /* 16-bit add-with-carry (per the raw_adc_w name): s is read, d is
+     * read-modify-write. */
+    CLOBBER_ADC;
+    s=readreg(s,2);
+    d=rmw(d,2,2);
+
+    raw_adc_w(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,adc_w,(RW2 d, R2 s))
+
+MIDFUNC(2,adc_b,(RW1 d, R1 s))
+{   /* 8-bit add-with-carry (per the raw_adc_b name): s is read, d is
+     * read-modify-write. */
+    CLOBBER_ADC;
+    s=readreg(s,1);
+    d=rmw(d,1,1);
+
+    raw_adc_b(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,adc_b,(RW1 d, R1 s))
+
+MIDFUNC(2,add_l,(RW4 d, R4 s))
+{   /* 32-bit add of s into d.  A constant s is forwarded to add_l_ri
+     * so its folding/offset shortcuts can apply. */
+    if (isconst(s)) {
+       COMPCALL(add_l_ri)(d,live.state[s].val);
+       return;
+    }
+
+    CLOBBER_ADD;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+
+    raw_add_l(d,s);
+
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,add_l,(RW4 d, R4 s))
+
+MIDFUNC(2,add_w,(RW2 d, R2 s))
+{   /* 16-bit add of s into d.  A constant s is truncated to 16 bits
+     * and forwarded to add_w_ri. */
+    if (isconst(s)) {
+       COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
+       return;
+    }
+
+    CLOBBER_ADD;
+    s=readreg(s,2);
+    d=rmw(d,2,2);
+
+    raw_add_w(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,add_w,(RW2 d, R2 s))
+
+MIDFUNC(2,add_b,(RW1 d, R1 s))
+{   /* 8-bit add of s into d.  A constant s is truncated to 8 bits and
+     * forwarded to add_b_ri. */
+    if (isconst(s)) {
+       COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
+       return;
+    }
+
+    CLOBBER_ADD;
+    s=readreg(s,1);
+    d=rmw(d,1,1);
+
+    raw_add_b(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,add_b,(RW1 d, R1 s))
+
+MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
+{   /* d -= i (32 bit).  All shortcuts below apply only when the
+     * flags are not needed, because they emit no flag-setting code. */
+    if (!i && !needflags) /* subtracting 0: nothing to do */
+       return;
+    if (isconst(d) && !needflags) { /* fold into the known constant */
+       live.state[d].val-=i;
+       return;
+    }
+#if USE_OFFSET 
+    if (!needflags) { /* defer: record -i as a pending offset on d */
+       add_offset(d,-(signed)i);
+       return;
+    }
+#endif
+
+    CLOBBER_SUB;
+    d=rmw(d,4,4);
+
+    raw_sub_l_ri(d,i);
+    unlock(d);
+}
+MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
+
+MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
+{   /* 16-bit subtract of immediate i from d; skipped entirely when i
+     * is 0 and the flags are not needed. */
+    if (!i && !needflags)
+       return;
+
+    CLOBBER_SUB;
+    d=rmw(d,2,2);
+
+    raw_sub_w_ri(d,i);
+    unlock(d);
+}
+MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
+
+MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
+{   /* 8-bit subtract of immediate i from d; skipped entirely when i
+     * is 0 and the flags are not needed. */
+    if (!i && !needflags)
+       return;
+
+    CLOBBER_SUB;
+    d=rmw(d,1,1);
+
+    raw_sub_b_ri(d,i);
+
+    unlock(d);
+}
+MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
+
+MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
+{   /* d += i (32 bit).  All shortcuts below apply only when the
+     * flags are not needed, because they emit no flag-setting code. */
+    if (!i && !needflags) /* adding 0: nothing to do */
+       return;
+    if (isconst(d) && !needflags) { /* fold into the known constant */
+       live.state[d].val+=i;
+       return;
+    }
+#if USE_OFFSET 
+    if (!needflags) { /* defer: record i as a pending offset on d */
+       add_offset(d,i);
+       return;
+    }
+#endif
+    CLOBBER_ADD;
+    d=rmw(d,4,4);
+    raw_add_l_ri(d,i);
+    unlock(d);
+}
+MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
+
+MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
+{   /* 16-bit add of immediate i to d; skipped entirely when i is 0
+     * and the flags are not needed. */
+    if (!i && !needflags)
+       return;
+
+    CLOBBER_ADD;
+    d=rmw(d,2,2);
+
+    raw_add_w_ri(d,i);
+    unlock(d);
+}
+MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
+
+MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
+{   /* 8-bit add of immediate i to d; skipped entirely when i is 0
+     * and the flags are not needed. */
+    if (!i && !needflags)
+       return;
+
+    CLOBBER_ADD;
+    d=rmw(d,1,1);
+
+    raw_add_b_ri(d,i);
+
+    unlock(d);
+}
+MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
+
+MIDFUNC(2,sbb_l,(RW4 d, R4 s))
+{   /* 32-bit subtract-with-borrow (per the raw_sbb_l name): s is
+     * read, d is read-modify-write.  No constant folding. */
+    CLOBBER_SBB;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+
+    raw_sbb_l(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,sbb_l,(RW4 d, R4 s))
+
+MIDFUNC(2,sbb_w,(RW2 d, R2 s))
+{   /* 16-bit subtract-with-borrow (per the raw_sbb_w name): s is
+     * read, d is read-modify-write. */
+    CLOBBER_SBB;
+    s=readreg(s,2);
+    d=rmw(d,2,2);
+
+    raw_sbb_w(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,sbb_w,(RW2 d, R2 s))
+
+MIDFUNC(2,sbb_b,(RW1 d, R1 s))
+{   /* 8-bit subtract-with-borrow (per the raw_sbb_b name): s is
+     * read, d is read-modify-write. */
+    CLOBBER_SBB;
+    s=readreg(s,1);
+    d=rmw(d,1,1);
+
+    raw_sbb_b(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,sbb_b,(RW1 d, R1 s))
+
+MIDFUNC(2,sub_l,(RW4 d, R4 s))
+{   /* 32-bit subtract of s from d.  A constant s is forwarded to
+     * sub_l_ri so its folding/offset shortcuts can apply. */
+    if (isconst(s)) {
+       COMPCALL(sub_l_ri)(d,live.state[s].val);
+       return;
+    }
+
+    CLOBBER_SUB;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+
+    raw_sub_l(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,sub_l,(RW4 d, R4 s))
+
+MIDFUNC(2,sub_w,(RW2 d, R2 s))
+{   /* 16-bit subtract of s from d.  A constant s is truncated to 16
+     * bits and forwarded to sub_w_ri. */
+    if (isconst(s)) {
+       COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
+       return;
+    }
+
+    CLOBBER_SUB;
+    s=readreg(s,2);
+    d=rmw(d,2,2);
+
+    raw_sub_w(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,sub_w,(RW2 d, R2 s))
+
+MIDFUNC(2,sub_b,(RW1 d, R1 s))
+{   /* 8-bit subtract of s from d.  A constant s is truncated to 8
+     * bits and forwarded to sub_b_ri. */
+    if (isconst(s)) {
+       COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
+       return;
+    }
+
+    CLOBBER_SUB;
+    s=readreg(s,1);
+    d=rmw(d,1,1);
+
+    raw_sub_b(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,sub_b,(RW1 d, R1 s))
+
+MIDFUNC(2,cmp_l,(R4 d, R4 s))
+{   /* 32-bit compare: both operands are only read (readreg), then
+     * raw_cmp_l(d,s) is emitted. */
+    CLOBBER_CMP;
+    s=readreg(s,4);
+    d=readreg(d,4);
+
+    raw_cmp_l(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,cmp_l,(R4 d, R4 s))
+
+MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
+{   /* 32-bit compare of r against immediate i: r is only read, then
+     * raw_cmp_l_ri(r,i) is emitted. */
+    CLOBBER_CMP;
+    r=readreg(r,4);
+
+    raw_cmp_l_ri(r,i);
+    unlock(r);
+}
+MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
+
+MIDFUNC(2,cmp_w,(R2 d, R2 s))
+{   /* 16-bit compare: both operands are only read (readreg), then
+     * raw_cmp_w(d,s) is emitted. */
+    CLOBBER_CMP;
+    s=readreg(s,2);
+    d=readreg(d,2);
+
+    raw_cmp_w(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,cmp_w,(R2 d, R2 s))
+
+MIDFUNC(2,cmp_b,(R1 d, R1 s))
+{   /* 8-bit compare: both operands are only read (readreg), then
+     * raw_cmp_b(d,s) is emitted. */
+    CLOBBER_CMP;
+    s=readreg(s,1);
+    d=readreg(d,1);
+
+    raw_cmp_b(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,cmp_b,(R1 d, R1 s))
+
+
+MIDFUNC(2,xor_l,(RW4 d, R4 s))
+{   /* 32-bit XOR: s is locked for a 4-byte read and d for a 4-byte
+     * read-modify-write, then raw_xor_l(d,s) is emitted. */
+    CLOBBER_XOR;
+    s=readreg(s,4);
+    d=rmw(d,4,4);
+
+    raw_xor_l(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,xor_l,(RW4 d, R4 s))
+
+MIDFUNC(2,xor_w,(RW2 d, R2 s))
+{   /* 16-bit XOR: s is locked for a 2-byte read and d for a 2-byte
+     * read-modify-write, then raw_xor_w(d,s) is emitted. */
+    CLOBBER_XOR;
+    s=readreg(s,2);
+    d=rmw(d,2,2);
+
+    raw_xor_w(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,xor_w,(RW2 d, R2 s))
+
+MIDFUNC(2,xor_b,(RW1 d, R1 s))
+{   /* 8-bit XOR: s is locked for a 1-byte read and d for a 1-byte
+     * read-modify-write, then raw_xor_b(d,s) is emitted. */
+    CLOBBER_XOR;
+    s=readreg(s,1);
+    d=rmw(d,1,1);
+
+    raw_xor_b(d,s);
+    unlock(d);
+    unlock(s);
+}
+MENDFUNC(2,xor_b,(RW1 d, R1 s))
+
+MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
+{   /* Emit a call through register r with one input (in1, isize
+     * bytes) and one output (out1, osize bytes).  Flags are clobbered
+     * and all pending offsets are removed first. */
+    clobber_flags();
+    remove_all_offsets();
+    if (osize==4) {
+       /* out1 is fully overwritten, so its old value can be dropped
+          unless it is also one of the inputs */
+       if (out1!=in1 && out1!=r) {
+           COMPCALL(forget_about)(out1);
+       }
+    }
+    else {
+       tomem_c(out1);
+    }
+
+    in1=readreg_specific(in1,isize,REG_PAR1); /* force in1 into REG_PAR1 */
+    r=readreg(r,4);
+    prepare_for_call_1();  /* This should ensure that there won't be
+                             any need for swapping nregs in prepare_for_call_2
+                          */
+#if USE_NORMAL_CALLING_CONVENTION
+    raw_push_l_r(in1);
+#endif
+    unlock(in1);
+    unlock(r);
+
+    prepare_for_call_2();
+    raw_call_r(r);
+
+#if USE_NORMAL_CALLING_CONVENTION
+    raw_inc_sp(4); /* pop the pushed argument */
+#endif
+
+
+    /* Record that REG_RESULT now holds out1 (and nothing else),
+       dirty for osize bytes */
+    live.nat[REG_RESULT].holds[0]=out1;
+    live.nat[REG_RESULT].nholds=1;
+    live.nat[REG_RESULT].touched=touchcnt++;
+
+    live.state[out1].realreg=REG_RESULT;
+    live.state[out1].realind=0;
+    live.state[out1].val=0;
+    live.state[out1].validsize=osize;
+    live.state[out1].dirtysize=osize;
+    set_status(out1,DIRTY);
+}
+MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
+
+MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
+{   /* Emit a call through register r with two inputs and no output.
+     * in1/in2 are forced into REG_PAR1/REG_PAR2; with
+     * USE_NORMAL_CALLING_CONVENTION they are also pushed (in2 first)
+     * and the 8 bytes are popped again after the call. */
+    clobber_flags();
+    remove_all_offsets();
+    in1=readreg_specific(in1,isize1,REG_PAR1);
+    in2=readreg_specific(in2,isize2,REG_PAR2);
+    r=readreg(r,4);
+    prepare_for_call_1();  /* This should ensure that there won't be
+                             any need for swapping nregs in prepare_for_call_2
+                          */
+#if USE_NORMAL_CALLING_CONVENTION
+    raw_push_l_r(in2);
+    raw_push_l_r(in1);
+#endif
+    unlock(r);
+    unlock(in1);
+    unlock(in2);
+    prepare_for_call_2();
+    raw_call_r(r);
+#if USE_NORMAL_CALLING_CONVENTION
+    raw_inc_sp(8);
+#endif
+}
+MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
+
+
+MIDFUNC(1,forget_about,(W4 r))
+{   /* Discard the contents of vreg r without writing them back: drop
+     * any native-register association, clear val, mark it UNDEF. */
+    if (isinreg(r))
+       disassociate(r);
+    live.state[r].val=0;
+    set_status(r,UNDEF);
+}
+MENDFUNC(1,forget_about,(W4 r))
+
+MIDFUNC(0,nop,(void))
+{   /* Emits raw_nop(); touches no register state. */
+    raw_nop();
+}
+MENDFUNC(0,nop,(void))
+
+
+MIDFUNC(1,f_forget_about,(FW r))
+{   /* FP counterpart of forget_about: drop any native FP register
+     * association of r and mark it UNDEF without writing it back. */
+    if (f_isinreg(r))
+       f_disassociate(r);
+    live.fate[r].status=UNDEF;
+}
+MENDFUNC(1,f_forget_about,(FW r))
+
+MIDFUNC(1,fmov_pi,(FW r))
+{   /* Load a constant into FP register r via raw_fmov_pi (pi, per the
+     * name); r is write-only. */
+    r=f_writereg(r);
+    raw_fmov_pi(r);
+    f_unlock(r);
+}
+MENDFUNC(1,fmov_pi,(FW r))
+
+MIDFUNC(1,fmov_log10_2,(FW r))
+{   /* Load a constant into FP register r via raw_fmov_log10_2
+     * (log10(2), per the name); r is write-only. */
+    r=f_writereg(r);
+    raw_fmov_log10_2(r);
+    f_unlock(r);
+}
+MENDFUNC(1,fmov_log10_2,(FW r))
+
+MIDFUNC(1,fmov_log2_e,(FW r))
+{   /* Load a constant into FP register r via raw_fmov_log2_e
+     * (log2(e), per the name); r is write-only. */
+    r=f_writereg(r);
+    raw_fmov_log2_e(r);
+    f_unlock(r);
+}
+MENDFUNC(1,fmov_log2_e,(FW r))
+
+MIDFUNC(1,fmov_loge_2,(FW r))
+{   /* Load a constant into FP register r via raw_fmov_loge_2 (ln(2),
+     * per the name); r is write-only. */
+    r=f_writereg(r);
+    raw_fmov_loge_2(r);
+    f_unlock(r);
+}
+MENDFUNC(1,fmov_loge_2,(FW r))
+
+MIDFUNC(1,fmov_1,(FW r))
+{   /* Load a constant into FP register r via raw_fmov_1 (1.0, per the
+     * name); r is write-only. */
+    r=f_writereg(r);
+    raw_fmov_1(r);
+    f_unlock(r);
+}
+MENDFUNC(1,fmov_1,(FW r))
+
+MIDFUNC(1,fmov_0,(FW r))
+{   /* Load a constant into FP register r via raw_fmov_0 (0.0, per the
+     * name); r is write-only. */
+    r=f_writereg(r);
+    raw_fmov_0(r);
+    f_unlock(r);
+}
+MENDFUNC(1,fmov_0,(FW r))
+
+MIDFUNC(2,fmov_rm,(FW r, MEMR m))
+{   /* FP load: r is locked write-only and raw_fmov_rm(r,m) reads it
+     * from memory operand m. */
+    r=f_writereg(r);
+    raw_fmov_rm(r,m);
+    f_unlock(r);
+}
+MENDFUNC(2,fmov_rm,(FW r, MEMR m))
+
+MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
+{   /* FP load via raw_fmovi_rm (presumably from an integer in memory,
+     * judging by the 'i' -- confirm in the raw layer). */
+    r=f_writereg(r);
+    raw_fmovi_rm(r,m);
+    f_unlock(r);
+}
+MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
+
+MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
+{   /* FP store via raw_fmovi_mr (presumably as an integer, judging by
+     * the 'i' -- confirm in the raw layer); r is only read. */
+    r=f_readreg(r);
+    raw_fmovi_mr(m,r);
+    f_unlock(r);
+}
+MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
+
+MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
+{   /* FP load via raw_fmovs_rm (presumably single precision, judging
+     * by the 's' -- confirm in the raw layer). */
+    r=f_writereg(r);
+    raw_fmovs_rm(r,m);
+    f_unlock(r);
+}
+MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
+
+MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
+{   /* FP store via raw_fmovs_mr (presumably single precision, judging
+     * by the 's' -- confirm in the raw layer); r is only read. */
+    r=f_readreg(r);
+    raw_fmovs_mr(m,r);
+    f_unlock(r);
+}
+MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
+
+MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
+{   /* FP store via raw_fmov_ext_mr (presumably extended precision,
+     * per the name -- confirm in the raw layer); r is only read. */
+    r=f_readreg(r);
+    raw_fmov_ext_mr(m,r);
+    f_unlock(r);
+}
+MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
+
+MIDFUNC(2,fmov_mr,(MEMW m, FR r))
+{   /* FP store: r is locked read-only and raw_fmov_mr(m,r) writes it
+     * to memory operand m. */
+    r=f_readreg(r);
+    raw_fmov_mr(m,r);
+    f_unlock(r);
+}
+MENDFUNC(2,fmov_mr,(MEMW m, FR r))
+
+MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
+{   /* FP load via raw_fmov_ext_rm (presumably extended precision, per
+     * the name -- confirm in the raw layer); r is write-only. */
+    r=f_writereg(r);
+    raw_fmov_ext_rm(r,m);
+    f_unlock(r);
+}
+MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
+
+MIDFUNC(2,fmov_rr,(FW d, FR s))
+{   /* FP register copy d = s.  With USE_F_ALIAS no code is emitted:
+     * d is recorded as an additional vreg held by s's native FP
+     * register and marked DIRTY.  Otherwise raw_fmov_rr is emitted. */
+    if (d==s) { /* How pointless! */
+       return;
+    }
+#if USE_F_ALIAS
+    f_disassociate(d);
+    s=f_readreg(s);
+    live.fate[d].realreg=s;
+    live.fate[d].realind=live.fat[s].nholds;
+    live.fate[d].status=DIRTY;
+    live.fat[s].holds[live.fat[s].nholds]=d; /* append d to the holder list */
+    live.fat[s].nholds++;
+    f_unlock(s);
+#else
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fmov_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+#endif
+}
+MENDFUNC(2,fmov_rr,(FW d, FR s))
+
+MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
+{   /* Emits raw_fldcw_m_indexed(index,base) with index locked for a
+     * 4-byte read (presumably loads the FPU control word from a table
+     * at base indexed by index -- confirm in the raw layer). */
+    index=readreg(index,4);
+
+    raw_fldcw_m_indexed(index,base);
+    unlock(index);
+}
+MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
+
+MIDFUNC(1,ftst_r,(FR r))
+{   /* FP test of r: r is only read, then raw_ftst_r(r) is emitted. */
+    r=f_readreg(r);
+    raw_ftst_r(r);
+    f_unlock(r);
+}
+MENDFUNC(1,ftst_r,(FR r))
+
+MIDFUNC(0,dont_care_fflags,(void))
+{   /* Drops the native-register association of FP_RESULT without
+     * writing it back. */
+    f_disassociate(FP_RESULT);
+}
+MENDFUNC(0,dont_care_fflags,(void))
+
+MIDFUNC(2,fsqrt_rr,(FW d, FR s))
+{   /* Unary FP op d = sqrt(s) (per the raw_fsqrt_rr name): s is read,
+     * d is write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fsqrt_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fsqrt_rr,(FW d, FR s))
+
+MIDFUNC(2,fabs_rr,(FW d, FR s))
+{   /* Unary FP op d = |s| (per the raw_fabs_rr name): s is read, d is
+     * write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fabs_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fabs_rr,(FW d, FR s))
+
+MIDFUNC(2,fsin_rr,(FW d, FR s))
+{   /* Unary FP op d = sin(s) (per the raw_fsin_rr name): s is read,
+     * d is write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fsin_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fsin_rr,(FW d, FR s))
+
+MIDFUNC(2,fcos_rr,(FW d, FR s))
+{   /* Unary FP op d = cos(s) (per the raw_fcos_rr name): s is read,
+     * d is write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fcos_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fcos_rr,(FW d, FR s))
+
+MIDFUNC(2,ftwotox_rr,(FW d, FR s))
+{   /* Unary FP op d = 2^s (per the raw_ftwotox_rr name): s is read,
+     * d is write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_ftwotox_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,ftwotox_rr,(FW d, FR s))
+
+MIDFUNC(2,fetox_rr,(FW d, FR s))
+{   /* Unary FP op d = e^s (per the raw_fetox_rr name): s is read, d
+     * is write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fetox_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fetox_rr,(FW d, FR s))
+
+MIDFUNC(2,frndint_rr,(FW d, FR s))
+{   /* Unary FP op d = round-to-integer(s) (per the raw_frndint_rr
+     * name): s is read, d is write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_frndint_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,frndint_rr,(FW d, FR s))
+
+MIDFUNC(2,flog2_rr,(FW d, FR s))
+{   /* Unary FP op d = log2(s) (per the raw_flog2_rr name): s is read,
+     * d is write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_flog2_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,flog2_rr,(FW d, FR s))
+
+MIDFUNC(2,fneg_rr,(FW d, FR s))
+{   /* Unary FP op d = -s (per the raw_fneg_rr name): s is read, d is
+     * write-only. */
+    s=f_readreg(s);
+    d=f_writereg(d);
+    raw_fneg_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fneg_rr,(FW d, FR s))
+
+MIDFUNC(2,fadd_rr,(FRW d, FR s))
+{   /* Binary FP add into d: s is read, d is read-modify-write
+     * (f_rmw), then raw_fadd_rr(d,s) is emitted. */
+    s=f_readreg(s);
+    d=f_rmw(d);
+    raw_fadd_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fadd_rr,(FRW d, FR s))
+
+MIDFUNC(2,fsub_rr,(FRW d, FR s))
+{   /* Binary FP subtract into d: s is read, d is read-modify-write
+     * (f_rmw), then raw_fsub_rr(d,s) is emitted. */
+    s=f_readreg(s);
+    d=f_rmw(d);
+    raw_fsub_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fsub_rr,(FRW d, FR s))
+
+MIDFUNC(2,fcmp_rr,(FR d, FR s))
+{   /* FP compare: both d and s are only read, then raw_fcmp_rr(d,s)
+     * is emitted. */
+    d=f_readreg(d);
+    s=f_readreg(s);
+    raw_fcmp_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fcmp_rr,(FR d, FR s))
+
+MIDFUNC(2,fdiv_rr,(FRW d, FR s))
+{   /* Binary FP divide into d: s is read, d is read-modify-write
+     * (f_rmw), then raw_fdiv_rr(d,s) is emitted. */
+    s=f_readreg(s);
+    d=f_rmw(d);
+    raw_fdiv_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fdiv_rr,(FRW d, FR s))
+
+MIDFUNC(2,frem_rr,(FRW d, FR s))
+{   /* Binary FP remainder into d via raw_frem_rr: s is read, d is
+     * read-modify-write.  How this differs from frem1_rr is decided
+     * in the raw layer, not shown here. */
+    s=f_readreg(s);
+    d=f_rmw(d);
+    raw_frem_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,frem_rr,(FRW d, FR s))
+
+MIDFUNC(2,frem1_rr,(FRW d, FR s))
+{   /* Binary FP remainder into d via raw_frem1_rr: s is read, d is
+     * read-modify-write.  How this differs from frem_rr is decided in
+     * the raw layer, not shown here. */
+    s=f_readreg(s);
+    d=f_rmw(d);
+    raw_frem1_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,frem1_rr,(FRW d, FR s))
+
+MIDFUNC(2,fmul_rr,(FRW d, FR s))
+{   /* Binary FP multiply into d: s is read, d is read-modify-write
+     * (f_rmw), then raw_fmul_rr(d,s) is emitted. */
+    s=f_readreg(s);
+    d=f_rmw(d);
+    raw_fmul_rr(d,s);
+    f_unlock(s);
+    f_unlock(d);
+}
+MENDFUNC(2,fmul_rr,(FRW d, FR s))
+
+
+/********************************************************************
+ * Support functions exposed to gencomp. CREATE time                *
+ ********************************************************************/
+
+int kill_rodent(int r)
+{   /* Returns non-zero when KILLTHERAT and have_rat_stall are both set
+     * and vreg r is INMEM, CLEAN, ISCONST, or dirty over all 4 bytes.
+     * NOTE(review): "rat" presumably refers to the register-alias-table
+     * partial-register stall; confirm against the callers. */
+    return KILLTHERAT && 
+        have_rat_stall &&
+       (live.state[r].status==INMEM || 
+        live.state[r].status==CLEAN || 
+        live.state[r].status==ISCONST || 
+        live.state[r].dirtysize==4);
+}
+
+uae_u32 get_const(int r)
+{   /* Returns the tracked value of vreg r.  The Dif check reports and
+     * aborts if r is not a constant; with USE_OPTIMIZER the check is
+     * skipped while reg_alloc_run is set. */
+#if USE_OPTIMIZER
+    if (!reg_alloc_run) 
+#endif
+       Dif (!isconst(r)) {
+           printf("Register %d should be constant, but isn't\n",r);
+           abort();
+       }
+    return live.state[r].val;
+}
+
+void sync_m68k_pc(void)
+{   /* Apply the accumulated m68k_pc_offset: add it to PC_P via
+     * add_l_ri, advance comp_pc_p by the same amount, and reset the
+     * offset to 0.  Does nothing when the offset is already 0. */
+    if (m68k_pc_offset) {
+       add_l_ri(PC_P,m68k_pc_offset);
+       comp_pc_p+=m68k_pc_offset;
+       m68k_pc_offset=0;
+    }
+}
+    
+/********************************************************************
+ * Support functions exposed to newcpu                              *
+ ********************************************************************/
+
+uae_u32 scratch[VREGS]; /* backing memory for vregs >= 16 (see init_comp) */
+fptype  fscratch[VFREGS]; /* backing memory for FP vregs not mapped to regs.fp / fp_result */
+
+void init_comp(void)
+{   /* Reset the whole register-allocator state (live) before a block
+     * is compiled: every vreg gets a memory home and an initial
+     * status, and every native register is marked empty. */
+    int i;
+    uae_u8* cb=can_byte;
+    uae_u8* cw=can_word;
+    uae_u8* au=always_used;
+
+    for (i=0;i<VREGS;i++) { /* default: scratch, undefined, not in a register */
+       live.state[i].realreg=-1;
+       live.state[i].needflush=NF_SCRATCH;
+       live.state[i].val=0;
+       set_status(i,UNDEF);
+    }
+
+    for (i=0;i<VFREGS;i++) {
+       live.fate[i].status=UNDEF;
+       live.fate[i].realreg=-1;
+       live.fate[i].needflush=NF_SCRATCH;
+    }
+
+    for (i=0;i<VREGS;i++) {
+       if (i<16) { /* First 16 registers map to 68k registers */
+           live.state[i].mem=((uae_u32*)&regs)+i;
+           live.state[i].needflush=NF_TOMEM;
+           set_status(i,INMEM);
+       }
+       else
+           live.state[i].mem=scratch+i;
+    }
+    live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
+    live.state[PC_P].needflush=NF_TOMEM;
+    set_const(PC_P,(uae_u32)comp_pc_p); /* PC_P starts out as a known constant */
+
+    live.state[FLAGX].mem=&(regflags.x);
+    live.state[FLAGX].needflush=NF_TOMEM;
+    set_status(FLAGX,INMEM);
+       
+    live.state[FLAGTMP].mem=&(regflags.cznv);
+    live.state[FLAGTMP].needflush=NF_TOMEM;
+    set_status(FLAGTMP,INMEM);
+
+    live.state[NEXT_HANDLER].needflush=NF_HANDLER;
+    set_status(NEXT_HANDLER,UNDEF);
+
+    for (i=0;i<VFREGS;i++) {
+       if (i<8) { /* First 8 registers map to 68k FPU registers */
+           live.fate[i].mem=(uae_u32*)(((fptype*)regs.fp)+i);
+           live.fate[i].needflush=NF_TOMEM;
+           live.fate[i].status=INMEM;
+       }
+       else if (i==FP_RESULT) {
+           live.fate[i].mem=(uae_u32*)(&regs.fp_result);
+           live.fate[i].needflush=NF_TOMEM;
+           live.fate[i].status=INMEM;
+       }
+       else
+           live.fate[i].mem=(uae_u32*)(fscratch+i);
+    }
+
+
+    /* can_byte, can_word and always_used are walked in step with i, so
+       they must list native register numbers in ascending order
+       (assumes a trailing entry that never matches -- TODO confirm) */
+    for (i=0;i<N_REGS;i++) {
+       live.nat[i].touched=0;
+       live.nat[i].nholds=0;
+       live.nat[i].locked=0;
+       if (*cb==i) {
+           live.nat[i].canbyte=1; cb++;
+       } else live.nat[i].canbyte=0;
+       if (*cw==i) {
+           live.nat[i].canword=1; cw++;
+       } else live.nat[i].canword=0;
+       if (*au==i) { /* always-used registers start out locked */
+           live.nat[i].locked=1; au++;
+       }
+    }
+
+    for (i=0;i<N_FREGS;i++) {
+       live.fat[i].touched=0;
+       live.fat[i].nholds=0;
+       live.fat[i].locked=0;
+    }
+    
+    touchcnt=1;
+    m68k_pc_offset=0;
+    live.flags_in_flags=TRASH;
+    live.flags_on_stack=VALID;
+    live.flags_are_important=1;
+
+    raw_fp_init();
+}
+
+
+static void vinton(int i, uae_s8* vton, int depth)
+{   /* Move vreg i into the native register vton[i].  Whatever vreg
+     * currently occupies that native register is relocated first by
+     * recursion; depth bounds the recursion at N_REGS levels. */
+    int n;
+    int rr;
+
+    Dif (vton[i]==-1) {
+       printf("Asked to load register %d, but nowhere to go\n",i);
+       abort();
+    }
+    n=vton[i];
+    Dif (live.nat[n].nholds>1) 
+       abort();
+    if (live.nat[n].nholds && depth<N_REGS) {
+       vinton(live.nat[n].holds[0],vton,depth+1);
+    }
+    if (!isinreg(i))
+       return;  /* Oops --- got rid of that one in the recursive calls */
+    rr=live.state[i].realreg;
+    if (rr!=n)
+       mov_nregs(n,rr);
+}
+
+#if USE_MATCHSTATE
+/* This is going to be, amongst other things, a more elaborate version of
+   flush().
+
+   Transforms the current allocator state (live) so that it agrees with
+   the snapshot s: every vreg that s expects in a native register ends
+   up in exactly that register with the valid/dirty sizes s records,
+   and everything else is written back or evicted.  Inconsistencies
+   detected along the way abort(). */
+static __inline__ void match_states(smallstate* s)
+{
+    uae_s8 vton[VREGS];   /* vreg -> native reg demanded by s, or -1 */
+    int i;
+
+    for (i=0;i<VREGS;i++) 
+       vton[i]=-1;
+
+    for (i=0;i<N_REGS;i++) 
+       if (s->nat[i].validsize) 
+           vton[s->nat[i].holds]=i;
+
+    flush_flags(); /* low level */
+    sync_m68k_pc(); /* mid level */
+    
+    /* We don't do FREGS yet, so this is raw flush() code */
+    for (i=0;i<VFREGS;i++) {
+       if (live.fate[i].needflush==NF_SCRATCH || 
+           live.fate[i].status==CLEAN) {
+           f_disassociate(i);
+       }
+    }
+    for (i=0;i<VFREGS;i++) {
+       if (live.fate[i].needflush==NF_TOMEM && 
+           live.fate[i].status==DIRTY) {
+           f_evict(i);
+       }
+    }
+    raw_fp_cleanup_drop();
+
+    /* Now comes the fun part. First, we need to remove all offsets */
+    for (i=0;i<VREGS;i++)
+       if (!isconst(i) && live.state[i].val)
+           remove_offset(i,-1);
+
+    /* Next, we evict everything that does not end up in registers,
+       write back overly dirty registers, and write back constants */
+    for (i=0;i<VREGS;i++) {
+       switch (live.state[i].status) {
+        case ISCONST:
+           if (i!=PC_P)
+               writeback_const(i);
+           break;
+        case DIRTY:
+           if (vton[i]==-1) {
+               evict(i);
+               break;
+           }
+           if (live.state[i].dirtysize>s->nat[vton[i]].dirtysize)
+               tomem(i);
+           /* Fall-through! */
+        case CLEAN:
+           if (vton[i]==-1 ||
+               live.state[i].validsize<s->nat[vton[i]].validsize)
+               evict(i);
+           else
+               make_exclusive(i,0,-1);
+           break;
+        case INMEM:
+           break;
+        case UNDEF:
+           break;
+        default:
+           printf("Weird status: %d\n",live.state[i].status);
+           abort();
+       }
+    }
+
+    /* Quick consistency check */
+    for (i=0;i<VREGS;i++) {
+       if (isinreg(i)) {
+           int n=live.state[i].realreg;
+           
+           if (live.nat[n].nholds!=1) {
+               printf("Register %d isn't alone in nreg %d\n",
+                      i,n);
+               abort();
+           }
+           if (vton[i]==-1) {
+               printf("Register %d is still in register, shouldn't be\n",
+                      i);
+               abort();
+           }
+       }
+    }
+
+    /* Now we need to shuffle things around so the VREGs are in the
+       right N_REGs. */
+    for (i=0;i<VREGS;i++) {
+       if (isinreg(i) && vton[i]!=live.state[i].realreg)
+           vinton(i,vton,0);
+    }
+    
+    /* And now we may need to load some registers from memory */
+    for (i=0;i<VREGS;i++) {
+       int n=vton[i];
+       if (n==-1) {
+           Dif (isinreg(i)) {
+               printf("Register %d unexpectedly in nreg %d\n",
+                      i,live.state[i].realreg);
+               abort();
+           }
+       }
+       else {
+           switch(live.state[i].status) {
+            case CLEAN:
+            case DIRTY:
+               Dif (n!=live.state[i].realreg)
+                   abort();
+               break;
+            case INMEM:
+               Dif (live.nat[n].nholds) {
+                   printf("natreg %d holds %d vregs, should be empty\n",
+                          n,live.nat[n].nholds);
+               }
+               /* Load the vreg from its memory home and record it as
+                  the sole, clean occupant of native register n */
+               raw_mov_l_rm(n,(uae_u32)live.state[i].mem);
+               live.state[i].validsize=4;
+               live.state[i].dirtysize=0;
+               live.state[i].realreg=n;
+               live.state[i].realind=0;
+               live.state[i].val=0;
+               live.state[i].is_swapped=0;
+               live.nat[n].nholds=1;
+               live.nat[n].holds[0]=i;
+
+               set_status(i,CLEAN);
+               break;
+            case ISCONST:
+               if (i!=PC_P) {
+                   printf("Got constant in matchstate for reg %d. Bad!\n",i);
+                   abort();
+               }
+               break;
+            case UNDEF:
+               break;
+           }
+       }
+    }
+
+    /* One last consistency check, and adjusting the states in live
+       to those in s */
+    for (i=0;i<VREGS;i++) {
+       int n=vton[i];
+       switch(live.state[i].status) {
+        case INMEM:
+           if (n!=-1)
+               abort();
+           break;
+        case ISCONST:
+           if (i!=PC_P)
+               abort();
+           break;
+        case CLEAN:
+        case DIRTY:
+           if (n==-1)
+               abort();
+           /* These two checks were written "abort;" (an expression
+              statement with no effect) and therefore never fired. */
+           if (live.state[i].dirtysize>s->nat[n].dirtysize)
+               abort();
+           if (live.state[i].validsize<s->nat[n].validsize)
+               abort();
+           live.state[i].dirtysize=s->nat[n].dirtysize;
+           live.state[i].validsize=s->nat[n].validsize;
+           if (live.state[i].dirtysize)
+               set_status(i,DIRTY);
+           break;
+        case UNDEF:
+           break;
+       }
+       if (n!=-1)
+           live.nat[n].touched=touchcnt++;
+    }
+}
+#else
+/* Without USE_MATCHSTATE the target state is ignored and everything is
+   simply flushed with registers saved. */
+static __inline__ void match_states(smallstate* s)
+{
+    flush(1);
+}
+#endif
+
+/* Only do this if you really mean it! The next call should be to init!
+
+   Flushes the flags and the pending m68k PC offset.  When save_regs is
+   non-zero, every vreg marked NF_TOMEM is additionally brought up to
+   date in memory (pending offsets applied, register contents and
+   constants written back; PC_P constants are left alone) and dirty FP
+   registers are evicted.  Finally emits any queued low-level code via
+   lopt_emit_all(). */
+void flush(int save_regs)
+{
+    int i;
+    
+    log_flush();
+    flush_flags(); /* low level */
+    sync_m68k_pc(); /* mid level */
+
+    if (save_regs) {
+       for (i=0;i<VFREGS;i++) {
+           if (live.fate[i].needflush==NF_SCRATCH || 
+               live.fate[i].status==CLEAN) {
+               f_disassociate(i);
+           }
+       }
+       for (i=0;i<VREGS;i++) {
+           if (live.state[i].needflush==NF_TOMEM) {
+               switch(live.state[i].status) {
+                case INMEM:   
+                   /* Value lives in memory but has a pending offset:
+                      apply it with an add directly on the memory home */
+                   if (live.state[i].val) {
+                       raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
+                       live.state[i].val=0;
+                   }
+                   break;
+                case CLEAN:   
+                case DIRTY:   
+                   remove_offset(i,-1); tomem(i); break;
+                case ISCONST: 
+                   if (i!=PC_P) 
+                       writeback_const(i); 
+                   break;
+                default: break;
+               }
+               Dif (live.state[i].val && i!=PC_P) {
+                   printf("Register %d still has val %x\n",
+                          i,live.state[i].val);
+               }
+           }
+       }
+       for (i=0;i<VFREGS;i++) {
+           if (live.fate[i].needflush==NF_TOMEM && 
+               live.fate[i].status==DIRTY) {
+               f_evict(i);
+           }
+       }
+       raw_fp_cleanup_drop();
+    }
+    if (needflags) {
+       printf("Warning! flush with needflags=1!\n");
+    }
+
+    lopt_emit_all();
+}
+
+/* Variant of flush(1) that does not call flush_flags() or
+   sync_m68k_pc(): NF_TOMEM vregs held in registers or as constants are
+   written back and dirty FP registers evicted.  Pending offsets on
+   INMEM vregs are deliberately left alone, because applying them would
+   need an "add" instruction. */
+static void flush_keepflags(void)
+{
+    int i;
+    
+    for (i=0;i<VFREGS;i++) {
+       if (live.fate[i].needflush==NF_SCRATCH || 
+           live.fate[i].status==CLEAN) {
+           f_disassociate(i);
+       }
+    }
+    for (i=0;i<VREGS;i++) {
+       if (live.state[i].needflush==NF_TOMEM) {
+           switch(live.state[i].status) {
+            case INMEM:   
+               /* Can't adjust the offset here --- that needs "add" */
+               break;
+            case CLEAN:   
+            case DIRTY:   
+               remove_offset(i,-1); tomem(i); break;
+            case ISCONST: 
+               if (i!=PC_P) 
+                   writeback_const(i); 
+               break;
+            default: break;
+           }
+       }
+    }
+    for (i=0;i<VFREGS;i++) {
+       if (live.fate[i].needflush==NF_TOMEM && 
+           live.fate[i].status==DIRTY) {
+           f_evict(i);
+       }
+    }
+    raw_fp_cleanup_drop();
+    lopt_emit_all();
+}
+
+/* Drops every virtual integer and FP register whose needflush is
+   NF_SCRATCH.  Before doing so, prints a warning for each native
+   register that is still locked. */
+void freescratch(void)
+{
+    int i;
+    /* Native register 4 is exempt from the warning -- presumably the
+       x86 stack pointer; TODO confirm */
+    for (i=0;i<N_REGS;i++)
+       if (live.nat[i].locked && i!=4)
+           printf("Warning! %d is locked\n",i);
+
+    for (i=0;i<VREGS;i++)
+       if (live.state[i].needflush==NF_SCRATCH) {
+           forget_about(i);
+       }
+
+    for (i=0;i<VFREGS;i++)
+       if (live.fate[i].needflush==NF_SCRATCH) {
+           f_forget_about(i);
+       }
+}
+
+/********************************************************************
+ * Support functions, internal                                      *
+ ********************************************************************/
+
+
+/* Advances the emission pointer `target` until it is a multiple of `a`,
+   writing 0x90 bytes on the way.  The mask test (a-1) only behaves as an
+   alignment check when `a` is a power of two.  lopt_emit_all() is called
+   first. */
+static void align_target(uae_u32 a)
+{
+    lopt_emit_all();
+    /* Fill with NOPs --- makes debugging with gdb easier */
+    while ((uae_u32)target&(a-1)) 
+       *target++=0x90;
+}
+
+extern uae_u8* kickmemory;
+/* Non-zero when `addr` lies within the 8*65536 bytes that start at
+   kickmemory (half-open range). */
+static __inline__ int isinrom(uae_u32 addr)
+{
+    const uae_u32 rom_start=(uae_u32)kickmemory;
+
+    if (addr<rom_start)
+       return 0;
+    return addr<rom_start+8*65536;
+}
+
+/* Writes DIRTY virtual integer registers back to memory unless the
+   native register holding them is listed in call_saved[], and evicts
+   every FP register that f_isinreg() reports as being in a register.
+   Starts with log_flush() and ends with raw_fp_cleanup_drop(). */
+static void flush_all(void)
+{
+    int i;
+
+    log_flush();
+    for (i=0;i<VREGS;i++)
+       if (live.state[i].status==DIRTY) {
+           if (!call_saved[live.state[i].realreg]) {
+               tomem(i);
+           }
+       }
+    for (i=0;i<VFREGS;i++)
+       if (f_isinreg(i)) 
+           f_evict(i);
+    raw_fp_cleanup_drop();
+}
+
+/* Make sure all registers that will get clobbered by a call are
+   safe and sound in memory.  Currently this is just flush_all(). */
+static void prepare_for_call_1(void)
+{
+    flush_all();  /* If there are registers that don't get clobbered,
+                  * we should be a bit more selective here */
+}
+
+/* We will call a C routine in a moment. That will clobber all registers,
+   so we need to disassociate everything.
+   Native integer registers are freed only when they are not call-saved
+   and hold something; native FP registers are freed whenever they hold
+   something.  The flag state is then marked as TRASH. */
+static void prepare_for_call_2(void)
+{
+    int i;
+    for (i=0;i<N_REGS;i++)   
+       if (!call_saved[i] && live.nat[i].nholds>0)
+           free_nreg(i);
+
+    for (i=0;i<N_FREGS;i++)   
+       if (live.fat[i].nholds>0)
+           f_free_nreg(i);
+
+    live.flags_in_flags=TRASH;  /* Note: We assume we already rescued the
+                                  flags at the very start of the call_r
+                                  functions! */
+}
+
+
+/********************************************************************
+ * Memory access and related functions, CREATE time                 *
+ ********************************************************************/
+
+/* Records the two possible successors of a conditional branch and its
+   condition code in the file-level variables next_pc_p, taken_pc_p and
+   branch_cc.  Nothing else is done here. */
+void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
+{
+    next_pc_p=not_taken;
+    taken_pc_p=taken;
+    branch_cc=cond;
+}
+
+
+/* Returns the address of the direct_handler_to_use field of the block
+   info record that get_blockinfo_addr_new() yields for `addr`.
+   With USE_OPTIMIZER, 0 is returned when no record exists while
+   reg_alloc_run is set.
+   NOTE(review): on every other path `bi` is dereferenced unchecked;
+   presumably get_blockinfo_addr_new() always yields a record there --
+   confirm. */
+static uae_u32 get_handler_address(uae_u32 addr)
+{
+    blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
+
+#if USE_OPTIMIZER
+    if (!bi && reg_alloc_run)
+       return 0;
+#endif
+    return (uae_u32)&(bi->direct_handler_to_use);
+}
+
+/* Returns the current value of the direct_handler_to_use field of the
+   block info record that get_blockinfo_addr_new() yields for `addr`.
+   With USE_OPTIMIZER, 0 is returned when no record exists while
+   reg_alloc_run is set.
+   NOTE(review): on every other path `bi` is dereferenced unchecked;
+   presumably get_blockinfo_addr_new() always yields a record there --
+   confirm. */
+static uae_u32 get_handler(uae_u32 addr)
+{
+    blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
+
+#if USE_OPTIMIZER
+    if (!bi && reg_alloc_run)
+       return 0;
+#endif
+    return (uae_u32)bi->direct_handler_to_use;
+}
+
+/* Emits a 32-bit load into `reg` from the memory location returned by
+   get_handler_address(addr). */
+static void load_handler(int reg, uae_u32 addr)
+{
+    mov_l_rm(reg,get_handler_address(addr));
+}
+
+/* This version assumes that it is writing *real* memory, and *will* fail
+ *  if that assumption is wrong! No branches, no second chances, just
+ *  straight go-for-it attitude.
+ *
+ *  address, source and tmp are registers; size is 1, 2 or 4 bytes.
+ *  `offset` is not used in this function.  When `clobber` is set, the
+ *  direct (canbang) path byte-swaps `source` in place instead of copying
+ *  it to `tmp` first. */
+
+static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
+{
+    int f=tmp;
+
+#ifdef NATMEM_OFFSET
+    if (canbang) {  /* Woohoo! go directly at the memory! */
+       if (clobber)
+           f=source;
+       /* Word/long values are byte-swapped in f before being stored at
+          address+NATMEM_OFFSET */
+       switch(size) {
+        case 1: mov_b_bRr(address,source,NATMEM_OFFSET); break; 
+        case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,NATMEM_OFFSET); break;
+        case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,NATMEM_OFFSET); break;
+       }
+       forget_about(tmp);
+       forget_about(f);
+       return;
+    }
+#endif
+
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16);   /* The index into the baseaddr table */
+    mov_l_rm_indexed(f,(uae_u32)(baseaddr),f,4);
+
+    if (address==source && size>1) { /* IBrowse does this! */
+       add_l(f,address); /* f now has the final address */
+       /* source is swapped for the store and swapped back afterwards */
+       switch(size) {
+        case 2: bswap_16(source); mov_w_Rr(f,source,0); bswap_16(source); break;
+        case 4: bswap_32(source); mov_l_Rr(f,source,0); bswap_32(source); break;
+       }
+    }
+    else {
+       /* f now holds the offset */
+       switch(size) {
+        case 1: mov_b_mrr_indexed(address,f,1,source); break;
+        case 2: bswap_16(source); mov_w_mrr_indexed(address,f,1,source); bswap_16(source); break;
+        case 4: bswap_32(source); mov_l_mrr_indexed(address,f,1,source); bswap_32(source); break;
+       }
+    }
+}
+
+
+
+/* Emits a write that goes through the memory bank table: the bank
+   pointer is fetched from mem_banks[address>>16], the function pointer
+   stored `offset` bytes into that bank is loaded, and it is called via
+   call_r_02() with address and source.  `tmp` is used as scratch and
+   forgotten afterwards. */
+static __inline__ void writemem(int address, int source, int offset, int size, int tmp)
+{
+    int f=tmp;
+
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16);   /* The index into the mem bank table */
+    mov_l_rm_indexed(f,(uae_u32)mem_banks,f,4);
+    /* Now f holds a pointer to the actual membank */
+    mov_l_rR(f,f,offset);
+    /* Now f holds the address of the b/w/lput function */
+    call_r_02(f,address,source,4,size);
+    forget_about(tmp);
+}
+
+
+/* Emits a byte store of register `source` to the address in register
+   `address`.  currprefs.comptrustbyte selects the policy:
+     0 = always use the direct path, 1 = never,
+     2 = not when (start_pc & 0xF80000) == 0xF80000,
+     3 = not until have_done_picasso is set;
+   any other value aborts.  The special path is also taken whenever
+   special_mem has S_WRITE set.  The literal 20 is passed as the `offset`
+   argument -- presumably the position of the byte-put handler inside a
+   memory bank record (cf. writemem); TODO confirm. */
+void writebyte(int address, int source, int tmp)
+{
+    int  distrust;
+    switch (currprefs.comptrustbyte) {
+     case 0: distrust=0; break;
+     case 1: distrust=1; break;
+     case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+     case 3: distrust=!have_done_picasso; break;
+     default: abort();
+    }
+
+    if ((special_mem&S_WRITE) || distrust)
+       writemem_special(address,source,20,1,tmp);
+    else
+       writemem_real(address,source,20,1,tmp,0);
+}
+
+/* Emits a 16-bit store; shared by writeword() and writeword_clobber().
+   currprefs.comptrustword selects the policy (0 = always direct,
+   1 = never, 2 = not when (start_pc & 0xF80000) == 0xF80000, 3 = not
+   until have_done_picasso is set, anything else aborts).  The special
+   path is also taken whenever special_mem has S_WRITE set.  `clobber`
+   is forwarded to writemem_real() only. */
+static __inline__ void writeword_general(int address, int source, int tmp,
+                                        int clobber)
+{
+    int  distrust;
+    switch (currprefs.comptrustword) {
+     case 0: distrust=0; break;
+     case 1: distrust=1; break;
+     case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+     case 3: distrust=!have_done_picasso; break;
+     default: abort();
+    }
+
+    if ((special_mem&S_WRITE) || distrust)
+       writemem_special(address,source,16,2,tmp);
+    else
+       writemem_real(address,source,16,2,tmp,clobber);
+}
+
+/* 16-bit store with clobber=1, i.e. the direct path may byte-swap
+   `source` in place (see writemem_real). */
+void writeword_clobber(int address, int source, int tmp)
+{
+    writeword_general(address,source,tmp,1);
+}
+
+/* 16-bit store with clobber=0 passed to writeword_general(). */
+void writeword(int address, int source, int tmp)
+{
+    writeword_general(address,source,tmp,0);
+}
+
+/* Emits a 32-bit store; shared by writelong() and writelong_clobber().
+   currprefs.comptrustlong selects the policy (0 = always direct,
+   1 = never, 2 = not when (start_pc & 0xF80000) == 0xF80000, 3 = not
+   until have_done_picasso is set, anything else aborts).  The special
+   path is also taken whenever special_mem has S_WRITE set.  `clobber`
+   is forwarded to writemem_real() only. */
+static __inline__ void writelong_general(int address, int source, int tmp, 
+                                        int clobber)
+{
+    int  distrust;
+    switch (currprefs.comptrustlong) {
+     case 0: distrust=0; break;
+     case 1: distrust=1; break;
+     case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+     case 3: distrust=!have_done_picasso; break;
+     default: abort();
+    }
+
+    if ((special_mem&S_WRITE) || distrust)
+       writemem_special(address,source,12,4,tmp);
+    else
+       writemem_real(address,source,12,4,tmp,clobber);
+}
+
+/* 32-bit store with clobber=1, i.e. the direct path may byte-swap
+   `source` in place (see writemem_real). */
+void writelong_clobber(int address, int source, int tmp)
+{
+    writelong_general(address,source,tmp,1);
+}
+
+/* 32-bit store with clobber=0 passed to writelong_general(). */
+void writelong(int address, int source, int tmp)
+{
+    writelong_general(address,source,tmp,0);
+}
+
+
+
+/* This version assumes that it is reading *real* memory, and *will* fail
+ *  if that assumption is wrong! No branches, no second chances, just
+ *  straight go-for-it attitude.
+ *
+ *  address, dest and tmp are registers; size is 1, 2 or 4 bytes.
+ *  `offset` is not used in this function.  Word and long results are
+ *  byte-swapped in `dest` after the load. */
+
+static void readmem_real(int address, int dest, int offset, int size, int tmp)
+{
+    int f=tmp; 
+
+    /* For a long read into a different register, dest itself can serve
+       as the scratch register */
+    if (size==4 && address!=dest)
+       f=dest;
+
+#ifdef NATMEM_OFFSET
+    if (canbang) {  /* Woohoo! go directly at the memory! */
+       switch(size) {
+        case 1: mov_b_brR(dest,address,NATMEM_OFFSET); break; 
+        case 2: mov_w_brR(dest,address,NATMEM_OFFSET); bswap_16(dest); break;
+        case 4: mov_l_brR(dest,address,NATMEM_OFFSET); bswap_32(dest); break;
+       }
+       forget_about(tmp);
+       return;
+    }
+#endif
+
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16);   /* The index into the baseaddr table */
+    mov_l_rm_indexed(f,(uae_u32)baseaddr,f,4);
+    /* f now holds the offset */
+  
+    switch(size) {
+     case 1: mov_b_rrm_indexed(dest,address,f,1); break;
+     case 2: mov_w_rrm_indexed(dest,address,f,1); bswap_16(dest); break;
+     case 4: mov_l_rrm_indexed(dest,address,f,1); bswap_32(dest); break;
+    }
+    forget_about(tmp);
+}
+
+
+
+/* Emits a read that goes through the memory bank table: the bank
+   pointer is fetched from mem_banks[address>>16], the function pointer
+   stored `offset` bytes into that bank is loaded, and it is called via
+   call_r_11() with `address`, the result going to `dest`.  `tmp` is
+   used as scratch and forgotten afterwards. */
+static __inline__ void readmem(int address, int dest, int offset, int size, int tmp)
+{
+    int f=tmp;
+
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16);   /* The index into the mem bank table */
+    mov_l_rm_indexed(f,(uae_u32)mem_banks,f,4);
+    /* Now f holds a pointer to the actual membank */
+    mov_l_rR(f,f,offset);
+    /* Now f holds the address of the b/w/lget function */
+    call_r_11(dest,f,address,size,4);
+    forget_about(tmp);
+}
+
+/* Emits a byte load from the address in register `address` into
+   register `dest`.  currprefs.comptrustbyte selects the policy
+   (0 = always direct, 1 = never, 2 = not when
+   (start_pc & 0xF80000) == 0xF80000, 3 = not until have_done_picasso is
+   set, anything else aborts).  The special path is also taken whenever
+   special_mem has S_READ set. */
+void readbyte(int address, int dest, int tmp)
+{
+    int  distrust;
+    switch (currprefs.comptrustbyte) {
+     case 0: distrust=0; break;
+     case 1: distrust=1; break;
+     case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+     case 3: distrust=!have_done_picasso; break;
+     default: abort();
+    }
+
+    if ((special_mem&S_READ) || distrust)
+       readmem_special(address,dest,8,1,tmp);
+    else
+       readmem_real(address,dest,8,1,tmp);
+}
+
+/* Emits a 16-bit load from the address in register `address` into
+   register `dest`.  currprefs.comptrustword selects the policy
+   (0 = always direct, 1 = never, 2 = not when
+   (start_pc & 0xF80000) == 0xF80000, 3 = not until have_done_picasso is
+   set, anything else aborts).  The special path is also taken whenever
+   special_mem has S_READ set. */
+void readword(int address, int dest, int tmp)
+{
+    int  distrust;
+    switch (currprefs.comptrustword) {
+     case 0: distrust=0; break;
+     case 1: distrust=1; break;
+     case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+     case 3: distrust=!have_done_picasso; break;
+     default: abort();
+    }
+
+    if ((special_mem&S_READ) || distrust)
+       readmem_special(address,dest,4,2,tmp);
+    else
+       readmem_real(address,dest,4,2,tmp);
+}
+
+/* Emits a 32-bit load from the address in register `address` into
+   register `dest`.  currprefs.comptrustlong selects the policy
+   (0 = always direct, 1 = never, 2 = not when
+   (start_pc & 0xF80000) == 0xF80000, 3 = not until have_done_picasso is
+   set, anything else aborts).  The special path is also taken whenever
+   special_mem has S_READ set. */
+void readlong(int address, int dest, int tmp)
+{
+    int  distrust;
+    switch (currprefs.comptrustlong) {
+     case 0: distrust=0; break;
+     case 1: distrust=1; break;
+     case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+     case 3: distrust=!have_done_picasso; break;
+     default: abort();
+    }
+
+    if ((special_mem&S_READ) || distrust)
+       readmem_special(address,dest,0,4,tmp);
+    else
+       readmem_real(address,dest,0,4,tmp);
+}
+
+
+
+/* This one might appear a bit odd...
+   It reuses readmem() with offset 24 and size 4, i.e. it calls whatever
+   function pointer sits 24 bytes into the memory bank record --
+   presumably the bank's address-translation handler; TODO confirm. */
+static __inline__ void get_n_addr_old(int address, int dest, int tmp)
+{
+    readmem(address,dest,24,4,tmp);
+}
+
+/* Emits code that turns the emulated address in register `address`
+   into a native one in register `dest` without calling out:
+   either address+NATMEM_OFFSET (canbang), or
+   address + baseaddr[address>>16]. */
+static __inline__ void get_n_addr_real(int address, int dest, int tmp)
+{
+    int f=tmp;
+    /* dest can double as scratch when it differs from address */
+    if (address!=dest)
+       f=dest;
+
+#ifdef NATMEM_OFFSET
+    if (canbang) {
+       lea_l_brr(dest,address,NATMEM_OFFSET);
+       forget_about(tmp);
+       return;
+    }
+#endif
+    mov_l_rr(f,address);
+    mov_l_rr(dest,address); // gb-- nop if dest==address
+    shrl_l_ri(f,16);
+    mov_l_rm_indexed(f, (uae_u32)baseaddr, f, 4);
+    add_l(dest,f);
+    forget_about(tmp);
+}
+
+/* Emits the emulated-to-native address translation for `address` into
+   `dest`.  currprefs.comptrustnaddr selects the policy (0 = always
+   direct, 1 = never, 2 = not when (start_pc & 0xF80000) == 0xF80000,
+   3 = not until have_done_picasso is set, anything else aborts).
+   Unlike the read/write helpers, any non-zero special_mem forces the
+   bank-table version. */
+void get_n_addr(int address, int dest, int tmp)
+{
+    int  distrust;
+    switch (currprefs.comptrustnaddr) {
+     case 0: distrust=0; break;
+     case 1: distrust=1; break;
+     case 2: distrust=((start_pc&0xF80000)==0xF80000); break;
+     case 3: distrust=!have_done_picasso; break;
+     default: abort();
+    }
+
+    if (special_mem || distrust)
+       get_n_addr_old(address,dest,tmp);
+    else
+       get_n_addr_real(address,dest,tmp);
+}
+
+/* Address translation for jump targets.  The active (#else) branch
+   always uses the baseaddr table, regardless of canbang or the trust
+   settings: dest = (baseaddr[address>>16] + address) & ~1. */
+void get_n_addr_jmp(int address, int dest, int tmp)
+{
+#if 0 /* For this, we need to get the same address as the rest of UAE
+        would --- otherwise we end up translating everything twice */
+    get_n_addr(address,dest,tmp);
+#else
+    int f=tmp;
+    if (address!=dest)
+       f=dest;
+    mov_l_rr(f,address);
+    shrl_l_ri(f,16);   /* The index into the baseaddr bank table */
+    mov_l_rm_indexed(dest,(uae_u32)baseaddr,f,4);
+    add_l(dest,address);
+    and_l_ri (dest, ~1);   /* clear bit 0 of the result */
+    forget_about(tmp);
+#endif
+}
+
+
+/* base is a register, but dp is an actual value.
+   target is a register, as is tmp.
+
+   Emits the effective-address computation for an indexed addressing
+   mode described by the extension word dp.  As decoded below:
+     bits 12-15  index register number
+     bit  11     index is a full long (set) or a sign-extended word
+     bits 9-10   shift applied to the index
+     bit  8      full format (set) or brief format with 8-bit displacement
+   and, in the full format only:
+     bit  7      ignore the base register
+     bit  6      ignore the index register
+     bits 4-5    base displacement: 2 = word, 3 = long, read from the
+                 instruction stream (advancing m68k_pc_offset)
+     bits 0-1    outer displacement: 2 = word, 3 = long; any non-zero
+                 value also triggers a memory indirection (readlong)
+     bit  2      index added after (set) or before the indirection */
+void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
+{
+    int reg = (dp >> 12) & 15;
+    int regd_shift=(dp >> 9) & 3;
+
+    if (dp & 0x100) {
+       int ignorebase=(dp&0x80);
+       int ignorereg=(dp&0x40);
+       int addbase=0;
+       int outer=0;
+    
+       if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+       if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
+
+       if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+       if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
+
+       if ((dp & 0x4) == 0) {  /* add regd *before* the get_long */
+           if (!ignorereg) {
+               if ((dp & 0x800) == 0) 
+                   sign_extend_16_rr(target,reg);
+               else
+                   mov_l_rr(target,reg);
+               shll_l_ri(target,regd_shift);
+           }
+           else
+               mov_l_ri(target,0);
+
+           /* target is now regd */
+           if (!ignorebase)
+               add_l(target,base);
+           add_l_ri(target,addbase);
+           if (dp&0x03) readlong(target,target,tmp);
+       } else { /* do the getlong first, then add regd */
+           if (!ignorebase) {
+               mov_l_rr(target,base);
+               add_l_ri(target,addbase);
+           }
+           else
+               mov_l_ri(target,addbase);
+           if (dp&0x03) readlong(target,target,tmp);
+
+           if (!ignorereg) {
+               if ((dp & 0x800) == 0) 
+                   sign_extend_16_rr(tmp,reg);
+               else
+                   mov_l_rr(tmp,reg);
+               shll_l_ri(tmp,regd_shift);
+               /* tmp is now regd */
+               add_l(target,tmp);
+           }
+       }
+       add_l_ri(target,outer);
+    }
+    else { /* 68000 version */
+       /* target = base + index*(1<<shift) + sign-extended low byte of dp */
+       if ((dp & 0x800) == 0) { /* Sign extend */
+           sign_extend_16_rr(target,reg);
+           lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
+       }
+       else {
+           lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
+       }
+    }
+    forget_about(tmp);
+}
+
+/* Exchanges the two low-order bytes of f; every bit above bit 15 is
+   dropped from the result. */
+static __inline__ unsigned int cft_map (unsigned int f)
+{
+    const unsigned int upper = (f >> 8) & 0xffu;
+    const unsigned int lower = f & 0xffu;
+
+    return (lower << 8) | upper;
+}
+
+/* Stores `enabled` in letit.  If the value differs from the current
+   one, flush_icache_hard() is called first. */
+void set_cache_state(int enabled)
+{
+    if (enabled!=letit)
+       flush_icache_hard(77);
+    letit=enabled;
+}
+
+/* Returns the value last stored by set_cache_state() (letit). */
+int get_cache_state(void)
+{
+    return letit;
+}
+
+/* Distance between current_compile_p and the start of the code cache,
+   or 0 when no cache is allocated. */
+uae_u32 get_jitted_size(void)
+{
+    return compiled_code ? (uae_u32)(current_compile_p-compiled_code) : 0;
+}
+
+/* (Re)allocates the translation cache.  An existing cache is hard
+   flushed and freed first.  veccode (256 bytes) and popallspace (1024
+   bytes) are allocated once and kept.  The main cache is
+   currprefs.cachesize KB; on allocation failure the size is halved --
+   modifying currprefs.cachesize itself -- until it succeeds or reaches
+   zero, in which case compiled_code stays NULL. */
+void alloc_cache(void)
+{
+    if (compiled_code) {
+       flush_icache_hard(6);
+       cache_free(compiled_code);
+    }
+    if (veccode == NULL)
+       veccode = cache_alloc (256);
+    if (popallspace == NULL)
+       popallspace = cache_alloc (1024);
+    compiled_code = NULL;
+    if (currprefs.cachesize == 0)
+       return;
+
+    while (!compiled_code && currprefs.cachesize) {
+       compiled_code=cache_alloc(currprefs.cachesize*1024);
+       if (!compiled_code)
+           currprefs.cachesize/=2;
+    }
+    if (compiled_code) {
+       /* Compilation must not start beyond this point, leaving
+          BYTES_PER_INST of headroom at the end of the cache */
+       max_compile_start=compiled_code+currprefs.cachesize*1024-BYTES_PER_INST;
+       current_compile_p=compiled_code;
+    }
+}
+
+extern unsigned long op_illg_1 (uae_u32 opcode) REGPARAM;
+
+/* Computes two checksums over the 32-bit words covering the range that
+   starts at bi->min_pcp (rounded down to a multiple of 4) and spans
+   bi->len bytes: *c1 receives the sum, *c2 the exclusive-or.  Both are
+   set to 0 when the adjusted length is negative or larger than
+   MAX_CHECKSUM_LEN. */
+static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
+{
+    uae_u32 start=bi->min_pcp;
+    uae_s32 remaining=bi->len;
+    uae_u32 sum=0;
+    uae_u32 parity=0;
+    uae_u32* word;
+
+    /* Account for the bytes gained by aligning the start downwards */
+    remaining+=(start&3);
+    start&=(~3);
+
+    if (remaining<0 || remaining>MAX_CHECKSUM_LEN) {
+       *c1=0;
+       *c2=0;
+       return;
+    }
+
+    for (word=(uae_u32*)start; remaining>0; word++, remaining-=4) {
+       sum+=*word;
+       parity^=*word;
+    }
+    *c1=sum;
+    *c2=parity;
+}
+
+/* Debug helper: prints, as hex, the 32-bit words covering bi->len bytes
+   from bi->pc_p (rounded down to a multiple of 4).  Prints nothing when
+   the adjusted length is negative or larger than MAX_CHECKSUM_LEN. */
+static void show_checksum(blockinfo* bi)
+{
+    uae_u32 start=(uae_u32)bi->pc_p;
+    uae_s32 remaining=bi->len;
+    uae_u32* word;
+
+    remaining+=(start&3);
+    start&=(~3);
+
+    if (remaining<0 || remaining>MAX_CHECKSUM_LEN)
+       return;
+
+    for (word=(uae_u32*)start; remaining>0; word++, remaining-=4)
+       printf("%08x ",*word);
+    printf(" bla\n");
+}
+
+
+/* Returns 1 if a block info record exists for regs.pc_p but is not the
+   one currently stored in the cache tag of its line; in that case the
+   record is passed to raise_in_cl_list().  Returns 0 otherwise. */
+int check_for_cache_miss(void)
+{
+    blockinfo* const blk=get_blockinfo_addr(regs.pc_p);
+    int line;
+
+    if (!blk)
+       return 0;
+
+    line=cacheline(regs.pc_p);
+    if (cache_tags[line+1].bi==blk)
+       return 0;
+
+    raise_in_cl_list(blk);
+    return 1;
+}
+
+    
+/* Handler reached through popall_recompile_block (see create_popalls). */
+static void recompile_block(void)
+{
+    /* An existing block's countdown code has expired. We need to make
+       sure that execute_normal doesn't refuse to recompile due to a
+       perceived cache miss... */
+    blockinfo*  bi=get_blockinfo_addr(regs.pc_p);
+
+    /* A missing record is only checked for via the Dif macro */
+    Dif (!bi) 
+       abort();
+    raise_in_cl_list(bi);
+    execute_normal();
+    return;
+}
+
+/* Handler for a cache-tag mismatch at regs.pc_p.  If no block info
+   exists for the address, the block is executed/compiled through
+   execute_normal(); otherwise the existing record is passed to
+   raise_in_cl_list(). */
+static void cache_miss(void)
+{
+    blockinfo*  bi=get_blockinfo_addr(regs.pc_p);
+    uae_u32     cl=cacheline(regs.pc_p);
+    blockinfo*  bi2=get_blockinfo(cl);
+
+    if (!bi) {
+       execute_normal(); /* Compile this block now */
+       return;
+    }
+    /* Sanity check: the line must hold some other block, otherwise
+       this was not a miss at all */
+    Dif (!bi2 || bi==bi2) {
+       write_log ("Unexplained cache miss %p %p\n",bi,bi2);
+       abort();
+    }
+    raise_in_cl_list(bi);
+    return;
+}
+
+/* Handler reached through popall_check_checksum.  It re-validates the
+   block at regs.pc_p: when the stored checksums (bi->c1, bi->c2) still
+   match freshly computed ones the block is reactivated, otherwise it is
+   invalidated and execute_normal() is called.  A block whose stored
+   checksums are both zero never matches. */
+static void check_checksum(void) 
+{
+    blockinfo*  bi=get_blockinfo_addr(regs.pc_p);
+    uae_u32     cl=cacheline(regs.pc_p);
+    blockinfo*  bi2=get_blockinfo(cl);
+
+    uae_u32     c1,c2;
+    
+    checksum_count++;
+    /* These are not the droids you are looking for...  */
+    if (!bi) {
+       /* Whoever is the primary target is in a dormant state, but
+          calling it was accidental, and we should just compile this
+          new block */
+       execute_normal();
+       return;
+    }
+    if (bi!=bi2) {
+       /* The block was hit accidentally, but it does exist. Cache miss */
+       cache_miss();
+       return;
+    }
+
+    if (bi->c1 || bi->c2)
+       calc_checksum(bi,&c1,&c2);
+    else {
+       c1=c2=1;  /* Make sure it doesn't match */
+    }
+    if (c1==bi->c1 && c2==bi->c2) { 
+       /* This block is still OK. So we reactivate. Of course, that
+          means we have to move it into the needs-to-be-flushed list */
+       bi->handler_to_use=bi->handler;
+       set_dhtu(bi,bi->direct_handler);
+
+       /*      printf("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
+               c1,c2,bi->c1,bi->c2);*/
+       remove_from_list(bi);
+       add_to_active(bi);
+       raise_in_cl_list(bi);
+    }
+    else {
+       /* This block actually changed. We need to invalidate it,
+          and set it up to be recompiled */
+       /* printf("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
+          c1,c2,bi->c1,bi->c2); */
+       invalidate_block(bi);
+       raise_in_cl_list(bi);
+       execute_normal();
+    }
+}
+
+
+/* Generates the entry/exit stubs in popallspace.
+   With USE_PUSH_POP, one stub per exit routine (do_nothing,
+   execute_normal, cache_miss, recompile_block, exec_nostats,
+   check_checksum) is emitted; each pops the registers flagged in
+   need_to_preserve[] and jumps to the routine, and each is followed by
+   padding to a 32-byte boundary.  Without USE_PUSH_POP the popall_*
+   pointers refer to the C routines directly.
+   Finally pushall_call_handler is emitted: it pushes the preserved
+   registers (in reverse order, USE_PUSH_POP only), loads regs.pc_p,
+   masks it with TAGMASK and jumps indirectly through cache_tags. */
+static __inline__ void create_popalls(void)
+{
+  int i,r;
+
+  current_compile_p=popallspace;
+  set_target(current_compile_p);
+#if USE_PUSH_POP
+  /* If we can't use gcc inline assembly, we need to pop some
+     registers before jumping back to the various get-out routines.
+     This generates the code for it.
+  */
+  popall_do_nothing=current_compile_p;
+  for (i=0;i<N_REGS;i++) {
+      if (need_to_preserve[i])
+         raw_pop_l_r(i);
+  }
+  raw_jmp((uae_u32)do_nothing);
+  align_target(32);
+  
+  popall_execute_normal=get_target();
+  for (i=0;i<N_REGS;i++) {
+      if (need_to_preserve[i])
+         raw_pop_l_r(i);
+  }
+  raw_jmp((uae_u32)execute_normal);
+  align_target(32);
+
+  popall_cache_miss=get_target();
+  for (i=0;i<N_REGS;i++) {
+      if (need_to_preserve[i])
+         raw_pop_l_r(i);
+  }
+  raw_jmp((uae_u32)cache_miss);
+  align_target(32);
+
+  popall_recompile_block=get_target();
+  for (i=0;i<N_REGS;i++) {
+      if (need_to_preserve[i])
+         raw_pop_l_r(i);
+  }
+  raw_jmp((uae_u32)recompile_block);
+  align_target(32);
+  
+  popall_exec_nostats=get_target();
+  for (i=0;i<N_REGS;i++) {
+      if (need_to_preserve[i])
+         raw_pop_l_r(i);
+  }
+  raw_jmp((uae_u32)exec_nostats);
+  align_target(32);
+  
+  popall_check_checksum=get_target();
+  for (i=0;i<N_REGS;i++) {
+      if (need_to_preserve[i])
+         raw_pop_l_r(i);
+  }
+  raw_jmp((uae_u32)check_checksum);
+  align_target(32);
+  
+  current_compile_p=get_target();
+#else
+  popall_exec_nostats=exec_nostats;
+  popall_execute_normal=execute_normal;
+  popall_cache_miss=cache_miss;
+  popall_recompile_block=recompile_block;
+  popall_do_nothing=do_nothing;
+  popall_check_checksum=check_checksum;
+#endif
+
+  /* And now, the code to do the matching pushes and then jump
+     into a handler routine */
+  pushall_call_handler=get_target();
+#if USE_PUSH_POP
+  for (i=N_REGS;i--;) {
+      if (need_to_preserve[i])
+         raw_push_l_r(i);
+  }
+#endif
+  r=REG_PC_TMP;
+  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
+  raw_and_l_ri(r,TAGMASK);
+  raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
+}
+
+/* Empties the block bookkeeping: both list heads (active, dormant) and
+   every hold_bi[] slot are set to NULL. */
+static __inline__ void reset_lists(void)
+{
+    int slot=MAX_HOLD_BI;
+
+    active=NULL;
+    dormant=NULL;
+    while (slot-- > 0)
+       hold_bi[slot]=NULL;
+}
+
+/* Initialises a fresh block info record and emits its two 32-byte
+   aligned stubs.  Both stubs copy bi->pc_p into regs.pc_p; direct_pen
+   then jumps to popall_execute_normal, direct_pcc to
+   popall_check_checksum.  Afterwards the dependency bookkeeping is
+   cleared and the record is marked BI_NEW with the default state. */
+static void prepare_block(blockinfo* bi)
+{
+    int i;
+
+    set_target(current_compile_p);
+    align_target(32);
+    bi->direct_pen=(void*)get_target();
+    raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
+    raw_mov_l_mr((uae_u32)&regs.pc_p,0);
+    raw_jmp((uae_u32)popall_execute_normal);
+
+    align_target(32);
+    bi->direct_pcc=(void*)get_target();
+    raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
+    raw_mov_l_mr((uae_u32)&regs.pc_p,0);
+    raw_jmp((uae_u32)popall_check_checksum);
+
+    align_target(32);
+    current_compile_p=get_target();
+
+    bi->deplist=NULL;
+    for (i=0;i<2;i++) {
+       bi->dep[i].prev_p=NULL;
+       bi->dep[i].next=NULL;
+    }
+    bi->env=default_ss;
+    bi->status=BI_NEW;
+    bi->havestate=0;
+    //bi->env=empty_ss;
+}
+
+/* Resets the compiler by switching the cache state off; if it was on,
+   set_cache_state() performs a hard flush. */
+void compemu_reset(void)
+{
+    set_cache_state(0);
+}
+
+/* One-time initialisation of the compiler:
+    1. initialise the raw CPU layer and (non-Windows, NATMEM_OFFSET only)
+       install the SIGSEGV handler,
+    2. reset the per-opcode tables to "not compilable / uses and sets all
+       flags / is a jump",
+    3. fill them from the generated tables (tbl with flags, nftbl without),
+    4. propagate handlers and properties to every opcode that table68k
+       maps onto another opcode's handler,
+    5. allocate the cache, build the popall stubs, reset the block lists
+       and cache tags, and set up the empty/default register state. */
+void build_comp(void) 
+{
+    int i;
+    int jumpcount=0;
+    unsigned long opcode;
+    struct cputbl* tbl=op_smalltbl_0_comp_ff;
+    struct cputbl* nftbl=op_smalltbl_0_comp_nf;
+    int count;
+#ifdef NOFLAGS_SUPPORT
+    struct cputbl *nfctbl = (currprefs.cpu_level >= 4 ? op_smalltbl_0_nf
+                            : currprefs.cpu_level == 3 ? op_smalltbl_1_nf
+                            : currprefs.cpu_level == 2 ? op_smalltbl_2_nf
+                            : currprefs.cpu_level == 1 ? op_smalltbl_3_nf
+                            : ! currprefs.cpu_compatible ? op_smalltbl_4_nf
+                            : op_smalltbl_5_nf);
+#endif
+    raw_init_cpu();
+#ifdef NATMEM_OFFSET
+    write_log ("JIT: Setting signal handler\n");
+#ifndef _WIN32
+    signal(SIGSEGV,vec);
+#endif
+#endif
+    write_log ("JIT: Building Compiler function table\n");
+    for (opcode = 0; opcode < 65536; opcode++) {
+#ifdef NOFLAGS_SUPPORT
+       nfcpufunctbl[opcode] = op_illg_1;
+#endif
+       compfunctbl[opcode] = NULL;
+       nfcompfunctbl[opcode] = NULL;
+       prop[opcode].use_flags = 0x1f;
+       prop[opcode].set_flags = 0x1f;
+       prop[opcode].is_jump=1;
+    }
+
+    /* Bits of `specific`: 1 = jump, 8 = addx-style, 16 = constant jump */
+    for (i = 0; tbl[i].opcode < 65536; i++) {
+       int isjmp=(tbl[i].specific&1);
+       int isaddx=(tbl[i].specific&8);
+       int iscjmp=(tbl[i].specific&16);
+
+       prop[tbl[i].opcode].is_jump=isjmp;
+       prop[tbl[i].opcode].is_const_jump=iscjmp;
+       prop[tbl[i].opcode].is_addx=isaddx;
+       compfunctbl[tbl[i].opcode] = tbl[i].handler;
+    }
+    /* NOTE(review): under NOFLAGS_SUPPORT this loop walks nftbl but
+       reads the handler from nfctbl at the same index -- looks like it
+       assumes both tables are laid out identically; confirm. */
+    for (i = 0; nftbl[i].opcode < 65536; i++) {
+       nfcompfunctbl[nftbl[i].opcode] = nftbl[i].handler;
+#ifdef NOFLAGS_SUPPORT
+       nfcpufunctbl[nftbl[i].opcode] = nfctbl[i].handler;
+#endif
+    }
+
+#ifdef NOFLAGS_SUPPORT
+    for (i = 0; nfctbl[i].handler; i++) {
+       nfcpufunctbl[nfctbl[i].opcode] = nfctbl[i].handler;
+    }
+#endif
+
+    for (opcode = 0; opcode < 65536; opcode++) {
+       cpuop_func *f;
+       cpuop_func *nff;
+#ifdef NOFLAGS_SUPPORT
+       cpuop_func *nfcf;
+#endif
+       int isjmp,isaddx,iscjmp;
+
+       /* Skip illegal opcodes and those above the configured CPU level */
+       if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > currprefs.cpu_level)
+           continue;
+
+       /* handler != -1: this opcode shares the handler of another one */
+       if (table68k[opcode].handler != -1) {
+           f = compfunctbl[table68k[opcode].handler];
+           nff = nfcompfunctbl[table68k[opcode].handler];
+#ifdef NOFLAGS_SUPPORT
+           nfcf = nfcpufunctbl[table68k[opcode].handler];
+#endif
+           isjmp=prop[table68k[opcode].handler].is_jump;
+           iscjmp=prop[table68k[opcode].handler].is_const_jump;
+           isaddx=prop[table68k[opcode].handler].is_addx;
+           prop[opcode].is_jump=isjmp;
+           prop[opcode].is_const_jump=iscjmp;
+           prop[opcode].is_addx=isaddx;
+           compfunctbl[opcode] = f;
+           nfcompfunctbl[opcode] = nff;
+#ifdef NOFLAGS_SUPPORT
+           Dif (nfcf == op_illg_1)
+               abort();
+           nfcpufunctbl[opcode] = nfcf;
+#endif
+       }
+       prop[opcode].set_flags =table68k[opcode].flagdead;
+       prop[opcode].use_flags =table68k[opcode].flaglive;
+       /* Unconditional jumps don't evaluate condition codes, so they
+          don't actually use any flags themselves */
+       if (prop[opcode].is_const_jump)
+           prop[opcode].use_flags=0; 
+    }
+#ifdef NOFLAGS_SUPPORT
+    /* NOTE(review): the loop is driven by nfctbl but the destination
+       index is taken from tbl[i].opcode -- possibly meant to be
+       nfctbl[i].opcode; confirm before relying on NOFLAGS_SUPPORT. */
+    for (i = 0; nfctbl[i].handler != NULL; i++) {
+       if (nfctbl[i].specific)
+           nfcpufunctbl[tbl[i].opcode] = nfctbl[i].handler;
+    }
+#endif
+
+    count=0;
+    for (opcode = 0; opcode < 65536; opcode++) {
+       if (compfunctbl[opcode])
+           count++;
+    }
+    write_log ("Supposedly %d compileable opcodes!\n",count);
+
+    /* Initialise state */
+    alloc_cache();
+    create_popalls();
+    reset_lists();
+
+    /* cache_tags is used in pairs: [i] is the handler, [i+1] the block */
+    for (i=0;i<TAGSIZE;i+=2) {
+       cache_tags[i].handler=(void*)popall_execute_normal;
+       cache_tags[i+1].bi=NULL;
+    }
+    compemu_reset();
+    
+    for (i=0;i<N_REGS;i++) {
+       empty_ss.nat[i].holds=-1;
+       empty_ss.nat[i].validsize=0;
+       empty_ss.nat[i].dirtysize=0;
+    }
+    default_ss=empty_ss;
+#if 0    
+    default_ss.nat[6].holds=11;
+    default_ss.nat[6].validsize=4;
+    default_ss.nat[5].holds=12;
+    default_ss.nat[5].validsize=4;
+#endif
+}
+
+    
+/* "Hard flushing": throws away every translated block.  The cache tag
+   pair of each block on the active and dormant lists is reset to
+   popall_execute_normal / NULL, the lists are emptied and, if a cache is
+   allocated, compilation restarts at its beginning.  `n` identifies the
+   caller and is only used by the disabled debug output. */
+static void flush_icache_hard(int n)
+{
+    blockinfo* bi;
+
+    hard_flush_count++;
+#if 0
+    printf("Flush Icache_hard(%d/%x/%p), %u instruction bytes\n",
+          n,regs.pc,regs.pc_p,current_compile_p-compiled_code);
+#endif
+    bi=active;
+    while(bi) {
+       cache_tags[cacheline(bi->pc_p)].handler=(void*)popall_execute_normal;
+       cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
+       bi=bi->next;
+    }
+    bi=dormant;
+    while(bi) {
+       cache_tags[cacheline(bi->pc_p)].handler=(void*)popall_execute_normal;
+       cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
+       bi=bi->next;
+    }
+
+    reset_lists();
+    if (!compiled_code)
+       return;
+    current_compile_p=compiled_code;
+    set_special(0); /* To get out of compiled code */
+}
+
+
+/* "Soft flushing" --- instead of actually throwing everything away,
+   we simply mark everything as "needs to be checked".
+
+   When currprefs.comp_hardflush is set this is a plain hard flush.
+   Otherwise every block on the active list is redirected: blocks
+   without a handler (invalidated) go to popall_execute_normal, all
+   others to popall_check_checksum.  The whole active list is then
+   prepended to the dormant list.
+*/
+
+void flush_icache(int n)
+{
+    blockinfo* bi;
+    blockinfo* bi2;
+
+    if (currprefs.comp_hardflush) {
+       flush_icache_hard(n);
+       return;
+    }
+    soft_flush_count++;
+    if (!active)
+       return;
+
+    bi=active;
+    while (bi) {
+       uae_u32 cl=cacheline(bi->pc_p);
+       if (!bi->handler) {
+           /* invalidated block */
+           if (bi==cache_tags[cl+1].bi)
+               cache_tags[cl].handler=popall_execute_normal;
+           bi->handler_to_use=popall_execute_normal;
+           set_dhtu(bi,bi->direct_pen);
+       }
+       else {
+           /* The cache tag is only touched if this block owns the line */
+           if (bi==cache_tags[cl+1].bi)
+               cache_tags[cl].handler=popall_check_checksum;
+           bi->handler_to_use=popall_check_checksum;
+           set_dhtu(bi,bi->direct_pcc);
+       }
+       bi2=bi;
+       bi=bi->next;
+    }
+    /* bi2 is now the last entry in the active list */
+    bi2->next=dormant;
+    if (dormant)
+       dormant->prev_p=&(bi2->next);
+
+    dormant=active;
+    active->prev_p=&dormant;
+    active=NULL;
+}
+
+
+/* Unconditionally terminates the process via abort(). */
+static void catastrophe(void)
+{
+    abort();
+}
+
+int failure;
+
+
+void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
+{
+    if (letit && compiled_code && currprefs.cpu_level>=2) {
+
+       /* OK, here we need to 'compile' a block */
+       int i;
+       int r;
+       int was_comp=0;
+       uae_u8 liveflags[MAXRUN+1];
+       uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
+       uae_u32 min_pcp=max_pcp;
+       uae_u32 cl=cacheline(pc_hist[0].location);
+       void* specflags=(void*)&regs.spcflags;
+       blockinfo* bi=NULL;
+       blockinfo* bi2;
+       int extra_len=0;
+
+       compile_count++;
+       if (current_compile_p>=max_compile_start)
+           flush_icache_hard(7);
+
+       alloc_blockinfos();
+
+       bi=get_blockinfo_addr_new(pc_hist[0].location,0);
+       bi2=get_blockinfo(cl);
+
+       optlev=bi->optlevel;
+       if (bi->handler) {
+           Dif (bi!=bi2) { 
+               /* I don't think it can happen anymore. Shouldn't, in 
+                  any case. So let's make sure... */
+               printf("WOOOWOO count=%d, ol=%d %p %p\n",
+                      bi->count,bi->optlevel,bi->handler_to_use,
+                      cache_tags[cl].handler);
+               abort();
+           }
+
+           Dif (bi->count!=-1 && bi->status!=BI_TARGETTED) {
+               /* What the heck? We are not supposed to be here! */
+               abort();
+           }
+       }       
+       if (bi->count==-1) {
+           optlev++;
+           while (!currprefs.optcount[optlev])
+               optlev++;
+           bi->count=currprefs.optcount[optlev]-1;
+       }
+       current_block_pc_p=(uae_u32)pc_hist[0].location;
+       
+       remove_deps(bi); /* We are about to create new code */
+       bi->optlevel=optlev;
+       bi->pc_p=(uae_u8*)pc_hist[0].location;
+       
+       liveflags[blocklen]=0x1f; /* All flags needed afterwards */
+       i=blocklen;
+       while (i--) {
+           uae_u16* currpcp=pc_hist[i].location;
+           int op=cft_map(*currpcp);
+
+           if ((uae_u32)currpcp<min_pcp)
+               min_pcp=(uae_u32)currpcp;
+           if ((uae_u32)currpcp>max_pcp)
+               max_pcp=(uae_u32)currpcp;
+
+           if (currprefs.compnf) {
+               liveflags[i]=((liveflags[i+1]&
+                              (~prop[op].set_flags))|
+                             prop[op].use_flags);
+               if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
+                   liveflags[i]&= ~FLAG_Z;
+           }
+           else {
+               liveflags[i]=0x1f;
+           }
+       }
+
+       bi->needed_flags=liveflags[0];
+
+       /* This is the non-direct handler */
+       align_target(32);
+       set_target(get_target()+1);
+       align_target(16);
+       /* Now aligned at n*32+16 */
+
+       bi->handler=
+           bi->handler_to_use=(void*)get_target();
+       raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
+       raw_jnz((uae_u32)popall_cache_miss);
+       /* This was 16 bytes on the x86, so now aligned on (n+1)*32 */
+
+       was_comp=0;
+
+#if USE_MATCHSTATE
+       comp_pc_p=(uae_u8*)pc_hist[0].location;
+       init_comp();
+       match_states(&(bi->env));
+       was_comp=1;
+#endif
+
+       bi->direct_handler=(void*)get_target();
+       set_dhtu(bi,bi->direct_handler);
+       current_block_start_target=(uae_u32)get_target();
+
+       if (bi->count>=0) { /* Need to generate countdown code */
+           raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
+           raw_sub_l_mi((uae_u32)&(bi->count),1);
+           raw_jl((uae_u32)popall_recompile_block);
+       }
+       if (optlev==0) { /* No need to actually translate */
+           /* Execute normally without keeping stats */
+           raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
+           raw_jmp((uae_u32)popall_exec_nostats); 
+       }
+       else {
+           reg_alloc_run=0;
+           next_pc_p=0;
+           taken_pc_p=0;
+           branch_cc=0;
+
+           log_startblock();
+           for (i=0;i<blocklen &&
+                    get_target_noopt()<max_compile_start;i++) {
+               cpuop_func **cputbl;
+               cpuop_func **comptbl;
+               uae_u16 opcode;
+               
+               opcode=cft_map((uae_u16)*pc_hist[i].location);
+               special_mem=pc_hist[i].specmem;
+               needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
+               if (!needed_flags && currprefs.compnf) {
+#ifdef NOFLAGS_SUPPORT
+                   cputbl=nfcpufunctbl;
+#else
+                   cputbl=cpufunctbl;
+#endif
+                   comptbl=nfcompfunctbl;
+               }
+               else {
+                   cputbl=cpufunctbl;
+                   comptbl=compfunctbl;
+               }
+               
+               if (comptbl[opcode] && optlev>1) { 
+                   failure=0;
+                   if (!was_comp) {
+                       comp_pc_p=(uae_u8*)pc_hist[i].location;
+                       init_comp();
+                   }
+                   was_comp++;
+
+                   comptbl[opcode](opcode);
+                   freescratch();
+                   if (!(liveflags[i+1] & FLAG_CZNV)) { 
+                       /* We can forget about flags */
+                       dont_care_flags();
+                   }
+#if INDIVIDUAL_INST 
+                   flush(1);
+                   nop();
+                   flush(1);
+                   was_comp=0;
+#endif
+               }
+               else 
+                   failure=1;
+               if (failure) {
+                   if (was_comp) {
+                       flush(1);
+                       was_comp=0;
+                   }
+                   raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
+#if USE_NORMAL_CALLING_CONVENTION
+                   raw_push_l_r(REG_PAR1);
+#endif
+                   raw_mov_l_mi((uae_u32)&regs.pc_p,
+                                (uae_u32)pc_hist[i].location);
+                   raw_call((uae_u32)cputbl[opcode]);
+                   //raw_add_l_mi((uae_u32)&oink,1); // FIXME
+#if USE_NORMAL_CALLING_CONVENTION
+                   raw_inc_sp(4);
+#endif
+                   if (needed_flags) {
+                       //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode+65536);
+                   }
+                   else {
+                       //raw_mov_l_mi((uae_u32)&foink3,(uae_u32)opcode);
+                   }
+                   
+                   if (i<blocklen-1) {
+                       uae_s8* branchadd;
+                       
+                       raw_mov_l_rm(0,(uae_u32)specflags);
+                       raw_test_l_rr(0,0);
+                       raw_jz_b_oponly();
+                       branchadd=get_target();
+                       emit_byte(0);
+                       raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+                       raw_jmp((uae_u32)popall_do_nothing);
+                       *branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
+                   }
+               }
+           }
+#if 0 /* This isn't completely kosher yet; It really needs to be
+        integrated into a general inter-block-dependency scheme */
+           if (next_pc_p && taken_pc_p &&
+               was_comp && taken_pc_p==current_block_pc_p) {
+               blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
+               blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
+               uae_u8 x=bi1->needed_flags;
+               
+               if (x==0xff || 1) {  /* To be on the safe side */
+                   uae_u16* next=(uae_u16*)next_pc_p;
+                   uae_u16 op=cft_map(*next);
+
+                   x=0x1f;
+                   x&=(~prop[op].set_flags);
+                   x|=prop[op].use_flags;
+               }
+               
+               x|=bi2->needed_flags;
+               if (!(x & FLAG_CZNV)) { 
+                   /* We can forget about flags */
+                   dont_care_flags();
+                   extra_len+=2; /* The next instruction now is part of this
+                                    block */
+               }
+                   
+           }
+#endif
+
+           if (next_pc_p) { /* A branch was registered */
+               uae_u32 t1=next_pc_p;
+               uae_u32 t2=taken_pc_p;
+               int     cc=branch_cc;
+               
+               uae_u32* branchadd;
+               uae_u32* tba;
+               bigstate tmp;
+               blockinfo* tbi;
+
+               if (taken_pc_p<next_pc_p) {
+                   /* backward branch. Optimize for the "taken" case ---
+                      which means the raw_jcc should fall through when
+                      the 68k branch is taken. */
+                   t1=taken_pc_p;
+                   t2=next_pc_p;
+                   cc=branch_cc^1;
+               }
+
+#if !USE_MATCHSTATE
+               flush_keepflags();
+#endif
+               tmp=live; /* ouch! This is big... */
+               raw_jcc_l_oponly(cc);
+               branchadd=(uae_u32*)get_target();
+               emit_long(0);
+               /* predicted outcome */
+               tbi=get_blockinfo_addr_new((void*)t1,1);
+               match_states(&(tbi->env));
+               //flush(1); /* Can only get here if was_comp==1 */
+               raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+               raw_jcc_l_oponly(9);
+               tba=(uae_u32*)get_target();
+               emit_long(get_handler(t1)-((uae_u32)tba+4));
+               raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
+               raw_jmp((uae_u32)popall_do_nothing);
+               create_jmpdep(bi,0,tba,t1);
+
+               align_target(16);
+               /* not-predicted outcome */
+               *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
+               live=tmp; /* Ouch again */
+               tbi=get_blockinfo_addr_new((void*)t2,1);
+               match_states(&(tbi->env));
+
+               //flush(1); /* Can only get here if was_comp==1 */
+               raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+               raw_jcc_l_oponly(9);
+               tba=(uae_u32*)get_target();
+               emit_long(get_handler(t2)-((uae_u32)tba+4));
+               raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
+               raw_jmp((uae_u32)popall_do_nothing);
+               create_jmpdep(bi,1,tba,t2);
+           }           
+           else 
+           {
+               if (was_comp) {
+                   flush(1);
+               }
+               
+               /* Let's find out where next_handler is... */
+               if (was_comp && isinreg(PC_P)) { 
+                   int r2;
+
+                   r=live.state[PC_P].realreg;
+                   
+                   if (r==0)
+                       r2=1;
+                   else 
+                       r2=0;
+                       
+                   raw_and_l_ri(r,TAGMASK);
+                   raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
+                   raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+                   raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,9);
+                   raw_jmp_r(r2);
+               }
+               else if (was_comp && isconst(PC_P)) {
+                   uae_u32 v=live.state[PC_P].val;
+                   uae_u32* tba;
+                   blockinfo* tbi;
+
+                   tbi=get_blockinfo_addr_new((void*)v,1);
+                   match_states(&(tbi->env));
+
+                   raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+                   raw_jcc_l_oponly(9);
+                   tba=(uae_u32*)get_target();
+                   emit_long(get_handler(v)-((uae_u32)tba+4));
+                   raw_mov_l_mi((uae_u32)&regs.pc_p,v);
+                   raw_jmp((uae_u32)popall_do_nothing);
+                   create_jmpdep(bi,0,tba,v);
+               }               
+               else {
+                   int r2;
+
+                   r=REG_PC_TMP;
+                   raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
+                   if (r==0)
+                       r2=1;
+                   else 
+                       r2=0;
+                       
+                   raw_and_l_ri(r,TAGMASK);
+                   raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
+                   raw_sub_l_mi((uae_u32)&countdown,scaled_cycles(totcycles));
+                   raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,9);
+                   raw_jmp_r(r2);
+               }
+           }
+       }
+
+       if (next_pc_p+extra_len>=max_pcp && 
+           next_pc_p+extra_len<max_pcp+LONGEST_68K_INST) 
+           max_pcp=next_pc_p+extra_len;  /* extra_len covers flags magic */
+       else
+           max_pcp+=LONGEST_68K_INST;
+       bi->len=max_pcp-min_pcp;
+       bi->min_pcp=min_pcp;
+                   
+       remove_from_list(bi);
+       if (isinrom(min_pcp) && isinrom(max_pcp)) 
+           add_to_dormant(bi); /* No need to checksum it on cache flush.
+                                  Please don't start changing ROMs in
+                                  flight! */
+       else {
+           calc_checksum(bi,&(bi->c1),&(bi->c2));
+           add_to_active(bi);
+       }
+
+       log_dump();
+       align_target(32);
+       current_compile_p=get_target();
+
+        raise_in_cl_list(bi);
+        bi->nexthandler=current_compile_p;
+
+        /* We will flush soon, anyway, so let's do it now */
+        if (current_compile_p>=max_compile_start)
+            flush_icache_hard(7);
+
+        do_extra_cycles(totcycles); /* for the compilation time */
+    }
+}
+
+
diff --git a/disk.c b/disk.c
index 6370464e80a07caf70842032b1088c1a059705fd..6d8ec58d84b3319d0836f886a990776df33d138d 100755 (executable)
--- a/disk.c
+++ b/disk.c
@@ -833,6 +833,7 @@ static int drive_insert (drive * drv, struct uae_prefs *p, int dnum, const char
     currprefs.df[dnum][255] = 0;
     strncpy (changed_prefs.df[dnum], fname, 255);
     changed_prefs.df[dnum][255] = 0;
+    strcpy (drv->newname, fname);
     gui_filename (dnum, fname);
 
     memset (buffer, 0, sizeof (buffer));
@@ -1657,6 +1658,9 @@ static void setdskchangetime(drive *drv, int dsktime)
        }
     }
     drv->dskchange_time = dsktime;
+#ifdef DISK_DEBUG
+    write_dlog("delayed insert enable %d\n", dsktime);
+#endif
 }
 
 void DISK_reinsert (int num)
index 3ad3825212a26bbedbbda0d212427b954d9b13bf..709c26eb1058d886866b5a23fab1f2f297492da1 100755 (executable)
--- a/filesys.c
+++ b/filesys.c
@@ -3414,7 +3414,7 @@ static void *filesys_thread (void *unit_v)
        ui->self->cmds_sent++;
        /* The message is sent by our interrupt handler, so make sure an interrupt
         * happens. */
-       uae_int_requested = 1;
+       do_uae_int_requested();
        /* Send back the locks. */
        if (get_long (ui->self->locklist) != 0)
            write_comm_pipe_int (ui->back_pipe, (int)(get_long (ui->self->locklist)), 0);
index a8b2a756ba8325eb1547ff2315fe31d4b77a4ddc..b0ceb2888a6d3b347c3ccef6966c20d134854efc 100755 (executable)
@@ -424,9 +424,24 @@ DECLARE(fabs_rr(FW d, FR s));
 DECLARE(frndint_rr(FW d, FR s));
 DECLARE(fsin_rr(FW d, FR s));
 DECLARE(fcos_rr(FW d, FR s));
+DECLARE(ftan_rr(FW d, FR s));
+DECLARE(fsincos_rr(FW d, FW c, FR s));
+DECLARE(fscale_rr(FRW d, FR s));
 DECLARE(ftwotox_rr(FW d, FR s));
 DECLARE(fetox_rr(FW d, FR s));
+DECLARE(fetoxM1_rr(FW d, FR s));
+DECLARE(ftentox_rr(FW d, FR s));
 DECLARE(flog2_rr(FW d, FR s));
+DECLARE(flogN_rr(FW d, FR s));
+DECLARE(flogNP1_rr(FW d, FR s));
+DECLARE(flog10_rr(FW d, FR s));
+DECLARE(fasin_rr(FW d, FR s));
+DECLARE(facos_rr(FW d, FR s));
+DECLARE(fatan_rr(FW d, FR s));
+DECLARE(fsinh_rr(FW d, FR s));
+DECLARE(fcosh_rr(FW d, FR s));
+DECLARE(ftanh_rr(FW d, FR s));
+DECLARE(fatanh_rr(FW d, FR s));
 DECLARE(fneg_rr(FW d, FR s));
 DECLARE(fadd_rr(FRW d, FR s));
 DECLARE(fsub_rr(FRW d, FR s));
diff --git a/include/compemu_old.h b/include/compemu_old.h
new file mode 100755 (executable)
index 0000000..a8b2a75
--- /dev/null
@@ -0,0 +1,527 @@
+#define USE_OPTIMIZER 0
+#define USE_LOW_OPTIMIZER 0
+#define USE_ALIAS 1
+#define USE_F_ALIAS 1
+#define USE_SOFT_FLUSH 1
+#define USE_OFFSET 1
+#define COMP_DEBUG 1
+
+#if COMP_DEBUG /* debug build: Dif(cond) { ... } behaves like if (cond) { ... } */
+#define Dif(x) if (x)
+#else /* non-debug build: the condition is not evaluated and the guarded body never runs */
+#define Dif(x) if (0)
+#endif
+
+#define SCALE 2
+#define MAXCYCLES (1000 * CYCLE_UNIT)
+#define MAXREGOPT 65536
+
+#define BYTES_PER_INST 10240  /* paranoid ;-) */
+#define LONGEST_68K_INST 16 /* The number of bytes the longest possible
+                              68k instruction takes */
+#define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums
+                                for. Anything larger will be flushed
+                                unconditionally even with SOFT_FLUSH */
+#define MAX_HOLD_BI 3  /* One for the current block, and up to two
+                         for jump targets */
+
+#define INDIVIDUAL_INST 0
+#define FLAG_C    0x0010
+#define FLAG_V    0x0008
+#define FLAG_Z    0x0004
+#define FLAG_N    0x0002
+#define FLAG_X    0x0001
+#define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V)
+#define FLAG_ZNV  (FLAG_Z | FLAG_N | FLAG_V)
+
+#define KILLTHERAT 1  /* Set to 1 to avoid some partial_rat_stalls */
+
+/* Whether to preserve registers across calls to JIT compiled routines */
+#if defined X86_ASSEMBLY
+#define USE_PUSH_POP 0
+#else
+#define USE_PUSH_POP 1
+#endif
+
+#define N_REGS 8  /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
+#define N_FREGS 6 /* That leaves us two positions on the stack to play with */
+
+/* Functions exposed to newcpu, or to what was moved from newcpu.c to
+ * compemu_support.c */
+extern void init_comp(void);
+extern void flush(int save_regs);
+extern void small_flush(int save_regs);
+extern void set_target(uae_u8* t);
+extern uae_u8* get_target(void);
+extern void freescratch(void);
+extern void build_comp(void);
+extern void set_cache_state(int enabled);
+extern int get_cache_state(void);
+extern uae_u32 get_jitted_size(void);
+#ifdef JIT
+extern void flush_icache(int n);
+#endif
+extern void alloc_cache(void);
+extern void compile_block(cpu_history* pc_hist, int blocklen, int totcyles);
+extern void lopt_emit_all(void);
+extern int check_for_cache_miss(void);
+
+
+#define scaled_cycles(x) (currprefs.m68k_speed==-1?(((x)/SCALE)?(((x)/SCALE<MAXCYCLES?((x)/SCALE):MAXCYCLES)):1):(x)) /* m68k_speed==-1: yields x/SCALE, replaced by 1 when that quotient is 0 and by MAXCYCLES when it is not below MAXCYCLES; any other speed: x unchanged. x is expanded several times, so pass a side-effect-free expression. */
+
+
+extern uae_u32 needed_flags;
+extern cacheline cache_tags[];
+extern uae_u8* comp_pc_p;
+extern void* pushall_call_handler;
+
+#define VREGS 32
+#define VFREGS 16
+
+#define INMEM 1
+#define CLEAN 2
+#define DIRTY 3
+#define UNDEF 4
+#define ISCONST 5
+
+typedef struct {
+  uae_u32* mem;
+  uae_u32 val;
+  uae_u8 is_swapped;
+  uae_u8 status;
+  uae_u8 realreg;
+  uae_u8 realind; /* The index in the holds[] array */
+  uae_u8 needflush;
+  uae_u8 validsize;
+  uae_u8 dirtysize;
+  uae_u8 dummy;
+} reg_status;
+
+typedef struct {
+  uae_u32* mem;
+  double val;
+  uae_u8 status;
+  uae_u8 realreg;
+  uae_u8 realind;  
+  uae_u8 needflush;
+} freg_status;
+
+typedef struct { /* per-opcode properties, looked up as prop[opcode] */
+    uae_u8 use_flags;     /* flag bits the opcode reads; compile_block ORs them into the live-flag set */
+    uae_u8 set_flags;     /* flag bits the opcode writes; compile_block masks them out of the live-flag set */
+    uae_u8 is_jump;       /* nonzero: end_block() always reports a block end */
+    uae_u8 is_addx;       /* nonzero: compile_block clears FLAG_Z from the live set when FLAG_Z is not live afterwards */
+    uae_u8 is_const_jump; /* nonzero: end_block() reports a block end unless currprefs.comp_constjump is set */
+} op_properties;
+extern op_properties prop[65536]; /* one entry per 16-bit opcode value */
+
+static __inline__ int end_block(uae_u16 opcode) /* nonzero when opcode must be the last instruction of a block */
+{
+    const op_properties* p = &prop[opcode]; /* table entry for this opcode */
+    return p->is_jump || (p->is_const_jump && !currprefs.comp_constjump); /* constant jumps end the block only while comp_constjump is off */
+}
+
+#define PC_P 16
+#define FLAGX 17
+#define FLAGTMP 18
+#define NEXT_HANDLER 19
+#define S1 20
+#define S2 21
+#define S3 22
+#define S4 23
+#define S5 24
+#define S6 25
+#define S7 26
+#define S8 27
+#define S9 28
+#define S10 29
+#define S11 30
+#define S12 31
+
+#define FP_RESULT 8
+#define FS1 9
+#define FS2 10
+#define FS3 11
+
+typedef struct {
+  uae_u32 touched;
+  uae_s8 holds[VREGS];
+  uae_u8 nholds;
+  uae_u8 canbyte;
+  uae_u8 canword;
+  uae_u8 locked;
+} n_status;
+
+typedef struct {
+    uae_s8 holds;
+    uae_u8 validsize;
+    uae_u8 dirtysize;
+} n_smallstatus;
+
+typedef struct {
+  uae_u32 touched;
+  uae_s8 holds[VFREGS];
+  uae_u8 nholds;
+  uae_u8 locked;
+} fn_status;
+
+/* For flag handling */
+#define NADA 1
+#define TRASH 2
+#define VALID 3
+
+/* needflush values */
+#define NF_SCRATCH   0
+#define NF_TOMEM     1
+#define NF_HANDLER   2
+
+typedef struct {
+    /* Integer part */
+    reg_status state[VREGS];
+    n_status   nat[N_REGS];
+    uae_u32 flags_on_stack;
+    uae_u32 flags_in_flags;
+    uae_u32 flags_are_important;
+    /* FPU part */
+    freg_status fate[VFREGS];
+    fn_status   fat[N_FREGS];
+
+    /* x86 FPU part */
+    uae_s8 spos[N_FREGS];
+    uae_s8 onstack[6];
+    uae_s8 tos;
+} bigstate;
+
+typedef struct {
+    /* Integer part */
+    n_smallstatus  nat[N_REGS];
+} smallstate;
+
+extern bigstate live;
+extern int touchcnt;
+
+
+#define IMM uae_u32
+#define R1  uae_u32
+#define R2  uae_u32
+#define R4  uae_u32
+#define W1  uae_u32
+#define W2  uae_u32
+#define W4  uae_u32
+#define RW1 uae_u32
+#define RW2 uae_u32
+#define RW4 uae_u32
+#define MEMR uae_u32
+#define MEMW uae_u32
+#define MEMRW uae_u32
+
+#define FW   uae_u32
+#define FR   uae_u32
+#define FRW  uae_u32
+
+#define MIDFUNC(nargs,func,args) void func args
+#define MENDFUNC(nargs,func,args) 
+#define COMPCALL(func) func
+
+#define LOWFUNC(flags,mem,nargs,func,args) static __inline__ void func args
+#define LENDFUNC(flags,mem,nargs,func,args) 
+
+#if USE_OPTIMIZER /* optimizer build: every operation gets two prototypes */
+#define REGALLOC_O 2
+#define PEEPHOLE_O 3 /* Has to be >= REGALLOC_O */
+#define DECLARE(func) extern void func; extern void do_##func /* DECLARE(f(args)) declares f(args) and do_f(args); the caller supplies the final ';' */
+#else /* USE_OPTIMIZER is 0 (its value at the top of this header): single prototype */
+#define REGALLOC_O 2000000
+#define PEEPHOLE_O 2000000
+#define DECLARE(func) extern void func /* DECLARE(f(args)) declares f(args) only; the caller supplies the final ';' */
+#endif
+
+
+/* What we expose to the outside */
+DECLARE(bt_l_ri(R4 r, IMM i));
+DECLARE(bt_l_rr(R4 r, R4 b));
+DECLARE(btc_l_ri(RW4 r, IMM i));
+DECLARE(btc_l_rr(RW4 r, R4 b));
+DECLARE(bts_l_ri(RW4 r, IMM i));
+DECLARE(bts_l_rr(RW4 r, R4 b));
+DECLARE(btr_l_ri(RW4 r, IMM i));
+DECLARE(btr_l_rr(RW4 r, R4 b));
+DECLARE(mov_l_rm(W4 d, IMM s));
+DECLARE(call_r(R4 r));
+DECLARE(sub_l_mi(IMM d, IMM s));
+DECLARE(mov_l_mi(IMM d, IMM s));
+DECLARE(mov_w_mi(IMM d, IMM s));
+DECLARE(mov_b_mi(IMM d, IMM s));
+DECLARE(rol_b_ri(RW1 r, IMM i));
+DECLARE(rol_w_ri(RW2 r, IMM i));
+DECLARE(rol_l_ri(RW4 r, IMM i));
+DECLARE(rol_l_rr(RW4 d, R1 r));
+DECLARE(rol_w_rr(RW2 d, R1 r));
+DECLARE(rol_b_rr(RW1 d, R1 r));
+DECLARE(shll_l_rr(RW4 d, R1 r));
+DECLARE(shll_w_rr(RW2 d, R1 r));
+DECLARE(shll_b_rr(RW1 d, R1 r));
+DECLARE(ror_b_ri(R1 r, IMM i));
+DECLARE(ror_w_ri(R2 r, IMM i));
+DECLARE(ror_l_ri(R4 r, IMM i));
+DECLARE(ror_l_rr(R4 d, R1 r));
+DECLARE(ror_w_rr(R2 d, R1 r));
+DECLARE(ror_b_rr(R1 d, R1 r));
+DECLARE(shrl_l_rr(RW4 d, R1 r));
+DECLARE(shrl_w_rr(RW2 d, R1 r));
+DECLARE(shrl_b_rr(RW1 d, R1 r));
+DECLARE(shra_l_rr(RW4 d, R1 r));
+DECLARE(shra_w_rr(RW2 d, R1 r));
+DECLARE(shra_b_rr(RW1 d, R1 r));
+DECLARE(shll_l_ri(RW4 r, IMM i));
+DECLARE(shll_w_ri(RW2 r, IMM i));
+DECLARE(shll_b_ri(RW1 r, IMM i));
+DECLARE(shrl_l_ri(RW4 r, IMM i));
+DECLARE(shrl_w_ri(RW2 r, IMM i));
+DECLARE(shrl_b_ri(RW1 r, IMM i));
+DECLARE(shra_l_ri(RW4 r, IMM i));
+DECLARE(shra_w_ri(RW2 r, IMM i));
+DECLARE(shra_b_ri(RW1 r, IMM i));
+DECLARE(setcc(W1 d, IMM cc));
+DECLARE(setcc_m(IMM d, IMM cc));
+DECLARE(cmov_l_rr(RW4 d, R4 s, IMM cc));
+DECLARE(cmov_l_rm(RW4 d, IMM s, IMM cc));
+DECLARE(bsf_l_rr(W4 d, R4 s));
+DECLARE(pop_m(IMM d));
+DECLARE(push_m(IMM d));
+DECLARE(pop_l(W4 d));
+DECLARE(push_l_i(IMM i));
+DECLARE(push_l(R4 s));
+DECLARE(clear_16(RW4 r));
+DECLARE(clear_8(RW4 r));
+DECLARE(sign_extend_16_rr(W4 d, R2 s));
+DECLARE(sign_extend_8_rr(W4 d, R1 s));
+DECLARE(zero_extend_16_rr(W4 d, R2 s));
+DECLARE(zero_extend_8_rr(W4 d, R1 s));
+DECLARE(imul_64_32(RW4 d, RW4 s));
+DECLARE(mul_64_32(RW4 d, RW4 s));
+DECLARE(imul_32_32(RW4 d, R4 s));
+DECLARE(mul_32_32(RW4 d, R4 s));
+DECLARE(mov_b_rr(W1 d, R1 s));
+DECLARE(mov_w_rr(W2 d, R2 s));
+DECLARE(mov_l_rrm_indexed(W4 d,R4 baser, R4 index, IMM factor));
+DECLARE(mov_w_rrm_indexed(W2 d, R4 baser, R4 index, IMM factor));
+DECLARE(mov_b_rrm_indexed(W1 d, R4 baser, R4 index, IMM factor));
+DECLARE(mov_l_mrr_indexed(R4 baser, R4 index, IMM factor, R4 s));
+DECLARE(mov_w_mrr_indexed(R4 baser, R4 index, IMM factor, R2 s));
+DECLARE(mov_b_mrr_indexed(R4 baser, R4 index, IMM factor, R1 s));
+DECLARE(mov_l_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R4 s));
+DECLARE(mov_w_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R2 s));
+DECLARE(mov_b_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R1 s));
+DECLARE(mov_l_brrm_indexed(W4 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE(mov_w_brrm_indexed(W2 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE(mov_b_brrm_indexed(W1 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE(mov_l_rm_indexed(W4 d, IMM base, R4 index, IMM factor));
+DECLARE(mov_l_rR(W4 d, R4 s, IMM offset));
+DECLARE(mov_w_rR(W2 d, R4 s, IMM offset));
+DECLARE(mov_b_rR(W1 d, R4 s, IMM offset));
+DECLARE(mov_l_brR(W4 d, R4 s, IMM offset));
+DECLARE(mov_w_brR(W2 d, R4 s, IMM offset));
+DECLARE(mov_b_brR(W1 d, R4 s, IMM offset));
+DECLARE(mov_l_Ri(R4 d, IMM i, IMM offset));
+DECLARE(mov_w_Ri(R4 d, IMM i, IMM offset));
+DECLARE(mov_b_Ri(R4 d, IMM i, IMM offset));
+DECLARE(mov_l_Rr(R4 d, R4 s, IMM offset));
+DECLARE(mov_w_Rr(R4 d, R2 s, IMM offset));
+DECLARE(mov_b_Rr(R4 d, R1 s, IMM offset));
+DECLARE(lea_l_brr(W4 d, R4 s, IMM offset));
+DECLARE(lea_l_brr_indexed(W4 d, R4 s, R4 index, IMM factor, IMM offset));
+DECLARE(lea_l_rr_indexed(W4 d, R4 s, R4 index, IMM factor));
+DECLARE(mov_l_bRr(R4 d, R4 s, IMM offset));
+DECLARE(mov_w_bRr(R4 d, R2 s, IMM offset));
+DECLARE(mov_b_bRr(R4 d, R1 s, IMM offset));
+DECLARE(bswap_32(RW4 r));
+DECLARE(bswap_16(RW2 r));
+DECLARE(mov_l_rr(W4 d, R4 s));
+DECLARE(mov_l_mr(IMM d, R4 s));
+DECLARE(mov_w_mr(IMM d, R2 s));
+DECLARE(mov_w_rm(W2 d, IMM s));
+DECLARE(mov_b_mr(IMM d, R1 s));
+DECLARE(mov_b_rm(W1 d, IMM s));
+DECLARE(mov_l_ri(W4 d, IMM s));
+DECLARE(mov_w_ri(W2 d, IMM s));
+DECLARE(mov_b_ri(W1 d, IMM s));
+DECLARE(add_l_mi(IMM d, IMM s) );
+DECLARE(add_w_mi(IMM d, IMM s) );
+DECLARE(add_b_mi(IMM d, IMM s) );
+DECLARE(test_l_ri(R4 d, IMM i));
+DECLARE(test_l_rr(R4 d, R4 s));
+DECLARE(test_w_rr(R2 d, R2 s));
+DECLARE(test_b_rr(R1 d, R1 s));
+DECLARE(and_l_ri(RW4 d, IMM i));
+DECLARE(and_l(RW4 d, R4 s));
+DECLARE(and_w(RW2 d, R2 s));
+DECLARE(and_b(RW1 d, R1 s));
+DECLARE(or_l_ri(RW4 d, IMM i));
+DECLARE(or_l(RW4 d, R4 s));
+DECLARE(or_w(RW2 d, R2 s));
+DECLARE(or_b(RW1 d, R1 s));
+DECLARE(adc_l(RW4 d, R4 s));
+DECLARE(adc_w(RW2 d, R2 s));
+DECLARE(adc_b(RW1 d, R1 s));
+DECLARE(add_l(RW4 d, R4 s));
+DECLARE(add_w(RW2 d, R2 s));
+DECLARE(add_b(RW1 d, R1 s));
+DECLARE(sub_l_ri(RW4 d, IMM i));
+DECLARE(sub_w_ri(RW2 d, IMM i));
+DECLARE(sub_b_ri(RW1 d, IMM i));
+DECLARE(add_l_ri(RW4 d, IMM i));
+DECLARE(add_w_ri(RW2 d, IMM i));
+DECLARE(add_b_ri(RW1 d, IMM i));
+DECLARE(sbb_l(RW4 d, R4 s));
+DECLARE(sbb_w(RW2 d, R2 s));
+DECLARE(sbb_b(RW1 d, R1 s));
+DECLARE(sub_l(RW4 d, R4 s));
+DECLARE(sub_w(RW2 d, R2 s));
+DECLARE(sub_b(RW1 d, R1 s));
+DECLARE(cmp_l(R4 d, R4 s));
+DECLARE(cmp_l_ri(R4 r, IMM i));
+DECLARE(cmp_w(R2 d, R2 s));
+DECLARE(cmp_b(R1 d, R1 s));
+DECLARE(xor_l(RW4 d, R4 s));
+DECLARE(xor_w(RW2 d, R2 s));
+DECLARE(xor_b(RW1 d, R1 s));
+DECLARE(live_flags(void));
+DECLARE(dont_care_flags(void));
+DECLARE(duplicate_carry(void));
+DECLARE(restore_carry(void));
+DECLARE(start_needflags(void));
+DECLARE(end_needflags(void));
+DECLARE(make_flags_live(void));
+DECLARE(call_r_11(R4 r, W4 out1, R4 in1, IMM osize, IMM isize));
+DECLARE(call_r_02(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2));
+DECLARE(readmem_new(R4 address, W4 dest, IMM offset, IMM size, W4 tmp));
+DECLARE(writemem_new(R4 address, R4 source, IMM offset, IMM size, W4 tmp));
+DECLARE(forget_about(W4 r));
+DECLARE(nop(void));
+
+DECLARE(f_forget_about(FW r));
+DECLARE(fmov_pi(FW r));
+DECLARE(fmov_log10_2(FW r));
+DECLARE(fmov_log2_e(FW r));
+DECLARE(fmov_loge_2(FW r));
+DECLARE(fmov_1(FW r));
+DECLARE(fmov_0(FW r));
+DECLARE(fmov_rm(FW r, MEMR m));
+DECLARE(fmovi_rm(FW r, MEMR m));
+DECLARE(fmovi_mr(MEMW m, FR r));
+DECLARE(fmovs_rm(FW r, MEMR m));
+DECLARE(fmovs_mr(MEMW m, FR r));
+DECLARE(fmov_mr(MEMW m, FR r));
+DECLARE(fmov_ext_mr(MEMW m, FR r));
+DECLARE(fmov_ext_rm(FW r, MEMR m));
+DECLARE(fmov_rr(FW d, FR s));
+DECLARE(fldcw_m_indexed(R4 index, IMM base));
+DECLARE(ftst_r(FR r));
+DECLARE(dont_care_fflags(void));
+DECLARE(fsqrt_rr(FW d, FR s));
+DECLARE(fabs_rr(FW d, FR s));
+DECLARE(frndint_rr(FW d, FR s));
+DECLARE(fsin_rr(FW d, FR s));
+DECLARE(fcos_rr(FW d, FR s));
+DECLARE(ftwotox_rr(FW d, FR s));
+DECLARE(fetox_rr(FW d, FR s));
+DECLARE(flog2_rr(FW d, FR s));
+DECLARE(fneg_rr(FW d, FR s));
+DECLARE(fadd_rr(FRW d, FR s));
+DECLARE(fsub_rr(FRW d, FR s));
+DECLARE(fmul_rr(FRW d, FR s));
+DECLARE(frem_rr(FRW d, FR s));
+DECLARE(frem1_rr(FRW d, FR s));
+DECLARE(fdiv_rr(FRW d, FR s));
+DECLARE(fcmp_rr(FR d, FR s));
+DECLARE(fflags_into_flags(W2 tmp));
+
+extern int failure; /* failure bits accumulated through FAIL(); compile_block resets it to 0 before each compiled opcode */
+#define FAIL(x) do { failure |= (x); } while (0)
+
+/* Convenience functions exposed to gencomp */
+extern uae_u32 m68k_pc_offset;
+extern void readbyte(int address, int dest, int tmp);
+extern void readword(int address, int dest, int tmp);
+extern void readlong(int address, int dest, int tmp);
+extern void writebyte(int address, int source, int tmp);
+extern void writeword(int address, int source, int tmp);
+extern void writelong(int address, int source, int tmp);
+extern void writeword_clobber(int address, int source, int tmp);
+extern void writelong_clobber(int address, int source, int tmp);
+extern void get_n_addr(int address, int dest, int tmp);
+extern void get_n_addr_jmp(int address, int dest, int tmp);
+extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
+extern int kill_rodent(int r);
+extern void sync_m68k_pc(void);
+extern uae_u32 get_const(int r);
+extern int  is_const(int r);
+extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond);
+extern void empty_optimizer(void);
+
+#define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1))
+#define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o)))
+#define comp_get_ilong(o) do_get_mem_long((uae_u32 *)(comp_pc_p + (o)))
+
+/* Preferences handling */
+void check_prefs_changed_comp (void);
+
+struct blockinfo_t;
+
+typedef struct dep_t {
+  uae_u32*            jmp_off;
+  struct blockinfo_t* target;
+  struct dep_t**      prev_p;
+  struct dep_t*       next;
+} dependency;
+
+typedef struct blockinfo_t { /* bookkeeping for one translated block; field notes below describe what compile_block does with them */
+    uae_s32 count; /* when >= 0, compile_block emits code that decrements it and jumps to popall_recompile_block once it goes negative; -1 makes compile_block move to the next optimisation level */
+    cpuop_func* direct_handler_to_use;
+    cpuop_func* handler_to_use; /* set together with handler by compile_block */
+    /* The direct handler does not check for the correct address */
+
+    cpuop_func* handler; /* entry that first compares regs.pc_p with the block start and leaves via popall_cache_miss on mismatch */
+    cpuop_func* direct_handler; /* entry located after that check */
+
+    cpuop_func* direct_pen;
+    cpuop_func* direct_pcc;
+
+    uae_u8* nexthandler; /* compile position recorded after the block has been emitted */
+    uae_u8* pc_p; /* location of the first instruction of the block (pc_hist[0].location) */
+    
+    uae_u32 c1; /* c1/c2: filled in by calc_checksum() for blocks that are not entirely in ROM */
+    uae_u32 c2;
+    uae_u32 len; /* max_pcp - min_pcp as computed by compile_block */
+
+    struct blockinfo_t* next_same_cl;
+    struct blockinfo_t** prev_same_cl_p;  
+    struct blockinfo_t* next;
+    struct blockinfo_t** prev_p; 
+
+    uae_u32 min_pcp; /* lowest instruction address seen in the block */
+    uae_u8 optlevel; /* optimisation level the block was compiled at */
+    uae_u8 needed_flags; /* live-flag set at block entry (liveflags[0]) */
+    uae_u8 status; /* NOTE(review): presumably one of BI_NEW / BI_COUNTING / BI_TARGETTED -- confirm */
+    uae_u8 havestate;
+    
+    dependency  dep[2];  /* Holds things we depend on */
+    dependency* deplist; /* List of things that depend on this */
+    smallstate  env; /* passed to match_states() when a block is entered or targeted */
+} blockinfo;
+
+#define BI_NEW 0
+#define BI_COUNTING 1
+#define BI_TARGETTED 2
+
+typedef struct {
+    uae_u8 type;
+    uae_u8 reg;
+    uae_u32 next;
+} regacc;
+
+void execute_normal(void);
+void exec_nostats(void);
+void do_nothing(void);
+
index ca986dd034f5fc41df4be371ebe8f3a82522ecdc..606e0f8a30d13c5d4c9da4be4f23e291d5fd8936 100755 (executable)
@@ -54,3 +54,10 @@ void native2amiga_startup (void);
  * It's emptied via exter_int_helper by the EXTER interrupt. */
 extern smp_comm_pipe native2amiga_pending;
 #endif
+
+STATIC_INLINE void do_uae_int_requested(void) /* flag a pending UAE interrupt request and raise it through INTREQ; returns nothing */
+{
+       uae_int_requested = 1; /* the flag that callers previously set directly */
+       set_uae_int_flag ();
+       INTREQ (0x8000 | 0x0008); /* NOTE(review): presumably SET/CLR (bit 15) | PORTS (bit 3) -- confirm against the INTREQ register layout */
+}
index 48a57875eb3c43c3d86cf3f49c259e5112fe9953..fe62801643e7046541e9f36c21920213654ad410 100755 (executable)
@@ -51,7 +51,7 @@ void uae_Cause(uaecptr interrupt)
     uae_sem_wait (&n2asem);
     write_comm_pipe_int (&native2amiga_pending, 3, 0);
     write_comm_pipe_u32 (&native2amiga_pending, interrupt, 1);
-    uae_int_requested = 1;
+    do_uae_int_requested();
     uae_sem_post (&n2asem);
 }
 
@@ -60,7 +60,7 @@ void uae_ReplyMsg(uaecptr msg)
     uae_sem_wait (&n2asem);
     write_comm_pipe_int (&native2amiga_pending, 2, 0);
     write_comm_pipe_u32 (&native2amiga_pending, msg, 1);
-    uae_int_requested = 1;
+    do_uae_int_requested();
     uae_sem_post (&n2asem);
 }
 
@@ -70,7 +70,7 @@ void uae_PutMsg(uaecptr port, uaecptr msg)
     write_comm_pipe_int (&native2amiga_pending, 1, 0);
     write_comm_pipe_u32 (&native2amiga_pending, port, 0);
     write_comm_pipe_u32 (&native2amiga_pending, msg, 1);
-    uae_int_requested = 1;
+    do_uae_int_requested();
     uae_sem_post (&n2asem);
 }
 
@@ -80,7 +80,7 @@ void uae_Signal(uaecptr task, uae_u32 mask)
     write_comm_pipe_int (&native2amiga_pending, 0, 0);
     write_comm_pipe_u32 (&native2amiga_pending, task, 0);
     write_comm_pipe_int (&native2amiga_pending, mask, 1);
-    uae_int_requested = 1;
+    do_uae_int_requested();
     uae_sem_post (&n2asem);
 }
 
@@ -90,7 +90,7 @@ void uae_NotificationHack(uaecptr port, uaecptr nr)
     write_comm_pipe_int (&native2amiga_pending, 4, 0);
     write_comm_pipe_int (&native2amiga_pending, port, 0);
     write_comm_pipe_int (&native2amiga_pending, nr, 1);
-    uae_int_requested = 1;
+    do_uae_int_requested();
     uae_sem_post (&n2asem);
 }
 
index c6dae9ad8180cbd114fa6377f682c5cb2447bbb6..a89347b6093b17755d879a0321f26a8dccf4c844 100755 (executable)
@@ -64,9 +64,9 @@ void ioport_free (void)
     if (initialized) {
        pShutdownWinIo();
        FreeLibrary (ioh);
+        io_log ("io freed\n");
     }
 #endif
-    io_log ("io freed\n");
     initialized = 0;
 }
 
index e68baf1798c4815fa990a4efefea11c4ad29ca33..74e768efa69540599cc33dc5d0f173a4e8584d36 100755 (executable)
@@ -241,9 +241,8 @@ void *shmat(int shmid, LPVOID shmaddr, int shmflg)
     if ((shmids[shmid].key == shmid) && shmids[shmid].size) {
        got = FALSE;
        if (got == FALSE) {
-           if (shmaddr) {
-               result = (void*)VirtualFree(shmaddr, 0, os_winnt ? MEM_RESET : MEM_RELEASE);
-           }
+           if (shmaddr)
+               VirtualFree(shmaddr, 0, os_winnt ? MEM_RESET : MEM_RELEASE);
            result = VirtualAlloc(shmaddr, size, os_winnt ? MEM_COMMIT : (MEM_RESERVE | MEM_COMMIT),
                PAGE_EXECUTE_READWRITE);
            if (result == NULL) {
index 0b254959024af5710c957fbcc1a45bc9d79d6b80..a1b942b27bff47eb3daa1ba32159ee4946e8ce3c 100755 (executable)
@@ -22,7 +22,7 @@ extern int manual_palette_refresh_needed;
 extern int mouseactive, focus;
 extern int ignore_messages_all;
 #define WINUAEBETA 1
-#define WINUAEBETASTR " Beta 7"
+#define WINUAEBETASTR " Beta 9"
 
 extern char start_path_exe[MAX_DPATH];
 extern char start_path_data[MAX_DPATH];
index 46fc90e0ddda64456f1a52e6eda7ebc48a19b211..4cb21296505a48528ad7ad8b9d37441307fa4b78 100755 (executable)
@@ -113,12 +113,12 @@ void filesys_init( void )
            /* Is this drive-letter valid (it used to check for media in drive) */
             if( ( dwDriveMask & 1 ) /* && CheckRM( volumepath ) */ ) 
             {
-               BOOL inserted = CheckRM( volumepath ); /* Is there a disk inserted? */
-                drivetype = GetDriveType( volumepath );
+               BOOL inserted = CheckRM(volumepath); /* Is there a disk inserted? */
+                drivetype = GetDriveType(volumepath);
                if (drivetype != DRIVE_CDROM) {
 
                    get_volume_name( currprefs.mountinfo, volumepath, volumename, MAX_DPATH, inserted, drivetype, 1 );
-                   if( drivetype == DRIVE_REMOTE )
+                   if (drivetype == DRIVE_REMOTE)
                        strcat( volumepath, "." );
                    else
                        strcat( volumepath, ".." );
index f97fc89c48fdb33494710e43dd61224ecaad9cc3..5ea195697df8f6d5f0f56eee622ef78174838e9d 100755 (executable)
@@ -4281,7 +4281,7 @@ static void values_to_miscdlg (HWND hDlg)
     CheckDlgButton (hDlg, IDC_CLOCKSYNC, workprefs.tod_hack);
     cw = catweasel_detect();
     EnableWindow (GetDlgItem (hDlg, IDC_CATWEASEL), cw);
-    if (!cw)
+    if (!cw && workprefs.catweasel < 100)
        workprefs.catweasel = 0;
     CheckDlgButton (hDlg, IDC_CATWEASEL, workprefs.catweasel);
     CheckDlgButton (hDlg, IDC_STATE_CAPTURE, workprefs.statecapture);
index efb0829e072e21605e26893576aca4b13cff9584..10aa4cecdf94611da1e6616121ada7c42c78d1c2 100755 (executable)
                                BufferSecurityCheck="false"
                                EnableFunctionLevelLinking="false"
                                EnableEnhancedInstructionSet="0"
+                               FloatingPointModel="0"
                                TreatWChar_tAsBuiltInType="false"
                                RuntimeTypeInfo="false"
                                UsePrecompiledHeader="0"